xf86-video-ati: Branch 'master' - 12 commits

Alex Deucher agd5f at kemper.freedesktop.org
Mon Oct 4 09:44:22 PDT 2010


 src/Makefile.am                     |    8 
 src/evergreen_accel.c               | 1112 +++++++++
 src/evergreen_exa.c                 | 1947 +++++++++++++++++
 src/evergreen_reg.h                 |  247 ++
 src/evergreen_reg_auto.h            | 4039 ++++++++++++++++++++++++++++++++++++
 src/evergreen_shader.c              | 2790 ++++++++++++++++++++++++
 src/evergreen_shader.h              |  292 ++
 src/evergreen_state.h               |  341 +++
 src/evergreen_textured_videofuncs.c |  556 ++++
 src/r600_exa.c                      |  144 -
 src/r600_state.h                    |   42 
 src/r600_textured_videofuncs.c      |   60 
 src/r6xx_accel.c                    |  122 -
 src/radeon.h                        |   39 
 src/radeon_accel.c                  |    5 
 src/radeon_dri2.c                   |   89 
 src/radeon_exa_shared.c             |   50 
 src/radeon_exa_shared.h             |    4 
 src/radeon_kms.c                    |   35 
 src/radeon_reg.h                    |    6 
 src/radeon_textured_video.c         |    8 
 src/radeon_vbo.c                    |   38 
 src/radeon_vbo.h                    |   40 
 23 files changed, 11735 insertions(+), 279 deletions(-)

New commits:
commit 0921ecc1c751df0dd56e0b1d0b78ab53d7164904
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Mon Oct 4 12:34:47 2010 -0400

    evergreen: use EXACreatePixmap2 if available

diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 7b6b7ea..f9d3b48 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -1873,6 +1873,9 @@ EVERGREENDrawInit(ScreenPtr pScreen)
     info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
     info->accel_state->exa->UploadToScreen = EVERGREENUploadToScreen;
     info->accel_state->exa->DownloadFromScreen = EVERGREENDownloadFromScreen;
+#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 5)
+    info->accel_state->exa->CreatePixmap2 = RADEONEXACreatePixmap2;
+#endif
 
     info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
 #ifdef EXA_SUPPORTS_PREPARE_AUX
diff --git a/src/evergreen_state.h b/src/evergreen_state.h
index 5869256..7e49164 100644
--- a/src/evergreen_state.h
+++ b/src/evergreen_state.h
@@ -331,6 +331,9 @@ EVERGREENSetAccelState(ScrnInfoPtr pScrn,
 extern Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index);
 extern void RADEONFinishAccess_CS(PixmapPtr pPix, int index);
 extern void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align);
+extern void *RADEONEXACreatePixmap2(ScreenPtr pScreen, int width, int height,
+				    int depth, int usage_hint, int bitsPerPixel,
+				    int *new_pitch);
 extern void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv);
 extern struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix);
 extern Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix);
commit 5bdb6434975584eef90eb9e5955b9c2a14b7f327
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Mon Oct 4 12:31:51 2010 -0400

    evergreen: port Karl's UTS/DFS changes to evergreen

diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 7b2a65e..7b6b7ea 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -1524,13 +1524,18 @@ EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
     struct radeon_exa_pixmap_priv *driver_priv;
-    struct radeon_bo *scratch;
+    struct radeon_bo *scratch = NULL;
+    struct radeon_bo *copy_dst;
+    unsigned char *dst;
     unsigned size;
     uint32_t dst_domain;
     int bpp = pDst->drawable.bitsPerPixel;
     uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+    uint32_t copy_pitch;
     uint32_t src_pitch_hw = scratch_pitch / (bpp / 8);
     uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
+    int ret;
+    Bool flush = TRUE;
     Bool r;
     int i;
     struct r600_accel_object src_obj, dst_obj;
@@ -1540,15 +1545,19 @@ EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
 
     driver_priv = exaGetPixmapDriverPrivate(pDst);
 
-    /* If we know the BO won't be busy, don't bother */
-    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs) &&
-	!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
-	return FALSE;
+    /* If we know the BO won't be busy, don't bother with a scratch */
+    copy_dst = driver_priv->bo;
+    copy_pitch = pDst->devKind;
+    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
+	flush = FALSE;
+	if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
+	    goto copy;
+    }
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
-	return FALSE;
+	goto copy;
     }
 
     src_obj.pitch = src_pitch_hw;
@@ -1573,33 +1582,45 @@ EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
 				&dst_obj,
 				accel_state->copy_vs_offset, accel_state->copy_ps_offset,
 				3, 0xffffffff)) {
-        r = FALSE;
-        goto out;
+        goto copy;
     }
+    copy_dst = scratch;
+    copy_pitch = scratch_pitch;
+    flush = FALSE;
 
-    r = radeon_bo_map(scratch, 0);
-    if (r) {
+copy:
+    if (flush)
+	radeon_cs_flush_indirect(pScrn);
+
+    ret = radeon_bo_map(copy_dst, 0);
+    if (ret) {
         r = FALSE;
         goto out;
     }
     r = TRUE;
     size = w * bpp / 8;
+    dst = copy_dst->ptr;
+    if (copy_dst == driver_priv->bo)
+	dst += y * copy_pitch + x * bpp / 8;
     for (i = 0; i < h; i++) {
-        memcpy(scratch->ptr + i * scratch_pitch, src, size);
+	memcpy(dst + i * copy_pitch, src, size);
         src += src_pitch;
     }
-    radeon_bo_unmap(scratch);
+    radeon_bo_unmap(copy_dst);
 
-    if (info->accel_state->vsync)
-	RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
+    if (copy_dst == scratch) {
+	if (info->accel_state->vsync)
+	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
 
-    /* blit from gart to vram */
-    EVERGREENDoPrepareCopy(pScrn);
-    EVERGREENAppendCopyVertex(pScrn, 0, 0, x, y, w, h);
-    EVERGREENDoCopyVline(pDst);
+	/* blit from gart to vram */
+	EVERGREENDoPrepareCopy(pScrn);
+	EVERGREENAppendCopyVertex(pScrn, 0, 0, x, y, w, h);
+	EVERGREENDoCopyVline(pDst);
+    }
 
 out:
-    radeon_bo_unref(scratch);
+    if (scratch)
+	radeon_bo_unref(scratch);
     return r;
 }
 
@@ -1611,13 +1632,17 @@ EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
     struct radeon_exa_pixmap_priv *driver_priv;
-    struct radeon_bo *scratch;
+    struct radeon_bo *scratch = NULL;
+    struct radeon_bo *copy_src;
     unsigned size;
     uint32_t src_domain = 0;
     int bpp = pSrc->drawable.bitsPerPixel;
     uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+    uint32_t copy_pitch;
     uint32_t dst_pitch_hw = scratch_pitch / (bpp / 8);
     uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
+    int ret;
+    Bool flush = FALSE;
     Bool r;
     struct r600_accel_object src_obj, dst_obj;
 
@@ -1626,24 +1651,28 @@ EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
 
     driver_priv = exaGetPixmapDriverPrivate(pSrc);
 
-    /* If we know the BO won't end up in VRAM anyway, don't bother */
+    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
+    copy_src = driver_priv->bo;
+    copy_pitch = pSrc->devKind;
     if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	src_domain = radeon_bo_get_src_domain(driver_priv->bo);
 	if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
 	    (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
 	    src_domain = 0;
+	else /* A write may be scheduled */
+	    flush = TRUE;
     }
 
     if (!src_domain)
 	radeon_bo_is_busy(driver_priv->bo, &src_domain);
 
-    if (src_domain != RADEON_GEM_DOMAIN_VRAM)
-	return FALSE;
+    if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
+	goto copy;
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
-	return FALSE;
+	goto copy;
     }
     radeon_cs_space_reset_bos(info->cs);
     radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
@@ -1652,10 +1681,9 @@ EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
     radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
     accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
     radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
-    r = radeon_cs_space_check(info->cs);
-    if (r) {
-        r = FALSE;
-        goto out;
+    ret = radeon_cs_space_check(info->cs);
+    if (ret) {
+	goto copy;
     }
 
     src_obj.pitch = src_pitch_hw;
@@ -1680,33 +1708,42 @@ EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
 				&dst_obj,
 				accel_state->copy_vs_offset, accel_state->copy_ps_offset,
 				3, 0xffffffff)) {
-        r = FALSE;
-        goto out;
+	goto copy;
     }
 
     /* blit from vram to gart */
     EVERGREENDoPrepareCopy(pScrn);
     EVERGREENAppendCopyVertex(pScrn, x, y, 0, 0, w, h);
     EVERGREENDoCopy(pScrn);
+    copy_src = scratch;
+    copy_pitch = scratch_pitch;
+    flush = TRUE;
 
-    radeon_cs_flush_indirect(pScrn);
+copy:
+    if (flush)
+	radeon_cs_flush_indirect(pScrn);
 
-    r = radeon_bo_map(scratch, 0);
-    if (r) {
+    ret = radeon_bo_map(copy_src, 0);
+    if (ret) {
+	ErrorF("failed to map pixmap: %d\n", ret);
         r = FALSE;
         goto out;
     }
     r = TRUE;
     w *= bpp / 8;
-    size = 0;
+    if (copy_src == driver_priv->bo)
+	size = y * copy_pitch + x * bpp / 8;
+    else
+	size = 0;
     while (h--) {
-        memcpy(dst, scratch->ptr + size, w);
-        size += scratch_pitch;
+	memcpy(dst, copy_src->ptr + size, w);
+	size += copy_pitch;
         dst += dst_pitch;
     }
-    radeon_bo_unmap(scratch);
+    radeon_bo_unmap(copy_src);
 out:
-    radeon_bo_unref(scratch);
+    if (scratch)
+	radeon_bo_unref(scratch);
     return r;
 }
 
commit be8f45cbd313b68ad663f303c64edb4525b8f981
Merge: e843faf... ff5f466...
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Mon Oct 4 12:36:56 2010 -0400

    Merge branch 'evergreen_accel' of git+ssh://git.freedesktop.org/git/xorg/driver/xf86-video-ati

commit ff5f466e6ca0ee541a255facff6c9198976dfd52
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Fri Oct 1 15:46:43 2010 -0400

    evergreen/dri2: fix depth allocation for depth+stencil
    
    evergreen uses separate allocations for depth and stencil,
    so to handle that, create a depth buffer large enough to
    handle both.  This is required for using the stencil
    buffer in mesa.

diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c
index 4cafbc6..ec52f6a 100644
--- a/src/radeon_dri2.c
+++ b/src/radeon_dri2.c
@@ -71,7 +71,7 @@ radeon_dri2_create_buffers(DrawablePtr drawable,
     struct dri2_buffer_priv *privates;
     PixmapPtr pixmap, depth_pixmap;
     struct radeon_exa_pixmap_priv *driver_priv;
-    int i, r;
+    int i, r, need_enlarge = 0;
     int flags = 0;
 
     buffers = calloc(count, sizeof *buffers);
@@ -100,7 +100,6 @@ radeon_dri2_create_buffers(DrawablePtr drawable,
 	    /* tile the back buffer */
 	    switch(attachments[i]) {
 	    case DRI2BufferDepth:
-	    case DRI2BufferDepthStencil:
 		if (info->ChipFamily >= CHIP_FAMILY_R600)
 		    /* macro is the preferred setting, but the 2D detiling for software
 		     * fallbacks in mesa still has issues on some configurations
@@ -109,6 +108,17 @@ radeon_dri2_create_buffers(DrawablePtr drawable,
 		else
 		    flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO;
 		break;
+	    case DRI2BufferDepthStencil:
+		if (info->ChipFamily >= CHIP_FAMILY_R600) {
+		    /* macro is the preferred setting, but the 2D detiling for software
+		     * fallbacks in mesa still has issues on some configurations
+		     */
+		    flags = RADEON_CREATE_PIXMAP_TILING_MICRO;
+		    if (info->ChipFamily >= CHIP_FAMILY_CEDAR)
+			need_enlarge = 1;
+		} else
+		    flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO;
+		break;
 	    case DRI2BufferBackLeft:
 	    case DRI2BufferBackRight:
 	    case DRI2BufferFakeFrontLeft:
@@ -124,11 +134,31 @@ radeon_dri2_create_buffers(DrawablePtr drawable,
 	    default:
 		flags = 0;
 	    }
-	    pixmap = (*pScreen->CreatePixmap)(pScreen,
-                                              drawable->width,
-                                              drawable->height,
-                                              drawable->depth,
-                                              flags);
+
+	    if (need_enlarge) {
+		/* evergreen uses separate allocations for depth and stencil
+		 * so we make an extra large depth buffer to cover stencil
+		 * as well.
+		 */
+		int pitch = drawable->width * (drawable->depth / 8);
+		int aligned_height = (drawable->height + 7) & ~7;
+		int size = pitch * aligned_height;
+		size = (size + 255) & ~255;
+		size += drawable->width * aligned_height;
+		aligned_height = ((size / pitch) + 7) & ~7;
+
+		pixmap = (*pScreen->CreatePixmap)(pScreen,
+						  drawable->width,
+						  aligned_height,
+						  drawable->depth,
+						  flags);
+
+	    } else
+		pixmap = (*pScreen->CreatePixmap)(pScreen,
+						  drawable->width,
+						  drawable->height,
+						  drawable->depth,
+						  flags);
         }
 
         if (attachments[i] == DRI2BufferDepth) {
@@ -165,7 +195,7 @@ radeon_dri2_create_buffer(DrawablePtr drawable,
     struct dri2_buffer_priv *privates;
     PixmapPtr pixmap, depth_pixmap;
     struct radeon_exa_pixmap_priv *driver_priv;
-    int r;
+    int r, need_enlarge = 0;
     int flags;
 
     buffers = calloc(1, sizeof *buffers);
@@ -194,7 +224,6 @@ radeon_dri2_create_buffer(DrawablePtr drawable,
 	/* tile the back buffer */
 	switch(attachment) {
 	case DRI2BufferDepth:
-	case DRI2BufferDepthStencil:
 	    /* macro is the preferred setting, but the 2D detiling for software
 	     * fallbacks in mesa still has issues on some configurations
 	     */
@@ -203,6 +232,17 @@ radeon_dri2_create_buffer(DrawablePtr drawable,
 	    else
 		flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO;
 	    break;
+	case DRI2BufferDepthStencil:
+	    /* macro is the preferred setting, but the 2D detiling for software
+	     * fallbacks in mesa still has issues on some configurations
+	     */
+	    if (info->ChipFamily >= CHIP_FAMILY_R600) {
+		flags = RADEON_CREATE_PIXMAP_TILING_MICRO;
+		if (info->ChipFamily >= CHIP_FAMILY_CEDAR)
+		    need_enlarge = 1;
+	    } else
+		flags = RADEON_CREATE_PIXMAP_TILING_MACRO | RADEON_CREATE_PIXMAP_TILING_MICRO;
+	    break;
 	case DRI2BufferBackLeft:
 	case DRI2BufferBackRight:
 	case DRI2BufferFakeFrontLeft:
@@ -218,11 +258,32 @@ radeon_dri2_create_buffer(DrawablePtr drawable,
 	default:
 	    flags = 0;
 	}
-        pixmap = (*pScreen->CreatePixmap)(pScreen,
-                drawable->width,
-                drawable->height,
-                (format != 0)?format:drawable->depth,
-                flags);
+
+	if (need_enlarge) {
+	    /* evergreen uses separate allocations for depth and stencil
+	     * so we make an extra large depth buffer to cover stencil
+	     * as well.
+	     */
+	    int depth = (format != 0) ? format : drawable->depth;
+	    int pitch = drawable->width * (depth / 8);
+	    int aligned_height = (drawable->height + 7) & ~7;
+	    int size = pitch * aligned_height;
+	    size = (size + 255) & ~255;
+	    size += drawable->width * aligned_height;
+	    aligned_height = ((size / pitch) + 7) & ~7;
+
+	    pixmap = (*pScreen->CreatePixmap)(pScreen,
+					      drawable->width,
+					      aligned_height,
+					      (format != 0)?format:drawable->depth,
+					      flags);
+
+	} else
+	    pixmap = (*pScreen->CreatePixmap)(pScreen,
+					      drawable->width,
+					      drawable->height,
+					      (format != 0)?format:drawable->depth,
+					      flags);
     }
 
     if (attachment == DRI2BufferDepth) {
commit e26a59e9db8067882327f872e3d2d760ce4c66f3
Author: Mathieu Bérard <mathieu at mberard.eu>
Date:   Thu Sep 9 19:32:38 2010 -0400

    Fix possible NULL deref in evergreen_cp_wait_vline_sync

diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c
index d319bc2..38666ee 100644
--- a/src/evergreen_accel.c
+++ b/src/evergreen_accel.c
@@ -242,12 +242,14 @@ void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
 				  xf86CrtcPtr crtc, int start, int stop)
 {
     RADEONInfoPtr  info = RADEONPTR(pScrn);
-    drmmode_crtc_private_ptr drmmode_crtc = crtc->driver_private;
+    drmmode_crtc_private_ptr drmmode_crtc;
     uint32_t offset;
 
     if (!crtc)
         return;
 
+    drmmode_crtc = crtc->driver_private;
+
     if (stop < start)
         return;
 
commit 7cc0d3311f23ad569af004e0e4a0e2efbac107e5
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Fri Sep 3 00:32:27 2010 -0400

    evergreen: fix Xv
    
    VS const buffer offset was wrong.
    
    fixes:
    https://bugs.freedesktop.org/show_bug.cgi?id=29788

diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
index 7cf7279..a674680 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -487,7 +487,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 
     /* VS alu constants */
     vs_const_conf.bo = accel_state->cbuf.vb_bo;
-    vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op;
+    vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op + 256;
     evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
 
     if (pPriv->vsync) {
commit 7c6ce4e62693f446d7d3c8a86502ccc03c0e55b0
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Thu Sep 2 20:34:56 2010 -0400

    evergreen: add additional default state
    
    Note, you also need a drm patch to fix the GPU hangs:
    drm/radeon/kms/evergreen: fix gpu hangs in userspace accel code
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c
index 1b40aca..d319bc2 100644
--- a/src/evergreen_accel.c
+++ b/src/evergreen_accel.c
@@ -868,9 +868,18 @@ evergreen_set_default_state(ScrnInfoPtr pScrn)
     RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
     END_BATCH();
 
-    BEGIN_BATCH(45);
+    BEGIN_BATCH(3 + 2);
+    EREG(DB_HTILE_DATA_BASE,                    0);
+    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(49);
     EREG(DB_DEPTH_CONTROL,                    0);
 
+    PACK0(PA_SC_VPORT_ZMIN_0, 2);
+    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
+    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
+
     PACK0(DB_RENDER_CONTROL, 5);
     E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
     E32(0); // DB_COUNT_CONTROL
commit 141cbc283fddeb67c2a6c47a0f0f5c2aa2bfb430
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Tue Aug 24 18:01:05 2010 -0400

    evergreen Xv: remove stray line that caused a segfault
    
    fixes:
    https://bugs.freedesktop.org/show_bug.cgi?id=29788

diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
index 3ff4d61..7cf7279 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -478,7 +478,6 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     vs_const_conf.type = SHADER_TYPE_VS;
     vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
 
-    vs_alu_consts = (float *)vs_const_conf.bo->ptr;
     vs_alu_consts[0] = 1.0 / pPriv->w;
     vs_alu_consts[1] = 1.0 / pPriv->h;
     vs_alu_consts[2] = 0.0;
commit f170dddd3538a587f2363ef5fa10a4b484e762da
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Mon Aug 23 18:58:12 2010 -0400

    evergreen: use vbo pool for constant buffers

diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c
index 31e460d..1b40aca 100644
--- a/src/evergreen_accel.c
+++ b/src/evergreen_accel.c
@@ -1062,6 +1062,7 @@ void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
 	radeon_ib_discard(pScrn);
 	radeon_cs_flush_indirect(pScrn);
 	radeon_vb_discard(pScrn, &accel_state->vbo);
+	radeon_vb_discard(pScrn, &accel_state->cbuf);
 	return;
     }
 
@@ -1093,6 +1094,7 @@ void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
 				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
 
     accel_state->vbo.vb_start_op = -1;
+    accel_state->cbuf.vb_start_op = -1;
     accel_state->ib_reset_op = 0;
 
 }
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 434516b..7b2a65e 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -158,7 +158,6 @@ EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     cb_config_t     cb_conf;
     shader_config_t vs_conf, ps_conf;
     int pmask = 0;
-    int ret;
     uint32_t a, r, g, b;
     float *ps_alu_consts;
     const_config_t ps_const_conf;
@@ -193,53 +192,8 @@ EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     CLEAR (ps_conf);
     CLEAR (ps_const_conf);
 
-    ps_const_conf.bo = radeon_bo_open(info->bufmgr, 0, 256, 0,
-				      RADEON_GEM_DOMAIN_GTT, 0);
-    if (ps_const_conf.bo == NULL)
-	RADEON_FALLBACK(("ps const buffer alloc failed\n"));
-    ret = radeon_bo_map(ps_const_conf.bo, 0);
-    if (ret)
-	RADEON_FALLBACK(("ps const buffer map failed\n"));
-
-    /* PS alu constants */
-    ps_const_conf.size_bytes = 256;
-    ps_const_conf.const_addr = 0;
-    ps_const_conf.type = SHADER_TYPE_PS;
-    ps_alu_consts = (float *)ps_const_conf.bo->ptr;
-    if (accel_state->dst_obj.bpp == 16) {
-	r = (fg >> 11) & 0x1f;
-	g = (fg >> 5) & 0x3f;
-	b = (fg >> 0) & 0x1f;
-	ps_alu_consts[0] = (float)r / 31; /* R */
-	ps_alu_consts[1] = (float)g / 63; /* G */
-	ps_alu_consts[2] = (float)b / 31; /* B */
-	ps_alu_consts[3] = 1.0; /* A */
-    } else if (accel_state->dst_obj.bpp == 8) {
-	a = (fg >> 0) & 0xff;
-	ps_alu_consts[0] = 0.0; /* R */
-	ps_alu_consts[1] = 0.0; /* G */
-	ps_alu_consts[2] = 0.0; /* B */
-	ps_alu_consts[3] = (float)a / 255; /* A */
-    } else {
-	a = (fg >> 24) & 0xff;
-	r = (fg >> 16) & 0xff;
-	g = (fg >> 8) & 0xff;
-	b = (fg >> 0) & 0xff;
-	ps_alu_consts[0] = (float)r / 255; /* R */
-	ps_alu_consts[1] = (float)g / 255; /* G */
-	ps_alu_consts[2] = (float)b / 255; /* B */
-	ps_alu_consts[3] = (float)a / 255; /* A */
-    }
-    radeon_bo_unmap(ps_const_conf.bo);
-
-    radeon_cs_space_add_persistent_bo(info->cs, ps_const_conf.bo,
-				      RADEON_GEM_DOMAIN_GTT, 0);
-    if (radeon_cs_space_check(info->cs)) {
-	radeon_bo_unref(ps_const_conf.bo);
-	RADEON_FALLBACK(("ps const buffer size check failed\n"));
-    }
-
     radeon_vbo_check(pScrn, &accel_state->vbo, 16);
+    radeon_vbo_check(pScrn, &accel_state->cbuf, 256);
     radeon_cp_start(pScrn);
 
     evergreen_set_default_state(pScrn);
@@ -320,6 +274,39 @@ EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     E32(FLAT_SHADE_ENA_bit); // SPI_INTERP_CONTROL_0
     END_BATCH();
 
+
+    /* PS alu constants */
+    ps_const_conf.size_bytes = 256;
+    ps_const_conf.type = SHADER_TYPE_PS;
+    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
+    if (accel_state->dst_obj.bpp == 16) {
+	r = (fg >> 11) & 0x1f;
+	g = (fg >> 5) & 0x3f;
+	b = (fg >> 0) & 0x1f;
+	ps_alu_consts[0] = (float)r / 31; /* R */
+	ps_alu_consts[1] = (float)g / 63; /* G */
+	ps_alu_consts[2] = (float)b / 31; /* B */
+	ps_alu_consts[3] = 1.0; /* A */
+    } else if (accel_state->dst_obj.bpp == 8) {
+	a = (fg >> 0) & 0xff;
+	ps_alu_consts[0] = 0.0; /* R */
+	ps_alu_consts[1] = 0.0; /* G */
+	ps_alu_consts[2] = 0.0; /* B */
+	ps_alu_consts[3] = (float)a / 255; /* A */
+    } else {
+	a = (fg >> 24) & 0xff;
+	r = (fg >> 16) & 0xff;
+	g = (fg >> 8) & 0xff;
+	b = (fg >> 0) & 0xff;
+	ps_alu_consts[0] = (float)r / 255; /* R */
+	ps_alu_consts[1] = (float)g / 255; /* G */
+	ps_alu_consts[2] = (float)b / 255; /* B */
+	ps_alu_consts[3] = (float)a / 255; /* A */
+    }
+    radeon_vbo_commit(pScrn, &accel_state->cbuf);
+
+    ps_const_conf.bo = accel_state->cbuf.vb_bo;
+    ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op;
     evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
 
     if (accel_state->vsync)
@@ -1208,7 +1195,7 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
     shader_config_t vs_conf, ps_conf;
     const_config_t vs_const_conf;
     struct r600_accel_object src_obj, mask_obj, dst_obj;
-    int ret;
+    float *cbuf;
 
     //return FALSE;
 
@@ -1284,37 +1271,13 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
     CLEAR (ps_conf);
     CLEAR (vs_const_conf);
 
-    vs_const_conf.bo = radeon_bo_open(info->bufmgr, 0, 256, 0,
-				      RADEON_GEM_DOMAIN_GTT, 0);
-    if (vs_const_conf.bo == NULL)
-	RADEON_FALLBACK(("vs const buffer alloc failed\n"));
-    ret = radeon_bo_map(vs_const_conf.bo, 0);
-    if (ret)
-	RADEON_FALLBACK(("vs const buffer map failed\n"));
-
-    /* VS alu constants */
-    vs_const_conf.size_bytes = 256;
-    vs_const_conf.const_addr = 0;
-    vs_const_conf.type = SHADER_TYPE_VS;
-
-    EVERGREENXFormSetup(pSrcPicture, pSrc, 0, (float *)vs_const_conf.bo->ptr);
-    if (pMask)
-        EVERGREENXFormSetup(pMaskPicture, pMask, 1, (float *)vs_const_conf.bo->ptr);
-
-    radeon_bo_unmap(vs_const_conf.bo);
-
-    radeon_cs_space_add_persistent_bo(info->cs, vs_const_conf.bo,
-				      RADEON_GEM_DOMAIN_GTT, 0);
-    if (radeon_cs_space_check(info->cs)) {
-	radeon_bo_unref(vs_const_conf.bo);
-	RADEON_FALLBACK(("vs const buffer size check failed\n"));
-    }
-
     if (pMask)
         radeon_vbo_check(pScrn, &accel_state->vbo, 24);
     else
         radeon_vbo_check(pScrn, &accel_state->vbo, 16);
 
+    radeon_vbo_check(pScrn, &accel_state->cbuf, 256);
+
     radeon_cp_start(pScrn);
 
     evergreen_set_default_state(pScrn);
@@ -1327,6 +1290,7 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
         radeon_ib_discard(pScrn);
         radeon_cs_flush_indirect(pScrn);
         radeon_vb_discard(pScrn, &accel_state->vbo);
+        radeon_vb_discard(pScrn, &accel_state->cbuf);
         return FALSE;
     }
 
@@ -1335,6 +1299,7 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
 	    radeon_ib_discard(pScrn);
 	    radeon_cs_flush_indirect(pScrn);
             radeon_vb_discard(pScrn, &accel_state->vbo);
+            radeon_vb_discard(pScrn, &accel_state->cbuf);
             return FALSE;
         }
     } else
@@ -1446,6 +1411,19 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
     END_BATCH();
 
     /* VS alu constants */
+    vs_const_conf.size_bytes = 256;
+    vs_const_conf.type = SHADER_TYPE_VS;
+    cbuf = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
+
+    EVERGREENXFormSetup(pSrcPicture, pSrc, 0, cbuf);
+    if (pMask)
+        EVERGREENXFormSetup(pMaskPicture, pMask, 1, cbuf);
+
+    radeon_vbo_commit(pScrn, &accel_state->cbuf);
+
+    /* VS alu constants */
+    vs_const_conf.bo = accel_state->cbuf.vb_bo;
+    vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op;
     evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
 
     if (accel_state->vsync)
@@ -1907,8 +1885,10 @@ EVERGREENDrawInit(ScreenPtr pScreen)
     info->accel_state->dst_obj.bo = NULL;
     info->accel_state->copy_area_bo = NULL;
     info->accel_state->vbo.vb_start_op = -1;
+    info->accel_state->cbuf.vb_start_op = -1;
     info->accel_state->finish_op = evergreen_finish_op;
     info->accel_state->vbo.verts_per_op = 3;
+    info->accel_state->cbuf.verts_per_op = 1;
     RADEONVlineHelperClear(pScrn);
 
     radeon_vbo_init_lists(pScrn);
diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
index 6a2041b..3ff4d61 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -111,7 +111,6 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     const_config_t ps_const_conf;
     float *vs_alu_consts;
     const_config_t vs_const_conf;
-    int ret;
 
     cont = RTFContrast(pPriv->contrast);
     bright = RTFBrightness(pPriv->brightness);
@@ -153,75 +152,6 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     CLEAR (vs_const_conf);
     CLEAR (ps_const_conf);
 
-    /* setup the ps consts */
-    ps_const_conf.bo = radeon_bo_open(info->bufmgr, 0, 256, 0,
-				      RADEON_GEM_DOMAIN_GTT, 0);
-    if (ps_const_conf.bo == NULL) {
-	ErrorF("ps const buffer alloc failed\n");
-	return;
-    }
-    ret = radeon_bo_map(ps_const_conf.bo, 0);
-    if (ret) {
-	ErrorF("ps const buffer map failed\n");
-	return;
-    }
-
-    /* PS alu constants */
-    ps_const_conf.size_bytes = 256;
-    ps_const_conf.const_addr = 0;
-    ps_const_conf.type = SHADER_TYPE_PS;
-    ps_alu_consts = (float *)ps_const_conf.bo->ptr;
-
-    ps_alu_consts[0] = off[0];
-    ps_alu_consts[1] = off[1];
-    ps_alu_consts[2] = off[2];
-    ps_alu_consts[3] = yco;
-
-    ps_alu_consts[4] = uco[0];
-    ps_alu_consts[5] = uco[1];
-    ps_alu_consts[6] = uco[2];
-    ps_alu_consts[7] = gamma;
-
-    ps_alu_consts[8] = vco[0];
-    ps_alu_consts[9] = vco[1];
-    ps_alu_consts[10] = vco[2];
-    ps_alu_consts[11] = 0.0;
-    radeon_bo_unmap(ps_const_conf.bo);
-
-    vs_const_conf.bo = radeon_bo_open(info->bufmgr, 0, 256, 0,
-				      RADEON_GEM_DOMAIN_GTT, 0);
-    if (vs_const_conf.bo == NULL) {
-	ErrorF("vs const buffer alloc failed\n");
-	return;
-    }
-    ret = radeon_bo_map(vs_const_conf.bo, 0);
-    if (ret) {
-	ErrorF("vs const buffer map failed\n");
-	return;
-    }
-
-    /* PS alu constants */
-    vs_const_conf.size_bytes = 256;
-    vs_const_conf.const_addr = 0;
-    vs_const_conf.type = SHADER_TYPE_VS;
-    vs_alu_consts = (float *)vs_const_conf.bo->ptr;
-    vs_alu_consts[0] = 1.0 / pPriv->w;
-    vs_alu_consts[1] = 1.0 / pPriv->h;
-    vs_alu_consts[2] = 0.0;
-    vs_alu_consts[3] = 0.0;
-    radeon_bo_unmap(vs_const_conf.bo);
-
-    radeon_cs_space_add_persistent_bo(info->cs, ps_const_conf.bo,
-				      RADEON_GEM_DOMAIN_GTT, 0);
-    radeon_cs_space_add_persistent_bo(info->cs, vs_const_conf.bo,
-				      RADEON_GEM_DOMAIN_GTT, 0);
-    if (radeon_cs_space_check(info->cs)) {
-	radeon_bo_unref(ps_const_conf.bo);
-	radeon_bo_unref(vs_const_conf.bo);
-	ErrorF("const buffer size check failed\n");
-	return;
-    }
-
 #if defined(XF86DRM_MODE)
     if (info->cs) {
 	dst_obj.offset = 0;
@@ -265,6 +195,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 #endif
 
     radeon_vbo_check(pScrn, &accel_state->vbo, 16);
+    radeon_vbo_check(pScrn, &accel_state->cbuf, 512);
     radeon_cp_start(pScrn);
 
     evergreen_set_default_state(pScrn);
@@ -303,9 +234,6 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     ps_conf.bo                  = accel_state->shaders_bo;
     evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
 
-    /* PS alu constants */
-    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
-
     /* Texture */
     switch(pPriv->id) {
     case FOURCC_YV12:
@@ -518,7 +446,49 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     E32(0); // SPI_INTERP_CONTROL_0
     END_BATCH();
 
+    /* PS alu constants */
+    ps_const_conf.size_bytes = 256;
+    ps_const_conf.type = SHADER_TYPE_PS;
+    ps_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
+
+    ps_alu_consts[0] = off[0];
+    ps_alu_consts[1] = off[1];
+    ps_alu_consts[2] = off[2];
+    ps_alu_consts[3] = yco;
+
+    ps_alu_consts[4] = uco[0];
+    ps_alu_consts[5] = uco[1];
+    ps_alu_consts[6] = uco[2];
+    ps_alu_consts[7] = gamma;
+
+    ps_alu_consts[8] = vco[0];
+    ps_alu_consts[9] = vco[1];
+    ps_alu_consts[10] = vco[2];
+    ps_alu_consts[11] = 0.0;
+
+    radeon_vbo_commit(pScrn, &accel_state->cbuf);
+
+    /* PS alu constants */
+    ps_const_conf.bo = accel_state->cbuf.vb_bo;
+    ps_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op;
+    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
+
+    /* VS alu constants */
+    vs_const_conf.size_bytes = 256;
+    vs_const_conf.type = SHADER_TYPE_VS;
+    vs_alu_consts = radeon_vbo_space(pScrn, &accel_state->cbuf, 256);
+
+    /* write through the cbuf space returned above; vs_const_conf.bo is only assigned below */
+    vs_alu_consts[0] = 1.0 / pPriv->w;
+    vs_alu_consts[1] = 1.0 / pPriv->h;
+    vs_alu_consts[2] = 0.0;
+    vs_alu_consts[3] = 0.0;
+
+    radeon_vbo_commit(pScrn, &accel_state->cbuf);
+
     /* VS alu constants */
+    vs_const_conf.bo = accel_state->cbuf.vb_bo;
+    vs_const_conf.const_addr = accel_state->cbuf.vb_mc_addr + accel_state->cbuf.vb_start_op;
     evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
 
     if (pPriv->vsync) {
diff --git a/src/radeon.h b/src/radeon.h
index a6a9dd2..e85d941 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -740,6 +740,7 @@ struct radeon_accel_state {
     drmBufPtr         ib;
 
     struct radeon_vbo_object vbo;
+    struct radeon_vbo_object cbuf;
 
     /* where to discard IB from if we cancel operation */
     uint32_t          ib_reset_op;
diff --git a/src/radeon_exa_shared.c b/src/radeon_exa_shared.c
index 9aa4f39..2ef0751 100644
--- a/src/radeon_exa_shared.c
+++ b/src/radeon_exa_shared.c
@@ -166,6 +166,7 @@ int radeon_cp_start(ScrnInfoPtr pScrn)
 	}
     }
     accel_state->vbo.vb_start_op = accel_state->vbo.vb_offset;
+    accel_state->cbuf.vb_start_op = accel_state->cbuf.vb_offset;
     return 0;
 }
 
@@ -214,6 +215,8 @@ void radeon_ib_discard(ScrnInfoPtr pScrn)
 
     info->accel_state->vbo.vb_offset = 0;
     info->accel_state->vbo.vb_start_op = -1;
+    info->accel_state->cbuf.vb_offset = 0;
+    info->accel_state->cbuf.vb_start_op = -1;
 
     if (CS_FULL(info->cs)) {
 	radeon_cs_flush_indirect(pScrn);
@@ -226,6 +229,14 @@ void radeon_ib_discard(ScrnInfoPtr pScrn)
     if (ret)
 	ErrorF("space check failed in flush\n");
 
+    if (info->accel_state->cbuf.vb_bo) {
+	ret = radeon_cs_space_check_with_bo(info->cs,
+					    info->accel_state->cbuf.vb_bo,
+					    RADEON_GEM_DOMAIN_GTT, 0);
+	if (ret)
+	    ErrorF("space check failed in flush\n");
+    }
+
  out:
     if (info->dri2.enabled) {
 	info->accel_state->XInited3D = FALSE;
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index c372661..deecdc9 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -88,6 +88,12 @@ void radeon_cs_flush_indirect(ScrnInfoPtr pScrn)
         info->accel_state->vbo.vb_start_op = -1;
     }
 
+    /* release the current VBO so we don't block on mapping it later */
+    if (info->accel_state->cbuf.vb_offset && info->accel_state->cbuf.vb_bo) {
+        radeon_vbo_put(pScrn, &info->accel_state->cbuf);
+        info->accel_state->cbuf.vb_start_op = -1;
+    }
+
     radeon_cs_emit(info->cs);
     radeon_cs_erase(info->cs);
 
@@ -100,6 +106,14 @@ void radeon_cs_flush_indirect(ScrnInfoPtr pScrn)
     if (ret)
       ErrorF("space check failed in flush\n");
 
+    if (accel_state->cbuf.vb_bo) {
+	ret = radeon_cs_space_check_with_bo(info->cs,
+					    accel_state->cbuf.vb_bo,
+					    RADEON_GEM_DOMAIN_GTT, 0);
+	if (ret)
+	    ErrorF("space check failed in flush\n");
+    }
+
     if (info->reemit_current2d && info->state_2d.op)
         info->reemit_current2d(pScrn, info->state_2d.op);
 
commit eede93b057bbdddcde5da3220a3f8c6f73784a7e
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Mon Aug 23 13:16:27 2010 -0400

    radeon: move vbo data to a separate struct
    
    this way we can share the vbo code with const buffers

diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c
index 748ff12..31e460d 100644
--- a/src/evergreen_accel.c
+++ b/src/evergreen_accel.c
@@ -495,12 +495,12 @@ evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t doma
     /* flush vertex cache */
     if (info->ChipFamily == CHIP_FAMILY_CEDAR)
 	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
-				      accel_state->vb_offset, accel_state->vb_mc_addr,
+				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
 				      res->bo,
 				      domain, 0);
     else
 	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
-				      accel_state->vb_offset, accel_state->vb_mc_addr,
+				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
 				      res->bo,
 				      domain, 0);
 
@@ -1052,26 +1052,26 @@ void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
     draw_config_t   draw_conf;
     vtx_resource_t  vtx_res;
 
-    if (accel_state->vb_start_op == -1)
+    if (accel_state->vbo.vb_start_op == -1)
       return;
 
     CLEAR (draw_conf);
     CLEAR (vtx_res);
 
-    if (accel_state->vb_offset == accel_state->vb_start_op) {
+    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
 	radeon_ib_discard(pScrn);
 	radeon_cs_flush_indirect(pScrn);
-	radeon_vb_discard(pScrn);
+	radeon_vb_discard(pScrn, &accel_state->vbo);
 	return;
     }
 
     /* Vertex buffer setup */
-    accel_state->vb_size = accel_state->vb_offset - accel_state->vb_start_op;
+    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
     vtx_res.id              = SQ_FETCH_RESOURCE_vs;
     vtx_res.vtx_size_dw     = vtx_size / 4;
-    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
-    vtx_res.vb_addr         = accel_state->vb_mc_addr + accel_state->vb_start_op;
-    vtx_res.bo              = accel_state->vb_bo;
+    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
+    vtx_res.vb_addr         = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
+    vtx_res.bo              = accel_state->vbo.vb_bo;
     vtx_res.dst_sel_x       = SQ_SEL_X;
     vtx_res.dst_sel_y       = SQ_SEL_Y;
     vtx_res.dst_sel_z       = SQ_SEL_Z;
@@ -1092,7 +1092,7 @@ void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
 				  accel_state->dst_size, accel_state->dst_obj.offset,
 				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
 
-    accel_state->vb_start_op = -1;
+    accel_state->vbo.vb_start_op = -1;
     accel_state->ib_reset_op = 0;
 
 }
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 7eb1b72..434516b 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -41,8 +41,6 @@
 #include "radeon_exa_shared.h"
 #include "radeon_vbo.h"
 
-/* #define SHOW_VERTEXES */
-
 uint32_t EVERGREEN_ROP[16] = {
     RADEON_ROP3_ZERO, /* GXclear        */
     RADEON_ROP3_DSa,  /* Gxand          */
@@ -241,7 +239,7 @@ EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 	RADEON_FALLBACK(("ps const buffer size check failed\n"));
     }
 
-    radeon_vbo_check(pScrn, 16);
+    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
     radeon_cp_start(pScrn);
 
     evergreen_set_default_state(pScrn);
@@ -342,7 +340,7 @@ EVERGREENSolid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
     if (accel_state->vsync)
 	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
 
-    vb = radeon_vbo_space(pScrn, 8);
+    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
 
     vb[0] = (float)x1;
     vb[1] = (float)y1;
@@ -353,7 +351,7 @@ EVERGREENSolid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
     vb[4] = (float)x2;
     vb[5] = (float)y2;
 
-    radeon_vbo_commit(pScrn);
+    radeon_vbo_commit(pScrn, &accel_state->vbo);
 }
 
 static void
@@ -389,7 +387,7 @@ EVERGREENDoPrepareCopy(ScrnInfoPtr pScrn)
     CLEAR (vs_conf);
     CLEAR (ps_conf);
 
-    radeon_vbo_check(pScrn, 16);
+    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
     radeon_cp_start(pScrn);
 
     evergreen_set_default_state(pScrn);
@@ -545,9 +543,11 @@ EVERGREENAppendCopyVertex(ScrnInfoPtr pScrn,
 			  int dstX, int dstY,
 			  int w, int h)
 {
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
     float *vb;
 
-    vb = radeon_vbo_space(pScrn, 16);
+    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
 
     vb[0] = (float)dstX;
     vb[1] = (float)dstY;
@@ -564,7 +564,7 @@ EVERGREENAppendCopyVertex(ScrnInfoPtr pScrn,
     vb[10] = (float)(srcX + w);
     vb[11] = (float)(srcY + h);
 
-    radeon_vbo_commit(pScrn);
+    radeon_vbo_commit(pScrn, &accel_state->vbo);
 }
 
 static Bool
@@ -1311,9 +1311,9 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
     }
 
     if (pMask)
-        radeon_vbo_check(pScrn, 24);
+        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
     else
-        radeon_vbo_check(pScrn, 16);
+        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
 
     radeon_cp_start(pScrn);
 
@@ -1326,7 +1326,7 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
     if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) {
         radeon_ib_discard(pScrn);
         radeon_cs_flush_indirect(pScrn);
-        radeon_vb_discard(pScrn);
+        radeon_vb_discard(pScrn, &accel_state->vbo);
         return FALSE;
     }
 
@@ -1334,7 +1334,7 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
         if (!EVERGREENTextureSetup(pMaskPicture, pMask, 1)) {
 	    radeon_ib_discard(pScrn);
 	    radeon_cs_flush_indirect(pScrn);
-            radeon_vb_discard(pScrn);
+            radeon_vb_discard(pScrn, &accel_state->vbo);
             return FALSE;
         }
     } else
@@ -1470,7 +1470,7 @@ static void EVERGREENComposite(PixmapPtr pDst,
 
     if (accel_state->msk_pic) {
 
-	vb = radeon_vbo_space(pScrn, 24);
+	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
 
 	vb[0] = (float)dstX;
 	vb[1] = (float)dstY;
@@ -1493,11 +1493,11 @@ static void EVERGREENComposite(PixmapPtr pDst,
 	vb[16] = (float)(maskX + w);
 	vb[17] = (float)(maskY + h);
 
-	radeon_vbo_commit(pScrn);
+	radeon_vbo_commit(pScrn, &accel_state->vbo);
 
     } else {
 
-	vb = radeon_vbo_space(pScrn, 16);
+	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
 
 	vb[0] = (float)dstX;
 	vb[1] = (float)dstY;
@@ -1514,7 +1514,7 @@ static void EVERGREENComposite(PixmapPtr pDst,
 	vb[10] = (float)(srcX + w);
 	vb[11] = (float)(srcY + h);
 
-	radeon_vbo_commit(pScrn);
+	radeon_vbo_commit(pScrn, &accel_state->vbo);
     }
 
 
@@ -1906,9 +1906,9 @@ EVERGREENDrawInit(ScreenPtr pScreen)
     info->accel_state->src_obj[1].bo = NULL;
     info->accel_state->dst_obj.bo = NULL;
     info->accel_state->copy_area_bo = NULL;
-    info->accel_state->vb_start_op = -1;
+    info->accel_state->vbo.vb_start_op = -1;
     info->accel_state->finish_op = evergreen_finish_op;
-    info->accel_state->verts_per_op = 3;
+    info->accel_state->vbo.verts_per_op = 3;
     RADEONVlineHelperClear(pScrn);
 
     radeon_vbo_init_lists(pScrn);
diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
index 2b8d65e..6a2041b 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -264,7 +264,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     dstyoff = 0;
 #endif
 
-    radeon_vbo_check(pScrn, 16);
+    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
     radeon_cp_start(pScrn);
 
     evergreen_set_default_state(pScrn);
@@ -559,7 +559,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
 	srch = (pPriv->src_h * dsth) / pPriv->dst_h;
 
-	vb = radeon_vbo_space(pScrn, 16);
+	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
 
 	vb[0] = (float)dstX;
 	vb[1] = (float)dstY;
@@ -576,7 +576,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	vb[10] = (float)(srcX + srcw);
 	vb[11] = (float)(srcY + srch);
 
-	radeon_vbo_commit(pScrn);
+	radeon_vbo_commit(pScrn, &accel_state->vbo);
 
 	pBox++;
     }
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 7702087..99670b4 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -208,7 +208,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     CLEAR (vs_conf);
     CLEAR (ps_conf);
 
-    radeon_vbo_check(pScrn, 16);
+    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
     radeon_cp_start(pScrn);
 
     r600_set_default_state(pScrn, accel_state->ib);
@@ -333,7 +333,7 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
     if (accel_state->vsync)
 	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
 
-    vb = radeon_vbo_space(pScrn, 8);
+    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 8);
 
     vb[0] = (float)x1;
     vb[1] = (float)y1;
@@ -344,7 +344,7 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
     vb[4] = (float)x2;
     vb[5] = (float)y2;
 
-    radeon_vbo_commit(pScrn);
+    radeon_vbo_commit(pScrn, &accel_state->vbo);
 }
 
 static void
@@ -380,7 +380,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
     CLEAR (vs_conf);
     CLEAR (ps_conf);
 
-    radeon_vbo_check(pScrn, 16);
+    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
     radeon_cp_start(pScrn);
 
     r600_set_default_state(pScrn, accel_state->ib);
@@ -535,9 +535,11 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn,
 		     int dstX, int dstY,
 		     int w, int h)
 {
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
     float *vb;
 
-    vb = radeon_vbo_space(pScrn, 16);
+    vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
 
     vb[0] = (float)dstX;
     vb[1] = (float)dstY;
@@ -554,7 +556,7 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn,
     vb[10] = (float)(srcX + w);
     vb[11] = (float)(srcY + h);
 
-    radeon_vbo_commit(pScrn);
+    radeon_vbo_commit(pScrn, &accel_state->vbo);
 }
 
 static Bool
@@ -1318,9 +1320,9 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     CLEAR (ps_conf);
 
     if (pMask)
-        radeon_vbo_check(pScrn, 24);
+        radeon_vbo_check(pScrn, &accel_state->vbo, 24);
     else
-        radeon_vbo_check(pScrn, 16);
+        radeon_vbo_check(pScrn, &accel_state->vbo, 16);
 
     radeon_cp_start(pScrn);
 
@@ -1332,14 +1334,14 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 
     if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
         R600IBDiscard(pScrn, accel_state->ib);
-        radeon_vb_discard(pScrn);
+        radeon_vb_discard(pScrn, &accel_state->vbo);
         return FALSE;
     }
 
     if (pMask) {
         if (!R600TextureSetup(pMaskPicture, pMask, 1)) {
             R600IBDiscard(pScrn, accel_state->ib);
-            radeon_vb_discard(pScrn);
+            radeon_vb_discard(pScrn, &accel_state->vbo);
             return FALSE;
         }
     } else
@@ -1484,7 +1486,7 @@ static void R600Composite(PixmapPtr pDst,
 
     if (accel_state->msk_pic) {
 
-	vb = radeon_vbo_space(pScrn, 24);
+	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24);
 
 	vb[0] = (float)dstX;
 	vb[1] = (float)dstY;
@@ -1507,11 +1509,11 @@ static void R600Composite(PixmapPtr pDst,
 	vb[16] = (float)(maskX + w);
 	vb[17] = (float)(maskY + h);
 
-	radeon_vbo_commit(pScrn);
+	radeon_vbo_commit(pScrn, &accel_state->vbo);
 
     } else {
 
-	vb = radeon_vbo_space(pScrn, 16);
+	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
 
 	vb[0] = (float)dstX;
 	vb[1] = (float)dstY;
@@ -1528,7 +1530,7 @@ static void R600Composite(PixmapPtr pDst,
 	vb[10] = (float)(srcX + w);
 	vb[11] = (float)(srcY + h);
 
-	radeon_vbo_commit(pScrn);
+	radeon_vbo_commit(pScrn, &accel_state->vbo);
     }
 
 
@@ -1642,7 +1644,7 @@ R600CopyToVRAM(ScrnInfoPtr pScrn,
     }
 
     R600IBDiscard(pScrn, scratch);
-    radeon_vb_discard(pScrn);
+    radeon_vb_discard(pScrn, &accel_state->vbo);
 
     return TRUE;
 }
@@ -1756,7 +1758,7 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
     }
 
     R600IBDiscard(pScrn, scratch);
-    radeon_vb_discard(pScrn);
+    radeon_vb_discard(pScrn, &accel_state->vbo);
 
     return TRUE;
 
@@ -2227,9 +2229,9 @@ R600DrawInit(ScreenPtr pScreen)
     info->accel_state->src_obj[1].bo = NULL;
     info->accel_state->dst_obj.bo = NULL;
     info->accel_state->copy_area_bo = NULL;
-    info->accel_state->vb_start_op = -1;
+    info->accel_state->vbo.vb_start_op = -1;
     info->accel_state->finish_op = r600_finish_op;
-    info->accel_state->verts_per_op = 3;
+    info->accel_state->vbo.verts_per_op = 3;
     RADEONVlineHelperClear(pScrn);
 
 #ifdef XF86DRM_MODE
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 66164ac..88073ac 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -206,7 +206,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     dstyoff = 0;
 #endif
 
-    radeon_vbo_check(pScrn, 16);
+    radeon_vbo_check(pScrn, &accel_state->vbo, 16);
     radeon_cp_start(pScrn);
 
     r600_set_default_state(pScrn, accel_state->ib);
@@ -510,7 +510,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
 	srch = (pPriv->src_h * dsth) / pPriv->dst_h;
 
-	vb = radeon_vbo_space(pScrn, 16);
+	vb = radeon_vbo_space(pScrn, &accel_state->vbo, 16);
 
 	vb[0] = (float)dstX;
 	vb[1] = (float)dstY;
@@ -527,7 +527,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	vb[10] = (float)(srcX + srcw);
 	vb[11] = (float)(srcY + srch);
 
-	radeon_vbo_commit(pScrn);
+	radeon_vbo_commit(pScrn, &accel_state->vbo);
 
 	pBox++;
     }
diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c
index 2952863..4b5c553 100644
--- a/src/r6xx_accel.c
+++ b/src/r6xx_accel.c
@@ -525,12 +525,12 @@ r600_set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint
 	(info->ChipFamily == CHIP_FAMILY_RS880) ||
 	(info->ChipFamily == CHIP_FAMILY_RV710))
 	r600_cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
-				 accel_state->vb_offset, accel_state->vb_mc_addr,
+				 accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
 				 res->bo,
 				 domain, 0);
     else
 	r600_cp_set_surface_sync(pScrn, ib, VC_ACTION_ENA_bit,
-				 accel_state->vb_offset, accel_state->vb_mc_addr,
+				 accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
 				 res->bo,
 				 domain, 0);
 
@@ -1165,26 +1165,26 @@ void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
     draw_config_t   draw_conf;
     vtx_resource_t  vtx_res;
 
-    if (accel_state->vb_start_op == -1)
+    if (accel_state->vbo.vb_start_op == -1)
 	return;
 
     CLEAR (draw_conf);
     CLEAR (vtx_res);
 
-    if (accel_state->vb_offset == accel_state->vb_start_op) {
+    if (accel_state->vbo.vb_offset == accel_state->vbo.vb_start_op) {
         R600IBDiscard(pScrn, accel_state->ib);
-	radeon_vb_discard(pScrn);
+	radeon_vb_discard(pScrn, &accel_state->vbo);
 	return;
     }
 
     /* Vertex buffer setup */
-    accel_state->vb_size = accel_state->vb_offset - accel_state->vb_start_op;
+    accel_state->vbo.vb_size = accel_state->vbo.vb_offset - accel_state->vbo.vb_start_op;
     vtx_res.id              = SQ_VTX_RESOURCE_vs;
     vtx_res.vtx_size_dw     = vtx_size / 4;
-    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
+    vtx_res.vtx_num_entries = accel_state->vbo.vb_size / 4;
     vtx_res.mem_req_size    = 1;
-    vtx_res.vb_addr         = accel_state->vb_mc_addr + accel_state->vb_start_op;
-    vtx_res.bo              = accel_state->vb_bo;
+    vtx_res.vb_addr         = accel_state->vbo.vb_mc_addr + accel_state->vbo.vb_start_op;
+    vtx_res.bo              = accel_state->vbo.vb_bo;
     r600_set_vtx_resource(pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);
 
     /* Draw */
@@ -1204,7 +1204,7 @@ void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
 			     accel_state->dst_size, accel_state->dst_obj.offset,
 			     accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
 
-    accel_state->vb_start_op = -1;
+    accel_state->vbo.vb_start_op = -1;
     accel_state->ib_reset_op = 0;
 
 #if KMS_MULTI_OP
diff --git a/src/radeon.h b/src/radeon.h
index 8428e2d..a6a9dd2 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -676,6 +676,18 @@ struct r600_accel_object {
     struct radeon_bo *bo;
 };
 
+struct radeon_vbo_object {
+    int               vb_offset;
+    uint64_t          vb_mc_addr;
+    int               vb_total;
+    void              *vb_ptr;
+    uint32_t          vb_size;
+    uint32_t          vb_op_vert_size;
+    int32_t           vb_start_op;
+    struct radeon_bo *vb_bo;
+    unsigned          verts_per_op;
+};
+
 struct radeon_accel_state {
     /* common accel data */
     int               fifo_slots;       /* Free slots in the FIFO (64 max)   */
@@ -723,20 +735,14 @@ struct radeon_accel_state {
     uint32_t          *draw_header;
     unsigned          vtx_count;
     unsigned          num_vtx;
-    unsigned          verts_per_op;
     Bool              vsync;
 
     drmBufPtr         ib;
-    int               vb_offset;
-    uint64_t          vb_mc_addr;
-    int               vb_total;
-    void              *vb_ptr;
-    uint32_t          vb_size;
-    uint32_t          vb_op_vert_size;
-    int32_t           vb_start_op;
+
+    struct radeon_vbo_object vbo;
+
     /* where to discard IB from if we cancel operation */
     uint32_t          ib_reset_op;
-    struct radeon_bo *vb_bo;
 #ifdef XF86DRM_MODE
     struct radeon_dma_bo bo_free;
     struct radeon_dma_bo bo_wait;
diff --git a/src/radeon_exa_shared.c b/src/radeon_exa_shared.c
index d1926f4..9aa4f39 100644
--- a/src/radeon_exa_shared.c
+++ b/src/radeon_exa_shared.c
@@ -131,21 +131,19 @@ static Bool radeon_vb_get(ScrnInfoPtr pScrn)
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
 
-    accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart +
+    accel_state->vbo.vb_mc_addr = info->gartLocation + info->dri->bufStart +
 	(accel_state->ib->idx*accel_state->ib->total)+
 	(accel_state->ib->total / 2);
-    accel_state->vb_total = (accel_state->ib->total / 2);
-    accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address +
+    accel_state->vbo.vb_total = (accel_state->ib->total / 2);
+    accel_state->vbo.vb_ptr = (pointer)((char*)accel_state->ib->address +
 				    (accel_state->ib->total / 2));
-    accel_state->vb_offset = 0;
+    accel_state->vbo.vb_offset = 0;
     return TRUE;
 }
 
-void radeon_vb_discard(ScrnInfoPtr pScrn)
+void radeon_vb_discard(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo)
 {
-    RADEONInfoPtr info = RADEONPTR(pScrn);
-
-    info->accel_state->vb_start_op = -1;
+    vbo->vb_start_op = -1;
 }
 
 int radeon_cp_start(ScrnInfoPtr pScrn)
@@ -159,7 +157,6 @@ int radeon_cp_start(ScrnInfoPtr pScrn)
 	    radeon_cs_flush_indirect(pScrn);
 	}
 	accel_state->ib_reset_op = info->cs->cdw;
-	accel_state->vb_start_op = accel_state->vb_offset;
     } else
 #endif
     {
@@ -167,33 +164,35 @@ int radeon_cp_start(ScrnInfoPtr pScrn)
 	if (!radeon_vb_get(pScrn)) {
 	    return -1;
 	}
-	accel_state->vb_start_op = accel_state->vb_offset;
     }
+    accel_state->vbo.vb_start_op = accel_state->vbo.vb_offset;
     return 0;
 }
 
-void radeon_vb_no_space(ScrnInfoPtr pScrn, int vert_size)
+void radeon_vb_no_space(ScrnInfoPtr pScrn,
+			struct radeon_vbo_object *vbo,
+			int vert_size)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
-    struct radeon_accel_state *accel_state = info->accel_state; 
+    struct radeon_accel_state *accel_state = info->accel_state;
 
 #if defined(XF86DRM_MODE)
     if (info->cs) {
-	if (accel_state->vb_bo) {
-	    if (accel_state->vb_start_op != accel_state->vb_offset) { 
+	if (vbo->vb_bo) {
+	    if (vbo->vb_start_op != vbo->vb_offset) {
 		accel_state->finish_op(pScrn, vert_size);
 		accel_state->ib_reset_op = info->cs->cdw;
 	    }
 
 	    /* release the current VBO */
-	    radeon_vbo_put(pScrn);
+	    radeon_vbo_put(pScrn, vbo);
 	}
 	/* get a new one */
-	radeon_vbo_get(pScrn);
+	radeon_vbo_get(pScrn, vbo);
 	return;
     }
 #endif
-    if (accel_state->vb_start_op != -1) {
+    if (vbo->vb_start_op != -1) {
         accel_state->finish_op(pScrn, vert_size);
         radeon_cp_start(pScrn);
     }
@@ -213,8 +212,8 @@ void radeon_ib_discard(ScrnInfoPtr pScrn)
 	goto out;
     }
 
-    info->accel_state->vb_offset = 0;
-    info->accel_state->vb_start_op = -1;
+    info->accel_state->vbo.vb_offset = 0;
+    info->accel_state->vbo.vb_start_op = -1;
 
     if (CS_FULL(info->cs)) {
 	radeon_cs_flush_indirect(pScrn);
@@ -222,7 +221,7 @@ void radeon_ib_discard(ScrnInfoPtr pScrn)
     }
     radeon_cs_erase(info->cs);
     ret = radeon_cs_space_check_with_bo(info->cs,
-					info->accel_state->vb_bo,
+					info->accel_state->vbo.vb_bo,
 					RADEON_GEM_DOMAIN_GTT, 0);
     if (ret)
 	ErrorF("space check failed in flush\n");
diff --git a/src/radeon_exa_shared.h b/src/radeon_exa_shared.h
index 71068b1..489e3b0 100644
--- a/src/radeon_exa_shared.h
+++ b/src/radeon_exa_shared.h
@@ -72,9 +72,9 @@ static inline void radeon_add_pixmap(struct radeon_cs *cs, PixmapPtr pPix, int r
 extern void radeon_ib_discard(ScrnInfoPtr pScrn);
 #endif /* XF86DRM_MODE */
 
-extern void radeon_vb_discard(ScrnInfoPtr pScrn);
+extern void radeon_vb_discard(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo);
 extern int radeon_cp_start(ScrnInfoPtr pScrn);
-extern void radeon_vb_no_space(ScrnInfoPtr pScrn, int vert_size);
+extern void radeon_vb_no_space(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo, int vert_size);
 extern void radeon_vbo_done_composite(PixmapPtr pDst);
 
 #endif
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index b762648..c372661 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -83,9 +83,9 @@ void radeon_cs_flush_indirect(ScrnInfoPtr pScrn)
 	return;
 
     /* release the current VBO so we don't block on mapping it later */
-    if (info->accel_state->vb_offset && info->accel_state->vb_bo) {
-        radeon_vbo_put(pScrn);
-        info->accel_state->vb_start_op = -1;
+    if (info->accel_state->vbo.vb_offset && info->accel_state->vbo.vb_bo) {
+        radeon_vbo_put(pScrn, &info->accel_state->vbo);
+        info->accel_state->vbo.vb_start_op = -1;
     }
 
     radeon_cs_emit(info->cs);
@@ -95,7 +95,7 @@ void radeon_cs_flush_indirect(ScrnInfoPtr pScrn)
         radeon_vbo_flush_bos(pScrn);
 
     ret = radeon_cs_space_check_with_bo(info->cs,
-					accel_state->vb_bo,
+					accel_state->vbo.vb_bo,
 					RADEON_GEM_DOMAIN_GTT, 0);
     if (ret)
       ErrorF("space check failed in flush\n");
diff --git a/src/radeon_vbo.c b/src/radeon_vbo.c
index 0735540..c0a668f 100644
--- a/src/radeon_vbo.c
+++ b/src/radeon_vbo.c
@@ -41,31 +41,27 @@
 
 static struct radeon_bo *radeon_vbo_get_bo(ScrnInfoPtr pScrn);
 
-void radeon_vbo_put(ScrnInfoPtr pScrn)
+void radeon_vbo_put(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo)
 {
-    RADEONInfoPtr info = RADEONPTR(pScrn);
-    struct radeon_accel_state *accel_state = info->accel_state;
-    
-    if (accel_state->vb_bo) {
-	radeon_bo_unmap(accel_state->vb_bo);
-	radeon_bo_unref(accel_state->vb_bo);
-	accel_state->vb_bo = NULL;
-	accel_state->vb_total = 0;
+
+    if (vbo->vb_bo) {
+	radeon_bo_unmap(vbo->vb_bo);
+	radeon_bo_unref(vbo->vb_bo);
+	vbo->vb_bo = NULL;
+	vbo->vb_total = 0;
     }
 
-    accel_state->vb_offset = 0;
+    vbo->vb_offset = 0;
 }
 
-void radeon_vbo_get(ScrnInfoPtr pScrn)
+void radeon_vbo_get(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo)
 {
-    RADEONInfoPtr info = RADEONPTR(pScrn);
-    struct radeon_accel_state *accel_state = info->accel_state;
 
-    accel_state->vb_bo = radeon_vbo_get_bo(pScrn);
+    vbo->vb_bo = radeon_vbo_get_bo(pScrn);
 
-    accel_state->vb_total = VBO_SIZE;
-    accel_state->vb_offset = 0;
-    accel_state->vb_start_op = accel_state->vb_offset;
+    vbo->vb_total = VBO_SIZE;
+    vbo->vb_offset = 0;
+    vbo->vb_start_op = vbo->vb_offset;
 }
 
 /* these functions could migrate to libdrm and
@@ -80,7 +76,7 @@ static int radeon_bo_is_idle(struct radeon_bo *bo)
 void radeon_vbo_init_lists(ScrnInfoPtr pScrn)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
-    struct radeon_accel_state *accel_state = info->accel_state; 
+    struct radeon_accel_state *accel_state = info->accel_state;
 
     accel_state->use_vbos = TRUE;
     make_empty_list(&accel_state->bo_free);
@@ -91,7 +87,7 @@ void radeon_vbo_init_lists(ScrnInfoPtr pScrn)
 void radeon_vbo_free_lists(ScrnInfoPtr pScrn)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
-    struct radeon_accel_state *accel_state = info->accel_state; 
+    struct radeon_accel_state *accel_state = info->accel_state;
     struct radeon_dma_bo *dma_bo, *temp;
 
     foreach_s(dma_bo, temp, &accel_state->bo_free) {
@@ -116,7 +112,7 @@ void radeon_vbo_free_lists(ScrnInfoPtr pScrn)
 void radeon_vbo_flush_bos(ScrnInfoPtr pScrn)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
-    struct radeon_accel_state *accel_state = info->accel_state; 
+    struct radeon_accel_state *accel_state = info->accel_state;
     struct radeon_dma_bo *dma_bo, *temp;
     const int expire_at = ++accel_state->bo_free.expire_counter + DMA_BO_FREE_TIME;
     const int time = accel_state->bo_free.expire_counter;
@@ -164,7 +160,7 @@ void radeon_vbo_flush_bos(ScrnInfoPtr pScrn)
 static struct radeon_bo *radeon_vbo_get_bo(ScrnInfoPtr pScrn)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
-    struct radeon_accel_state *accel_state = info->accel_state; 
+    struct radeon_accel_state *accel_state = info->accel_state;
     struct radeon_dma_bo *dma_bo = NULL;
     struct radeon_bo *bo;
 
diff --git a/src/radeon_vbo.h b/src/radeon_vbo.h
index b505f66..21533c2 100644
--- a/src/radeon_vbo.h
+++ b/src/radeon_vbo.h
@@ -2,39 +2,40 @@
 #ifndef RADEON_VBO_H
 #define RADEON_VBO_H
 
-extern void radeon_vb_no_space(ScrnInfoPtr pScrn, int vert_size);
+extern void radeon_vb_no_space(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo, int vert_size);
 extern void radeon_vbo_init_lists(ScrnInfoPtr pScrn);
 extern void radeon_vbo_free_lists(ScrnInfoPtr pScrn);
 extern void radeon_vbo_flush_bos(ScrnInfoPtr pScrn);
-extern void radeon_vbo_get(ScrnInfoPtr pScrn);
-extern void radeon_vbo_put(ScrnInfoPtr pScrn);
+extern void radeon_vbo_get(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo);
+extern void radeon_vbo_put(ScrnInfoPtr pScrn, struct radeon_vbo_object *vbo);
 
-static inline void radeon_vbo_check(ScrnInfoPtr pScrn, int vert_size)
+static inline void radeon_vbo_check(ScrnInfoPtr pScrn,
+				    struct radeon_vbo_object *vbo,
+				    int vert_size)
 {
-    RADEONInfoPtr info = RADEONPTR(pScrn);
-    struct radeon_accel_state *accel_state = info->accel_state;
 
-    if ((accel_state->vb_offset + (accel_state->verts_per_op * vert_size)) > accel_state->vb_total) {
-	radeon_vb_no_space(pScrn, vert_size);
+    if ((vbo->vb_offset + (vbo->verts_per_op * vert_size)) > vbo->vb_total) {
+	radeon_vb_no_space(pScrn, vbo, vert_size);
     }
 }
 
 static inline void *
-radeon_vbo_space(ScrnInfoPtr pScrn, int vert_size)
+radeon_vbo_space(ScrnInfoPtr pScrn,
+		 struct radeon_vbo_object *vbo,
+		 int vert_size)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
-    struct radeon_accel_state *accel_state = info->accel_state;
     void *vb;
-    
+
     /* we've ran out of space in the vertex buffer - need to get a
        new one */
-    radeon_vbo_check(pScrn, vert_size);
+    radeon_vbo_check(pScrn, vbo, vert_size);
 
-    accel_state->vb_op_vert_size = vert_size;
+    vbo->vb_op_vert_size = vert_size;
 #if defined(XF86DRM_MODE)
     if (info->cs) {
 	int ret;
-	struct radeon_bo *bo = accel_state->vb_bo;
+	struct radeon_bo *bo = vbo->vb_bo;
 
 	if (!bo->ptr) {
 	    ret = radeon_bo_map(bo, 1);
@@ -43,19 +44,18 @@ radeon_vbo_space(ScrnInfoPtr pScrn, int vert_size)
 		return NULL;
 	    }
 	}
-	vb = (pointer)((char *)bo->ptr + accel_state->vb_offset);
+	vb = (pointer)((char *)bo->ptr + vbo->vb_offset);
     } else
 #endif
-	vb = (pointer)((char *)accel_state->vb_ptr + accel_state->vb_offset);
+	vb = (pointer)((char *)vbo->vb_ptr + vbo->vb_offset);
     return vb;
 }
 
-static inline void radeon_vbo_commit(ScrnInfoPtr pScrn)
+static inline void radeon_vbo_commit(ScrnInfoPtr pScrn,
+				     struct radeon_vbo_object *vbo)
 {
-    RADEONInfoPtr info = RADEONPTR(pScrn);
-    struct radeon_accel_state *accel_state = info->accel_state;
 
-    accel_state->vb_offset += accel_state->verts_per_op * accel_state->vb_op_vert_size;
+    vbo->vb_offset += vbo->verts_per_op * vbo->vb_op_vert_size;
 }
 
 #endif
commit d8abf27dbd14f5eb746c5e8b8b1436ad292d8ec6
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Mon Aug 23 01:31:27 2010 -0400

    evergreen: fix dword counts in default state

diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c
index 3b777f4..748ff12 100644
--- a/src/evergreen_accel.c
+++ b/src/evergreen_accel.c
@@ -868,7 +868,7 @@ evergreen_set_default_state(ScrnInfoPtr pScrn)
     RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
     END_BATCH();
 
-    BEGIN_BATCH(42);
+    BEGIN_BATCH(45);
     EREG(DB_DEPTH_CONTROL,                    0);
 
     PACK0(DB_RENDER_CONTROL, 5);
@@ -978,7 +978,7 @@ evergreen_set_default_state(ScrnInfoPtr pScrn)
     evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
 
     // VGT
-    BEGIN_BATCH(45);
+    BEGIN_BATCH(46);
 
     PACK0(VGT_MAX_VTX_INDX, 4);
     E32(0xffffff);
commit 6930d2c981221757b1e11ef194809f085753a611
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Fri Aug 20 16:55:21 2010 -0400

    Add initial EXA and Xv support for evergreen
    
    Based on the r6xx/r7xx code updated for evergreen.
    Still causes GPU hangs in some cases.  We haven't
    tracked down why yet.  Might be related to constant
    buffer persistence.
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/Makefile.am b/src/Makefile.am
index 033047e..e05722c 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -53,7 +53,8 @@ RADEON_KMS_SRCS=radeon_dri2.c radeon_kms.c drmmode_display.c radeon_vbo.c
 endif
 
 if USE_EXA
-RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c radeon_exa_shared.c
+RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c radeon_exa_shared.c \
+	evergreen_exa.c evergreen_accel.c evergreen_shader.c evergreen_textured_videofuncs.c
 endif
 
 AM_CFLAGS = \
@@ -129,6 +130,11 @@ EXTRA_DIST = \
 	r600_reg_r7xx.h \
 	r600_shader.h \
 	r600_state.h \
+	evergreen_reg.h \
+	evergreen_reg_auto.h \
+	evergreen_reg_r7xx.h \
+	evergreen_shader.h \
+	evergreen_state.h \
 	ati.h \
 	ativersion.h \
 	bicubic_table.h \
diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c
new file mode 100644
index 0000000..3b777f4
--- /dev/null
+++ b/src/evergreen_accel.c
@@ -0,0 +1,1099 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Alex Deucher <alexander.deucher at amd.com>
+ *
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include <errno.h>
+
+#include "radeon.h"
+#include "evergreen_shader.h"
+#include "radeon_reg.h"
+#include "evergreen_reg.h"
+#include "evergreen_state.h"
+
+#include "radeon_drm.h"
+#include "radeon_vbo.h"
+#include "radeon_exa_shared.h"
+
+/*
+ * Emit a 3-dword IT_CONTEXT_CONTROL packet whose two payload dwords are
+ * both 0x80000000.  Called at the start of default state setup.
+ */
+void
+evergreen_start_3d(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    BEGIN_BATCH(3);
+    PACK3(IT_CONTEXT_CONTROL, 2);
+    E32(0x80000000);
+    E32(0x80000000);
+    END_BATCH();
+
+}
+
+/*
+ * Setup of functional groups
+ */
+
+// asic stack/thread/gpr limits - need to query the drm
+/*
+ * Pack the priorities, GPR counts, thread counts and stack entry counts
+ * from sq_conf into the SQ_CONFIG, SQ_GPR_RESOURCE_MGMT_1..3,
+ * SQ_THREAD_RESOURCE_MGMT(_2) and SQ_STACK_RESOURCE_MGMT_1..3 values and
+ * emit them in a single 16-dword batch.  VC_ENABLE_bit is set in SQ_CONFIG
+ * for every family except CEDAR.
+ */
+static void
+evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
+{
+    uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
+    uint32_t sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
+    uint32_t sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    if (info->ChipFamily == CHIP_FAMILY_CEDAR)
+	sq_config = 0;
+    else
+	sq_config = VC_ENABLE_bit;
+
+    sq_config |= (EXPORT_SRC_C_bit |
+		  (sq_conf->cs_prio << CS_PRIO_shift) |
+		  (sq_conf->ls_prio << LS_PRIO_shift) |
+		  (sq_conf->hs_prio << HS_PRIO_shift) |
+		  (sq_conf->ps_prio << PS_PRIO_shift) |
+		  (sq_conf->vs_prio << VS_PRIO_shift) |
+		  (sq_conf->gs_prio << GS_PRIO_shift) |
+		  (sq_conf->es_prio << ES_PRIO_shift));
+
+    sq_gpr_resource_mgmt_1 = ((sq_conf->num_ps_gprs << NUM_PS_GPRS_shift) |
+			      (sq_conf->num_vs_gprs << NUM_VS_GPRS_shift) |
+			      (sq_conf->num_temp_gprs << NUM_CLAUSE_TEMP_GPRS_shift));
+    sq_gpr_resource_mgmt_2 = ((sq_conf->num_gs_gprs << NUM_GS_GPRS_shift) |
+			      (sq_conf->num_es_gprs << NUM_ES_GPRS_shift));
+    sq_gpr_resource_mgmt_3 = ((sq_conf->num_hs_gprs << NUM_HS_GPRS_shift) |
+			      (sq_conf->num_ls_gprs << NUM_LS_GPRS_shift));
+
+    sq_thread_resource_mgmt = ((sq_conf->num_ps_threads << NUM_PS_THREADS_shift) |
+			       (sq_conf->num_vs_threads << NUM_VS_THREADS_shift) |
+			       (sq_conf->num_gs_threads << NUM_GS_THREADS_shift) |
+			       (sq_conf->num_es_threads << NUM_ES_THREADS_shift));
+    sq_thread_resource_mgmt_2 = ((sq_conf->num_hs_threads << NUM_HS_THREADS_shift) |
+				 (sq_conf->num_ls_threads << NUM_LS_THREADS_shift));
+
+    sq_stack_resource_mgmt_1 = ((sq_conf->num_ps_stack_entries << NUM_PS_STACK_ENTRIES_shift) |
+				(sq_conf->num_vs_stack_entries << NUM_VS_STACK_ENTRIES_shift));
+
+    sq_stack_resource_mgmt_2 = ((sq_conf->num_gs_stack_entries << NUM_GS_STACK_ENTRIES_shift) |
+				(sq_conf->num_es_stack_entries << NUM_ES_STACK_ENTRIES_shift));
+
+    sq_stack_resource_mgmt_3 = ((sq_conf->num_hs_stack_entries << NUM_HS_STACK_ENTRIES_shift) |
+				(sq_conf->num_ls_stack_entries << NUM_LS_STACK_ENTRIES_shift));
+
+    BEGIN_BATCH(16);
+    /* disable dyn gprs */
+    EREG(SQ_DYN_GPR_CNTL_PS_FLUSH_REQ, 0);
+    PACK0(SQ_CONFIG, 4);
+    E32(sq_config);
+    E32(sq_gpr_resource_mgmt_1);
+    E32(sq_gpr_resource_mgmt_2);
+    E32(sq_gpr_resource_mgmt_3);
+    PACK0(SQ_THREAD_RESOURCE_MGMT, 5);
+    E32(sq_thread_resource_mgmt);
+    E32(sq_thread_resource_mgmt_2);
+    E32(sq_stack_resource_mgmt_1);
+    E32(sq_stack_resource_mgmt_2);
+    E32(sq_stack_resource_mgmt_3);
+    END_BATCH();
+}
+
+/*
+ * Program color buffer cb_conf->id as a render target.
+ *
+ * Builds the CB_COLOR0_INFO value from the cb_conf fields, derives
+ * pitch as (w / 8) - 1 and slice as ((w * h aligned up to 8) / 64) - 1,
+ * and emits BASE, CMASK, FMASK, ATTRIB and INFO (each followed by a
+ * relocation against cb_conf->bo) and then PITCH, SLICE, VIEW, DIM,
+ * the CMASK/FMASK slice registers and four zeroed clear words.
+ * Per-target registers are addressed with a stride of 0x3c * cb_conf->id.
+ */
+void
+evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
+{
+    uint32_t cb_color_info, cb_color_attrib, cb_color_dim;
+    int pitch, slice, h;
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    cb_color_info = ((cb_conf->endian      << ENDIAN_shift)				|
+		     (cb_conf->format      << CB_COLOR0_INFO__FORMAT_shift)		|
+		     (cb_conf->array_mode  << CB_COLOR0_INFO__ARRAY_MODE_shift)		|
+		     (cb_conf->number_type << NUMBER_TYPE_shift)			|
+		     (cb_conf->comp_swap   << COMP_SWAP_shift)				|
+		     (cb_conf->source_format << SOURCE_FORMAT_shift)                    |
+		     (cb_conf->resource_type << RESOURCE_TYPE_shift));
+    if (cb_conf->blend_clamp)
+	cb_color_info |= BLEND_CLAMP_bit;
+    if (cb_conf->fast_clear)
+	cb_color_info |= FAST_CLEAR_bit;
+    if (cb_conf->compression)
+	cb_color_info |= COMPRESSION_bit;
+    if (cb_conf->blend_bypass)
+	cb_color_info |= BLEND_BYPASS_bit;
+    if (cb_conf->simple_float)
+	cb_color_info |= SIMPLE_FLOAT_bit;
+    if (cb_conf->round_mode)
+	cb_color_info |= CB_COLOR0_INFO__ROUND_MODE_bit;
+    if (cb_conf->tile_compact)
+	cb_color_info |= CB_COLOR0_INFO__TILE_COMPACT_bit;
+    if (cb_conf->rat)
+	cb_color_info |= RAT_bit;
+
+    /* bit 4 needs to be set for linear and depth/stencil surfaces */
+    cb_color_attrib = CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit;
+
+    pitch = (cb_conf->w / 8) - 1;
+    h = RADEON_ALIGN(cb_conf->h, 8);
+    slice = ((cb_conf->w * h) / 64) - 1;
+
+    switch (cb_conf->resource_type) {
+    case BUFFER:
+	/* number of elements in the surface */
+	cb_color_dim = pitch * slice;
+	break;
+    default:
+	/* w/h of the surface */
+	cb_color_dim = (((cb_conf->w - 1) << WIDTH_MAX_shift) |
+			((cb_conf->h - 1) << HEIGHT_MAX_shift));
+	break;
+    }
+
+    BEGIN_BATCH(3 + 2);
+    EREG(CB_COLOR0_BASE + (0x3c * cb_conf->id), (cb_conf->base >> 8));
+    RELOC_BATCH(cb_conf->bo, 0, domain);
+    END_BATCH();
+
+    /* Set CMASK & FMASK buffer to the offset of color buffer as
+     * we don't use those this shouldn't cause any issue and we
+     * then have a valid cmd stream
+     */
+    BEGIN_BATCH(3 + 2);
+    EREG(CB_COLOR0_CMASK + (0x3c * cb_conf->id), (0     >> 8));
+    RELOC_BATCH(cb_conf->bo, 0, domain);
+    END_BATCH();
+    BEGIN_BATCH(3 + 2);
+    EREG(CB_COLOR0_FMASK + (0x3c * cb_conf->id), (0     >> 8));
+    RELOC_BATCH(cb_conf->bo, 0, domain);
+    END_BATCH();
+
+    /* tiling config */
+    BEGIN_BATCH(3 + 2);
+    EREG(CB_COLOR0_ATTRIB + (0x3c * cb_conf->id), cb_color_attrib);
+    RELOC_BATCH(cb_conf->bo, 0, domain);
+    END_BATCH();
+    BEGIN_BATCH(3 + 2);
+    EREG(CB_COLOR0_INFO + (0x3c * cb_conf->id), cb_color_info);
+    RELOC_BATCH(cb_conf->bo, 0, domain);
+    END_BATCH();
+
+    BEGIN_BATCH(24);
+    EREG(CB_COLOR0_PITCH + (0x3c * cb_conf->id), pitch);
+    EREG(CB_COLOR0_SLICE + (0x3c * cb_conf->id), slice);
+    EREG(CB_COLOR0_VIEW + (0x3c * cb_conf->id), 0);
+    EREG(CB_COLOR0_DIM + (0x3c * cb_conf->id), cb_color_dim);
+    EREG(CB_COLOR0_CMASK_SLICE + (0x3c * cb_conf->id), 0);
+    EREG(CB_COLOR0_FMASK_SLICE + (0x3c * cb_conf->id), 0);
+    PACK0(CB_COLOR0_CLEAR_WORD0 + (0x3c * cb_conf->id), 4);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    END_BATCH();
+}
+
+/*
+ * Emit an IT_SURFACE_SYNC packet for the given sync_type, followed by a
+ * relocation for bo with the supplied read/write domains.
+ *
+ * size is converted to 256-byte units (rounded up), except that
+ * 0xffffffff is passed through unchanged.  mc_addr is emitted shifted
+ * right by 8, and the poll interval dword is fixed at 10.
+ */
+static void
+evergreen_cp_set_surface_sync(ScrnInfoPtr pScrn, uint32_t sync_type,
+			      uint32_t size, uint64_t mc_addr,
+			      struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t cp_coher_size;
+    if (size == 0xffffffff)
+	cp_coher_size = 0xffffffff;
+    else
+	cp_coher_size = ((size + 255) >> 8);
+
+    BEGIN_BATCH(5 + 2);
+    PACK3(IT_SURFACE_SYNC, 4);
+    E32(sync_type);
+    E32(cp_coher_size);
+    E32((mc_addr >> 8));
+    E32(10); /* poll interval */
+    RELOC_BATCH(bo, rdomains, wdomain);
+    END_BATCH();
+}
+
+/* inserts a wait for vline in the command stream
+ *
+ * Nothing is emitted when crtc is NULL or disabled, when stop < start,
+ * when pPix is not the screen pixmap (CS path) or does not sit at
+ * framebuffer offset 0 (non-CS path), or when the clamped start lies
+ * beyond crtc->mode.VDisplay.  start is clamped to >= 0 and stop to
+ * <= crtc->mode.VDisplay before the range is programmed.
+ */
+void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix,
+				  xf86CrtcPtr crtc, int start, int stop)
+{
+    RADEONInfoPtr  info = RADEONPTR(pScrn);
+    drmmode_crtc_private_ptr drmmode_crtc;
+    uint32_t offset;
+
+    if (!crtc)
+        return;
+
+    /* crtc may only be dereferenced once the NULL check above has passed */
+    drmmode_crtc = crtc->driver_private;
+
+    if (stop < start)
+        return;
+
+    if (!crtc->enabled)
+        return;
+
+    if (info->cs) {
+        if (pPix != pScrn->pScreen->GetScreenPixmap(pScrn->pScreen))
+	    return;
+    } else {
+#ifdef USE_EXA
+	if (info->useEXA)
+	    offset = exaGetPixmapOffset(pPix);
+	else
+#endif
+	    offset = pPix->devPrivate.ptr - info->FB;
+
+	/* if drawing to front buffer */
+	if (offset != 0)
+	    return;
+    }
+
+    start = max(start, 0);
+    stop = min(stop, crtc->mode.VDisplay);
+
+    if (start > crtc->mode.VDisplay)
+        return;
+
+    BEGIN_BATCH(11);
+    /* set the VLINE range */
+    EREG(EVERGREEN_VLINE_START_END, /* this is just a marker */
+	 (start << EVERGREEN_VLINE_START_SHIFT) |
+	 (stop << EVERGREEN_VLINE_END_SHIFT));
+
+    /* tell the CP to poll the VLINE state register */
+    PACK3(IT_WAIT_REG_MEM, 6);
+    E32(IT_WAIT_REG | IT_WAIT_EQ);
+    E32(IT_WAIT_ADDR(EVERGREEN_VLINE_STATUS));
+    E32(0);
+    E32(0);                          // Ref value
+    E32(EVERGREEN_VLINE_STAT);    // Mask
+    E32(10);                         // Wait interval
+    /* add crtc reloc */
+    PACK3(IT_NOP, 1);
+    E32(drmmode_crtc->mode_crtc->crtc_id);
+    END_BATCH();
+}
+
+/*
+ * Program the fetch shader: emit SQ_PGM_START_FS (shader_addr >> 8) with
+ * a relocation against fs_conf->bo, then SQ_PGM_RESOURCES_FS built from
+ * num_gprs, stack_size and the dx10_clamp flag.
+ */
+void
+evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t sq_pgm_resources;
+
+    sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) |
+			(fs_conf->stack_size << STACK_SIZE_shift));
+
+    if (fs_conf->dx10_clamp)
+	sq_pgm_resources |= DX10_CLAMP_bit;
+
+    BEGIN_BATCH(3 + 2);
+    EREG(SQ_PGM_START_FS, fs_conf->shader_addr >> 8);
+    RELOC_BATCH(fs_conf->bo, domain, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(3);
+    EREG(SQ_PGM_RESOURCES_FS, sq_pgm_resources);
+    END_BATCH();
+}
+
+/*
+ * Program the vertex shader described by vs_conf.
+ *
+ * Issues a surface sync with SH_ACTION_ENA_bit over the shader range,
+ * emits SQ_PGM_START_VS (shader_addr >> 8) with a relocation against
+ * vs_conf->bo, then writes SQ_PGM_RESOURCES_VS and the following
+ * register with the two resource words built from vs_conf.
+ */
+void
+evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t sq_pgm_resources, sq_pgm_resources_2;
+
+    sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) |
+			(vs_conf->stack_size << STACK_SIZE_shift));
+
+    if (vs_conf->dx10_clamp)
+	sq_pgm_resources |= DX10_CLAMP_bit;
+    if (vs_conf->uncached_first_inst)
+	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
+
+    sq_pgm_resources_2 = ((vs_conf->single_round << SINGLE_ROUND_shift) |
+			  (vs_conf->double_round << DOUBLE_ROUND_shift));
+
+    if (vs_conf->allow_sdi)
+	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
+    if (vs_conf->allow_sd0)
+	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
+    if (vs_conf->allow_ddi)
+	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
+    if (vs_conf->allow_ddo)
+	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
+
+    /* flush SQ cache */
+    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
+				  vs_conf->shader_size, vs_conf->shader_addr,
+				  vs_conf->bo, domain, 0);
+
+    BEGIN_BATCH(3 + 2);
+    EREG(SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
+    RELOC_BATCH(vs_conf->bo, domain, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(4);
+    PACK0(SQ_PGM_RESOURCES_VS, 2);
+    E32(sq_pgm_resources);
+    E32(sq_pgm_resources_2);
+    END_BATCH();
+}
+
+/*
+ * Program the pixel shader described by ps_conf.
+ *
+ * Same sequence as the vertex shader setup (surface sync with
+ * SH_ACTION_ENA_bit, program start with relocation, resource words),
+ * with two differences visible here: clamp_consts adds CLAMP_CONSTS_bit,
+ * and a third dword, ps_conf->export_mode, is written after the two
+ * resource words starting at SQ_PGM_RESOURCES_PS.
+ */
+void
+evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t sq_pgm_resources, sq_pgm_resources_2;
+
+    sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) |
+			(ps_conf->stack_size << STACK_SIZE_shift));
+
+    if (ps_conf->dx10_clamp)
+	sq_pgm_resources |= DX10_CLAMP_bit;
+    if (ps_conf->uncached_first_inst)
+	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
+    if (ps_conf->clamp_consts)
+	sq_pgm_resources |= CLAMP_CONSTS_bit;
+
+    sq_pgm_resources_2 = ((ps_conf->single_round << SINGLE_ROUND_shift) |
+			  (ps_conf->double_round << DOUBLE_ROUND_shift));
+
+    if (ps_conf->allow_sdi)
+	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_IN_bit;
+    if (ps_conf->allow_sd0)
+	sq_pgm_resources_2 |= ALLOW_SINGLE_DENORM_OUT_bit;
+    if (ps_conf->allow_ddi)
+	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_IN_bit;
+    if (ps_conf->allow_ddo)
+	sq_pgm_resources_2 |= ALLOW_DOUBLE_DENORM_OUT_bit;
+
+    /* flush SQ cache */
+    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
+				  ps_conf->shader_size, ps_conf->shader_addr,
+				  ps_conf->bo, domain, 0);
+
+    BEGIN_BATCH(3 + 2);
+    EREG(SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
+    RELOC_BATCH(ps_conf->bo, domain, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(5);
+    PACK0(SQ_PGM_RESOURCES_PS, 3);
+    E32(sq_pgm_resources);
+    E32(sq_pgm_resources_2);
+    E32(ps_conf->export_mode);
+    END_BATCH();
+}
+
+/*
+ * Bind an ALU constant buffer for the vertex or pixel shader.
+ *
+ * The size register value is size_bytes >> 8, with a minimum of 1.
+ * A surface sync with SH_ACTION_ENA_bit is always emitted first; then,
+ * for SHADER_TYPE_VS or SHADER_TYPE_PS, the matching buffer-size and
+ * cache-address (const_addr >> 8, with relocation) registers are
+ * written.  Any other type is reported through ErrorF and no size or
+ * address register is written.
+ */
+void
+evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    /* size reg is units of 16 consts (4 dwords each) */
+    uint32_t size = const_conf->size_bytes >> 8;
+
+    if (size == 0)
+	size = 1;
+
+    /* flush SQ cache */
+    evergreen_cp_set_surface_sync(pScrn, SH_ACTION_ENA_bit,
+				  const_conf->size_bytes, const_conf->const_addr,
+				  const_conf->bo, domain, 0);
+
+    switch (const_conf->type) {
+    case SHADER_TYPE_VS:
+	BEGIN_BATCH(3);
+	EREG(SQ_ALU_CONST_BUFFER_SIZE_VS_0, size);
+	END_BATCH();
+	BEGIN_BATCH(3 + 2);
+	EREG(SQ_ALU_CONST_CACHE_VS_0, const_conf->const_addr >> 8);
+	RELOC_BATCH(const_conf->bo, domain, 0);
+	END_BATCH();
+	break;
+    case SHADER_TYPE_PS:
+	BEGIN_BATCH(3);
+	EREG(SQ_ALU_CONST_BUFFER_SIZE_PS_0, size);
+	END_BATCH();
+	BEGIN_BATCH(3 + 2);
+	EREG(SQ_ALU_CONST_CACHE_PS_0, const_conf->const_addr >> 8);
+	RELOC_BATCH(const_conf->bo, domain, 0);
+	END_BATCH();
+	break;
+    default:
+	ErrorF("Unsupported const type %d\n", const_conf->type);
+	break;
+    }
+
+}
+
+/*
+ * Write val to the boolean constant register selected by offset
+ * (SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset).
+ */
+void
+evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    /* bool register order is: ps, vs/es, gs, hs, ls, cs; one register each
+     * 1 bits per bool; 32 bools each for ps, vs/es, gs, hs, ls, cs.
+     */
+    BEGIN_BATCH(3);
+    EREG(SQ_BOOL_CONST + offset * SQ_BOOL_CONST_offset, val);
+    END_BATCH();
+}
+
+/*
+ * Program vertex fetch resource res->id.
+ *
+ * Builds constant words 2 and 3 from res (word 4 is left at 0), issues
+ * a surface sync over accel_state->vb_offset bytes at
+ * accel_state->vb_mc_addr (TC_ACTION_ENA_bit on CEDAR, VC_ACTION_ENA_bit
+ * on every other family), then writes the 8-dword resource descriptor
+ * followed by a relocation against res->bo.
+ */
+static void
+evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
+
+    sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) |
+			     ((res->vtx_size_dw << 2) << SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift) |
+			     (res->format << SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift) |
+			     (res->num_format_all << SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift) |
+			     (res->endian << SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift));
+    if (res->clamp_x)
+	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit;
+
+    if (res->format_comp_all)
+	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit;
+
+    if (res->srf_mode_all)
+	    sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit;
+
+    sq_vtx_constant_word3 = ((res->dst_sel_x << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift) |
+			     (res->dst_sel_y << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift) |
+			     (res->dst_sel_z << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift) |
+			     (res->dst_sel_w << SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift));
+
+    if (res->uncached)
+	sq_vtx_constant_word3 |= SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit;
+
+    /* XXX ??? */
+    sq_vtx_constant_word4 = 0;
+
+    /* flush vertex cache */
+    if (info->ChipFamily == CHIP_FAMILY_CEDAR)
+	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
+				      accel_state->vb_offset, accel_state->vb_mc_addr,
+				      res->bo,
+				      domain, 0);
+    else
+	evergreen_cp_set_surface_sync(pScrn, VC_ACTION_ENA_bit,
+				      accel_state->vb_offset, accel_state->vb_mc_addr,
+				      res->bo,
+				      domain, 0);
+
+    BEGIN_BATCH(10 + 2);
+    PACK0(SQ_FETCH_RESOURCE + res->id * SQ_FETCH_RESOURCE_offset, 8);
+    E32(res->vb_addr & 0xffffffff);				// 0: BASE_ADDRESS
+    E32((res->vtx_num_entries << 2) - 1);			// 1: SIZE
+    E32(sq_vtx_constant_word2);	// 2: BASE_HI, STRIDE, CLAMP, FORMAT, ENDIAN
+    E32(sq_vtx_constant_word3);		// 3: swizzles
+    E32(sq_vtx_constant_word4);		// 4: num elements
+    E32(0);							// 5: n/a
+    E32(0);							// 6: n/a
+    E32(SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD7_0__TYPE_shift);	// 7: TYPE
+    RELOC_BATCH(res->bo, domain, 0);
+    END_BATCH();
+}
+
+/*
+ * Program texture fetch resource tex_res->id.
+ *
+ * Packs the tex_res fields into resource words 0, 1 and 4-7; width,
+ * height and depth are only encoded (as value - 1) when non-zero, and
+ * pitch is encoded as ((pitch + 7) >> 3) - 1.  A surface sync with
+ * TC_ACTION_ENA_bit over tex_res->size bytes at tex_res->base is emitted
+ * before the 8-dword descriptor, which is followed by two relocations:
+ * one against tex_res->bo and one against tex_res->mip_bo.
+ */
+void
+evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
+    uint32_t sq_tex_resource_word5, sq_tex_resource_word6, sq_tex_resource_word7;
+
+    sq_tex_resource_word0 = (tex_res->dim << DIM_shift);
+
+    if (tex_res->w)
+	sq_tex_resource_word0 |= (((((tex_res->pitch + 7) >> 3) - 1) << PITCH_shift) |
+				  ((tex_res->w - 1) << TEX_WIDTH_shift));
+
+    if (tex_res->tile_type)
+	sq_tex_resource_word0 |= SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit;
+
+    sq_tex_resource_word1 = (tex_res->array_mode << SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift);
+
+    if (tex_res->h)
+	sq_tex_resource_word1 |= ((tex_res->h - 1) << TEX_HEIGHT_shift);
+    if (tex_res->depth)
+	sq_tex_resource_word1 |= ((tex_res->depth - 1) << TEX_DEPTH_shift);
+
+    sq_tex_resource_word4 = ((tex_res->format_comp_x << FORMAT_COMP_X_shift) |
+			     (tex_res->format_comp_y << FORMAT_COMP_Y_shift) |
+			     (tex_res->format_comp_z << FORMAT_COMP_Z_shift) |
+			     (tex_res->format_comp_w << FORMAT_COMP_W_shift) |
+			     (tex_res->num_format_all << SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift) |
+			     (tex_res->endian << SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift) |
+			     (tex_res->dst_sel_x << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift) |
+			     (tex_res->dst_sel_y << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift) |
+			     (tex_res->dst_sel_z << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift) |
+			     (tex_res->dst_sel_w << SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift) |
+			     (tex_res->base_level << BASE_LEVEL_shift));
+
+    if (tex_res->srf_mode_all)
+	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit;
+    if (tex_res->force_degamma)
+	sq_tex_resource_word4 |= SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit;
+
+    sq_tex_resource_word5 = ((tex_res->last_level << LAST_LEVEL_shift) |
+			     (tex_res->base_array << BASE_ARRAY_shift) |
+			     (tex_res->last_array << LAST_ARRAY_shift));
+
+    sq_tex_resource_word6 = ((tex_res->min_lod << SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift) |
+			     (tex_res->perf_modulation << PERF_MODULATION_shift));
+
+    if (tex_res->interlaced)
+	sq_tex_resource_word6 |= INTERLACED_bit;
+
+    sq_tex_resource_word7 = ((tex_res->format << SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift) |
+			     (SQ_TEX_VTX_VALID_TEXTURE << SQ_TEX_RESOURCE_WORD7_0__TYPE_shift));
+
+    /* flush texture cache */
+    evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
+				  tex_res->size, tex_res->base,
+				  tex_res->bo, domain, 0);
+
+    BEGIN_BATCH(10 + 4);
+    PACK0(SQ_FETCH_RESOURCE + tex_res->id * SQ_FETCH_RESOURCE_offset, 8);
+    E32(sq_tex_resource_word0);
+    E32(sq_tex_resource_word1);
+    E32(((tex_res->base) >> 8));
+    E32(((tex_res->mip_base) >> 8));
+    E32(sq_tex_resource_word4);
+    E32(sq_tex_resource_word5);
+    E32(sq_tex_resource_word6);
+    E32(sq_tex_resource_word7);
+    RELOC_BATCH(tex_res->bo, domain, 0);
+    RELOC_BATCH(tex_res->mip_bo, domain, 0);
+    END_BATCH();
+}
+
+/*
+ * Program texture sampler s->id: pack the clamp, filter, border colour,
+ * depth compare and chroma key fields into word 0, the LOD range and
+ * perf fields into word 1, the LOD biases and flag bits into word 2,
+ * and write the three words starting at
+ * SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset.
+ */
+void
+evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
+
+    sq_tex_sampler_word0 = ((s->clamp_x       << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift)		|
+			    (s->clamp_y       << CLAMP_Y_shift)					|
+			    (s->clamp_z       << CLAMP_Z_shift)					|
+			    (s->xy_mag_filter << XY_MAG_FILTER_shift)				|
+			    (s->xy_min_filter << XY_MIN_FILTER_shift)				|
+			    (s->z_filter      << Z_FILTER_shift)	|
+			    (s->mip_filter    << MIP_FILTER_shift)				|
+			    (s->border_color  << BORDER_COLOR_TYPE_shift)			|
+			    (s->depth_compare << DEPTH_COMPARE_FUNCTION_shift)			|
+			    (s->chroma_key    << CHROMA_KEY_shift));
+
+    sq_tex_sampler_word1 = ((s->min_lod       << SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift)		|
+			    (s->max_lod       << MAX_LOD_shift)					|
+			    (s->perf_mip      << PERF_MIP_shift)	|
+			    (s->perf_z        << PERF_Z_shift));
+
+
+    sq_tex_sampler_word2 = ((s->lod_bias      << SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift) |
+			    (s->lod_bias2     << LOD_BIAS_SEC_shift));
+
+    if (s->mc_coord_truncate)
+	sq_tex_sampler_word2 |= MC_COORD_TRUNCATE_bit;
+    if (s->force_degamma)
+	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit;
+    if (s->truncate_coord)
+	sq_tex_sampler_word2 |= TRUNCATE_COORD_bit;
+    if (s->disable_cube_wrap)
+	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit;
+    if (s->type)
+	sq_tex_sampler_word2 |= SQ_TEX_SAMPLER_WORD2_0__TYPE_bit;
+
+    BEGIN_BATCH(5);
+    PACK0(SQ_TEX_SAMPLER_WORD + s->id * SQ_TEX_SAMPLER_WORD_offset, 3);
+    E32(sq_tex_sampler_word0);
+    E32(sq_tex_sampler_word1);
+    E32(sq_tex_sampler_word2);
+    END_BATCH();
+}
+
+//XXX deal with clip offsets in clip setup
+/*
+ * Write the screen scissor rectangle: (x1, y1) into
+ * PA_SC_SCREEN_SCISSOR_TL and (x2, y2) into the following BR register.
+ */
+void
+evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_SCREEN_SCISSOR_TL, 2);
+    E32(((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) |
+	 (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift)));
+    E32(((x2 << PA_SC_SCREEN_SCISSOR_BR__BR_X_shift) |
+	 (y2 << PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift)));
+    END_BATCH();
+}
+
+/*
+ * Write the scissor rectangle (x1, y1)-(x2, y2) for viewport id.
+ * WINDOW_OFFSET_DISABLE_bit is set in the TL dword.
+ */
+void
+evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_VPORT_SCISSOR_0_TL + id * PA_SC_VPORT_SCISSOR_0_TL_offset, 2);
+    E32(((x1 << PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift) |
+	 (y1 << PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift) |
+	 WINDOW_OFFSET_DISABLE_bit));
+    E32(((x2 << PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift) |
+	 (y2 << PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift)));
+    END_BATCH();
+}
+
+/*
+ * Write the generic scissor rectangle: (x1, y1) into
+ * PA_SC_GENERIC_SCISSOR_TL (with WINDOW_OFFSET_DISABLE_bit set) and
+ * (x2, y2) into the following BR register.
+ */
+void
+evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_GENERIC_SCISSOR_TL, 2);
+    E32(((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) |
+	 (y1 << PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift) |
+	 WINDOW_OFFSET_DISABLE_bit));
+    /* the BR dword uses the BR field shifts, like the other scissor helpers */
+    E32(((x2 << PA_SC_GENERIC_SCISSOR_BR__BR_X_shift) |
+	 (y2 << PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift)));
+    END_BATCH();
+}
+
+/*
+ * Write the window scissor rectangle: (x1, y1) into
+ * PA_SC_WINDOW_SCISSOR_TL (with WINDOW_OFFSET_DISABLE_bit set) and
+ * (x2, y2) into the following BR register.
+ */
+void
+evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_WINDOW_SCISSOR_TL, 2);
+    E32(((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) |
+	 (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) |
+	 WINDOW_OFFSET_DISABLE_bit));
+    E32(((x2 << PA_SC_WINDOW_SCISSOR_BR__BR_X_shift) |
+	 (y2 << PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift)));
+    END_BATCH();
+}
+
+/*
+ * Write clip rectangle id: (x1, y1) into the TL register at
+ * PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset and (x2, y2)
+ * into the following BR register.
+ */
+void
+evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
+    BEGIN_BATCH(4);
+    PACK0(PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, 2);
+    E32(((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) |
+	 (y1 << PA_SC_CLIPRECT_0_TL__TL_Y_shift)));
+    E32(((x2 << PA_SC_CLIPRECT_0_BR__BR_X_shift) |
+	 (y2 << PA_SC_CLIPRECT_0_BR__BR_Y_shift)));
+    END_BATCH();
+}
+
+/*
+ * Setup of default state
+ */
+
+void
+evergreen_set_default_state(ScrnInfoPtr pScrn)
+{
+    tex_resource_t tex_res;
+    shader_config_t fs_conf;
+    sq_config_t sq_conf;
+    int i;
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (accel_state->XInited3D)
+	return;
+
+    memset(&tex_res, 0, sizeof(tex_resource_t));
+    memset(&fs_conf, 0, sizeof(shader_config_t));
+
+    accel_state->XInited3D = TRUE;
+
+    evergreen_start_3d(pScrn);
+
+    /* SQ */
+    sq_conf.ps_prio = 0;
+    sq_conf.vs_prio = 1;
+    sq_conf.gs_prio = 2;
+    sq_conf.es_prio = 3;
+    sq_conf.hs_prio = 0;
+    sq_conf.ls_prio = 0;
+    sq_conf.cs_prio = 0;
+
+    switch (info->ChipFamily) {
+    case CHIP_FAMILY_CEDAR:
+    default:
+	sq_conf.num_ps_gprs = 93;
+	sq_conf.num_vs_gprs = 46;
+	sq_conf.num_temp_gprs = 4;
+	sq_conf.num_gs_gprs = 31;
+	sq_conf.num_es_gprs = 31;
+	sq_conf.num_hs_gprs = 23;
+	sq_conf.num_ls_gprs = 23;
+	sq_conf.num_ps_threads = 96;
+	sq_conf.num_vs_threads = 16;
+	sq_conf.num_gs_threads = 16;
+	sq_conf.num_es_threads = 16;
+	sq_conf.num_hs_threads = 16;
+	sq_conf.num_ls_threads = 16;
+	sq_conf.num_ps_stack_entries = 42;
+	sq_conf.num_vs_stack_entries = 42;
+	sq_conf.num_gs_stack_entries = 42;
+	sq_conf.num_es_stack_entries = 42;
+	sq_conf.num_hs_stack_entries = 42;
+	sq_conf.num_ls_stack_entries = 42;
+	break;
+    case CHIP_FAMILY_REDWOOD:
+	sq_conf.num_ps_gprs = 93;
+	sq_conf.num_vs_gprs = 46;
+	sq_conf.num_temp_gprs = 4;
+	sq_conf.num_gs_gprs = 31;
+	sq_conf.num_es_gprs = 31;
+	sq_conf.num_hs_gprs = 23;
+	sq_conf.num_ls_gprs = 23;
+	sq_conf.num_ps_threads = 128;
+	sq_conf.num_vs_threads = 20;
+	sq_conf.num_gs_threads = 20;
+	sq_conf.num_es_threads = 20;
+	sq_conf.num_hs_threads = 20;
+	sq_conf.num_ls_threads = 20;
+	sq_conf.num_ps_stack_entries = 42;
+	sq_conf.num_vs_stack_entries = 42;
+	sq_conf.num_gs_stack_entries = 42;
+	sq_conf.num_es_stack_entries = 42;
+	sq_conf.num_hs_stack_entries = 42;
+	sq_conf.num_ls_stack_entries = 42;
+	break;
+    case CHIP_FAMILY_JUNIPER:
+	sq_conf.num_ps_gprs = 93;
+	sq_conf.num_vs_gprs = 46;
+	sq_conf.num_temp_gprs = 4;
+	sq_conf.num_gs_gprs = 31;
+	sq_conf.num_es_gprs = 31;
+	sq_conf.num_hs_gprs = 23;
+	sq_conf.num_ls_gprs = 23;
+	sq_conf.num_ps_threads = 128;
+	sq_conf.num_vs_threads = 20;
+	sq_conf.num_gs_threads = 20;
+	sq_conf.num_es_threads = 20;
+	sq_conf.num_hs_threads = 20;
+	sq_conf.num_ls_threads = 20;
+	sq_conf.num_ps_stack_entries = 85;
+	sq_conf.num_vs_stack_entries = 85;
+	sq_conf.num_gs_stack_entries = 85;
+	sq_conf.num_es_stack_entries = 85;
+	sq_conf.num_hs_stack_entries = 85;
+	sq_conf.num_ls_stack_entries = 85;
+	break;
+    case CHIP_FAMILY_CYPRESS:
+    case CHIP_FAMILY_HEMLOCK:
+	sq_conf.num_ps_gprs = 93;
+	sq_conf.num_vs_gprs = 46;
+	sq_conf.num_temp_gprs = 4;
+	sq_conf.num_gs_gprs = 31;
+	sq_conf.num_es_gprs = 31;
+	sq_conf.num_hs_gprs = 23;
+	sq_conf.num_ls_gprs = 23;
+	sq_conf.num_ps_threads = 128;
+	sq_conf.num_vs_threads = 20;
+	sq_conf.num_gs_threads = 20;
+	sq_conf.num_es_threads = 20;
+	sq_conf.num_hs_threads = 20;
+	sq_conf.num_ls_threads = 20;
+	sq_conf.num_ps_stack_entries = 85;
+	sq_conf.num_vs_stack_entries = 85;
+	sq_conf.num_gs_stack_entries = 85;
+	sq_conf.num_es_stack_entries = 85;
+	sq_conf.num_hs_stack_entries = 85;
+	sq_conf.num_ls_stack_entries = 85;
+	break;
+    }
+
+    evergreen_sq_setup(pScrn, &sq_conf);
+
+    BEGIN_BATCH(24);
+    EREG(SQ_LDS_ALLOC_PS, 0);
+    EREG(SQ_DYN_GPR_RESOURCE_LIMIT_1, 0);
+
+    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
+    E32(0);
+    E32(0);
+    END_BATCH();
+
+    /* DB */
+    BEGIN_BATCH(3 + 2);
+    EREG(DB_Z_INFO,                           0);
+    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(3 + 2);
+    EREG(DB_STENCIL_INFO,                     0);
+    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(42);
+    EREG(DB_DEPTH_CONTROL,                    0);
+
+    PACK0(DB_RENDER_CONTROL, 5);
+    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
+    E32(0); // DB_COUNT_CONTROL
+    E32(0); // DB_DEPTH_VIEW
+    E32(0x2a); // DB_RENDER_OVERRIDE
+    E32(0); // DB_RENDER_OVERRIDE2
+
+    PACK0(DB_STENCIL_CLEAR, 2);
+    E32(0); // DB_STENCIL_CLEAR
+    E32(0); // DB_DEPTH_CLEAR
+
+    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
+					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
+					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
+					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
+
+    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
+			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+    // SX
+    EREG(SX_MISC,               0);
+
+    // CB
+    PACK0(SX_ALPHA_TEST_CONTROL, 5);
+    E32(0); // SX_ALPHA_TEST_CONTROL
+    E32(0x00000000); //CB_BLEND_RED
+    E32(0x00000000); //CB_BLEND_GREEN
+    E32(0x00000000); //CB_BLEND_BLUE
+    E32(0x00000000); //CB_BLEND_ALPHA
+
+    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
+
+    // SC
+    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
+					       (0 << WINDOW_Y_OFFSET_shift)));
+    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
+    EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
+    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
+    END_BATCH();
+
+    /* clip boolean is set to always visible -> doesn't matter */
+    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
+	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
+
+    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
+	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
+
+    BEGIN_BATCH(50);
+    PACK0(PA_SC_MODE_CNTL_0, 2);
+    E32(0); // PA_SC_MODE_CNTL_0
+    E32(0); // PA_SC_MODE_CNTL_1
+
+    PACK0(PA_SC_LINE_CNTL, 16);
+    E32(0); // PA_SC_LINE_CNTL
+    E32(0); // PA_SC_AA_CONFIG
+    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
+	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
+    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
+    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
+    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
+    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
+    E32(0); // PA_SC_AA_SAMPLE_LOCS_0
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0); // PA_SC_AA_SAMPLE_LOCS_7
+    E32(0xFFFFFFFF); // PA_SC_AA_MASK
+
+    // CL
+    PACK0(PA_CL_CLIP_CNTL, 8);
+    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
+    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
+    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
+    E32(0); // PA_CL_VS_OUT_CNTL
+    E32(0); // PA_CL_NANINF_CNTL
+    E32(0); // PA_SU_LINE_STIPPLE_CNTL
+    E32(0); // PA_SU_LINE_STIPPLE_SCALE
+    E32(0); // PA_SU_PRIM_FILTER_CNTL
+
+    // SU
+    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(SPI_INPUT_Z, 8);
+    E32(0); // SPI_INPUT_Z
+    E32(0); // SPI_FOG_CNTL
+    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
+    E32(0); // SPI_PS_IN_CONTROL_2
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    END_BATCH();
+
+    // clear FS
+    fs_conf.bo = accel_state->shaders_bo;
+    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    // VGT
+    BEGIN_BATCH(45);
+
+    PACK0(VGT_MAX_VTX_INDX, 4);
+    E32(0xffffff);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
+    E32(0);
+    E32(0);
+
+    PACK0(VGT_REUSE_OFF, 2);
+    E32(0);
+    E32(0);
+
+    PACK0(PA_SU_POINT_SIZE, 17);
+    E32(0); // PA_SU_POINT_SIZE
+    E32(0); // PA_SU_POINT_MINMAX
+    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
+    E32(0); // PA_SC_LINE_STIPPLE
+    E32(0); // VGT_OUTPUT_PATH_CNTL
+    E32(0); // VGT_HOS_CNTL
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0); // VGT_GS_MODE
+
+    EREG(VGT_PRIMITIVEID_EN,                  0);
+    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
+    EREG(VGT_SHADER_STAGES_EN,          0);
+
+    PACK0(VGT_STRMOUT_CONFIG, 2);
+    E32(0);
+    E32(0);
+    END_BATCH();
+}
+
+
+/*
+ * Commands
+ */
+/* Emit one auto-indexed draw described by draw_conf as a single batch. */
+void
+evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn); /* not used directly here; presumably read by the batch macros -- confirm */
+
+    BEGIN_BATCH(10);
+    EREG(VGT_PRIMITIVE_TYPE, draw_conf->prim_type);
+    PACK3(IT_INDEX_TYPE, 1);
+    E32(draw_conf->index_type);
+    PACK3(IT_NUM_INSTANCES, 1);
+    E32(draw_conf->num_instances);
+    PACK3(IT_DRAW_INDEX_AUTO, 2);
+    E32(draw_conf->num_indices); /* first of the two payload dwords */
+    E32(draw_conf->vgt_draw_initiator); /* second payload dword */
+    END_BATCH();
+}
+
+/* Draw the vertices queued since vb_start_op as a rect list and sync dst;
+ * vtx_size is divided by 4 to get the dwords per vertex. */
+void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    draw_config_t draw_conf;
+    vtx_resource_t vtx_res;
+
+    if (accel_state->vb_start_op == -1) /* -1: no operation in progress */
+	return;
+
+    /* nothing was appended since the op began: discard instead of drawing */
+    if (accel_state->vb_start_op == accel_state->vb_offset) {
+	radeon_ib_discard(pScrn);
+	radeon_cs_flush_indirect(pScrn);
+	radeon_vb_discard(pScrn);
+	return;
+    }
+
+    CLEAR (vtx_res);
+    CLEAR (draw_conf);
+
+    /* describe the vertex data written by this op */
+    accel_state->vb_size = accel_state->vb_offset - accel_state->vb_start_op;
+    vtx_res.bo              = accel_state->vb_bo;
+    vtx_res.vb_addr         = accel_state->vb_mc_addr + accel_state->vb_start_op;
+    vtx_res.id              = SQ_FETCH_RESOURCE_vs;
+    vtx_res.vtx_size_dw     = vtx_size / 4;
+    vtx_res.vtx_num_entries = accel_state->vb_size / 4;
+    vtx_res.dst_sel_w       = SQ_SEL_W;
+    vtx_res.dst_sel_z       = SQ_SEL_Z;
+    vtx_res.dst_sel_y       = SQ_SEL_Y;
+    vtx_res.dst_sel_x       = SQ_SEL_X;
+    evergreen_set_vtx_resource(pScrn, &vtx_res, RADEON_GEM_DOMAIN_GTT);
+
+    /* index count = total dwords / dwords per vertex */
+    draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
+    draw_conf.num_instances      = 1;
+    draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
+    draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX;
+    draw_conf.prim_type          = DI_PT_RECTLIST;
+    evergreen_draw_auto(pScrn, &draw_conf);
+
+    /* sync dst surface */
+    evergreen_cp_set_surface_sync(pScrn, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
+				  accel_state->dst_size, accel_state->dst_obj.offset,
+				  accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
+    accel_state->ib_reset_op = 0;
+    accel_state->vb_start_op = -1;
+}
+
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
new file mode 100644
index 0000000..7eb1b72
--- /dev/null
+++ b/src/evergreen_exa.c
@@ -0,0 +1,1927 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher at amd.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include "exa.h"
+
+#include "radeon.h"
+#include "radeon_macros.h"
+#include "radeon_reg.h"
+#include "evergreen_shader.h"
+#include "evergreen_reg.h"
+#include "evergreen_state.h"
+#include "radeon_exa_shared.h"
+#include "radeon_vbo.h"
+
+/* #define SHOW_VERTEXES */
+/* ROP3 code for each raster op; entries follow the GX* order named per line. */
+uint32_t EVERGREEN_ROP[16] = {
+    RADEON_ROP3_ZERO, /* GXclear        */
+    RADEON_ROP3_DSa,  /* GXand          */
+    RADEON_ROP3_SDna, /* GXandReverse   */
+    RADEON_ROP3_S,    /* GXcopy         */
+    RADEON_ROP3_DSna, /* GXandInverted  */
+    RADEON_ROP3_D,    /* GXnoop         */
+    RADEON_ROP3_DSx,  /* GXxor          */
+    RADEON_ROP3_DSo,  /* GXor           */
+    RADEON_ROP3_DSon, /* GXnor          */
+    RADEON_ROP3_DSxn, /* GXequiv        */
+    RADEON_ROP3_Dn,   /* GXinvert       */
+    RADEON_ROP3_SDno, /* GXorReverse    */
+    RADEON_ROP3_Sn,   /* GXcopyInverted */
+    RADEON_ROP3_DSno, /* GXorInverted   */
+    RADEON_ROP3_DSan, /* GXnand         */
+    RADEON_ROP3_ONE,  /* GXset          */
+};
+
+/*
+ * Store the objects, shader offsets, rop and planemask of the next operation
+ * in accel_state and rebuild the command-stream buffer list.  Returns FALSE
+ * when a pitch/offset alignment check or the CS space check fails.
+ */
+Bool EVERGREENSetAccelState(ScrnInfoPtr pScrn,
+			    struct r600_accel_object *src0,
+			    struct r600_accel_object *src1,
+			    struct r600_accel_object *dst,
+			    uint32_t vs_offset, uint32_t ps_offset,
+			    int rop, Pixel planemask)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    struct r600_accel_object *cur_src0 = &accel_state->src_obj[0];
+    struct r600_accel_object *cur_src1 = &accel_state->src_obj[1];
+    struct r600_accel_object *cur_dst = &accel_state->dst_obj;
+
+    /* an absent object is stored zeroed with a size of 0 */
+    if (src0 == NULL) {
+	memset(cur_src0, 0, sizeof(*cur_src0));
+	accel_state->src_size[0] = 0;
+    } else {
+	memcpy(cur_src0, src0, sizeof(*cur_src0));
+	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
+    }
+
+    if (src1 == NULL) {
+	memset(cur_src1, 0, sizeof(*cur_src1));
+	accel_state->src_size[1] = 0;
+    } else {
+	memcpy(cur_src1, src1, sizeof(*cur_src1));
+	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
+    }
+
+    if (dst == NULL) {
+	memset(cur_dst, 0, sizeof(*cur_dst));
+	accel_state->dst_size = 0;
+    } else {
+	memcpy(cur_dst, dst, sizeof(*cur_dst));
+	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
+    }
+
+    accel_state->planemask = planemask;
+    accel_state->rop = rop;
+
+    /* pitches must be multiples of 8, offsets multiples of 256 */
+    if (cur_src0->pitch & 7)
+	RADEON_FALLBACK(("Bad src pitch 0x%08x\n", cur_src0->pitch));
+    if (cur_src0->offset & 0xff)
+	RADEON_FALLBACK(("Bad src offset 0x%08x\n", cur_src0->offset));
+    if (cur_src1->pitch & 7)
+	RADEON_FALLBACK(("Bad src pitch 0x%08x\n", cur_src1->pitch));
+    if (cur_src1->offset & 0xff)
+	RADEON_FALLBACK(("Bad src offset 0x%08x\n", cur_src1->offset));
+
+    if (cur_dst->pitch & 7)
+	RADEON_FALLBACK(("Bad dst pitch 0x%08x\n", cur_dst->pitch));
+    if (cur_dst->offset & 0xff)
+	RADEON_FALLBACK(("Bad dst offset 0x%08x\n", cur_dst->offset));
+
+    accel_state->vs_mc_addr = vs_offset;
+    accel_state->vs_size = 512;
+    accel_state->ps_mc_addr = ps_offset;
+    accel_state->ps_size = 512;
+
+    /* buffer list: shaders, any sources, then dst (domain in the last argument) */
+    radeon_cs_space_reset_bos(info->cs);
+    radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo,
+				      RADEON_GEM_DOMAIN_VRAM, 0);
+    if (cur_src0->bo)
+	radeon_cs_space_add_persistent_bo(info->cs, cur_src0->bo,
+					  cur_src0->domain, 0);
+    if (cur_src1->bo)
+	radeon_cs_space_add_persistent_bo(info->cs, cur_src1->bo,
+					  cur_src1->domain, 0);
+    if (cur_dst->bo)
+	radeon_cs_space_add_persistent_bo(info->cs, cur_dst->bo,
+					  0, cur_dst->domain);
+
+    if (radeon_cs_space_check(info->cs))
+	RADEON_FALLBACK(("Not enough RAM to hw accel operation\n"));
+
+    return TRUE;
+}
+
+static void
+EVERGREENDoneSolid(PixmapPtr pPix);
+
+/* Set up a solid fill of pPix with colour fg under raster op alu and planemask
+ * pm.  Returns FALSE to request a fallback; once the PS constant buffer has
+ * been allocated, every failure path releases it again. */
+static Bool
+EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    cb_config_t     cb_conf;
+    shader_config_t vs_conf, ps_conf;
+    int pmask = 0;
+    int ret;
+    uint32_t a, r, g, b;
+    float *ps_alu_consts;
+    const_config_t ps_const_conf;
+    struct r600_accel_object dst;
+
+    if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
+	RADEON_FALLBACK(("EVERGREENCheckDatatype failed\n"));
+    if (!RADEONValidPM(pm, pPix->drawable.bitsPerPixel))
+	RADEON_FALLBACK(("invalid planemask\n"));
+
+    dst.offset = 0;
+    dst.bo = radeon_get_pixmap_bo(pPix);
+
+    dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
+    dst.width = pPix->drawable.width;
+    dst.height = pPix->drawable.height;
+    dst.bpp = pPix->drawable.bitsPerPixel;
+    dst.domain = RADEON_GEM_DOMAIN_VRAM;
+
+    if (!EVERGREENSetAccelState(pScrn, NULL, NULL, &dst,
+				accel_state->solid_vs_offset,
+				accel_state->solid_ps_offset, alu, pm))
+	return FALSE;
+
+    CLEAR (cb_conf);
+    CLEAR (vs_conf);
+    CLEAR (ps_conf);
+    CLEAR (ps_const_conf);
+
+    ps_const_conf.bo = radeon_bo_open(info->bufmgr, 0, 256, 0,
+				      RADEON_GEM_DOMAIN_GTT, 0);
+    if (ps_const_conf.bo == NULL)
+	RADEON_FALLBACK(("ps const buffer alloc failed\n"));
+    ret = radeon_bo_map(ps_const_conf.bo, 0);
+    if (ret) {
+	radeon_bo_unref(ps_const_conf.bo); /* do not leak the bo when mapping fails */
+	RADEON_FALLBACK(("ps const buffer map failed\n"));
+    }
+
+    /* PS alu constants */
+    ps_const_conf.size_bytes = 256;
+    ps_const_conf.const_addr = 0;
+    ps_const_conf.type = SHADER_TYPE_PS;
+    ps_alu_consts = (float *)ps_const_conf.bo->ptr;
+    if (accel_state->dst_obj.bpp == 16) {
+	r = (fg >> 11) & 0x1f;
+	g = (fg >> 5) & 0x3f;
+	b = (fg >> 0) & 0x1f;
+	ps_alu_consts[0] = (float)r / 31; /* R */
+	ps_alu_consts[1] = (float)g / 63; /* G */
+	ps_alu_consts[2] = (float)b / 31; /* B */
+	ps_alu_consts[3] = 1.0; /* A */
+    } else if (accel_state->dst_obj.bpp == 8) {
+	a = (fg >> 0) & 0xff;
+	ps_alu_consts[0] = 0.0; /* R */
+	ps_alu_consts[1] = 0.0; /* G */
+	ps_alu_consts[2] = 0.0; /* B */
+	ps_alu_consts[3] = (float)a / 255; /* A */
+    } else {
+	a = (fg >> 24) & 0xff;
+	r = (fg >> 16) & 0xff;
+	g = (fg >> 8) & 0xff;
+	b = (fg >> 0) & 0xff;
+	ps_alu_consts[0] = (float)r / 255; /* R */
+	ps_alu_consts[1] = (float)g / 255; /* G */
+	ps_alu_consts[2] = (float)b / 255; /* B */
+	ps_alu_consts[3] = (float)a / 255; /* A */
+    }
+    radeon_bo_unmap(ps_const_conf.bo);
+
+    radeon_cs_space_add_persistent_bo(info->cs, ps_const_conf.bo,
+				      RADEON_GEM_DOMAIN_GTT, 0);
+    if (radeon_cs_space_check(info->cs)) {
+	radeon_bo_unref(ps_const_conf.bo);
+	RADEON_FALLBACK(("ps const buffer size check failed\n"));
+    }
+
+    radeon_vbo_check(pScrn, 16);
+    radeon_cp_start(pScrn);
+
+    evergreen_set_default_state(pScrn);
+
+    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+
+    /* Shader */
+    vs_conf.shader_addr         = accel_state->vs_mc_addr;
+    vs_conf.shader_size         = accel_state->vs_size;
+    vs_conf.num_gprs            = 2;
+    vs_conf.stack_size          = 0;
+    vs_conf.bo                  = accel_state->shaders_bo;
+    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    ps_conf.shader_addr         = accel_state->ps_mc_addr;
+    ps_conf.shader_size         = accel_state->ps_size;
+    ps_conf.num_gprs            = 1;
+    ps_conf.stack_size          = 0;
+    ps_conf.clamp_consts        = 0;
+    ps_conf.export_mode         = 2;
+    ps_conf.bo                  = accel_state->shaders_bo;
+    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    cb_conf.id = 0;
+    cb_conf.w = accel_state->dst_obj.pitch;
+    cb_conf.h = accel_state->dst_obj.height;
+    cb_conf.base = accel_state->dst_obj.offset;
+    cb_conf.bo = accel_state->dst_obj.bo;
+
+    if (accel_state->dst_obj.bpp == 8) {
+	cb_conf.format = COLOR_8;
+	cb_conf.comp_swap = 3; /* A */
+    } else if (accel_state->dst_obj.bpp == 16) {
+	cb_conf.format = COLOR_5_6_5;
+	cb_conf.comp_swap = 2; /* RGB */
+    } else {
+	cb_conf.format = COLOR_8_8_8_8;
+	cb_conf.comp_swap = 1; /* ARGB */
+    }
+    cb_conf.source_format = EXPORT_4C_16BPC;
+    cb_conf.blend_clamp = 1;
+    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
+
+    /* Render setup */
+    if (accel_state->planemask & 0x000000ff)
+	pmask |= 4; /* B */
+    if (accel_state->planemask & 0x0000ff00)
+	pmask |= 2; /* G */
+    if (accel_state->planemask & 0x00ff0000)
+	pmask |= 1; /* R */
+    if (accel_state->planemask & 0xff000000)
+	pmask |= 8; /* A */
+
+    BEGIN_BATCH(23);
+    EREG(CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
+    EREG(CB_COLOR_CONTROL,                    (EVERGREEN_ROP[accel_state->rop] |
+					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
+    EREG(CB_BLEND0_CONTROL,                   0);
+
+    /* Interpolator setup */
+    /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
+    EREG(SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift));
+    EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
+    /* color semantic id 0 -> GPR[0] */
+    EREG(SPI_PS_INPUT_CNTL_0 + (0 << 2),       ((0    << SEMANTIC_shift)	|
+						(0x03 << DEFAULT_VAL_shift)	|
+						FLAT_SHADE_bit));
+
+    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+    /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */
+    PACK0(SPI_PS_IN_CONTROL_0, 3);
+    E32(((0 << NUM_INTERP_shift) |
+	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
+    E32(0); // SPI_PS_IN_CONTROL_1
+    E32(FLAT_SHADE_ENA_bit); // SPI_INTERP_CONTROL_0
+    END_BATCH();
+
+    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
+
+    if (accel_state->vsync)
+	RADEONVlineHelperClear(pScrn);
+
+    return TRUE;
+}
+
+
+/* Queue one solid rectangle as the vertices (x1,y1), (x1,y2), (x2,y2). */
+static void EVERGREENSolid(PixmapPtr pPix, int x1, int y1, int x2, int y2)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    const float fx1 = (float)x1, fy1 = (float)y1;
+    const float fx2 = (float)x2, fy2 = (float)y2;
+    float *vb;
+
+    if (accel_state->vsync)
+	RADEONVlineHelperSet(pScrn, x1, y1, x2, y2);
+
+    vb = radeon_vbo_space(pScrn, 8);
+    /* two floats per vertex */
+    vb[0] = fx1;
+    vb[1] = fy1;
+    vb[2] = fx1;
+    vb[3] = fy2;
+    vb[4] = fx2;
+    vb[5] = fy2;
+
+    radeon_vbo_commit(pScrn);
+}
+
+/* End a solid fill: wait for the vline window if vsync is set, then draw. */
+static void EVERGREENDoneSolid(PixmapPtr pPix)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (accel_state->vsync) {
+	evergreen_cp_wait_vline_sync(pScrn, pPix, accel_state->vline_crtc,
+				     accel_state->vline_y1, accel_state->vline_y2);
+    }
+
+    /* 8 bytes per vertex, the size EVERGREENSolid passes to radeon_vbo_space */
+    evergreen_finish_op(pScrn, 8);
+}
+/* Emit copy state from accel_state->src_obj[0] to accel_state->dst_obj: shaders, texture, sampler, render target, interpolators. */
+static void
+EVERGREENDoPrepareCopy(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    int pmask = 0;
+    cb_config_t     cb_conf;
+    tex_resource_t  tex_res;
+    tex_sampler_t   tex_samp;
+    shader_config_t vs_conf, ps_conf;
+
+    CLEAR (cb_conf);
+    CLEAR (tex_res);
+    CLEAR (tex_samp);
+    CLEAR (vs_conf);
+    CLEAR (ps_conf);
+
+    radeon_vbo_check(pScrn, 16);
+    radeon_cp_start(pScrn);
+
+    evergreen_set_default_state(pScrn);
+
+    /* all three scissors span the whole destination (0,0)-(width,height) */
+    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+
+    /* Shader */
+    vs_conf.shader_addr         = accel_state->vs_mc_addr;
+    vs_conf.shader_size         = accel_state->vs_size;
+    vs_conf.num_gprs            = 2;
+    vs_conf.stack_size          = 0;
+    vs_conf.bo                  = accel_state->shaders_bo;
+    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    ps_conf.shader_addr         = accel_state->ps_mc_addr;
+    ps_conf.shader_size         = accel_state->ps_size;
+    ps_conf.num_gprs            = 1;
+    ps_conf.stack_size          = 0;
+    ps_conf.clamp_consts        = 0;
+    ps_conf.export_mode         = 2;
+    ps_conf.bo                  = accel_state->shaders_bo;
+    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    /* Texture */
+    tex_res.id                  = 0;
+    tex_res.w                   = accel_state->src_obj[0].width;
+    tex_res.h                   = accel_state->src_obj[0].height;
+    tex_res.pitch               = accel_state->src_obj[0].pitch;
+    tex_res.depth               = 0;
+    tex_res.dim                 = SQ_TEX_DIM_2D;
+    tex_res.base                = accel_state->src_obj[0].offset;
+    tex_res.mip_base            = accel_state->src_obj[0].offset;
+    tex_res.size                = accel_state->src_size[0];
+    tex_res.bo                  = accel_state->src_obj[0].bo;
+    tex_res.mip_bo              = accel_state->src_obj[0].bo;
+    if (accel_state->src_obj[0].bpp == 8) {
+	tex_res.format              = FMT_8;
+	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
+    } else if (accel_state->src_obj[0].bpp == 16) {
+	tex_res.format              = FMT_5_6_5;
+	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
+    } else {
+	tex_res.format              = FMT_8_8_8_8;
+	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
+    }
+
+    tex_res.base_level          = 0;
+    tex_res.last_level          = 0;
+    tex_res.perf_modulation     = 0;
+    evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+    tex_samp.id                 = 0;
+    tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
+    tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
+    tex_samp.clamp_z            = SQ_TEX_WRAP;
+    tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
+    tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
+    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
+    tex_samp.mip_filter         = 0;			/* no mipmap */
+    evergreen_set_tex_sampler   (pScrn, &tex_samp);
+
+    /* render target 0 is the destination; its width is given as the pitch */
+    cb_conf.id = 0;
+    cb_conf.w = accel_state->dst_obj.pitch;
+    cb_conf.h = accel_state->dst_obj.height;
+    cb_conf.base = accel_state->dst_obj.offset;
+    cb_conf.bo = accel_state->dst_obj.bo;
+    if (accel_state->dst_obj.bpp == 8) {
+	cb_conf.format = COLOR_8;
+	cb_conf.comp_swap = 3; /* A */
+    } else if (accel_state->dst_obj.bpp == 16) {
+	cb_conf.format = COLOR_5_6_5;
+	cb_conf.comp_swap = 2; /* RGB */
+    } else {
+	cb_conf.format = COLOR_8_8_8_8;
+	cb_conf.comp_swap = 1; /* ARGB */
+    }
+    cb_conf.source_format = EXPORT_4C_16BPC;
+    cb_conf.blend_clamp = 1;
+    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
+
+    /* Render setup: one write-enable bit per planemask byte that has any bit set */
+    if (accel_state->planemask & 0x000000ff)
+	pmask |= 4; /* B */
+    if (accel_state->planemask & 0x0000ff00)
+	pmask |= 2; /* G */
+    if (accel_state->planemask & 0x00ff0000)
+	pmask |= 1; /* R */
+    if (accel_state->planemask & 0xff000000)
+	pmask |= 8; /* A */
+
+    BEGIN_BATCH(23);
+    EREG(CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
+    EREG(CB_COLOR_CONTROL,                    (EVERGREEN_ROP[accel_state->rop] |
+					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
+    EREG(CB_BLEND0_CONTROL,                   0);
+
+    /* Interpolator setup */
+    /* export tex coord from VS */
+    EREG(SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
+    EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
+    /* color semantic id 0 -> GPR[0] */
+    EREG(SPI_PS_INPUT_CNTL_0 + (0 << 2),       ((0    << SEMANTIC_shift)	|
+						(0x01 << DEFAULT_VAL_shift)));
+
+    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+    /* input tex coord from VS */
+    PACK0(SPI_PS_IN_CONTROL_0, 3);
+    E32(((1 << NUM_INTERP_shift) |
+	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
+    E32(0); //SPI_PS_IN_CONTROL_1
+    E32(0); // SPI_INTERP_CONTROL_0 (neither flat-shade bit is set for copies)
+    END_BATCH();
+
+}
+
+/* Flush the queued copy vertices; 16 is the vertex size handed on. */
+static void EVERGREENDoCopy(ScrnInfoPtr pScrn)
+{
+    evergreen_finish_op(pScrn, 16);
+}
+
+/* Finish a copy batch, waiting for the vline window first when vsync is set. */
+static void EVERGREENDoCopyVline(PixmapPtr pPix)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (accel_state->vsync) {
+	evergreen_cp_wait_vline_sync(pScrn, pPix, accel_state->vline_crtc,
+				     accel_state->vline_y1, accel_state->vline_y2);
+    }
+
+    /* 16 bytes per vertex, the size EVERGREENAppendCopyVertex reserves */
+    evergreen_finish_op(pScrn, 16);
+}
+
+/* Queue one copy rectangle as three vertices of (dst x, dst y, src x, src y). */
+static void EVERGREENAppendCopyVertex(ScrnInfoPtr pScrn,
+				      int srcX, int srcY, int dstX, int dstY,
+				      int w, int h)
+{
+    const float dst_x0 = (float)dstX, dst_y0 = (float)dstY;
+    const float src_x0 = (float)srcX, src_y0 = (float)srcY;
+    const float dst_x1 = (float)(dstX + w), dst_y1 = (float)(dstY + h);
+    const float src_x1 = (float)(srcX + w), src_y1 = (float)(srcY + h);
+    float *vb = radeon_vbo_space(pScrn, 16);
+
+    vb[0] = dst_x0;
+    vb[1] = dst_y0;
+    vb[2] = src_x0;
+    vb[3] = src_y0;
+
+    vb[4] = dst_x0;
+    vb[5] = dst_y1;
+    vb[6] = src_x0;
+    vb[7] = src_y1;
+
+    vb[8] = dst_x1;
+    vb[9] = dst_y1;
+    vb[10] = src_x1;
+    vb[11] = src_y1;
+    radeon_vbo_commit(pScrn);
+}
+
+/*
+ * Validate and record a copy from pSrc to pDst.  If both pixmaps use the
+ * same buffer object a temporary VRAM buffer is allocated for EVERGREENCopy;
+ * otherwise the copy state is emitted right away.  FALSE requests fallback.
+ */
+static Bool EVERGREENPrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
+				 int xdir, int ydir,
+				 int rop,
+				 Pixel planemask)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    struct r600_accel_object src_obj, dst_obj;
+
+    if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
+	RADEON_FALLBACK(("EVERGREENCheckDatatype src failed\n"));
+    if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
+	RADEON_FALLBACK(("EVERGREENCheckDatatype dst failed\n"));
+    if (!RADEONValidPM(planemask, pDst->drawable.bitsPerPixel))
+	RADEON_FALLBACK(("Invalid planemask\n"));
+
+    /* pitch divided by bytes per pixel */
+    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
+    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
+
+    src_obj.bo = radeon_get_pixmap_bo(pSrc);
+    src_obj.offset = 0;
+    src_obj.width = pSrc->drawable.width;
+    src_obj.height = pSrc->drawable.height;
+    src_obj.bpp = pSrc->drawable.bitsPerPixel;
+    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+
+    dst_obj.bo = radeon_get_pixmap_bo(pDst);
+    dst_obj.offset = 0;
+    dst_obj.width = pDst->drawable.width;
+    dst_obj.height = pDst->drawable.height;
+    dst_obj.bpp = pDst->drawable.bitsPerPixel;
+    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
+
+    /* source and destination backed by one buffer object? */
+    accel_state->same_surface = (src_obj.bo == dst_obj.bo) ? TRUE : FALSE;
+
+    if (!EVERGREENSetAccelState(pScrn, &src_obj, NULL, &dst_obj,
+				accel_state->copy_vs_offset,
+				accel_state->copy_ps_offset, rop, planemask))
+	return FALSE;
+
+    if (!accel_state->same_surface) {
+	EVERGREENDoPrepareCopy(pScrn);
+    } else {
+	unsigned long size = pDst->drawable.height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
+
+	/* replace any temporary buffer left from an earlier copy */
+	if (accel_state->copy_area_bo) {
+	    radeon_bo_unref(accel_state->copy_area_bo);
+	    accel_state->copy_area_bo = NULL;
+	}
+	accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
+						   RADEON_GEM_DOMAIN_VRAM, 0);
+	if (accel_state->copy_area_bo == NULL)
+	    RADEON_FALLBACK(("temp copy surface alloc failed\n"));
+
+	radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo,
+					  RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_DOMAIN_VRAM);
+	if (radeon_cs_space_check(info->cs)) {
+	    radeon_bo_unref(accel_state->copy_area_bo);
+	    accel_state->copy_area_bo = NULL;
+	    return FALSE;
+	}
+	/* a non-NULL copy_area is what EVERGREENCopy tests before using the buffer */
+	accel_state->copy_area = (void*)accel_state->copy_area_bo;
+    }
+
+    if (accel_state->vsync)
+	RADEONVlineHelperClear(pScrn);
+
+    return TRUE;
+}
+/* Copy one w x h rectangle; same-surface copies go through the temporary buffer in two draws. */
+static void
+EVERGREENCopy(PixmapPtr pDst,
+	      int srcX, int srcY,
+	      int dstX, int dstY,
+	      int w, int h)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY))
+	return; /* source and destination rectangles coincide: nothing to do */
+
+    if (accel_state->vsync)
+	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
+
+    if (accel_state->same_surface && accel_state->copy_area) {
+	uint32_t orig_dst_domain = accel_state->dst_obj.domain;
+	uint32_t orig_src_domain = accel_state->src_obj[0].domain;
+	struct radeon_bo *orig_bo = accel_state->dst_obj.bo; /* also the source bo on this path */
+
+	/* src to tmp: drawn into the temporary buffer at the destination coordinates */
+	accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
+	accel_state->dst_obj.bo = accel_state->copy_area_bo;
+	accel_state->dst_obj.offset = 0;
+	EVERGREENDoPrepareCopy(pScrn);
+	EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h);
+	EVERGREENDoCopy(pScrn);
+
+	/* tmp to dst: source and destination coordinates are both (dstX, dstY) */
+	accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_VRAM;
+	accel_state->src_obj[0].bo = accel_state->copy_area_bo;
+	accel_state->src_obj[0].offset = 0;
+	accel_state->dst_obj.domain = orig_dst_domain;
+	accel_state->dst_obj.bo = orig_bo;
+	accel_state->dst_obj.offset = 0;
+	EVERGREENDoPrepareCopy(pScrn);
+	EVERGREENAppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h);
+	EVERGREENDoCopyVline(pDst);
+
+	/* restore state */
+	accel_state->src_obj[0].domain = orig_src_domain;
+	accel_state->src_obj[0].bo = orig_bo;
+	accel_state->src_obj[0].offset = 0;
+    } else
+	EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); /* only queued here; drawn by EVERGREENDoneCopy */
+
+}
+
+/* Finish a copy: the non-aliased path is drawn here; the same-surface path
+ * was already drawn rectangle by rectangle in EVERGREENCopy. */
+static void EVERGREENDoneCopy(PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (accel_state->same_surface == FALSE)
+	EVERGREENDoCopyVline(pDst);
+
+    /* clear the temporary-surface marker; copy_area_bo is not released here */
+    accel_state->copy_area = NULL;
+}
+
+/* Convert f to float and divide by 65536 (2^16), i.e. 16 fractional bits. */
+#define xFixedToFloat(f) (((float) (f)) / 65536)
+/* Blend description for one composite operator. */
+struct blendinfo {
+    Bool dst_alpha; /* set on the entries whose source factor uses DST_ALPHA */
+    Bool src_alpha; /* set on the entries whose dest factor uses SRC_ALPHA */
+    uint32_t blend_cntl; /* COLOR_SRCBLEND and COLOR_DESTBLEND factor fields */
+};
+/* Indexed by the op passed to EVERGREENGetBlendCntl; each entry is labelled with its operator. */
+static struct blendinfo EVERGREENBlendOp[] = {
+    /* Clear */
+    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+    /* Src */
+    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+    /* Dst */
+    {0, 0, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
+    /* Over */
+    {0, 1, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+    /* OverReverse */
+    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
+    /* In */
+    {1, 0, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+    /* InReverse */
+    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+    /* Out */
+    {1, 0, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ZERO << COLOR_DESTBLEND_shift)},
+    /* OutReverse */
+    {0, 1, (BLEND_ZERO << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+    /* Atop */
+    {1, 1, (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+    /* AtopReverse */
+    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+    /* Xor */
+    {1, 1, (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift) | (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)},
+    /* Add */
+    {0, 0, (BLEND_ONE << COLOR_SRCBLEND_shift) | (BLEND_ONE << COLOR_DESTBLEND_shift)},
+};
+
+struct formatinfo { /* pairs a picture format with the matching hardware texture format */
+    unsigned int fmt; /* PICT_* picture format code */
+    uint32_t card_fmt; /* FMT_* value used as the texture resource format */
+};
+
+static struct formatinfo EVERGREENTexFormats[] = { /* picture formats accepted as composite textures */
+    {PICT_a8r8g8b8,	FMT_8_8_8_8},
+    {PICT_x8r8g8b8,	FMT_8_8_8_8},
+    {PICT_a8b8g8r8,	FMT_8_8_8_8},
+    {PICT_x8b8g8r8,	FMT_8_8_8_8},
+#ifdef PICT_TYPE_BGRA
+    {PICT_b8g8r8a8,	FMT_8_8_8_8},
+    {PICT_b8g8r8x8,	FMT_8_8_8_8},
+#endif
+    {PICT_r5g6b5,	FMT_5_6_5},
+    {PICT_a1r5g5b5,	FMT_1_5_5_5},
+    {PICT_x1r5g5b5,     FMT_1_5_5_5},
+    {PICT_a8,		FMT_8},
+};
+
+/*
+ * Build the blend-control value for a composite operator.
+ *
+ * op         - index into EVERGREENBlendOp (not range-checked here)
+ * pMask      - mask picture, or NULL when there is no mask
+ * dst_format - PICT_* format of the destination picture
+ *
+ * Returns the adjusted source and destination blend factors OR'ed together.
+ */
+static uint32_t EVERGREENGetBlendCntl(int op, PicturePtr pMask, uint32_t dst_format)
+{
+    const struct blendinfo *entry = &EVERGREENBlendOp[op];
+    uint32_t src_factor = entry->blend_cntl & COLOR_SRCBLEND_mask;
+    uint32_t dst_factor = entry->blend_cntl & COLOR_DESTBLEND_mask;
+
+    /* A destination without an alpha channel is treated as alpha == 1, so
+     * factors that read destination alpha collapse to constants.
+     */
+    if (PICT_FORMAT_A(dst_format) == 0 && entry->dst_alpha) {
+	switch (src_factor) {
+	case (BLEND_DST_ALPHA << COLOR_SRCBLEND_shift):
+	    src_factor = (BLEND_ONE << COLOR_SRCBLEND_shift);
+	    break;
+	case (BLEND_ONE_MINUS_DST_ALPHA << COLOR_SRCBLEND_shift):
+	    src_factor = (BLEND_ZERO << COLOR_SRCBLEND_shift);
+	    break;
+	default:
+	    break;
+	}
+    }
+
+    /* With a component-alpha mask the per-channel value arrives in the source
+     * colour (mask channels multiplied by the source picture's alpha), so
+     * source-alpha factors are switched to their source-colour counterparts.
+     */
+    if (pMask && pMask->componentAlpha && entry->src_alpha) {
+	switch (dst_factor) {
+	case (BLEND_SRC_ALPHA << COLOR_DESTBLEND_shift):
+	    dst_factor = (BLEND_SRC_COLOR << COLOR_DESTBLEND_shift);
+	    break;
+	case (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift):
+	    dst_factor = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift);
+	    break;
+	default:
+	    break;
+	}
+    }
+
+    return src_factor | dst_factor;
+}
+
+/*
+ * Translate the destination picture's format into a colour-buffer format.
+ *
+ * On success the COLOR_* value is written to *dst_format and TRUE is
+ * returned; an unsupported format takes the RADEON_FALLBACK path and
+ * *dst_format is left untouched.
+ */
+static Bool EVERGREENGetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
+{
+    switch (pDstPicture->format) {
+    case PICT_a8r8g8b8:
+    case PICT_x8r8g8b8:
+    case PICT_a8b8g8r8:
+    case PICT_x8b8g8r8:
+#ifdef PICT_TYPE_BGRA
+    case PICT_b8g8r8a8:
+    case PICT_b8g8r8x8:
+#endif
+	*dst_format = COLOR_8_8_8_8;
+	return TRUE;
+
+    case PICT_r5g6b5:
+	*dst_format = COLOR_5_6_5;
+	return TRUE;
+
+    case PICT_a1r5g5b5:
+    case PICT_x1r5g5b5:
+	*dst_format = COLOR_1_5_5_5;
+	return TRUE;
+
+    case PICT_a8:
+	*dst_format = COLOR_8;
+	return TRUE;
+
+    default:
+	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
+	       (int)pDstPicture->format));
+    }
+
+    /* reached only if RADEON_FALLBACK() does not leave the function */
+    return TRUE;
+}
+
+/*
+ * Decide whether a picture can be sampled as a texture for compositing.
+ *
+ * pPict    - source or mask picture to validate; its pDrawable is dereferenced
+ * pDstPict - destination picture; only its alpha depth is consulted
+ * op       - composite operator
+ * unit     - texture unit the picture is meant for (not used by the checks)
+ *
+ * Returns TRUE when the size, format, filter and transform/repeat combination
+ * are all acceptable; otherwise takes the RADEON_FALLBACK path.
+ */
+static Bool EVERGREENCheckCompositeTexture(PicturePtr pPict,
+					   PicturePtr pDstPict,
+					   int op,
+					   int unit)
+{
+    const int max_tex_w = 16384;
+    const int max_tex_h = 16384;
+    const unsigned int num_formats =
+	sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]);
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    unsigned int repeatType = RepeatNone;
+    unsigned int idx;
+    Bool format_known = FALSE;
+
+    if (pPict->repeat)
+	repeatType = pPict->repeatType;
+
+    if (w > max_tex_w || h > max_tex_h)
+	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
+
+    for (idx = 0; idx < num_formats; idx++) {
+	if (EVERGREENTexFormats[idx].fmt == pPict->format) {
+	    format_known = TRUE;
+	    break;
+	}
+    }
+    if (!format_known)
+	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
+			 (int)pPict->format));
+
+    if (!(pPict->filter == PictFilterNearest ||
+	  pPict->filter == PictFilterBilinear))
+	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
+
+    /* Render semantics for REPEAT_NONE: sampling outside the source picture
+     * yields alpha=0 pixels.  A border color can provide that only when the
+     * source texture has an alpha channel; without one we have to fall back.
+     * For untransformed pictures we rely on upper layers having clipped the
+     * rendering to the bounds of the source drawable, in which case it does
+     * not matter -- although it has not been verified that the X server
+     * always performs such clipping.
+     */
+    /* FIXME evergreen */
+    if (pPict->transform != 0 &&
+	repeatType == RepeatNone &&
+	PICT_FORMAT_A(pPict->format) == 0) {
+	Bool op_is_src_or_clear = (op == PictOpSrc) || (op == PictOpClear);
+
+	if (!op_is_src_or_clear || PICT_FORMAT_A(pDstPict->format) != 0)
+	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
+    }
+
+    return TRUE;
+}
+
+/*
+ * Fill in the eight vertex-shader constants that belong to one texture unit.
+ *
+ * Unit N owns vs_alu_consts[8*N .. 8*N+7].  Slots 0-2 and 4-6 receive the
+ * first two rows of the picture transform (or of the identity matrix when
+ * the picture has no transform); slots 3 and 7 receive 1/width and 1/height
+ * of the picture's drawable.  accel_state records whether the unit is
+ * transformed and, if so, which transform it uses.
+ */
+static void EVERGREENXFormSetup(PicturePtr pPict, PixmapPtr pPix,
+				int unit, float *vs_alu_consts)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    float *consts = vs_alu_consts + unit * 8;
+    int row, col;
+
+    if (pPict->transform != 0) {
+	accel_state->is_transform[unit] = TRUE;
+	accel_state->transform[unit] = pPict->transform;
+
+	for (row = 0; row < 2; row++) {
+	    for (col = 0; col < 3; col++)
+		consts[row * 4 + col] =
+		    xFixedToFloat(pPict->transform->matrix[row][col]);
+	}
+    } else {
+	accel_state->is_transform[unit] = FALSE;
+
+	/* identity: ones on the diagonal, zeros elsewhere */
+	for (row = 0; row < 2; row++) {
+	    for (col = 0; col < 3; col++)
+		consts[row * 4 + col] = (row == col) ? 1.0 : 0.0;
+	}
+    }
+
+    /* the last slot of each row is the same in both cases */
+    consts[3] = 1.0 / w;
+    consts[7] = 1.0 / h;
+}
+
+/*
+ * Program the texture resource and sampler for one composite input.
+ *
+ * pPict - picture being sampled (the callers pass the source picture for
+ *         unit 0 and the mask picture for unit 1)
+ * pPix  - pixmap backing the picture; only used to locate the screen
+ * unit  - texture unit, also the accel_state->src_obj[] slot that is read
+ *
+ * Buffer, pitch, offset and size are taken from accel_state->src_obj[unit]
+ * and accel_state->src_size[unit], so the accel state has to be set up
+ * before calling.  Returns TRUE on success; an unsupported format, repeat
+ * mode or filter takes the RADEON_FALLBACK path.
+ */
+static Bool EVERGREENTextureSetup(PicturePtr pPict, PixmapPtr pPix,
+				  int unit)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
+    unsigned int i;
+    tex_resource_t  tex_res;
+    tex_sampler_t   tex_samp;
+    int pix_r, pix_g, pix_b, pix_a;
+
+    CLEAR (tex_res);
+    CLEAR (tex_samp);
+
+    for (i = 0; i < sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]); i++) {
+	if (EVERGREENTexFormats[i].fmt == pPict->format)
+	    break;
+    }
+    /* No match: bail out before the table is indexed one past its end. */
+    if (i == sizeof(EVERGREENTexFormats) / sizeof(EVERGREENTexFormats[0]))
+	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
+
+    /* Texture */
+    tex_res.id                  = unit;
+    tex_res.w                   = w;
+    tex_res.h                   = h;
+    tex_res.pitch               = accel_state->src_obj[unit].pitch;
+    tex_res.depth               = 0;
+    tex_res.dim                 = SQ_TEX_DIM_2D;
+    tex_res.base                = accel_state->src_obj[unit].offset;
+    tex_res.mip_base            = accel_state->src_obj[unit].offset;
+    tex_res.size                = accel_state->src_size[unit];
+    tex_res.format              = EVERGREENTexFormats[i].card_fmt;
+    tex_res.bo                  = accel_state->src_obj[unit].bo;
+    tex_res.mip_bo              = accel_state->src_obj[unit].bo;
+
+    /* component swizzles: pick which texel channel feeds R, G, B and A */
+    switch (pPict->format) {
+    case PICT_a1r5g5b5:
+    case PICT_a8r8g8b8:
+	pix_r = SQ_SEL_Z; /* R */
+	pix_g = SQ_SEL_Y; /* G */
+	pix_b = SQ_SEL_X; /* B */
+	pix_a = SQ_SEL_W; /* A */
+	break;
+    case PICT_a8b8g8r8:
+	pix_r = SQ_SEL_X; /* R */
+	pix_g = SQ_SEL_Y; /* G */
+	pix_b = SQ_SEL_Z; /* B */
+	pix_a = SQ_SEL_W; /* A */
+	break;
+    case PICT_x8b8g8r8:
+	pix_r = SQ_SEL_X; /* R */
+	pix_g = SQ_SEL_Y; /* G */
+	pix_b = SQ_SEL_Z; /* B */
+	pix_a = SQ_SEL_1; /* A */
+	break;
+#ifdef PICT_TYPE_BGRA
+    case PICT_b8g8r8a8:
+	pix_r = SQ_SEL_Y; /* R */
+	pix_g = SQ_SEL_Z; /* G */
+	pix_b = SQ_SEL_W; /* B */
+	pix_a = SQ_SEL_X; /* A */
+	break;
+    case PICT_b8g8r8x8:
+	pix_r = SQ_SEL_Y; /* R */
+	pix_g = SQ_SEL_Z; /* G */
+	pix_b = SQ_SEL_W; /* B */
+	pix_a = SQ_SEL_1; /* A */
+	break;
+#endif
+    case PICT_x1r5g5b5:
+    case PICT_x8r8g8b8:
+    case PICT_r5g6b5:
+	pix_r = SQ_SEL_Z; /* R */
+	pix_g = SQ_SEL_Y; /* G */
+	pix_b = SQ_SEL_X; /* B */
+	pix_a = SQ_SEL_1; /* A */
+	break;
+    case PICT_a8:
+	pix_r = SQ_SEL_0; /* R */
+	pix_g = SQ_SEL_0; /* G */
+	pix_b = SQ_SEL_0; /* B */
+	pix_a = SQ_SEL_X; /* A */
+	break;
+    default:
+	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
+    }
+
+    if (unit == 0) {
+	/* unit 0 */
+	if (!accel_state->msk_pic) {
+	    /* no mask: missing colour reads as 0, missing alpha as 1 */
+	    if (PICT_FORMAT_RGB(pPict->format) == 0) {
+		pix_r = SQ_SEL_0;
+		pix_g = SQ_SEL_0;
+		pix_b = SQ_SEL_0;
+	    }
+
+	    if (PICT_FORMAT_A(pPict->format) == 0)
+		pix_a = SQ_SEL_1;
+	} else {
+	    if (accel_state->component_alpha) {
+		if (accel_state->src_alpha) {
+		    /* every channel carries the alpha value (1 if there is none) */
+		    if (PICT_FORMAT_A(pPict->format) == 0) {
+			pix_r = SQ_SEL_1;
+			pix_g = SQ_SEL_1;
+			pix_b = SQ_SEL_1;
+			pix_a = SQ_SEL_1;
+		    } else {
+			pix_r = pix_a;
+			pix_g = pix_a;
+			pix_b = pix_a;
+		    }
+		} else {
+		    if (PICT_FORMAT_A(pPict->format) == 0)
+			pix_a = SQ_SEL_1;
+		}
+	    } else {
+		if (PICT_FORMAT_RGB(pPict->format) == 0) {
+		    pix_r = SQ_SEL_0;
+		    pix_g = SQ_SEL_0;
+		    pix_b = SQ_SEL_0;
+		}
+
+		if (PICT_FORMAT_A(pPict->format) == 0)
+		    pix_a = SQ_SEL_1;
+	    }
+	}
+    } else {
+	/* any other unit */
+	if (accel_state->component_alpha) {
+	    if (PICT_FORMAT_A(pPict->format) == 0)
+		pix_a = SQ_SEL_1;
+	} else {
+	    /* without component alpha every channel carries the alpha value */
+	    if (PICT_FORMAT_A(pPict->format) == 0) {
+		pix_r = SQ_SEL_1;
+		pix_g = SQ_SEL_1;
+		pix_b = SQ_SEL_1;
+		pix_a = SQ_SEL_1;
+	    } else {
+		pix_r = pix_a;
+		pix_g = pix_a;
+		pix_b = pix_a;
+	    }
+	}
+    }
+
+    tex_res.dst_sel_x           = pix_r; /* R */
+    tex_res.dst_sel_y           = pix_g; /* G */
+    tex_res.dst_sel_z           = pix_b; /* B */
+    tex_res.dst_sel_w           = pix_a; /* A */
+
+    tex_res.base_level          = 0;
+    tex_res.last_level          = 0;
+    tex_res.perf_modulation     = 0;
+    evergreen_set_tex_resource  (pScrn, &tex_res, accel_state->src_obj[unit].domain);
+
+    tex_samp.id                 = unit;
+    tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+
+    switch (repeatType) {
+    case RepeatNormal:
+	tex_samp.clamp_x            = SQ_TEX_WRAP;
+	tex_samp.clamp_y            = SQ_TEX_WRAP;
+	break;
+    case RepeatPad:
+	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
+	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
+	break;
+    case RepeatReflect:
+	tex_samp.clamp_x            = SQ_TEX_MIRROR;
+	tex_samp.clamp_y            = SQ_TEX_MIRROR;
+	break;
+    case RepeatNone:
+	tex_samp.clamp_x            = SQ_TEX_CLAMP_BORDER;
+	tex_samp.clamp_y            = SQ_TEX_CLAMP_BORDER;
+	break;
+    default:
+	RADEON_FALLBACK(("Bad repeat 0x%x\n", repeatType));
+    }
+
+    switch (pPict->filter) {
+    case PictFilterNearest:
+	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_POINT;
+	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
+	break;
+    case PictFilterBilinear:
+	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
+	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
+	break;
+    default:
+	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
+    }
+
+    tex_samp.clamp_z            = SQ_TEX_WRAP;
+    tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
+    tex_samp.mip_filter         = 0;			/* no mipmap */
+    evergreen_set_tex_sampler   (pScrn, &tex_samp);
+
+    return TRUE;
+}
+
+/*
+ * Decide whether a composite request can be accelerated.
+ *
+ * Checks, in order: the operator is covered by EVERGREENBlendOp, the source
+ * (and mask, if any) is backed by a drawable, source/destination/mask
+ * pixmaps are within the size limits, component alpha is not combined with
+ * an operator that needs both source alpha and source value, and finally
+ * that mask, source and destination formats are usable.
+ *
+ * Returns TRUE if everything is supported, FALSE (directly or through
+ * RADEON_FALLBACK) otherwise.
+ */
+static Bool EVERGREENCheckComposite(int op, PicturePtr pSrcPicture,
+				    PicturePtr pMaskPicture,
+				    PicturePtr pDstPicture)
+{
+    const int max_tex_w = 8192;
+    const int max_tex_h = 8192;
+    const int max_dst_w = 8192;
+    const int max_dst_h = 8192;
+    const int num_ops = (int) (sizeof(EVERGREENBlendOp) / sizeof(EVERGREENBlendOp[0]));
+    PixmapPtr pSrcPixmap, pDstPixmap;
+    uint32_t dst_format;	/* only the success/failure result is used */
+
+    /* Check for unsupported compositing operations. */
+    if (!(op < num_ops))
+	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
+
+    if (pSrcPicture->pDrawable == NULL)
+	RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
+
+    pSrcPixmap = RADEONGetDrawablePixmap(pSrcPicture->pDrawable);
+    if (pSrcPixmap->drawable.width >= max_tex_w ||
+	pSrcPixmap->drawable.height >= max_tex_h) {
+	RADEON_FALLBACK(("Source w/h too large (%d,%d).\n",
+			 pSrcPixmap->drawable.width,
+			 pSrcPixmap->drawable.height));
+    }
+
+    pDstPixmap = RADEONGetDrawablePixmap(pDstPicture->pDrawable);
+    if (pDstPixmap->drawable.width >= max_dst_w ||
+	pDstPixmap->drawable.height >= max_dst_h) {
+	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
+			 pDstPixmap->drawable.width,
+			 pDstPixmap->drawable.height));
+    }
+
+    if (pMaskPicture != NULL) {
+	PixmapPtr pMaskPixmap;
+
+	if (pMaskPicture->pDrawable == NULL)
+	    RADEON_FALLBACK(("Solid or gradient pictures not supported yet\n"));
+
+	pMaskPixmap = RADEONGetDrawablePixmap(pMaskPicture->pDrawable);
+	if (pMaskPixmap->drawable.width >= max_tex_w ||
+	    pMaskPixmap->drawable.height >= max_tex_h) {
+	    RADEON_FALLBACK(("Mask w/h too large (%d,%d).\n",
+			     pMaskPixmap->drawable.width,
+			     pMaskPixmap->drawable.height));
+	}
+
+	/* Component alpha that relies on both a source alpha and the source
+	 * value cannot be done: only one of the two fits into the single
+	 * source value that is available for blending.
+	 */
+	if (pMaskPicture->componentAlpha &&
+	    EVERGREENBlendOp[op].src_alpha &&
+	    (EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) !=
+	    (BLEND_ZERO << COLOR_SRCBLEND_shift)) {
+	    RADEON_FALLBACK(("Component alpha not supported with source "
+			     "alpha and source value blending.\n"));
+	}
+
+	if (!EVERGREENCheckCompositeTexture(pMaskPicture, pDstPicture, op, 1))
+	    return FALSE;
+    }
+
+    if (!EVERGREENCheckCompositeTexture(pSrcPicture, pDstPicture, op, 0))
+	return FALSE;
+
+    if (!EVERGREENGetDestFormat(pDstPicture, &dst_format))
+	return FALSE;
+
+    return TRUE;
+}
+
+/*
+ * Set up the accelerator state for a composite operation.
+ *
+ * Describes the source, optional mask and destination buffers to
+ * EVERGREENSetAccelState(), fills a freshly opened buffer with the
+ * vertex-shader constants produced by EVERGREENXFormSetup(), and then emits
+ * scissor, texture, shader, render-target, blend and interpolator state.
+ * The rectangles themselves are queued afterwards by EVERGREENComposite().
+ *
+ * Returns TRUE once the state has been emitted; FALSE (directly or through
+ * RADEON_FALLBACK) when the operation cannot be set up.
+ */
+static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
+				      PicturePtr pMaskPicture, PicturePtr pDstPicture,
+				      PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    uint32_t blendcntl, dst_format;
+    cb_config_t cb_conf;
+    shader_config_t vs_conf, ps_conf;
+    const_config_t vs_const_conf;
+    struct r600_accel_object src_obj, mask_obj, dst_obj;
+    int ret;
+
+    /* the pitch computations below divide by bitsPerPixel / 8 */
+    if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
+	return FALSE;
+
+    src_obj.offset = 0;
+    dst_obj.offset = 0;
+    src_obj.bo = radeon_get_pixmap_bo(pSrc);
+    dst_obj.bo = radeon_get_pixmap_bo(pDst);
+
+    src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
+    dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
+
+    src_obj.width = pSrc->drawable.width;
+    src_obj.height = pSrc->drawable.height;
+    src_obj.bpp = pSrc->drawable.bitsPerPixel;
+    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+
+    dst_obj.width = pDst->drawable.width;
+    dst_obj.height = pDst->drawable.height;
+    dst_obj.bpp = pDst->drawable.bitsPerPixel;
+    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
+
+    if (pMask) {
+	mask_obj.offset = 0;
+	mask_obj.bo = radeon_get_pixmap_bo(pMask);
+	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
+
+	mask_obj.width = pMask->drawable.width;
+	mask_obj.height = pMask->drawable.height;
+	mask_obj.bpp = pMask->drawable.bitsPerPixel;
+	mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+
+	if (!EVERGREENSetAccelState(pScrn,
+				    &src_obj,
+				    &mask_obj,
+				    &dst_obj,
+				    accel_state->comp_vs_offset, accel_state->comp_ps_offset,
+				    3, 0xffffffff))
+	    return FALSE;
+
+	/* remember the mask mode; EVERGREENTextureSetup() reads these flags */
+	accel_state->msk_pic = pMaskPicture;
+	if (pMaskPicture->componentAlpha) {
+	    accel_state->component_alpha = TRUE;
+	    if (EVERGREENBlendOp[op].src_alpha)
+		accel_state->src_alpha = TRUE;
+	    else
+		accel_state->src_alpha = FALSE;
+	} else {
+	    accel_state->component_alpha = FALSE;
+	    accel_state->src_alpha = FALSE;
+	}
+    } else {
+	if (!EVERGREENSetAccelState(pScrn,
+				    &src_obj,
+				    NULL,
+				    &dst_obj,
+				    accel_state->comp_vs_offset, accel_state->comp_ps_offset,
+				    3, 0xffffffff))
+	    return FALSE;
+
+	accel_state->msk_pic = NULL;
+	accel_state->component_alpha = FALSE;
+	accel_state->src_alpha = FALSE;
+    }
+
+    if (!EVERGREENGetDestFormat(pDstPicture, &dst_format))
+	return FALSE;
+
+    CLEAR (cb_conf);
+    CLEAR (vs_conf);
+    CLEAR (ps_conf);
+    CLEAR (vs_const_conf);
+
+    vs_const_conf.bo = radeon_bo_open(info->bufmgr, 0, 256, 0,
+				      RADEON_GEM_DOMAIN_GTT, 0);
+    if (vs_const_conf.bo == NULL)
+	RADEON_FALLBACK(("vs const buffer alloc failed\n"));
+    ret = radeon_bo_map(vs_const_conf.bo, 0);
+    if (ret) {
+	/* drop the buffer opened above instead of leaking it */
+	radeon_bo_unref(vs_const_conf.bo);
+	RADEON_FALLBACK(("vs const buffer map failed\n"));
+    }
+
+    /* VS alu constants */
+    vs_const_conf.size_bytes = 256;
+    vs_const_conf.const_addr = 0;
+    vs_const_conf.type = SHADER_TYPE_VS;
+
+    EVERGREENXFormSetup(pSrcPicture, pSrc, 0, (float *)vs_const_conf.bo->ptr);
+    if (pMask)
+        EVERGREENXFormSetup(pMaskPicture, pMask, 1, (float *)vs_const_conf.bo->ptr);
+
+    radeon_bo_unmap(vs_const_conf.bo);
+
+    radeon_cs_space_add_persistent_bo(info->cs, vs_const_conf.bo,
+				      RADEON_GEM_DOMAIN_GTT, 0);
+    if (radeon_cs_space_check(info->cs)) {
+	radeon_bo_unref(vs_const_conf.bo);
+	RADEON_FALLBACK(("vs const buffer size check failed\n"));
+    }
+
+    /* vertex size matches what EVERGREENComposite() requests per rectangle */
+    if (pMask)
+        radeon_vbo_check(pScrn, 24);
+    else
+        radeon_vbo_check(pScrn, 16);
+
+    radeon_cp_start(pScrn);
+
+    evergreen_set_default_state(pScrn);
+
+    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+
+    if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) {
+        radeon_ib_discard(pScrn);
+        radeon_cs_flush_indirect(pScrn);
+        radeon_vb_discard(pScrn);
+        return FALSE;
+    }
+
+    if (pMask) {
+        if (!EVERGREENTextureSetup(pMaskPicture, pMask, 1)) {
+	    radeon_ib_discard(pScrn);
+	    radeon_cs_flush_indirect(pScrn);
+            radeon_vb_discard(pScrn);
+            return FALSE;
+        }
+    } else
+        accel_state->is_transform[1] = FALSE;
+
+    /* bool constant bit 0 tells both shaders whether a mask is present */
+    if (pMask) {
+	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0));
+	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
+    } else {
+	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0));
+	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
+    }
+
+    /* Shader */
+    vs_conf.shader_addr         = accel_state->vs_mc_addr;
+    vs_conf.shader_size         = accel_state->vs_size;
+    vs_conf.num_gprs            = 3;
+    vs_conf.stack_size          = 1;
+    vs_conf.bo                  = accel_state->shaders_bo;
+    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    ps_conf.shader_addr         = accel_state->ps_mc_addr;
+    ps_conf.shader_size         = accel_state->ps_size;
+    ps_conf.num_gprs            = 3;
+    ps_conf.stack_size          = 1;
+    ps_conf.clamp_consts        = 0;
+    ps_conf.export_mode         = 2;
+    ps_conf.bo                  = accel_state->shaders_bo;
+    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    /* Render target */
+    cb_conf.id = 0;
+    cb_conf.w = accel_state->dst_obj.pitch;
+    cb_conf.h = accel_state->dst_obj.height;
+    cb_conf.base = accel_state->dst_obj.offset;
+    cb_conf.format = dst_format;
+    cb_conf.bo = accel_state->dst_obj.bo;
+
+    switch (pDstPicture->format) {
+    case PICT_a8r8g8b8:
+    case PICT_x8r8g8b8:
+    case PICT_a1r5g5b5:
+    case PICT_x1r5g5b5:
+    default:
+	cb_conf.comp_swap = 1; /* ARGB */
+	break;
+    case PICT_a8b8g8r8:
+    case PICT_x8b8g8r8:
+	cb_conf.comp_swap = 0; /* ABGR */
+	break;
+#ifdef PICT_TYPE_BGRA
+    case PICT_b8g8r8a8:
+    case PICT_b8g8r8x8:
+	cb_conf.comp_swap = 3; /* BGRA */
+	break;
+#endif
+    case PICT_r5g6b5:
+	cb_conf.comp_swap = 2; /* RGB */
+	break;
+    case PICT_a8:
+	cb_conf.comp_swap = 3; /* A */
+	break;
+    }
+    cb_conf.source_format = EXPORT_4C_16BPC;
+    cb_conf.blend_clamp = 1;
+    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
+
+    blendcntl = EVERGREENGetBlendCntl(op, pMaskPicture, pDstPicture->format);
+
+    BEGIN_BATCH(24);
+    EREG(CB_TARGET_MASK,                      (0xf << TARGET0_ENABLE_shift));
+    EREG(CB_COLOR_CONTROL,                    (EVERGREEN_ROP[3] |
+					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
+    EREG(CB_BLEND0_CONTROL,                   blendcntl | CB_BLEND0_CONTROL__ENABLE_bit);
+
+    /* Interpolator setup */
+    if (pMask) {
+	/* export 2 tex coords from VS */
+	EREG(SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift));
+	/* src = semantic id 0; mask = semantic id 1 */
+	EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
+			       (1 << SEMANTIC_1_shift)));
+    } else {
+	/* export 1 tex coords from VS */
+	EREG(SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
+	/* src = semantic id 0 */
+	EREG(SPI_VS_OUT_ID_0,   (0 << SEMANTIC_0_shift));
+    }
+
+    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
+    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
+    E32(((0    << SEMANTIC_shift)	|
+	 (0x01 << DEFAULT_VAL_shift)));
+    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
+    E32(((1    << SEMANTIC_shift)	|
+	 (0x01 << DEFAULT_VAL_shift)));
+
+    PACK0(SPI_PS_IN_CONTROL_0, 3);
+    if (pMask) {
+	/* input 2 tex coords from VS */
+	E32(((2 << NUM_INTERP_shift) |
+	     LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
+    } else {
+	/* input 1 tex coords from VS */
+	E32(((1 << NUM_INTERP_shift) |
+	     LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
+    }
+    E32(0); // SPI_PS_IN_CONTROL_1
+    E32(0); // SPI_INTERP_CONTROL_0
+    END_BATCH();
+
+    /* VS alu constants */
+    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
+
+    if (accel_state->vsync)
+	RADEONVlineHelperClear(pScrn);
+
+    return TRUE;
+}
+
+/*
+ * Queue one composite rectangle.
+ *
+ * Three vertices are written per rectangle, at (x, y), (x, y + h) and
+ * (x + w, y + h).  Each vertex carries the destination position followed by
+ * the source coordinate and, when a mask picture is active, the mask
+ * coordinate.  With vsync enabled the rectangle is also reported to
+ * RADEONVlineHelperSet().
+ */
+static void EVERGREENComposite(PixmapPtr pDst,
+			       int srcX, int srcY,
+			       int maskX, int maskY,
+			       int dstX, int dstY,
+			       int w, int h)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    float *vtx;
+
+    if (accel_state->vsync)
+	RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h);
+
+    if (accel_state->msk_pic) {
+	vtx = radeon_vbo_space(pScrn, 24);
+
+	/* (x, y) */
+	*vtx++ = (float)dstX;
+	*vtx++ = (float)dstY;
+	*vtx++ = (float)srcX;
+	*vtx++ = (float)srcY;
+	*vtx++ = (float)maskX;
+	*vtx++ = (float)maskY;
+
+	/* (x, y + h) */
+	*vtx++ = (float)dstX;
+	*vtx++ = (float)(dstY + h);
+	*vtx++ = (float)srcX;
+	*vtx++ = (float)(srcY + h);
+	*vtx++ = (float)maskX;
+	*vtx++ = (float)(maskY + h);
+
+	/* (x + w, y + h) */
+	*vtx++ = (float)(dstX + w);
+	*vtx++ = (float)(dstY + h);
+	*vtx++ = (float)(srcX + w);
+	*vtx++ = (float)(srcY + h);
+	*vtx++ = (float)(maskX + w);
+	*vtx++ = (float)(maskY + h);
+    } else {
+	vtx = radeon_vbo_space(pScrn, 16);
+
+	/* (x, y) */
+	*vtx++ = (float)dstX;
+	*vtx++ = (float)dstY;
+	*vtx++ = (float)srcX;
+	*vtx++ = (float)srcY;
+
+	/* (x, y + h) */
+	*vtx++ = (float)dstX;
+	*vtx++ = (float)(dstY + h);
+	*vtx++ = (float)srcX;
+	*vtx++ = (float)(srcY + h);
+
+	/* (x + w, y + h) */
+	*vtx++ = (float)(dstX + w);
+	*vtx++ = (float)(dstY + h);
+	*vtx++ = (float)(srcX + w);
+	*vtx++ = (float)(srcY + h);
+    }
+
+    radeon_vbo_commit(pScrn);
+}
+
+/*
+ * Finish a composite operation: optionally emit the vline wait recorded in
+ * accel_state, then hand the queued vertices to evergreen_finish_op() using
+ * the vertex size that matches the masked (24) or unmasked (16) layout.
+ */
+static void EVERGREENDoneComposite(PixmapPtr pDst)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    int vtx_size = 16;
+
+    if (accel_state->vsync) {
+	evergreen_cp_wait_vline_sync(pScrn, pDst,
+				     accel_state->vline_crtc,
+				     accel_state->vline_y1,
+				     accel_state->vline_y2);
+    }
+
+    if (accel_state->msk_pic)
+	vtx_size = 24;
+
+    evergreen_finish_op(pScrn, vtx_size);
+}
+
+/*
+ * UploadToScreen hook: copy a w x h block of pixels from system memory
+ * (src, src_pitch bytes per row) to position (x, y) of pDst.
+ *
+ * The rows are first copied into a temporary GTT buffer whose pitch is
+ * aligned to 256 bytes, then blitted from there into the pixmap.  The
+ * function declines (returns FALSE) for depths below 8 bpp, when the
+ * destination buffer is neither referenced by the current command stream
+ * nor busy, and when the scratch buffer cannot be allocated, set up or
+ * mapped.  Returns TRUE once the blit has been submitted.
+ */
+static Bool
+EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
+			char *src, int src_pitch)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    struct radeon_exa_pixmap_priv *driver_priv;
+    struct radeon_bo *scratch;
+    unsigned size;
+    uint32_t dst_domain;
+    int bpp = pDst->drawable.bitsPerPixel;
+    uint32_t scratch_pitch;
+    uint32_t src_pitch_hw;
+    uint32_t dst_pitch_hw;
+    Bool r;
+    int i;
+    struct r600_accel_object src_obj, dst_obj;
+
+    if (bpp < 8)
+	return FALSE;
+
+    /* Only computed once bpp >= 8 is known: bpp / 8 is a divisor below. */
+    scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+    src_pitch_hw = scratch_pitch / (bpp / 8);
+    dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
+
+    driver_priv = exaGetPixmapDriverPrivate(pDst);
+
+    /* If we know the BO won't be busy, don't bother */
+    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs) &&
+	!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
+	return FALSE;
+
+    size = scratch_pitch * h;
+    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
+    if (scratch == NULL) {
+	return FALSE;
+    }
+
+    src_obj.pitch = src_pitch_hw;
+    src_obj.width = w;
+    src_obj.height = h;
+    src_obj.offset = 0;
+    src_obj.bpp = bpp;
+    src_obj.domain = RADEON_GEM_DOMAIN_GTT;
+    src_obj.bo = scratch;
+
+    dst_obj.pitch = dst_pitch_hw;
+    dst_obj.width = pDst->drawable.width;
+    dst_obj.height = pDst->drawable.height;
+    dst_obj.offset = 0;
+    dst_obj.bpp = bpp;
+    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
+    dst_obj.bo = radeon_get_pixmap_bo(pDst);
+
+    if (!EVERGREENSetAccelState(pScrn,
+				&src_obj,
+				NULL,
+				&dst_obj,
+				accel_state->copy_vs_offset, accel_state->copy_ps_offset,
+				3, 0xffffffff)) {
+        r = FALSE;
+        goto out;
+    }
+
+    r = radeon_bo_map(scratch, 0);
+    if (r) {
+        r = FALSE;
+        goto out;
+    }
+    r = TRUE;
+    /* copy row by row: source and scratch pitches differ */
+    size = w * bpp / 8;
+    for (i = 0; i < h; i++) {
+        memcpy(scratch->ptr + i * scratch_pitch, src, size);
+        src += src_pitch;
+    }
+    radeon_bo_unmap(scratch);
+
+    if (info->accel_state->vsync)
+	RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
+
+    /* blit from gart to vram */
+    EVERGREENDoPrepareCopy(pScrn);
+    EVERGREENAppendCopyVertex(pScrn, 0, 0, x, y, w, h);
+    EVERGREENDoCopyVline(pDst);
+
+out:
+    /* drop this function's reference to the scratch buffer on every path */
+    radeon_bo_unref(scratch);
+    return r;
+}
+
+/*
+ * DownloadFromScreen hook: copy the w x h block at (x, y) of pSrc into
+ * system memory (dst, dst_pitch bytes per row).
+ *
+ * The block is blitted into a temporary GTT buffer whose pitch is aligned
+ * to 256 bytes, the command stream is flushed, and the rows are then copied
+ * out of the mapped buffer.  The function declines (returns FALSE) for
+ * depths below 8 bpp, when the source buffer's domain is not exactly VRAM,
+ * and when the scratch buffer cannot be allocated, fitted into the command
+ * stream, set up or mapped.  Returns TRUE when the pixels have been copied.
+ */
+static Bool
+EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
+			    int h, char *dst, int dst_pitch)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    struct radeon_exa_pixmap_priv *driver_priv;
+    struct radeon_bo *scratch;
+    unsigned size;
+    uint32_t src_domain = 0;
+    int bpp = pSrc->drawable.bitsPerPixel;
+    uint32_t scratch_pitch;
+    uint32_t dst_pitch_hw;
+    uint32_t src_pitch_hw;
+    Bool r;
+    struct r600_accel_object src_obj, dst_obj;
+
+    if (bpp < 8)
+	return FALSE;
+
+    /* Only computed once bpp >= 8 is known: bpp / 8 is a divisor below. */
+    scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+    dst_pitch_hw = scratch_pitch / (bpp / 8);
+    src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
+
+    driver_priv = exaGetPixmapDriverPrivate(pSrc);
+
+    /* If we know the BO won't end up in VRAM anyway, don't bother */
+    if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
+	src_domain = radeon_bo_get_src_domain(driver_priv->bo);
+	/* both domains allowed: placement is undecided, so ask below */
+	if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
+	    (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
+	    src_domain = 0;
+    }
+
+    if (!src_domain)
+	radeon_bo_is_busy(driver_priv->bo, &src_domain);
+
+    if (src_domain != RADEON_GEM_DOMAIN_VRAM)
+	return FALSE;
+
+    size = scratch_pitch * h;
+    scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
+    if (scratch == NULL) {
+	return FALSE;
+    }
+    radeon_cs_space_reset_bos(info->cs);
+    radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
+				      RADEON_GEM_DOMAIN_VRAM, 0);
+    accel_state->src_obj[0].domain = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+    radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
+    accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
+    radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
+    r = radeon_cs_space_check(info->cs);
+    if (r) {
+        r = FALSE;
+        goto out;
+    }
+
+    src_obj.pitch = src_pitch_hw;
+    src_obj.width = pSrc->drawable.width;
+    src_obj.height = pSrc->drawable.height;
+    src_obj.offset = 0;
+    src_obj.bpp = bpp;
+    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+    src_obj.bo = radeon_get_pixmap_bo(pSrc);
+
+    dst_obj.pitch = dst_pitch_hw;
+    dst_obj.width = w;
+    dst_obj.height = h;
+    dst_obj.offset = 0;
+    dst_obj.bo = scratch;
+    dst_obj.bpp = bpp;
+    dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
+
+    if (!EVERGREENSetAccelState(pScrn,
+				&src_obj,
+				NULL,
+				&dst_obj,
+				accel_state->copy_vs_offset, accel_state->copy_ps_offset,
+				3, 0xffffffff)) {
+        r = FALSE;
+        goto out;
+    }
+
+    /* blit from vram to gart */
+    EVERGREENDoPrepareCopy(pScrn);
+    EVERGREENAppendCopyVertex(pScrn, x, y, 0, 0, w, h);
+    EVERGREENDoCopy(pScrn);
+
+    radeon_cs_flush_indirect(pScrn);
+
+    r = radeon_bo_map(scratch, 0);
+    if (r) {
+        r = FALSE;
+        goto out;
+    }
+    r = TRUE;
+    /* from here on w is the row length in bytes and size the scratch offset */
+    w *= bpp / 8;
+    size = 0;
+    while (h--) {
+        memcpy(dst, scratch->ptr + size, w);
+        size += scratch_pitch;
+        dst += dst_pitch;
+    }
+    radeon_bo_unmap(scratch);
+out:
+    /* drop this function's reference to the scratch buffer on every path */
+    radeon_bo_unref(scratch);
+    return r;
+}
+
+/* Advance the per-screen sync marker counter and return its new value. */
+static int
+EVERGREENMarkSync(ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    accel_state->exaSyncMarker += 1;
+    return accel_state->exaSyncMarker;
+}
+
+/* Sync entry point: intentionally does nothing with the screen or marker. */
+static void
+EVERGREENSync(ScreenPtr pScreen, int marker)
+{
+    (void) pScreen;
+    (void) marker;
+}
+
+/*
+ * Allocate the VRAM buffer object that holds the shader programs.
+ *
+ * Room is reserved for nine shaders of 512 bytes each.  accel_state->shaders
+ * is reset to NULL and the new buffer is stored in accel_state->shaders_bo.
+ * Returns TRUE on success, FALSE (after logging) if the allocation fails.
+ */
+static Bool
+EVERGREENAllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    /* 512 bytes per shader for now */
+    const int bytes_per_shader = 512;
+    const int shader_count = 9;
+    int size = bytes_per_shader * shader_count;
+
+    accel_state->shaders = NULL;
+    accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0,
+					     RADEON_GEM_DOMAIN_VRAM, 0);
+    if (accel_state->shaders_bo != NULL)
+	return TRUE;
+
+    ErrorF("Allocating shader failed\n");
+    return FALSE;
+}
+
+/*
+ * Fill the shader buffer object with the eight accel shaders (solid, copy,
+ * comp and xv; a vs and a ps for each) and record every shader's byte
+ * offset in the accel state.
+ *
+ * The shaders are written at consecutive 512-byte offsets starting at 0.
+ * The buffer is mapped for writing on entry and unmapped before returning.
+ * Returns TRUE once all shaders have been written; a mapping failure is
+ * reported through FatalError().
+ */
+Bool
+EVERGREENLoadShaders(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    RADEONChipFamily family = info->ChipFamily;
+    uint32_t *base;
+    int err;
+
+    err = radeon_bo_map(accel_state->shaders_bo, 1);
+    if (err != 0) {
+	FatalError("failed to map shader %d\n", err);
+	return FALSE;
+    }
+
+    /* Offsets are kept in bytes; base is indexed in 32-bit words, hence / 4. */
+    base = accel_state->shaders_bo->ptr;
+
+    /* solid fill: vs at 0, ps at 512 */
+    accel_state->solid_vs_offset = 0;
+    evergreen_solid_vs(family, base + accel_state->solid_vs_offset / 4);
+    accel_state->solid_ps_offset = 512;
+    evergreen_solid_ps(family, base + accel_state->solid_ps_offset / 4);
+
+    /* copy: vs at 1024, ps at 1536 */
+    accel_state->copy_vs_offset = 1024;
+    evergreen_copy_vs(family, base + accel_state->copy_vs_offset / 4);
+    accel_state->copy_ps_offset = 1536;
+    evergreen_copy_ps(family, base + accel_state->copy_ps_offset / 4);
+
+    /* comp: vs at 2048, ps at 2560 */
+    accel_state->comp_vs_offset = 2048;
+    evergreen_comp_vs(family, base + accel_state->comp_vs_offset / 4);
+    accel_state->comp_ps_offset = 2560;
+    evergreen_comp_ps(family, base + accel_state->comp_ps_offset / 4);
+
+    /* xv: vs at 3072, ps at 3584 */
+    accel_state->xv_vs_offset = 3072;
+    evergreen_xv_vs(family, base + accel_state->xv_vs_offset / 4);
+    accel_state->xv_ps_offset = 3584;
+    evergreen_xv_ps(family, base + accel_state->xv_ps_offset / 4);
+
+    radeon_bo_unmap(accel_state->shaders_bo);
+
+    return TRUE;
+}
+
+/*
+ * Set up EXA acceleration for an Evergreen screen.
+ *
+ * Fills in the EXA driver record at info->accel_state->exa with the
+ * Evergreen solid/copy/composite hooks and the shared RADEON pixmap hooks,
+ * registers it through exaDriverInit(), resets the per-screen accel state,
+ * then allocates and loads the shader buffer.
+ *
+ * Returns TRUE on success.  Returns FALSE if the EXA record has not been
+ * allocated, if there is no command stream (info->cs), if exaDriverInit()
+ * rejects the record (the record is then freed and the pointer cleared), or
+ * if the shaders cannot be allocated or loaded.
+ */
+Bool
+EVERGREENDrawInit(ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (accel_state->exa == NULL) {
+	xf86DrvMsg(pScreen->myNum, X_ERROR, "Memory map not set up\n");
+	return FALSE;
+    }
+
+    /* accel requires kms */
+    if (!info->cs)
+	return FALSE;
+
+    accel_state->exa->exa_major = EXA_VERSION_MAJOR;
+    accel_state->exa->exa_minor = EXA_VERSION_MINOR;
+
+    accel_state->exa->PrepareSolid = EVERGREENPrepareSolid;
+    accel_state->exa->Solid = EVERGREENSolid;
+    accel_state->exa->DoneSolid = EVERGREENDoneSolid;
+
+    accel_state->exa->PrepareCopy = EVERGREENPrepareCopy;
+    accel_state->exa->Copy = EVERGREENCopy;
+    accel_state->exa->DoneCopy = EVERGREENDoneCopy;
+
+    accel_state->exa->MarkSync = EVERGREENMarkSync;
+    accel_state->exa->WaitMarker = EVERGREENSync;
+
+    accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
+    accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
+    accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
+    accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
+    accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
+    accel_state->exa->UploadToScreen = EVERGREENUploadToScreen;
+    accel_state->exa->DownloadFromScreen = EVERGREENDownloadFromScreen;
+
+    /* Optional flags are only set when the EXA headers define them. */
+    accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
+#ifdef EXA_SUPPORTS_PREPARE_AUX
+    accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX;
+#endif
+
+#ifdef EXA_HANDLES_PIXMAPS
+    accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
+#ifdef EXA_MIXED_PIXMAPS
+    accel_state->exa->flags |= EXA_MIXED_PIXMAPS;
+#endif
+#endif
+    accel_state->exa->pixmapOffsetAlign = 256;
+    accel_state->exa->pixmapPitchAlign = 256;
+
+    accel_state->exa->CheckComposite = EVERGREENCheckComposite;
+    accel_state->exa->PrepareComposite = EVERGREENPrepareComposite;
+    accel_state->exa->Composite = EVERGREENComposite;
+    accel_state->exa->DoneComposite = EVERGREENDoneComposite;
+
+    /* maxPitchBytes is only set when building against EXA 2.3 or newer. */
+#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
+
+    accel_state->exa->maxPitchBytes = 32768;
+#endif
+    accel_state->exa->maxX = 8192;
+    accel_state->exa->maxY = 8192;
+
+    /* not supported yet */
+    if (xf86ReturnOptValBool(info->Options, OPTION_EXA_VSYNC, FALSE)) {
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA VSync enabled\n");
+	accel_state->vsync = TRUE;
+    } else {
+	accel_state->vsync = FALSE;
+    }
+
+    if (!exaDriverInit(pScreen, accel_state->exa)) {
+	xfree(accel_state->exa);
+	/*
+	 * Clear the pointer so that later "exa != NULL" checks (including
+	 * the one at the top of this function) never see freed memory.
+	 */
+	accel_state->exa = NULL;
+	return FALSE;
+    }
+
+    accel_state->XInited3D = FALSE;
+    accel_state->copy_area = NULL;
+    accel_state->src_obj[0].bo = NULL;
+    accel_state->src_obj[1].bo = NULL;
+    accel_state->dst_obj.bo = NULL;
+    accel_state->copy_area_bo = NULL;
+    accel_state->vb_start_op = -1;
+    accel_state->finish_op = evergreen_finish_op;
+    accel_state->verts_per_op = 3;
+    RADEONVlineHelperClear(pScrn);
+
+    radeon_vbo_init_lists(pScrn);
+
+    if (!EVERGREENAllocShaders(pScrn, pScreen))
+	return FALSE;
+
+    if (!EVERGREENLoadShaders(pScrn))
+	return FALSE;
+
+    exaMarkSync(pScreen);
+
+    return TRUE;
+
+}
+
diff --git a/src/evergreen_reg.h b/src/evergreen_reg.h
new file mode 100644
index 0000000..4608f08
--- /dev/null
+++ b/src/evergreen_reg.h
@@ -0,0 +1,247 @@
+/*
+ * Evergreen Register documentation
+ *
+ * Copyright (C) 2010  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EVERGREEN_REG_H_
+#define _EVERGREEN_REG_H_
+
+/*
+ * Register definitions
+ */
+
+#include "evergreen_reg_auto.h"
+
+/* Shader type selectors, numbered consecutively starting at PS = 0. */
+enum {
+    SHADER_TYPE_PS = 0,
+    SHADER_TYPE_VS = 1,
+    SHADER_TYPE_GS = 2,
+    SHADER_TYPE_HS = 3,
+    SHADER_TYPE_LS = 4,
+    SHADER_TYPE_CS = 5,
+    SHADER_TYPE_FS = 6
+};
+
+
+/*
+ * SET_*_REG offsets + ends
+ *
+ * One (_offset, _end) pair per SET_* packet type, giving the start and the
+ * end of the register address range associated with that packet.  The
+ * pairs are not listed in address order: the LOOP_CONST and BOOL_CONST
+ * ranges lie below the SAMPLER and CTL_CONST ranges, and the BOOL_CONST
+ * range starts exactly where the LOOP_CONST range ends.
+ * NOTE(review): whether _end is inclusive or exclusive is not visible
+ * here -- confirm against the code that consumes these values.
+ */
+enum {
+    SET_CONFIG_REG_offset          = 0x00008000,
+    SET_CONFIG_REG_end             = 0x0000ac00,
+    SET_CONTEXT_REG_offset         = 0x00028000,
+    SET_CONTEXT_REG_end            = 0x00029000,
+    SET_RESOURCE_offset            = 0x00030000,
+    SET_RESOURCE_end               = 0x00038000,
+    SET_SAMPLER_offset             = 0x0003c000,
+    SET_SAMPLER_end                = 0x0003c600,
+    SET_CTL_CONST_offset           = 0x0003cff0,
+    SET_CTL_CONST_end              = 0x0003ff0c,
+    SET_LOOP_CONST_offset          = 0x0003a200,
+    SET_LOOP_CONST_end             = 0x0003a500,
+    SET_BOOL_CONST_offset          = 0x0003a500,
+    SET_BOOL_CONST_end             = 0x0003a518,
+};
+
+/*
+ * Packet3 commands
+ *
+ * Opcode values for the packet3 commands, listed in ascending order.  The
+ * IT_SET_* opcodes (0x68..0x6F) form one contiguous group at the end.
+ */
+enum {
+    IT_NOP                      = 0x10,
+    IT_INDIRECT_BUFFER_END      = 0x17,
+    IT_SET_PREDICATION          = 0x20,
+    IT_COND_EXEC                = 0x22,
+    IT_PRED_EXEC                = 0x23,
+    IT_DRAW_INDEX_2             = 0x27,
+    IT_CONTEXT_CONTROL          = 0x28,
+    IT_DRAW_INDEX_OFFSET        = 0x29,
+    IT_INDEX_TYPE               = 0x2A,
+    IT_DRAW_INDEX               = 0x2B,
+    IT_DRAW_INDEX_AUTO          = 0x2D,
+    IT_DRAW_INDEX_IMMD          = 0x2E,
+    IT_NUM_INSTANCES            = 0x2F,
+    IT_INDIRECT_BUFFER          = 0x32,
+    IT_STRMOUT_BUFFER_UPDATE    = 0x34,
+    IT_MEM_SEMAPHORE            = 0x39,
+    IT_MPEG_INDEX               = 0x3A,
+    IT_WAIT_REG_MEM             = 0x3C,
+    IT_MEM_WRITE                = 0x3D,
+    IT_SURFACE_SYNC             = 0x43,
+    IT_ME_INITIALIZE            = 0x44,
+    IT_COND_WRITE               = 0x45,
+    IT_EVENT_WRITE              = 0x46,
+    IT_EVENT_WRITE_EOP          = 0x47,
+    IT_EVENT_WRITE_EOS          = 0x48,
+    IT_SET_CONFIG_REG           = 0x68,
+    IT_SET_CONTEXT_REG          = 0x69,
+    IT_SET_ALU_CONST            = 0x6A,
+    IT_SET_BOOL_CONST           = 0x6B,
+    IT_SET_LOOP_CONST           = 0x6C,
+    IT_SET_RESOURCE             = 0x6D,
+    IT_SET_SAMPLER              = 0x6E,
+    IT_SET_CTL_CONST            = 0x6F,
+};
+
+/*
+ * IT_WAIT_REG_MEM operation encoding
+ *
+ * The low bits carry the comparison function (values 0..6); bit 4 selects
+ * between a register operand (0) and a memory operand (1).  The two groups
+ * occupy disjoint bits, so one value from each can be OR'ed together.
+ */
+
+/* Comparison function, bits starting at 0 */
+#define IT_WAIT_ALWAYS          (0 << 0)
+#define IT_WAIT_LT              (1 << 0)
+#define IT_WAIT_LE              (2 << 0)
+#define IT_WAIT_EQ              (3 << 0)
+#define IT_WAIT_NE              (4 << 0)
+#define IT_WAIT_GE              (5 << 0)
+#define IT_WAIT_GT              (6 << 0)
+/* Operand space, bit 4 */
+#define IT_WAIT_REG             (0 << 4)
+#define IT_WAIT_MEM             (1 << 4)
+
+/* Drops the two low bits of x; presumably byte address -> dword index -- TODO confirm */
+#define IT_WAIT_ADDR(x)         ((x) >> 2)
+
+/*
+ * Register addresses, bitfield constants, data format codes and the
+ * per-shader-type slot layout of the fetch resource, sampler, loop constant
+ * and bool constant register files.
+ *
+ * In each layout group the *_num entries give the number of slots owned by
+ * a shader type and the bare *_ps/_vs/_gs/... entries give the first slot of
+ * that type.  Every base is the previous type's base plus the previous
+ * type's *_num, so the ranges are contiguous and add up to *_all_num.
+ */
+enum {
+
+    SQ_LDS_ALLOC_PS                                       = 0x288ec,
+    SQ_DYN_GPR_RESOURCE_LIMIT_1                           = 0x28838,
+    SQ_DYN_GPR_CNTL_PS_FLUSH_REQ                          = 0x8d8c,
+
+    WAIT_UNTIL                                            = 0x8040,
+	WAIT_CP_DMA_IDLE_bit                              = 1 << 8,
+	WAIT_CMDFIFO_bit                                  = 1 << 10,
+	WAIT_3D_IDLE_bit                                  = 1 << 15,
+	WAIT_3D_IDLECLEAN_bit                             = 1 << 17,
+	WAIT_EXTERN_SIG_bit                               = 1 << 19,
+	CMDFIFO_ENTRIES_mask                              = 0xf << 20,
+	CMDFIFO_ENTRIES_shift                             = 20,
+
+    CP_COHER_CNTL                                         = 0x85f0,
+	DEST_BASE_0_ENA_bit                               = 1 << 0,
+	DEST_BASE_1_ENA_bit                               = 1 << 1,
+	SO0_DEST_BASE_ENA_bit                             = 1 << 2,
+	SO1_DEST_BASE_ENA_bit                             = 1 << 3,
+	SO2_DEST_BASE_ENA_bit                             = 1 << 4,
+	SO3_DEST_BASE_ENA_bit                             = 1 << 5,
+	CB0_DEST_BASE_ENA_bit                             = 1 << 6,
+	CB1_DEST_BASE_ENA_bit                             = 1 << 7,
+	CB2_DEST_BASE_ENA_bit                             = 1 << 8,
+	CB3_DEST_BASE_ENA_bit                             = 1 << 9,
+	CB4_DEST_BASE_ENA_bit                             = 1 << 10,
+	CB5_DEST_BASE_ENA_bit                             = 1 << 11,
+	CB6_DEST_BASE_ENA_bit                             = 1 << 12,
+	CB7_DEST_BASE_ENA_bit                             = 1 << 13,
+	DB_DEST_BASE_ENA_bit                              = 1 << 14,
+	CB8_DEST_BASE_ENA_bit                             = 1 << 15,
+	CB9_DEST_BASE_ENA_bit                             = 1 << 16,
+	CB10_DEST_BASE_ENA_bit                            = 1 << 17,
+	CB11_DEST_BASE_ENA_bit                            = 1 << 18,
+	FULL_CACHE_ENA_bit                                = 1 << 20,
+	TC_ACTION_ENA_bit                                 = 1 << 23,
+	VC_ACTION_ENA_bit                                 = 1 << 24,
+	CB_ACTION_ENA_bit                                 = 1 << 25,
+	DB_ACTION_ENA_bit                                 = 1 << 26,
+	SH_ACTION_ENA_bit                                 = 1 << 27,
+	SX_ACTION_ENA_bit                                 = 1 << 28,
+    CP_COHER_SIZE                                         = 0x85f4,
+    CP_COHER_BASE                                         = 0x85f8,
+    CP_COHER_STATUS                                       = 0x85fc,
+	MATCHING_GFX_CNTX_mask                            = 0xff << 0,
+	MATCHING_GFX_CNTX_shift                           = 0,
+	STATUS_bit                                        = 1 << 31,
+
+//  SQ_VTX_CONSTANT_WORD2_0                               = 0x00030008,
+//    	SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask         = 0x3f << 20,
+	FMT_INVALID=0,      FMT_8,          FMT_4_4,            FMT_3_3_2,
+	                    FMT_16=5,       FMT_16_FLOAT,       FMT_8_8,
+	FMT_5_6_5,          FMT_6_5_5,      FMT_1_5_5_5,        FMT_4_4_4_4,
+	FMT_5_5_5_1,        FMT_32,         FMT_32_FLOAT,       FMT_16_16,
+	FMT_16_16_FLOAT=16, FMT_8_24,       FMT_8_24_FLOAT,     FMT_24_8,
+	FMT_24_8_FLOAT,     FMT_10_11_11,   FMT_10_11_11_FLOAT, FMT_11_11_10,
+	FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8,        FMT_10_10_10_2,
+	FMT_X24_8_32_FLOAT, FMT_32_32,      FMT_32_32_FLOAT,    FMT_16_16_16_16,
+	FMT_16_16_16_16_FLOAT=32,           FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT,
+	                    FMT_1 = 37,                         FMT_GB_GR=39,
+	FMT_BG_RG,          FMT_32_AS_8,    FMT_32_AS_8_8,      FMT_5_9_9_9_SHAREDEXP,
+	FMT_8_8_8,          FMT_16_16_16,   FMT_16_16_16_FLOAT, FMT_32_32_32,
+	FMT_32_32_32_FLOAT=48,
+
+//  High level register file lengths
+    SQ_FETCH_RESOURCE                                       = SQ_TEX_RESOURCE_WORD0_0,
+    SQ_FETCH_RESOURCE_ps_num                                = 176,
+    SQ_FETCH_RESOURCE_vs_num                                = 160,
+    SQ_FETCH_RESOURCE_gs_num                                = 160,
+    SQ_FETCH_RESOURCE_hs_num                                = 160,
+    SQ_FETCH_RESOURCE_ls_num                                = 160,
+    SQ_FETCH_RESOURCE_cs_num                                = 176,
+    SQ_FETCH_RESOURCE_fs_num                                = 32,
+    SQ_FETCH_RESOURCE_all_num                               = 1024,
+    SQ_FETCH_RESOURCE_offset                                = 32,
+    SQ_FETCH_RESOURCE_ps                                    = 0,                                               //   0...175
+    SQ_FETCH_RESOURCE_vs                                    = SQ_FETCH_RESOURCE_ps + SQ_FETCH_RESOURCE_ps_num, // 176...335
+    /* gs follows vs, so it must skip the vs slot count (not the fs count) */
+    SQ_FETCH_RESOURCE_gs                                    = SQ_FETCH_RESOURCE_vs + SQ_FETCH_RESOURCE_vs_num, // 336...495
+    SQ_FETCH_RESOURCE_hs                                    = SQ_FETCH_RESOURCE_gs + SQ_FETCH_RESOURCE_gs_num, // 496...655
+    SQ_FETCH_RESOURCE_ls                                    = SQ_FETCH_RESOURCE_hs + SQ_FETCH_RESOURCE_hs_num, // 656...815
+    SQ_FETCH_RESOURCE_cs                                    = SQ_FETCH_RESOURCE_ls + SQ_FETCH_RESOURCE_ls_num, // 816...991
+    SQ_FETCH_RESOURCE_fs                                    = SQ_FETCH_RESOURCE_cs + SQ_FETCH_RESOURCE_cs_num, // 992...1023
+
+    SQ_TEX_SAMPLER_WORD                                   = SQ_TEX_SAMPLER_WORD0_0,
+    SQ_TEX_SAMPLER_WORD_ps_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_vs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_gs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_hs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_ls_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_cs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_all_num                           = 108,
+    SQ_TEX_SAMPLER_WORD_offset                            = 12,
+    SQ_TEX_SAMPLER_WORD_ps                                = 0,                                                   //  0...17
+    SQ_TEX_SAMPLER_WORD_vs                                = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num, // 18...35
+    SQ_TEX_SAMPLER_WORD_gs                                = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num, // 36...53
+    SQ_TEX_SAMPLER_WORD_hs                                = SQ_TEX_SAMPLER_WORD_gs + SQ_TEX_SAMPLER_WORD_gs_num, // 54...71
+    SQ_TEX_SAMPLER_WORD_ls                                = SQ_TEX_SAMPLER_WORD_hs + SQ_TEX_SAMPLER_WORD_hs_num, // 72...89
+    SQ_TEX_SAMPLER_WORD_cs                                = SQ_TEX_SAMPLER_WORD_ls + SQ_TEX_SAMPLER_WORD_ls_num, // 90...107
+
+    SQ_LOOP_CONST                                         = SQ_LOOP_CONST_0,
+    SQ_LOOP_CONST_ps_num                                  = 32,
+    SQ_LOOP_CONST_vs_num                                  = 32,
+    SQ_LOOP_CONST_gs_num                                  = 32,
+    SQ_LOOP_CONST_hs_num                                  = 32,
+    SQ_LOOP_CONST_ls_num                                  = 32,
+    SQ_LOOP_CONST_cs_num                                  = 32,
+    SQ_LOOP_CONST_all_num                                 = 192,
+    SQ_LOOP_CONST_offset                                  = 4,
+    SQ_LOOP_CONST_ps                                      = 0,                                       //   0...31
+    SQ_LOOP_CONST_vs                                      = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num, //  32...63
+    SQ_LOOP_CONST_gs                                      = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num, //  64...95
+    SQ_LOOP_CONST_hs                                      = SQ_LOOP_CONST_gs + SQ_LOOP_CONST_gs_num, //  96...127
+    SQ_LOOP_CONST_ls                                      = SQ_LOOP_CONST_hs + SQ_LOOP_CONST_hs_num, // 128...159
+    SQ_LOOP_CONST_cs                                      = SQ_LOOP_CONST_ls + SQ_LOOP_CONST_ls_num, // 160...191
+
+    SQ_BOOL_CONST                                         = SQ_BOOL_CONST_0, /* 32 bits each */
+    SQ_BOOL_CONST_ps_num                                  = 1,
+    SQ_BOOL_CONST_vs_num                                  = 1,
+    SQ_BOOL_CONST_gs_num                                  = 1,
+    SQ_BOOL_CONST_hs_num                                  = 1,
+    SQ_BOOL_CONST_ls_num                                  = 1,
+    SQ_BOOL_CONST_cs_num                                  = 1,
+    SQ_BOOL_CONST_all_num                                 = 6,
+    SQ_BOOL_CONST_offset                                  = 4,
+    SQ_BOOL_CONST_ps                                      = 0,
+    SQ_BOOL_CONST_vs                                      = SQ_BOOL_CONST_ps + SQ_BOOL_CONST_ps_num,
+    SQ_BOOL_CONST_gs                                      = SQ_BOOL_CONST_vs + SQ_BOOL_CONST_vs_num,
+    SQ_BOOL_CONST_hs                                      = SQ_BOOL_CONST_gs + SQ_BOOL_CONST_gs_num,
+    SQ_BOOL_CONST_ls                                      = SQ_BOOL_CONST_hs + SQ_BOOL_CONST_hs_num,
+    SQ_BOOL_CONST_cs                                      = SQ_BOOL_CONST_ls + SQ_BOOL_CONST_ls_num,
+
+};
+
+#endif
diff --git a/src/evergreen_reg_auto.h b/src/evergreen_reg_auto.h
new file mode 100644
index 0000000..5c61586
--- /dev/null
+++ b/src/evergreen_reg_auto.h
@@ -0,0 +1,4039 @@
+/*
+ * Evergreen Register documentation
+ *
+ * Copyright (C) 2010  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EVERGREEN_REG_AUTO_H
+#define _EVERGREEN_REG_AUTO_H
+
+enum {
+
+    VGT_VTX_VECT_EJECT_REG                                = 0x000088b0,
+	PRIM_COUNT_mask                                   = 0x3ff << 0,
+	PRIM_COUNT_shift                                  = 0,
+    VGT_LAST_COPY_STATE                                   = 0x000088c0,
+	SRC_STATE_ID_mask                                 = 0x07 << 0,
+	SRC_STATE_ID_shift                                = 0,
+	DST_STATE_ID_mask                                 = 0x07 << 16,
+	DST_STATE_ID_shift                                = 16,
+    VGT_CACHE_INVALIDATION                                = 0x000088c4,
+	CACHE_INVALIDATION_mask                           = 0x03 << 0,
+	CACHE_INVALIDATION_shift                          = 0,
+	    VC_ONLY                                       = 0x00,
+	    TC_ONLY                                       = 0x01,
+	    VC_AND_TC                                     = 0x02,
+	VS_NO_EXTRA_BUFFER_bit                            = 1 << 5,
+	AUTO_INVLD_EN_mask                                = 0x03 << 6,
+	AUTO_INVLD_EN_shift                               = 6,
+    VGT_GS_VERTEX_REUSE                                   = 0x000088d4,
+	VERT_REUSE_mask                                   = 0x1f << 0,
+	VERT_REUSE_shift                                  = 0,
+    VGT_CNTL_STATUS                                       = 0x000088f0,
+	VGT_OUT_INDX_BUSY_bit                             = 1 << 0,
+	VGT_OUT_BUSY_bit                                  = 1 << 1,
+	VGT_PT_BUSY_bit                                   = 1 << 2,
+	VGT_TE_BUSY_bit                                   = 1 << 3,
+	VGT_VR_BUSY_bit                                   = 1 << 4,
+	VGT_GRP_BUSY_bit                                  = 1 << 5,
+	VGT_DMA_REQ_BUSY_bit                              = 1 << 6,
+	VGT_DMA_BUSY_bit                                  = 1 << 7,
+	VGT_GS_BUSY_bit                                   = 1 << 8,
+	VGT_HS_BUSY_bit                                   = 1 << 9,
+	VGT_TE11_BUSY_bit                                 = 1 << 10,
+	VGT_BUSY_bit                                      = 1 << 11,
+    VGT_PRIMITIVE_TYPE                                    = 0x00008958,
+	VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask                = 0x3f << 0,
+	VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift               = 0,
+	    DI_PT_NONE                                    = 0x00,
+	    DI_PT_POINTLIST                               = 0x01,
+	    DI_PT_LINELIST                                = 0x02,
+	    DI_PT_LINESTRIP                               = 0x03,
+	    DI_PT_TRILIST                                 = 0x04,
+	    DI_PT_TRIFAN                                  = 0x05,
+	    DI_PT_TRISTRIP                                = 0x06,
+	    DI_PT_UNUSED_0                                = 0x07,
+	    DI_PT_UNUSED_1                                = 0x08,
+	    DI_PT_PATCH                                   = 0x09,
+	    DI_PT_LINELIST_ADJ                            = 0x0a,
+	    DI_PT_LINESTRIP_ADJ                           = 0x0b,
+	    DI_PT_TRILIST_ADJ                             = 0x0c,
+	    DI_PT_TRISTRIP_ADJ                            = 0x0d,
+	    DI_PT_UNUSED_3                                = 0x0e,
+	    DI_PT_UNUSED_4                                = 0x0f,
+	    DI_PT_TRI_WITH_WFLAGS                         = 0x10,
+	    DI_PT_RECTLIST                                = 0x11,
+	    DI_PT_LINELOOP                                = 0x12,
+	    DI_PT_QUADLIST                                = 0x13,
+	    DI_PT_QUADSTRIP                               = 0x14,
+	    DI_PT_POLYGON                                 = 0x15,
+	    DI_PT_2D_COPY_RECT_LIST_V0                    = 0x16,
+	    DI_PT_2D_COPY_RECT_LIST_V1                    = 0x17,
+	    DI_PT_2D_COPY_RECT_LIST_V2                    = 0x18,
+	    DI_PT_2D_COPY_RECT_LIST_V3                    = 0x19,
+	    DI_PT_2D_FILL_RECT_LIST                       = 0x1a,
+	    DI_PT_2D_LINE_STRIP                           = 0x1b,
+	    DI_PT_2D_TRI_STRIP                            = 0x1c,
+    VGT_INDEX_TYPE                                        = 0x0000895c,
+	INDEX_TYPE_mask                                   = 0x03 << 0,
+	INDEX_TYPE_shift                                  = 0,
+	    DI_INDEX_SIZE_16_BIT                          = 0x00,
+	    DI_INDEX_SIZE_32_BIT                          = 0x01,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_0                      = 0x00008960,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_1                      = 0x00008964,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_2                      = 0x00008968,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_3                      = 0x0000896c,
+    VGT_NUM_INDICES                                       = 0x00008970,
+    VGT_NUM_INSTANCES                                     = 0x00008974,
+    PA_CL_CNTL_STATUS                                     = 0x00008a10,
+	CL_BUSY_bit                                       = 1 << 31,
+    PA_CL_ENHANCE                                         = 0x00008a14,
+	CLIP_VTX_REORDER_ENA_bit                          = 1 << 0,
+	NUM_CLIP_SEQ_mask                                 = 0x03 << 1,
+	NUM_CLIP_SEQ_shift                                = 1,
+	CLIPPED_PRIM_SEQ_STALL_bit                        = 1 << 3,
+	VE_NAN_PROC_DISABLE_bit                           = 1 << 4,
+    PA_SU_CNTL_STATUS                                     = 0x00008a50,
+	SU_BUSY_bit                                       = 1 << 31,
+    PA_SU_LINE_STIPPLE_VALUE                              = 0x00008a60,
+	LINE_STIPPLE_VALUE_mask                           = 0xffffff << 0,
+	LINE_STIPPLE_VALUE_shift                          = 0,
+    PA_SC_LINE_STIPPLE_STATE                              = 0x00008b10,
+	CURRENT_PTR_mask                                  = 0x0f << 0,
+	CURRENT_PTR_shift                                 = 0,
+	CURRENT_COUNT_mask                                = 0xff << 8,
+	CURRENT_COUNT_shift                               = 8,
+    SQ_CONFIG                                             = 0x00008c00,
+	VC_ENABLE_bit                                     = 1 << 0,
+	EXPORT_SRC_C_bit                                  = 1 << 1,
+	CS_PRIO_mask                                      = 0x03 << 18,
+	CS_PRIO_shift                                     = 18,
+	LS_PRIO_mask                                      = 0x03 << 20,
+	LS_PRIO_shift                                     = 20,
+	HS_PRIO_mask                                      = 0x03 << 22,
+	HS_PRIO_shift                                     = 22,
+	PS_PRIO_mask                                      = 0x03 << 24,
+	PS_PRIO_shift                                     = 24,
+	VS_PRIO_mask                                      = 0x03 << 26,
+	VS_PRIO_shift                                     = 26,
+	GS_PRIO_mask                                      = 0x03 << 28,
+	GS_PRIO_shift                                     = 28,
+	ES_PRIO_mask                                      = 0x03 << 30,
+	ES_PRIO_shift                                     = 30,
+    SQ_GPR_RESOURCE_MGMT_1                                = 0x00008c04,
+	NUM_PS_GPRS_mask                                  = 0xff << 0,
+	NUM_PS_GPRS_shift                                 = 0,
+	NUM_VS_GPRS_mask                                  = 0xff << 16,
+	NUM_VS_GPRS_shift                                 = 16,
+	NUM_CLAUSE_TEMP_GPRS_mask                         = 0x0f << 28,
+	NUM_CLAUSE_TEMP_GPRS_shift                        = 28,
+    SQ_GPR_RESOURCE_MGMT_2                                = 0x00008c08,
+	NUM_GS_GPRS_mask                                  = 0xff << 0,
+	NUM_GS_GPRS_shift                                 = 0,
+	NUM_ES_GPRS_mask                                  = 0xff << 16,
+	NUM_ES_GPRS_shift                                 = 16,
+    SQ_GPR_RESOURCE_MGMT_3                                = 0x00008c0c,
+	NUM_HS_GPRS_mask                                  = 0xff << 0,
+	NUM_HS_GPRS_shift                                 = 0,
+	NUM_LS_GPRS_mask                                  = 0xff << 16,
+	NUM_LS_GPRS_shift                                 = 16,
+    SQ_GLOBAL_GPR_RESOURCE_MGMT_1                         = 0x00008c10,
+	PS_GGPR_BASE_mask                                 = 0xff << 0,
+	PS_GGPR_BASE_shift                                = 0,
+	VS_GGPR_BASE_mask                                 = 0xff << 8,
+	VS_GGPR_BASE_shift                                = 8,
+	GS_GGPR_BASE_mask                                 = 0xff << 16,
+	GS_GGPR_BASE_shift                                = 16,
+	ES_GGPR_BASE_mask                                 = 0xff << 24,
+	ES_GGPR_BASE_shift                                = 24,
+    SQ_GLOBAL_GPR_RESOURCE_MGMT_2                         = 0x00008c14,
+	HS_GGPR_BASE_mask                                 = 0xff << 0,
+	HS_GGPR_BASE_shift                                = 0,
+	LS_GGPR_BASE_mask                                 = 0xff << 8,
+	LS_GGPR_BASE_shift                                = 8,
+	CS_GGPR_BASE_mask                                 = 0xff << 16,
+	CS_GGPR_BASE_shift                                = 16,
+    SQ_THREAD_RESOURCE_MGMT                               = 0x00008c18,
+	NUM_PS_THREADS_mask                               = 0xff << 0,
+	NUM_PS_THREADS_shift                              = 0,
+	NUM_VS_THREADS_mask                               = 0xff << 8,
+	NUM_VS_THREADS_shift                              = 8,
+	NUM_GS_THREADS_mask                               = 0xff << 16,
+	NUM_GS_THREADS_shift                              = 16,
+	NUM_ES_THREADS_mask                               = 0xff << 24,
+	NUM_ES_THREADS_shift                              = 24,
+    SQ_THREAD_RESOURCE_MGMT_2                             = 0x00008c1c,
+	NUM_HS_THREADS_mask                               = 0xff << 0,
+	NUM_HS_THREADS_shift                              = 0,
+	NUM_LS_THREADS_mask                               = 0xff << 8,
+	NUM_LS_THREADS_shift                              = 8,
+    SQ_STACK_RESOURCE_MGMT_1                              = 0x00008c20,
+	NUM_PS_STACK_ENTRIES_mask                         = 0xfff << 0,
+	NUM_PS_STACK_ENTRIES_shift                        = 0,
+	NUM_VS_STACK_ENTRIES_mask                         = 0xfff << 16,
+	NUM_VS_STACK_ENTRIES_shift                        = 16,
+    SQ_STACK_RESOURCE_MGMT_2                              = 0x00008c24,
+	NUM_GS_STACK_ENTRIES_mask                         = 0xfff << 0,
+	NUM_GS_STACK_ENTRIES_shift                        = 0,
+	NUM_ES_STACK_ENTRIES_mask                         = 0xfff << 16,
+	NUM_ES_STACK_ENTRIES_shift                        = 16,
+    SQ_STACK_RESOURCE_MGMT_3                              = 0x00008c28,
+	NUM_HS_STACK_ENTRIES_mask                         = 0xfff << 0,
+	NUM_HS_STACK_ENTRIES_shift                        = 0,
+	NUM_LS_STACK_ENTRIES_mask                         = 0xfff << 16,
+	NUM_LS_STACK_ENTRIES_shift                        = 16,
+    SQ_ESGS_RING_BASE                                     = 0x00008c40,
+    SQ_ESGS_RING_SIZE                                     = 0x00008c44,
+    SQ_GSVS_RING_BASE                                     = 0x00008c48,
+    SQ_GSVS_RING_SIZE                                     = 0x00008c4c,
+    SQ_ESTMP_RING_BASE                                    = 0x00008c50,
+    SQ_ESTMP_RING_SIZE                                    = 0x00008c54,
+    SQ_GSTMP_RING_BASE                                    = 0x00008c58,
+    SQ_GSTMP_RING_SIZE                                    = 0x00008c5c,
+    SQ_VSTMP_RING_BASE                                    = 0x00008c60,
+    SQ_VSTMP_RING_SIZE                                    = 0x00008c64,
+    SQ_PSTMP_RING_BASE                                    = 0x00008c68,
+    SQ_PSTMP_RING_SIZE                                    = 0x00008c6c,
+    SQ_CONST_MEM_BASE                                     = 0x00008df8,
+    SQ_ALU_WORD1_OP3                                      = 0x00008dfc,
+	SRC2_SEL_mask                                     = 0x1ff << 0,
+	SRC2_SEL_shift                                    = 0,
+	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb,
+	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc,
+	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd,
+	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde,
+	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf,
+	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0,
+	    SQ_ALU_SRC_TIME_HI                            = 0xe3,
+	    SQ_ALU_SRC_TIME_LO                            = 0xe4,
+	    SQ_ALU_SRC_MASK_HI                            = 0xe5,
+	    SQ_ALU_SRC_MASK_LO                            = 0xe6,
+	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7,
+	    SQ_ALU_SRC_SIMD_ID                            = 0xe8,
+	    SQ_ALU_SRC_SE_ID                              = 0xe9,
+	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea,
+	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb,
+	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec,
+	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed,
+	    SQ_ALU_SRC_LOOP_IDX                           = 0xee,
+	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0,
+	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1,
+	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2,
+	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3,
+	    SQ_ALU_SRC_1_DBL_L                            = 0xf4,
+	    SQ_ALU_SRC_1_DBL_M                            = 0xf5,
+	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6,
+	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7,
+	    SQ_ALU_SRC_0                                  = 0xf8,
+	    SQ_ALU_SRC_1                                  = 0xf9,
+	    SQ_ALU_SRC_1_INT                              = 0xfa,
+	    SQ_ALU_SRC_M_1_INT                            = 0xfb,
+	    SQ_ALU_SRC_0_5                                = 0xfc,
+	    SQ_ALU_SRC_LITERAL                            = 0xfd,
+	    SQ_ALU_SRC_PV                                 = 0xfe,
+	    SQ_ALU_SRC_PS                                 = 0xff,
+	SRC2_REL_bit                                      = 1 << 9,
+	SRC2_CHAN_mask                                    = 0x03 << 10,
+	SRC2_CHAN_shift                                   = 10,
+	    SQ_CHAN_X                                     = 0x00,
+	    SQ_CHAN_Y                                     = 0x01,
+	    SQ_CHAN_Z                                     = 0x02,
+	    SQ_CHAN_W                                     = 0x03,
+	SRC2_NEG_bit                                      = 1 << 12,
+	SQ_ALU_WORD1_OP3__ALU_INST_mask                   = 0x1f << 13,
+	SQ_ALU_WORD1_OP3__ALU_INST_shift                  = 13,
+	    SQ_OP3_INST_BFE_UINT                          = 0x04,
+	    SQ_OP3_INST_BFE_INT                           = 0x05,
+	    SQ_OP3_INST_BFI_INT                           = 0x06,
+	    SQ_OP3_INST_FMA                               = 0x07,
+	    SQ_OP3_INST_CNDNE_64                          = 0x09,
+	    SQ_OP3_INST_FMA_64                            = 0x0a,
+	    SQ_OP3_INST_LERP_UINT                         = 0x0b,
+	    SQ_OP3_INST_BIT_ALIGN_INT                     = 0x0c,
+	    SQ_OP3_INST_BYTE_ALIGN_INT                    = 0x0d,
+	    SQ_OP3_INST_SAD_ACCUM_UINT                    = 0x0e,
+	    SQ_OP3_INST_SAD_ACCUM_HI_UINT                 = 0x0f,
+	    SQ_OP3_INST_MULADD_UINT24                     = 0x10,
+	    SQ_OP3_INST_LDS_IDX_OP                        = 0x11,
+	    SQ_OP3_INST_MULADD                            = 0x14,
+	    SQ_OP3_INST_MULADD_M2                         = 0x15,
+	    SQ_OP3_INST_MULADD_M4                         = 0x16,
+	    SQ_OP3_INST_MULADD_D2                         = 0x17,
+	    SQ_OP3_INST_MULADD_IEEE                       = 0x18,
+	    SQ_OP3_INST_CNDE                              = 0x19,
+	    SQ_OP3_INST_CNDGT                             = 0x1a,
+	    SQ_OP3_INST_CNDGE                             = 0x1b,
+	    SQ_OP3_INST_CNDE_INT                          = 0x1c,
+	    SQ_OP3_INST_CNDGT_INT                         = 0x1d,
+	    SQ_OP3_INST_CNDGE_INT                         = 0x1e,
+	    SQ_OP3_INST_MUL_LIT                           = 0x1f,
+    SQ_ALU_WORD1_LDS_DIRECT_LITERAL_LO                    = 0x00008dfc,
+	OFFSET_A_mask                                     = 0x1fff << 0,
+	OFFSET_A_shift                                    = 0,
+	STRIDE_A_mask                                     = 0x7f << 13,
+	STRIDE_A_shift                                    = 13,
+	THREAD_REL_A_bit                                  = 1 << 22,
+    SQ_TEX_WORD2                                          = 0x00008dfc,
+	OFFSET_X_mask                                     = 0x1f << 0,
+	OFFSET_X_shift                                    = 0,
+	OFFSET_Y_mask                                     = 0x1f << 5,
+	OFFSET_Y_shift                                    = 5,
+	OFFSET_Z_mask                                     = 0x1f << 10,
+	OFFSET_Z_shift                                    = 10,
+	SAMPLER_ID_mask                                   = 0x1f << 15,
+	SAMPLER_ID_shift                                  = 15,
+	SQ_TEX_WORD2__SRC_SEL_X_mask                      = 0x07 << 20,
+	SQ_TEX_WORD2__SRC_SEL_X_shift                     = 20,
+	    SQ_SEL_X                                      = 0x00,
+	    SQ_SEL_Y                                      = 0x01,
+	    SQ_SEL_Z                                      = 0x02,
+	    SQ_SEL_W                                      = 0x03,
+	    SQ_SEL_0                                      = 0x04,
+	    SQ_SEL_1                                      = 0x05,
+	SRC_SEL_Y_mask                                    = 0x07 << 23,
+	SRC_SEL_Y_shift                                   = 23,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SRC_SEL_Z_mask                                    = 0x07 << 26,
+	SRC_SEL_Z_shift                                   = 26,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SRC_SEL_W_mask                                    = 0x07 << 29,
+	SRC_SEL_W_shift                                   = 29,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+    SQ_CF_ALLOC_EXPORT_WORD1                              = 0x00008dfc,
+	BURST_COUNT_mask                                  = 0x0f << 16,
+	BURST_COUNT_shift                                 = 16,
+	VALID_PIXEL_MODE_bit                              = 1 << 20,
+	END_OF_PROGRAM_bit                                = 1 << 21,
+	SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask            = 0xff << 22,
+	SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift           = 22,
+	    SQ_CF_INST_MEM_STREAM0_BUF0                   = 0x40,
+	    SQ_CF_INST_MEM_STREAM0_BUF1                   = 0x41,
+	    SQ_CF_INST_MEM_STREAM0_BUF2                   = 0x42,
+	    SQ_CF_INST_MEM_STREAM0_BUF3                   = 0x43,
+	    SQ_CF_INST_MEM_STREAM1_BUF0                   = 0x44,
+	    SQ_CF_INST_MEM_STREAM1_BUF1                   = 0x45,
+	    SQ_CF_INST_MEM_STREAM1_BUF2                   = 0x46,
+	    SQ_CF_INST_MEM_STREAM1_BUF3                   = 0x47,
+	    SQ_CF_INST_MEM_STREAM2_BUF0                   = 0x48,
+	    SQ_CF_INST_MEM_STREAM2_BUF1                   = 0x49,
+	    SQ_CF_INST_MEM_STREAM2_BUF2                   = 0x4a,
+	    SQ_CF_INST_MEM_STREAM2_BUF3                   = 0x4b,
+	    SQ_CF_INST_MEM_STREAM3_BUF0                   = 0x4c,
+	    SQ_CF_INST_MEM_STREAM3_BUF1                   = 0x4d,
+	    SQ_CF_INST_MEM_STREAM3_BUF2                   = 0x4e,
+	    SQ_CF_INST_MEM_STREAM3_BUF3                   = 0x4f,
+	    SQ_CF_INST_MEM_SCRATCH                        = 0x50,
+	    SQ_CF_INST_MEM_RING                           = 0x52,
+	    SQ_CF_INST_EXPORT                             = 0x53,
+	    SQ_CF_INST_EXPORT_DONE                        = 0x54,
+	    SQ_CF_INST_MEM_EXPORT                         = 0x55,
+	    SQ_CF_INST_MEM_RAT                            = 0x56,
+	    SQ_CF_INST_MEM_RAT_CACHELESS                  = 0x57,
+	    SQ_CF_INST_MEM_RING1                          = 0x58,
+	    SQ_CF_INST_MEM_RING2                          = 0x59,
+	    SQ_CF_INST_MEM_RING3                          = 0x5a,
+	    SQ_CF_INST_MEM_EXPORT_COMBINED                = 0x5b,
+	    SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS         = 0x5c,
+	MARK_bit                                          = 1 << 30,
+	BARRIER_bit                                       = 1 << 31,
+    SQ_CF_ALU_WORD1                                       = 0x00008dfc,
+	KCACHE_MODE1_mask                                 = 0x03 << 0,
+	KCACHE_MODE1_shift                                = 0,
+	    SQ_CF_KCACHE_NOP                              = 0x00,
+	    SQ_CF_KCACHE_LOCK_1                           = 0x01,
+	    SQ_CF_KCACHE_LOCK_2                           = 0x02,
+	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03,
+	KCACHE_ADDR0_mask                                 = 0xff << 2,
+	KCACHE_ADDR0_shift                                = 2,
+	KCACHE_ADDR1_mask                                 = 0xff << 10,
+	KCACHE_ADDR1_shift                                = 10,
+	SQ_CF_ALU_WORD1__COUNT_mask                       = 0x7f << 18,
+	SQ_CF_ALU_WORD1__COUNT_shift                      = 18,
+	SQ_CF_ALU_WORD1__ALT_CONST_bit                    = 1 << 25,
+	SQ_CF_ALU_WORD1__CF_INST_mask                     = 0x0f << 26,
+	SQ_CF_ALU_WORD1__CF_INST_shift                    = 26,
+	    SQ_CF_INST_ALU                                = 0x08,
+	    SQ_CF_INST_ALU_PUSH_BEFORE                    = 0x09,
+	    SQ_CF_INST_ALU_POP_AFTER                      = 0x0a,
+	    SQ_CF_INST_ALU_POP2_AFTER                     = 0x0b,
+	    SQ_CF_INST_ALU_EXTENDED                       = 0x0c,
+	    SQ_CF_INST_ALU_CONTINUE                       = 0x0d,
+	    SQ_CF_INST_ALU_BREAK                          = 0x0e,
+	    SQ_CF_INST_ALU_ELSE_AFTER                     = 0x0f,
+	WHOLE_QUAD_MODE_bit                               = 1 << 30,
+/* 	BARRIER_bit                                       = 1 << 31, */
+    SQ_TEX_WORD1                                          = 0x00008dfc,
+	SQ_TEX_WORD1__DST_GPR_mask                        = 0x7f << 0,
+	SQ_TEX_WORD1__DST_GPR_shift                       = 0,
+	SQ_TEX_WORD1__DST_REL_bit                         = 1 << 7,
+	SQ_TEX_WORD1__DST_SEL_X_mask                      = 0x07 << 9,
+	SQ_TEX_WORD1__DST_SEL_X_shift                     = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	    SQ_SEL_MASK                                   = 0x07,
+	SQ_TEX_WORD1__DST_SEL_Y_mask                      = 0x07 << 12,
+	SQ_TEX_WORD1__DST_SEL_Y_shift                     = 12,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_TEX_WORD1__DST_SEL_Z_mask                      = 0x07 << 15,
+	SQ_TEX_WORD1__DST_SEL_Z_shift                     = 15,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_TEX_WORD1__DST_SEL_W_mask                      = 0x07 << 18,
+	SQ_TEX_WORD1__DST_SEL_W_shift                     = 18,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_TEX_WORD1__LOD_BIAS_mask                       = 0x7f << 21,
+	SQ_TEX_WORD1__LOD_BIAS_shift                      = 21,
+	COORD_TYPE_X_bit                                  = 1 << 28,
+	COORD_TYPE_Y_bit                                  = 1 << 29,
+	COORD_TYPE_Z_bit                                  = 1 << 30,
+	COORD_TYPE_W_bit                                  = 1 << 31,
+    SQ_VTX_WORD0                                          = 0x00008dfc,
+	VTX_INST_mask                                     = 0x1f << 0,
+	VTX_INST_shift                                    = 0,
+	    SQ_VTX_INST_FETCH                             = 0x00,
+	    SQ_VTX_INST_SEMANTIC                          = 0x01,
+	    SQ_VTX_INST_GET_BUFFER_RESINFO                = 0x0e,
+	FETCH_TYPE_mask                                   = 0x03 << 5,
+	FETCH_TYPE_shift                                  = 5,
+	    SQ_VTX_FETCH_VERTEX_DATA                      = 0x00,
+	    SQ_VTX_FETCH_INSTANCE_DATA                    = 0x01,
+	    SQ_VTX_FETCH_NO_INDEX_OFFSET                  = 0x02,
+	FETCH_WHOLE_QUAD_bit                              = 1 << 7,
+	BUFFER_ID_mask                                    = 0xff << 8,
+	BUFFER_ID_shift                                   = 8,
+	SQ_VTX_WORD0__SRC_GPR_mask                        = 0x7f << 16,
+	SQ_VTX_WORD0__SRC_GPR_shift                       = 16,
+	SRC_REL_bit                                       = 1 << 23,
+	SQ_VTX_WORD0__SRC_SEL_X_mask                      = 0x03 << 24,
+	SQ_VTX_WORD0__SRC_SEL_X_shift                     = 24,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+	MEGA_FETCH_COUNT_mask                             = 0x3f << 26,
+	MEGA_FETCH_COUNT_shift                            = 26,
+    SQ_CF_ALLOC_EXPORT_WORD1_SWIZ                         = 0x00008dfc,
+	SEL_X_mask                                        = 0x07 << 0,
+	SEL_X_shift                                       = 0,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SEL_Y_mask                                        = 0x07 << 3,
+	SEL_Y_shift                                       = 3,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SEL_Z_mask                                        = 0x07 << 6,
+	SEL_Z_shift                                       = 6,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SEL_W_mask                                        = 0x07 << 9,
+	SEL_W_shift                                       = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+    SQ_MEM_RD_WORD0                                       = 0x00008dfc,
+	MEM_INST_mask                                     = 0x1f << 0,
+	MEM_INST_shift                                    = 0,
+	    SQ_MEM_INST_MEM                               = 0x02,
+	SQ_MEM_RD_WORD0__ELEM_SIZE_mask                   = 0x03 << 5,
+	SQ_MEM_RD_WORD0__ELEM_SIZE_shift                  = 5,
+/* 	FETCH_WHOLE_QUAD_bit                              = 1 << 7, */
+	MEM_OP_mask                                       = 0x07 << 8,
+	MEM_OP_shift                                      = 8,
+	    SQ_MEM_OP_RD_SCRATCH                          = 0x00,
+	    SQ_MEM_OP_RD_SCATTER                          = 0x02,
+	    SQ_MEM_OP_GDS                                 = 0x04,
+	    SQ_MEM_OP_TF_WRITE                            = 0x05,
+	SQ_MEM_RD_WORD0__UNCACHED_bit                     = 1 << 11,
+	INDEXED_bit                                       = 1 << 12,
+	SQ_MEM_RD_WORD0__SRC_GPR_mask                     = 0x7f << 16,
+	SQ_MEM_RD_WORD0__SRC_GPR_shift                    = 16,
+/* 	SRC_REL_bit                                       = 1 << 23, */
+	SQ_MEM_RD_WORD0__SRC_SEL_X_mask                   = 0x03 << 24,
+	SQ_MEM_RD_WORD0__SRC_SEL_X_shift                  = 24,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+	BURST_CNT_mask                                    = 0x0f << 26,
+	BURST_CNT_shift                                   = 26,
+    SQ_ALU_WORD1                                          = 0x00008dfc,
+	SQ_ALU_WORD1__ENCODING_mask                       = 0x07 << 15,
+	SQ_ALU_WORD1__ENCODING_shift                      = 15,
+	BANK_SWIZZLE_mask                                 = 0x07 << 18,
+	BANK_SWIZZLE_shift                                = 18,
+	    SQ_ALU_VEC_012                                = 0x00,
+	    SQ_ALU_VEC_021                                = 0x01,
+	    SQ_ALU_VEC_120                                = 0x02,
+	    SQ_ALU_VEC_102                                = 0x03,
+	    SQ_ALU_VEC_201                                = 0x04,
+	    SQ_ALU_VEC_210                                = 0x05,
+	SQ_ALU_WORD1__DST_GPR_mask                        = 0x7f << 21,
+	SQ_ALU_WORD1__DST_GPR_shift                       = 21,
+	SQ_ALU_WORD1__DST_REL_bit                         = 1 << 28,
+	DST_CHAN_mask                                     = 0x03 << 29,
+	DST_CHAN_shift                                    = 29,
+	    CHAN_X                                        = 0x00,
+	    CHAN_Y                                        = 0x01,
+	    CHAN_Z                                        = 0x02,
+	    CHAN_W                                        = 0x03,
+	SQ_ALU_WORD1__CLAMP_bit                           = 1 << 31,
+    SQ_CF_ALU_WORD0_EXT                                   = 0x00008dfc,
+	KCACHE_BANK_INDEX_MODE0_mask                      = 0x03 << 4,
+	KCACHE_BANK_INDEX_MODE0_shift                     = 4,
+	    SQ_CF_INDEX_NONE                              = 0x00,
+	    SQ_CF_INDEX_0                                 = 0x01,
+	    SQ_CF_INDEX_1                                 = 0x02,
+	    SQ_CF_INVALID                                 = 0x03,
+	KCACHE_BANK_INDEX_MODE1_mask                      = 0x03 << 6,
+	KCACHE_BANK_INDEX_MODE1_shift                     = 6,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	KCACHE_BANK_INDEX_MODE2_mask                      = 0x03 << 8,
+	KCACHE_BANK_INDEX_MODE2_shift                     = 8,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	KCACHE_BANK_INDEX_MODE3_mask                      = 0x03 << 10,
+	KCACHE_BANK_INDEX_MODE3_shift                     = 10,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	KCACHE_BANK2_mask                                 = 0x0f << 22,
+	KCACHE_BANK2_shift                                = 22,
+	KCACHE_BANK3_mask                                 = 0x0f << 26,
+	KCACHE_BANK3_shift                                = 26,
+	KCACHE_MODE2_mask                                 = 0x03 << 30,
+	KCACHE_MODE2_shift                                = 30,
+/* 	    SQ_CF_KCACHE_NOP                              = 0x00, */
+/* 	    SQ_CF_KCACHE_LOCK_1                           = 0x01, */
+/* 	    SQ_CF_KCACHE_LOCK_2                           = 0x02, */
+/* 	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03, */
+    SQ_ALU_WORD0_LDS_IDX_OP                               = 0x00008dfc,
+	SRC0_SEL_mask                                     = 0x1ff << 0,
+	SRC0_SEL_shift                                    = 0,
+/* 	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0, */
+/* 	    SQ_ALU_SRC_TIME_HI                            = 0xe3, */
+/* 	    SQ_ALU_SRC_TIME_LO                            = 0xe4, */
+/* 	    SQ_ALU_SRC_MASK_HI                            = 0xe5, */
+/* 	    SQ_ALU_SRC_MASK_LO                            = 0xe6, */
+/* 	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7, */
+/* 	    SQ_ALU_SRC_SIMD_ID                            = 0xe8, */
+/* 	    SQ_ALU_SRC_SE_ID                              = 0xe9, */
+/* 	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea, */
+/* 	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb, */
+/* 	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec, */
+/* 	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed, */
+/* 	    SQ_ALU_SRC_LOOP_IDX                           = 0xee, */
+/* 	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0, */
+/* 	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3, */
+/* 	    SQ_ALU_SRC_1_DBL_L                            = 0xf4, */
+/* 	    SQ_ALU_SRC_1_DBL_M                            = 0xf5, */
+/* 	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6, */
+/* 	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7, */
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+	SRC0_REL_bit                                      = 1 << 9,
+	SRC0_CHAN_mask                                    = 0x03 << 10,
+	SRC0_CHAN_shift                                   = 10,
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	IDX_OFFSET_4_bit                                  = 1 << 12,
+	SRC1_SEL_mask                                     = 0x1ff << 13,
+	SRC1_SEL_shift                                    = 13,
+/* 	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0, */
+/* 	    SQ_ALU_SRC_TIME_HI                            = 0xe3, */
+/* 	    SQ_ALU_SRC_TIME_LO                            = 0xe4, */
+/* 	    SQ_ALU_SRC_MASK_HI                            = 0xe5, */
+/* 	    SQ_ALU_SRC_MASK_LO                            = 0xe6, */
+/* 	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7, */
+/* 	    SQ_ALU_SRC_SIMD_ID                            = 0xe8, */
+/* 	    SQ_ALU_SRC_SE_ID                              = 0xe9, */
+/* 	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea, */
+/* 	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb, */
+/* 	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec, */
+/* 	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed, */
+/* 	    SQ_ALU_SRC_LOOP_IDX                           = 0xee, */
+/* 	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0, */
+/* 	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3, */
+/* 	    SQ_ALU_SRC_1_DBL_L                            = 0xf4, */
+/* 	    SQ_ALU_SRC_1_DBL_M                            = 0xf5, */
+/* 	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6, */
+/* 	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7, */
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+	SRC1_REL_bit                                      = 1 << 22,
+	SRC1_CHAN_mask                                    = 0x03 << 23,
+	SRC1_CHAN_shift                                   = 23,
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	IDX_OFFSET_5_bit                                  = 1 << 25,
+	INDEX_MODE_mask                                   = 0x07 << 26,
+	INDEX_MODE_shift                                  = 26,
+	    SQ_INDEX_AR_X                                 = 0x00,
+	    SQ_INDEX_LOOP                                 = 0x04,
+	    SQ_INDEX_GLOBAL                               = 0x05,
+	    SQ_INDEX_GLOBAL_AR_X                          = 0x06,
+	PRED_SEL_mask                                     = 0x03 << 29,
+	PRED_SEL_shift                                    = 29,
+	    SQ_PRED_SEL_OFF                               = 0x00,
+	    SQ_PRED_SEL_ZERO                              = 0x02,
+	    SQ_PRED_SEL_ONE                               = 0x03,
+	LAST_bit                                          = 1 << 31,
+    SQ_MEM_GDS_WORD2                                      = 0x00008dfc,
+	SQ_MEM_GDS_WORD2__DST_SEL_X_mask                  = 0x07 << 0,
+	SQ_MEM_GDS_WORD2__DST_SEL_X_shift                 = 0,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_GDS_WORD2__DST_SEL_Y_mask                  = 0x07 << 3,
+	SQ_MEM_GDS_WORD2__DST_SEL_Y_shift                 = 3,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_GDS_WORD2__DST_SEL_Z_mask                  = 0x07 << 6,
+	SQ_MEM_GDS_WORD2__DST_SEL_Z_shift                 = 6,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_GDS_WORD2__DST_SEL_W_mask                  = 0x07 << 9,
+	SQ_MEM_GDS_WORD2__DST_SEL_W_shift                 = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+    SQ_CF_ALLOC_EXPORT_WORD0_RAT                          = 0x00008dfc,
+	RAT_ID_mask                                       = 0x0f << 0,
+	RAT_ID_shift                                      = 0,
+	RAT_INST_mask                                     = 0x3f << 4,
+	RAT_INST_shift                                    = 4,
+	    SQ_EXPORT_RAT_INST_NOP                        = 0x00,
+	    SQ_EXPORT_RAT_INST_STORE_TYPED                = 0x01,
+	    SQ_EXPORT_RAT_INST_STORE_RAW                  = 0x02,
+	    SQ_EXPORT_RAT_INST_STORE_RAW_FDENORM          = 0x03,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_INT                = 0x04,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_FLT                = 0x05,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM            = 0x06,
+	    SQ_EXPORT_RAT_INST_ADD                        = 0x07,
+	    SQ_EXPORT_RAT_INST_SUB                        = 0x08,
+	    SQ_EXPORT_RAT_INST_RSUB                       = 0x09,
+	    SQ_EXPORT_RAT_INST_MIN_INT                    = 0x0a,
+	    SQ_EXPORT_RAT_INST_MIN_UINT                   = 0x0b,
+	    SQ_EXPORT_RAT_INST_MAX_INT                    = 0x0c,
+	    SQ_EXPORT_RAT_INST_MAX_UINT                   = 0x0d,
+	    SQ_EXPORT_RAT_INST_AND                        = 0x0e,
+	    SQ_EXPORT_RAT_INST_OR                         = 0x0f,
+	    SQ_EXPORT_RAT_INST_XOR                        = 0x10,
+	    SQ_EXPORT_RAT_INST_MSKOR                      = 0x11,
+	    SQ_EXPORT_RAT_INST_INC_UINT                   = 0x12,
+	    SQ_EXPORT_RAT_INST_DEC_UINT                   = 0x13,
+	    SQ_EXPORT_RAT_INST_NOP_RTN                    = 0x20,
+	    SQ_EXPORT_RAT_INST_XCHG_RTN                   = 0x22,
+	    SQ_EXPORT_RAT_INST_XCHG_FDENORM_RTN           = 0x23,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_INT_RTN            = 0x24,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_FLT_RTN            = 0x25,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM_RTN        = 0x26,
+	    SQ_EXPORT_RAT_INST_ADD_RTN                    = 0x27,
+	    SQ_EXPORT_RAT_INST_SUB_RTN                    = 0x28,
+	    SQ_EXPORT_RAT_INST_RSUB_RTN                   = 0x29,
+	    SQ_EXPORT_RAT_INST_MIN_INT_RTN                = 0x2a,
+	    SQ_EXPORT_RAT_INST_MIN_UINT_RTN               = 0x2b,
+	    SQ_EXPORT_RAT_INST_MAX_INT_RTN                = 0x2c,
+	    SQ_EXPORT_RAT_INST_MAX_UINT_RTN               = 0x2d,
+	    SQ_EXPORT_RAT_INST_AND_RTN                    = 0x2e,
+	    SQ_EXPORT_RAT_INST_OR_RTN                     = 0x2f,
+	    SQ_EXPORT_RAT_INST_XOR_RTN                    = 0x30,
+	    SQ_EXPORT_RAT_INST_MSKOR_RTN                  = 0x31,
+	    SQ_EXPORT_RAT_INST_INC_UINT_RTN               = 0x32,
+	    SQ_EXPORT_RAT_INST_DEC_UINT_RTN               = 0x33,
+	RAT_INDEX_MODE_mask                               = 0x03 << 11,
+	RAT_INDEX_MODE_shift                              = 11,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_mask           = 0x03 << 13,
+	SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_shift          = 13,
+	    SQ_EXPORT_PIXEL                               = 0x00,
+	    SQ_EXPORT_POS                                 = 0x01,
+	    SQ_EXPORT_PARAM                               = 0x02,
+	    X_UNUSED_FOR_SX_EXPORTS                       = 0x03,
+	RW_GPR_mask                                       = 0x7f << 15,
+	RW_GPR_shift                                      = 15,
+	RW_REL_bit                                        = 1 << 22,
+	INDEX_GPR_mask                                    = 0x7f << 23,
+	INDEX_GPR_shift                                   = 23,
+	SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_mask      = 0x03 << 30,
+	SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_shift     = 30,
+    SQ_CF_ALU_WORD0                                       = 0x00008dfc,
+	SQ_CF_ALU_WORD0__ADDR_mask                        = 0x3fffff << 0,
+	SQ_CF_ALU_WORD0__ADDR_shift                       = 0,
+	KCACHE_BANK0_mask                                 = 0x0f << 22,
+	KCACHE_BANK0_shift                                = 22,
+	KCACHE_BANK1_mask                                 = 0x0f << 26,
+	KCACHE_BANK1_shift                                = 26,
+	KCACHE_MODE0_mask                                 = 0x03 << 30,
+	KCACHE_MODE0_shift                                = 30,
+/* 	    SQ_CF_KCACHE_NOP                              = 0x00, */
+/* 	    SQ_CF_KCACHE_LOCK_1                           = 0x01, */
+/* 	    SQ_CF_KCACHE_LOCK_2                           = 0x02, */
+/* 	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03, */
+    SQ_MEM_GDS_WORD1                                      = 0x00008dfc,
+	SQ_MEM_GDS_WORD1__DST_GPR_mask                    = 0x7f << 0,
+	SQ_MEM_GDS_WORD1__DST_GPR_shift                   = 0,
+	DST_REL_MODE_mask                                 = 0x03 << 7,
+	DST_REL_MODE_shift                                = 7,
+	    SQ_REL_NONE                                   = 0x00,
+	    SQ_REL_LOOP                                   = 0x01,
+	    SQ_REL_GLOBAL                                 = 0x02,
+	GDS_OP_mask                                       = 0x3f << 9,
+	GDS_OP_shift                                      = 9,
+	    SQ_DS_INST_ADD                                = 0x00,
+	    SQ_DS_INST_SUB                                = 0x01,
+	    SQ_DS_INST_RSUB                               = 0x02,
+	    SQ_DS_INST_INC                                = 0x03,
+	    SQ_DS_INST_DEC                                = 0x04,
+	    SQ_DS_INST_MIN_INT                            = 0x05,
+	    SQ_DS_INST_MAX_INT                            = 0x06,
+	    SQ_DS_INST_MIN_UINT                           = 0x07,
+	    SQ_DS_INST_MAX_UINT                           = 0x08,
+	    SQ_DS_INST_AND                                = 0x09,
+	    SQ_DS_INST_OR                                 = 0x0a,
+	    SQ_DS_INST_XOR                                = 0x0b,
+	    SQ_DS_INST_MSKOR                              = 0x0c,
+	    SQ_DS_INST_WRITE                              = 0x0d,
+	    SQ_DS_INST_WRITE_REL                          = 0x0e,
+	    SQ_DS_INST_WRITE2                             = 0x0f,
+	    SQ_DS_INST_CMP_STORE                          = 0x10,
+	    SQ_DS_INST_CMP_STORE_SPF                      = 0x11,
+	    SQ_DS_INST_BYTE_WRITE                         = 0x12,
+	    SQ_DS_INST_SHORT_WRITE                        = 0x13,
+	    SQ_DS_INST_ADD_RET                            = 0x20,
+	    SQ_DS_INST_SUB_RET                            = 0x21,
+	    SQ_DS_INST_RSUB_RET                           = 0x22,
+	    SQ_DS_INST_INC_RET                            = 0x23,
+	    SQ_DS_INST_DEC_RET                            = 0x24,
+	    SQ_DS_INST_MIN_INT_RET                        = 0x25,
+	    SQ_DS_INST_MAX_INT_RET                        = 0x26,
+	    SQ_DS_INST_MIN_UINT_RET                       = 0x27,
+	    SQ_DS_INST_MAX_UINT_RET                       = 0x28,
+	    SQ_DS_INST_AND_RET                            = 0x29,
+	    SQ_DS_INST_OR_RET                             = 0x2a,
+	    SQ_DS_INST_XOR_RET                            = 0x2b,
+	    SQ_DS_INST_MSKOR_RET                          = 0x2c,
+	    SQ_DS_INST_XCHG_RET                           = 0x2d,
+	    SQ_DS_INST_XCHG_REL_RET                       = 0x2e,
+	    SQ_DS_INST_XCHG2_RET                          = 0x2f,
+	    SQ_DS_INST_CMP_XCHG_RET                       = 0x30,
+	    SQ_DS_INST_CMP_XCHG_SPF_RET                   = 0x31,
+	    SQ_DS_INST_READ_RET                           = 0x32,
+	    SQ_DS_INST_READ_REL_RET                       = 0x33,
+	    SQ_DS_INST_READ2_RET                          = 0x34,
+	    SQ_DS_INST_READWRITE_RET                      = 0x35,
+	    SQ_DS_INST_BYTE_READ_RET                      = 0x36,
+	    SQ_DS_INST_UBYTE_READ_RET                     = 0x37,
+	    SQ_DS_INST_SHORT_READ_RET                     = 0x38,
+	    SQ_DS_INST_USHORT_READ_RET                    = 0x39,
+	    SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET           = 0x3f,
+	DS_OFFSET_mask                                    = 0x7f << 16,
+	DS_OFFSET_shift                                   = 16,
+	UAV_INDEX_MODE_mask                               = 0x03 << 24,
+	UAV_INDEX_MODE_shift                              = 24,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	UAV_ID_mask                                       = 0x0f << 26,
+	UAV_ID_shift                                      = 26,
+	ALLOC_CONSUME_bit                                 = 1 << 30,
+	BCAST_FIRST_REQ_bit                               = 1 << 31,
+    SQ_MEM_RD_WORD2                                       = 0x00008dfc,
+	ARRAY_BASE_mask                                   = 0x1fff << 0,
+	ARRAY_BASE_shift                                  = 0,
+	SQ_MEM_RD_WORD2__ENDIAN_SWAP_mask                 = 0x03 << 16,
+	SQ_MEM_RD_WORD2__ENDIAN_SWAP_shift                = 16,
+	    SQ_ENDIAN_NONE                                = 0x00,
+	    SQ_ENDIAN_8IN16                               = 0x01,
+	    SQ_ENDIAN_8IN32                               = 0x02,
+	SQ_MEM_RD_WORD2__ARRAY_SIZE_mask                  = 0xfff << 20,
+	SQ_MEM_RD_WORD2__ARRAY_SIZE_shift                 = 20,
+    SQ_CF_ALU_WORD1_EXT                                   = 0x00008dfc,
+	KCACHE_MODE3_mask                                 = 0x03 << 0,
+	KCACHE_MODE3_shift                                = 0,
+/* 	    SQ_CF_KCACHE_NOP                              = 0x00, */
+/* 	    SQ_CF_KCACHE_LOCK_1                           = 0x01, */
+/* 	    SQ_CF_KCACHE_LOCK_2                           = 0x02, */
+/* 	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03, */
+	KCACHE_ADDR2_mask                                 = 0xff << 2,
+	KCACHE_ADDR2_shift                                = 2,
+	KCACHE_ADDR3_mask                                 = 0xff << 10,
+	KCACHE_ADDR3_shift                                = 10,
+	SQ_CF_ALU_WORD1_EXT__CF_INST_mask                 = 0x0f << 26,
+	SQ_CF_ALU_WORD1_EXT__CF_INST_shift                = 26,
+/* 	    SQ_CF_INST_ALU                                = 0x08, */
+/* 	    SQ_CF_INST_ALU_PUSH_BEFORE                    = 0x09, */
+/* 	    SQ_CF_INST_ALU_POP_AFTER                      = 0x0a, */
+/* 	    SQ_CF_INST_ALU_POP2_AFTER                     = 0x0b, */
+/* 	    SQ_CF_INST_ALU_EXTENDED                       = 0x0c, */
+/* 	    SQ_CF_INST_ALU_CONTINUE                       = 0x0d, */
+/* 	    SQ_CF_INST_ALU_BREAK                          = 0x0e, */
+/* 	    SQ_CF_INST_ALU_ELSE_AFTER                     = 0x0f, */
+/* 	BARRIER_bit                                       = 1 << 31, */
+    SQ_CF_GWS_WORD0                                       = 0x00008dfc,
+	VALUE_mask                                        = 0x3ff << 0,
+	VALUE_shift                                       = 0,
+	RESOURCE_mask                                     = 0x1f << 16,
+	RESOURCE_shift                                    = 16,
+	SIGN_bit                                          = 1 << 25,
+	VAL_INDEX_MODE_mask                               = 0x03 << 26,
+	VAL_INDEX_MODE_shift                              = 26,
+	    SQ_GWS_INDEX_NONE                             = 0x00,
+	    SQ_GWS_INDEX_0                                = 0x01,
+	    SQ_GWS_INDEX_1                                = 0x02,
+	    SQ_GWS_INDEX_MIX                              = 0x03,
+	RSRC_INDEX_MODE_mask                              = 0x03 << 28,
+	RSRC_INDEX_MODE_shift                             = 28,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	GWS_OPCODE_mask                                   = 0x03 << 30,
+	GWS_OPCODE_shift                                  = 30,
+	    SQ_GWS_SEMA_V                                 = 0x00,
+	    SQ_GWS_SEMA_P                                 = 0x01,
+	    SQ_GWS_BARRIER                                = 0x02,
+	    SQ_GWS_INIT                                   = 0x03,
+    SQ_VTX_WORD2                                          = 0x00008dfc,
+	SQ_VTX_WORD2__OFFSET_mask                         = 0xffff << 0,
+	SQ_VTX_WORD2__OFFSET_shift                        = 0,
+	SQ_VTX_WORD2__ENDIAN_SWAP_mask                    = 0x03 << 16,
+	SQ_VTX_WORD2__ENDIAN_SWAP_shift                   = 16,
+/* 	    SQ_ENDIAN_NONE                                = 0x00, */
+/* 	    SQ_ENDIAN_8IN16                               = 0x01, */
+/* 	    SQ_ENDIAN_8IN32                               = 0x02, */
+	CONST_BUF_NO_STRIDE_bit                           = 1 << 18,
+	MEGA_FETCH_bit                                    = 1 << 19,
+	SQ_VTX_WORD2__ALT_CONST_bit                       = 1 << 20,
+	BUFFER_INDEX_MODE_mask                            = 0x03 << 21,
+	BUFFER_INDEX_MODE_shift                           = 21,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+    SQ_CF_ALLOC_EXPORT_WORD1_BUF                          = 0x00008dfc,
+	SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_mask     = 0xfff << 0,
+	SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_shift    = 0,
+	COMP_MASK_mask                                    = 0x0f << 12,
+	COMP_MASK_shift                                   = 12,
+    SQ_CF_WORD0                                           = 0x00008dfc,
+	SQ_CF_WORD0__ADDR_mask                            = 0xffffff << 0,
+	SQ_CF_WORD0__ADDR_shift                           = 0,
+	JUMPTABLE_SEL_mask                                = 0x07 << 24,
+	JUMPTABLE_SEL_shift                               = 24,
+	    SQ_CF_JUMPTABLE_SEL_CONST_A                   = 0x00,
+	    SQ_CF_JUMPTABLE_SEL_CONST_B                   = 0x01,
+	    SQ_CF_JUMPTABLE_SEL_CONST_C                   = 0x02,
+	    SQ_CF_JUMPTABLE_SEL_CONST_D                   = 0x03,
+	    SQ_CF_JUMPTABLE_SEL_INDEX_0                   = 0x04,
+	    SQ_CF_JUMPTABLE_SEL_INDEX_1                   = 0x05,
+    SQ_CF_ALLOC_EXPORT_WORD0                              = 0x00008dfc,
+/* 	ARRAY_BASE_mask                                   = 0x1fff << 0, */
+/* 	ARRAY_BASE_shift                                  = 0, */
+	SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask               = 0x03 << 13,
+	SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift              = 13,
+/* 	    SQ_EXPORT_PIXEL                               = 0x00, */
+/* 	    SQ_EXPORT_POS                                 = 0x01, */
+/* 	    SQ_EXPORT_PARAM                               = 0x02, */
+/* 	    X_UNUSED_FOR_SX_EXPORTS                       = 0x03, */
+/* 	RW_GPR_mask                                       = 0x7f << 15, */
+/* 	RW_GPR_shift                                      = 15, */
+/* 	RW_REL_bit                                        = 1 << 22, */
+/* 	INDEX_GPR_mask                                    = 0x7f << 23, */
+/* 	INDEX_GPR_shift                                   = 23, */
+	SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_mask          = 0x03 << 30,
+	SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_shift         = 30,
+    SQ_MEM_GDS_WORD0                                      = 0x00008dfc,
+/* 	MEM_INST_mask                                     = 0x1f << 0, */
+/* 	MEM_INST_shift                                    = 0, */
+/* 	    SQ_MEM_INST_MEM                               = 0x02, */
+/* 	MEM_OP_mask                                       = 0x07 << 8, */
+/* 	MEM_OP_shift                                      = 8, */
+/* 	    SQ_MEM_OP_RD_SCRATCH                          = 0x00, */
+/* 	    SQ_MEM_OP_RD_SCATTER                          = 0x02, */
+/* 	    SQ_MEM_OP_GDS                                 = 0x04, */
+/* 	    SQ_MEM_OP_TF_WRITE                            = 0x05, */
+	SQ_MEM_GDS_WORD0__SRC_GPR_mask                    = 0x7f << 11,
+	SQ_MEM_GDS_WORD0__SRC_GPR_shift                   = 11,
+	SRC_REL_MODE_mask                                 = 0x03 << 18,
+	SRC_REL_MODE_shift                                = 18,
+/* 	    SQ_REL_NONE                                   = 0x00, */
+/* 	    SQ_REL_LOOP                                   = 0x01, */
+/* 	    SQ_REL_GLOBAL                                 = 0x02, */
+	SQ_MEM_GDS_WORD0__SRC_SEL_X_mask                  = 0x07 << 20,
+	SQ_MEM_GDS_WORD0__SRC_SEL_X_shift                 = 20,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	SRC_SEL_Y_mask                                    = 0x07 << 23, */
+/* 	SRC_SEL_Y_shift                                   = 23, */
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	SRC_SEL_Z_mask                                    = 0x07 << 26, */
+/* 	SRC_SEL_Z_shift                                   = 26, */
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+    SQ_ALU_WORD1_LDS_DIRECT_LITERAL_HI                    = 0x00008dfc,
+	OFFSET_B_mask                                     = 0x1fff << 0,
+	OFFSET_B_shift                                    = 0,
+	STRIDE_B_mask                                     = 0x7f << 13,
+	STRIDE_B_shift                                    = 13,
+	THREAD_REL_B_bit                                  = 1 << 22,
+	DIRECT_READ_32_bit                                = 1 << 31,
+    SQ_VTX_WORD1                                          = 0x00008dfc,
+	SQ_VTX_WORD1__DST_SEL_X_mask                      = 0x07 << 9,
+	SQ_VTX_WORD1__DST_SEL_X_shift                     = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_VTX_WORD1__DST_SEL_Y_mask                      = 0x07 << 12,
+	SQ_VTX_WORD1__DST_SEL_Y_shift                     = 12,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_VTX_WORD1__DST_SEL_Z_mask                      = 0x07 << 15,
+	SQ_VTX_WORD1__DST_SEL_Z_shift                     = 15,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_VTX_WORD1__DST_SEL_W_mask                      = 0x07 << 18,
+	SQ_VTX_WORD1__DST_SEL_W_shift                     = 18,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	USE_CONST_FIELDS_bit                              = 1 << 21,
+	SQ_VTX_WORD1__DATA_FORMAT_mask                    = 0x3f << 22,
+	SQ_VTX_WORD1__DATA_FORMAT_shift                   = 22,
+	SQ_VTX_WORD1__NUM_FORMAT_ALL_mask                 = 0x03 << 28,
+	SQ_VTX_WORD1__NUM_FORMAT_ALL_shift                = 28,
+	    SQ_NUM_FORMAT_NORM                            = 0x00,
+	    SQ_NUM_FORMAT_INT                             = 0x01,
+	    SQ_NUM_FORMAT_SCALED                          = 0x02,
+	SQ_VTX_WORD1__FORMAT_COMP_ALL_bit                 = 1 << 30,
+	SQ_VTX_WORD1__SRF_MODE_ALL_bit                    = 1 << 31,
+    SQ_ALU_WORD1_OP2                                      = 0x00008dfc,
+	SRC0_ABS_bit                                      = 1 << 0,
+	SRC1_ABS_bit                                      = 1 << 1,
+	UPDATE_EXECUTE_MASK_bit                           = 1 << 2,
+	UPDATE_PRED_bit                                   = 1 << 3,
+	WRITE_MASK_bit                                    = 1 << 4,
+	OMOD_mask                                         = 0x03 << 5,
+	OMOD_shift                                        = 5,
+	    SQ_ALU_OMOD_OFF                               = 0x00,
+	    SQ_ALU_OMOD_M2                                = 0x01,
+	    SQ_ALU_OMOD_M4                                = 0x02,
+	    SQ_ALU_OMOD_D2                                = 0x03,
+	SQ_ALU_WORD1_OP2__ALU_INST_mask                   = 0x7ff << 7,
+	SQ_ALU_WORD1_OP2__ALU_INST_shift                  = 7,
+	    SQ_OP2_INST_ADD                               = 0x00,
+	    SQ_OP2_INST_MUL                               = 0x01,
+	    SQ_OP2_INST_MUL_IEEE                          = 0x02,
+	    SQ_OP2_INST_MAX                               = 0x03,
+	    SQ_OP2_INST_MIN                               = 0x04,
+	    SQ_OP2_INST_MAX_DX10                          = 0x05,
+	    SQ_OP2_INST_MIN_DX10                          = 0x06,
+	    SQ_OP2_INST_SETE                              = 0x08,
+	    SQ_OP2_INST_SETGT                             = 0x09,
+	    SQ_OP2_INST_SETGE                             = 0x0a,
+	    SQ_OP2_INST_SETNE                             = 0x0b,
+	    SQ_OP2_INST_SETE_DX10                         = 0x0c,
+	    SQ_OP2_INST_SETGT_DX10                        = 0x0d,
+	    SQ_OP2_INST_SETGE_DX10                        = 0x0e,
+	    SQ_OP2_INST_SETNE_DX10                        = 0x0f,
+	    SQ_OP2_INST_FRACT                             = 0x10,
+	    SQ_OP2_INST_TRUNC                             = 0x11,
+	    SQ_OP2_INST_CEIL                              = 0x12,
+	    SQ_OP2_INST_RNDNE                             = 0x13,
+	    SQ_OP2_INST_FLOOR                             = 0x14,
+	    SQ_OP2_INST_ASHR_INT                          = 0x15,
+	    SQ_OP2_INST_LSHR_INT                          = 0x16,
+	    SQ_OP2_INST_LSHL_INT                          = 0x17,
+	    SQ_OP2_INST_MOV                               = 0x19,
+	    SQ_OP2_INST_NOP                               = 0x1a,
+	    SQ_OP2_INST_PRED_SETGT_UINT                   = 0x1e,
+	    SQ_OP2_INST_PRED_SETGE_UINT                   = 0x1f,
+	    SQ_OP2_INST_PRED_SETE                         = 0x20,
+	    SQ_OP2_INST_PRED_SETGT                        = 0x21,
+	    SQ_OP2_INST_PRED_SETGE                        = 0x22,
+	    SQ_OP2_INST_PRED_SETNE                        = 0x23,
+	    SQ_OP2_INST_PRED_SET_INV                      = 0x24,
+	    SQ_OP2_INST_PRED_SET_POP                      = 0x25,
+	    SQ_OP2_INST_PRED_SET_CLR                      = 0x26,
+	    SQ_OP2_INST_PRED_SET_RESTORE                  = 0x27,
+	    SQ_OP2_INST_PRED_SETE_PUSH                    = 0x28,
+	    SQ_OP2_INST_PRED_SETGT_PUSH                   = 0x29,
+	    SQ_OP2_INST_PRED_SETGE_PUSH                   = 0x2a,
+	    SQ_OP2_INST_PRED_SETNE_PUSH                   = 0x2b,
+	    SQ_OP2_INST_KILLE                             = 0x2c,
+	    SQ_OP2_INST_KILLGT                            = 0x2d,
+	    SQ_OP2_INST_KILLGE                            = 0x2e,
+	    SQ_OP2_INST_KILLNE                            = 0x2f,
+	    SQ_OP2_INST_AND_INT                           = 0x30,
+	    SQ_OP2_INST_OR_INT                            = 0x31,
+	    SQ_OP2_INST_XOR_INT                           = 0x32,
+	    SQ_OP2_INST_NOT_INT                           = 0x33,
+	    SQ_OP2_INST_ADD_INT                           = 0x34,
+	    SQ_OP2_INST_SUB_INT                           = 0x35,
+	    SQ_OP2_INST_MAX_INT                           = 0x36,
+	    SQ_OP2_INST_MIN_INT                           = 0x37,
+	    SQ_OP2_INST_MAX_UINT                          = 0x38,
+	    SQ_OP2_INST_MIN_UINT                          = 0x39,
+	    SQ_OP2_INST_SETE_INT                          = 0x3a,
+	    SQ_OP2_INST_SETGT_INT                         = 0x3b,
+	    SQ_OP2_INST_SETGE_INT                         = 0x3c,
+	    SQ_OP2_INST_SETNE_INT                         = 0x3d,
+	    SQ_OP2_INST_SETGT_UINT                        = 0x3e,
+	    SQ_OP2_INST_SETGE_UINT                        = 0x3f,
+	    SQ_OP2_INST_KILLGT_UINT                       = 0x40,
+	    SQ_OP2_INST_KILLGE_UINT                       = 0x41,
+	    SQ_OP2_INST_PRED_SETE_INT                     = 0x42,
+	    SQ_OP2_INST_PRED_SETGT_INT                    = 0x43,
+	    SQ_OP2_INST_PRED_SETGE_INT                    = 0x44,
+	    SQ_OP2_INST_PRED_SETNE_INT                    = 0x45,
+	    SQ_OP2_INST_KILLE_INT                         = 0x46,
+	    SQ_OP2_INST_KILLGT_INT                        = 0x47,
+	    SQ_OP2_INST_KILLGE_INT                        = 0x48,
+	    SQ_OP2_INST_KILLNE_INT                        = 0x49,
+	    SQ_OP2_INST_PRED_SETE_PUSH_INT                = 0x4a,
+	    SQ_OP2_INST_PRED_SETGT_PUSH_INT               = 0x4b,
+	    SQ_OP2_INST_PRED_SETGE_PUSH_INT               = 0x4c,
+	    SQ_OP2_INST_PRED_SETNE_PUSH_INT               = 0x4d,
+	    SQ_OP2_INST_PRED_SETLT_PUSH_INT               = 0x4e,
+	    SQ_OP2_INST_PRED_SETLE_PUSH_INT               = 0x4f,
+	    SQ_OP2_INST_FLT_TO_INT                        = 0x50,
+	    SQ_OP2_INST_BFREV_INT                         = 0x51,
+	    SQ_OP2_INST_ADDC_UINT                         = 0x52,
+	    SQ_OP2_INST_SUBB_UINT                         = 0x53,
+	    SQ_OP2_INST_GROUP_BARRIER                     = 0x54,
+	    SQ_OP2_INST_GROUP_SEQ_BEGIN                   = 0x55,
+	    SQ_OP2_INST_GROUP_SEQ_END                     = 0x56,
+	    SQ_OP2_INST_SET_MODE                          = 0x57,
+	    SQ_OP2_INST_SET_CF_IDX0                       = 0x58,
+	    SQ_OP2_INST_SET_CF_IDX1                       = 0x59,
+	    SQ_OP2_INST_SET_LDS_SIZE                      = 0x5a,
+	    SQ_OP2_INST_EXP_IEEE                          = 0x81,
+	    SQ_OP2_INST_LOG_CLAMPED                       = 0x82,
+	    SQ_OP2_INST_LOG_IEEE                          = 0x83,
+	    SQ_OP2_INST_RECIP_CLAMPED                     = 0x84,
+	    SQ_OP2_INST_RECIP_FF                          = 0x85,
+	    SQ_OP2_INST_RECIP_IEEE                        = 0x86,
+	    SQ_OP2_INST_RECIPSQRT_CLAMPED                 = 0x87,
+	    SQ_OP2_INST_RECIPSQRT_FF                      = 0x88,
+	    SQ_OP2_INST_RECIPSQRT_IEEE                    = 0x89,
+	    SQ_OP2_INST_SQRT_IEEE                         = 0x8a,
+	    SQ_OP2_INST_SIN                               = 0x8d,
+	    SQ_OP2_INST_COS                               = 0x8e,
+	    SQ_OP2_INST_MULLO_INT                         = 0x8f,
+	    SQ_OP2_INST_MULHI_INT                         = 0x90,
+	    SQ_OP2_INST_MULLO_UINT                        = 0x91,
+	    SQ_OP2_INST_MULHI_UINT                        = 0x92,
+	    SQ_OP2_INST_RECIP_INT                         = 0x93,
+	    SQ_OP2_INST_RECIP_UINT                        = 0x94,
+	    SQ_OP2_INST_RECIP_64                          = 0x95,
+	    SQ_OP2_INST_RECIP_CLAMPED_64                  = 0x96,
+	    SQ_OP2_INST_RECIPSQRT_64                      = 0x97,
+	    SQ_OP2_INST_RECIPSQRT_CLAMPED_64              = 0x98,
+	    SQ_OP2_INST_SQRT_64                           = 0x99,
+	    SQ_OP2_INST_FLT_TO_UINT                       = 0x9a,
+	    SQ_OP2_INST_INT_TO_FLT                        = 0x9b,
+	    SQ_OP2_INST_UINT_TO_FLT                       = 0x9c,
+	    SQ_OP2_INST_BFM_INT                           = 0xa0,
+	    SQ_OP2_INST_FLT32_TO_FLT16                    = 0xa2,
+	    SQ_OP2_INST_FLT16_TO_FLT32                    = 0xa3,
+	    SQ_OP2_INST_UBYTE0_FLT                        = 0xa4,
+	    SQ_OP2_INST_UBYTE1_FLT                        = 0xa5,
+	    SQ_OP2_INST_UBYTE2_FLT                        = 0xa6,
+	    SQ_OP2_INST_UBYTE3_FLT                        = 0xa7,
+	    SQ_OP2_INST_BCNT_INT                          = 0xaa,
+	    SQ_OP2_INST_FFBH_UINT                         = 0xab,
+	    SQ_OP2_INST_FFBL_INT                          = 0xac,
+	    SQ_OP2_INST_FFBH_INT                          = 0xad,
+	    SQ_OP2_INST_FLT_TO_UINT4                      = 0xae,
+	    SQ_OP2_INST_DOT_IEEE                          = 0xaf,
+	    SQ_OP2_INST_FLT_TO_INT_RPI                    = 0xb0,
+	    SQ_OP2_INST_FLT_TO_INT_FLOOR                  = 0xb1,
+	    SQ_OP2_INST_MULHI_UINT24                      = 0xb2,
+	    SQ_OP2_INST_MBCNT_32HI_INT                    = 0xb3,
+	    SQ_OP2_INST_OFFSET_TO_FLT                     = 0xb4,
+	    SQ_OP2_INST_MUL_UINT24                        = 0xb5,
+	    SQ_OP2_INST_BCNT_ACCUM_PREV_INT               = 0xb6,
+	    SQ_OP2_INST_MBCNT_32LO_ACCUM_PREV_INT         = 0xb7,
+	    SQ_OP2_INST_SETE_64                           = 0xb8,
+	    SQ_OP2_INST_SETNE_64                          = 0xb9,
+	    SQ_OP2_INST_SETGT_64                          = 0xba,
+	    SQ_OP2_INST_SETGE_64                          = 0xbb,
+	    SQ_OP2_INST_MIN_64                            = 0xbc,
+	    SQ_OP2_INST_MAX_64                            = 0xbd,
+	    SQ_OP2_INST_DOT4                              = 0xbe,
+	    SQ_OP2_INST_DOT4_IEEE                         = 0xbf,
+	    SQ_OP2_INST_CUBE                              = 0xc0,
+	    SQ_OP2_INST_MAX4                              = 0xc1,
+	    SQ_OP2_INST_FREXP_64                          = 0xc4,
+	    SQ_OP2_INST_LDEXP_64                          = 0xc5,
+	    SQ_OP2_INST_FRACT_64                          = 0xc6,
+	    SQ_OP2_INST_PRED_SETGT_64                     = 0xc7,
+	    SQ_OP2_INST_PRED_SETE_64                      = 0xc8,
+	    SQ_OP2_INST_PRED_SETGE_64                     = 0xc9,
+	    SQ_OP2_INST_MUL_64                            = 0xca,
+	    SQ_OP2_INST_ADD_64                            = 0xcb,
+	    SQ_OP2_INST_MOVA_INT                          = 0xcc,
+	    SQ_OP2_INST_FLT64_TO_FLT32                    = 0xcd,
+	    SQ_OP2_INST_FLT32_TO_FLT64                    = 0xce,
+	    SQ_OP2_INST_SAD_ACCUM_PREV_UINT               = 0xcf,
+	    SQ_OP2_INST_DOT                               = 0xd0,
+	    SQ_OP2_INST_MUL_PREV                          = 0xd1,
+	    SQ_OP2_INST_MUL_IEEE_PREV                     = 0xd2,
+	    SQ_OP2_INST_ADD_PREV                          = 0xd3,
+	    SQ_OP2_INST_MULADD_PREV                       = 0xd4,
+	    SQ_OP2_INST_MULADD_IEEE_PREV                  = 0xd5,
+	    SQ_OP2_INST_INTERP_XY                         = 0xd6,
+	    SQ_OP2_INST_INTERP_ZW                         = 0xd7,
+	    SQ_OP2_INST_INTERP_X                          = 0xd8,
+	    SQ_OP2_INST_INTERP_Z                          = 0xd9,
+	    SQ_OP2_INST_STORE_FLAGS                       = 0xda,
+	    SQ_OP2_INST_LOAD_STORE_FLAGS                  = 0xdb,
+	    SQ_OP2_INST_INTERP_LOAD_P0                    = 0xe0,
+	    SQ_OP2_INST_INTERP_LOAD_P10                   = 0xe1,
+	    SQ_OP2_INST_INTERP_LOAD_P20                   = 0xe2,
+    SQ_CF_WORD1                                           = 0x00008dfc,
+	POP_COUNT_mask                                    = 0x07 << 0,
+	POP_COUNT_shift                                   = 0,
+	CF_CONST_mask                                     = 0x1f << 3,
+	CF_CONST_shift                                    = 3,
+	COND_mask                                         = 0x03 << 8,
+	COND_shift                                        = 8,
+	    SQ_CF_COND_ACTIVE                             = 0x00,
+	    SQ_CF_COND_FALSE                              = 0x01,
+	    SQ_CF_COND_BOOL                               = 0x02,
+	    SQ_CF_COND_NOT_BOOL                           = 0x03,
+	SQ_CF_WORD1__COUNT_mask                           = 0x3f << 10,
+	SQ_CF_WORD1__COUNT_shift                          = 10,
+/* 	VALID_PIXEL_MODE_bit                              = 1 << 20, */
+/* 	END_OF_PROGRAM_bit                                = 1 << 21, */
+	SQ_CF_WORD1__CF_INST_mask                         = 0xff << 22,
+	SQ_CF_WORD1__CF_INST_shift                        = 22,
+	    SQ_CF_INST_NOP                                = 0x00,
+	    SQ_CF_INST_TC                                 = 0x01,
+	    SQ_CF_INST_VC                                 = 0x02,
+	    SQ_CF_INST_GDS                                = 0x03,
+	    SQ_CF_INST_LOOP_START                         = 0x04,
+	    SQ_CF_INST_LOOP_END                           = 0x05,
+	    SQ_CF_INST_LOOP_START_DX10                    = 0x06,
+	    SQ_CF_INST_LOOP_START_NO_AL                   = 0x07,
+	    SQ_CF_INST_LOOP_CONTINUE                      = 0x08,
+	    SQ_CF_INST_LOOP_BREAK                         = 0x09,
+	    SQ_CF_INST_JUMP                               = 0x0a,
+	    SQ_CF_INST_PUSH                               = 0x0b,
+	    SQ_CF_INST_ELSE                               = 0x0d,
+	    SQ_CF_INST_POP                                = 0x0e,
+	    SQ_CF_INST_CALL                               = 0x12,
+	    SQ_CF_INST_CALL_FS                            = 0x13,
+	    SQ_CF_INST_RETURN                             = 0x14,
+	    SQ_CF_INST_EMIT_VERTEX                        = 0x15,
+	    SQ_CF_INST_EMIT_CUT_VERTEX                    = 0x16,
+	    SQ_CF_INST_CUT_VERTEX                         = 0x17,
+	    SQ_CF_INST_KILL                               = 0x18,
+	    SQ_CF_INST_WAIT_ACK                           = 0x1a,
+	    SQ_CF_INST_TC_ACK                             = 0x1b,
+	    SQ_CF_INST_VC_ACK                             = 0x1c,
+	    SQ_CF_INST_JUMPTABLE                          = 0x1d,
+	    SQ_CF_INST_GLOBAL_WAVE_SYNC                   = 0x1e,
+	    SQ_CF_INST_HALT                               = 0x1f,
+/* 	WHOLE_QUAD_MODE_bit                               = 1 << 30, */
+/* 	BARRIER_bit                                       = 1 << 31, */
+    SQ_VTX_WORD1_SEM                                      = 0x00008dfc,
+	SEMANTIC_ID_mask                                  = 0xff << 0,
+	SEMANTIC_ID_shift                                 = 0,
+    SQ_TEX_WORD0                                          = 0x00008dfc,
+	TEX_INST_mask                                     = 0x1f << 0,
+	TEX_INST_shift                                    = 0,
+	    SQ_TEX_INST_LD                                = 0x03,
+	    SQ_TEX_INST_GET_TEXTURE_RESINFO               = 0x04,
+	    SQ_TEX_INST_GET_NUMBER_OF_SAMPLES             = 0x05,
+	    SQ_TEX_INST_GET_LOD                           = 0x06,
+	    SQ_TEX_INST_GET_GRADIENTS_H                   = 0x07,
+	    SQ_TEX_INST_GET_GRADIENTS_V                   = 0x08,
+	    SQ_TEX_INST_SET_TEXTURE_OFFSETS               = 0x09,
+	    SQ_TEX_INST_KEEP_GRADIENTS                    = 0x0a,
+	    SQ_TEX_INST_SET_GRADIENTS_H                   = 0x0b,
+	    SQ_TEX_INST_SET_GRADIENTS_V                   = 0x0c,
+	    SQ_TEX_INST_PASS                              = 0x0d,
+	    SQ_TEX_INST_SAMPLE                            = 0x10,
+	    SQ_TEX_INST_SAMPLE_L                          = 0x11,
+	    SQ_TEX_INST_SAMPLE_LB                         = 0x12,
+	    SQ_TEX_INST_SAMPLE_LZ                         = 0x13,
+	    SQ_TEX_INST_SAMPLE_G                          = 0x14,
+	    SQ_TEX_INST_GATHER4                           = 0x15,
+	    SQ_TEX_INST_SAMPLE_G_LB                       = 0x16,
+	    SQ_TEX_INST_GATHER4_O                         = 0x17,
+	    SQ_TEX_INST_SAMPLE_C                          = 0x18,
+	    SQ_TEX_INST_SAMPLE_C_L                        = 0x19,
+	    SQ_TEX_INST_SAMPLE_C_LB                       = 0x1a,
+	    SQ_TEX_INST_SAMPLE_C_LZ                       = 0x1b,
+	    SQ_TEX_INST_SAMPLE_C_G                        = 0x1c,
+	    SQ_TEX_INST_GATHER4_C                         = 0x1d,
+	    SQ_TEX_INST_SAMPLE_C_G_LB                     = 0x1e,
+	    SQ_TEX_INST_GATHER4_C_O                       = 0x1f,
+	INST_MOD_mask                                     = 0x03 << 5,
+	INST_MOD_shift                                    = 5,
+/* 	FETCH_WHOLE_QUAD_bit                              = 1 << 7, */
+	RESOURCE_ID_mask                                  = 0xff << 8,
+	RESOURCE_ID_shift                                 = 8,
+	SQ_TEX_WORD0__SRC_GPR_mask                        = 0x7f << 16,
+	SQ_TEX_WORD0__SRC_GPR_shift                       = 16,
+/* 	SRC_REL_bit                                       = 1 << 23, */
+	SQ_TEX_WORD0__ALT_CONST_bit                       = 1 << 24,
+	RESOURCE_INDEX_MODE_mask                          = 0x03 << 25,
+	RESOURCE_INDEX_MODE_shift                         = 25,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	SAMPLER_INDEX_MODE_mask                           = 0x03 << 27,
+	SAMPLER_INDEX_MODE_shift                          = 27,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+    SQ_VTX_WORD1_GPR                                      = 0x00008dfc,
+	SQ_VTX_WORD1_GPR__DST_GPR_mask                    = 0x7f << 0,
+	SQ_VTX_WORD1_GPR__DST_GPR_shift                   = 0,
+	SQ_VTX_WORD1_GPR__DST_REL_bit                     = 1 << 7,
+    SQ_ALU_WORD1_LDS_IDX_OP                               = 0x00008dfc,
+/* 	SRC2_SEL_mask                                     = 0x1ff << 0, */
+/* 	SRC2_SEL_shift                                    = 0, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0, */
+/* 	    SQ_ALU_SRC_TIME_HI                            = 0xe3, */
+/* 	    SQ_ALU_SRC_TIME_LO                            = 0xe4, */
+/* 	    SQ_ALU_SRC_MASK_HI                            = 0xe5, */
+/* 	    SQ_ALU_SRC_MASK_LO                            = 0xe6, */
+/* 	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7, */
+/* 	    SQ_ALU_SRC_SIMD_ID                            = 0xe8, */
+/* 	    SQ_ALU_SRC_SE_ID                              = 0xe9, */
+/* 	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea, */
+/* 	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb, */
+/* 	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec, */
+/* 	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed, */
+/* 	    SQ_ALU_SRC_LOOP_IDX                           = 0xee, */
+/* 	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0, */
+/* 	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3, */
+/* 	    SQ_ALU_SRC_1_DBL_L                            = 0xf4, */
+/* 	    SQ_ALU_SRC_1_DBL_M                            = 0xf5, */
+/* 	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6, */
+/* 	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7, */
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+/* 	SRC2_REL_bit                                      = 1 << 9, */
+/* 	SRC2_CHAN_mask                                    = 0x03 << 10, */
+/* 	SRC2_CHAN_shift                                   = 10, */
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	IDX_OFFSET_1_bit                                  = 1 << 12,
+	SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_mask            = 0x1f << 13,
+	SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_shift           = 13,
+/* 	    SQ_OP3_INST_BFE_UINT                          = 0x04, */
+/* 	    SQ_OP3_INST_BFE_INT                           = 0x05, */
+/* 	    SQ_OP3_INST_BFI_INT                           = 0x06, */
+/* 	    SQ_OP3_INST_FMA                               = 0x07, */
+/* 	    SQ_OP3_INST_CNDNE_64                          = 0x09, */
+/* 	    SQ_OP3_INST_FMA_64                            = 0x0a, */
+/* 	    SQ_OP3_INST_LERP_UINT                         = 0x0b, */
+/* 	    SQ_OP3_INST_BIT_ALIGN_INT                     = 0x0c, */
+/* 	    SQ_OP3_INST_BYTE_ALIGN_INT                    = 0x0d, */
+/* 	    SQ_OP3_INST_SAD_ACCUM_UINT                    = 0x0e, */
+/* 	    SQ_OP3_INST_SAD_ACCUM_HI_UINT                 = 0x0f, */
+/* 	    SQ_OP3_INST_MULADD_UINT24                     = 0x10, */
+/* 	    SQ_OP3_INST_LDS_IDX_OP                        = 0x11, */
+/* 	    SQ_OP3_INST_MULADD                            = 0x14, */
+/* 	    SQ_OP3_INST_MULADD_M2                         = 0x15, */
+/* 	    SQ_OP3_INST_MULADD_M4                         = 0x16, */
+/* 	    SQ_OP3_INST_MULADD_D2                         = 0x17, */
+/* 	    SQ_OP3_INST_MULADD_IEEE                       = 0x18, */
+/* 	    SQ_OP3_INST_CNDE                              = 0x19, */
+/* 	    SQ_OP3_INST_CNDGT                             = 0x1a, */
+/* 	    SQ_OP3_INST_CNDGE                             = 0x1b, */
+/* 	    SQ_OP3_INST_CNDE_INT                          = 0x1c, */
+/* 	    SQ_OP3_INST_CNDGT_INT                         = 0x1d, */
+/* 	    SQ_OP3_INST_CNDGE_INT                         = 0x1e, */
+/* 	    SQ_OP3_INST_MUL_LIT                           = 0x1f, */
+/* 	BANK_SWIZZLE_mask                                 = 0x07 << 18, */
+/* 	BANK_SWIZZLE_shift                                = 18, */
+/* 	    SQ_ALU_VEC_012                                = 0x00, */
+/* 	    SQ_ALU_VEC_021                                = 0x01, */
+/* 	    SQ_ALU_VEC_120                                = 0x02, */
+/* 	    SQ_ALU_VEC_102                                = 0x03, */
+/* 	    SQ_ALU_VEC_201                                = 0x04, */
+/* 	    SQ_ALU_VEC_210                                = 0x05, */
+	LDS_OP_mask                                       = 0x3f << 21,
+	LDS_OP_shift                                      = 21,
+/* 	    SQ_DS_INST_ADD                                = 0x00, */
+/* 	    SQ_DS_INST_SUB                                = 0x01, */
+/* 	    SQ_DS_INST_RSUB                               = 0x02, */
+/* 	    SQ_DS_INST_INC                                = 0x03, */
+/* 	    SQ_DS_INST_DEC                                = 0x04, */
+/* 	    SQ_DS_INST_MIN_INT                            = 0x05, */
+/* 	    SQ_DS_INST_MAX_INT                            = 0x06, */
+/* 	    SQ_DS_INST_MIN_UINT                           = 0x07, */
+/* 	    SQ_DS_INST_MAX_UINT                           = 0x08, */
+/* 	    SQ_DS_INST_AND                                = 0x09, */
+/* 	    SQ_DS_INST_OR                                 = 0x0a, */
+/* 	    SQ_DS_INST_XOR                                = 0x0b, */
+/* 	    SQ_DS_INST_MSKOR                              = 0x0c, */
+/* 	    SQ_DS_INST_WRITE                              = 0x0d, */
+/* 	    SQ_DS_INST_WRITE_REL                          = 0x0e, */
+/* 	    SQ_DS_INST_WRITE2                             = 0x0f, */
+/* 	    SQ_DS_INST_CMP_STORE                          = 0x10, */
+/* 	    SQ_DS_INST_CMP_STORE_SPF                      = 0x11, */
+/* 	    SQ_DS_INST_BYTE_WRITE                         = 0x12, */
+/* 	    SQ_DS_INST_SHORT_WRITE                        = 0x13, */
+/* 	    SQ_DS_INST_ADD_RET                            = 0x20, */
+/* 	    SQ_DS_INST_SUB_RET                            = 0x21, */
+/* 	    SQ_DS_INST_RSUB_RET                           = 0x22, */
+/* 	    SQ_DS_INST_INC_RET                            = 0x23, */
+/* 	    SQ_DS_INST_DEC_RET                            = 0x24, */
+/* 	    SQ_DS_INST_MIN_INT_RET                        = 0x25, */
+/* 	    SQ_DS_INST_MAX_INT_RET                        = 0x26, */
+/* 	    SQ_DS_INST_MIN_UINT_RET                       = 0x27, */
+/* 	    SQ_DS_INST_MAX_UINT_RET                       = 0x28, */
+/* 	    SQ_DS_INST_AND_RET                            = 0x29, */
+/* 	    SQ_DS_INST_OR_RET                             = 0x2a, */
+/* 	    SQ_DS_INST_XOR_RET                            = 0x2b, */
+/* 	    SQ_DS_INST_MSKOR_RET                          = 0x2c, */
+/* 	    SQ_DS_INST_XCHG_RET                           = 0x2d, */
+/* 	    SQ_DS_INST_XCHG_REL_RET                       = 0x2e, */
+/* 	    SQ_DS_INST_XCHG2_RET                          = 0x2f, */
+/* 	    SQ_DS_INST_CMP_XCHG_RET                       = 0x30, */
+/* 	    SQ_DS_INST_CMP_XCHG_SPF_RET                   = 0x31, */
+/* 	    SQ_DS_INST_READ_RET                           = 0x32, */
+/* 	    SQ_DS_INST_READ_REL_RET                       = 0x33, */
+/* 	    SQ_DS_INST_READ2_RET                          = 0x34, */
+/* 	    SQ_DS_INST_READWRITE_RET                      = 0x35, */
+/* 	    SQ_DS_INST_BYTE_READ_RET                      = 0x36, */
+/* 	    SQ_DS_INST_UBYTE_READ_RET                     = 0x37, */
+/* 	    SQ_DS_INST_SHORT_READ_RET                     = 0x38, */
+/* 	    SQ_DS_INST_USHORT_READ_RET                    = 0x39, */
+/* 	    SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET           = 0x3f, */
+	IDX_OFFSET_0_bit                                  = 1 << 27,
+	IDX_OFFSET_2_bit                                  = 1 << 28,
+/* 	DST_CHAN_mask                                     = 0x03 << 29, */
+/* 	DST_CHAN_shift                                    = 29, */
+/* 	    CHAN_X                                        = 0x00, */
+/* 	    CHAN_Y                                        = 0x01, */
+/* 	    CHAN_Z                                        = 0x02, */
+/* 	    CHAN_W                                        = 0x03, */
+	IDX_OFFSET_3_bit                                  = 1 << 31,
+    SQ_CF_ENCODING_WORD1                                  = 0x00008dfc,
+	SQ_CF_ENCODING_WORD1__ENCODING_mask               = 0x03 << 28,
+	SQ_CF_ENCODING_WORD1__ENCODING_shift              = 28,
+	    SQ_CF_ENCODING_INST_CF                        = 0x00,
+	    SQ_CF_ENCODING_INST_ALLOC_EXPORT              = 0x01,
+	    SQ_CF_ENCODING_INST_ALU0                      = 0x02,
+	    SQ_CF_ENCODING_INST_ALU1                      = 0x03,
+    SQ_ALU_WORD0                                          = 0x00008dfc,
+/* 	SRC0_SEL_mask                                     = 0x1ff << 0, */
+/* 	SRC0_SEL_shift                                    = 0, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0, */
+/* 	    SQ_ALU_SRC_TIME_HI                            = 0xe3, */
+/* 	    SQ_ALU_SRC_TIME_LO                            = 0xe4, */
+/* 	    SQ_ALU_SRC_MASK_HI                            = 0xe5, */
+/* 	    SQ_ALU_SRC_MASK_LO                            = 0xe6, */
+/* 	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7, */
+/* 	    SQ_ALU_SRC_SIMD_ID                            = 0xe8, */
+/* 	    SQ_ALU_SRC_SE_ID                              = 0xe9, */
+/* 	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea, */
+/* 	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb, */
+/* 	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec, */
+/* 	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed, */
+/* 	    SQ_ALU_SRC_LOOP_IDX                           = 0xee, */
+/* 	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0, */
+/* 	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3, */
+/* 	    SQ_ALU_SRC_1_DBL_L                            = 0xf4, */
+/* 	    SQ_ALU_SRC_1_DBL_M                            = 0xf5, */
+/* 	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6, */
+/* 	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7, */
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+/* 	SRC0_REL_bit                                      = 1 << 9, */
+/* 	SRC0_CHAN_mask                                    = 0x03 << 10, */
+/* 	SRC0_CHAN_shift                                   = 10, */
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	SRC0_NEG_bit                                      = 1 << 12,
+/* 	SRC1_SEL_mask                                     = 0x1ff << 13, */
+/* 	SRC1_SEL_shift                                    = 13, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0, */
+/* 	    SQ_ALU_SRC_TIME_HI                            = 0xe3, */
+/* 	    SQ_ALU_SRC_TIME_LO                            = 0xe4, */
+/* 	    SQ_ALU_SRC_MASK_HI                            = 0xe5, */
+/* 	    SQ_ALU_SRC_MASK_LO                            = 0xe6, */
+/* 	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7, */
+/* 	    SQ_ALU_SRC_SIMD_ID                            = 0xe8, */
+/* 	    SQ_ALU_SRC_SE_ID                              = 0xe9, */
+/* 	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea, */
+/* 	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb, */
+/* 	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec, */
+/* 	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed, */
+/* 	    SQ_ALU_SRC_LOOP_IDX                           = 0xee, */
+/* 	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0, */
+/* 	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3, */
+/* 	    SQ_ALU_SRC_1_DBL_L                            = 0xf4, */
+/* 	    SQ_ALU_SRC_1_DBL_M                            = 0xf5, */
+/* 	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6, */
+/* 	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7, */
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+/* 	SRC1_REL_bit                                      = 1 << 22, */
+/* 	SRC1_CHAN_mask                                    = 0x03 << 23, */
+/* 	SRC1_CHAN_shift                                   = 23, */
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	SRC1_NEG_bit                                      = 1 << 25,
+/* 	INDEX_MODE_mask                                   = 0x07 << 26, */
+/* 	INDEX_MODE_shift                                  = 26, */
+/* 	    SQ_INDEX_AR_X                                 = 0x00, */
+/* 	    SQ_INDEX_LOOP                                 = 0x04, */
+/* 	    SQ_INDEX_GLOBAL                               = 0x05, */
+/* 	    SQ_INDEX_GLOBAL_AR_X                          = 0x06, */
+/* 	PRED_SEL_mask                                     = 0x03 << 29, */
+/* 	PRED_SEL_shift                                    = 29, */
+/* 	    SQ_PRED_SEL_OFF                               = 0x00, */
+/* 	    SQ_PRED_SEL_ZERO                              = 0x02, */
+/* 	    SQ_PRED_SEL_ONE                               = 0x03, */
+/* 	LAST_bit                                          = 1 << 31, */
+    SQ_MEM_RD_WORD1                                       = 0x00008dfc,
+	SQ_MEM_RD_WORD1__DST_GPR_mask                     = 0x7f << 0,
+	SQ_MEM_RD_WORD1__DST_GPR_shift                    = 0,
+	SQ_MEM_RD_WORD1__DST_REL_bit                      = 1 << 7,
+	SQ_MEM_RD_WORD1__DST_SEL_X_mask                   = 0x07 << 9,
+	SQ_MEM_RD_WORD1__DST_SEL_X_shift                  = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_RD_WORD1__DST_SEL_Y_mask                   = 0x07 << 12,
+	SQ_MEM_RD_WORD1__DST_SEL_Y_shift                  = 12,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_RD_WORD1__DST_SEL_Z_mask                   = 0x07 << 15,
+	SQ_MEM_RD_WORD1__DST_SEL_Z_shift                  = 15,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_RD_WORD1__DST_SEL_W_mask                   = 0x07 << 18,
+	SQ_MEM_RD_WORD1__DST_SEL_W_shift                  = 18,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_RD_WORD1__DATA_FORMAT_mask                 = 0x3f << 22,
+	SQ_MEM_RD_WORD1__DATA_FORMAT_shift                = 22,
+	SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_mask              = 0x03 << 28,
+	SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_shift             = 28,
+/* 	    SQ_NUM_FORMAT_NORM                            = 0x00, */
+/* 	    SQ_NUM_FORMAT_INT                             = 0x01, */
+/* 	    SQ_NUM_FORMAT_SCALED                          = 0x02, */
+	SQ_MEM_RD_WORD1__FORMAT_COMP_ALL_bit              = 1 << 30,
+	SQ_MEM_RD_WORD1__SRF_MODE_ALL_bit                 = 1 << 31,
+    SQ_LSTMP_RING_BASE                                    = 0x00008e10,
+    SQ_LSTMP_RING_SIZE                                    = 0x00008e14,
+    SQ_HSTMP_RING_BASE                                    = 0x00008e18,
+    SQ_HSTMP_RING_SIZE                                    = 0x00008e1c,
+    SX_EXPORT_BUFFER_SIZES                                = 0x0000900c,
+	COLOR_BUFFER_SIZE_mask                            = 0xff << 0,
+	COLOR_BUFFER_SIZE_shift                           = 0,
+	POSITION_BUFFER_SIZE_mask                         = 0xff << 8,
+	POSITION_BUFFER_SIZE_shift                        = 8,
+	SMX_BUFFER_SIZE_mask                              = 0xff << 16,
+	SMX_BUFFER_SIZE_shift                             = 16,
+    SX_MEMORY_EXPORT_BASE                                 = 0x00009010,
+    SX_MEMORY_EXPORT_SIZE                                 = 0x00009014,
+    SPI_CONFIG_CNTL                                       = 0x00009100,
+	GPR_WRITE_PRIORITY_mask                           = 0x3ffff << 0,
+	GPR_WRITE_PRIORITY_shift                          = 0,
+    SPI_CONFIG_CNTL_1                                     = 0x0000913c,
+	VTX_DONE_DELAY_mask                               = 0x0f << 0,
+	VTX_DONE_DELAY_shift                              = 0,
+	    X_DELAY_14_CLKS                               = 0x00,
+	    X_DELAY_16_CLKS                               = 0x01,
+	    X_DELAY_18_CLKS                               = 0x02,
+	    X_DELAY_20_CLKS                               = 0x03,
+	    X_DELAY_22_CLKS                               = 0x04,
+	    X_DELAY_24_CLKS                               = 0x05,
+	    X_DELAY_26_CLKS                               = 0x06,
+	    X_DELAY_28_CLKS                               = 0x07,
+	    X_DELAY_30_CLKS                               = 0x08,
+	    X_DELAY_32_CLKS                               = 0x09,
+	    X_DELAY_34_CLKS                               = 0x0a,
+	    X_DELAY_4_CLKS                                = 0x0b,
+	    X_DELAY_6_CLKS                                = 0x0c,
+	    X_DELAY_8_CLKS                                = 0x0d,
+	    X_DELAY_10_CLKS                               = 0x0e,
+	    X_DELAY_12_CLKS                               = 0x0f,
+	INTERP_ONE_PRIM_PER_ROW_bit                       = 1 << 4,
+	BC_OPTIMIZE_DISABLE_bit                           = 1 << 5,
+	PC_LIMIT_ENABLE_bit                               = 1 << 6,
+	PC_LIMIT_STRICT_bit                               = 1 << 7,
+	PC_LIMIT_SIZE_mask                                = 0xffff << 16,
+	PC_LIMIT_SIZE_shift                               = 16,
+    TD_CNTL                                               = 0x00009494,
+	SYNC_PHASE_SH_mask                                = 0x03 << 0,
+	SYNC_PHASE_SH_shift                               = 0,
+	PAD_STALL_EN_bit                                  = 1 << 8,
+	GATHER4_FLOAT_MODE_bit                            = 1 << 16,
+    TD_STATUS                                             = 0x00009498,
+	BUSY_bit                                          = 1 << 31,
+    TA_CNTL_AUX                                           = 0x00009508,
+	TA_CNTL_AUX__DISABLE_CUBE_WRAP_bit                = 1 << 0,
+	DISABLE_CUBE_ANISO_bit                            = 1 << 1,
+	GETLOD_SELECT_mask                                = 0x03 << 2,
+	GETLOD_SELECT_shift                               = 2,
+	    X_SAMPLER_AND_RESOURCE_CLAMPED_LOD_IN_RESOURCE= 0x00,
+	DISABLE_IDLE_STALL_bit                            = 1 << 4,
+	TEX_COORD_PRECISION_bit                           = 1 << 28,
+	LOD_LOG2_TRUNC_bit                                = 1 << 29,
+    DB_ZPASS_COUNT_LOW                                    = 0x00009870,
+    DB_ZPASS_COUNT_HI                                     = 0x00009874,
+	COUNT_HI_mask                                     = 0x7fffffff << 0,
+	COUNT_HI_shift                                    = 0,
+    TD_PS_BORDER_COLOR_INDEX                              = 0x0000a400,
+	INDEX_mask                                        = 0x1f << 0,
+	INDEX_shift                                       = 0,
+    TD_PS_BORDER_COLOR_RED                                = 0x0000a404,
+    TD_PS_BORDER_COLOR_GREEN                              = 0x0000a408,
+    TD_PS_BORDER_COLOR_BLUE                               = 0x0000a40c,
+    TD_PS_BORDER_COLOR_ALPHA                              = 0x0000a410,
+    TD_VS_BORDER_COLOR_INDEX                              = 0x0000a414,
+/* 	INDEX_mask                                        = 0x1f << 0, */
+/* 	INDEX_shift                                       = 0, */
+    TD_VS_BORDER_COLOR_RED                                = 0x0000a418,
+    TD_VS_BORDER_COLOR_GREEN                              = 0x0000a41c,
+    TD_VS_BORDER_COLOR_BLUE                               = 0x0000a420,
+    TD_VS_BORDER_COLOR_ALPHA                              = 0x0000a424,
+    TD_GS_BORDER_COLOR_INDEX                              = 0x0000a428,
+/* 	INDEX_mask                                        = 0x1f << 0, */
+/* 	INDEX_shift                                       = 0, */
+    TD_GS_BORDER_COLOR_RED                                = 0x0000a42c,
+    TD_GS_BORDER_COLOR_GREEN                              = 0x0000a430,
+    TD_GS_BORDER_COLOR_BLUE                               = 0x0000a434,
+    TD_GS_BORDER_COLOR_ALPHA                              = 0x0000a438,
+    TD_HS_BORDER_COLOR_INDEX                              = 0x0000a43c,
+/* 	INDEX_mask                                        = 0x1f << 0, */
+/* 	INDEX_shift                                       = 0, */
+    TD_HS_BORDER_COLOR_RED                                = 0x0000a440,
+    TD_HS_BORDER_COLOR_GREEN                              = 0x0000a444,
+    TD_HS_BORDER_COLOR_BLUE                               = 0x0000a448,
+    TD_HS_BORDER_COLOR_ALPHA                              = 0x0000a44c,
+    TD_LS_BORDER_COLOR_INDEX                              = 0x0000a450,
+/* 	INDEX_mask                                        = 0x1f << 0, */
+/* 	INDEX_shift                                       = 0, */
+    TD_LS_BORDER_COLOR_RED                                = 0x0000a454,
+    TD_LS_BORDER_COLOR_GREEN                              = 0x0000a458,
+    TD_LS_BORDER_COLOR_BLUE                               = 0x0000a45c,
+    TD_LS_BORDER_COLOR_ALPHA                              = 0x0000a460,
+    TD_CS_BORDER_COLOR_INDEX                              = 0x0000a464,
+/* 	INDEX_mask                                        = 0x1f << 0, */
+/* 	INDEX_shift                                       = 0, */
+    TD_CS_BORDER_COLOR_RED                                = 0x0000a468,
+    TD_CS_BORDER_COLOR_GREEN                              = 0x0000a46c,
+    TD_CS_BORDER_COLOR_BLUE                               = 0x0000a470,
+    TD_CS_BORDER_COLOR_ALPHA                              = 0x0000a474,
+    DB_RENDER_CONTROL                                     = 0x00028000,
+	DEPTH_CLEAR_ENABLE_bit                            = 1 << 0,
+	STENCIL_CLEAR_ENABLE_bit                          = 1 << 1,
+	DEPTH_COPY_bit                                    = 1 << 2,
+	STENCIL_COPY_bit                                  = 1 << 3,
+	RESUMMARIZE_ENABLE_bit                            = 1 << 4,
+	STENCIL_COMPRESS_DISABLE_bit                      = 1 << 5,
+	DEPTH_COMPRESS_DISABLE_bit                        = 1 << 6,
+	COPY_CENTROID_bit                                 = 1 << 7,
+	COPY_SAMPLE_mask                                  = 0x07 << 8,
+	COPY_SAMPLE_shift                                 = 8,
+	COLOR_DISABLE_bit                                 = 1 << 12,
+    DB_COUNT_CONTROL                                      = 0x00028004,
+	ZPASS_INCREMENT_DISABLE_bit                       = 1 << 0,
+	PERFECT_ZPASS_COUNTS_bit                          = 1 << 1,
+    DB_DEPTH_VIEW                                         = 0x00028008,
+	SLICE_START_mask                                  = 0x7ff << 0,
+	SLICE_START_shift                                 = 0,
+	SLICE_MAX_mask                                    = 0x7ff << 13,
+	SLICE_MAX_shift                                   = 13,
+	Z_READ_ONLY_bit                                   = 1 << 24,
+	STENCIL_READ_ONLY_bit                             = 1 << 25,
+    DB_RENDER_OVERRIDE                                    = 0x0002800c,
+	FORCE_HIZ_ENABLE_mask                             = 0x03 << 0,
+	FORCE_HIZ_ENABLE_shift                            = 0,
+	    FORCE_OFF                                     = 0x00,
+	    FORCE_ENABLE                                  = 0x01,
+	    FORCE_DISABLE                                 = 0x02,
+	    FORCE_RESERVED                                = 0x03,
+	FORCE_HIS_ENABLE0_mask                            = 0x03 << 2,
+	FORCE_HIS_ENABLE0_shift                           = 2,
+/* 	    FORCE_OFF                                     = 0x00, */
+/* 	    FORCE_ENABLE                                  = 0x01, */
+/* 	    FORCE_DISABLE                                 = 0x02, */
+/* 	    FORCE_RESERVED                                = 0x03, */
+	FORCE_HIS_ENABLE1_mask                            = 0x03 << 4,
+	FORCE_HIS_ENABLE1_shift                           = 4,
+/* 	    FORCE_OFF                                     = 0x00, */
+/* 	    FORCE_ENABLE                                  = 0x01, */
+/* 	    FORCE_DISABLE                                 = 0x02, */
+/* 	    FORCE_RESERVED                                = 0x03, */
+	FORCE_SHADER_Z_ORDER_bit                          = 1 << 6,
+	FAST_Z_DISABLE_bit                                = 1 << 7,
+	FAST_STENCIL_DISABLE_bit                          = 1 << 8,
+	NOOP_CULL_DISABLE_bit                             = 1 << 9,
+	FORCE_COLOR_KILL_bit                              = 1 << 10,
+	FORCE_Z_READ_bit                                  = 1 << 11,
+	FORCE_STENCIL_READ_bit                            = 1 << 12,
+	FORCE_FULL_Z_RANGE_mask                           = 0x03 << 13,
+	FORCE_FULL_Z_RANGE_shift                          = 13,
+/* 	    FORCE_OFF                                     = 0x00, */
+/* 	    FORCE_ENABLE                                  = 0x01, */
+/* 	    FORCE_DISABLE                                 = 0x02, */
+/* 	    FORCE_RESERVED                                = 0x03, */
+	FORCE_QC_SMASK_CONFLICT_bit                       = 1 << 15,
+	DISABLE_VIEWPORT_CLAMP_bit                        = 1 << 16,
+	IGNORE_SC_ZRANGE_bit                              = 1 << 17,
+	DISABLE_FULLY_COVERED_bit                         = 1 << 18,
+	FORCE_Z_LIMIT_SUMM_mask                           = 0x03 << 19,
+	FORCE_Z_LIMIT_SUMM_shift                          = 19,
+	    FORCE_SUMM_OFF                                = 0x00,
+	    FORCE_SUMM_MINZ                               = 0x01,
+	    FORCE_SUMM_MAXZ                               = 0x02,
+	    FORCE_SUMM_BOTH                               = 0x03,
+	MAX_TILES_IN_DTT_mask                             = 0x1f << 21,
+	MAX_TILES_IN_DTT_shift                            = 21,
+	DISABLE_PIXEL_RATE_TILES_bit                      = 1 << 26,
+	FORCE_Z_DIRTY_bit                                 = 1 << 27,
+	FORCE_STENCIL_DIRTY_bit                           = 1 << 28,
+	FORCE_Z_VALID_bit                                 = 1 << 29,
+	FORCE_STENCIL_VALID_bit                           = 1 << 30,
+	PRESERVE_COMPRESSION_bit                          = 1 << 31,
+    DB_RENDER_OVERRIDE2                                   = 0x00028010,
+	PARTIAL_SQUAD_LAUNCH_CONTROL_mask                 = 0x03 << 0,
+	PARTIAL_SQUAD_LAUNCH_CONTROL_shift                = 0,
+	    PSLC_AUTO                                     = 0x00,
+	    PSLC_ON_HANG_ONLY                             = 0x01,
+	    PSLC_ASAP                                     = 0x02,
+	    PSLC_COUNTDOWN                                = 0x03,
+	PARTIAL_SQUAD_LAUNCH_COUNTDOWN_mask               = 0x07 << 2,
+	PARTIAL_SQUAD_LAUNCH_COUNTDOWN_shift              = 2,
+	DISABLE_ZMASK_EXPCLEAR_OPTIMIZATIO_bit            = 1 << 5,
+    DB_HTILE_DATA_BASE                                    = 0x00028014,
+    DB_STENCIL_CLEAR                                      = 0x00028028,
+	DB_STENCIL_CLEAR__CLEAR_mask                      = 0xff << 0,
+	DB_STENCIL_CLEAR__CLEAR_shift                     = 0,
+	MIN_mask                                          = 0xff << 16,
+	MIN_shift                                         = 16,
+    DB_DEPTH_CLEAR                                        = 0x0002802c,
+    PA_SC_SCREEN_SCISSOR_TL                               = 0x00028030,
+	PA_SC_SCREEN_SCISSOR_TL__TL_X_mask                = 0xffff << 0,
+	PA_SC_SCREEN_SCISSOR_TL__TL_X_shift               = 0,
+	PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask                = 0xffff << 16,
+	PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift               = 16,
+    PA_SC_SCREEN_SCISSOR_BR                               = 0x00028034,
+	PA_SC_SCREEN_SCISSOR_BR__BR_X_mask                = 0xffff << 0,
+	PA_SC_SCREEN_SCISSOR_BR__BR_X_shift               = 0,
+	PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask                = 0xffff << 16,
+	PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift               = 16,
+    DB_Z_INFO                                             = 0x00028040,
+	DB_Z_INFO__FORMAT_mask                            = 0x03 << 0,
+	DB_Z_INFO__FORMAT_shift                           = 0,
+	    Z_INVALID                                     = 0x00,
+	    Z_16                                          = 0x01,
+	    Z_24                                          = 0x02,
+	    Z_32_FLOAT                                    = 0x03,
+	DB_Z_INFO__ARRAY_MODE_mask                        = 0x0f << 4,
+	DB_Z_INFO__ARRAY_MODE_shift                       = 4,
+	    ARRAY_LINEAR_GENERAL                          = 0x00,
+	    ARRAY_LINEAR_ALIGNED                          = 0x01,
+	    ARRAY_1D_TILED_THIN1                          = 0x02,
+	    ARRAY_2D_TILED_THIN1                          = 0x04,
+	DB_Z_INFO__TILE_SPLIT_mask                        = 0x07 << 8,
+	DB_Z_INFO__TILE_SPLIT_shift                       = 8,
+	    ADDR_SURF_TILE_SPLIT_64B                      = 0x00,
+	    ADDR_SURF_TILE_SPLIT_128B                     = 0x01,
+	    ADDR_SURF_TILE_SPLIT_256B                     = 0x02,
+	    ADDR_SURF_TILE_SPLIT_512B                     = 0x03,
+	    ADDR_SURF_TILE_SPLIT_1KB                      = 0x04,
+	    ADDR_SURF_TILE_SPLIT_2KB                      = 0x05,
+	    ADDR_SURF_TILE_SPLIT_4KB                      = 0x06,
+	DB_Z_INFO__NUM_BANKS_mask                         = 0x03 << 12,
+	DB_Z_INFO__NUM_BANKS_shift                        = 12,
+	    ADDR_SURF_2_BANK                              = 0x00,
+	    ADDR_SURF_4_BANK                              = 0x01,
+	    ADDR_SURF_8_BANK                              = 0x02,
+	    ADDR_SURF_16_BANK                             = 0x03,
+	DB_Z_INFO__BANK_WIDTH_mask                        = 0x03 << 16,
+	DB_Z_INFO__BANK_WIDTH_shift                       = 16,
+	    ADDR_SURF_BANK_WIDTH_1                        = 0x00,
+	    ADDR_SURF_BANK_WIDTH_2                        = 0x01,
+	    ADDR_SURF_BANK_WIDTH_4                        = 0x02,
+	    ADDR_SURF_BANK_WIDTH_8                        = 0x03,
+	DB_Z_INFO__BANK_HEIGHT_mask                       = 0x03 << 20,
+	DB_Z_INFO__BANK_HEIGHT_shift                      = 20,
+	    ADDR_SURF_BANK_HEIGHT_1                       = 0x00,
+	    ADDR_SURF_BANK_HEIGHT_2                       = 0x01,
+	    ADDR_SURF_BANK_HEIGHT_4                       = 0x02,
+	    ADDR_SURF_BANK_HEIGHT_8                       = 0x03,
+	DB_Z_INFO__MACRO_TILE_ASPECT_mask                 = 0x03 << 24,
+	DB_Z_INFO__MACRO_TILE_ASPECT_shift                = 24,
+	    ADDR_SURF_MACRO_ASPECT_1                      = 0x00,
+	    ADDR_SURF_MACRO_ASPECT_2                      = 0x01,
+	    ADDR_SURF_MACRO_ASPECT_4                      = 0x02,
+	    ADDR_SURF_MACRO_ASPECT_8                      = 0x03,
+	ALLOW_EXPCLEAR_bit                                = 1 << 27,
+	READ_SIZE_bit                                     = 1 << 28,
+	TILE_SURFACE_ENABLE_bit                           = 1 << 29,
+	DB_Z_INFO__TILE_COMPACT_bit                       = 1 << 30,
+	ZRANGE_PRECISION_bit                              = 1 << 31,
+    DB_STENCIL_INFO                                       = 0x00028044,
+	DB_STENCIL_INFO__FORMAT_bit                       = 1 << 0,
+	DB_STENCIL_INFO__TILE_SPLIT_mask                  = 0x07 << 8,
+	DB_STENCIL_INFO__TILE_SPLIT_shift                 = 8,
+/* 	    ADDR_SURF_TILE_SPLIT_64B                      = 0x00, */
+/* 	    ADDR_SURF_TILE_SPLIT_128B                     = 0x01, */
+/* 	    ADDR_SURF_TILE_SPLIT_256B                     = 0x02, */
+/* 	    ADDR_SURF_TILE_SPLIT_512B                     = 0x03, */
+/* 	    ADDR_SURF_TILE_SPLIT_1KB                      = 0x04, */
+/* 	    ADDR_SURF_TILE_SPLIT_2KB                      = 0x05, */
+/* 	    ADDR_SURF_TILE_SPLIT_4KB                      = 0x06, */
+    DB_Z_READ_BASE                                        = 0x00028048,
+    DB_STENCIL_READ_BASE                                  = 0x0002804c,
+    DB_Z_WRITE_BASE                                       = 0x00028050,
+    DB_STENCIL_WRITE_BASE                                 = 0x00028054,
+    DB_DEPTH_SIZE                                         = 0x00028058,
+	PITCH_TILE_MAX_mask                               = 0x7ff << 0,
+	PITCH_TILE_MAX_shift                              = 0,
+	HEIGHT_TILE_MAX_mask                              = 0x7ff << 11,
+	HEIGHT_TILE_MAX_shift                             = 11,
+    DB_DEPTH_SLICE                                        = 0x0002805c,
+	SLICE_TILE_MAX_mask                               = 0x3fffff << 0,
+	SLICE_TILE_MAX_shift                              = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_PS_0                         = 0x00028140,
+	SQ_ALU_CONST_BUFFER_SIZE_PS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift         = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_VS_0                         = 0x00028180,
+	SQ_ALU_CONST_BUFFER_SIZE_VS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift         = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_GS_0                         = 0x000281c0,
+	SQ_ALU_CONST_BUFFER_SIZE_GS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift         = 0,
+    PA_SC_WINDOW_OFFSET                                   = 0x00028200,
+	WINDOW_X_OFFSET_mask                              = 0xffff << 0,
+	WINDOW_X_OFFSET_shift                             = 0,
+	WINDOW_Y_OFFSET_mask                              = 0xffff << 16,
+	WINDOW_Y_OFFSET_shift                             = 16,
+    PA_SC_WINDOW_SCISSOR_TL                               = 0x00028204,
+	PA_SC_WINDOW_SCISSOR_TL__TL_X_mask                = 0x7fff << 0,
+	PA_SC_WINDOW_SCISSOR_TL__TL_X_shift               = 0,
+	PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask                = 0x7fff << 16,
+	PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift               = 16,
+	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31,
+    PA_SC_WINDOW_SCISSOR_BR                               = 0x00028208,
+	PA_SC_WINDOW_SCISSOR_BR__BR_X_mask                = 0x7fff << 0,
+	PA_SC_WINDOW_SCISSOR_BR__BR_X_shift               = 0,
+	PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask                = 0x7fff << 16,
+	PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift               = 16,
+    PA_SC_CLIPRECT_RULE                                   = 0x0002820c,
+	CLIP_RULE_mask                                    = 0xffff << 0,
+	CLIP_RULE_shift                                   = 0,
+    PA_SC_CLIPRECT_0_TL                                   = 0x00028210,
+	PA_SC_CLIPRECT_0_TL_num                           = 4,
+	PA_SC_CLIPRECT_0_TL_offset                        = 8,
+	PA_SC_CLIPRECT_0_TL__TL_X_mask                    = 0x7fff << 0,
+	PA_SC_CLIPRECT_0_TL__TL_X_shift                   = 0,
+	PA_SC_CLIPRECT_0_TL__TL_Y_mask                    = 0x7fff << 16,
+	PA_SC_CLIPRECT_0_TL__TL_Y_shift                   = 16,
+    PA_SC_CLIPRECT_0_BR                                   = 0x00028214,
+	PA_SC_CLIPRECT_0_BR_num                           = 4,
+	PA_SC_CLIPRECT_0_BR_offset                        = 8,
+	PA_SC_CLIPRECT_0_BR__BR_X_mask                    = 0x7fff << 0,
+	PA_SC_CLIPRECT_0_BR__BR_X_shift                   = 0,
+	PA_SC_CLIPRECT_0_BR__BR_Y_mask                    = 0x7fff << 16,
+	PA_SC_CLIPRECT_0_BR__BR_Y_shift                   = 16,
+    PA_SC_EDGERULE                                        = 0x00028230,
+	ER_TRI_mask                                       = 0x0f << 0,
+	ER_TRI_shift                                      = 0,
+	ER_POINT_mask                                     = 0x0f << 4,
+	ER_POINT_shift                                    = 4,
+	ER_RECT_mask                                      = 0x0f << 8,
+	ER_RECT_shift                                     = 8,
+	ER_LINE_LR_mask                                   = 0x3f << 12,
+	ER_LINE_LR_shift                                  = 12,
+	ER_LINE_RL_mask                                   = 0x3f << 18,
+	ER_LINE_RL_shift                                  = 18,
+	ER_LINE_TB_mask                                   = 0x0f << 24,
+	ER_LINE_TB_shift                                  = 24,
+	ER_LINE_BT_mask                                   = 0x0f << 28,
+	ER_LINE_BT_shift                                  = 28,
+    PA_SU_HARDWARE_SCREEN_OFFSET                          = 0x00028234,
+	HW_SCREEN_OFFSET_X_mask                           = 0x1f << 0,
+	HW_SCREEN_OFFSET_X_shift                          = 0,
+	HW_SCREEN_OFFSET_Y_mask                           = 0x1f << 8,
+	HW_SCREEN_OFFSET_Y_shift                          = 8,
+    CB_TARGET_MASK                                        = 0x00028238,
+	TARGET0_ENABLE_mask                               = 0x0f << 0,
+	TARGET0_ENABLE_shift                              = 0,
+	TARGET1_ENABLE_mask                               = 0x0f << 4,
+	TARGET1_ENABLE_shift                              = 4,
+	TARGET2_ENABLE_mask                               = 0x0f << 8,
+	TARGET2_ENABLE_shift                              = 8,
+	TARGET3_ENABLE_mask                               = 0x0f << 12,
+	TARGET3_ENABLE_shift                              = 12,
+	TARGET4_ENABLE_mask                               = 0x0f << 16,
+	TARGET4_ENABLE_shift                              = 16,
+	TARGET5_ENABLE_mask                               = 0x0f << 20,
+	TARGET5_ENABLE_shift                              = 20,
+	TARGET6_ENABLE_mask                               = 0x0f << 24,
+	TARGET6_ENABLE_shift                              = 24,
+	TARGET7_ENABLE_mask                               = 0x0f << 28,
+	TARGET7_ENABLE_shift                              = 28,
+    CB_SHADER_MASK                                        = 0x0002823c,
+	OUTPUT0_ENABLE_mask                               = 0x0f << 0,
+	OUTPUT0_ENABLE_shift                              = 0,
+	OUTPUT1_ENABLE_mask                               = 0x0f << 4,
+	OUTPUT1_ENABLE_shift                              = 4,
+	OUTPUT2_ENABLE_mask                               = 0x0f << 8,
+	OUTPUT2_ENABLE_shift                              = 8,
+	OUTPUT3_ENABLE_mask                               = 0x0f << 12,
+	OUTPUT3_ENABLE_shift                              = 12,
+	OUTPUT4_ENABLE_mask                               = 0x0f << 16,
+	OUTPUT4_ENABLE_shift                              = 16,
+	OUTPUT5_ENABLE_mask                               = 0x0f << 20,
+	OUTPUT5_ENABLE_shift                              = 20,
+	OUTPUT6_ENABLE_mask                               = 0x0f << 24,
+	OUTPUT6_ENABLE_shift                              = 24,
+	OUTPUT7_ENABLE_mask                               = 0x0f << 28,
+	OUTPUT7_ENABLE_shift                              = 28,
+    PA_SC_GENERIC_SCISSOR_TL                              = 0x00028240,
+	PA_SC_GENERIC_SCISSOR_TL__TL_X_mask               = 0x7fff << 0,
+	PA_SC_GENERIC_SCISSOR_TL__TL_X_shift              = 0,
+	PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask               = 0x7fff << 16,
+	PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift              = 16,
+/* 	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31, */
+    PA_SC_GENERIC_SCISSOR_BR                              = 0x00028244,
+	PA_SC_GENERIC_SCISSOR_BR__BR_X_mask               = 0x7fff << 0,
+	PA_SC_GENERIC_SCISSOR_BR__BR_X_shift              = 0,
+	PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask               = 0x7fff << 16,
+	PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift              = 16,
+    PA_SC_VPORT_SCISSOR_0_TL                              = 0x00028250,
+	PA_SC_VPORT_SCISSOR_0_TL_num                      = 16,
+	PA_SC_VPORT_SCISSOR_0_TL_offset                   = 8,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask               = 0x7fff << 0,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift              = 0,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask               = 0x7fff << 16,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift              = 16,
+/* 	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31, */
+    PA_SC_VPORT_SCISSOR_0_BR                              = 0x00028254,
+	PA_SC_VPORT_SCISSOR_0_BR_num                      = 16,
+	PA_SC_VPORT_SCISSOR_0_BR_offset                   = 8,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask               = 0x7fff << 0,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift              = 0,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask               = 0x7fff << 16,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift              = 16,
+    PA_SC_VPORT_ZMIN_0                                    = 0x000282d0,
+	PA_SC_VPORT_ZMIN_0_num                            = 16,
+	PA_SC_VPORT_ZMIN_0_offset                         = 8,
+    PA_SC_VPORT_ZMAX_0                                    = 0x000282d4,
+	PA_SC_VPORT_ZMAX_0_num                            = 16,
+	PA_SC_VPORT_ZMAX_0_offset                         = 8,
+    SX_MISC                                               = 0x00028350,
+	MULTIPASS_bit                                     = 1 << 0,
+    SQ_VTX_SEMANTIC_0                                     = 0x00028380,
+	SQ_VTX_SEMANTIC_0_num                             = 32,
+/* 	SEMANTIC_ID_mask                                  = 0xff << 0, */
+/* 	SEMANTIC_ID_shift                                 = 0, */
+    VGT_MAX_VTX_INDX                                      = 0x00028400,
+    VGT_MIN_VTX_INDX                                      = 0x00028404,
+    VGT_INDX_OFFSET                                       = 0x00028408,
+    VGT_MULTI_PRIM_IB_RESET_INDX                          = 0x0002840c,
+    SX_ALPHA_TEST_CONTROL                                 = 0x00028410,
+	ALPHA_FUNC_mask                                   = 0x07 << 0,
+	ALPHA_FUNC_shift                                  = 0,
+	    REF_NEVER                                     = 0x00,
+	    REF_LESS                                      = 0x01,
+	    REF_EQUAL                                     = 0x02,
+	    REF_LEQUAL                                    = 0x03,
+	    REF_GREATER                                   = 0x04,
+	    REF_NOTEQUAL                                  = 0x05,
+	    REF_GEQUAL                                    = 0x06,
+	    REF_ALWAYS                                    = 0x07,
+	ALPHA_TEST_ENABLE_bit                             = 1 << 3,
+	ALPHA_TEST_BYPASS_bit                             = 1 << 8,
+    CB_BLEND_RED                                          = 0x00028414,
+    CB_BLEND_GREEN                                        = 0x00028418,
+    CB_BLEND_BLUE                                         = 0x0002841c,
+    CB_BLEND_ALPHA                                        = 0x00028420,
+    DB_STENCILREFMASK                                     = 0x00028430,
+	STENCILREF_mask                                   = 0xff << 0,
+	STENCILREF_shift                                  = 0,
+	STENCILMASK_mask                                  = 0xff << 8,
+	STENCILMASK_shift                                 = 8,
+	STENCILWRITEMASK_mask                             = 0xff << 16,
+	STENCILWRITEMASK_shift                            = 16,
+    DB_STENCILREFMASK_BF                                  = 0x00028434,
+	STENCILREF_BF_mask                                = 0xff << 0,
+	STENCILREF_BF_shift                               = 0,
+	STENCILMASK_BF_mask                               = 0xff << 8,
+	STENCILMASK_BF_shift                              = 8,
+	STENCILWRITEMASK_BF_mask                          = 0xff << 16,
+	STENCILWRITEMASK_BF_shift                         = 16,
+    SX_ALPHA_REF                                          = 0x00028438,
+    PA_CL_VPORT_XSCALE_0                                  = 0x0002843c,
+	PA_CL_VPORT_XSCALE_0_num                          = 16,
+	PA_CL_VPORT_XSCALE_0_offset                       = 24,
+    PA_CL_VPORT_XOFFSET_0                                 = 0x00028440,
+	PA_CL_VPORT_XOFFSET_0_num                         = 16,
+	PA_CL_VPORT_XOFFSET_0_offset                      = 24,
+    PA_CL_VPORT_YSCALE_0                                  = 0x00028444,
+	PA_CL_VPORT_YSCALE_0_num                          = 16,
+	PA_CL_VPORT_YSCALE_0_offset                       = 24,
+    PA_CL_VPORT_YOFFSET_0                                 = 0x00028448,
+	PA_CL_VPORT_YOFFSET_0_num                         = 16,
+	PA_CL_VPORT_YOFFSET_0_offset                      = 24,
+    PA_CL_VPORT_ZSCALE_0                                  = 0x0002844c,
+	PA_CL_VPORT_ZSCALE_0_num                          = 16,
+	PA_CL_VPORT_ZSCALE_0_offset                       = 24,
+    PA_CL_VPORT_ZOFFSET_0                                 = 0x00028450,
+	PA_CL_VPORT_ZOFFSET_0_num                         = 16,
+	PA_CL_VPORT_ZOFFSET_0_offset                      = 24,
+    PA_CL_UCP_0_X                                         = 0x000285bc,
+	PA_CL_UCP_0_X_num                                 = 6,
+	PA_CL_UCP_0_X_offset                              = 16,
+    PA_CL_UCP_0_Y                                         = 0x000285c0,
+	PA_CL_UCP_0_Y_num                                 = 6,
+	PA_CL_UCP_0_Y_offset                              = 16,
+    PA_CL_UCP_0_Z                                         = 0x000285c4,
+	PA_CL_UCP_0_Z_num                                 = 6,
+	PA_CL_UCP_0_Z_offset                              = 16,
+    PA_CL_UCP_0_W                                         = 0x000285c8,
+	PA_CL_UCP_0_W_num                                 = 6,
+	PA_CL_UCP_0_W_offset                              = 16,
+    SPI_VS_OUT_ID_0                                       = 0x0002861c,
+	SPI_VS_OUT_ID_0_num                               = 10,
+	SEMANTIC_0_mask                                   = 0xff << 0,
+	SEMANTIC_0_shift                                  = 0,
+	SEMANTIC_1_mask                                   = 0xff << 8,
+	SEMANTIC_1_shift                                  = 8,
+	SEMANTIC_2_mask                                   = 0xff << 16,
+	SEMANTIC_2_shift                                  = 16,
+	SEMANTIC_3_mask                                   = 0xff << 24,
+	SEMANTIC_3_shift                                  = 24,
+    SPI_PS_INPUT_CNTL_0                                   = 0x00028644,
+	SPI_PS_INPUT_CNTL_0_num                           = 32,
+	SEMANTIC_mask                                     = 0xff << 0,
+	SEMANTIC_shift                                    = 0,
+	DEFAULT_VAL_mask                                  = 0x03 << 8,
+	DEFAULT_VAL_shift                                 = 8,
+	    X_0_0F                                        = 0x00,
+	FLAT_SHADE_bit                                    = 1 << 10,
+	CYL_WRAP_mask                                     = 0x0f << 13,
+	CYL_WRAP_shift                                    = 13,
+	PT_SPRITE_TEX_bit                                 = 1 << 17,
+    SPI_VS_OUT_CONFIG                                     = 0x000286c4,
+	VS_PER_COMPONENT_bit                              = 1 << 0,
+	VS_EXPORT_COUNT_mask                              = 0x1f << 1,
+	VS_EXPORT_COUNT_shift                             = 1,
+	VS_EXPORTS_FOG_bit                                = 1 << 8,
+	VS_OUT_FOG_VEC_ADDR_mask                          = 0x1f << 9,
+	VS_OUT_FOG_VEC_ADDR_shift                         = 9,
+    SPI_PS_IN_CONTROL_0                                   = 0x000286cc,
+	NUM_INTERP_mask                                   = 0x3f << 0,
+	NUM_INTERP_shift                                  = 0,
+	POSITION_ENA_bit                                  = 1 << 8,
+	POSITION_CENTROID_bit                             = 1 << 9,
+	POSITION_ADDR_mask                                = 0x1f << 10,
+	POSITION_ADDR_shift                               = 10,
+	PARAM_GEN_mask                                    = 0x0f << 15,
+	PARAM_GEN_shift                                   = 15,
+	PERSP_GRADIENT_ENA_bit                            = 1 << 28,
+	LINEAR_GRADIENT_ENA_bit                           = 1 << 29,
+	POSITION_SAMPLE_bit                               = 1 << 30,
+    SPI_PS_IN_CONTROL_1                                   = 0x000286d0,
+	FRONT_FACE_ENA_bit                                = 1 << 8,
+	FRONT_FACE_ALL_BITS_bit                           = 1 << 11,
+	FRONT_FACE_ADDR_mask                              = 0x1f << 12,
+	FRONT_FACE_ADDR_shift                             = 12,
+	FOG_ADDR_mask                                     = 0x7f << 17,
+	FOG_ADDR_shift                                    = 17,
+	FIXED_PT_POSITION_ENA_bit                         = 1 << 24,
+	FIXED_PT_POSITION_ADDR_mask                       = 0x1f << 25,
+	FIXED_PT_POSITION_ADDR_shift                      = 25,
+	POSITION_ULC_bit                                  = 1 << 30,
+    SPI_INTERP_CONTROL_0                                  = 0x000286d4,
+	FLAT_SHADE_ENA_bit                                = 1 << 0,
+	PNT_SPRITE_ENA_bit                                = 1 << 1,
+	PNT_SPRITE_OVRD_X_mask                            = 0x07 << 2,
+	PNT_SPRITE_OVRD_X_shift                           = 2,
+	    SPI_PNT_SPRITE_SEL_0                          = 0x00,
+	    SPI_PNT_SPRITE_SEL_1                          = 0x01,
+	    SPI_PNT_SPRITE_SEL_S                          = 0x02,
+	    SPI_PNT_SPRITE_SEL_T                          = 0x03,
+	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04,
+	PNT_SPRITE_OVRD_Y_mask                            = 0x07 << 5,
+	PNT_SPRITE_OVRD_Y_shift                           = 5,
+/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+	PNT_SPRITE_OVRD_Z_mask                            = 0x07 << 8,
+	PNT_SPRITE_OVRD_Z_shift                           = 8,
+/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+	PNT_SPRITE_OVRD_W_mask                            = 0x07 << 11,
+	PNT_SPRITE_OVRD_W_shift                           = 11,
+/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+	PNT_SPRITE_TOP_1_bit                              = 1 << 14,
+    SPI_INPUT_Z                                           = 0x000286d8,
+	PROVIDE_Z_TO_SPI_bit                              = 1 << 0,
+    SPI_FOG_CNTL                                          = 0x000286dc,
+	PASS_FOG_THROUGH_PS_bit                           = 1 << 0,
+    SPI_BARYC_CNTL                                        = 0x000286e0,
+	PERSP_CENTER_ENA_mask                             = 0x03 << 0,
+	PERSP_CENTER_ENA_shift                            = 0,
+	    X_OFF                                         = 0x00,
+	    PERSP_CENTER_ENA__X_ON_AT_CENTER              = 0x01,
+	    PERSP_CENTER_ENA__X_ON_AT_CENTROID            = 0x02,
+	PERSP_CENTROID_ENA_mask                           = 0x03 << 4,
+	PERSP_CENTROID_ENA_shift                          = 4,
+/* 	    X_OFF                                         = 0x00, */
+	    PERSP_CENTROID_ENA__X_ON_AT_CENTROID          = 0x01,
+	    PERSP_CENTROID_ENA__X_ON_AT_CENTER            = 0x02,
+	PERSP_SAMPLE_ENA_mask                             = 0x03 << 8,
+	PERSP_SAMPLE_ENA_shift                            = 8,
+/* 	    X_OFF                                         = 0x00, */
+	PERSP_PULL_MODEL_ENA_mask                         = 0x03 << 12,
+	PERSP_PULL_MODEL_ENA_shift                        = 12,
+/* 	    X_OFF                                         = 0x00, */
+	LINEAR_CENTER_ENA_mask                            = 0x03 << 16,
+	LINEAR_CENTER_ENA_shift                           = 16,
+/* 	    X_OFF                                         = 0x00, */
+	    LINEAR_CENTER_ENA__X_ON_AT_CENTER             = 0x01,
+	    LINEAR_CENTER_ENA__X_ON_AT_CENTROID           = 0x02,
+	LINEAR_CENTROID_ENA_mask                          = 0x03 << 20,
+	LINEAR_CENTROID_ENA_shift                         = 20,
+/* 	    X_OFF                                         = 0x00, */
+	    LINEAR_CENTROID_ENA__X_ON_AT_CENTROID         = 0x01,
+	    LINEAR_CENTROID_ENA__X_ON_AT_CENTER           = 0x02,
+	LINEAR_SAMPLE_ENA_mask                            = 0x03 << 24,
+	LINEAR_SAMPLE_ENA_shift                           = 24,
+/* 	    X_OFF                                         = 0x00, */
+    SPI_PS_IN_CONTROL_2                                   = 0x000286e4,
+	LINE_STIPPLE_TEX_ADDR_mask                        = 0xff << 0,
+	LINE_STIPPLE_TEX_ADDR_shift                       = 0,
+	LINE_STIPPLE_TEX_ENA_bit                          = 1 << 8,
+    CB_BLEND0_CONTROL                                     = 0x00028780,
+	CB_BLEND0_CONTROL_num                             = 8,
+	COLOR_SRCBLEND_mask                               = 0x1f << 0,
+	COLOR_SRCBLEND_shift                              = 0,
+	    BLEND_ZERO                                    = 0x00,
+	    BLEND_ONE                                     = 0x01,
+	    BLEND_SRC_COLOR                               = 0x02,
+	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03,
+	    BLEND_SRC_ALPHA                               = 0x04,
+	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05,
+	    BLEND_DST_ALPHA                               = 0x06,
+	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07,
+	    BLEND_DST_COLOR                               = 0x08,
+	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09,
+	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a,
+	    BLEND_BOTH_SRC_ALPHA                          = 0x0b,
+	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c,
+	    BLEND_CONSTANT_COLOR                          = 0x0d,
+	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e,
+	    BLEND_SRC1_COLOR                              = 0x0f,
+	    BLEND_INV_SRC1_COLOR                          = 0x10,
+	    BLEND_SRC1_ALPHA                              = 0x11,
+	    BLEND_INV_SRC1_ALPHA                          = 0x12,
+	    BLEND_CONSTANT_ALPHA                          = 0x13,
+	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14,
+	COLOR_COMB_FCN_mask                               = 0x07 << 5,
+	COLOR_COMB_FCN_shift                              = 5,
+	    COMB_DST_PLUS_SRC                             = 0x00,
+	    COMB_SRC_MINUS_DST                            = 0x01,
+	    COMB_MIN_DST_SRC                              = 0x02,
+	    COMB_MAX_DST_SRC                              = 0x03,
+	    COMB_DST_MINUS_SRC                            = 0x04,
+	COLOR_DESTBLEND_mask                              = 0x1f << 8,
+	COLOR_DESTBLEND_shift                             = 8,
+/* 	    BLEND_ZERO                                    = 0x00, */
+/* 	    BLEND_ONE                                     = 0x01, */
+/* 	    BLEND_SRC_COLOR                               = 0x02, */
+/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/* 	    BLEND_SRC_ALPHA                               = 0x04, */
+/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/* 	    BLEND_DST_ALPHA                               = 0x06, */
+/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/* 	    BLEND_DST_COLOR                               = 0x08, */
+/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/* 	    BLEND_SRC1_COLOR                              = 0x0f, */
+/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */
+/* 	    BLEND_SRC1_ALPHA                              = 0x11, */
+/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+	ALPHA_SRCBLEND_mask                               = 0x1f << 16,
+	ALPHA_SRCBLEND_shift                              = 16,
+/* 	    BLEND_ZERO                                    = 0x00, */
+/* 	    BLEND_ONE                                     = 0x01, */
+/* 	    BLEND_SRC_COLOR                               = 0x02, */
+/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/* 	    BLEND_SRC_ALPHA                               = 0x04, */
+/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/* 	    BLEND_DST_ALPHA                               = 0x06, */
+/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/* 	    BLEND_DST_COLOR                               = 0x08, */
+/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/* 	    BLEND_SRC1_COLOR                              = 0x0f, */
+/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */
+/* 	    BLEND_SRC1_ALPHA                              = 0x11, */
+/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+	ALPHA_COMB_FCN_mask                               = 0x07 << 21,
+	ALPHA_COMB_FCN_shift                              = 21,
+/* 	    COMB_DST_PLUS_SRC                             = 0x00, */
+/* 	    COMB_SRC_MINUS_DST                            = 0x01, */
+/* 	    COMB_MIN_DST_SRC                              = 0x02, */
+/* 	    COMB_MAX_DST_SRC                              = 0x03, */
+/* 	    COMB_DST_MINUS_SRC                            = 0x04, */
+	ALPHA_DESTBLEND_mask                              = 0x1f << 24,
+	ALPHA_DESTBLEND_shift                             = 24,
+/* 	    BLEND_ZERO                                    = 0x00, */
+/* 	    BLEND_ONE                                     = 0x01, */
+/* 	    BLEND_SRC_COLOR                               = 0x02, */
+/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/* 	    BLEND_SRC_ALPHA                               = 0x04, */
+/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/* 	    BLEND_DST_ALPHA                               = 0x06, */
+/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/* 	    BLEND_DST_COLOR                               = 0x08, */
+/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/* 	    BLEND_SRC1_COLOR                              = 0x0f, */
+/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */
+/* 	    BLEND_SRC1_ALPHA                              = 0x11, */
+/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+	SEPARATE_ALPHA_BLEND_bit                          = 1 << 29,
+	CB_BLEND0_CONTROL__ENABLE_bit                     = 1 << 30,
+    PA_CL_POINT_X_RAD                                     = 0x000287d4,
+    PA_CL_POINT_Y_RAD                                     = 0x000287d8,
+    PA_CL_POINT_SIZE                                      = 0x000287dc,
+    PA_CL_POINT_CULL_RAD                                  = 0x000287e0,
+    VGT_DMA_BASE_HI                                       = 0x000287e4,
+	VGT_DMA_BASE_HI__BASE_ADDR_mask                   = 0xff << 0,
+	VGT_DMA_BASE_HI__BASE_ADDR_shift                  = 0,
+    VGT_DMA_BASE                                          = 0x000287e8,
+    VGT_DRAW_INITIATOR                                    = 0x000287f0,
+	SOURCE_SELECT_mask                                = 0x03 << 0,
+	SOURCE_SELECT_shift                               = 0,
+	    DI_SRC_SEL_DMA                                = 0x00,
+	    DI_SRC_SEL_IMMEDIATE                          = 0x01,
+	    DI_SRC_SEL_AUTO_INDEX                         = 0x02,
+	    DI_SRC_SEL_RESERVED                           = 0x03,
+	MAJOR_MODE_mask                                   = 0x03 << 2,
+	MAJOR_MODE_shift                                  = 2,
+	    DI_MAJOR_MODE_0                               = 0x00,
+	    DI_MAJOR_MODE_1                               = 0x01,
+	NOT_EOP_bit                                       = 1 << 5,
+	USE_OPAQUE_bit                                    = 1 << 6,
+    VGT_IMMED_DATA                                        = 0x000287f4,
+    VGT_EVENT_ADDRESS_REG                                 = 0x000287f8,
+	ADDRESS_LOW_mask                                  = 0xfffffff << 0,
+	ADDRESS_LOW_shift                                 = 0,
+    DB_DEPTH_CONTROL                                      = 0x00028800,
+	STENCIL_ENABLE_bit                                = 1 << 0,
+	Z_ENABLE_bit                                      = 1 << 1,
+	Z_WRITE_ENABLE_bit                                = 1 << 2,
+	ZFUNC_mask                                        = 0x07 << 4,
+	ZFUNC_shift                                       = 4,
+	    FRAG_NEVER                                    = 0x00,
+	    FRAG_LESS                                     = 0x01,
+	    FRAG_EQUAL                                    = 0x02,
+	    FRAG_LEQUAL                                   = 0x03,
+	    FRAG_GREATER                                  = 0x04,
+	    FRAG_NOTEQUAL                                 = 0x05,
+	    FRAG_GEQUAL                                   = 0x06,
+	    FRAG_ALWAYS                                   = 0x07,
+	BACKFACE_ENABLE_bit                               = 1 << 7,
+	STENCILFUNC_mask                                  = 0x07 << 8,
+	STENCILFUNC_shift                                 = 8,
+/* 	    REF_NEVER                                     = 0x00, */
+/* 	    REF_LESS                                      = 0x01, */
+/* 	    REF_EQUAL                                     = 0x02, */
+/* 	    REF_LEQUAL                                    = 0x03, */
+/* 	    REF_GREATER                                   = 0x04, */
+/* 	    REF_NOTEQUAL                                  = 0x05, */
+/* 	    REF_GEQUAL                                    = 0x06, */
+/* 	    REF_ALWAYS                                    = 0x07, */
+	STENCILFAIL_mask                                  = 0x07 << 11,
+	STENCILFAIL_shift                                 = 11,
+	    STENCIL_KEEP                                  = 0x00,
+	    STENCIL_ZERO                                  = 0x01,
+	    STENCIL_REPLACE                               = 0x02,
+	    STENCIL_INCR_CLAMP                            = 0x03,
+	    STENCIL_DECR_CLAMP                            = 0x04,
+	    STENCIL_INVERT                                = 0x05,
+	    STENCIL_INCR_WRAP                             = 0x06,
+	    STENCIL_DECR_WRAP                             = 0x07,
+	STENCILZPASS_mask                                 = 0x07 << 14,
+	STENCILZPASS_shift                                = 14,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILZFAIL_mask                                 = 0x07 << 17,
+	STENCILZFAIL_shift                                = 17,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILFUNC_BF_mask                               = 0x07 << 20,
+	STENCILFUNC_BF_shift                              = 20,
+/* 	    REF_NEVER                                     = 0x00, */
+/* 	    REF_LESS                                      = 0x01, */
+/* 	    REF_EQUAL                                     = 0x02, */
+/* 	    REF_LEQUAL                                    = 0x03, */
+/* 	    REF_GREATER                                   = 0x04, */
+/* 	    REF_NOTEQUAL                                  = 0x05, */
+/* 	    REF_GEQUAL                                    = 0x06, */
+/* 	    REF_ALWAYS                                    = 0x07, */
+	STENCILFAIL_BF_mask                               = 0x07 << 23,
+	STENCILFAIL_BF_shift                              = 23,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILZPASS_BF_mask                              = 0x07 << 26,
+	STENCILZPASS_BF_shift                             = 26,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILZFAIL_BF_mask                              = 0x07 << 29,
+	STENCILZFAIL_BF_shift                             = 29,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+    CB_COLOR_CONTROL                                      = 0x00028808,
+	DEGAMMA_ENABLE_bit                                = 1 << 3,
+	CB_COLOR_CONTROL__MODE_mask                       = 0x07 << 4,
+	CB_COLOR_CONTROL__MODE_shift                      = 4,
+	    CB_DISABLE                                    = 0x00,
+	    CB_NORMAL                                     = 0x01,
+	    CB_ELIMINATE_FAST_CLEAR                       = 0x02,
+	    CB_RESOLVE                                    = 0x03,
+	    CB_DECOMPRESS                                 = 0x04,
+	    CB_FMASK_DECOMPRESS                           = 0x05,
+	ROP3_mask                                         = 0xff << 16,
+	ROP3_shift                                        = 16,
+    DB_SHADER_CONTROL                                     = 0x0002880c,
+	Z_EXPORT_ENABLE_bit                               = 1 << 0,
+	STENCIL_REF_EXPORT_ENABLE_bit                     = 1 << 1,
+	Z_ORDER_mask                                      = 0x03 << 4,
+	Z_ORDER_shift                                     = 4,
+	    LATE_Z                                        = 0x00,
+	    EARLY_Z_THEN_LATE_Z                           = 0x01,
+	    RE_Z                                          = 0x02,
+	    EARLY_Z_THEN_RE_Z                             = 0x03,
+	KILL_ENABLE_bit                                   = 1 << 6,
+	COVERAGE_TO_MASK_ENABLE_bit                       = 1 << 7,
+	MASK_EXPORT_ENABLE_bit                            = 1 << 8,
+	DUAL_EXPORT_ENABLE_bit                            = 1 << 9,
+	EXEC_ON_HIER_FAIL_bit                             = 1 << 10,
+	EXEC_ON_NOOP_bit                                  = 1 << 11,
+	ALPHA_TO_MASK_DISABLE_bit                         = 1 << 12,
+	DB_SOURCE_FORMAT_mask                             = 0x03 << 13,
+	DB_SOURCE_FORMAT_shift                            = 13,
+	    EXPORT_DB_FULL                                = 0x00,
+	    EXPORT_DB_FOUR16                              = 0x01,
+	    EXPORT_DB_TWO                                 = 0x02,
+	DEPTH_BEFORE_SHADER_bit                           = 1 << 15,
+	CONSERVATIVE_Z_EXPORT_mask                        = 0x03 << 16,
+	CONSERVATIVE_Z_EXPORT_shift                       = 16,
+	    EXPORT_ANY_Z                                  = 0x00,
+	    EXPORT_LESS_THAN_Z                            = 0x01,
+	    EXPORT_GREATER_THAN_Z                         = 0x02,
+	    EXPORT_RESERVED                               = 0x03,
+    PA_CL_CLIP_CNTL                                       = 0x00028810,
+	UCP_ENA_0_bit                                     = 1 << 0,
+	UCP_ENA_1_bit                                     = 1 << 1,
+	UCP_ENA_2_bit                                     = 1 << 2,
+	UCP_ENA_3_bit                                     = 1 << 3,
+	UCP_ENA_4_bit                                     = 1 << 4,
+	UCP_ENA_5_bit                                     = 1 << 5,
+	PS_UCP_Y_SCALE_NEG_bit                            = 1 << 13,
+	PS_UCP_MODE_mask                                  = 0x03 << 14,
+	PS_UCP_MODE_shift                                 = 14,
+	CLIP_DISABLE_bit                                  = 1 << 16,
+	UCP_CULL_ONLY_ENA_bit                             = 1 << 17,
+	BOUNDARY_EDGE_FLAG_ENA_bit                        = 1 << 18,
+	DX_CLIP_SPACE_DEF_bit                             = 1 << 19,
+	DIS_CLIP_ERR_DETECT_bit                           = 1 << 20,
+	VTX_KILL_OR_bit                                   = 1 << 21,
+	DX_RASTERIZATION_KILL_bit                         = 1 << 22,
+	DX_LINEAR_ATTR_CLIP_ENA_bit                       = 1 << 24,
+	VTE_VPORT_PROVOKE_DISABLE_bit                     = 1 << 25,
+	ZCLIP_NEAR_DISABLE_bit                            = 1 << 26,
+	ZCLIP_FAR_DISABLE_bit                             = 1 << 27,
+    PA_SU_SC_MODE_CNTL                                    = 0x00028814,
+	CULL_FRONT_bit                                    = 1 << 0,
+	CULL_BACK_bit                                     = 1 << 1,
+	FACE_bit                                          = 1 << 2,
+	POLY_MODE_mask                                    = 0x03 << 3,
+	POLY_MODE_shift                                   = 3,
+	    X_DISABLE_POLY_MODE                           = 0x00,
+	    X_DUAL_MODE                                   = 0x01,
+	POLYMODE_FRONT_PTYPE_mask                         = 0x07 << 5,
+	POLYMODE_FRONT_PTYPE_shift                        = 5,
+	    X_DRAW_POINTS                                 = 0x00,
+	    X_DRAW_LINES                                  = 0x01,
+	    X_DRAW_TRIANGLES                              = 0x02,
+	POLYMODE_BACK_PTYPE_mask                          = 0x07 << 8,
+	POLYMODE_BACK_PTYPE_shift                         = 8,
+/* 	    X_DRAW_POINTS                                 = 0x00, */
+/* 	    X_DRAW_LINES                                  = 0x01, */
+/* 	    X_DRAW_TRIANGLES                              = 0x02, */
+	POLY_OFFSET_FRONT_ENABLE_bit                      = 1 << 11,
+	POLY_OFFSET_BACK_ENABLE_bit                       = 1 << 12,
+	POLY_OFFSET_PARA_ENABLE_bit                       = 1 << 13,
+	VTX_WINDOW_OFFSET_ENABLE_bit                      = 1 << 16,
+	PROVOKING_VTX_LAST_bit                            = 1 << 19,
+	PERSP_CORR_DIS_bit                                = 1 << 20,
+	MULTI_PRIM_IB_ENA_bit                             = 1 << 21,
+    PA_CL_VTE_CNTL                                        = 0x00028818,
+	VPORT_X_SCALE_ENA_bit                             = 1 << 0,
+	VPORT_X_OFFSET_ENA_bit                            = 1 << 1,
+	VPORT_Y_SCALE_ENA_bit                             = 1 << 2,
+	VPORT_Y_OFFSET_ENA_bit                            = 1 << 3,
+	VPORT_Z_SCALE_ENA_bit                             = 1 << 4,
+	VPORT_Z_OFFSET_ENA_bit                            = 1 << 5,
+	VTX_XY_FMT_bit                                    = 1 << 8,
+	VTX_Z_FMT_bit                                     = 1 << 9,
+	VTX_W0_FMT_bit                                    = 1 << 10,
+    PA_CL_VS_OUT_CNTL                                     = 0x0002881c,
+	CLIP_DIST_ENA_0_bit                               = 1 << 0,
+	CLIP_DIST_ENA_1_bit                               = 1 << 1,
+	CLIP_DIST_ENA_2_bit                               = 1 << 2,
+	CLIP_DIST_ENA_3_bit                               = 1 << 3,
+	CLIP_DIST_ENA_4_bit                               = 1 << 4,
+	CLIP_DIST_ENA_5_bit                               = 1 << 5,
+	CLIP_DIST_ENA_6_bit                               = 1 << 6,
+	CLIP_DIST_ENA_7_bit                               = 1 << 7,
+	CULL_DIST_ENA_0_bit                               = 1 << 8,
+	CULL_DIST_ENA_1_bit                               = 1 << 9,
+	CULL_DIST_ENA_2_bit                               = 1 << 10,
+	CULL_DIST_ENA_3_bit                               = 1 << 11,
+	CULL_DIST_ENA_4_bit                               = 1 << 12,
+	CULL_DIST_ENA_5_bit                               = 1 << 13,
+	CULL_DIST_ENA_6_bit                               = 1 << 14,
+	CULL_DIST_ENA_7_bit                               = 1 << 15,
+	USE_VTX_POINT_SIZE_bit                            = 1 << 16,
+	USE_VTX_EDGE_FLAG_bit                             = 1 << 17,
+	USE_VTX_RENDER_TARGET_INDX_bit                    = 1 << 18,
+	USE_VTX_VIEWPORT_INDX_bit                         = 1 << 19,
+	USE_VTX_KILL_FLAG_bit                             = 1 << 20,
+	VS_OUT_MISC_VEC_ENA_bit                           = 1 << 21,
+	VS_OUT_CCDIST0_VEC_ENA_bit                        = 1 << 22,
+	VS_OUT_CCDIST1_VEC_ENA_bit                        = 1 << 23,
+    PA_CL_NANINF_CNTL                                     = 0x00028820,
+	VTE_XY_INF_DISCARD_bit                            = 1 << 0,
+	VTE_Z_INF_DISCARD_bit                             = 1 << 1,
+	VTE_W_INF_DISCARD_bit                             = 1 << 2,
+	VTE_0XNANINF_IS_0_bit                             = 1 << 3,
+	VTE_XY_NAN_RETAIN_bit                             = 1 << 4,
+	VTE_Z_NAN_RETAIN_bit                              = 1 << 5,
+	VTE_W_NAN_RETAIN_bit                              = 1 << 6,
+	VTE_W_RECIP_NAN_IS_0_bit                          = 1 << 7,
+	VS_XY_NAN_TO_INF_bit                              = 1 << 8,
+	VS_XY_INF_RETAIN_bit                              = 1 << 9,
+	VS_Z_NAN_TO_INF_bit                               = 1 << 10,
+	VS_Z_INF_RETAIN_bit                               = 1 << 11,
+	VS_W_NAN_TO_INF_bit                               = 1 << 12,
+	VS_W_INF_RETAIN_bit                               = 1 << 13,
+	VS_CLIP_DIST_INF_DISCARD_bit                      = 1 << 14,
+	VTE_NO_OUTPUT_NEG_0_bit                           = 1 << 20,
+    PA_SU_LINE_STIPPLE_CNTL                               = 0x00028824,
+	LINE_STIPPLE_RESET_mask                           = 0x03 << 0,
+	LINE_STIPPLE_RESET_shift                          = 0,
+	EXPAND_FULL_LENGTH_bit                            = 1 << 2,
+	FRACTIONAL_ACCUM_bit                              = 1 << 3,
+	DIAMOND_ADJUST_bit                                = 1 << 4,
+    PA_SU_LINE_STIPPLE_SCALE                              = 0x00028828,
+    PA_SU_PRIM_FILTER_CNTL                                = 0x0002882c,
+	TRIANGLE_FILTER_DISABLE_bit                       = 1 << 0,
+	LINE_FILTER_DISABLE_bit                           = 1 << 1,
+	POINT_FILTER_DISABLE_bit                          = 1 << 2,
+	RECTANGLE_FILTER_DISABLE_bit                      = 1 << 3,
+	TRIANGLE_EXPAND_ENA_bit                           = 1 << 4,
+	LINE_EXPAND_ENA_bit                               = 1 << 5,
+	POINT_EXPAND_ENA_bit                              = 1 << 6,
+	RECTANGLE_EXPAND_ENA_bit                          = 1 << 7,
+	PRIM_EXPAND_CONSTANT_mask                         = 0xff << 8,
+	PRIM_EXPAND_CONSTANT_shift                        = 8,
+    SQ_LSTMP_RING_ITEMSIZE                                = 0x00028830,
+	ITEMSIZE_mask                                     = 0x7fff << 0,
+	ITEMSIZE_shift                                    = 0,
+    SQ_HSTMP_RING_ITEMSIZE                                = 0x00028834,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_PGM_START_PS                                       = 0x00028840,
+    SQ_PGM_RESOURCES_PS                                   = 0x00028844,
+	NUM_GPRS_mask                                     = 0xff << 0,
+	NUM_GPRS_shift                                    = 0,
+	STACK_SIZE_mask                                   = 0xff << 8,
+	STACK_SIZE_shift                                  = 8,
+	DX10_CLAMP_bit                                    = 1 << 21,
+	UNCACHED_FIRST_INST_bit                           = 1 << 28,
+	CLAMP_CONSTS_bit                                  = 1 << 31,
+    SQ_PGM_RESOURCES_2_PS                                 = 0x00028848,
+	SINGLE_ROUND_mask                                 = 0x03 << 0,
+	SINGLE_ROUND_shift                                = 0,
+	    SQ_ROUND_NEAREST_EVEN                         = 0x00,
+	    SQ_ROUND_PLUS_INFINITY                        = 0x01,
+	    SQ_ROUND_MINUS_INFINITY                       = 0x02,
+	    SQ_ROUND_TO_ZERO                              = 0x03,
+	DOUBLE_ROUND_mask                                 = 0x03 << 2,
+	DOUBLE_ROUND_shift                                = 2,
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4,
+	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5,
+	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6,
+	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7,
+    SQ_PGM_EXPORTS_PS                                     = 0x0002884c,
+	EXPORT_MODE_mask                                  = 0x1f << 0,
+	EXPORT_MODE_shift                                 = 0,
+    SQ_PGM_START_VS                                       = 0x0002885c,
+    SQ_PGM_RESOURCES_VS                                   = 0x00028860,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_RESOURCES_2_VS                                 = 0x00028864,
+/* 	SINGLE_ROUND_mask                                 = 0x03 << 0, */
+/* 	SINGLE_ROUND_shift                                = 0, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	DOUBLE_ROUND_mask                                 = 0x03 << 2, */
+/* 	DOUBLE_ROUND_shift                                = 2, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4, */
+/* 	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5, */
+/* 	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6, */
+/* 	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7, */
+    SQ_PGM_START_GS                                       = 0x00028874,
+    SQ_PGM_RESOURCES_GS                                   = 0x00028878,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_RESOURCES_2_GS                                 = 0x0002887c,
+/* 	SINGLE_ROUND_mask                                 = 0x03 << 0, */
+/* 	SINGLE_ROUND_shift                                = 0, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	DOUBLE_ROUND_mask                                 = 0x03 << 2, */
+/* 	DOUBLE_ROUND_shift                                = 2, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4, */
+/* 	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5, */
+/* 	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6, */
+/* 	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7, */
+    SQ_PGM_START_ES                                       = 0x0002888c,
+    SQ_PGM_RESOURCES_ES                                   = 0x00028890,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_RESOURCES_2_ES                                 = 0x00028894,
+/* 	SINGLE_ROUND_mask                                 = 0x03 << 0, */
+/* 	SINGLE_ROUND_shift                                = 0, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	DOUBLE_ROUND_mask                                 = 0x03 << 2, */
+/* 	DOUBLE_ROUND_shift                                = 2, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4, */
+/* 	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5, */
+/* 	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6, */
+/* 	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7, */
+    SQ_PGM_START_FS                                       = 0x000288a4,
+    SQ_PGM_RESOURCES_FS                                   = 0x000288a8,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+    SQ_PGM_START_HS                                       = 0x000288b8,
+    SQ_PGM_RESOURCES_HS                                   = 0x000288bc,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_RESOURCES_2_HS                                 = 0x000288c0,
+/* 	SINGLE_ROUND_mask                                 = 0x03 << 0, */
+/* 	SINGLE_ROUND_shift                                = 0, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	DOUBLE_ROUND_mask                                 = 0x03 << 2, */
+/* 	DOUBLE_ROUND_shift                                = 2, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4, */
+/* 	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5, */
+/* 	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6, */
+/* 	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7, */
+    SQ_PGM_START_LS                                       = 0x000288d0,
+    SQ_PGM_RESOURCES_LS                                   = 0x000288d4,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_RESOURCES_2_LS                                 = 0x000288d8,
+/* 	SINGLE_ROUND_mask                                 = 0x03 << 0, */
+/* 	SINGLE_ROUND_shift                                = 0, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	DOUBLE_ROUND_mask                                 = 0x03 << 2, */
+/* 	DOUBLE_ROUND_shift                                = 2, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4, */
+/* 	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5, */
+/* 	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6, */
+/* 	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7, */
+    SQ_VTX_SEMANTIC_CLEAR                                 = 0x000288f0,
+    SQ_ESGS_RING_ITEMSIZE                                 = 0x00028900,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GSVS_RING_ITEMSIZE                                 = 0x00028904,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_ESTMP_RING_ITEMSIZE                                = 0x00028908,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GSTMP_RING_ITEMSIZE                                = 0x0002890c,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_VSTMP_RING_ITEMSIZE                                = 0x00028910,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_PSTMP_RING_ITEMSIZE                                = 0x00028914,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GS_VERT_ITEMSIZE                                   = 0x0002891c,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GS_VERT_ITEMSIZE_1                                 = 0x00028920,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GS_VERT_ITEMSIZE_2                                 = 0x00028924,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GS_VERT_ITEMSIZE_3                                 = 0x00028928,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GSVS_RING_OFFSET_1                                 = 0x0002892c,
+	SQ_GSVS_RING_OFFSET_1__OFFSET_mask                = 0x7fff << 0,
+	SQ_GSVS_RING_OFFSET_1__OFFSET_shift               = 0,
+    SQ_GSVS_RING_OFFSET_2                                 = 0x00028930,
+	SQ_GSVS_RING_OFFSET_2__OFFSET_mask                = 0x7fff << 0,
+	SQ_GSVS_RING_OFFSET_2__OFFSET_shift               = 0,
+    SQ_GSVS_RING_OFFSET_3                                 = 0x00028934,
+	SQ_GSVS_RING_OFFSET_3__OFFSET_mask                = 0x7fff << 0,
+	SQ_GSVS_RING_OFFSET_3__OFFSET_shift               = 0,
+    SQ_ALU_CONST_CACHE_PS_0                               = 0x00028940,
+	SQ_ALU_CONST_CACHE_PS_0_num                       = 16,
+    SQ_ALU_CONST_CACHE_VS_0                               = 0x00028980,
+	SQ_ALU_CONST_CACHE_VS_0_num                       = 16,
+    SQ_ALU_CONST_CACHE_GS_0                               = 0x000289c0,
+	SQ_ALU_CONST_CACHE_GS_0_num                       = 16,
+    PA_SU_POINT_SIZE                                      = 0x00028a00,
+	HEIGHT_mask                                       = 0xffff << 0,
+	HEIGHT_shift                                      = 0,
+	PA_SU_POINT_SIZE__WIDTH_mask                      = 0xffff << 16,
+	PA_SU_POINT_SIZE__WIDTH_shift                     = 16,
+    PA_SU_POINT_MINMAX                                    = 0x00028a04,
+	MIN_SIZE_mask                                     = 0xffff << 0,
+	MIN_SIZE_shift                                    = 0,
+	PA_SU_POINT_MINMAX__MAX_SIZE_mask                 = 0xffff << 16,
+	PA_SU_POINT_MINMAX__MAX_SIZE_shift                = 16,
+    PA_SU_LINE_CNTL                                       = 0x00028a08,
+	PA_SU_LINE_CNTL__WIDTH_mask                       = 0xffff << 0,
+	PA_SU_LINE_CNTL__WIDTH_shift                      = 0,
+    PA_SC_LINE_STIPPLE                                    = 0x00028a0c,
+	LINE_PATTERN_mask                                 = 0xffff << 0,
+	LINE_PATTERN_shift                                = 0,
+	REPEAT_COUNT_mask                                 = 0xff << 16,
+	REPEAT_COUNT_shift                                = 16,
+	PATTERN_BIT_ORDER_bit                             = 1 << 28,
+	AUTO_RESET_CNTL_mask                              = 0x03 << 29,
+	AUTO_RESET_CNTL_shift                             = 29,
+    VGT_OUTPUT_PATH_CNTL                                  = 0x00028a10,
+	PATH_SELECT_mask                                  = 0x07 << 0,
+	PATH_SELECT_shift                                 = 0,
+	    VGT_OUTPATH_VTX_REUSE                         = 0x00,
+	    VGT_OUTPATH_TESS_EN                           = 0x01,
+	    VGT_OUTPATH_PASSTHRU                          = 0x02,
+	    VGT_OUTPATH_GS_BLOCK                          = 0x03,
+	    VGT_OUTPATH_HS_BLOCK                          = 0x04,
+    VGT_HOS_CNTL                                          = 0x00028a14,
+	TESS_MODE_mask                                    = 0x03 << 0,
+	TESS_MODE_shift                                   = 0,
+    VGT_HOS_MAX_TESS_LEVEL                                = 0x00028a18,
+    VGT_HOS_MIN_TESS_LEVEL                                = 0x00028a1c,
+    VGT_HOS_REUSE_DEPTH                                   = 0x00028a20,
+	REUSE_DEPTH_mask                                  = 0xff << 0,
+	REUSE_DEPTH_shift                                 = 0,
+    VGT_GROUP_PRIM_TYPE                                   = 0x00028a24,
+	VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask               = 0x1f << 0,
+	VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift              = 0,
+	    VGT_GRP_3D_POINT                              = 0x00,
+	    VGT_GRP_3D_LINE                               = 0x01,
+	    VGT_GRP_3D_TRI                                = 0x02,
+	    VGT_GRP_3D_RECT                               = 0x03,
+	    VGT_GRP_3D_QUAD                               = 0x04,
+	    VGT_GRP_2D_COPY_RECT_V0                       = 0x05,
+	    VGT_GRP_2D_COPY_RECT_V1                       = 0x06,
+	    VGT_GRP_2D_COPY_RECT_V2                       = 0x07,
+	    VGT_GRP_2D_COPY_RECT_V3                       = 0x08,
+	    VGT_GRP_2D_FILL_RECT                          = 0x09,
+	    VGT_GRP_2D_LINE                               = 0x0a,
+	    VGT_GRP_2D_TRI                                = 0x0b,
+	    VGT_GRP_PRIM_INDEX_LINE                       = 0x0c,
+	    VGT_GRP_PRIM_INDEX_TRI                        = 0x0d,
+	    VGT_GRP_PRIM_INDEX_QUAD                       = 0x0e,
+	    VGT_GRP_3D_LINE_ADJ                           = 0x0f,
+	    VGT_GRP_3D_TRI_ADJ                            = 0x10,
+	    VGT_GRP_3D_PATCH                              = 0x11,
+	RETAIN_ORDER_bit                                  = 1 << 14,
+	RETAIN_QUADS_bit                                  = 1 << 15,
+	PRIM_ORDER_mask                                   = 0x07 << 16,
+	PRIM_ORDER_shift                                  = 16,
+	    VGT_GRP_LIST                                  = 0x00,
+	    VGT_GRP_STRIP                                 = 0x01,
+	    VGT_GRP_FAN                                   = 0x02,
+	    VGT_GRP_LOOP                                  = 0x03,
+	    VGT_GRP_POLYGON                               = 0x04,
+    VGT_GROUP_FIRST_DECR                                  = 0x00028a28,
+	FIRST_DECR_mask                                   = 0x0f << 0,
+	FIRST_DECR_shift                                  = 0,
+    VGT_GROUP_DECR                                        = 0x00028a2c,
+	DECR_mask                                         = 0x0f << 0,
+	DECR_shift                                        = 0,
+    VGT_GROUP_VECT_0_CNTL                                 = 0x00028a30,
+	COMP_X_EN_bit                                     = 1 << 0,
+	COMP_Y_EN_bit                                     = 1 << 1,
+	COMP_Z_EN_bit                                     = 1 << 2,
+	COMP_W_EN_bit                                     = 1 << 3,
+	VGT_GROUP_VECT_0_CNTL__STRIDE_mask                = 0xff << 8,
+	VGT_GROUP_VECT_0_CNTL__STRIDE_shift               = 8,
+	SHIFT_mask                                        = 0xff << 16,
+	SHIFT_shift                                       = 16,
+    VGT_GROUP_VECT_1_CNTL                                 = 0x00028a34,
+/* 	COMP_X_EN_bit                                     = 1 << 0, */
+/* 	COMP_Y_EN_bit                                     = 1 << 1, */
+/* 	COMP_Z_EN_bit                                     = 1 << 2, */
+/* 	COMP_W_EN_bit                                     = 1 << 3, */
+	VGT_GROUP_VECT_1_CNTL__STRIDE_mask                = 0xff << 8,
+	VGT_GROUP_VECT_1_CNTL__STRIDE_shift               = 8,
+/* 	SHIFT_mask                                        = 0xff << 16, */
+/* 	SHIFT_shift                                       = 16, */
+    VGT_GROUP_VECT_0_FMT_CNTL                             = 0x00028a38,
+	X_CONV_mask                                       = 0x0f << 0,
+	X_CONV_shift                                      = 0,
+	    VGT_GRP_INDEX_16                              = 0x00,
+	    VGT_GRP_INDEX_32                              = 0x01,
+	    VGT_GRP_UINT_16                               = 0x02,
+	    VGT_GRP_UINT_32                               = 0x03,
+	    VGT_GRP_SINT_16                               = 0x04,
+	    VGT_GRP_SINT_32                               = 0x05,
+	    VGT_GRP_FLOAT_32                              = 0x06,
+	    VGT_GRP_AUTO_PRIM                             = 0x07,
+	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08,
+	X_OFFSET_mask                                     = 0x0f << 4,
+	X_OFFSET_shift                                    = 4,
+	Y_CONV_mask                                       = 0x0f << 8,
+	Y_CONV_shift                                      = 8,
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+	Y_OFFSET_mask                                     = 0x0f << 12,
+	Y_OFFSET_shift                                    = 12,
+	Z_CONV_mask                                       = 0x0f << 16,
+	Z_CONV_shift                                      = 16,
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+	Z_OFFSET_mask                                     = 0x0f << 20,
+	Z_OFFSET_shift                                    = 20,
+	W_CONV_mask                                       = 0x0f << 24,
+	W_CONV_shift                                      = 24,
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+	W_OFFSET_mask                                     = 0x0f << 28,
+	W_OFFSET_shift                                    = 28,
+    VGT_GROUP_VECT_1_FMT_CNTL                             = 0x00028a3c,
+/* 	X_CONV_mask                                       = 0x0f << 0, */
+/* 	X_CONV_shift                                      = 0, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	X_OFFSET_mask                                     = 0x0f << 4, */
+/* 	X_OFFSET_shift                                    = 4, */
+/* 	Y_CONV_mask                                       = 0x0f << 8, */
+/* 	Y_CONV_shift                                      = 8, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	Y_OFFSET_mask                                     = 0x0f << 12, */
+/* 	Y_OFFSET_shift                                    = 12, */
+/* 	Z_CONV_mask                                       = 0x0f << 16, */
+/* 	Z_CONV_shift                                      = 16, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	Z_OFFSET_mask                                     = 0x0f << 20, */
+/* 	Z_OFFSET_shift                                    = 20, */
+/* 	W_CONV_mask                                       = 0x0f << 24, */
+/* 	W_CONV_shift                                      = 24, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	W_OFFSET_mask                                     = 0x0f << 28, */
+/* 	W_OFFSET_shift                                    = 28, */
+    VGT_GS_MODE                                           = 0x00028a40,
+	VGT_GS_MODE__MODE_mask                            = 0x03 << 0,
+	VGT_GS_MODE__MODE_shift                           = 0,
+	    GS_OFF                                        = 0x00,
+	    GS_SCENARIO_A                                 = 0x01,
+	    GS_SCENARIO_B                                 = 0x02,
+	    GS_SCENARIO_G                                 = 0x03,
+	    GS_SCENARIO_C                                 = 0x04,
+	    SPRITE_EN                                     = 0x05,
+	ES_PASSTHRU_bit                                   = 1 << 2,
+	CUT_MODE_mask                                     = 0x03 << 3,
+	CUT_MODE_shift                                    = 3,
+	    GS_CUT_1024                                   = 0x00,
+	    GS_CUT_512                                    = 0x01,
+	    GS_CUT_256                                    = 0x02,
+	    GS_CUT_128                                    = 0x03,
+	MODE_HI_bit                                       = 1 << 8,
+    PA_SC_MODE_CNTL_0                                     = 0x00028a48,
+	MSAA_ENABLE_bit                                   = 1 << 0,
+	VPORT_SCISSOR_ENABLE_bit                          = 1 << 1,
+	LINE_STIPPLE_ENABLE_bit                           = 1 << 2,
+    VGT_ENHANCE                                           = 0x00028a50,
+    VGT_GS_PER_ES                                         = 0x00028a54,
+	GS_PER_ES_mask                                    = 0x7ff << 0,
+	GS_PER_ES_shift                                   = 0,
+    VGT_ES_PER_GS                                         = 0x00028a58,
+	ES_PER_GS_mask                                    = 0x7ff << 0,
+	ES_PER_GS_shift                                   = 0,
+    VGT_GS_PER_VS                                         = 0x00028a5c,
+	GS_PER_VS_mask                                    = 0x0f << 0,
+	GS_PER_VS_shift                                   = 0,
+    VGT_GS_OUT_PRIM_TYPE                                  = 0x00028a6c,
+	OUTPRIM_TYPE_mask                                 = 0x3f << 0,
+	OUTPRIM_TYPE_shift                                = 0,
+	    POINTLIST                                     = 0x00,
+	    LINESTRIP                                     = 0x01,
+	    TRISTRIP                                      = 0x02,
+    VGT_DMA_SIZE                                          = 0x00028a74,
+    VGT_DMA_MAX_SIZE                                      = 0x00028a78,
+    VGT_DMA_INDEX_TYPE                                    = 0x00028a7c,
+/* 	INDEX_TYPE_mask                                   = 0x03 << 0, */
+/* 	INDEX_TYPE_shift                                  = 0, */
+	    VGT_INDEX_16                                  = 0x00,
+	    VGT_INDEX_32                                  = 0x01,
+	SWAP_MODE_mask                                    = 0x03 << 2,
+	SWAP_MODE_shift                                   = 2,
+	    VGT_DMA_SWAP_NONE                             = 0x00,
+	    VGT_DMA_SWAP_16_BIT                           = 0x01,
+	    VGT_DMA_SWAP_32_BIT                           = 0x02,
+	    VGT_DMA_SWAP_WORD                             = 0x03,
+    VGT_PRIMITIVEID_EN                                    = 0x00028a84,
+	PRIMITIVEID_EN_bit                                = 1 << 0,
+    VGT_DMA_NUM_INSTANCES                                 = 0x00028a88,
+    VGT_EVENT_INITIATOR                                   = 0x00028a90,
+	EVENT_TYPE_mask                                   = 0x3f << 0,
+	EVENT_TYPE_shift                                  = 0,
+	    SAMPLE_STREAMOUTSTATS1                        = 0x01,
+	    SAMPLE_STREAMOUTSTATS2                        = 0x02,
+	    SAMPLE_STREAMOUTSTATS3                        = 0x03,
+	    CACHE_FLUSH_TS                                = 0x04,
+	    CONTEXT_DONE                                  = 0x05,
+	    CACHE_FLUSH                                   = 0x06,
+	    CS_PARTIAL_FLUSH                              = 0x07,
+	    RST_PIX_CNT                                   = 0x0d,
+	    VS_PARTIAL_FLUSH                              = 0x0f,
+	    PS_PARTIAL_FLUSH                              = 0x10,
+	    FLUSH_HS_OUTPUT                               = 0x11,
+	    FLUSH_LS_OUTPUT                               = 0x12,
+	    CACHE_FLUSH_AND_INV_TS_EVENT                  = 0x14,
+	    ZPASS_DONE                                    = 0x15,
+	    CACHE_FLUSH_AND_INV_EVENT                     = 0x16,
+	    PERFCOUNTER_START                             = 0x17,
+	    PERFCOUNTER_STOP                              = 0x18,
+	    PIPELINESTAT_START                            = 0x19,
+	    PIPELINESTAT_STOP                             = 0x1a,
+	    PERFCOUNTER_SAMPLE                            = 0x1b,
+	    FLUSH_ES_OUTPUT                               = 0x1c,
+	    FLUSH_GS_OUTPUT                               = 0x1d,
+	    SAMPLE_PIPELINESTAT                           = 0x1e,
+	    SO_VGTSTREAMOUT_FLUSH                         = 0x1f,
+	    SAMPLE_STREAMOUTSTATS                         = 0x20,
+	    RESET_VTX_CNT                                 = 0x21,
+	    BLOCK_CONTEXT_DONE                            = 0x22,
+	    CS_CONTEXT_DONE                               = 0x23,
+	    VGT_FLUSH                                     = 0x24,
+	    SQ_NON_EVENT                                  = 0x26,
+	    SC_SEND_DB_VPZ                                = 0x27,
+	    BOTTOM_OF_PIPE_TS                             = 0x28,
+	    FLUSH_SX_TS                                   = 0x29,
+	    DB_CACHE_FLUSH_AND_INV                        = 0x2a,
+	    FLUSH_AND_INV_DB_DATA_TS                      = 0x2b,
+	    FLUSH_AND_INV_DB_META                         = 0x2c,
+	    FLUSH_AND_INV_CB_DATA_TS                      = 0x2d,
+	    FLUSH_AND_INV_CB_META                         = 0x2e,
+	    CS_DONE                                       = 0x2f,
+	    PS_DONE                                       = 0x30,
+	    FLUSH_AND_INV_CB_PIXEL_DATA                   = 0x31,
+	ADDRESS_HI_mask                                   = 0xff << 19,
+	ADDRESS_HI_shift                                  = 19,
+	EXTENDED_EVENT_bit                                = 1 << 27,
+    VGT_MULTI_PRIM_IB_RESET_EN                            = 0x00028a94,
+	RESET_EN_bit                                      = 1 << 0,
+    VGT_INSTANCE_STEP_RATE_0                              = 0x00028aa0,
+    VGT_INSTANCE_STEP_RATE_1                              = 0x00028aa4,
+    VGT_REUSE_OFF                                         = 0x00028ab4,
+	REUSE_OFF_bit                                     = 1 << 0,
+    VGT_VTX_CNT_EN                                        = 0x00028ab8,
+	VTX_CNT_EN_bit                                    = 1 << 0,
+    DB_HTILE_SURFACE                                      = 0x00028abc,
+	HTILE_WIDTH_bit                                   = 1 << 0,
+	HTILE_HEIGHT_bit                                  = 1 << 1,
+	LINEAR_bit                                        = 1 << 2,
+	FULL_CACHE_bit                                    = 1 << 3,
+	HTILE_USES_PRELOAD_WIN_bit                        = 1 << 4,
+	PRELOAD_bit                                       = 1 << 5,
+	PREFETCH_WIDTH_mask                               = 0x3f << 6,
+	PREFETCH_WIDTH_shift                              = 6,
+	PREFETCH_HEIGHT_mask                              = 0x3f << 12,
+	PREFETCH_HEIGHT_shift                             = 12,
+    DB_SRESULTS_COMPARE_STATE0                            = 0x00028ac0,
+	COMPAREFUNC0_mask                                 = 0x07 << 0,
+	COMPAREFUNC0_shift                                = 0,
+/* 	    REF_NEVER                                     = 0x00, */
+/* 	    REF_LESS                                      = 0x01, */
+/* 	    REF_EQUAL                                     = 0x02, */
+/* 	    REF_LEQUAL                                    = 0x03, */
+/* 	    REF_GREATER                                   = 0x04, */
+/* 	    REF_NOTEQUAL                                  = 0x05, */
+/* 	    REF_GEQUAL                                    = 0x06, */
+/* 	    REF_ALWAYS                                    = 0x07, */
+	COMPAREVALUE0_mask                                = 0xff << 4,
+	COMPAREVALUE0_shift                               = 4,
+	COMPAREMASK0_mask                                 = 0xff << 12,
+	COMPAREMASK0_shift                                = 12,
+	ENABLE0_bit                                       = 1 << 24,
+    DB_SRESULTS_COMPARE_STATE1                            = 0x00028ac4,
+	COMPAREFUNC1_mask                                 = 0x07 << 0,
+	COMPAREFUNC1_shift                                = 0,
+/* 	    REF_NEVER                                     = 0x00, */
+/* 	    REF_LESS                                      = 0x01, */
+/* 	    REF_EQUAL                                     = 0x02, */
+/* 	    REF_LEQUAL                                    = 0x03, */
+/* 	    REF_GREATER                                   = 0x04, */
+/* 	    REF_NOTEQUAL                                  = 0x05, */
+/* 	    REF_GEQUAL                                    = 0x06, */
+/* 	    REF_ALWAYS                                    = 0x07, */
+	COMPAREVALUE1_mask                                = 0xff << 4,
+	COMPAREVALUE1_shift                               = 4,
+	COMPAREMASK1_mask                                 = 0xff << 12,
+	COMPAREMASK1_shift                                = 12,
+	ENABLE1_bit                                       = 1 << 24,
+    DB_PRELOAD_CONTROL                                    = 0x00028ac8,
+	START_X_mask                                      = 0xff << 0,
+	START_X_shift                                     = 0,
+	START_Y_mask                                      = 0xff << 8,
+	START_Y_shift                                     = 8,
+	MAX_X_mask                                        = 0xff << 16,
+	MAX_X_shift                                       = 16,
+	MAX_Y_mask                                        = 0xff << 24,
+	MAX_Y_shift                                       = 24,
+    VGT_STRMOUT_BUFFER_SIZE_0                             = 0x00028ad0,
+    VGT_STRMOUT_VTX_STRIDE_0                              = 0x00028ad4,
+	VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_0                             = 0x00028ad8,
+    VGT_STRMOUT_BUFFER_OFFSET_0                           = 0x00028adc,
+    VGT_STRMOUT_BUFFER_SIZE_1                             = 0x00028ae0,
+    VGT_STRMOUT_VTX_STRIDE_1                              = 0x00028ae4,
+	VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_1                             = 0x00028ae8,
+    VGT_STRMOUT_BUFFER_OFFSET_1                           = 0x00028aec,
+    VGT_STRMOUT_BUFFER_SIZE_2                             = 0x00028af0,
+    VGT_STRMOUT_VTX_STRIDE_2                              = 0x00028af4,
+	VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_2                             = 0x00028af8,
+    VGT_STRMOUT_BUFFER_OFFSET_2                           = 0x00028afc,
+    VGT_STRMOUT_BUFFER_SIZE_3                             = 0x00028b00,
+    VGT_STRMOUT_VTX_STRIDE_3                              = 0x00028b04,
+	VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_3                             = 0x00028b08,
+    VGT_STRMOUT_BUFFER_OFFSET_3                           = 0x00028b0c,
+    VGT_STRMOUT_BASE_OFFSET_0                             = 0x00028b10,
+    VGT_STRMOUT_BASE_OFFSET_1                             = 0x00028b14,
+    VGT_STRMOUT_BASE_OFFSET_2                             = 0x00028b18,
+    VGT_STRMOUT_BASE_OFFSET_3                             = 0x00028b1c,
+    VGT_STRMOUT_DRAW_OPAQUE_OFFSET                        = 0x00028b28,
+    VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE            = 0x00028b2c,
+    VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE                 = 0x00028b30,
+	VERTEX_STRIDE_mask                                = 0x1ff << 0,
+	VERTEX_STRIDE_shift                               = 0,
+    VGT_GS_MAX_VERT_OUT                                   = 0x00028b38,
+	MAX_VERT_OUT_mask                                 = 0x7ff << 0,
+	MAX_VERT_OUT_shift                                = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_0                          = 0x00028b44,
+	VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_1                          = 0x00028b48,
+	VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_2                          = 0x00028b4c,
+	VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_3                          = 0x00028b50,
+	VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift   = 0,
+    VGT_SHADER_STAGES_EN                                  = 0x00028b54,
+	LS_EN_mask                                        = 0x03 << 0,
+	LS_EN_shift                                       = 0,
+	    LS_STAGE_OFF                                  = 0x00,
+	    LS_STAGE_ON                                   = 0x01,
+	    CS_STAGE_ON                                   = 0x02,
+	HS_EN_bit                                         = 1 << 2,
+	ES_EN_mask                                        = 0x03 << 3,
+	ES_EN_shift                                       = 3,
+	    ES_STAGE_OFF                                  = 0x00,
+	    ES_STAGE_DS                                   = 0x01,
+	    ES_STAGE_REAL                                 = 0x02,
+	GS_EN_bit                                         = 1 << 5,
+	VS_EN_mask                                        = 0x03 << 6,
+	VS_EN_shift                                       = 6,
+	    VS_STAGE_REAL                                 = 0x00,
+	    VS_STAGE_DS                                   = 0x01,
+	    VS_STAGE_COPY_SHADER                          = 0x02,
+    VGT_LS_HS_CONFIG                                      = 0x00028b58,
+	NUM_PATCHES_mask                                  = 0xff << 0,
+	NUM_PATCHES_shift                                 = 0,
+	HS_NUM_INPUT_CP_mask                              = 0x3f << 8,
+	HS_NUM_INPUT_CP_shift                             = 8,
+	HS_NUM_OUTPUT_CP_mask                             = 0x3f << 14,
+	HS_NUM_OUTPUT_CP_shift                            = 14,
+    VGT_LS_SIZE                                           = 0x00028b5c,
+	VGT_LS_SIZE__SIZE_mask                            = 0xff << 0,
+	VGT_LS_SIZE__SIZE_shift                           = 0,
+	PATCH_CP_SIZE_mask                                = 0x1fff << 8,
+	PATCH_CP_SIZE_shift                               = 8,
+    VGT_HS_SIZE                                           = 0x00028b60,
+	VGT_HS_SIZE__SIZE_mask                            = 0xff << 0,
+	VGT_HS_SIZE__SIZE_shift                           = 0,
+/* 	PATCH_CP_SIZE_mask                                = 0x1fff << 8, */
+/* 	PATCH_CP_SIZE_shift                               = 8, */
+    VGT_LS_HS_ALLOC                                       = 0x00028b64,
+	HS_TOTAL_OUTPUT_mask                              = 0x1fff << 0,
+	HS_TOTAL_OUTPUT_shift                             = 0,
+	LS_HS_TOTAL_OUTPUT_mask                           = 0x1fff << 13,
+	LS_HS_TOTAL_OUTPUT_shift                          = 13,
+    VGT_HS_PATCH_CONST                                    = 0x00028b68,
+	VGT_HS_PATCH_CONST__SIZE_mask                     = 0x1fff << 0,
+	VGT_HS_PATCH_CONST__SIZE_shift                    = 0,
+	VGT_HS_PATCH_CONST__STRIDE_mask                   = 0x1fff << 13,
+	VGT_HS_PATCH_CONST__STRIDE_shift                  = 13,
+    DB_ALPHA_TO_MASK                                      = 0x00028b70,
+	ALPHA_TO_MASK_ENABLE_bit                          = 1 << 0,
+	ALPHA_TO_MASK_OFFSET0_mask                        = 0x03 << 8,
+	ALPHA_TO_MASK_OFFSET0_shift                       = 8,
+	ALPHA_TO_MASK_OFFSET1_mask                        = 0x03 << 10,
+	ALPHA_TO_MASK_OFFSET1_shift                       = 10,
+	ALPHA_TO_MASK_OFFSET2_mask                        = 0x03 << 12,
+	ALPHA_TO_MASK_OFFSET2_shift                       = 12,
+	ALPHA_TO_MASK_OFFSET3_mask                        = 0x03 << 14,
+	ALPHA_TO_MASK_OFFSET3_shift                       = 14,
+	OFFSET_ROUND_bit                                  = 1 << 16,
+    PA_SU_POLY_OFFSET_DB_FMT_CNTL                         = 0x00028b78,
+	POLY_OFFSET_NEG_NUM_DB_BITS_mask                  = 0xff << 0,
+	POLY_OFFSET_NEG_NUM_DB_BITS_shift                 = 0,
+	POLY_OFFSET_DB_IS_FLOAT_FMT_bit                   = 1 << 8,
+    PA_SU_POLY_OFFSET_CLAMP                               = 0x00028b7c,
+    PA_SU_POLY_OFFSET_FRONT_SCALE                         = 0x00028b80,
+    PA_SU_POLY_OFFSET_FRONT_OFFSET                        = 0x00028b84,
+    PA_SU_POLY_OFFSET_BACK_SCALE                          = 0x00028b88,
+    PA_SU_POLY_OFFSET_BACK_OFFSET                         = 0x00028b8c,
+    VGT_GS_INSTANCE_CNT                                   = 0x00028b90,
+	VGT_GS_INSTANCE_CNT__ENABLE_bit                   = 1 << 0,
+	CNT_mask                                          = 0x7f << 2,
+	CNT_shift                                         = 2,
+    VGT_STRMOUT_CONFIG                                    = 0x00028b94,
+	STREAMOUT_0_EN_bit                                = 1 << 0,
+	STREAMOUT_1_EN_bit                                = 1 << 1,
+	STREAMOUT_2_EN_bit                                = 1 << 2,
+	STREAMOUT_3_EN_bit                                = 1 << 3,
+	RAST_STREAM_mask                                  = 0x07 << 4,
+	RAST_STREAM_shift                                 = 4,
+    VGT_STRMOUT_BUFFER_CONFIG                             = 0x00028b98,
+	STREAM_0_BUFFER_EN_mask                           = 0x0f << 0,
+	STREAM_0_BUFFER_EN_shift                          = 0,
+	STREAM_1_BUFFER_EN_mask                           = 0x0f << 4,
+	STREAM_1_BUFFER_EN_shift                          = 4,
+	STREAM_2_BUFFER_EN_mask                           = 0x0f << 8,
+	STREAM_2_BUFFER_EN_shift                          = 8,
+	STREAM_3_BUFFER_EN_mask                           = 0x0f << 12,
+	STREAM_3_BUFFER_EN_shift                          = 12,
+    CB_IMMED0_BASE                                        = 0x00028b9c,
+	CB_IMMED0_BASE_num                                = 12,
+    PA_SC_LINE_CNTL                                       = 0x00028c00,
+	EXPAND_LINE_WIDTH_bit                             = 1 << 9,
+	LAST_PIXEL_bit                                    = 1 << 10,
+	PERPENDICULAR_ENDCAP_ENA_bit                      = 1 << 11,
+	DX10_DIAMOND_TEST_ENA_bit                         = 1 << 12,
+    PA_SC_AA_CONFIG                                       = 0x00028c04,
+	MSAA_NUM_SAMPLES_mask                             = 0x03 << 0,
+	MSAA_NUM_SAMPLES_shift                            = 0,
+	AA_MASK_CENTROID_DTMN_bit                         = 1 << 4,
+	MAX_SAMPLE_DIST_mask                              = 0x0f << 13,
+	MAX_SAMPLE_DIST_shift                             = 13,
+    PA_SU_VTX_CNTL                                        = 0x00028c08,
+	PIX_CENTER_bit                                    = 1 << 0,
+	PA_SU_VTX_CNTL__ROUND_MODE_mask                   = 0x03 << 1,
+	PA_SU_VTX_CNTL__ROUND_MODE_shift                  = 1,
+	    X_TRUNCATE                                    = 0x00,
+	    X_ROUND                                       = 0x01,
+	    X_ROUND_TO_EVEN                               = 0x02,
+	    X_ROUND_TO_ODD                                = 0x03,
+	QUANT_MODE_mask                                   = 0x07 << 3,
+	QUANT_MODE_shift                                  = 3,
+	    X_1_16TH                                      = 0x00,
+	    X_1_8TH                                       = 0x01,
+	    X_1_4TH                                       = 0x02,
+	    X_1_2                                         = 0x03,
+	    X_1                                           = 0x04,
+	    X_1_256TH                                     = 0x05,
+	    X_1_1024TH                                    = 0x06,
+	    X_1_4096TH                                    = 0x07,
+    PA_CL_GB_VERT_CLIP_ADJ                                = 0x00028c0c,
+    PA_CL_GB_VERT_DISC_ADJ                                = 0x00028c10,
+    PA_CL_GB_HORZ_CLIP_ADJ                                = 0x00028c14,
+    PA_CL_GB_HORZ_DISC_ADJ                                = 0x00028c18,
+    PA_SC_AA_SAMPLE_LOCS_0                                = 0x00028c1c,
+	S0_X_mask                                         = 0x0f << 0,
+	S0_X_shift                                        = 0,
+	S0_Y_mask                                         = 0x0f << 4,
+	S0_Y_shift                                        = 4,
+	S1_X_mask                                         = 0x0f << 8,
+	S1_X_shift                                        = 8,
+	S1_Y_mask                                         = 0x0f << 12,
+	S1_Y_shift                                        = 12,
+	S2_X_mask                                         = 0x0f << 16,
+	S2_X_shift                                        = 16,
+	S2_Y_mask                                         = 0x0f << 20,
+	S2_Y_shift                                        = 20,
+	S3_X_mask                                         = 0x0f << 24,
+	S3_X_shift                                        = 24,
+	S3_Y_mask                                         = 0x0f << 28,
+	S3_Y_shift                                        = 28,
+    PA_SC_AA_SAMPLE_LOCS_1                                = 0x00028c20,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_2                                = 0x00028c24,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_3                                = 0x00028c28,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_4                                = 0x00028c2c,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_5                                = 0x00028c30,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_6                                = 0x00028c34,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_7                                = 0x00028c38,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_MASK                                         = 0x00028c3c,
+    VGT_VERTEX_REUSE_BLOCK_CNTL                           = 0x00028c58,
+	VTX_REUSE_DEPTH_mask                              = 0xff << 0,
+	VTX_REUSE_DEPTH_shift                             = 0,
+    VGT_OUT_DEALLOC_CNTL                                  = 0x00028c5c,
+	DEALLOC_DIST_mask                                 = 0x7f << 0,
+	DEALLOC_DIST_shift                                = 0,
+    CB_COLOR0_BASE                                        = 0x00028c60,
+	CB_COLOR0_BASE_num                                = 12,
+	CB_COLOR0_BASE_offset                             = 51,
+    CB_COLOR0_PITCH                                       = 0x00028c64,
+	CB_COLOR0_PITCH_num                               = 12,
+	CB_COLOR0_PITCH_offset                            = 51,
+	CB_COLOR0_PITCH__TILE_MAX_mask                    = 0x7ff << 0,
+	CB_COLOR0_PITCH__TILE_MAX_shift                   = 0,
+    CB_COLOR0_SLICE                                       = 0x00028c68,
+	CB_COLOR0_SLICE_num                               = 12,
+	CB_COLOR0_SLICE_offset                            = 51,
+	CB_COLOR0_SLICE__TILE_MAX_mask                    = 0x3fffff << 0,
+	CB_COLOR0_SLICE__TILE_MAX_shift                   = 0,
+    CB_COLOR0_VIEW                                        = 0x00028c6c,
+	CB_COLOR0_VIEW_num                                = 12,
+	CB_COLOR0_VIEW_offset                             = 51,
+/* 	SLICE_START_mask                                  = 0x7ff << 0, */
+/* 	SLICE_START_shift                                 = 0, */
+/* 	SLICE_MAX_mask                                    = 0x7ff << 13, */
+/* 	SLICE_MAX_shift                                   = 13, */
+    CB_COLOR0_INFO                                        = 0x00028c70,
+	CB_COLOR0_INFO_num                                = 12,
+	CB_COLOR0_INFO_offset                             = 51,
+	ENDIAN_mask                                       = 0x03 << 0,
+	ENDIAN_shift                                      = 0,
+	    ENDIAN_NONE                                   = 0x00,
+	    ENDIAN_8IN16                                  = 0x01,
+	    ENDIAN_8IN32                                  = 0x02,
+	    ENDIAN_8IN64                                  = 0x03,
+	CB_COLOR0_INFO__FORMAT_mask                       = 0x3f << 2,
+	CB_COLOR0_INFO__FORMAT_shift                      = 2,
+	    COLOR_INVALID                                 = 0x00,
+	    COLOR_8                                       = 0x01,
+	    COLOR_16                                      = 0x05,
+	    COLOR_16_FLOAT                                = 0x06,
+	    COLOR_8_8                                     = 0x07,
+	    COLOR_5_6_5                                   = 0x08,
+	    COLOR_1_5_5_5                                 = 0x0a,
+	    COLOR_4_4_4_4                                 = 0x0b,
+	    COLOR_5_5_5_1                                 = 0x0c,
+	    COLOR_32                                      = 0x0d,
+	    COLOR_32_FLOAT                                = 0x0e,
+	    COLOR_16_16                                   = 0x0f,
+	    COLOR_16_16_FLOAT                             = 0x10,
+	    COLOR_8_24                                    = 0x11,
+	    COLOR_24_8                                    = 0x13,
+	    COLOR_10_11_11                                = 0x15,
+	    COLOR_10_11_11_FLOAT                          = 0x16,
+	    COLOR_2_10_10_10                              = 0x19,
+	    COLOR_8_8_8_8                                 = 0x1a,
+	    COLOR_10_10_10_2                              = 0x1b,
+	    COLOR_X24_8_32_FLOAT                          = 0x1c,
+	    COLOR_32_32                                   = 0x1d,
+	    COLOR_32_32_FLOAT                             = 0x1e,
+	    COLOR_16_16_16_16                             = 0x1f,
+	    COLOR_16_16_16_16_FLOAT                       = 0x20,
+	    COLOR_32_32_32_32                             = 0x22,
+	    COLOR_32_32_32_32_FLOAT                       = 0x23,
+	CB_COLOR0_INFO__ARRAY_MODE_mask                   = 0x0f << 8,
+	CB_COLOR0_INFO__ARRAY_MODE_shift                  = 8,
+/* 	    ARRAY_LINEAR_GENERAL                          = 0x00, */
+/* 	    ARRAY_LINEAR_ALIGNED                          = 0x01, */
+/* 	    ARRAY_1D_TILED_THIN1                          = 0x02, */
+/* 	    ARRAY_2D_TILED_THIN1                          = 0x04, */
+	NUMBER_TYPE_mask                                  = 0x07 << 12,
+	NUMBER_TYPE_shift                                 = 12,
+	    NUMBER_UNORM                                  = 0x00,
+	    NUMBER_SNORM                                  = 0x01,
+	    NUMBER_UINT                                   = 0x04,
+	    NUMBER_SINT                                   = 0x05,
+	    NUMBER_SRGB                                   = 0x06,
+	    NUMBER_FLOAT                                  = 0x07,
+	COMP_SWAP_mask                                    = 0x03 << 15,
+	COMP_SWAP_shift                                   = 15,
+	    SWAP_STD                                      = 0x00,
+	    SWAP_ALT                                      = 0x01,
+	    SWAP_STD_REV                                  = 0x02,
+	    SWAP_ALT_REV                                  = 0x03,
+	FAST_CLEAR_bit                                    = 1 << 17,
+	COMPRESSION_bit                                   = 1 << 18,
+	BLEND_CLAMP_bit                                   = 1 << 19,
+	BLEND_BYPASS_bit                                  = 1 << 20,
+	SIMPLE_FLOAT_bit                                  = 1 << 21,
+	CB_COLOR0_INFO__ROUND_MODE_bit                    = 1 << 22,
+	CB_COLOR0_INFO__TILE_COMPACT_bit                  = 1 << 23,
+	SOURCE_FORMAT_mask                                = 0x03 << 24,
+	SOURCE_FORMAT_shift                               = 24,
+	    EXPORT_4C_32BPC                               = 0x00,
+	    EXPORT_4C_16BPC                               = 0x01,
+	RAT_bit                                           = 1 << 26,
+	RESOURCE_TYPE_mask                                = 0x07 << 27,
+	RESOURCE_TYPE_shift                               = 27,
+	    BUFFER                                        = 0x00,
+	    TEXTURE1D                                     = 0x01,
+	    TEXTURE1DARRAY                                = 0x02,
+	    TEXTURE2D                                     = 0x03,
+	    TEXTURE2DARRAY                                = 0x04,
+	    TEXTURE3D                                     = 0x05,
+    CB_COLOR0_ATTRIB                                      = 0x00028c74,
+	CB_COLOR0_ATTRIB_num                              = 12,
+	CB_COLOR0_ATTRIB_offset                           = 51,
+	IGNORE_SHADER_ENGINE_TILING_bit                   = 1 << 3,
+	CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit       = 1 << 4,
+	CB_COLOR0_ATTRIB__TILE_SPLIT_mask                 = 0x0f << 5,
+	CB_COLOR0_ATTRIB__TILE_SPLIT_shift                = 5,
+/* 	    ADDR_SURF_TILE_SPLIT_64B                      = 0x00, */
+/* 	    ADDR_SURF_TILE_SPLIT_128B                     = 0x01, */
+/* 	    ADDR_SURF_TILE_SPLIT_256B                     = 0x02, */
+/* 	    ADDR_SURF_TILE_SPLIT_512B                     = 0x03, */
+/* 	    ADDR_SURF_TILE_SPLIT_1KB                      = 0x04, */
+/* 	    ADDR_SURF_TILE_SPLIT_2KB                      = 0x05, */
+/* 	    ADDR_SURF_TILE_SPLIT_4KB                      = 0x06, */
+	CB_COLOR0_ATTRIB__NUM_BANKS_mask                  = 0x03 << 10,
+	CB_COLOR0_ATTRIB__NUM_BANKS_shift                 = 10,
+/* 	    ADDR_SURF_2_BANK                              = 0x00, */
+/* 	    ADDR_SURF_4_BANK                              = 0x01, */
+/* 	    ADDR_SURF_8_BANK                              = 0x02, */
+/* 	    ADDR_SURF_16_BANK                             = 0x03, */
+	CB_COLOR0_ATTRIB__BANK_WIDTH_mask                 = 0x03 << 13,
+	CB_COLOR0_ATTRIB__BANK_WIDTH_shift                = 13,
+/* 	    ADDR_SURF_BANK_WIDTH_1                        = 0x00, */
+/* 	    ADDR_SURF_BANK_WIDTH_2                        = 0x01, */
+/* 	    ADDR_SURF_BANK_WIDTH_4                        = 0x02, */
+/* 	    ADDR_SURF_BANK_WIDTH_8                        = 0x03, */
+	CB_COLOR0_ATTRIB__BANK_HEIGHT_mask                = 0x03 << 16,
+	CB_COLOR0_ATTRIB__BANK_HEIGHT_shift               = 16,
+/* 	    ADDR_SURF_BANK_HEIGHT_1                       = 0x00, */
+/* 	    ADDR_SURF_BANK_HEIGHT_2                       = 0x01, */
+/* 	    ADDR_SURF_BANK_HEIGHT_4                       = 0x02, */
+/* 	    ADDR_SURF_BANK_HEIGHT_8                       = 0x03, */
+	CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_mask          = 0x03 << 19,
+	CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift         = 19,
+/* 	    ADDR_SURF_MACRO_ASPECT_1                      = 0x00, */
+/* 	    ADDR_SURF_MACRO_ASPECT_2                      = 0x01, */
+/* 	    ADDR_SURF_MACRO_ASPECT_4                      = 0x02, */
+/* 	    ADDR_SURF_MACRO_ASPECT_8                      = 0x03, */
+	FMASK_BANK_HEIGHT_mask                            = 0x03 << 22,
+	FMASK_BANK_HEIGHT_shift                           = 22,
+/* 	    ADDR_SURF_BANK_HEIGHT_1                       = 0x00, */
+/* 	    ADDR_SURF_BANK_HEIGHT_2                       = 0x01, */
+/* 	    ADDR_SURF_BANK_HEIGHT_4                       = 0x02, */
+/* 	    ADDR_SURF_BANK_HEIGHT_8                       = 0x03, */
+    CB_COLOR0_DIM                                         = 0x00028c78,
+	CB_COLOR0_DIM_num                                 = 12,
+	CB_COLOR0_DIM_offset                              = 51,
+	WIDTH_MAX_mask                                    = 0xffff << 0,
+	WIDTH_MAX_shift                                   = 0,
+	HEIGHT_MAX_mask                                   = 0xffff << 16,
+	HEIGHT_MAX_shift                                  = 16,
+    CB_COLOR0_CMASK                                       = 0x00028c7c,
+	CB_COLOR0_CMASK_num                               = 8,
+	CB_COLOR0_CMASK_offset                            = 60,
+    CB_COLOR0_CMASK_SLICE                                 = 0x00028c80,
+	CB_COLOR0_CMASK_SLICE_num                         = 8,
+	CB_COLOR0_CMASK_SLICE_offset                      = 60,
+	CB_COLOR0_CMASK_SLICE__TILE_MAX_mask              = 0x3fff << 0,
+	CB_COLOR0_CMASK_SLICE__TILE_MAX_shift             = 0,
+    CB_COLOR0_FMASK                                       = 0x00028c84,
+	CB_COLOR0_FMASK_num                               = 8,
+	CB_COLOR0_FMASK_offset                            = 60,
+    CB_COLOR0_FMASK_SLICE                                 = 0x00028c88,
+	CB_COLOR0_FMASK_SLICE_num                         = 8,
+	CB_COLOR0_FMASK_SLICE_offset                      = 60,
+	CB_COLOR0_FMASK_SLICE__TILE_MAX_mask              = 0x3fffff << 0,
+	CB_COLOR0_FMASK_SLICE__TILE_MAX_shift             = 0,
+    CB_COLOR0_CLEAR_WORD0                                 = 0x00028c8c,
+	CB_COLOR0_CLEAR_WORD0_num                         = 8,
+	CB_COLOR0_CLEAR_WORD0_offset                      = 60,
+    CB_COLOR0_CLEAR_WORD1                                 = 0x00028c90,
+	CB_COLOR0_CLEAR_WORD1_num                         = 8,
+	CB_COLOR0_CLEAR_WORD1_offset                      = 60,
+    CB_COLOR0_CLEAR_WORD2                                 = 0x00028c94,
+	CB_COLOR0_CLEAR_WORD2_num                         = 8,
+	CB_COLOR0_CLEAR_WORD2_offset                      = 60,
+    CB_COLOR0_CLEAR_WORD3                                 = 0x00028c98,
+	CB_COLOR0_CLEAR_WORD3_num                         = 8,
+	CB_COLOR0_CLEAR_WORD3_offset                      = 60,
+    SQ_ALU_CONST_CACHE_HS_0                               = 0x00028f00,
+	SQ_ALU_CONST_CACHE_HS_0_num                       = 16,
+    SQ_ALU_CONST_CACHE_LS_0                               = 0x00028f40,
+	SQ_ALU_CONST_CACHE_LS_0_num                       = 16,
+    SQ_ALU_CONST_BUFFER_SIZE_HS_0                         = 0x00028f80,
+	SQ_ALU_CONST_BUFFER_SIZE_HS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_shift         = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_LS_0                         = 0x00028fc0,
+	SQ_ALU_CONST_BUFFER_SIZE_LS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_shift         = 0,
+    SQ_VTX_CONSTANT_WORD0_0                               = 0x00030000,
+    SQ_TEX_RESOURCE_WORD0_0                               = 0x00030000,
+	DIM_mask                                          = 0x07 << 0,
+	DIM_shift                                         = 0,
+	    SQ_TEX_DIM_1D                                 = 0x00,
+	    SQ_TEX_DIM_2D                                 = 0x01,
+	    SQ_TEX_DIM_3D                                 = 0x02,
+	    SQ_TEX_DIM_CUBEMAP                            = 0x03,
+	    SQ_TEX_DIM_1D_ARRAY                           = 0x04,
+	    SQ_TEX_DIM_2D_ARRAY                           = 0x05,
+	    SQ_TEX_DIM_2D_MSAA                            = 0x06,
+	    SQ_TEX_DIM_2D_ARRAY_MSAA                      = 0x07,
+/* 	IGNORE_SHADER_ENGINE_TILING_bit                   = 1 << 3, */
+	SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_bit= 1 << 5,
+	PITCH_mask                                        = 0xfff << 6,
+	PITCH_shift                                       = 6,
+	TEX_WIDTH_mask                                    = 0x3fff << 18,
+	TEX_WIDTH_shift                                   = 18,
+    SQ_VTX_CONSTANT_WORD1_0                               = 0x00030004,
+    SQ_TEX_RESOURCE_WORD1_0                               = 0x00030004,
+	TEX_HEIGHT_mask                                   = 0x3fff << 0,
+	TEX_HEIGHT_shift                                  = 0,
+	TEX_DEPTH_mask                                    = 0x1fff << 14,
+	TEX_DEPTH_shift                                   = 14,
+	SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask          = 0x0f << 28,
+	SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift         = 28,
+    SQ_VTX_CONSTANT_WORD2_0                               = 0x00030008,
+	BASE_ADDRESS_HI_mask                              = 0xff << 0,
+	BASE_ADDRESS_HI_shift                             = 0,
+	SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask              = 0x7ff << 8,
+	SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift             = 8,
+	SQ_VTX_CONSTANT_WORD2_0__CLAMP_X_bit              = 1 << 19,
+	SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask         = 0x3f << 20,
+	SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift        = 20,
+	SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask      = 0x03 << 26,
+	SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift     = 26,
+/* 	    SQ_NUM_FORMAT_NORM                            = 0x00, */
+/* 	    SQ_NUM_FORMAT_INT                             = 0x01, */
+/* 	    SQ_NUM_FORMAT_SCALED                          = 0x02, */
+	SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit      = 1 << 28,
+	SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit         = 1 << 29,
+	SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask         = 0x03 << 30,
+	SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift        = 30,
+/* 	    SQ_ENDIAN_NONE                                = 0x00, */
+/* 	    SQ_ENDIAN_8IN16                               = 0x01, */
+/* 	    SQ_ENDIAN_8IN32                               = 0x02, */
+    SQ_TEX_RESOURCE_WORD2_0                               = 0x00030008,
+    SQ_VTX_CONSTANT_WORD3_0                               = 0x0003000c,
+	SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit             = 1 << 2,
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_mask           = 0x07 << 3,
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift          = 3,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_mask           = 0x07 << 6,
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift          = 6,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_mask           = 0x07 << 9,
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift          = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_mask           = 0x07 << 12,
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift          = 12,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+    SQ_TEX_RESOURCE_WORD3_0                               = 0x0003000c,
+    SQ_TEX_RESOURCE_WORD4_0                               = 0x00030010,
+	FORMAT_COMP_X_mask                                = 0x03 << 0,
+	FORMAT_COMP_X_shift                               = 0,
+	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00,
+	    SQ_FORMAT_COMP_SIGNED                         = 0x01,
+	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02,
+	FORMAT_COMP_Y_mask                                = 0x03 << 2,
+	FORMAT_COMP_Y_shift                               = 2,
+/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+	FORMAT_COMP_Z_mask                                = 0x03 << 4,
+	FORMAT_COMP_Z_shift                               = 4,
+/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+	FORMAT_COMP_W_mask                                = 0x03 << 6,
+	FORMAT_COMP_W_shift                               = 6,
+/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+	SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask      = 0x03 << 8,
+	SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift     = 8,
+/* 	    SQ_NUM_FORMAT_NORM                            = 0x00, */
+/* 	    SQ_NUM_FORMAT_INT                             = 0x01, */
+/* 	    SQ_NUM_FORMAT_SCALED                          = 0x02, */
+	SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit         = 1 << 10,
+	SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit        = 1 << 11,
+	SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask         = 0x03 << 12,
+	SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift        = 12,
+/* 	    SQ_ENDIAN_NONE                                = 0x00, */
+/* 	    SQ_ENDIAN_8IN16                               = 0x01, */
+/* 	    SQ_ENDIAN_8IN32                               = 0x02, */
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask           = 0x07 << 16,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift          = 16,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask           = 0x07 << 19,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift          = 19,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask           = 0x07 << 22,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift          = 22,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask           = 0x07 << 25,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift          = 25,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	BASE_LEVEL_mask                                   = 0x0f << 28,
+	BASE_LEVEL_shift                                  = 28,
+    SQ_VTX_CONSTANT_WORD4_0                               = 0x00030010,
+    SQ_TEX_RESOURCE_WORD5_0                               = 0x00030014,
+	LAST_LEVEL_mask                                   = 0x0f << 0,
+	LAST_LEVEL_shift                                  = 0,
+	BASE_ARRAY_mask                                   = 0x1fff << 4,
+	BASE_ARRAY_shift                                  = 4,
+	LAST_ARRAY_mask                                   = 0x1fff << 17,
+	LAST_ARRAY_shift                                  = 17,
+    SQ_TEX_RESOURCE_WORD6_0                               = 0x00030018,
+	PERF_MODULATION_mask                              = 0x07 << 3,
+	PERF_MODULATION_shift                             = 3,
+	INTERLACED_bit                                    = 1 << 6,
+	SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_mask             = 0xfff << 8,
+	SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift            = 8,
+	SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_mask          = 0x07 << 29,
+	SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift         = 29,
+	    SQ_ADDR_SURF_TILE_SPLIT_64B                   = 0x00,
+	    SQ_ADDR_SURF_TILE_SPLIT_128B                  = 0x01,
+	    SQ_ADDR_SURF_TILE_SPLIT_256B                  = 0x02,
+	    SQ_ADDR_SURF_TILE_SPLIT_512B                  = 0x03,
+	    SQ_ADDR_SURF_TILE_SPLIT_1KB                   = 0x04,
+	    SQ_ADDR_SURF_TILE_SPLIT_2KB                   = 0x05,
+	    SQ_ADDR_SURF_TILE_SPLIT_4KB                   = 0x06,
+    SQ_VTX_CONSTANT_WORD7_0                               = 0x0003001c,
+	SQ_VTX_CONSTANT_WORD7_0__TYPE_mask                = 0x03 << 30,
+	SQ_VTX_CONSTANT_WORD7_0__TYPE_shift               = 30,
+	    SQ_TEX_VTX_INVALID_TEXTURE                    = 0x00,
+	    SQ_TEX_VTX_INVALID_BUFFER                     = 0x01,
+	    SQ_TEX_VTX_VALID_TEXTURE                      = 0x02,
+	    SQ_TEX_VTX_VALID_BUFFER                       = 0x03,
+    SQ_TEX_RESOURCE_WORD7_0                               = 0x0003001c,
+	SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask         = 0x3f << 0,
+	SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift        = 0,
+	SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_mask   = 0x03 << 6,
+	SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift  = 6,
+	    SQ_ADDR_SURF_MACRO_ASPECT_1                   = 0x00,
+	    SQ_ADDR_SURF_MACRO_ASPECT_2                   = 0x01,
+	    SQ_ADDR_SURF_MACRO_ASPECT_4                   = 0x02,
+	    SQ_ADDR_SURF_MACRO_ASPECT_8                   = 0x03,
+	SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_mask          = 0x03 << 8,
+	SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift         = 8,
+	    SQ_ADDR_SURF_BANK_WH_1                        = 0x00,
+	    SQ_ADDR_SURF_BANK_WH_2                        = 0x01,
+	    SQ_ADDR_SURF_BANK_WH_4                        = 0x02,
+	    SQ_ADDR_SURF_BANK_WH_8                        = 0x03,
+	SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_mask         = 0x03 << 10,
+	SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift        = 10,
+/* 	    SQ_ADDR_SURF_BANK_WH_1                        = 0x00, */
+/* 	    SQ_ADDR_SURF_BANK_WH_2                        = 0x01, */
+/* 	    SQ_ADDR_SURF_BANK_WH_4                        = 0x02, */
+/* 	    SQ_ADDR_SURF_BANK_WH_8                        = 0x03, */
+	DEPTH_SAMPLE_ORDER_bit                            = 1 << 15,
+	SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_mask           = 0x03 << 16,
+	SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift          = 16,
+	    SQ_ADDR_SURF_2_BANK                           = 0x00,
+	    SQ_ADDR_SURF_4_BANK                           = 0x01,
+	    SQ_ADDR_SURF_8_BANK                           = 0x02,
+	    SQ_ADDR_SURF_16_BANK                          = 0x03,
+	SQ_TEX_RESOURCE_WORD7_0__TYPE_mask                = 0x03 << 30,
+	SQ_TEX_RESOURCE_WORD7_0__TYPE_shift               = 30,
+/* 	    SQ_TEX_VTX_INVALID_TEXTURE                    = 0x00, */
+/* 	    SQ_TEX_VTX_INVALID_BUFFER                     = 0x01, */
+/* 	    SQ_TEX_VTX_VALID_TEXTURE                      = 0x02, */
+/* 	    SQ_TEX_VTX_VALID_BUFFER                       = 0x03, */
+    SQ_LOOP_CONST_DX10_0                                  = 0x0003a200,
+    SQ_LOOP_CONST_0                                       = 0x0003a200,
+	SQ_LOOP_CONST_0__COUNT_mask                       = 0xfff << 0,
+	SQ_LOOP_CONST_0__COUNT_shift                      = 0,
+	INIT_mask                                         = 0xfff << 12,
+	INIT_shift                                        = 12,
+	INC_mask                                          = 0xff << 24,
+	INC_shift                                         = 24,
+    SQ_JUMPTABLE_CONST_0                                  = 0x0003a200,
+	CONST_A_mask                                      = 0xff << 0,
+	CONST_A_shift                                     = 0,
+	CONST_B_mask                                      = 0xff << 8,
+	CONST_B_shift                                     = 8,
+	CONST_C_mask                                      = 0xff << 16,
+	CONST_C_shift                                     = 16,
+	CONST_D_mask                                      = 0xff << 24,
+	CONST_D_shift                                     = 24,
+    SQ_BOOL_CONST_0                                       = 0x0003a500,
+	SQ_BOOL_CONST_0_num                               = 6,
+    SQ_TEX_SAMPLER_WORD0_0                                = 0x0003c000,
+	SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask              = 0x07 << 0,
+	SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift             = 0,
+	    SQ_TEX_WRAP                                   = 0x00,
+	    SQ_TEX_MIRROR                                 = 0x01,
+	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02,
+	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03,
+	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04,
+	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05,
+	    SQ_TEX_CLAMP_BORDER                           = 0x06,
+	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07,
+	CLAMP_Y_mask                                      = 0x07 << 3,
+	CLAMP_Y_shift                                     = 3,
+/* 	    SQ_TEX_WRAP                                   = 0x00, */
+/* 	    SQ_TEX_MIRROR                                 = 0x01, */
+/* 	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02, */
+/* 	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03, */
+/* 	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04, */
+/* 	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05, */
+/* 	    SQ_TEX_CLAMP_BORDER                           = 0x06, */
+/* 	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07, */
+	CLAMP_Z_mask                                      = 0x07 << 6,
+	CLAMP_Z_shift                                     = 6,
+/* 	    SQ_TEX_WRAP                                   = 0x00, */
+/* 	    SQ_TEX_MIRROR                                 = 0x01, */
+/* 	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02, */
+/* 	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03, */
+/* 	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04, */
+/* 	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05, */
+/* 	    SQ_TEX_CLAMP_BORDER                           = 0x06, */
+/* 	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07, */
+	XY_MAG_FILTER_mask                                = 0x03 << 9,
+	XY_MAG_FILTER_shift                               = 9,
+	    SQ_TEX_XY_FILTER_POINT                        = 0x00,
+	    SQ_TEX_XY_FILTER_BILINEAR                     = 0x01,
+	XY_MIN_FILTER_mask                                = 0x03 << 11,
+	XY_MIN_FILTER_shift                               = 11,
+/* 	    SQ_TEX_XY_FILTER_POINT                        = 0x00, */
+/* 	    SQ_TEX_XY_FILTER_BILINEAR                     = 0x01, */
+	Z_FILTER_mask                                     = 0x03 << 13,
+	Z_FILTER_shift                                    = 13,
+	    SQ_TEX_Z_FILTER_NONE                          = 0x00,
+	    SQ_TEX_Z_FILTER_POINT                         = 0x01,
+	    SQ_TEX_Z_FILTER_LINEAR                        = 0x02,
+	MIP_FILTER_mask                                   = 0x03 << 15,
+	MIP_FILTER_shift                                  = 15,
+/* 	    SQ_TEX_Z_FILTER_NONE                          = 0x00, */
+/* 	    SQ_TEX_Z_FILTER_POINT                         = 0x01, */
+/* 	    SQ_TEX_Z_FILTER_LINEAR                        = 0x02, */
+	BORDER_COLOR_TYPE_mask                            = 0x03 << 20,
+	BORDER_COLOR_TYPE_shift                           = 20,
+	    SQ_TEX_BORDER_COLOR_TRANS_BLACK               = 0x00,
+	    SQ_TEX_BORDER_COLOR_OPAQUE_BLACK              = 0x01,
+	    SQ_TEX_BORDER_COLOR_OPAQUE_WHITE              = 0x02,
+	    SQ_TEX_BORDER_COLOR_REGISTER                  = 0x03,
+	DEPTH_COMPARE_FUNCTION_mask                       = 0x07 << 22,
+	DEPTH_COMPARE_FUNCTION_shift                      = 22,
+	    SQ_TEX_DEPTH_COMPARE_NEVER                    = 0x00,
+	    SQ_TEX_DEPTH_COMPARE_LESS                     = 0x01,
+	    SQ_TEX_DEPTH_COMPARE_EQUAL                    = 0x02,
+	    SQ_TEX_DEPTH_COMPARE_LESSEQUAL                = 0x03,
+	    SQ_TEX_DEPTH_COMPARE_GREATER                  = 0x04,
+	    SQ_TEX_DEPTH_COMPARE_NOTEQUAL                 = 0x05,
+	    SQ_TEX_DEPTH_COMPARE_GREATEREQUAL             = 0x06,
+	    SQ_TEX_DEPTH_COMPARE_ALWAYS                   = 0x07,
+	CHROMA_KEY_mask                                   = 0x03 << 25,
+	CHROMA_KEY_shift                                  = 25,
+	    SQ_TEX_CHROMA_KEY_DISABLED                    = 0x00,
+	    SQ_TEX_CHROMA_KEY_KILL                        = 0x01,
+	    SQ_TEX_CHROMA_KEY_BLEND                       = 0x02,
+    SQ_TEX_SAMPLER_WORD1_0                                = 0x0003c004,
+	SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_mask              = 0xfff << 0,
+	SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift             = 0,
+	MAX_LOD_mask                                      = 0xfff << 12,
+	MAX_LOD_shift                                     = 12,
+	PERF_MIP_mask                                     = 0x0f << 24,
+	PERF_MIP_shift                                    = 24,
+	PERF_Z_mask                                       = 0x0f << 28,
+	PERF_Z_shift                                      = 28,
+    SQ_TEX_SAMPLER_WORD2_0                                = 0x0003c008,
+	SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_mask             = 0x3fff << 0,
+	SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift            = 0,
+	LOD_BIAS_SEC_mask                                 = 0x3f << 14,
+	LOD_BIAS_SEC_shift                                = 14,
+	MC_COORD_TRUNCATE_bit                             = 1 << 20,
+	SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit         = 1 << 21,
+	TRUNCATE_COORD_bit                                = 1 << 28,
+	SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit     = 1 << 29,
+	SQ_TEX_SAMPLER_WORD2_0__TYPE_bit                  = 1 << 31,
+    SQ_VTX_BASE_VTX_LOC                                   = 0x0003cff0,
+    SQ_VTX_START_INST_LOC                                 = 0x0003cff4,
+    SQ_TEX_SAMPLER_CLEAR                                  = 0x0003ff00,
+    SQ_TEX_RESOURCE_CLEAR                                 = 0x0003ff04,
+    SQ_LOOP_BOOL_CLEAR                                    = 0x0003ff08,
+
+} ;
+
+#endif /* _EVERGREEN_REG_AUTO_H */
+
diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c
new file mode 100644
index 0000000..64e96d8
--- /dev/null
+++ b/src/evergreen_shader.c
@@ -0,0 +1,2790 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher at amd.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include "evergreen_shader.h"
+#include "evergreen_reg.h"
+
+/* solid vs --------------------------------------- */
+/*
+ * Write the "solid" vertex shader into the caller-supplied buffer.
+ *
+ * The words are stored sequentially starting at shader[0]:
+ *   - one CF instruction of type SQ_CF_INST_VC with ADDR(4),
+ *   - a position export (SQ_EXPORT_POS) reading GPR 1,
+ *   - a parameter export (SQ_EXPORT_PARAM) reading GPR 0, which also sets
+ *     END_OF_PROGRAM(1),
+ *   - two zero dwords of padding,
+ *   - one vertex fetch (four dwords) of FMT_32_32_FLOAT data into GPR 1.
+ *
+ * ChipSet is not referenced in the body.
+ * shader must have room for the 12 dwords stored here; the function does no
+ * bounds checking.
+ *
+ * Returns the number of dwords written.
+ */
+int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 - VC clause; ADDR(4) matches the fetch labelled 4/5 below */
+    shader[i++] = CF_DWORD0(ADDR(4),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(1),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_VC),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* 1 - position export from GPR 1 (the GPR the fetch below writes) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+    /* 2 - always export a param whether it's used or not */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(0),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(1),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(0));
+    /* 3 - padding */
+    shader[i++] = 0x00000000;
+    shader[i++] = 0x00000000;
+    /* 4/5 - vertex fetch into GPR 1: X/Y come from the fetched data, Z/W use
+     * the SQ_SEL_0 / SQ_SEL_1 selectors */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(1),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+/* solid ps --------------------------------------- */
+/*
+ * Write the "solid" pixel shader into the caller-supplied buffer.
+ *
+ * The words are stored sequentially starting at shader[0]:
+ *   - one CF ALU clause (SQ_CF_INST_ALU) with ADDR(2), I_COUNT(4) and
+ *     KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1),
+ *   - a pixel export (SQ_EXPORT_PIXEL, CF_PIXEL_MRT0) reading GPR 0, which
+ *     also sets END_OF_PROGRAM(1),
+ *   - four SQ_OP2_INST_MOV ALU instructions, one per element X/Y/Z/W, each
+ *     with SRC0 = ALU_SRC_KCACHE0_BASE + 0 and destination GPR 0, CLAMP(1).
+ *
+ * NOTE(review): the kcache entry presumably holds the fill colour set up by
+ * the caller's constant buffer - confirm against the code that binds it.
+ *
+ * ChipSet is not referenced in the body.
+ * shader must have room for the 12 dwords stored here; the function does no
+ * bounds checking.
+ *
+ * Returns the number of dwords written.
+ */
+int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 - ALU clause; ADDR(2)/I_COUNT(4) match the ALU instructions
+     * labelled 2-5 below */
+    shader[i++] = CF_ALU_DWORD0(ADDR(2),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+    /* 1 - export GPR 0 to CF_PIXEL_MRT0 and end the program */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(1),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+
+    /* 2 - MOV into GPR 0 element X from kcache0 entry 0 element X */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(1));
+    /* 3 - same MOV for element Y */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Y),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(1));
+    /* 4 - same MOV for element Z */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Z),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Z),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(1));
+    /* 5 - same MOV for element W; the only one of the four with LAST(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_W),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_W),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(1));
+
+    return i;
+}
+
+/* copy vs --------------------------------------- */
+/*
+ * Write the "copy" vertex shader into the caller-supplied buffer.
+ *
+ * The words are stored sequentially starting at shader[0]:
+ *   - one CF instruction of type SQ_CF_INST_VC with ADDR(4) and I_COUNT(2),
+ *   - a position export (SQ_EXPORT_POS) reading GPR 1,
+ *   - a parameter export (SQ_EXPORT_PARAM) reading GPR 0, which also sets
+ *     END_OF_PROGRAM(1),
+ *   - two zero dwords of padding,
+ *   - two vertex fetches (four dwords each) of FMT_32_32_FLOAT data:
+ *     the first into GPR 1 at OFFSET(0), the second into GPR 0 at OFFSET(8).
+ *
+ * NOTE(review): given the exports above, GPR 1 presumably carries the
+ * destination position and GPR 0 the source coordinates - confirm against
+ * the vertex layout written by the caller.
+ *
+ * ChipSet is not referenced in the body.
+ * shader must have room for the 16 dwords stored here; the function does no
+ * bounds checking.
+ *
+ * Returns the number of dwords written.
+ */
+int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 - VC clause; ADDR(4)/I_COUNT(2) match the two fetches labelled
+     * 4/5 and 6/7 below */
+    shader[i++] = CF_DWORD0(ADDR(4),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(2),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_VC),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* 1 - position export from GPR 1 (written by the first fetch) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(0),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+    /* 2 - parameter export from GPR 0 (written by the second fetch); ends
+     * the program */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(0),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(1),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(0));
+    /* 3 - padding */
+    shader[i++] = 0x00000000;
+    shader[i++] = 0x00000000;
+    /* 4/5 - fetch into GPR 1 from OFFSET(0); MEGA_FETCH(1) with
+     * MEGA_FETCH_COUNT(16) */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(16));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(1),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 6/7 - fetch into GPR 0 from OFFSET(8); MEGA_FETCH(0) */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+/* copy ps: fills shader[] with 20 dwords (3 CF, 4 ALU, 1 zero slot, 1 TEX fetch) and returns that count */
+int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0; /* next free dword in shader[]; ChipSet is not referenced in this function */
+
+    /* CF INST 0: ALU clause, ADDR(3), I_COUNT(4) - the four INTERP_XY slots labelled 3..6 below */
+    shader[i++] = CF_ALU_DWORD0(ADDR(3),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+    /* CF INST 1: TC clause, ADDR(8), I_COUNT(1) - the TEX fetch labelled 8/9 below */
+    shader[i++] = CF_DWORD0(ADDR(8),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(1),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_TC),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* CF INST 2: EXPORT_DONE of GPR 0 to CF_PIXEL_MRT0 with END_OF_PROGRAM(1) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(1),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+
+    /* 3 interpolate tex coords: INTERP_XY of GPR0.y with PARAM 0, DST GPR0.x, WRITE_MASK(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(0));
+    /* 4: INTERP_XY of GPR0.x with PARAM 0, DST GPR0.y, WRITE_MASK(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(0));
+    /* 5: same sources as slot 3, DST GPR0.z but WRITE_MASK(0) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(0));
+    /* 6: same sources as slot 4, DST GPR0.w with WRITE_MASK(0); the only slot here with LAST(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(0));
+
+    /* 7: two zero dwords; presumably padding so the TEX fetch starts at slot 8 - TODO confirm */
+    shader[i++] = 0x00000000;
+    shader[i++] = 0x00000000;
+
+    /* 8/9 TEX INST 0: SAMPLE resource 0 / sampler 0, coords from GPR 0, result into GPR 0, unnormalized coords */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     INST_MOD(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(0),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(SQ_SEL_X), /* R */
+			     DST_SEL_Y(SQ_SEL_Y), /* G */
+			     DST_SEL_Z(SQ_SEL_Z), /* B */
+			     DST_SEL_W(SQ_SEL_W), /* A */
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_UNNORMALIZED),
+			     COORD_TYPE_Y(TEX_UNNORMALIZED),
+			     COORD_TYPE_Z(TEX_UNNORMALIZED),
+			     COORD_TYPE_W(TEX_UNNORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(0),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    return i; /* number of dwords written (20) */
+}
+
+int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0; /* next free dword; fills shader[] with 20 dwords (4 CF, 2 ALU MULs, 2 vertex fetches) and returns that count; ChipSet is not referenced */
+
+    /* 0: VC clause, ADDR(6), I_COUNT(2) - the two vertex fetches labelled 6/7 and 8/9 below */
+    shader[i++] = CF_DWORD0(ADDR(6),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(2),
+                            VALID_PIXEL_MODE(0),
+                            END_OF_PROGRAM(0),
+                            CF_INST(SQ_CF_INST_VC),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+
+    /* 1 - ALU: ADDR(4), I_COUNT(2), kcache bank 0 in LOCK_1 mode - the MULs labelled 4 and 5 below */
+    shader[i++] = CF_ALU_DWORD0(ADDR(4),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(2),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 2: EXPORT_DONE of GPR 1 as SQ_EXPORT_POS to CF_POS0 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+                                          TYPE(SQ_EXPORT_POS),
+                                          RW_GPR(1),
+                                          RW_REL(ABSOLUTE),
+                                          INDEX_GPR(0),
+                                          ELEM_SIZE(3));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                               SRC_SEL_Y(SQ_SEL_Y),
+                                               SRC_SEL_Z(SQ_SEL_Z),
+                                               SRC_SEL_W(SQ_SEL_W),
+                                               BURST_COUNT(1),
+                                               VALID_PIXEL_MODE(0),
+                                               END_OF_PROGRAM(0),
+                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                               MARK(0),
+                                               BARRIER(1));
+    /* 3: EXPORT_DONE of GPR 0 as SQ_EXPORT_PARAM 0, with END_OF_PROGRAM(1) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+                                          TYPE(SQ_EXPORT_PARAM),
+                                          RW_GPR(0),
+                                          RW_REL(ABSOLUTE),
+                                          INDEX_GPR(0),
+                                          ELEM_SIZE(3));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                               SRC_SEL_Y(SQ_SEL_Y),
+                                               SRC_SEL_Z(SQ_SEL_Z),
+                                               SRC_SEL_W(SQ_SEL_W),
+                                               BURST_COUNT(1),
+                                               VALID_PIXEL_MODE(0),
+                                               END_OF_PROGRAM(1),
+                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                               MARK(0),
+                                               BARRIER(0));
+
+
+    /* 4 texX / w: GPR0.x = GPR0.x * kcache0[0].x (encoded as MUL; constant presumably holds 1/w - confirm) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 5 texY / h: GPR0.y = GPR0.y * kcache0[0].y (encoded as MUL; constant presumably holds 1/h - confirm) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 6/7: fetch FMT_32_32_FLOAT at OFFSET(0) from buffer 0 into GPR 1 .xy, with .z = 0 and .w = 1 (GPR 1 is the position export source) */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+                             FETCH_WHOLE_QUAD(0),
+                             BUFFER_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             SRC_SEL_X(SQ_SEL_X),
+                             MEGA_FETCH_COUNT(16));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_SEL_X(SQ_SEL_X),
+                                 DST_SEL_Y(SQ_SEL_Y),
+                                 DST_SEL_Z(SQ_SEL_0),
+                                 DST_SEL_W(SQ_SEL_1),
+                                 USE_CONST_FIELDS(0),
+                                 DATA_FORMAT(FMT_32_32_FLOAT),
+                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+                             ENDIAN_SWAP(ENDIAN_NONE),
+                             CONST_BUF_NO_STRIDE(0),
+                             MEGA_FETCH(1),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 8/9: fetch FMT_32_32_FLOAT at OFFSET(8) from buffer 0 into GPR 0 .xy, with .z = 0 and .w = 1 (GPR 0 feeds the MULs and the param export) */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+                             FETCH_WHOLE_QUAD(0),
+                             BUFFER_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             SRC_SEL_X(SQ_SEL_X),
+                             MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_SEL_X(SQ_SEL_X),
+                                 DST_SEL_Y(SQ_SEL_Y),
+                                 DST_SEL_Z(SQ_SEL_0),
+                                 DST_SEL_W(SQ_SEL_1),
+                                 USE_CONST_FIELDS(0),
+                                 DATA_FORMAT(FMT_32_32_FLOAT),
+                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+                             ENDIAN_SWAP(ENDIAN_NONE),
+                             CONST_BUF_NO_STRIDE(0),
+                             MEGA_FETCH(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i; /* number of dwords written (20) */
+}
+
+int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(5),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+    /* 1 */
+    shader[i++] = CF_DWORD0(ADDR(21),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            END_OF_PROGRAM(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(0));
+    /* 2 */
+    shader[i++] = CF_DWORD0(ADDR(30),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_NOT_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            END_OF_PROGRAM(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(0));
+    /* 3 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(9),
+                                KCACHE_BANK0(0),
+                                KCACHE_BANK1(0),
+                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+                                KCACHE_ADDR0(0),
+                                KCACHE_ADDR1(0),
+                                I_COUNT(12),
+                                ALT_CONST(0),
+                                CF_INST(SQ_CF_INST_ALU),
+                                WHOLE_QUAD_MODE(0),
+                                BARRIER(1));
+    /* 4 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+                                          TYPE(SQ_EXPORT_PIXEL),
+                                          RW_GPR(2),
+                                          RW_REL(ABSOLUTE),
+                                          INDEX_GPR(0),
+                                          ELEM_SIZE(3));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                               SRC_SEL_Y(SQ_SEL_Y),
+                                               SRC_SEL_Z(SQ_SEL_Z),
+                                               SRC_SEL_W(SQ_SEL_W),
+                                               BURST_COUNT(1),
+                                               VALID_PIXEL_MODE(0),
+                                               END_OF_PROGRAM(1),
+                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                               MARK(0),
+                                               BARRIER(1));
+    /* 5 interpolate tex coords */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(0));
+    /* 6 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(0));
+    /* 7 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(0));
+    /* 8 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(0));
+
+    /* 9,10,11,12 */
+    /* r2.x = MAD(c0.w, r1.x, c0.x) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_X),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+    /* r2.y = MAD(c0.w, r1.x, c0.y) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Y),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+    /* r2.z = MAD(c0.w, r1.x, c0.z) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+    /* r2.w = MAD(0, 0, 1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(SQ_ALU_SRC_0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_X),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+
+    /* 13,14,15,16 */
+    /* r2.x = MAD(c1.x, r1.y, pv.x) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_X),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+    /* r2.y = MAD(c1.y, r1.y, pv.y) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Y),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+    /* r2.z = MAD(c1.z, r1.y, pv.z) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+    /* r2.w = MAD(0, 0, 1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(SQ_ALU_SRC_0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_W),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+    /* 17,18,19,20 */
+    /* r2.x = MAD(c2.x, r1.z, pv.x) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_X),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(1));
+    /* r2.y = MAD(c2.y, r1.z, pv.y) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Y),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(1));
+    /* r2.z = MAD(c2.z, r1.z, pv.z) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(1));
+    /* r2.w = MAD(0, 0, 1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(SQ_ALU_SRC_0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_X),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(1));
+
+    /* 21 */
+    shader[i++] = CF_DWORD0(ADDR(24),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(3),
+                            VALID_PIXEL_MODE(0),
+                            END_OF_PROGRAM(0),
+                            CF_INST(SQ_CF_INST_TC),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+    /* 22 */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* 23 */
+    shader[i++] = 0x00000000;
+    shader[i++] = 0x00000000;
+    /* 24/25 */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             INST_MOD(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_X),
+                             DST_SEL_Y(SQ_SEL_MASK),
+                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_W(SQ_SEL_1),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(0),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* 26/27 */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             INST_MOD(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(1),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_MASK),
+                             DST_SEL_Z(SQ_SEL_X),
+                             DST_SEL_W(SQ_SEL_MASK),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(1),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* 28/29 */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             INST_MOD(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(2),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_X),
+                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_W(SQ_SEL_MASK),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(2),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* 30 */
+    shader[i++] = CF_DWORD0(ADDR(32),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(2),
+                            VALID_PIXEL_MODE(0),
+                            END_OF_PROGRAM(0),
+                            CF_INST(SQ_CF_INST_TC),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+    /* 31 */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* 32/33 */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             INST_MOD(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_X),
+                             DST_SEL_Y(SQ_SEL_MASK),
+                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_W(SQ_SEL_1),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(0),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* 34/35 */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             INST_MOD(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(1),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_X),
+                             DST_SEL_Z(SQ_SEL_Y),
+                             DST_SEL_W(SQ_SEL_MASK),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(1),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    return i;
+}
+
+/* comp vs --------------------------------------- */
+int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 */
+    shader[i++] = CF_DWORD0(ADDR(3),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            END_OF_PROGRAM(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(0));
+    /* 1 */
+    shader[i++] = CF_DWORD0(ADDR(9),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_NOT_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            END_OF_PROGRAM(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(0));
+    /* 2 */
+    shader[i++] = CF_DWORD0(ADDR(0),
+                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            END_OF_PROGRAM(1),
+                            CF_INST(SQ_CF_INST_NOP),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+    /* 3 - mask sub */
+    shader[i++] = CF_DWORD0(ADDR(32),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(3),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_VC),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* 4 - ALU */
+    shader[i++] = CF_ALU_DWORD0(ADDR(14),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(12),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 5 - dst */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(2),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+    /* 6 - src */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(0),
+					       CF_INST(SQ_CF_INST_EXPORT),
+					       MARK(0),
+					       BARRIER(0));
+    /* 7 - mask */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(0));
+    /* 8 */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* 9 - non-mask sub */
+    shader[i++] = CF_DWORD0(ADDR(38),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(2),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_VC),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* 10 - ALU */
+    shader[i++] = CF_ALU_DWORD0(ADDR(26),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(6),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 11 - dst */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       BURST_COUNT(0),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+    /* 12 - src */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       BURST_COUNT(0),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(0));
+    /* 13 */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* mask alu - 14 srcX MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+    /* 15 srcY MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+
+    /* 16 srcX MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+    /* 17 srcY MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 1),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_W),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 18 maskX MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+
+    /* 19 maskY MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 3),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+
+    /* 20 maskX MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+    /* 21 maskY MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 3),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_W),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 22 srcX / w */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 23 srcY / h */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 24 maskX / w */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 25 maskY / h */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* no mask alu - 26 srcX MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+    /* 27 srcY MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+
+    /* 28 srcX MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+    /* 29 srcY MAD */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_GPR_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_W),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+    /* 30 srcX / w */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 31 srcY / h */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* mask vfetch - 32/33 - dst */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(24));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(1),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 34/35 - src */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_1),
+				 DST_SEL_W(SQ_SEL_0),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 36/37 - mask */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_1),
+				 DST_SEL_W(SQ_SEL_0),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(16),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+
+    /* no mask vfetch - 38/39 - dst */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(16));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(1),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 40/41 - src */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_1),
+				 DST_SEL_W(SQ_SEL_0),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(0),
+                             ALT_CONST(0),
+                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+/* comp ps - fills shader[] with the composite pixel shader, returns the dword count (ChipSet unused) */
+int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 - CALL the mask sub at CF 3, COND_BOOL on CF const 0 */
+    shader[i++] = CF_DWORD0(ADDR(3),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            END_OF_PROGRAM(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(0));
+    /* 1 - CALL the non-mask sub at CF 8, COND_NOT_BOOL on CF const 0 */
+    shader[i++] = CF_DWORD0(ADDR(8),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_NOT_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            END_OF_PROGRAM(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(0));
+    /* 2 - NOP flagged END_OF_PROGRAM(1) */
+    shader[i++] = CF_DWORD0(ADDR(0),
+                            JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            END_OF_PROGRAM(1),
+                            CF_INST(SQ_CF_INST_NOP),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+
+    /* 3 - mask sub: ALU clause at 12, 8 insts (src and mask tex coord interpolation) */
+    shader[i++] = CF_ALU_DWORD0(ADDR(12),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(8),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 4 - TC clause at 28, 2 fetches (src at 28/29, mask at 30/31) */
+    shader[i++] = CF_DWORD0(ADDR(28),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(2),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_TC),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* 5 - ALU clause at 20, 4 insts (per-channel MUL of gpr[0] and gpr[1] into gpr[2]) */
+    shader[i++] = CF_ALU_DWORD0(ADDR(20),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 6 - EXPORT_DONE of gpr[2] (xyzw) to CF_PIXEL_MRT0 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(2),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+    /* 7 - RETURN from the mask sub */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* 8 - non-mask sub: ALU clause at 24, 4 insts (src tex coord interpolation) */
+    shader[i++] = CF_ALU_DWORD0(ADDR(24),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+    /* 9 - TC clause at 32, 1 fetch (src at 32/33) */
+    shader[i++] = CF_DWORD0(ADDR(32),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(1),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_TC),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* 10 - EXPORT_DONE of gpr[0] (xyzw) to CF_PIXEL_MRT0 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       END_OF_PROGRAM(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+
+    /* 11 - RETURN from the non-mask sub */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    END_OF_PROGRAM(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* 12 interpolate src tex coords - mask (INTERP_XY of PARAM 0, dst gpr[1].x) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(0));
+    /* 13 - dst gpr[1].y */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(0));
+    /* 14 - dst gpr[1].z, WRITE_MASK(0) so the result is not written */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(0));
+    /* 15 - dst gpr[1].w, WRITE_MASK(0), LAST(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(0));
+
+    /* 16 interpolate mask tex coords (INTERP_XY of PARAM 1, dst gpr[0].x) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(0));
+    /* 17 - dst gpr[0].y */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(0));
+    /* 18 - dst gpr[0].z, WRITE_MASK(0) so the result is not written */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(0));
+    /* 19 - dst gpr[0].w, WRITE_MASK(0), LAST(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(0));
+
+    /* 20 - alu 0 */
+    /* MUL gpr[2].x gpr[0].x gpr[1].x, with CLAMP(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(1));
+    /* 21 - alu 1 */
+    /* MUL gpr[2].y gpr[0].y gpr[1].y, with CLAMP(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Y),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(1));
+    /* 22 - alu 2 */
+    /* MUL gpr[2].z gpr[0].z gpr[1].z, with CLAMP(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Z),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Z),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(1));
+    /* 23 - alu 3 */
+    /* MUL gpr[2].w gpr[0].w gpr[1].w, with CLAMP(1); LAST(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_W),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_W),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(1));
+
+    /* 24 - interpolate tex coords - non-mask (INTERP_XY of PARAM 0, dst gpr[0].x) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(0));
+    /* 25 - dst gpr[0].y */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(0));
+    /* 26 - dst gpr[0].z, WRITE_MASK(0) so the result is not written */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(0));
+    /* 27 - dst gpr[0].w, WRITE_MASK(0), LAST(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(0));
+
+    /* 28/29 - src - mask: SAMPLE resource 0 / sampler 0, coords gpr[1], result gpr[1] */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     INST_MOD(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(0),
+			     SRC_GPR(1),
+			     SRC_REL(ABSOLUTE),
+			     ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_Z),
+			     DST_SEL_W(SQ_SEL_W),
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_NORMALIZED),
+			     COORD_TYPE_Y(TEX_NORMALIZED),
+			     COORD_TYPE_Z(TEX_NORMALIZED),
+			     COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(0),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* 30/31 - mask: SAMPLE resource 1 / sampler 1, coords gpr[0], result gpr[0] */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     INST_MOD(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(1),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(0),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_Z),
+			     DST_SEL_W(SQ_SEL_W),
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_NORMALIZED),
+			     COORD_TYPE_Y(TEX_NORMALIZED),
+			     COORD_TYPE_Z(TEX_NORMALIZED),
+			     COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(1),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    /* 32/33 - src - non-mask: SAMPLE resource 0 / sampler 0, coords gpr[0], result gpr[0] */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     INST_MOD(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(0),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_Z),
+			     DST_SEL_W(SQ_SEL_W),
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_NORMALIZED),
+			     COORD_TYPE_Y(TEX_NORMALIZED),
+			     COORD_TYPE_Z(TEX_NORMALIZED),
+			     COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(0),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    return i;
+}
diff --git a/src/evergreen_shader.h b/src/evergreen_shader.h
new file mode 100644
index 0000000..4106619
--- /dev/null
+++ b/src/evergreen_shader.h
@@ -0,0 +1,292 @@
+/*
+ * Evergreen shaders
+ *
+ * Copyright (C) 2010  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Shader macros
+ */
+
+#ifndef __SHADER_H__
+#define __SHADER_H__
+
+#include "radeon.h"
+
+/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */
+
+
+// CF insts
+// addr
+#define ADDR(x)  (x)
+// jumptable
+#define JUMPTABLE_SEL(x) (x)
+// pc
+#define POP_COUNT(x)      (x)
+// const
+#define CF_CONST(x)       (x)
+// cond
+#define COND(x)        (x)		// SQ_COND_*
+// count
+#define I_COUNT(x)        ((x) ? ((x) - 1) : 0)
+// vpm
+#define VALID_PIXEL_MODE(x) (x)
+// eop
+#define END_OF_PROGRAM(x)   (x)
+// cf inst
+#define CF_INST(x)        (x)		// SQ_CF_INST_*
+// wqm
+#define WHOLE_QUAD_MODE(x)  (x)
+// barrier
+#define BARRIER(x)          (x)
+//kb0
+#define KCACHE_BANK0(x)          (x)
+//kb1
+#define KCACHE_BANK1(x)          (x)
+// km0/1
+#define KCACHE_MODE0(x)          (x)
+#define KCACHE_MODE1(x)          (x)	// SQ_CF_KCACHE_*
+//
+#define KCACHE_ADDR0(x)          (x)
+#define KCACHE_ADDR1(x)          (x)
+
+#define ALT_CONST(x)            (x)
+
+#define ARRAY_BASE(x)        (x)
+// export pixel
+#define CF_PIXEL_MRT0         0
+#define CF_PIXEL_MRT1         1
+#define CF_PIXEL_MRT2         2
+#define CF_PIXEL_MRT3         3
+#define CF_PIXEL_MRT4         4
+#define CF_PIXEL_MRT5         5
+#define CF_PIXEL_MRT6         6
+#define CF_PIXEL_MRT7         7
+// computed Z
+#define CF_COMPUTED_Z         61
+// export pos
+#define CF_POS0               60
+#define CF_POS1               61
+#define CF_POS2               62
+#define CF_POS3               63
+// export param
+// 0...31
+#define TYPE(x)              (x)	// SQ_EXPORT_*
+#define RW_GPR(x)            (x)
+#define RW_REL(x)            (x)
+#define ABSOLUTE                  0
+#define RELATIVE                  1
+#define INDEX_GPR(x)            (x)
+#define ELEM_SIZE(x)            ((x) ? ((x) - 1) : 0)	// encoded as (x - 1); 0 stays 0
+#define BURST_COUNT(x)          ((x) ? ((x) - 1) : 0)	// encoded as (x - 1); 0 stays 0
+#define MARK(x)         (x)
+
+// swiz
+#define SRC_SEL_X(x)    (x)		// SQ_SEL_* each
+#define SRC_SEL_Y(x)    (x)
+#define SRC_SEL_Z(x)    (x)
+#define SRC_SEL_W(x)    (x)
+
+#define CF_DWORD0(addr, jmptbl) ((addr) | ((jmptbl) << 24))
+#define CF_DWORD1(pc, cf_const, cond, count, vpm, eop, cf_inst, wqm, b) \
+        (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | ((count) << 10) | \
+         ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | ((wqm) << 30) | ((b) << 31))
+
+#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30))
+#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, alt_const, cf_inst, wqm, b) \
+        (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
+	 ((count) << 18) | ((alt_const) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31))
+
+#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
+	 (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | \
+	  ((index_gpr) << 23) | ((es) << 30))
+#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, bc, vpm, eop, cf_inst, m, b) \
+        (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | \
+	 ((bc) << 16) | ((vpm) << 20) | ((eop) << 21) | ((cf_inst) << 22) | \
+	 ((m) << 30) | ((b) << 31))
+
+// ALU clause insts
+#define SRC0_SEL(x)        (x)
+#define SRC1_SEL(x)        (x)
+#define SRC2_SEL(x)        (x)
+// src[0-2]_sel
+//   0-127 GPR
+// 128-159 kcache constants bank 0
+// 160-191 kcache constants bank 1
+// 192-255 inline const values
+// 256-287 kcache constants bank 2
+// 288-319 kcache constants bank 3
+// 219-255 special SQ_ALU_SRC_* (0, 1, etc.)
+// 448-480 src param space
+#define ALU_SRC_GPR_BASE        0
+#define ALU_SRC_KCACHE0_BASE  128
+#define ALU_SRC_KCACHE1_BASE  160
+#define ALU_SRC_INLINE_K_BASE 192
+#define ALU_SRC_KCACHE2_BASE  256
+#define ALU_SRC_KCACHE3_BASE  288
+#define ALU_SRC_PARAM_BASE    448
+
+#define SRC0_REL(x)        (x)
+#define SRC1_REL(x)        (x)
+#define SRC2_REL(x)        (x)
+// elem
+#define SRC0_ELEM(x)        (x)
+#define SRC1_ELEM(x)        (x)
+#define SRC2_ELEM(x)        (x)
+#define ELEM_X        0
+#define ELEM_Y        1
+#define ELEM_Z        2
+#define ELEM_W        3
+// neg
+#define SRC0_NEG(x)        (x)
+#define SRC1_NEG(x)        (x)
+#define SRC2_NEG(x)        (x)
+// im
+#define INDEX_MODE(x)    (x)		// SQ_INDEX_*
+// ps
+#define PRED_SEL(x)      (x)		// SQ_PRED_SEL_*
+// last
+#define LAST(x)          (x)
+// abs
+#define SRC0_ABS(x)       (x)
+#define SRC1_ABS(x)       (x)
+// uem
+#define UPDATE_EXECUTE_MASK(x) (x)
+// up
+#define UPDATE_PRED(x)      (x)
+// wm
+#define WRITE_MASK(x)   (x)
+// omod
+#define OMOD(x)        (x)		// SQ_ALU_OMOD_*
+// alu inst
+#define ALU_INST(x)        (x)		// SQ_ALU_INST_*
+//bs
+#define BANK_SWIZZLE(x)        (x)	// SQ_ALU_VEC_*
+#define DST_GPR(x)        (x)
+#define DST_REL(x)        (x)
+#define DST_ELEM(x)       (x)
+#define CLAMP(x)          (x)
+
+#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
+        (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
+         ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
+	 ((im) << 26) | ((ps) << 29) | ((last) << 31))
+
+#define ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
+        (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
+         ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \
+	 ((dr) << 28) | ((de) << 29) | ((clamp) << 31))
+
+#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
+        (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
+         ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
+	 ((de) << 29) | ((clamp) << 31))
+
+// VTX clause insts
+// vtx insts
+#define VTX_INST(x)        (x)		// SQ_VTX_INST_*
+
+// fetch type
+#define FETCH_TYPE(x)        (x)	// SQ_VTX_FETCH_*
+
+#define FETCH_WHOLE_QUAD(x)        (x)
+#define BUFFER_ID(x)        (x)
+#define SRC_GPR(x)          (x)
+#define SRC_REL(x)          (x)
+#define MEGA_FETCH_COUNT(x)        ((x) ? ((x) - 1) : 0)
+
+#define DST_SEL_X(x)          (x)
+#define DST_SEL_Y(x)          (x)
+#define DST_SEL_Z(x)          (x)
+#define DST_SEL_W(x)          (x)
+#define USE_CONST_FIELDS(x)   (x)
+#define DATA_FORMAT(x)        (x)
+// num format
+#define NUM_FORMAT_ALL(x)     (x)	// SQ_NUM_FORMAT_*
+// format comp
+#define FORMAT_COMP_ALL(x)     (x)	// SQ_FORMAT_COMP_*
+// sma
+#define SRF_MODE_ALL(x)     (x)
+#define SRF_MODE_ZERO_CLAMP_MINUS_ONE      0
+#define SRF_MODE_NO_ZERO                   1
+#define OFFSET(x)     (x)
+// endian swap
+#define ENDIAN_SWAP(x)     (x)		// SQ_ENDIAN_*
+#define CONST_BUF_NO_STRIDE(x)     (x)
+// mf
+#define MEGA_FETCH(x)     (x)
+#define BUFFER_INDEX_MODE(x) (x)
+
+#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
+        (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
+	 ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((mfc) << 26))
+#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
+        (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+	 ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))
+#define VTX_DWORD2(offset, es, cbns, mf, alt_const, bim)			\
+	(((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19) | ((alt_const) << 20) | ((bim) << 21))
+#define VTX_DWORD_PAD 0x00000000
+
+// TEX clause insts
+// tex insts
+#define TEX_INST(x)     (x)		// SQ_TEX_INST_*
+#define INST_MOD(x)     (x)
+#define FETCH_WHOLE_QUAD(x)     (x)
+#define RESOURCE_ID(x)          (x)
+#define RESOURCE_INDEX_MODE(x)          (x)
+#define SAMPLER_INDEX_MODE(x)          (x)
+
+#define LOD_BIAS(x)     (x)
+//ct
+#define COORD_TYPE_X(x)     (x)
+#define COORD_TYPE_Y(x)     (x)
+#define COORD_TYPE_Z(x)     (x)
+#define COORD_TYPE_W(x)     (x)
+#define TEX_UNNORMALIZED                0
+#define TEX_NORMALIZED                  1
+#define OFFSET_X(x) (((int)(x) * 2) & 0x1f) /* 4:1-bits 2's-complement fixed-point: [-8.0..7.5] */
+#define OFFSET_Y(x) (((int)(x) * 2) & 0x1f)
+#define OFFSET_Z(x) (((int)(x) * 2) & 0x1f)
+#define SAMPLER_ID(x)     (x)
+
+#define TEX_DWORD0(tex_inst, im, fwq, resource_id, src_gpr, sr, ac, rim, sim) \
+	 (((tex_inst) << 0) | ((im) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
+	  ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24) | ((rim) << 25) | ((sim) << 27))
+#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
+        (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+	 ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31))
+#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
+        (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \
+	 ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))
+#define TEX_DWORD_PAD 0x00000000
+
+extern int evergreen_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int evergreen_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int evergreen_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int evergreen_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int evergreen_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
+extern int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);
+
+extern int evergreen_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+#endif
diff --git a/src/evergreen_state.h b/src/evergreen_state.h
new file mode 100644
index 0000000..5869256
--- /dev/null
+++ b/src/evergreen_state.h
@@ -0,0 +1,338 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Alex Deucher <alexander.deucher at amd.com>
+ *
+ */
+
+#ifndef __EVERGREEN_STATE_H__
+#define __EVERGREEN_STATE_H__
+
+typedef int bool_t;
+
+#define CLEAR(x) memset (&(x), 0, sizeof(x))	/* zero-fill the object named by x */
+
+/* Sequencer / thread handling */
+typedef struct {
+    int ps_prio;
+    int vs_prio;
+    int gs_prio;
+    int es_prio;
+    int hs_prio;
+    int ls_prio;
+    int cs_prio;
+    int num_ps_gprs;
+    int num_vs_gprs;
+    int num_gs_gprs;
+    int num_es_gprs;
+    int num_hs_gprs;
+    int num_ls_gprs;
+    int num_cs_gprs;
+    int num_temp_gprs;
+    int num_ps_threads;
+    int num_vs_threads;
+    int num_gs_threads;
+    int num_es_threads;
+    int num_hs_threads;
+    int num_ls_threads;
+    int num_ps_stack_entries;
+    int num_vs_stack_entries;
+    int num_gs_stack_entries;
+    int num_es_stack_entries;
+    int num_hs_stack_entries;
+    int num_ls_stack_entries;
+} sq_config_t;
+
+/* Color buffer / render target */
+typedef struct {
+    int id;
+    int w;
+    int h;
+    uint64_t base;
+    int format;
+    int endian;
+    int array_mode;						// tiling
+    int number_type;
+    int read_size;
+    int comp_swap;
+    int tile_mode;
+    int blend_clamp;
+    int clear_color;
+    int blend_bypass;
+    int simple_float;
+    int round_mode;
+    int tile_compact;
+    int source_format;
+    int resource_type;
+    int fast_clear;
+    int compression;
+    int rat;
+    struct radeon_bo *bo;
+} cb_config_t;
+
+/* Shader program description, passed to evergreen_{fs,vs,ps}_setup() */
+typedef struct {
+    uint64_t shader_addr;
+    uint32_t shader_size;
+    int num_gprs;
+    int stack_size;
+    int dx10_clamp;
+    int clamp_consts;
+    int export_mode;
+    int uncached_first_inst;
+    int single_round;
+    int double_round;
+    int allow_sdi;
+    int allow_sd0;	/* NOTE(review): digit '0' here vs letter 'o' in allow_ddo - confirm spelling */
+    int allow_ddi;
+    int allow_ddo;
+    struct radeon_bo *bo;	/* buffer object the shader code lives in */
+} shader_config_t;
+
+/* Shader ALU constant buffer description, passed to evergreen_set_alu_consts() */
+typedef struct {
+    int type;			/* SHADER_TYPE_* of the stage the constants belong to */
+    int size_bytes;		/* size of the constant data in bytes */
+    uint64_t const_addr;
+    struct radeon_bo *bo;	/* buffer object holding the constants */
+} const_config_t;
+
+/* Vertex buffer / vtx resource */
+typedef struct {
+    int id;
+    uint64_t vb_addr;
+    uint32_t vtx_num_entries;
+    uint32_t vtx_size_dw;
+    int clamp_x;
+    int format;
+    int num_format_all;
+    int format_comp_all;
+    int srf_mode_all;
+    int endian;
+    int mem_req_size;
+    int dst_sel_x;
+    int dst_sel_y;
+    int dst_sel_z;
+    int dst_sel_w;
+    int uncached;
+    struct radeon_bo *bo;
+} vtx_resource_t;
+
+/* Texture resource */
+typedef struct {
+    int id;
+    int w;
+    int h;
+    int pitch;
+    int depth;
+    int dim;
+    int array_mode;
+    int tile_type;
+    int format;
+    uint64_t base;
+    uint64_t mip_base;
+    uint32_t size;
+    int format_comp_x;
+    int format_comp_y;
+    int format_comp_z;
+    int format_comp_w;
+    int num_format_all;
+    int srf_mode_all;
+    int force_degamma;
+    int endian;
+    int dst_sel_x;
+    int dst_sel_y;
+    int dst_sel_z;
+    int dst_sel_w;
+    int base_level;
+    int last_level;
+    int base_array;
+    int last_array;
+    int perf_modulation;
+    int interlaced;
+    int min_lod;
+    struct radeon_bo *bo;
+    struct radeon_bo *mip_bo;
+} tex_resource_t;
+
+/* Texture sampler */
+typedef struct {
+    int				id;
+    /* Clamping */
+    int				clamp_x, clamp_y, clamp_z;
+    int		       		border_color;
+    /* Filtering */
+    int				xy_mag_filter, xy_min_filter;
+    int				z_filter;
+    int				mip_filter;
+    bool_t			high_precision_filter;	/* ? */
+    int				perf_mip;		/* ? 0-7 */
+    int				perf_z;			/* ? 3 */
+    /* LoD selection */
+    int				min_lod, max_lod;	/* 0-0x3ff */
+    int                         lod_bias;		/* 0-0xfff (signed?) */
+    int                         lod_bias2;		/* ? 0-0xfff (signed?) */
+    bool_t			lod_uses_minor_axis;	/* ? */
+    /* Other stuff */
+    bool_t			point_sampling_clamp;	/* ? */
+    bool_t			tex_array_override;	/* ? */
+    bool_t                      mc_coord_truncate;	/* ? */
+    bool_t			force_degamma;		/* ? */
+    bool_t			fetch_4;		/* ? */
+    bool_t			sample_is_pcf;		/* ? */
+    bool_t			type;			/* ? */
+    int				depth_compare;		/* only depth textures? */
+    int				chroma_key;
+    int                         truncate_coord;
+    bool_t                      disable_cube_wrap;
+} tex_sampler_t;
+
+/* Draw command parameters, passed to evergreen_draw_auto() */
+typedef struct {
+    uint32_t prim_type;
+    uint32_t vgt_draw_initiator;
+    uint32_t index_type;
+    uint32_t num_instances;
+    uint32_t num_indices;
+} draw_config_t;
+
+#define BEGIN_BATCH(n)			\
+do {								\
+    radeon_ddx_cs_start(pScrn, (n), __FILE__, __func__, __LINE__);	\
+} while(0)
+#define END_BATCH()			\
+do {					\
+    radeon_cs_end(info->cs, __FILE__, __func__, __LINE__);	\
+} while(0)
+#define RELOC_BATCH(bo, rd, wd)				\
+do {							\
+    int _ret;								\
+    _ret = radeon_cs_write_reloc(info->cs, (bo), (rd), (wd), 0);	\
+    if (_ret) ErrorF("reloc emit failure %d (%s %d)\n", _ret, __func__, __LINE__); \
+} while(0)
+#define E32(dword)                                                  \
+do {                                                                    \
+    radeon_cs_write_dword(info->cs, (dword));				\
+} while (0)
+
+#define EFLOAT(val)	/* emit the bit pattern of a float as one dword */	\
+do {								\
+    union { float f; uint32_t d; } _efloat_u; /* name must not clash with val */ \
+    _efloat_u.f = (val);						\
+    E32(_efloat_u.d);						\
+} while (0)
+
+#define PACK3(cmd, num)	       					\
+do {                                                                    \
+    E32(RADEON_CP_PACKET3 | ((cmd) << 8) | ((((num) - 1) & 0x3fff) << 16)); \
+} while (0)
+
+/* Emit the header for writing num registers starting at reg; the values must follow. */
+/* A reg inside a SET_* range uses that type-3 packet plus a dword offset, else CP_PACKET0. */
+#define PACK0(reg, num)                                             \
+do {                                                                    \
+    if ((reg) >= SET_CONFIG_REG_offset && (reg) < SET_CONFIG_REG_end) {	\
+	PACK3(IT_SET_CONFIG_REG, (num) + 1);			\
+	E32(((reg) - SET_CONFIG_REG_offset) >> 2);                  \
+    } else if ((reg) >= SET_CONTEXT_REG_offset && (reg) < SET_CONTEXT_REG_end) { \
+	PACK3(IT_SET_CONTEXT_REG, (num) + 1);			\
+	E32(((reg) - SET_CONTEXT_REG_offset) >> 2);			\
+    } else if ((reg) >= SET_RESOURCE_offset && (reg) < SET_RESOURCE_end) { \
+	PACK3(IT_SET_RESOURCE, (num) + 1);				\
+	E32(((reg) - SET_RESOURCE_offset) >> 2);			\
+    } else if ((reg) >= SET_SAMPLER_offset && (reg) < SET_SAMPLER_end) { \
+	PACK3(IT_SET_SAMPLER, (num) + 1);				\
+	E32(((reg) - SET_SAMPLER_offset) >> 2);			\
+    } else if ((reg) >= SET_CTL_CONST_offset && (reg) < SET_CTL_CONST_end) { \
+	PACK3(IT_SET_CTL_CONST, (num) + 1);			\
+	E32(((reg) - SET_CTL_CONST_offset) >> 2);		\
+    } else if ((reg) >= SET_LOOP_CONST_offset && (reg) < SET_LOOP_CONST_end) { \
+	PACK3(IT_SET_LOOP_CONST, (num) + 1);			\
+	E32(((reg) - SET_LOOP_CONST_offset) >> 2);		\
+    } else if ((reg) >= SET_BOOL_CONST_offset && (reg) < SET_BOOL_CONST_end) { \
+	PACK3(IT_SET_BOOL_CONST, (num) + 1);			\
+	E32(((reg) - SET_BOOL_CONST_offset) >> 2);		\
+    } else {								\
+	E32(CP_PACKET0 ((reg), (num) - 1));			\
+    }									\
+} while (0)
+
+/* write a single register */
+#define EREG(reg, val)                                              \
+do {								        \
+    PACK0((reg), 1);						\
+    E32((val));							\
+} while (0)
+
+void
+evergreen_start_3d(ScrnInfoPtr pScrn);
+void
+evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain);
+void
+evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop);
+void
+evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain);
+void
+evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain);
+void
+evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain);
+void
+evergreen_set_alu_consts(ScrnInfoPtr pScrn, const_config_t *const_conf, uint32_t domain);
+void
+evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val);
+void
+evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain);
+void
+evergreen_set_tex_sampler(ScrnInfoPtr pScrn, tex_sampler_t *s);
+void
+evergreen_set_screen_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2);
+void
+evergreen_set_vport_scissor(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2);
+void
+evergreen_set_generic_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2);
+void
+evergreen_set_window_scissor(ScrnInfoPtr pScrn, int x1, int y1, int x2, int y2);
+void
+evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y2);
+void
+evergreen_set_default_state(ScrnInfoPtr pScrn);
+void
+evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf);
+
+void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size);
+
+Bool
+EVERGREENSetAccelState(ScrnInfoPtr pScrn,
+		       struct r600_accel_object *src0,
+		       struct r600_accel_object *src1,
+		       struct r600_accel_object *dst,
+		       uint32_t vs_offset, uint32_t ps_offset,
+		       int rop, Pixel planemask);
+
+extern Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index);
+extern void RADEONFinishAccess_CS(PixmapPtr pPix, int index);
+extern void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align);
+extern void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv);
+extern struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix);
+extern Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix);
+
+#endif
diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
new file mode 100644
index 0000000..2b8d65e
--- /dev/null
+++ b/src/evergreen_textured_videofuncs.c
@@ -0,0 +1,587 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher at amd.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include "exa.h"
+
+#include "radeon.h"
+#include "radeon_reg.h"
+#include "evergreen_shader.h"
+#include "evergreen_reg.h"
+#include "evergreen_state.h"
+
+#include "radeon_video.h"
+
+#include <X11/extensions/Xv.h>
+#include "fourcc.h"
+
+#include "damage.h"
+
+#include "radeon_exa_shared.h"
+#include "radeon_vbo.h"
+
+/* Parameters for the ITU-R BT.601 and ITU-R BT.709 colour spaces.
+   Note: these differ from the parameters used for the overlay
+   because of 10-bit vs. float calculations. */
+static REF_TRANSFORM trans[2] =
+{
+    {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */
+    {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0}  /* BT.709 */
+};
+
+void
+EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    PixmapPtr pPixmap = pPriv->pPixmap;
+    BoxPtr pBox = REGION_RECTS(&pPriv->clip);
+    int nBox = REGION_NUM_RECTS(&pPriv->clip);
+    int dstxoff, dstyoff;
+    struct r600_accel_object src_obj, dst_obj;
+    cb_config_t     cb_conf;
+    tex_resource_t  tex_res;
+    tex_sampler_t   tex_samp;
+    shader_config_t vs_conf, ps_conf;
+    /*
+     * y' = y - .0625
+     * u' = u - .5
+     * v' = v - .5;
+     *
+     * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
+     * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
+     * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
+     *
+     * DP3 might look like the straightforward solution
+     * but we'd need to move the texture yuv values in
+     * the same reg for this to work. Therefore use MADs.
+     * Brightness just adds to the off constant.
+     * Contrast is multiplication of luminance.
+     * Saturation and hue change the u and v coeffs.
+     * Default values (before adjustments - depend on colorspace):
+     * yco = 1.1643
+     * uco = 0, -0.39173, 2.017
+     * vco = 1.5958, -0.8129, 0
+     * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r],
+     *       -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g],
+     *       -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b],
+     *
+     * temp = MAD(yco, yuv.yyyy, off)
+     * temp = MAD(uco, yuv.uuuu, temp)
+     * result = MAD(vco, yuv.vvvv, temp)
+     */
+    /* TODO: calc consts in the shader */
+    const float Loff = -0.0627;
+    const float Coff = -0.502;
+    float uvcosf, uvsinf;
+    float yco;
+    float uco[3], vco[3], off[3];
+    float bright, cont, gamma;
+    int ref = pPriv->transform_index;
+    Bool needgamma = FALSE;
+    float *ps_alu_consts;
+    const_config_t ps_const_conf;
+    float *vs_alu_consts;
+    const_config_t vs_const_conf;
+    int ret;
+
+    cont = RTFContrast(pPriv->contrast);
+    bright = RTFBrightness(pPriv->brightness);
+    gamma = (float)pPriv->gamma / 1000.0;
+    uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue));
+    uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue));
+    /* overlay video also does pre-gamma contrast/sat adjust, should we? */
+
+    yco = trans[ref].RefLuma * cont;
+    uco[0] = -trans[ref].RefRCr * uvsinf;
+    uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf;
+    uco[2] = trans[ref].RefBCb * uvcosf;
+    vco[0] = trans[ref].RefRCr * uvcosf;
+    vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf;
+    vco[2] = trans[ref].RefBCb * uvsinf;
+    off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright;
+    off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright;
+    off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright;
+
+    // XXX
+    gamma = 1.0;
+
+    if (gamma != 1.0) {
+	needgamma = TRUE;
+	/* note: gamma correction is out = in ^ gamma;
+	   gpu can only do LG2/EX2 therefore we transform into
+	   in ^ gamma = 2 ^ (log2(in) * gamma).
+	   Lots of scalar ops, unfortunately (better solution?) -
+	   without gamma that's 3 inst, with gamma it's 10...
+	   could use different gamma factors per channel,
+	   if that's of any use. */
+    }
+
+    CLEAR (cb_conf);
+    CLEAR (tex_res);
+    CLEAR (tex_samp);
+    CLEAR (vs_conf);
+    CLEAR (ps_conf);
+    CLEAR (vs_const_conf);
+    CLEAR (ps_const_conf);
+
+    /* setup the ps consts */
+    ps_const_conf.bo = radeon_bo_open(info->bufmgr, 0, 256, 0,
+				      RADEON_GEM_DOMAIN_GTT, 0);
+    if (ps_const_conf.bo == NULL) {
+	ErrorF("ps const buffer alloc failed\n");
+	return;
+    }
+    ret = radeon_bo_map(ps_const_conf.bo, 0);
+    if (ret) {
+	ErrorF("ps const buffer map failed\n");
+	return;
+    }
+
+    /* PS alu constants */
+    ps_const_conf.size_bytes = 256;
+    ps_const_conf.const_addr = 0;
+    ps_const_conf.type = SHADER_TYPE_PS;
+    ps_alu_consts = (float *)ps_const_conf.bo->ptr;
+
+    ps_alu_consts[0] = off[0];
+    ps_alu_consts[1] = off[1];
+    ps_alu_consts[2] = off[2];
+    ps_alu_consts[3] = yco;
+
+    ps_alu_consts[4] = uco[0];
+    ps_alu_consts[5] = uco[1];
+    ps_alu_consts[6] = uco[2];
+    ps_alu_consts[7] = gamma;
+
+    ps_alu_consts[8] = vco[0];
+    ps_alu_consts[9] = vco[1];
+    ps_alu_consts[10] = vco[2];
+    ps_alu_consts[11] = 0.0;
+    radeon_bo_unmap(ps_const_conf.bo);
+
+    vs_const_conf.bo = radeon_bo_open(info->bufmgr, 0, 256, 0,
+				      RADEON_GEM_DOMAIN_GTT, 0);
+    if (vs_const_conf.bo == NULL) {
+	ErrorF("vs const buffer alloc failed\n");
+	return;
+    }
+    ret = radeon_bo_map(vs_const_conf.bo, 0);
+    if (ret) {
+	ErrorF("vs const buffer map failed\n");
+	return;
+    }
+
+    /* VS alu constants */
+    vs_const_conf.size_bytes = 256;
+    vs_const_conf.const_addr = 0;
+    vs_const_conf.type = SHADER_TYPE_VS;
+    vs_alu_consts = (float *)vs_const_conf.bo->ptr;
+    vs_alu_consts[0] = 1.0 / pPriv->w;
+    vs_alu_consts[1] = 1.0 / pPriv->h;
+    vs_alu_consts[2] = 0.0;
+    vs_alu_consts[3] = 0.0;
+    radeon_bo_unmap(vs_const_conf.bo);
+
+    radeon_cs_space_add_persistent_bo(info->cs, ps_const_conf.bo,
+				      RADEON_GEM_DOMAIN_GTT, 0);
+    radeon_cs_space_add_persistent_bo(info->cs, vs_const_conf.bo,
+				      RADEON_GEM_DOMAIN_GTT, 0);
+    if (radeon_cs_space_check(info->cs)) {
+	radeon_bo_unref(ps_const_conf.bo);
+	radeon_bo_unref(vs_const_conf.bo);
+	ErrorF("const buffer size check failed\n");
+	return;
+    }
+
+#if defined(XF86DRM_MODE)
+    if (info->cs) {
+	dst_obj.offset = 0;
+	src_obj.offset = 0;
+	dst_obj.bo = radeon_get_pixmap_bo(pPixmap);
+    } else
+#endif
+    {
+	dst_obj.offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
+	src_obj.offset = pPriv->src_offset + info->fbLocation + pScrn->fbOffset;
+	dst_obj.bo = src_obj.bo = NULL;
+    }
+    dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
+
+    src_obj.pitch = pPriv->src_pitch;
+    src_obj.width = pPriv->w;
+    src_obj.height = pPriv->h;
+    src_obj.bpp = 16;
+    src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
+    src_obj.bo = pPriv->src_bo[pPriv->currentBuffer];
+
+    dst_obj.width = pPixmap->drawable.width;
+    dst_obj.height = pPixmap->drawable.height;
+    dst_obj.bpp = pPixmap->drawable.bitsPerPixel;
+    dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
+
+    if (!EVERGREENSetAccelState(pScrn,
+				&src_obj,
+				NULL,
+				&dst_obj,
+				accel_state->xv_vs_offset, accel_state->xv_ps_offset,
+				3, 0xffffffff))
+	return;
+
+#ifdef COMPOSITE
+    dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
+    dstyoff = -pPixmap->screen_y + pPixmap->drawable.y;
+#else
+    dstxoff = 0;
+    dstyoff = 0;
+#endif
+
+    radeon_vbo_check(pScrn, 16);
+    radeon_cp_start(pScrn);
+
+    evergreen_set_default_state(pScrn);
+
+    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+
+    /* PS bool constant */
+    switch(pPriv->id) {
+    case FOURCC_YV12:
+    case FOURCC_I420:
+	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (1 << 0));
+	break;
+    case FOURCC_UYVY:
+    case FOURCC_YUY2:
+    default:
+	evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0));
+	break;
+    }
+
+    /* Shader */
+    vs_conf.shader_addr         = accel_state->vs_mc_addr;
+    vs_conf.shader_size         = accel_state->vs_size;
+    vs_conf.num_gprs            = 2;
+    vs_conf.stack_size          = 0;
+    vs_conf.bo                  = accel_state->shaders_bo;
+    evergreen_vs_setup(pScrn, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    ps_conf.shader_addr         = accel_state->ps_mc_addr;
+    ps_conf.shader_size         = accel_state->ps_size;
+    ps_conf.num_gprs            = 3;
+    ps_conf.stack_size          = 1;
+    ps_conf.clamp_consts        = 0;
+    ps_conf.export_mode         = 2;
+    ps_conf.bo                  = accel_state->shaders_bo;
+    evergreen_ps_setup(pScrn, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    /* PS alu constants */
+    evergreen_set_alu_consts(pScrn, &ps_const_conf, RADEON_GEM_DOMAIN_GTT);
+
+    /* Texture */
+    switch(pPriv->id) {
+    case FOURCC_YV12:
+    case FOURCC_I420:
+	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
+
+	/* Y texture */
+	tex_res.id                  = 0;
+	tex_res.w                   = accel_state->src_obj[0].width;
+	tex_res.h                   = accel_state->src_obj[0].height;
+	tex_res.pitch               = accel_state->src_obj[0].pitch;
+	tex_res.depth               = 0;
+	tex_res.dim                 = SQ_TEX_DIM_2D;
+	tex_res.base                = accel_state->src_obj[0].offset;
+	tex_res.mip_base            = accel_state->src_obj[0].offset;
+	tex_res.size                = accel_state->src_size[0];
+	tex_res.bo                  = accel_state->src_obj[0].bo;
+	tex_res.mip_bo              = accel_state->src_obj[0].bo;
+
+	tex_res.format              = FMT_8;
+	tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
+	tex_res.dst_sel_y           = SQ_SEL_1;
+	tex_res.dst_sel_z           = SQ_SEL_1;
+	tex_res.dst_sel_w           = SQ_SEL_1;
+
+	tex_res.base_level          = 0;
+	tex_res.last_level          = 0;
+	tex_res.perf_modulation     = 0;
+	tex_res.interlaced          = 0;
+	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+	/* Y sampler */
+	tex_samp.id                 = 0;
+	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
+	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
+	tex_samp.clamp_z            = SQ_TEX_WRAP;
+
+	/* xxx: switch to bicubic */
+	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
+	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
+
+	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
+	tex_samp.mip_filter         = 0;			/* no mipmap */
+	evergreen_set_tex_sampler(pScrn, &tex_samp);
+
+	/* U or V texture */
+	tex_res.id                  = 1;
+	tex_res.format              = FMT_8;
+	tex_res.w                   = accel_state->src_obj[0].width >> 1;
+	tex_res.h                   = accel_state->src_obj[0].height >> 1;
+	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, 256);
+	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
+	tex_res.dst_sel_y           = SQ_SEL_1;
+	tex_res.dst_sel_z           = SQ_SEL_1;
+	tex_res.dst_sel_w           = SQ_SEL_1;
+	tex_res.interlaced          = 0;
+
+	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planev_offset;
+	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planev_offset;
+	tex_res.size                = accel_state->src_size[0] / 4;
+	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+	/* U or V sampler */
+	tex_samp.id                 = 1;
+	evergreen_set_tex_sampler(pScrn, &tex_samp);
+
+	/* U or V texture */
+	tex_res.id                  = 2;
+	tex_res.format              = FMT_8;
+	tex_res.w                   = accel_state->src_obj[0].width >> 1;
+	tex_res.h                   = accel_state->src_obj[0].height >> 1;
+	tex_res.pitch               = RADEON_ALIGN(accel_state->src_obj[0].pitch >> 1, 256);
+	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
+	tex_res.dst_sel_y           = SQ_SEL_1;
+	tex_res.dst_sel_z           = SQ_SEL_1;
+	tex_res.dst_sel_w           = SQ_SEL_1;
+	tex_res.interlaced          = 0;
+
+	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planeu_offset;
+	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planeu_offset;
+	tex_res.size                = accel_state->src_size[0] / 4;
+	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+	/* UV sampler */
+	tex_samp.id                 = 2;
+	evergreen_set_tex_sampler(pScrn, &tex_samp);
+	break;
+    case FOURCC_UYVY:
+    case FOURCC_YUY2:
+    default:
+	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
+
+	/* Y texture */
+	tex_res.id                  = 0;
+	tex_res.w                   = accel_state->src_obj[0].width;
+	tex_res.h                   = accel_state->src_obj[0].height;
+	tex_res.pitch               = accel_state->src_obj[0].pitch >> 1;
+	tex_res.depth               = 0;
+	tex_res.dim                 = SQ_TEX_DIM_2D;
+	tex_res.base                = accel_state->src_obj[0].offset;
+	tex_res.mip_base            = accel_state->src_obj[0].offset;
+	tex_res.size                = accel_state->src_size[0];
+	tex_res.bo                  = accel_state->src_obj[0].bo;
+	tex_res.mip_bo              = accel_state->src_obj[0].bo;
+
+	tex_res.format              = FMT_8_8;
+	if (pPriv->id == FOURCC_UYVY)
+	    tex_res.dst_sel_x           = SQ_SEL_Y; /* Y */
+	else
+	    tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
+	tex_res.dst_sel_y           = SQ_SEL_1;
+	tex_res.dst_sel_z           = SQ_SEL_1;
+	tex_res.dst_sel_w           = SQ_SEL_1;
+
+	tex_res.base_level          = 0;
+	tex_res.last_level          = 0;
+	tex_res.perf_modulation     = 0;
+	tex_res.interlaced          = 0;
+	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+	/* Y sampler */
+	tex_samp.id                 = 0;
+	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
+	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
+	tex_samp.clamp_z            = SQ_TEX_WRAP;
+
+	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
+	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
+
+	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
+	tex_samp.mip_filter         = 0;			/* no mipmap */
+	evergreen_set_tex_sampler(pScrn, &tex_samp);
+
+	/* UV texture */
+	tex_res.id                  = 1;
+	tex_res.format              = FMT_8_8_8_8;
+	tex_res.w                   = accel_state->src_obj[0].width >> 1;
+	tex_res.h                   = accel_state->src_obj[0].height;
+	tex_res.pitch               = accel_state->src_obj[0].pitch >> 2;
+	if (pPriv->id == FOURCC_UYVY) {
+	    tex_res.dst_sel_x           = SQ_SEL_X; /* V */
+	    tex_res.dst_sel_y           = SQ_SEL_Z; /* U */
+	} else {
+	    tex_res.dst_sel_x           = SQ_SEL_Y; /* V */
+	    tex_res.dst_sel_y           = SQ_SEL_W; /* U */
+	}
+	tex_res.dst_sel_z           = SQ_SEL_1;
+	tex_res.dst_sel_w           = SQ_SEL_1;
+	tex_res.interlaced          = 0;
+
+	tex_res.base                = accel_state->src_obj[0].offset;
+	tex_res.mip_base            = accel_state->src_obj[0].offset;
+	tex_res.size                = accel_state->src_size[0];
+	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
+
+	/* UV sampler */
+	tex_samp.id                 = 1;
+	evergreen_set_tex_sampler(pScrn, &tex_samp);
+	break;
+    }
+
+    cb_conf.id = 0;
+    cb_conf.w = accel_state->dst_obj.pitch;
+    cb_conf.h = accel_state->dst_obj.height;
+    cb_conf.base = accel_state->dst_obj.offset;
+    cb_conf.bo = accel_state->dst_obj.bo;
+
+    switch (accel_state->dst_obj.bpp) {
+    case 16:
+	if (pPixmap->drawable.depth == 15) {
+	    cb_conf.format = COLOR_1_5_5_5;
+	    cb_conf.comp_swap = 1; /* ARGB */
+	} else {
+	    cb_conf.format = COLOR_5_6_5;
+	    cb_conf.comp_swap = 2; /* RGB */
+	}
+	break;
+    case 32:
+	cb_conf.format = COLOR_8_8_8_8;
+	cb_conf.comp_swap = 1; /* ARGB */
+	break;
+    default:
+	return;
+    }
+
+    cb_conf.source_format = EXPORT_4C_16BPC;
+    cb_conf.blend_clamp = 1;
+    evergreen_set_render_target(pScrn, &cb_conf, accel_state->dst_obj.domain);
+
+    /* Render setup */
+    BEGIN_BATCH(23);
+    EREG(CB_TARGET_MASK,                      (0x0f << TARGET0_ENABLE_shift));
+    EREG(CB_COLOR_CONTROL,                    ((0xcc << ROP3_shift) |
+					       (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift)));
+    EREG(CB_BLEND0_CONTROL,                   0);
+
+    /* Interpolator setup */
+    /* export tex coords from VS */
+    EREG(SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
+    EREG(SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
+    EREG(SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
+					       (0x03 << DEFAULT_VAL_shift)));
+
+    /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
+     * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
+    PACK0(SPI_PS_IN_CONTROL_0, 3);
+    E32(((1 << NUM_INTERP_shift) |
+	 LINEAR_GRADIENT_ENA_bit)); // SPI_PS_IN_CONTROL_0
+    E32(0); // SPI_PS_IN_CONTROL_1
+    E32(0); // SPI_INTERP_CONTROL_0
+    END_BATCH();
+
+    /* VS alu constants */
+    evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT);
+
+    if (pPriv->vsync) {
+	xf86CrtcPtr crtc;
+	if (pPriv->desired_crtc)
+	    crtc = pPriv->desired_crtc;
+	else
+	    crtc = radeon_pick_best_crtc(pScrn,
+					 pPriv->drw_x,
+					 pPriv->drw_x + pPriv->dst_w,
+					 pPriv->drw_y,
+					 pPriv->drw_y + pPriv->dst_h);
+	if (crtc)
+	    evergreen_cp_wait_vline_sync(pScrn, pPixmap,
+					 crtc,
+					 pPriv->drw_y - crtc->y,
+					 (pPriv->drw_y - crtc->y) + pPriv->dst_h);
+    }
+
+    while (nBox--) {
+	int srcX, srcY, srcw, srch;
+	int dstX, dstY, dstw, dsth;
+	float *vb;
+
+
+	dstX = pBox->x1 + dstxoff;
+	dstY = pBox->y1 + dstyoff;
+	dstw = pBox->x2 - pBox->x1;
+	dsth = pBox->y2 - pBox->y1;
+
+	srcX = pPriv->src_x;
+	srcX += ((pBox->x1 - pPriv->drw_x) *
+		 pPriv->src_w) / pPriv->dst_w;
+	srcY = pPriv->src_y;
+	srcY += ((pBox->y1 - pPriv->drw_y) *
+		 pPriv->src_h) / pPriv->dst_h;
+
+	srcw = (pPriv->src_w * dstw) / pPriv->dst_w;
+	srch = (pPriv->src_h * dsth) / pPriv->dst_h;
+
+	vb = radeon_vbo_space(pScrn, 16);
+
+	vb[0] = (float)dstX;
+	vb[1] = (float)dstY;
+	vb[2] = (float)srcX;
+	vb[3] = (float)srcY;
+
+	vb[4] = (float)dstX;
+	vb[5] = (float)(dstY + dsth);
+	vb[6] = (float)srcX;
+	vb[7] = (float)(srcY + srch);
+
+	vb[8] = (float)(dstX + dstw);
+	vb[9] = (float)(dstY + dsth);
+	vb[10] = (float)(srcX + srcw);
+	vb[11] = (float)(srcY + srch);
+
+	radeon_vbo_commit(pScrn);
+
+	pBox++;
+    }
+
+    evergreen_finish_op(pScrn, 16);
+
+    DamageDamageRegion(pPriv->pDraw, &pPriv->clip);
+}
diff --git a/src/r600_exa.c b/src/r600_exa.c
index d6e98ff..7702087 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -43,7 +43,7 @@
 
 /* #define SHOW_VERTEXES */
 
-uint32_t RADEON_ROP[16] = {
+uint32_t R600_ROP[16] = {
     RADEON_ROP3_ZERO, /* GXclear        */
     RADEON_ROP3_DSa,  /* Gxand          */
     RADEON_ROP3_SDna, /* GXandReverse   */
@@ -211,11 +211,11 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     radeon_vbo_check(pScrn, 16);
     radeon_cp_start(pScrn);
 
-    set_default_state(pScrn, accel_state->ib);
+    r600_set_default_state(pScrn, accel_state->ib);
 
-    set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
 
     /* Shader */
     vs_conf.shader_addr         = accel_state->vs_mc_addr;
@@ -223,7 +223,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     vs_conf.num_gprs            = 2;
     vs_conf.stack_size          = 0;
     vs_conf.bo                  = accel_state->shaders_bo;
-    vs_setup                    (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
 
     ps_conf.shader_addr         = accel_state->ps_mc_addr;
     ps_conf.shader_size         = accel_state->ps_size;
@@ -233,7 +233,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     ps_conf.clamp_consts        = 0;
     ps_conf.export_mode         = 2;
     ps_conf.bo                  = accel_state->shaders_bo;
-    ps_setup                    (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
 
     cb_conf.id = 0;
     cb_conf.w = accel_state->dst_obj.pitch;
@@ -253,7 +253,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     }
     cb_conf.source_format = 1;
     cb_conf.blend_clamp = 1;
-    set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
+    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
 
     /* Render setup */
     if (accel_state->planemask & 0x000000ff)
@@ -266,7 +266,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 	pmask |= 8; /* A */
     BEGIN_BATCH(20);
     EREG(accel_state->ib, CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
-    EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[accel_state->rop]);
+    EREG(accel_state->ib, CB_COLOR_CONTROL,                    R600_ROP[accel_state->rop]);
 
     /* Interpolator setup */
     /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
@@ -312,8 +312,8 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 	ps_alu_consts[2] = (float)b / 255; /* B */
 	ps_alu_consts[3] = (float)a / 255; /* A */
     }
-    set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
-		   sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
+			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
 
     if (accel_state->vsync)
 	RADEONVlineHelperClear(pScrn);
@@ -355,10 +355,10 @@ R600DoneSolid(PixmapPtr pPix)
     struct radeon_accel_state *accel_state = info->accel_state;
 
     if (accel_state->vsync)
-	cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
-			   accel_state->vline_crtc,
-			   accel_state->vline_y1,
-			   accel_state->vline_y2);
+	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
+				accel_state->vline_crtc,
+				accel_state->vline_y1,
+				accel_state->vline_y2);
 
     r600_finish_op(pScrn, 8);
 }
@@ -383,11 +383,11 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
     radeon_vbo_check(pScrn, 16);
     radeon_cp_start(pScrn);
 
-    set_default_state(pScrn, accel_state->ib);
+    r600_set_default_state(pScrn, accel_state->ib);
 
-    set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
 
     /* Shader */
     vs_conf.shader_addr         = accel_state->vs_mc_addr;
@@ -395,7 +395,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
     vs_conf.num_gprs            = 2;
     vs_conf.stack_size          = 0;
     vs_conf.bo                  = accel_state->shaders_bo;
-    vs_setup                    (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
 
     ps_conf.shader_addr         = accel_state->ps_mc_addr;
     ps_conf.shader_size         = accel_state->ps_size;
@@ -405,7 +405,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
     ps_conf.clamp_consts        = 0;
     ps_conf.export_mode         = 2;
     ps_conf.bo                  = accel_state->shaders_bo;
-    ps_setup                    (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
 
     /* Texture */
     tex_res.id                  = 0;
@@ -443,7 +443,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
     tex_res.base_level          = 0;
     tex_res.last_level          = 0;
     tex_res.perf_modulation     = 0;
-    set_tex_resource            (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
 
     tex_samp.id                 = 0;
     tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
@@ -453,7 +453,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
     tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_POINT;
     tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
     tex_samp.mip_filter         = 0;			/* no mipmap */
-    set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
+    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
 
     cb_conf.id = 0;
     cb_conf.w = accel_state->dst_obj.pitch;
@@ -472,7 +472,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
     }
     cb_conf.source_format = 1;
     cb_conf.blend_clamp = 1;
-    set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
+    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
 
     /* Render setup */
     if (accel_state->planemask & 0x000000ff)
@@ -485,7 +485,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn)
 	pmask |= 8; /* A */
     BEGIN_BATCH(20);
     EREG(accel_state->ib, CB_TARGET_MASK,                      (pmask << TARGET0_ENABLE_shift));
-    EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[accel_state->rop]);
+    EREG(accel_state->ib, CB_COLOR_CONTROL,                    R600_ROP[accel_state->rop]);
 
     /* Interpolator setup */
     /* export tex coord from VS */
@@ -521,10 +521,10 @@ R600DoCopyVline(PixmapPtr pPix)
     struct radeon_accel_state *accel_state = info->accel_state;
 
     if (accel_state->vsync)
-	cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
-			   accel_state->vline_crtc,
-			   accel_state->vline_y1,
-			   accel_state->vline_y2);
+	r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPix,
+				accel_state->vline_crtc,
+				accel_state->vline_y1,
+				accel_state->vline_y2);
 
     r600_finish_op(pScrn, 16);
 }
@@ -603,7 +603,7 @@ R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
     src_obj.height = pSrc->drawable.height;
     src_obj.bpp = pSrc->drawable.bitsPerPixel;
     src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
-    
+
     dst_obj.width = pDst->drawable.width;
     dst_obj.height = pDst->drawable.height;
     dst_obj.bpp = pDst->drawable.bitsPerPixel;
@@ -1060,7 +1060,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
     tex_res.base_level          = 0;
     tex_res.last_level          = 0;
     tex_res.perf_modulation     = 0;
-    set_tex_resource            (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
+    r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[unit].domain);
 
     tex_samp.id                 = unit;
     tex_samp.border_color       = SQ_TEX_BORDER_COLOR_TRANS_BLACK;
@@ -1102,7 +1102,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
     tex_samp.clamp_z            = SQ_TEX_WRAP;
     tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
     tex_samp.mip_filter         = 0;			/* no mipmap */
-    set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
+    r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
 
     if (pPict->transform != 0) {
 	accel_state->is_transform[unit] = TRUE;
@@ -1132,8 +1132,8 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
     }
 
     /* VS alu constants */
-    set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
-		   sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
+    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs + (unit * 2),
+			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
 
     return TRUE;
 }
@@ -1264,7 +1264,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 	if (info->cs) {
 	    mask_obj.offset = 0;
 	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
-	} else 
+	} else
 #endif
 	{
 	    mask_obj.offset = exaGetPixmapOffset(pMask) + info->fbLocation + pScrn->fbOffset;
@@ -1324,11 +1324,11 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 
     radeon_cp_start(pScrn);
 
-    set_default_state(pScrn, accel_state->ib);
+    r600_set_default_state(pScrn, accel_state->ib);
 
-    set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
 
     if (!R600TextureSetup(pSrcPicture, pSrc, 0)) {
         R600IBDiscard(pScrn, accel_state->ib);
@@ -1346,11 +1346,11 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
         accel_state->is_transform[1] = FALSE;
 
     if (pMask) {
-	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
-	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
+	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
+	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
     } else {
-	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
-	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
+	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
+	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
     }
 
     /* Shader */
@@ -1359,7 +1359,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     vs_conf.num_gprs            = 3;
     vs_conf.stack_size          = 1;
     vs_conf.bo                  = accel_state->shaders_bo;
-    vs_setup                    (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
 
     ps_conf.shader_addr         = accel_state->ps_mc_addr;
     ps_conf.shader_size         = accel_state->ps_size;
@@ -1369,7 +1369,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     ps_conf.clamp_consts        = 0;
     ps_conf.export_mode         = 2;
     ps_conf.bo                  = accel_state->shaders_bo;
-    ps_setup                    (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
 
     cb_conf.id = 0;
     cb_conf.w = accel_state->dst_obj.pitch;
@@ -1405,7 +1405,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     }
     cb_conf.source_format = 1;
     cb_conf.blend_clamp = 1;
-    set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
+    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
 
     BEGIN_BATCH(24);
     EREG(accel_state->ib, CB_TARGET_MASK,                      (0xf << TARGET0_ENABLE_shift));
@@ -1414,10 +1414,10 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 
     if (info->ChipFamily == CHIP_FAMILY_R600) {
 	/* no per-MRT blend on R600 */
-	EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
+	EREG(accel_state->ib, CB_COLOR_CONTROL,                    R600_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
 	EREG(accel_state->ib, CB_BLEND_CONTROL,                    blendcntl);
     } else {
-	EREG(accel_state->ib, CB_COLOR_CONTROL,                    (RADEON_ROP[3] |
+	EREG(accel_state->ib, CB_COLOR_CONTROL,                    (R600_ROP[3] |
 								    (1 << TARGET_BLEND_ENABLE_shift) |
 								    PER_MRT_BLEND_bit));
 	EREG(accel_state->ib, CB_BLEND0_CONTROL,                   blendcntl);
@@ -1542,10 +1542,10 @@ static void R600DoneComposite(PixmapPtr pDst)
     int vtx_size;
 
     if (accel_state->vsync)
-       cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
-                          accel_state->vline_crtc,
-                          accel_state->vline_y1,
-                          accel_state->vline_y2);
+       r600_cp_wait_vline_sync(pScrn, accel_state->ib, pDst,
+			       accel_state->vline_crtc,
+			       accel_state->vline_y1,
+			       accel_state->vline_y2);
 
     vtx_size = accel_state->msk_pic ? 24 : 16;
 
@@ -1913,7 +1913,7 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     src_obj.bpp = bpp;
     src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
     src_obj.bo = radeon_get_pixmap_bo(pSrc);
-    
+
     dst_obj.pitch = dst_pitch_hw;
     dst_obj.width = w;
     dst_obj.height = h;
diff --git a/src/r600_state.h b/src/r600_state.h
index 151f402..1e8dea3 100644
--- a/src/r600_state.h
+++ b/src/r600_state.h
@@ -274,48 +274,46 @@ do {								        \
 void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib);
 void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib);
 
-uint64_t
-upload (ScrnInfoPtr pScrn, void *shader, int size, int offset);
 void
-wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib);
+r600_wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib);
 void
-wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib);
+r600_wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib);
 void
-start_3d(ScrnInfoPtr pScrn, drmBufPtr ib);
+r600_start_3d(ScrnInfoPtr pScrn, drmBufPtr ib);
 void
-set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain);
+r600_set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain);
 void
-cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop);
+r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop);
 void
-fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain);
+r600_fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain);
 void
-vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain);
+r600_vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain);
 void
-ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain);
+r600_ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain);
 void
-set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf);
+r600_set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf);
 void
-set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val);
+r600_set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val);
 void
-set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain);
+r600_set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain);
 void
-set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s);
+r600_set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s);
 void
-set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
+r600_set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
 void
-set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2);
+r600_set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2);
 void
-set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
+r600_set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
 void
-set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
+r600_set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2);
 void
-set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2);
+r600_set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2);
 void
-set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib);
+r600_set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib);
 void
-draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices);
+r600_draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices);
 void
-draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf);
+r600_draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf);
 
 void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size);
 
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index e18a9c8..66164ac 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -209,22 +209,22 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     radeon_vbo_check(pScrn, 16);
     radeon_cp_start(pScrn);
 
-    set_default_state(pScrn, accel_state->ib);
+    r600_set_default_state(pScrn, accel_state->ib);
 
-    set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_generic_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_screen_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    r600_set_window_scissor(pScrn, accel_state->ib, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
 
     /* PS bool constant */
     switch(pPriv->id) {
     case FOURCC_YV12:
     case FOURCC_I420:
-	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
+	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
 	break;
     case FOURCC_UYVY:
     case FOURCC_YUY2:
     default:
-	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
+	r600_set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
 	break;
     }
 
@@ -234,7 +234,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     vs_conf.num_gprs            = 2;
     vs_conf.stack_size          = 0;
     vs_conf.bo                  = accel_state->shaders_bo;
-    vs_setup                    (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
+    r600_vs_setup(pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM);
 
     ps_conf.shader_addr         = accel_state->ps_mc_addr;
     ps_conf.shader_size         = accel_state->ps_size;
@@ -244,11 +244,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     ps_conf.clamp_consts        = 0;
     ps_conf.export_mode         = 2;
     ps_conf.bo                  = accel_state->shaders_bo;
-    ps_setup                    (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
+    r600_ps_setup(pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM);
 
     /* PS alu constants */
-    set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
-		   sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
+    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps,
+			sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
 
     /* Texture */
     switch(pPriv->id) {
@@ -280,7 +280,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.last_level          = 0;
 	tex_res.perf_modulation     = 0;
 	tex_res.interlaced          = 0;
-	set_tex_resource            (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
 
 	/* Y sampler */
 	tex_samp.id                 = 0;
@@ -294,7 +294,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 
 	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
 	tex_samp.mip_filter         = 0;			/* no mipmap */
-	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
+	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
 
 	/* U or V texture */
 	tex_res.id                  = 1;
@@ -311,11 +311,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planev_offset;
 	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planev_offset;
 	tex_res.size                = accel_state->src_size[0] / 4;
-	set_tex_resource            (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
 
 	/* U or V sampler */
 	tex_samp.id                 = 1;
-	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
+	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
 
 	/* U or V texture */
 	tex_res.id                  = 2;
@@ -332,11 +332,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.base                = accel_state->src_obj[0].offset + pPriv->planeu_offset;
 	tex_res.mip_base            = accel_state->src_obj[0].offset + pPriv->planeu_offset;
 	tex_res.size                = accel_state->src_size[0] / 4;
-	set_tex_resource            (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
 
 	/* UV sampler */
 	tex_samp.id                 = 2;
-	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
+	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
 	break;
     case FOURCC_UYVY:
     case FOURCC_YUY2:
@@ -370,7 +370,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.last_level          = 0;
 	tex_res.perf_modulation     = 0;
 	tex_res.interlaced          = 0;
-	set_tex_resource            (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
 
 	/* Y sampler */
 	tex_samp.id                 = 0;
@@ -384,7 +384,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 
 	tex_samp.z_filter           = SQ_TEX_Z_FILTER_NONE;
 	tex_samp.mip_filter         = 0;			/* no mipmap */
-	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
+	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
 
 	/* UV texture */
 	tex_res.id                  = 1;
@@ -406,11 +406,11 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.base                = accel_state->src_obj[0].offset;
 	tex_res.mip_base            = accel_state->src_obj[0].offset;
 	tex_res.size                = accel_state->src_size[0];
-	set_tex_resource            (pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
+	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
 
 	/* UV sampler */
 	tex_samp.id                 = 1;
-	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
+	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
 	break;
     }
 
@@ -440,7 +440,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 
     cb_conf.source_format = 1;
     cb_conf.blend_clamp = 1;
-    set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
+    r600_set_render_target(pScrn, accel_state->ib, &cb_conf, accel_state->dst_obj.domain);
 
     /* Render setup */
     BEGIN_BATCH(20);
@@ -469,8 +469,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     vs_alu_consts[3] = 0.0;
 
     /* VS alu constants */
-    set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs,
-		   sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
+    r600_set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs,
+			sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts);
 
     if (pPriv->vsync) {
 	xf86CrtcPtr crtc;
@@ -483,10 +483,10 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 					 pPriv->drw_y,
 					 pPriv->drw_y + pPriv->dst_h);
 	if (crtc)
-	    cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap,
-			       crtc,
-			       pPriv->drw_y - crtc->y,
-			       (pPriv->drw_y - crtc->y) + pPriv->dst_h);
+	    r600_cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap,
+				    crtc,
+				    pPriv->drw_y - crtc->y,
+				    (pPriv->drw_y - crtc->y) + pPriv->dst_h);
     }
 
     while (nBox--) {
diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c
index 86817bd..2952863 100644
--- a/src/r6xx_accel.c
+++ b/src/r6xx_accel.c
@@ -98,7 +98,7 @@ void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib)
 }
 
 void
-wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
+r600_wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
 
@@ -113,7 +113,7 @@ wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib)
 }
 
 void
-wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
+r600_wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
 
@@ -123,7 +123,7 @@ wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib)
 }
 
 void
-start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
+r600_start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
 
@@ -147,7 +147,7 @@ start_3d(ScrnInfoPtr pScrn, drmBufPtr ib)
 
 // asic stack/thread/gpr limits - need to query the drm
 static void
-sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
+r600_sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
 {
     uint32_t sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
     uint32_t sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
@@ -198,7 +198,7 @@ sq_setup(ScrnInfoPtr pScrn, drmBufPtr ib, sq_config_t *sq_conf)
 }
 
 void
-set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain)
+r600_set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_t domain)
 {
     uint32_t cb_color_info;
     int pitch, slice, h;
@@ -276,8 +276,9 @@ set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf, uint32_
 }
 
 static void
-cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr,
-		    struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
+r600_cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type,
+			 uint32_t size, uint64_t mc_addr,
+			 struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     uint32_t cp_coher_size;
@@ -297,7 +298,8 @@ cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_
 }
 
 /* inserts a wait for vline in the command stream */
-void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
+void
+r600_cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
 			xf86CrtcPtr crtc, int start, int stop)
 {
     RADEONInfoPtr  info = RADEONPTR(pScrn);
@@ -380,7 +382,7 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix,
 }
 
 void
-fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain)
+r600_fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t domain)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     uint32_t sq_pgm_resources;
@@ -403,7 +405,7 @@ fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf, uint32_t dom
 }
 
 void
-vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain)
+r600_vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t domain)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     uint32_t sq_pgm_resources;
@@ -419,9 +421,9 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t dom
 	sq_pgm_resources |= UNCACHED_FIRST_INST_bit;
 
     /* flush SQ cache */
-    cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
-			vs_conf->shader_size, vs_conf->shader_addr,
-			vs_conf->bo, domain, 0);
+    r600_cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
+			     vs_conf->shader_size, vs_conf->shader_addr,
+			     vs_conf->bo, domain, 0);
 
     BEGIN_BATCH(3 + 2);
     EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8);
@@ -435,7 +437,7 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf, uint32_t dom
 }
 
 void
-ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain)
+r600_ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t domain)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     uint32_t sq_pgm_resources;
@@ -453,9 +455,9 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t dom
 	sq_pgm_resources |= CLAMP_CONSTS_bit;
 
     /* flush SQ cache */
-    cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
-			ps_conf->shader_size, ps_conf->shader_addr,
-			ps_conf->bo, domain, 0);
+    r600_cp_set_surface_sync(pScrn, ib, SH_ACTION_ENA_bit,
+			     ps_conf->shader_size, ps_conf->shader_addr,
+			     ps_conf->bo, domain, 0);
 
     BEGIN_BATCH(3 + 2);
     EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8);
@@ -470,7 +472,7 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf, uint32_t dom
 }
 
 void
-set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
+r600_set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     int i;
@@ -484,7 +486,7 @@ set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *co
 }
 
 void
-set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
+r600_set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     /* bool register order is: ps, vs, gs; one register each
@@ -496,7 +498,7 @@ set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val)
 }
 
 static void
-set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain)
+r600_set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t domain)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
@@ -522,15 +524,15 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t
 	(info->ChipFamily == CHIP_FAMILY_RS780) ||
 	(info->ChipFamily == CHIP_FAMILY_RS880) ||
 	(info->ChipFamily == CHIP_FAMILY_RV710))
-	cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
-			    accel_state->vb_offset, accel_state->vb_mc_addr,
-			    res->bo,
-			    domain, 0);
+	r600_cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
+				 accel_state->vb_offset, accel_state->vb_mc_addr,
+				 res->bo,
+				 domain, 0);
     else
-	cp_set_surface_sync(pScrn, ib, VC_ACTION_ENA_bit,
-			    accel_state->vb_offset, accel_state->vb_mc_addr,
-			    res->bo,
-			    domain, 0);
+	r600_cp_set_surface_sync(pScrn, ib, VC_ACTION_ENA_bit,
+				 accel_state->vb_offset, accel_state->vb_mc_addr,
+				 res->bo,
+				 domain, 0);
 
     BEGIN_BATCH(9 + 2);
     PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7);
@@ -546,7 +548,7 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res, uint32_t
 }
 
 void
-set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain)
+r600_set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint32_t domain)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
@@ -599,9 +601,9 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint3
 	sq_tex_resource_word6 |= INTERLACED_bit;
 
     /* flush texture cache */
-    cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
-			tex_res->size, tex_res->base,
-			tex_res->bo, domain, 0);
+    r600_cp_set_surface_sync(pScrn, ib, TC_ACTION_ENA_bit,
+			     tex_res->size, tex_res->base,
+			     tex_res->bo, domain, 0);
 
     BEGIN_BATCH(9 + 4);
     PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7);
@@ -618,7 +620,7 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res, uint3
 }
 
 void
-set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
+r600_set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2;
@@ -670,7 +672,7 @@ set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s)
 
 //XXX deal with clip offsets in clip setup
 void
-set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+r600_set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
 
@@ -684,7 +686,7 @@ set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int
 }
 
 void
-set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
+r600_set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
 
@@ -699,7 +701,7 @@ set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x
 }
 
 void
-set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+r600_set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
 
@@ -714,7 +716,7 @@ set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int
 }
 
 void
-set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
+r600_set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
 
@@ -729,7 +731,7 @@ set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int
 }
 
 void
-set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
+r600_set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
 
@@ -747,7 +749,7 @@ set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, i
  */
 
 void
-set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
+r600_set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
 {
     tex_resource_t tex_res;
     shader_config_t fs_conf;
@@ -764,7 +766,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
 
     accel_state->XInited3D = TRUE;
 
-    start_3d(pScrn, accel_state->ib);
+    r600_start_3d(pScrn, accel_state->ib);
 
     // SQ
     sq_conf.ps_prio = 0;
@@ -888,7 +890,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
 	break;
     }
 
-    sq_setup(pScrn, ib, &sq_conf);
+    r600_sq_setup(pScrn, ib, &sq_conf);
 
     /* set fake reloc for unused depth */
     BEGIN_BATCH(3 + 2);
@@ -992,10 +994,10 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
 
     /* clip boolean is set to always visible -> doesn't matter */
     for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
-	set_clip_rect (pScrn, ib, i, 0, 0, 8192, 8192);
+	r600_set_clip_rect(pScrn, ib, i, 0, 0, 8192, 8192);
 
     for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
-	set_vport_scissor (pScrn, ib, i, 0, 0, 8192, 8192);
+	r600_set_vport_scissor(pScrn, ib, i, 0, 0, 8192, 8192);
 
     BEGIN_BATCH(42);
     PACK0(ib, PA_SC_MPASS_PS_CNTL, 2);
@@ -1051,7 +1053,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
 
     // clear FS
     fs_conf.bo = accel_state->shaders_bo;
-    fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
+    r600_fs_setup(pScrn, ib, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
 
     // VGT
     BEGIN_BATCH(43);
@@ -1102,7 +1104,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib)
  */
 
 void
-draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
+r600_draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     uint32_t i, count;
@@ -1140,7 +1142,7 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i
 }
 
 void
-draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
+r600_draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
 
@@ -1183,7 +1185,7 @@ void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
     vtx_res.mem_req_size    = 1;
     vtx_res.vb_addr         = accel_state->vb_mc_addr + accel_state->vb_start_op;
     vtx_res.bo              = accel_state->vb_bo;
-    set_vtx_resource        (pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);
+    r600_set_vtx_resource(pScrn, accel_state->ib, &vtx_res, RADEON_GEM_DOMAIN_GTT);
 
     /* Draw */
     draw_conf.prim_type          = DI_PT_RECTLIST;
@@ -1192,15 +1194,15 @@ void r600_finish_op(ScrnInfoPtr pScrn, int vtx_size)
     draw_conf.num_indices        = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw;
     draw_conf.index_type         = DI_INDEX_SIZE_16_BIT;
 
-    draw_auto(pScrn, accel_state->ib, &draw_conf);
+    r600_draw_auto(pScrn, accel_state->ib, &draw_conf);
 
     /* XXX drm should handle this in fence submit */
-    wait_3d_idle_clean(pScrn, accel_state->ib);
+    r600_wait_3d_idle_clean(pScrn, accel_state->ib);
 
     /* sync dst surface */
-    cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
-			accel_state->dst_size, accel_state->dst_obj.offset,
-			accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
+    r600_cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit),
+			     accel_state->dst_size, accel_state->dst_obj.offset,
+			     accel_state->dst_obj.bo, 0, accel_state->dst_obj.domain);
 
     accel_state->vb_start_op = -1;
     accel_state->ib_reset_op = 0;
diff --git a/src/radeon.h b/src/radeon.h
index 134a4cf..8428e2d 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -387,6 +387,8 @@ typedef enum {
 
 #define IS_DCE4_VARIANT ((info->ChipFamily >= CHIP_FAMILY_CEDAR))
 
+#define IS_EVERGREEN_3D (info->ChipFamily >= CHIP_FAMILY_CEDAR)
+
 #define IS_R600_3D (info->ChipFamily >= CHIP_FAMILY_R600)
 
 #define IS_R500_3D ((info->ChipFamily == CHIP_FAMILY_RV515)  ||  \
@@ -753,6 +755,16 @@ struct radeon_accel_state {
     uint32_t          comp_ps_offset;
     uint32_t          xv_vs_offset;
     uint32_t          xv_ps_offset;
+    // shader consts
+    uint32_t          solid_vs_const_offset;
+    uint32_t          solid_ps_const_offset;
+    uint32_t          copy_vs_const_offset;
+    uint32_t          copy_ps_const_offset;
+    uint32_t          comp_vs_const_offset;
+    uint32_t          comp_ps_const_offset;
+    uint32_t          comp_mask_ps_const_offset;
+    uint32_t          xv_vs_const_offset;
+    uint32_t          xv_ps_const_offset;
 
     //size/addr stuff
     struct r600_accel_object src_obj[2];
@@ -1274,6 +1286,8 @@ extern void RADEONDoPrepareCopyMMIO(ScrnInfoPtr pScrn,
 				    Pixel planemask);
 extern Bool R600DrawInit(ScreenPtr pScreen);
 extern Bool R600LoadShaders(ScrnInfoPtr pScrn);
+extern Bool EVERGREENDrawInit(ScreenPtr pScreen);
+extern Bool EVERGREENLoadShaders(ScrnInfoPtr pScrn);
 #endif
 
 #if defined(XF86DRI) && defined(USE_EXA)
diff --git a/src/radeon_accel.c b/src/radeon_accel.c
index 281bc6d..8fc515d 100644
--- a/src/radeon_accel.c
+++ b/src/radeon_accel.c
@@ -1072,7 +1072,10 @@ Bool RADEONAccelInit(ScreenPtr pScreen)
     if (info->useEXA) {
 # ifdef XF86DRI
 	if (info->directRenderingEnabled) {
-	    if (info->ChipFamily >= CHIP_FAMILY_R600) {
+	    if (info->ChipFamily >= CHIP_FAMILY_CEDAR) {
+		if (!EVERGREENDrawInit(pScreen))
+		    return FALSE;
+	    } else if (info->ChipFamily >= CHIP_FAMILY_R600) {
 		if (!R600DrawInit(pScreen))
 		    return FALSE;
 	    } else {
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index ba13071..b762648 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -202,8 +202,18 @@ static Bool RADEONIsAccelWorking(ScrnInfoPtr pScrn)
     int r;
     uint32_t tmp;
 
+#ifndef RADEON_INFO_ACCEL_WORKING
+#define RADEON_INFO_ACCEL_WORKING 0x03
+#endif
+#ifndef RADEON_INFO_ACCEL_WORKING2
+#define RADEON_INFO_ACCEL_WORKING2 0x05
+#endif
+
     memset(&ginfo, 0, sizeof(ginfo));
-    ginfo.request = 0x3;
+    if (info->dri->pKernelDRMVersion->version_minor >= 5)
+	ginfo.request = RADEON_INFO_ACCEL_WORKING2;
+    else
+	ginfo.request = RADEON_INFO_ACCEL_WORKING;
     ginfo.value = (uintptr_t)&tmp;
     r = drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &ginfo, sizeof(ginfo));
     if (r) {
@@ -230,7 +240,6 @@ static Bool RADEONPreInitAccel_KMS(ScrnInfoPtr pScrn)
     }
 
     if (xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE) ||
-	(info->ChipFamily >= CHIP_FAMILY_CEDAR) ||
 	(!RADEONIsAccelWorking(pScrn))) {
 	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
 		   "GPU accel disabled or not working, using shadowfb for KMS\n");
diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index 377c26b..e61c29d 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -4242,6 +4242,12 @@
 #define EVERGREEN_DATA_FORMAT                           0x6b00
 #       define EVERGREEN_INTERLEAVE_EN                  (1 << 0)
 #define EVERGREEN_DESKTOP_HEIGHT                        0x6b04
+#define EVERGREEN_VLINE_START_END                       0x6b08
+#       define EVERGREEN_VLINE_START_SHIFT              0
+#       define EVERGREEN_VLINE_END_SHIFT                16
+#       define EVERGREEN_VLINE_INV                      (1 << 31)
+#define EVERGREEN_VLINE_STATUS                          0x6bb8
+#       define EVERGREEN_VLINE_STAT                     (1 << 12)
 
 #define EVERGREEN_VIEWPORT_START                        0x6d70
 #define EVERGREEN_VIEWPORT_SIZE                         0x6d74
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index c19066b..f682811 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -36,7 +36,6 @@
 
 #include "radeon.h"
 #include "radeon_reg.h"
-#include "r600_reg.h"
 #include "radeon_macros.h"
 #include "radeon_probe.h"
 #include "radeon_video.h"
@@ -47,6 +46,9 @@
 extern void
 R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv);
 
+extern void
+EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv);
+
 extern Bool
 R600CopyToVRAM(ScrnInfoPtr pScrn,
 	       char *src, int src_pitch,
@@ -473,7 +475,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
 #endif
 #ifdef XF86DRI
     if (info->directRenderingEnabled) {
-	if (IS_R600_3D)
+	if (IS_EVERGREEN_3D)
+	    EVERGREENDisplayTexturedVideo(pScrn, pPriv);
+	else if (IS_R600_3D)
 	    R600DisplayTexturedVideo(pScrn, pPriv);
 	else if (IS_R500_3D)
 	    R500DisplayTexturedVideoCP(pScrn, pPriv);


More information about the xorg-commit mailing list