xf86-video-ati: Branch 'master'

Dave Airlie airlied at kemper.freedesktop.org
Mon Jun 29 23:38:54 PDT 2009


 configure.ac                     |   41 +++++++
 src/r600_textured_videofuncs.c   |    4 
 src/radeon.h                     |  122 +++++++++++++++++-----
 src/radeon_accel.c               |  174 ++++++++++++++++++++-----------
 src/radeon_drm.h                 |  129 +++++++++++++++++++++++
 src/radeon_dummy_bufmgr.h        |   57 ++++++++++
 src/radeon_exa.c                 |  140 ++++++++++++++++++++++++-
 src/radeon_exa_funcs.c           |  216 ++++++++++++++++++++++++++++++++-------
 src/radeon_exa_render.c          |  196 +++++++++++++++++++++++++++--------
 src/radeon_macros.h              |   37 ++++++
 src/radeon_textured_video.c      |   45 +++++++-
 src/radeon_textured_videofuncs.c |  155 ++++++++++++++++++---------
 src/radeon_video.h               |    3 
 13 files changed, 1084 insertions(+), 235 deletions(-)

New commits:
commit e932836691aeaec37794fdaed2dabb22710fd171
Author: Dave Airlie <airlied at redhat.com>
Date:   Tue Jun 30 16:24:37 2009 +1000

    radeon: initial preparation for kms patch.
    
    This patch contains most of the changes to the EXA and texture video
    accel code.
    
    It adds a few bits of pixmap support but doesn't actually do anything
    useful for KMS yet.
    
    Testing this should not show any regressions over what we have already.
    The biggest worry is r6xx: I've fixed one textured video regression there,
    but have no idea what others might lurk.
    
    It won't build against libdrm radeon yet either

diff --git a/configure.ac b/configure.ac
index a2d7f97..0cf24e6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -31,6 +31,7 @@ AM_CONFIG_HEADER([config.h])
 AC_CONFIG_AUX_DIR(.)
 
 AM_INIT_AUTOMAKE([dist-bzip2])
+AC_SYS_LARGEFILE
 
 AM_MAINTAINER_MODE
 
@@ -114,6 +115,22 @@ if test "$DRI" = yes; then
 	if test "$have_damage_h" = yes; then
 		AC_DEFINE(DAMAGE,1,[Use Damage extension])
 	fi
+
+        save_CFLAGS="$CFLAGS"
+        CFLAGS="$XORG_CFLAGS $DRI_CFLAGS $CFLAGS"
+#	AC_CHECK_HEADER(xf86drmMode.h,[DRM_MODE=yes],[DRM_MODE=no],[#include <stdint.h>
+	DRM_MODE=no
+#include <stdlib.h>])
+	 if test "x$DRM_MODE" = xyes; then
+		PKG_CHECK_MODULES(LIBDRM_RADEON, [xorg-server >= 1.6 libdrm_radeon],
+		[LIBDRM_RADEON=yes], [LIBDRM_RADEON=no])
+		
+		if test "x$LIBDRM_RADEON" = xyes; then
+			AC_DEFINE(XF86DRM_MODE,1,[DRM kernel modesetting])
+			AC_DEFINE(DRI2, 1,[Enable DRI2 code])
+		fi
+        fi
+	CFLAGS="$save_CFLAGS"
 fi
 
 save_CFLAGS="$CFLAGS"
@@ -310,6 +327,8 @@ esac
 
 AC_SUBST([XORG_CFLAGS])
 AC_SUBST([DRI_CFLAGS])
+AC_SUBST([LIBDRM_RADEON_CFLAGS])
+AC_SUBST([LIBDRM_RADEON_LIBS])
 AC_SUBST([moduledir])
 
 DRIVER_NAME=ati
@@ -336,3 +355,25 @@ AC_OUTPUT([
 	src/Makefile
 	man/Makefile
 ])
+
+dnl
+dnl Output some configuration info for the user
+dnl
+echo ""
+echo "        prefix:              $prefix"
+echo "        exec_prefix:         $exec_prefix"
+echo "        libdir:              $libdir"
+echo "        includedir:          $includedir"
+
+
+echo ""
+echo "        Kernel modesetting:  $DRM_MODE"
+
+echo ""
+echo "        CFLAGS:              $CFLAGS"
+echo "        CXXFLAGS:            $CXXFLAGS"
+echo "        Macros:              $DEFINES"
+
+echo ""
+echo "        Run '${MAKE-make}' to build xf86-video-ati"
+echo ""
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 6af0949..7c91a06 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -297,7 +297,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     switch(pPriv->id) {
     case FOURCC_YV12:
     case FOURCC_I420:
-	accel_state->src_mc_addr[0] = pPriv->src_offset;
+	accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset;
 	accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;
 
 	/* flush texture cache */
@@ -392,7 +392,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     case FOURCC_UYVY:
     case FOURCC_YUY2:
     default:
-	accel_state->src_mc_addr[0] = pPriv->src_offset;
+	accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset;
 	accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h;
 
 	/* flush texture cache */
diff --git a/src/radeon.h b/src/radeon.h
index 2145de5..0dce081 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -75,6 +75,7 @@
 #include "dri.h"
 #include "GL/glxint.h"
 #include "xf86drm.h"
+#include "radeon_drm.h"
 
 #ifdef DAMAGE
 #include "damage.h"
@@ -85,6 +86,13 @@
 #include "xf86Crtc.h"
 #include "X11/Xatom.h"
 
+#ifdef XF86DRM_MODE
+#include "radeon_bo.h"
+#include "radeon_cs.h"
+#else
+#include "radeon_dummy_bufmgr.h"
+#endif
+
 				/* Render support */
 #ifdef RENDER
 #include "picturestr.h"
@@ -450,6 +458,11 @@ typedef struct {
 
 typedef struct _atomBiosHandle *atomBiosHandlePtr;
 
+struct radeon_exa_pixmap_priv {
+    struct radeon_bo *bo;
+    int flags;
+};
+
 typedef struct {
     uint32_t pci_device_id;
     RADEONChipFamily chip_family;
@@ -460,6 +473,25 @@ typedef struct {
     int singledac;
 } RADEONCardInfo;
 
+#define RADEON_2D_EXA_COPY 1
+#define RADEON_2D_EXA_SOLID 2
+
+struct radeon_2d_state {
+    int op; //
+    uint32_t dst_pitch_offset;
+    uint32_t src_pitch_offset;
+    uint32_t dp_gui_master_cntl;
+    uint32_t dp_cntl;
+    uint32_t dp_write_mask;
+    uint32_t dp_brush_frgd_clr;
+    uint32_t dp_brush_bkgd_clr;
+    uint32_t dp_src_frgd_clr;
+    uint32_t dp_src_bkgd_clr;
+    uint32_t default_sc_bottom_right;
+    struct radeon_bo *dst_bo;
+    struct radeon_bo *src_bo;
+};
+    
 #ifdef XF86DRI
 struct radeon_cp {
     Bool              CPRuns;           /* CP is running */
@@ -937,6 +969,18 @@ typedef struct {
     float igp_ht_link_clk;
     float igp_ht_link_width;
 
+    int can_resize;
+    void (*reemit_current2d)(ScrnInfoPtr pScrn, int op); // emit the current 2D state into the IB 
+    struct radeon_2d_state state_2d;
+#ifdef XF86DRM_MODE
+    struct radeon_bo_manager *bufmgr;
+    struct radeon_cs_manager *csm;
+    struct radeon_cs *cs;
+#else
+    /* fake bool */
+    Bool cs;
+#endif
+
 } RADEONInfoRec, *RADEONInfoPtr;
 
 #define RADEONWaitForFifo(pScrn, entries)				\
@@ -1013,11 +1057,13 @@ extern void RADEONWaitForFifoFunction(ScrnInfoPtr pScrn, int entries);
 #ifdef XF86DRI
 extern drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn);
 extern void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard);
+extern void radeon_cs_flush_indirect(ScrnInfoPtr pScrn);
 extern void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn);
 extern int RADEONCPStop(ScrnInfoPtr pScrn,  RADEONInfoPtr info);
 #  ifdef USE_XAA
 extern Bool RADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen);
 #  endif
+uint32_t radeonGetPixmapOffset(PixmapPtr pPix);
 #endif
 
 #ifdef USE_XAA
@@ -1202,6 +1248,9 @@ extern void
 radeon_legacy_free_memory(ScrnInfoPtr pScrn,
 		          void *mem_struct);
 
+struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix);
+void radeon_set_pixmap_bo(PixmapPtr pPix, struct radeon_bo *bo);
+
 #ifdef XF86DRI
 #  ifdef USE_XAA
 /* radeon_accelfuncs.c */
@@ -1220,7 +1269,9 @@ do {									\
 
 #define RADEONCP_RELEASE(pScrn, info)					\
 do {									\
-    if (info->cp->CPInUse) {						\
+    if (info->cs) {							\
+	radeon_cs_flush_indirect(pScrn);				\
+    } else if (info->cp->CPInUse) {					\
 	RADEON_PURGE_CACHE();						\
 	RADEON_WAIT_UNTIL_IDLE();					\
 	RADEONCPReleaseIndirect(pScrn);					\
@@ -1255,7 +1306,7 @@ do {									\
 
 #define RADEONCP_REFRESH(pScrn, info)					\
 do {									\
-    if (!info->cp->CPInUse) {						\
+    if (!info->cp->CPInUse && !info->cs) {				\
 	if (info->cp->needCacheFlush) {					\
 	    RADEON_PURGE_CACHE();					\
 	    RADEON_PURGE_ZCACHE();					\
@@ -1286,54 +1337,59 @@ do {									\
 	xf86DrvMsg(pScrn->scrnIndex, X_INFO,				\
 		   "BEGIN_RING(%d) in %s\n", (unsigned int)n, __FUNCTION__);\
     }									\
-    if (++info->cp->dma_begin_count != 1) {				\
+    if (info->cs) radeon_cs_begin(info->cs, n, __FILE__, __func__, __LINE__); else { \
+      if (++info->cp->dma_begin_count != 1) {				\
 	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,				\
 		   "BEGIN_RING without end at %s:%d\n",			\
-		   info->cp->dma_debug_func, info->cp->dma_debug_lineno);	\
+		   info->cp->dma_debug_func, info->cp->dma_debug_lineno); \
 	info->cp->dma_begin_count = 1;					\
-    }									\
-    info->cp->dma_debug_func = __FILE__;				\
-    info->cp->dma_debug_lineno = __LINE__;				\
-    if (!info->cp->indirectBuffer) {					\
+      }									\
+      info->cp->dma_debug_func = __FILE__;				\
+      info->cp->dma_debug_lineno = __LINE__;				\
+      if (!info->cp->indirectBuffer) {					\
 	info->cp->indirectBuffer = RADEONCPGetBuffer(pScrn);		\
 	info->cp->indirectStart = 0;					\
-    } else if (info->cp->indirectBuffer->used + (n) * (int)sizeof(uint32_t) >	\
-	       info->cp->indirectBuffer->total) {		        \
+      } else if (info->cp->indirectBuffer->used + (n) * (int)sizeof(uint32_t) >	\
+		 info->cp->indirectBuffer->total) {		        \
 	RADEONCPFlushIndirect(pScrn, 1);				\
+      }									\
+      __expected = n;							\
+      __head = (pointer)((char *)info->cp->indirectBuffer->address +	\
+			 info->cp->indirectBuffer->used);		\
+      __count = 0;							\
     }									\
-    __expected = n;							\
-    __head = (pointer)((char *)info->cp->indirectBuffer->address +	\
-		       info->cp->indirectBuffer->used);			\
-    __count = 0;							\
 } while (0)
 
 #define ADVANCE_RING() do {						\
-    if (info->cp->dma_begin_count-- != 1) {				\
+    if (info->cs) radeon_cs_end(info->cs, __FILE__, __func__, __LINE__); else {		\
+      if (info->cp->dma_begin_count-- != 1) {				\
 	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,				\
 		   "ADVANCE_RING without begin at %s:%d\n",		\
 		   __FILE__, __LINE__);					\
 	info->cp->dma_begin_count = 0;					\
-    }									\
-    if (__count != __expected) {					\
+      }									\
+      if (__count != __expected) {					\
 	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,				\
 		   "ADVANCE_RING count != expected (%d vs %d) at %s:%d\n", \
 		   __count, __expected, __FILE__, __LINE__);		\
-    }									\
-    if (RADEON_VERBOSE) {						\
+      }									\
+      if (RADEON_VERBOSE) {						\
 	xf86DrvMsg(pScrn->scrnIndex, X_INFO,				\
 		   "ADVANCE_RING() start: %d used: %d count: %d\n",	\
 		   info->cp->indirectStart,				\
 		   info->cp->indirectBuffer->used,			\
 		   __count * (int)sizeof(uint32_t));			\
+      }									\
+      info->cp->indirectBuffer->used += __count * (int)sizeof(uint32_t); \
     }									\
-    info->cp->indirectBuffer->used += __count * (int)sizeof(uint32_t);	\
-} while (0)
+  } while (0)
 
 #define OUT_RING(x) do {						\
     if (RADEON_VERBOSE) {						\
 	xf86DrvMsg(pScrn->scrnIndex, X_INFO,				\
 		   "   OUT_RING(0x%08x)\n", (unsigned int)(x));		\
     }									\
+    if (info->cs) radeon_cs_write_dword(info->cs, (x)); else		\
     __head[__count++] = (x);						\
 } while (0)
 
@@ -1343,12 +1399,22 @@ do {									\
     OUT_RING(val);							\
 } while (0)
 
+#define OUT_RING_RELOC(x, read_domains, write_domain)			\
+  do {									\
+	int _ret; \
+    _ret = radeon_cs_write_reloc(info->cs, x, read_domains, write_domain, 0); \
+	if (_ret) ErrorF("reloc emit failure %d\n", _ret); \
+  } while(0)
+
+
 #define FLUSH_RING()							\
 do {									\
     if (RADEON_VERBOSE)							\
 	xf86DrvMsg(pScrn->scrnIndex, X_INFO,				\
 		   "FLUSH_RING in %s\n", __FUNCTION__);			\
-    if (info->cp->indirectBuffer)					\
+    if (info->cs)							\
+	radeon_cs_flush_indirect(pScrn); 				\
+    else if (info->cp->indirectBuffer)					\
 	RADEONCPFlushIndirect(pScrn, 0);				\
 } while (0)
 
@@ -1434,8 +1500,12 @@ do {									\
 	case EXA_ENGINEMODE_2D:						\
 	    break;							\
 	}								\
-	if (flush && info->directRenderingEnabled)                      \
-	    RADEONCPFlushIndirect(pScrn, 1);                            \
+	if (flush) {							\
+    	    if (info->cs)						\
+	        radeon_cs_flush_indirect(pScrn);			\
+            else if (info->directRenderingEnabled)                     	\
+	        RADEONCPFlushIndirect(pScrn, 1);                        \
+	}								\
         info->accel_state->engineMode = EXA_ENGINEMODE_2D;              \
 } while (0);
 
@@ -1450,7 +1520,9 @@ do {									\
 	    break;							\
 	}								\
 	if (flush) {							\
-	    if (info->directRenderingEnabled)				\
+    	    if (info->cs)						\
+	        radeon_cs_flush_indirect(pScrn);			\
+	    else if (info->directRenderingEnabled)				\
 	        RADEONCPFlushIndirect(pScrn, 1);                        \
 	    RADEONInit3DEngine(pScrn);                                  \
 	}                                                               \
diff --git a/src/radeon_accel.c b/src/radeon_accel.c
index f90b386..e51bffe 100644
--- a/src/radeon_accel.c
+++ b/src/radeon_accel.c
@@ -375,6 +375,9 @@ void RADEONEngineRestore(ScrnInfoPtr pScrn)
     RADEONInfoPtr  info       = RADEONPTR(pScrn);
     unsigned char *RADEONMMIO = info->MMIO;
 
+    if (info->cs)
+      return;
+
     xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
 		   "EngineRestore (%d/%d)\n",
 		   info->CurrentLayout.pixel_code,
@@ -421,6 +424,24 @@ void RADEONEngineRestore(ScrnInfoPtr pScrn)
     info->accel_state->XInited3D = FALSE;
 }
 
+/* Query the quad-pipe count from the DRM into *num_pipes; returns the ioctl result. */
+static int RADEONDRMGetNumPipes(ScrnInfoPtr pScrn, int *num_pipes)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    if (info->dri->pKernelDRMVersion->version_major < 2) {
+        drm_radeon_getparam_t np;
+        memset(&np, 0, sizeof(np));
+        np.param = RADEON_PARAM_NUM_GB_PIPES;
+        np.value = num_pipes;
+        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np, sizeof(np));
+    } else {
+        struct drm_radeon_info np2 = { 0 }; /* zero 'pad' too: no stack garbage to the kernel */
+        np2.value = (uint64_t)(uintptr_t)num_pipes; /* via uintptr_t: no sign-extension on 32-bit */
+        np2.request = RADEON_INFO_NUM_GB_PIPES;
+        return drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &np2, sizeof(np2));
+    }
+}
+
 /* Initialize the acceleration hardware */
 void RADEONEngineInit(ScrnInfoPtr pScrn)
 {
@@ -436,15 +457,9 @@ void RADEONEngineInit(ScrnInfoPtr pScrn)
 
 #ifdef XF86DRI
     if (info->directRenderingEnabled && (IS_R300_3D || IS_R500_3D)) {
-	drm_radeon_getparam_t np;
 	int num_pipes;
 
-	memset(&np, 0, sizeof(np));
-	np.param = RADEON_PARAM_NUM_GB_PIPES;
-	np.value = &num_pipes;
-
-	if (drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_GETPARAM, &np,
-				sizeof(np)) < 0) {
+	if(RADEONDRMGetNumPipes(pScrn, &num_pipes) < 0) {
 	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
 		       "Failed to determine num pipes from DRM, falling back to "
 		       "manual look-up!\n");
@@ -455,64 +470,67 @@ void RADEONEngineInit(ScrnInfoPtr pScrn)
     }
 #endif
 
-    if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
-	(info->ChipFamily == CHIP_FAMILY_R420)  ||
-	(info->ChipFamily == CHIP_FAMILY_RS600) ||
-	(info->ChipFamily == CHIP_FAMILY_RS690) ||
-	(info->ChipFamily == CHIP_FAMILY_RS740) ||
-	(info->ChipFamily == CHIP_FAMILY_RS400) ||
-	(info->ChipFamily == CHIP_FAMILY_RS480) ||
-	IS_R500_3D) {
-	if (info->accel_state->num_gb_pipes == 0) {
-	    uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
-
-	    info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
-	    if (IS_R500_3D)
-		OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
-	}
-    } else {
-	if (info->accel_state->num_gb_pipes == 0) {
-	    if ((info->ChipFamily == CHIP_FAMILY_R300) ||
-		(info->ChipFamily == CHIP_FAMILY_R350)) {
-		/* R3xx chips */
-		info->accel_state->num_gb_pipes = 2;
-	    } else {
-		/* RV3xx chips */
-		info->accel_state->num_gb_pipes = 1;
+    if (!info->cs) {
+	if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
+	    (info->ChipFamily == CHIP_FAMILY_R420)  ||
+	    (info->ChipFamily == CHIP_FAMILY_RS600) ||
+	    (info->ChipFamily == CHIP_FAMILY_RS690) ||
+	    (info->ChipFamily == CHIP_FAMILY_RS740) ||
+	    (info->ChipFamily == CHIP_FAMILY_RS400) ||
+	    (info->ChipFamily == CHIP_FAMILY_RS480) ||
+	    IS_R500_3D) {
+	    if (info->accel_state->num_gb_pipes == 0) {
+		uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
+
+		info->accel_state->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
+		if (IS_R500_3D)
+		    OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
+	    }
+	} else {
+	    if (info->accel_state->num_gb_pipes == 0) {
+		if ((info->ChipFamily == CHIP_FAMILY_R300) ||
+		    (info->ChipFamily == CHIP_FAMILY_R350)) {
+		    /* R3xx chips */
+		    info->accel_state->num_gb_pipes = 2;
+		} else {
+		    /* RV3xx chips */
+		    info->accel_state->num_gb_pipes = 1;
+		}
 	    }
-	}
-    }
-
-    /* RV410 SE cards only have 1 quadpipe */
-    if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
-	(info->Chipset == PCI_CHIP_RV410_5E4F))
-	info->accel_state->num_gb_pipes = 1;
-
-    if (IS_R300_3D || IS_R500_3D)
-	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		   "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
-
-    if (IS_R300_3D || IS_R500_3D) {
-	uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
-
-	switch(info->accel_state->num_gb_pipes) {
-	case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
-	case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
-	case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
-	default:
-	case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
 	}
 
-	OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
-	OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
-	OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
-	OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
-					 R300_DC_AUTOFLUSH_ENABLE |
-					 R300_DC_DC_DISABLE_IGNORE_PE));
-    } else
-	OUTREG(RADEON_RB3D_CNTL, 0);
+	/* RV410 SE cards only have 1 quadpipe */
+	if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
+	    (info->Chipset == PCI_CHIP_RV410_5E4F))
+	    info->accel_state->num_gb_pipes = 1;
+
+	if (IS_R300_3D || IS_R500_3D)
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "num quad-pipes is %d\n", info->accel_state->num_gb_pipes);
+
+	if (IS_R300_3D || IS_R500_3D) {
+	    uint32_t gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16);
+	    
+	    switch(info->accel_state->num_gb_pipes) {
+	    case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
+	    case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
+	    case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
+	    default:
+	    case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
+	    }
 
-    RADEONEngineReset(pScrn);
+	    OUTREG(R300_GB_TILE_CONFIG, gb_tile_config);
+	    OUTREG(RADEON_WAIT_UNTIL, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
+	    if (info->ChipFamily >= CHIP_FAMILY_R420)
+		OUTREG(R300_DST_PIPE_CONFIG, INREG(R300_DST_PIPE_CONFIG) | R300_PIPE_AUTO_CONFIG);
+	    OUTREG(R300_RB2D_DSTCACHE_MODE, (INREG(R300_RB2D_DSTCACHE_MODE) |
+					     R300_DC_AUTOFLUSH_ENABLE |
+					     R300_DC_DC_DISABLE_IGNORE_PE));
+	} else
+	    OUTREG(RADEON_RB3D_CNTL, 0);
+	
+	RADEONEngineReset(pScrn);
+    }
 
     switch (info->CurrentLayout.pixel_code) {
     case 8:  datatype = 2; break;
@@ -536,6 +554,24 @@ void RADEONEngineInit(ScrnInfoPtr pScrn)
     RADEONEngineRestore(pScrn);
 }
 
+uint32_t radeonGetPixmapOffset(PixmapPtr pPix)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    uint32_t offset = 0;
+    if (info->cs)
+	return 0;
+#ifdef USE_EXA
+    if (info->useEXA) {
+	offset = exaGetPixmapOffset(pPix);
+    } else
+#endif
+    {
+	offset = pPix->devPrivate.ptr - info->FB;
+    }
+    offset += info->fbLocation + pScrn->fbOffset;
+    return offset;
+}
 
 #define ACCEL_MMIO
 #define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
@@ -620,6 +656,20 @@ int RADEONCPStop(ScrnInfoPtr pScrn, RADEONInfoPtr info)
     }
 }
 
+#define RADEON_IB_RESERVE (16 * sizeof(uint32_t))
+
+void radeon_cs_flush_indirect(ScrnInfoPtr pScrn)
+{
+#ifdef XF86DRM_MODE
+    RADEONInfoPtr  info = RADEONPTR(pScrn);
+
+    if (!info->cs->cdw)
+	return;
+    radeon_cs_emit(info->cs);
+    radeon_cs_erase(info->cs);
+#endif
+}
+
 /* Get an indirect buffer for the CP 2D acceleration commands  */
 drmBufPtr RADEONCPGetBuffer(ScrnInfoPtr pScrn)
 {
@@ -696,6 +746,7 @@ void RADEONCPFlushIndirect(ScrnInfoPtr pScrn, int discard)
     int                start  = info->cp->indirectStart;
     drm_radeon_indirect_t  indirect;
 
+    assert(!info->cs);
     if (!buffer) return;
     if (start == buffer->used && !discard) return;
 
@@ -745,6 +796,7 @@ void RADEONCPReleaseIndirect(ScrnInfoPtr pScrn)
     int                start  = info->cp->indirectStart;
     drm_radeon_indirect_t  indirect;
 
+    assert(!info->cs);
     if (info->ChipFamily >= CHIP_FAMILY_R600) {
 	if (buffer && (buffer->used & 0x3c)) {
 	    RING_LOCALS;
diff --git a/src/radeon_drm.h b/src/radeon_drm.h
index 54bc234..dd0087a 100644
--- a/src/radeon_drm.h
+++ b/src/radeon_drm.h
@@ -493,6 +493,16 @@ typedef struct {
 #define DRM_RADEON_SETPARAM   0x19
 #define DRM_RADEON_SURF_ALLOC 0x1a
 #define DRM_RADEON_SURF_FREE  0x1b
+/* KMS ioctl */
+#define DRM_RADEON_GEM_INFO		0x1c
+#define DRM_RADEON_GEM_CREATE		0x1d
+#define DRM_RADEON_GEM_MMAP		0x1e
+#define DRM_RADEON_GEM_PREAD		0x21
+#define DRM_RADEON_GEM_PWRITE		0x22
+#define DRM_RADEON_GEM_SET_DOMAIN	0x23
+#define DRM_RADEON_GEM_WAIT_IDLE	0x24
+#define DRM_RADEON_CS			0x26
+#define DRM_RADEON_INFO			0x27
 
 #define DRM_IOCTL_RADEON_CP_INIT    DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t)
 #define DRM_IOCTL_RADEON_CP_START   DRM_IO(  DRM_COMMAND_BASE + DRM_RADEON_CP_START)
@@ -521,6 +531,17 @@ typedef struct {
 #define DRM_IOCTL_RADEON_SETPARAM   DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SETPARAM, drm_radeon_setparam_t)
 #define DRM_IOCTL_RADEON_SURF_ALLOC DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_ALLOC, drm_radeon_surface_alloc_t)
 #define DRM_IOCTL_RADEON_SURF_FREE  DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_SURF_FREE, drm_radeon_surface_free_t)
+/* KMS */
+#define DRM_IOCTL_RADEON_GEM_INFO	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_INFO, struct drm_radeon_gem_info)
+#define DRM_IOCTL_RADEON_GEM_CREATE	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_CREATE, struct drm_radeon_gem_create)
+#define DRM_IOCTL_RADEON_GEM_MMAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_MMAP, struct drm_radeon_gem_mmap)
+#define DRM_IOCTL_RADEON_GEM_PREAD	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PREAD, struct drm_radeon_gem_pread)
+#define DRM_IOCTL_RADEON_GEM_PWRITE	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_PWRITE, struct drm_radeon_gem_pwrite)
+#define DRM_IOCTL_RADEON_GEM_SET_DOMAIN	DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_DOMAIN, struct drm_radeon_gem_set_domain)
+#define DRM_IOCTL_RADEON_GEM_WAIT_IDLE	DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle) 
+#define DRM_IOCTL_RADEON_CS		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs)
+#define DRM_IOCTL_RADEON_INFO		DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info)
+
 
 typedef struct drm_radeon_init {
 	enum {
@@ -753,4 +774,112 @@ typedef struct drm_radeon_surface_free {
 #define	DRM_RADEON_VBLANK_CRTC1		1
 #define	DRM_RADEON_VBLANK_CRTC2		2
 
+/*
+ * Kernel modesetting world below.
+ */
+#define RADEON_GEM_DOMAIN_CPU		0x1
+#define RADEON_GEM_DOMAIN_GTT		0x2
+#define RADEON_GEM_DOMAIN_VRAM		0x4
+
+struct drm_radeon_gem_info {
+	uint64_t	gart_size;
+	uint64_t	vram_size;
+	uint64_t	vram_visible;
+};
+
+#define RADEON_GEM_NO_BACKING_STORE 1
+
+struct drm_radeon_gem_create {
+	uint64_t	size;
+	uint64_t	alignment;
+	uint32_t	handle;
+	uint32_t	initial_domain;
+	uint32_t	flags;
+};
+
+struct drm_radeon_gem_mmap {
+	uint32_t	handle;
+	uint32_t	pad;
+	uint64_t	offset;
+	uint64_t	size;
+	uint64_t	addr_ptr;
+};
+
+struct drm_radeon_gem_set_domain {
+	uint32_t	handle;
+	uint32_t		read_domains;
+	uint32_t		write_domain;
+};
+
+struct drm_radeon_gem_wait_idle {
+	uint32_t	handle;
+	uint32_t	pad;
+};
+
+struct drm_radeon_gem_busy {
+	uint32_t	handle;
+	uint32_t	busy;
+};
+
+struct drm_radeon_gem_pread {
+	/** Handle for the object being read. */
+	uint32_t handle;
+	uint32_t pad;
+	/** Offset into the object to read from */
+	uint64_t offset;
+	/** Length of data to read */
+	uint64_t size;
+	/** Pointer to write the data into. */
+	/* void *, but pointers are not 32/64 compatible */
+	uint64_t data_ptr;
+};
+
+struct drm_radeon_gem_pwrite {
+	/** Handle for the object being written to. */
+	uint32_t handle;
+	uint32_t pad;
+	/** Offset into the object to write to */
+	uint64_t offset;
+	/** Length of data to write */
+	uint64_t size;
+	/** Pointer to read the data from. */
+	/* void *, but pointers are not 32/64 compatible */
+	uint64_t data_ptr;
+};
+
+#define RADEON_CHUNK_ID_RELOCS	0x01
+#define RADEON_CHUNK_ID_IB	0x02
+
+struct drm_radeon_cs_chunk {
+	uint32_t		chunk_id;
+	uint32_t		length_dw;
+	uint64_t		chunk_data;
+};
+
+struct drm_radeon_cs_reloc {
+	uint32_t		handle;
+	uint32_t		read_domains;
+	uint32_t		write_domain;
+	uint32_t		flags;
+};
+
+struct drm_radeon_cs {
+	uint32_t		num_chunks;
+	uint32_t		cs_id;
+	/* this points to uint64_t * which point to cs chunks */
+	uint64_t		chunks;
+	/* updates to the limits after this CS ioctl */
+	uint64_t		gart_limit;
+	uint64_t		vram_limit;
+};
+
+#define RADEON_INFO_DEVICE_ID		0x00
+#define RADEON_INFO_NUM_GB_PIPES	0x01
+
+struct drm_radeon_info {
+	uint32_t		request;
+	uint32_t		pad;
+	uint64_t		value;
+};
+
 #endif
diff --git a/src/radeon_dummy_bufmgr.h b/src/radeon_dummy_bufmgr.h
new file mode 100644
index 0000000..bf89292
--- /dev/null
+++ b/src/radeon_dummy_bufmgr.h
@@ -0,0 +1,57 @@
+
+#ifndef RADEON_DUMMY_BUFMGR_H
+#define RADEON_DUMMY_BUFMGR_H
+/* when we don't have modesetting but we still need these functions */
+
+struct radeon_bo {
+  int dummy;
+  void *ptr;
+};
+
+static inline int radeon_cs_begin(Bool dummy, int d2, const char *file,
+				   const char *func, int line)
+{
+  return 0;
+}
+
+static inline int radeon_cs_end(Bool dummy, const char *file,
+				const char *func, int line)
+{
+  return 0;
+}
+
+static inline void radeon_cs_write_dword(Bool cs, uint32_t dword)
+{
+}
+
+static inline int radeon_cs_write_reloc(Bool cs,
+                                        struct radeon_bo *bo,
+                                        uint32_t read_domain,
+                                        uint32_t write_domain,
+                                        uint32_t flags)
+{
+  return 0;
+}
+
+static inline int radeon_bo_map(struct radeon_bo *bo, int write) {return 0;}
+static inline void radeon_bo_ref(struct radeon_bo *bo) {return;}
+static inline struct radeon_bo *radeon_bo_unref(struct radeon_bo *bo) {return NULL;}
+static inline void radeon_bo_unmap(struct radeon_bo *bo) {return;}
+static inline int radeon_bo_wait(struct radeon_bo *bo) {return 0;}
+
+
+struct radeon_cs_space_check {
+  struct radeon_bo *bo;
+  int read_domains;
+  int write_domain;
+  int new_accounted;
+};
+
+static inline int radeon_cs_space_check(Bool cs, struct radeon_cs_space_check *bos, int num)
+{
+  return 0;
+}
+#define RADEON_CS_SPACE_OP_TO_BIG 0
+#define RADEON_CS_SPACE_FLUSH 1
+
+#endif
diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index ae68146..5b20eca 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -120,6 +120,15 @@ static __inline__ uint32_t F_TO_DW(float val)
     return tmp.l;
 }
 
+static inline void radeon_add_pixmap(struct radeon_cs_space_check *bos, int index, PixmapPtr pPix, int read_domains, int write_domain)
+{
+    struct radeon_exa_pixmap_priv *driver_priv = exaGetPixmapDriverPrivate(pPix);
+    bos[index].bo = driver_priv->bo;
+    bos[index].read_domains = read_domains;
+    bos[index].write_domain = write_domain;
+    bos[index].new_accounted = 0;
+}
+
 /* Assumes that depth 15 and 16 can be used as depth 16, which is okay since we
  * require src and dest datatypes to be equal.
  */
@@ -179,7 +188,6 @@ static Bool RADEONGetOffsetPitch(PixmapPtr pPix, int bpp, uint32_t *pitch_offset
 
 Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset)
 {
-	RINFO_FROM_SCREEN(pPix->drawable.pScreen);
 	uint32_t pitch, offset;
 	int bpp;
 
@@ -187,7 +195,7 @@ Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset)
 	if (bpp == 24)
 		bpp = 8;
 
-	offset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
+	offset = radeonGetPixmapOffset(pPix);
 	pitch = exaGetPixmapPitch(pPix);
 
 	return RADEONGetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch);
@@ -224,7 +232,7 @@ int RADEONBiggerCrtcArea(PixmapPtr pPix)
 
 static unsigned long swapper_surfaces[6];
 
-static Bool RADEONPrepareAccess(PixmapPtr pPix, int index)
+static Bool RADEONPrepareAccess_BE(PixmapPtr pPix, int index)
 {
     RINFO_FROM_SCREEN(pPix->drawable.pScreen);
     unsigned char *RADEONMMIO = info->MMIO;
@@ -290,7 +298,7 @@ static Bool RADEONPrepareAccess(PixmapPtr pPix, int index)
     return TRUE;
 }
 
-static void RADEONFinishAccess(PixmapPtr pPix, int index)
+static void RADEONFinishAccess_BE(PixmapPtr pPix, int index)
 {
     RINFO_FROM_SCREEN(pPix->drawable.pScreen);
     unsigned char *RADEONMMIO = info->MMIO;
@@ -323,6 +331,123 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index)
 
 #endif /* X_BYTE_ORDER == X_BIG_ENDIAN */
 
+#ifdef XF86DRM_MODE
+static Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index)
+{
+    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
+    struct radeon_exa_pixmap_priv *driver_priv;
+    int ret;
+
+    driver_priv = exaGetPixmapDriverPrivate(pPix);
+    if (!driver_priv)
+      return FALSE;
+
+    /* More references than just ours are held on the BO, so submit the
+     * pending command stream.  The legacy RADEONCPFlushIndirect() asserts
+     * !info->cs and therefore must not be used on this path. */
+    if (driver_priv->bo->cref > 1)
+      radeon_cs_flush_indirect(pScrn);
+
+    radeon_bo_wait(driver_priv->bo);
+
+    /* map the BO for CPU access (second argument 1 = write) */
+    ret = radeon_bo_map(driver_priv->bo, 1);
+    if (ret) {
+      FatalError("failed to map pixmap %d\n", ret);
+      return FALSE;
+    }
+    pPix->devPrivate.ptr = driver_priv->bo->ptr;
+    return TRUE;
+}
+
+static void RADEONFinishAccess_CS(PixmapPtr pPix, int index)
+{
+    struct radeon_exa_pixmap_priv *driver_priv;
+
+    driver_priv = exaGetPixmapDriverPrivate(pPix);
+    if (!driver_priv)
+        return;
+
+    radeon_bo_unmap(driver_priv->bo);
+    pPix->devPrivate.ptr = NULL;
+}
+
+
+void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_exa_pixmap_priv *new_priv;
+
+    new_priv = xcalloc(1, sizeof(struct radeon_exa_pixmap_priv));
+    if (!new_priv)
+	return NULL;
+
+    if (size == 0)
+	return new_priv;
+
+    new_priv->bo = radeon_bo_open(info->bufmgr, 0, size,
+				align, 0, 0);
+    if (!new_priv->bo) {
+	xfree(new_priv);
+	ErrorF("Failed to alloc memory\n");
+	return NULL;
+    }
+    
+    return new_priv;
+
+}
+
+static void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv)
+{
+    struct radeon_exa_pixmap_priv *driver_priv = driverPriv;
+
+    /* zero-sized pixmaps get a private with no BO (see RADEONEXACreatePixmap) */
+    if (driver_priv->bo)
+	radeon_bo_unref(driver_priv->bo);
+    xfree(driverPriv);
+}
+
+struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix)
+{
+    struct radeon_exa_pixmap_priv *driver_priv;
+
+    driver_priv = exaGetPixmapDriverPrivate(pPix);
+    /* returns the pixmap's BO, or NULL when it has no driver private */
+    return driver_priv ? driver_priv->bo : NULL;
+}
+
+void radeon_set_pixmap_bo(PixmapPtr pPix, struct radeon_bo *bo)
+{
+    struct radeon_exa_pixmap_priv *driver_priv;
+
+    driver_priv = exaGetPixmapDriverPrivate(pPix);
+    if (!driver_priv)
+	return;
+
+    /* Attach bo to the pixmap.  Take the new reference before dropping
+     * the old one, so that re-setting the BO that is already attached
+     * cannot release it in between. */
+    radeon_bo_ref(bo);
+    if (driver_priv->bo)
+	radeon_bo_unref(driver_priv->bo);
+    driver_priv->bo = bo;
+}
+
+static Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix)
+{
+    struct radeon_exa_pixmap_priv *driver_priv;
+
+    driver_priv = exaGetPixmapDriverPrivate(pPix);
+
+    if (!driver_priv)
+       return FALSE;
+    if (driver_priv->bo)
+       return TRUE;
+    return FALSE;
+}
+#endif
+
 #define ENTER_DRAW(x) TRACE
 #define LEAVE_DRAW(x) TRACE
 /***********************************************************************/
@@ -332,6 +457,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index)
 #define BEGIN_ACCEL(n)		RADEONWaitForFifo(pScrn, (n))
 #define OUT_ACCEL_REG(reg, val)	OUTREG(reg, val)
 #define OUT_ACCEL_REG_F(reg, val) OUTREG(reg, F_TO_DW(val))
+#define OUT_RELOC(x, read, write)            do {} while(0)
 #define FINISH_ACCEL()
 
 #ifdef RENDER
@@ -345,6 +471,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index)
 #undef OUT_ACCEL_REG
 #undef OUT_ACCEL_REG_F
 #undef FINISH_ACCEL
+#undef OUT_RELOC
 
 #ifdef XF86DRI
 
@@ -355,6 +482,7 @@ static void RADEONFinishAccess(PixmapPtr pPix, int index)
 #define BEGIN_ACCEL(n)		BEGIN_RING(2*(n))
 #define OUT_ACCEL_REG(reg, val)	OUT_RING_REG(reg, val)
 #define FINISH_ACCEL()		ADVANCE_RING()
+#define OUT_RELOC(x, read, write) OUT_RING_RELOC(x, read, write)
 
 #define OUT_RING_F(x) OUT_RING(F_TO_DW(x))
 
@@ -523,6 +651,10 @@ RADEONTexOffsetStart(PixmapPtr pPix)
 {
     RINFO_FROM_SCREEN(pPix->drawable.pScreen);
     unsigned long long offset;
+
+    if (exaGetPixmapDriverPrivate(pPix))
+	return -1;
+
     exaMoveInPixmap(pPix);
     ExaOffscreenMarkUsed(pPix);
 
diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index ac82952..c47dfb4 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -74,6 +74,9 @@ FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker)
     ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
     RADEONInfoPtr info = RADEONPTR(pScrn);
 
+    if (info->cs)
+	    return;
+
     TRACE;
 
     if (info->accel_state->exaMarkerSynced != marker) {
@@ -84,11 +87,60 @@ FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker)
     RADEONPTR(pScrn)->accel_state->engineMode = EXA_ENGINEMODE_UNKNOWN;
 }
 
+/* Emit the 2D engine state cached in info->state_2d via the accel macros.
+ * A non-zero op is stored as the operation in progress; with op == 0 the
+ * cached state is re-emitted, unless no operation is in progress. */
+static void FUNC_NAME(Emit2DState)(ScrnInfoPtr pScrn, int op)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    int emit_src;
+    ACCEL_PREAMBLE();
+
+    /* don't emit if no operation in progress */
+    if (info->state_2d.op == 0 && op == 0)
+	return;
+
+    /* A source is in use if it has a pitch/offset or, with a CS, a BO. */
+    emit_src = info->state_2d.src_pitch_offset || (info->cs && info->state_2d.src_bo);
+
+    /* 9 registers, 10 with a source; one relocation per BO emitted below. */
+    BEGIN_ACCEL_RELOC(emit_src ? 10 : 9, emit_src ? 2 : 1);
+    OUT_ACCEL_REG(RADEON_DEFAULT_SC_BOTTOM_RIGHT, info->state_2d.default_sc_bottom_right);
+    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, info->state_2d.dp_gui_master_cntl);
+    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, info->state_2d.dp_brush_frgd_clr);
+    OUT_ACCEL_REG(RADEON_DP_BRUSH_BKGD_CLR, info->state_2d.dp_brush_bkgd_clr);
+    OUT_ACCEL_REG(RADEON_DP_SRC_FRGD_CLR,   info->state_2d.dp_src_frgd_clr);
+    OUT_ACCEL_REG(RADEON_DP_SRC_BKGD_CLR,   info->state_2d.dp_src_bkgd_clr);
+    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, info->state_2d.dp_write_mask);
+    OUT_ACCEL_REG(RADEON_DP_CNTL, info->state_2d.dp_cntl);
+
+    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, info->state_2d.dst_pitch_offset);
+    if (info->cs)
+	OUT_RELOC(info->state_2d.dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
+
+    if (emit_src) {
+	OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, info->state_2d.src_pitch_offset);
+	if (info->cs)
+	    OUT_RELOC(info->state_2d.src_bo, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
+    }
+    FINISH_ACCEL();
+
+    if (op)
+	info->state_2d.op = op;
+    if (info->cs)
+	info->reemit_current2d = FUNC_NAME(Emit2DState);
+}
+
 static Bool
 FUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 {
     RINFO_FROM_SCREEN(pPix->drawable.pScreen);
     uint32_t datatype, dst_pitch_offset;
+    struct radeon_exa_pixmap_priv *driver_priv;
+    int ret;
+    int retry_count = 0;
+    struct radeon_cs_space_check bos[1];
+    int i;
     ACCEL_PREAMBLE();
 
     TRACE;
@@ -101,21 +153,54 @@ FUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch failed\n"));
 
     RADEON_SWITCH_TO_2D();
+ retry:
+    if (info->cs) {
+      
+	i = 0;
+	driver_priv = exaGetPixmapDriverPrivate(pPix);
+	bos[i].bo = driver_priv->bo;
+	bos[i].read_domains = 0;
+	bos[i].write_domain = RADEON_GEM_DOMAIN_VRAM;;
+	bos[i].new_accounted = 0;
+	i++;
+
+	ret = radeon_cs_space_check(info->cs, bos, i);
+	if (ret == RADEON_CS_SPACE_OP_TO_BIG) {
+	    RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n"));
+	}
+	if (ret == RADEON_CS_SPACE_FLUSH) {
+	    radeon_cs_flush_indirect(pScrn);
+	    retry_count++;
+	    if (retry_count == 2)
+	        RADEON_FALLBACK(("Not enough Video RAM for src\n"));
+	    goto retry;
+	}
+    }
 
-    BEGIN_ACCEL(5);
-    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
-	    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
-	    RADEON_GMC_BRUSH_SOLID_COLOR |
-	    (datatype << 8) |
-	    RADEON_GMC_SRC_DATATYPE_COLOR |
-	    RADEON_ROP[alu].pattern |
-	    RADEON_GMC_CLR_CMP_CNTL_DIS);
-    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, fg);
-    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, pm);
-    OUT_ACCEL_REG(RADEON_DP_CNTL,
-	(RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM));
-    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
-    FINISH_ACCEL();
+
+    info->state_2d.default_sc_bottom_right = (RADEON_DEFAULT_SC_RIGHT_MAX |
+					       RADEON_DEFAULT_SC_BOTTOM_MAX);
+    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
+    info->state_2d.dp_src_frgd_clr = 0xffffffff;
+    info->state_2d.dp_src_bkgd_clr = 0x00000000;
+    info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+					  RADEON_GMC_BRUSH_SOLID_COLOR |
+					  (datatype << 8) |
+					  RADEON_GMC_SRC_DATATYPE_COLOR |
+					  RADEON_ROP[alu].pattern |
+					  RADEON_GMC_CLR_CMP_CNTL_DIS);
+    info->state_2d.dp_brush_frgd_clr = fg;
+    info->state_2d.dp_cntl = (RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM);
+    info->state_2d.dp_write_mask = pm;
+    info->state_2d.dst_pitch_offset = dst_pitch_offset;
+    info->state_2d.src_pitch_offset = 0;
+    info->state_2d.src_bo = NULL;
+
+    driver_priv = exaGetPixmapDriverPrivate(pPix);
+    if (driver_priv)
+      info->state_2d.dst_bo = driver_priv->bo;
+
+    FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_SOLID);
 
     return TRUE;
 }
@@ -146,6 +231,7 @@ FUNC_NAME(RADEONDone2D)(PixmapPtr pPix)
 
     TRACE;
 
+    info->state_2d.op = 0;
     BEGIN_ACCEL(2);
     OUT_ACCEL_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL);
     OUT_ACCEL_REG(RADEON_WAIT_UNTIL,
@@ -161,25 +247,28 @@ FUNC_NAME(RADEONDoPrepareCopy)(ScrnInfoPtr pScrn, uint32_t src_pitch_offset,
     RADEONInfoPtr info = RADEONPTR(pScrn);
     ACCEL_PREAMBLE();
 
-    RADEON_SWITCH_TO_2D();
-
-    BEGIN_ACCEL(5);
-    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
-	RADEON_GMC_DST_PITCH_OFFSET_CNTL |
-	RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
-	RADEON_GMC_BRUSH_NONE |
-	(datatype << 8) |
-	RADEON_GMC_SRC_DATATYPE_COLOR |
-	RADEON_ROP[rop].rop |
-	RADEON_DP_SRC_SOURCE_MEMORY |
-	RADEON_GMC_CLR_CMP_CNTL_DIS);
-    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask);
-    OUT_ACCEL_REG(RADEON_DP_CNTL,
-	((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) |
-	 (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0)));
-    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
-    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset);
-    FINISH_ACCEL();
+    /* setup 2D state */
+    info->state_2d.dp_gui_master_cntl = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+					  RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+					  RADEON_GMC_BRUSH_NONE |
+					  (datatype << 8) |
+					  RADEON_GMC_SRC_DATATYPE_COLOR |
+					  RADEON_ROP[rop].rop |
+					  RADEON_DP_SRC_SOURCE_MEMORY |
+					  RADEON_GMC_CLR_CMP_CNTL_DIS);
+    info->state_2d.dp_cntl = ((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) |
+			       (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0));
+    info->state_2d.dp_brush_frgd_clr = 0xffffffff;
+    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
+    info->state_2d.dp_src_frgd_clr = 0xffffffff;
+    info->state_2d.dp_src_bkgd_clr = 0x00000000;
+    info->state_2d.dp_write_mask = planemask;
+    info->state_2d.dst_pitch_offset = dst_pitch_offset;
+    info->state_2d.src_pitch_offset = src_pitch_offset;
+    info->state_2d.default_sc_bottom_right =  (RADEON_DEFAULT_SC_RIGHT_MAX
+						| RADEON_DEFAULT_SC_BOTTOM_MAX);
+
+    FUNC_NAME(Emit2DState)(pScrn, RADEON_2D_EXA_COPY);
 }
 
 static Bool
@@ -190,9 +279,42 @@ FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc,   PixmapPtr pDst,
 {
     RINFO_FROM_SCREEN(pDst->drawable.pScreen);
     uint32_t datatype, src_pitch_offset, dst_pitch_offset;
-
+    struct radeon_exa_pixmap_priv *driver_priv;
+    int ret;
+    int retry_count = 0;
+    struct radeon_cs_space_check bos[2];
+    int i;
     TRACE;
 
+    RADEON_SWITCH_TO_2D();
+retry:
+    if (info->cs) {
+      
+	driver_priv = exaGetPixmapDriverPrivate(pSrc);
+	info->state_2d.src_bo = driver_priv->bo;
+
+	driver_priv = exaGetPixmapDriverPrivate(pDst);
+	info->state_2d.dst_bo = driver_priv->bo;
+
+	i = 0;
+	radeon_add_pixmap(bos, i++, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+
+	radeon_add_pixmap(bos, i++, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
+
+	ret = radeon_cs_space_check(info->cs, bos, i);
+	if (ret == RADEON_CS_SPACE_OP_TO_BIG) {
+	    RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n"));
+	}
+	if (ret == RADEON_CS_SPACE_FLUSH) {
+	    radeon_cs_flush_indirect(pScrn);
+	    retry_count++;
+	    if (retry_count == 2)
+	        RADEON_FALLBACK(("Not enough Video RAM for src\n"));
+	    goto retry;
+	}
+    }
+
+
     info->accel_state->xdir = xdir;
     info->accel_state->ydir = ydir;
 
@@ -256,6 +378,9 @@ RADEONUploadToScreenCP(PixmapPtr pDst, int x, int y, int w, int h,
 
     TRACE;
 
+    if (info->cs)
+	return FALSE;
+
     if (bpp < 8)
 	return FALSE;
 
@@ -458,9 +583,9 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen)
 #endif
 
 #if X_BYTE_ORDER == X_BIG_ENDIAN
-    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess;
-    info->accel_state->exa->FinishAccess = RADEONFinishAccess;
-#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */
+    info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_BE;
+    info->accel_state->exa->FinishAccess = RADEONFinishAccess_BE;
+#endif
 
     info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS;
 #ifdef EXA_SUPPORTS_PREPARE_AUX
@@ -473,6 +598,10 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen)
     info->accel_state->exa->pixmapOffsetAlign = RADEON_BUFFER_ALIGN + 1;
     info->accel_state->exa->pixmapPitchAlign = 64;
 
+    if (info->cs)
+      info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS;
+
+
 #ifdef RENDER
     if (info->RenderAccel) {
 	if (IS_R300_3D || IS_R500_3D) {
@@ -510,6 +639,19 @@ Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen)
     }
 #endif
 
+#ifdef XF86DRM_MODE
+#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4)
+    if (info->cs) {
+        info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap;
+        info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap;
+        info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen;
+	info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS;
+	info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS;
+    }
+#endif
+#endif
+
+
 #if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
     xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Setting EXA maxPitchBytes\n");
 
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index e274205..60c40a2 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -365,13 +365,14 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
     Bool repeat = pPict->repeat && pPict->repeatType != RepeatPad &&
 	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
     int i;
+    struct radeon_exa_pixmap_priv *driver_priv;
     ACCEL_PREAMBLE();
 
     txpitch = exaGetPixmapPitch(pPix);
-    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
+    txoffset = 0;
+
+    CHECK_OFFSET(pPix, 0x1f, "texture");
 
-    if ((txoffset & 0x1f) != 0)
-	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
     if ((txpitch & 0x1f) != 0)
 	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
 
@@ -426,23 +427,27 @@ static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 	}
     }
 
-    BEGIN_ACCEL(5);
+    BEGIN_ACCEL_RELOC(5, 1);
     if (unit == 0) {
 	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter);
 	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
-	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, txoffset);
 	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
 	    (pPix->drawable.width - 1) |
 	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
 	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32);
+
+	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_0, txoffset, pPix);
+	/* emit a texture relocation */
     } else {
 	OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, txfilter);
 	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat);
-	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_1, txoffset);
+
 	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1,
 	    (pPix->drawable.width - 1) |
 	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
 	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32);
+	EMIT_READ_OFFSET(RADEON_PP_TXOFFSET_1, txoffset, pPix);
+	/* emit a texture relocation */
     }
     FINISH_ACCEL();
 
@@ -548,9 +553,13 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op,
 					    PixmapPtr pDst)
 {
     RINFO_FROM_SCREEN(pDst->drawable.pScreen);
-    uint32_t dst_format, dst_offset, dst_pitch, colorpitch;
+    uint32_t dst_format, dst_pitch, colorpitch;
     uint32_t pp_cntl, blendcntl, cblend, ablend;
     int pixel_shift;
+    struct radeon_exa_pixmap_priv *driver_priv;
+    int retry_count = 0;
+    struct radeon_cs_space_check bos[3];
+    int i, ret;
     ACCEL_PREAMBLE();
 
     TRACE;
@@ -568,24 +577,45 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op,
 
     pixel_shift = pDst->drawable.bitsPerPixel >> 4;
 
-    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
     dst_pitch = exaGetPixmapPitch(pDst);
     colorpitch = dst_pitch >> pixel_shift;
     if (RADEONPixmapIsColortiled(pDst))
 	colorpitch |= RADEON_COLOR_TILE_ENABLE;
 
-    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
-    dst_pitch = exaGetPixmapPitch(pDst);
-    if ((dst_offset & 0x0f) != 0)
-	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
+    CHECK_OFFSET(pDst, 0x0f, "destination");
+
     if (((dst_pitch >> pixel_shift) & 0x7) != 0)
 	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
 
+    /* switch to 3D before doing buffer space checks as it may flush */
+    RADEON_SWITCH_TO_3D();
+ retry:
+    if (info->cs) {
+      
+	i = 0;
+	radeon_add_pixmap(bos, i++, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+
+	if (pMask)
+	    radeon_add_pixmap(bos, i++, pMask, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+
+	radeon_add_pixmap(bos, i++, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
+
+	ret = radeon_cs_space_check(info->cs, bos, i);
+	if (ret == RADEON_CS_SPACE_OP_TO_BIG) {
+	    RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n"));
+	}
+	if (ret == RADEON_CS_SPACE_FLUSH) {
+	    radeon_cs_flush_indirect(pScrn);
+	    retry_count++;
+	    if (retry_count == 2)
+	        RADEON_FALLBACK(("Not enough Video RAM for src\n"));
+	    goto retry;
+	}
+    }
+
     if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
 	return FALSE;
 
-    RADEON_SWITCH_TO_3D();
-
     if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0))
 	return FALSE;
     pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
@@ -598,10 +628,10 @@ static Bool FUNC_NAME(R100PrepareComposite)(int op,
 	info->accel_state->is_transform[1] = FALSE;
     }
 
-    BEGIN_ACCEL(10);
+    BEGIN_ACCEL_RELOC(10, 1);
     OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
     OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
-    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
+    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
     OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
 
     /* IN operator: Multiply src by mask components or mask alpha.
@@ -705,13 +735,14 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
     Bool repeat = pPict->repeat && pPict->repeatType != RepeatPad &&
 	!(unit == 0 && (info->accel_state->need_src_tile_x || info->accel_state->need_src_tile_y));
     int i;
+    struct radeon_exa_pixmap_priv *driver_priv;
     ACCEL_PREAMBLE();
 
     txpitch = exaGetPixmapPitch(pPix);
-    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
 
-    if ((txoffset & 0x1f) != 0)
-	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
+    txoffset = 0;
+    CHECK_OFFSET(pPix, 0x1f, "texture");
+
     if ((txpitch & 0x1f) != 0)
 	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
 
@@ -768,7 +799,7 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 	}
     }
 
-    BEGIN_ACCEL(6);
+    BEGIN_ACCEL_RELOC(6, 1);
     if (unit == 0) {
 	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
 	OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
@@ -776,7 +807,7 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 	OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) |
 		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
 	OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32);
-	OUT_ACCEL_REG(R200_PP_TXOFFSET_0, txoffset);
+	EMIT_READ_OFFSET(R200_PP_TXOFFSET_0, txoffset, pPix);
     } else {
 	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
 	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat);
@@ -784,7 +815,8 @@ static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 	OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) |
 		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
 	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32);
-	OUT_ACCEL_REG(R200_PP_TXOFFSET_1, txoffset);
+	EMIT_READ_OFFSET(R200_PP_TXOFFSET_1, txoffset, pPix);
+	/* emit a texture relocation */
     }
     FINISH_ACCEL();
 
@@ -878,9 +910,13 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
 				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
 {
     RINFO_FROM_SCREEN(pDst->drawable.pScreen);
-    uint32_t dst_format, dst_offset, dst_pitch;
+    uint32_t dst_format, dst_pitch;
     uint32_t pp_cntl, blendcntl, cblend, ablend, colorpitch;
     int pixel_shift;
+    struct radeon_exa_pixmap_priv *driver_priv;
+    int retry_count = 0;
+    struct radeon_cs_space_check bos[3];
+    int i, ret;
     ACCEL_PREAMBLE();
 
     TRACE;
@@ -898,22 +934,45 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
 
     pixel_shift = pDst->drawable.bitsPerPixel >> 4;
 
-    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
     dst_pitch = exaGetPixmapPitch(pDst);
     colorpitch = dst_pitch >> pixel_shift;
     if (RADEONPixmapIsColortiled(pDst))
 	colorpitch |= RADEON_COLOR_TILE_ENABLE;
 
-    if ((dst_offset & 0x0f) != 0)
-	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
+    CHECK_OFFSET(pDst, 0xf, "destination");
+
     if (((dst_pitch >> pixel_shift) & 0x7) != 0)
 	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
 
+    /* switch to 3D before doing buffer space checks as it may flush */
+    RADEON_SWITCH_TO_3D();
+
+ retry:
+    if (info->cs) {
+      
+	i = 0;
+	radeon_add_pixmap(bos, i++, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+
+	if (pMask)
+	    radeon_add_pixmap(bos, i++, pMask, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+
+	radeon_add_pixmap(bos, i++, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
+
+	ret = radeon_cs_space_check(info->cs, bos, i);
+	if (ret == RADEON_CS_SPACE_OP_TO_BIG) {
+	    RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n"));
+	}
+	if (ret == RADEON_CS_SPACE_FLUSH) {
+	    radeon_cs_flush_indirect(pScrn);
+	    retry_count++;
+	    if (retry_count == 2)
+	        RADEON_FALLBACK(("Not enough Video RAM for src\n"));
+	    goto retry;
+	}
+    }
     if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE))
 	return FALSE;
 
-    RADEON_SWITCH_TO_3D();
-
     if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0))
 	return FALSE;
     pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
@@ -926,11 +985,12 @@ static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
 	info->accel_state->is_transform[1] = FALSE;
     }
 
-    BEGIN_ACCEL(13);
+    BEGIN_ACCEL_RELOC(13, 1);
 
     OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
     OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
-    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
+
+    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pDst);
 
     OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
     if (pMask)
@@ -1004,6 +1064,10 @@ static Bool R300CheckCompositeTexture(PicturePtr pPict,
 				      int unit,
 				      Bool is_r500)
 {
+    ScreenPtr pScreen = pDstPict->pDrawable->pScreen;
+    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+
     int w = pPict->pDrawable->width;
     int h = pPict->pDrawable->height;
     int i;
@@ -1029,8 +1093,17 @@ static Bool R300CheckCompositeTexture(PicturePtr pPict,
 	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
 			 (int)pPict->format));
 
-    if (!RADEONCheckTexturePOT(pPict, unit == 0))
+    if (!RADEONCheckTexturePOT(pPict, unit == 0)) {
+	if (info->cs) {
+    		struct radeon_exa_pixmap_priv *driver_priv;
+		PixmapPtr pPix;
+
+    		pPix = RADEONGetDrawablePixmap(pPict->pDrawable);
+		driver_priv = exaGetPixmapDriverPrivate(pPix);
+		//TODOradeon_bufmgr_gem_force_gtt(driver_priv->bo);
+	}
 	return FALSE;
+    }
 
     if (pPict->filter != PictFilterNearest &&
 	pPict->filter != PictFilterBilinear)
@@ -1062,15 +1135,16 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
     int w = pPict->pDrawable->width;
     int h = pPict->pDrawable->height;
     int i, pixel_shift;
+    struct radeon_exa_pixmap_priv *driver_priv;
     ACCEL_PREAMBLE();
 
     TRACE;
 
     txpitch = exaGetPixmapPitch(pPix);
-    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset;
+    txoffset = 0;
+
+    CHECK_OFFSET(pPix, 0x1f, "texture");
 
-    if ((txoffset & 0x1f) != 0)
-	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
     if ((txpitch & 0x1f) != 0)
 	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
 
@@ -1156,13 +1230,15 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
     }
 
-    BEGIN_ACCEL(pPict->repeat ? 6 : 7);
+    BEGIN_ACCEL_RELOC(pPict->repeat ? 6 : 7, 1);
     OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter);
     OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0);
     OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0);
     OUT_ACCEL_REG(R300_TX_FORMAT1_0 + (unit * 4), txformat1);
     OUT_ACCEL_REG(R300_TX_FORMAT2_0 + (unit * 4), txpitch);
-    OUT_ACCEL_REG(R300_TX_OFFSET_0 + (unit * 4), txoffset);
+
+    EMIT_READ_OFFSET((R300_TX_OFFSET_0 + (unit * 4)), txoffset, pPix);
+
     if (!pPict->repeat)
 	OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0);
     FINISH_ACCEL();
@@ -1321,14 +1397,18 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
 {
     RINFO_FROM_SCREEN(pDst->drawable.pScreen);
-    uint32_t dst_format, dst_offset, dst_pitch;
+    uint32_t dst_format, dst_pitch;
     uint32_t txenable, colorpitch;
     uint32_t blendcntl, output_fmt;
     uint32_t src_color, src_alpha;
     uint32_t mask_color, mask_alpha;
     int pixel_shift;
+    int ret;
+    int retry_count = 0;
+    struct radeon_exa_pixmap_priv *driver_priv;
+    struct radeon_cs_space_check bos[3];
+    int i;
     ACCEL_PREAMBLE();
-
     TRACE;
 
     if (!R300GetDestFormat(pDstPicture, &dst_format))
@@ -1341,7 +1421,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 
     pixel_shift = pDst->drawable.bitsPerPixel >> 4;
 
-    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset;
     dst_pitch = exaGetPixmapPitch(pDst);
     colorpitch = dst_pitch >> pixel_shift;
 
@@ -1350,16 +1429,41 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 
     colorpitch |= dst_format;
 
-    if ((dst_offset & 0x0f) != 0)
-	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
+    CHECK_OFFSET(pDst, 0x0f, "destination");
+
     if (((dst_pitch >> pixel_shift) & 0x7) != 0)
 	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
 
+    /* have to execute switch before doing buffer sizing check as it flushes */
+    RADEON_SWITCH_TO_3D();
+ retry:
+    if (info->cs) {
+      
+	i = 0;
+	driver_priv = exaGetPixmapDriverPrivate(pSrc);
+	radeon_add_pixmap(bos, i++, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+
+	if (pMask)
+	    radeon_add_pixmap(bos, i++, pMask, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+
+	radeon_add_pixmap(bos, i++, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
+
+	ret = radeon_cs_space_check(info->cs, bos, i);
+	if (ret == RADEON_CS_SPACE_OP_TO_BIG) {
+	    RADEON_FALLBACK(("Not enough RAM to hw accel composite operation\n"));
+	}
+	if (ret == RADEON_CS_SPACE_FLUSH) {
+            radeon_cs_flush_indirect(pScrn);
+	    retry_count++;
+	    if (retry_count == 2)
+	        RADEON_FALLBACK(("Not enough Video RAM - this really shouldn't happen\nm"));
+	    goto retry;
+	}
+    }
+
     if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE))
 	return FALSE;
 
-    RADEON_SWITCH_TO_3D();
-
     if (!FUNC_NAME(R300TextureSetup)(pSrcPicture, pSrc, 0))
 	return FALSE;
     txenable = R300_TEX_0_ENABLE;
@@ -1945,9 +2049,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     }
     FINISH_ACCEL();
 
-    BEGIN_ACCEL(3);
-
-    OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
+    
+    BEGIN_ACCEL_RELOC(3, 1);
+    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pDst);
     OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
 
     blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
diff --git a/src/radeon_macros.h b/src/radeon_macros.h
index 8575884..15d9d73 100644
--- a/src/radeon_macros.h
+++ b/src/radeon_macros.h
@@ -160,4 +160,41 @@ do {									\
 #define INPCIE_P(pScrn, addr) R600INPCIE_PORT(pScrn, addr)
 #define OUTPCIE_P(pScrn, addr, val) R600OUTPCIE_PORT(pScrn, addr, val)
 
+#define BEGIN_ACCEL_RELOC(n, r) do {		/* BEGIN_ACCEL for n entries plus r relocations */ \
+	int _nqw = (n) + (info->cs ? (r) : 0);	/* r is only added when info->cs is set */ \
+	BEGIN_ACCEL(_nqw);			\
+    } while (0)
+
+/* Without a CS, RADEON_FALLBACK if pPix's offset has any bit of mask set. */
+#define CHECK_OFFSET(pPix, mask, type) do {				\
+    if (!info->cs) {							\
+	uint32_t _pix_offset = radeonGetPixmapOffset(pPix);		\
+	if ((_pix_offset & (mask)) != 0)				\
+	    RADEON_FALLBACK(("Bad %s offset 0x%x\n", type, (int)_pix_offset)); \
+    }									\
+    } while(0)
+/* Emit reg for pPix: 0 + BO reloc with a CS (sets caller's driver_priv), else offset | value. */
+#define EMIT_OFFSET(reg, value, pPix, rd, wd) do {		\
+    if (info->cs) {						\
+	driver_priv = exaGetPixmapDriverPrivate(pPix);		\
+	OUT_ACCEL_REG((reg), 0);				\
+	OUT_RELOC(driver_priv->bo, (rd), (wd));			\
+    } else {							\
+	uint32_t _pix_offset = radeonGetPixmapOffset(pPix);	\
+	OUT_ACCEL_REG((reg), _pix_offset | (value));		\
+    }								\
+    } while(0)
+
+#define EMIT_READ_OFFSET(reg, value, pPix) EMIT_OFFSET(reg, value, pPix, (RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT), 0) /* read domains VRAM|GTT, no write domain */
+#define EMIT_WRITE_OFFSET(reg, value, pPix) EMIT_OFFSET(reg, value, pPix, 0, RADEON_GEM_DOMAIN_VRAM) /* no read domains, write domain VRAM */
+
+/* Emit reg: offset + fbLocation + fbOffset, or with a CS the raw offset plus a VRAM|GTT read reloc for bo. */
+#define OUT_TEXTURE_REG(reg, offset, bo) do {				\
+    if (!info->cs) {							\
+	OUT_ACCEL_REG((reg), (offset) + info->fbLocation + pScrn->fbOffset); \
+    } else {								\
+	OUT_ACCEL_REG((reg), (offset));					\
+	OUT_RELOC((bo), RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); \
+    } } while(0)
+
 #endif
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index ab743be..10414b9 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -142,6 +142,7 @@ static REF_TRANSFORM trans[2] =
 #define BEGIN_ACCEL(n)		RADEONWaitForFifo(pScrn, (n))
 #define OUT_ACCEL_REG(reg, val)	OUTREG(reg, val)
 #define OUT_ACCEL_REG_F(reg, val) OUTREG(reg, F_TO_DW(val))
+#define OUT_RELOC(x, read, write) do {} while(0)
 #define FINISH_ACCEL()
 
 #include "radeon_textured_videofuncs.c"
@@ -151,6 +152,7 @@ static REF_TRANSFORM trans[2] =
 #undef BEGIN_ACCEL
 #undef OUT_ACCEL_REG
 #undef OUT_ACCEL_REG_F
+#undef OUT_RELOC
 #undef FINISH_ACCEL
 
 #ifdef XF86DRI
@@ -164,6 +166,7 @@ static REF_TRANSFORM trans[2] =
 #define OUT_ACCEL_REG_F(reg, val)	OUT_ACCEL_REG(reg, F_TO_DW(val))
 #define FINISH_ACCEL()		ADVANCE_RING()
 #define OUT_RING_F(x) OUT_RING(F_TO_DW(x))
+#define OUT_RELOC(x, read, write) OUT_RING_RELOC(x, read, write)
 
 #include "radeon_textured_videofuncs.c"
 
@@ -323,6 +326,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
 								size * 2, 64);
 	if (pPriv->video_offset == 0)
 	    return BadAlloc;
+
+	if (info->cs)
+	    pPriv->src_bo = pPriv->video_memory;
     }
 
     /* Bicubic filter loading */
@@ -333,6 +339,9 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
 	pPriv->bicubic_src_offset = pPriv->bicubic_offset + info->fbLocation + pScrn->fbOffset;
 	if (pPriv->bicubic_offset == 0)
 		pPriv->bicubic_enabled = FALSE;
+
+	if (info->cs)
+	    pPriv->bicubic_bo = pPriv->bicubic_memory;
     }
 
     if (pDraw->type == DRAWABLE_WINDOW)
@@ -361,8 +370,18 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
     top = (y1 >> 16) & ~1;
     nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
 
-    pPriv->src_offset = pPriv->video_offset + info->fbLocation + pScrn->fbOffset;
-    pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset);
+    pPriv->src_offset = pPriv->video_offset;
+    if (info->cs) {
+	int ret;
+	radeon_bo_wait(pPriv->src_bo);
+	ret = radeon_bo_map(pPriv->src_bo, 1);
+	if (ret) 
+	    return BadAlloc;
+	
+	pPriv->src_addr = pPriv->src_bo->ptr;
+    } else {
+	pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset);
+    }
     pPriv->src_pitch = dstPitch;
 
     pPriv->planeu_offset = dstPitch * dst_height;
@@ -431,9 +450,23 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
 
     /* Upload bicubic filter tex */
     if (pPriv->bicubic_enabled) {
-	if (info->ChipFamily < CHIP_FAMILY_R600)
-	    RADEONCopyData(pScrn, (uint8_t *)bicubic_tex_512,
-			   (uint8_t *)(info->FB + pPriv->bicubic_offset), 1024, 1024, 1, 512, 2);
+	if (info->ChipFamily < CHIP_FAMILY_R600) {
+	    uint8_t *bicubic_addr;
+	    int ret;
+	    if (info->cs) {
+		radeon_bo_wait(pPriv->bicubic_bo);
+		ret = radeon_bo_map(pPriv->bicubic_bo, 1);
+		if (ret)
+		    return BadAlloc;
+		
+		bicubic_addr = pPriv->bicubic_bo->ptr;
+	    } else
+		bicubic_addr = (uint8_t *)(info->FB + pPriv->bicubic_offset);
+
+	    RADEONCopyData(pScrn, (uint8_t *)bicubic_tex_512, bicubic_addr, 1024, 1024, 1, 512, 2);
+	   if (info->cs)
+	       radeon_bo_unmap(pPriv->bicubic_bo);
+	}
     }
 
     /* update cliplist */
@@ -453,6 +486,8 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
     pPriv->w = width;
     pPriv->h = height;
 
+    if (info->cs)
+	radeon_bo_unmap(pPriv->src_bo);
 #ifdef XF86DRI
     if (info->directRenderingEnabled) {
 	if (IS_R600_3D)
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 47878fc..b9930c7 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -92,27 +92,69 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     PixmapPtr pPixmap = pPriv->pPixmap;
-    uint32_t txformat, txsize, txpitch;
-    uint32_t dst_offset, dst_pitch, dst_format;
+    struct radeon_exa_pixmap_priv *driver_priv;
+    struct radeon_cs_space_check bos[3];
+    uint32_t txformat, txsize, txpitch, txoffset;
+    uint32_t dst_pitch, dst_format;
     uint32_t colorpitch;
     Bool isplanar = FALSE;
     int dstxoff, dstyoff, pixel_shift, vtx_count;
     BoxPtr pBox = REGION_RECTS(&pPriv->clip);
     int nBox = REGION_NUM_RECTS(&pPriv->clip);
+    int i, ret, retry_count = 0;
     ACCEL_PREAMBLE();
 
+ retry:
+    if (info->cs) {
+
+	i = 0;
+	bos[i].bo = pPriv->src_bo;
+	bos[i].read_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+	bos[i].write_domain = 0;
+	bos[i].new_accounted = 0;
+	i++;
+
+	if (pPriv->bicubic_enabled) {
+	    bos[i].bo = pPriv->bicubic_bo;
+	    bos[i].read_domains = RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM;
+	    bos[i].write_domain = 0;
+	    bos[i].new_accounted = 0;
+	    i++;
+	}
+
+	driver_priv = exaGetPixmapDriverPrivate(pPixmap);
+	bos[i].bo = driver_priv->bo;
+	bos[i].read_domains = 0;
+	bos[i].write_domain = RADEON_GEM_DOMAIN_VRAM;
+	bos[i].new_accounted = 0;
+	i++;
+
+	ret = radeon_cs_space_check(info->cs, bos, i);
+	if (ret == RADEON_CS_SPACE_OP_TO_BIG) {
+	    ErrorF("Not enough RAM to hw accel textured video operation\n");
+	    return;
+	}
+	if (ret == RADEON_CS_SPACE_FLUSH) {
+	    radeon_cs_flush_indirect(pScrn);
+	    retry_count++;
+	    if (retry_count == 2) {
+	        ErrorF("Not enough RAM to hw accel textured video operation\n");
+	        return;
+	    }
+	    goto retry;
+	}
+    }
+
     pixel_shift = pPixmap->drawable.bitsPerPixel >> 4;
 
+
 #ifdef USE_EXA
     if (info->useEXA) {
-	dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
 	dst_pitch = exaGetPixmapPitch(pPixmap);
     } else
 #endif
     {
-	dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
-	    info->fbLocation + pScrn->fbOffset;
-	dst_pitch = pPixmap->devKind;
+        dst_pitch = pPixmap->devKind;
     }
 
 #ifdef COMPOSITE
@@ -175,10 +217,12 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
     if (RADEONTilingEnabled(pScrn, pPixmap))
 	colorpitch |= RADEON_COLOR_TILE_ENABLE;
 
-    BEGIN_ACCEL(4);
+    txoffset = info->cs ? 0 : pPriv->src_offset;
+
+    BEGIN_ACCEL_RELOC(4,1);
 
     OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format);
-    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
+    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap);
     OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
     OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
 		  RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
@@ -215,7 +259,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		      RADEON_CLAMP_T_CLAMP_LAST |
 		      RADEON_YUV_TO_RGB);
 	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0);
-	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset);
+	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, pPriv->src_bo);
 	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
 		      RADEON_COLOR_ARG_A_ZERO |
 		      RADEON_COLOR_ARG_B_ZERO |
@@ -242,7 +286,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		      RADEON_CLAMP_S_CLAMP_LAST |
 		      RADEON_CLAMP_T_CLAMP_LAST);
 	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1);
-	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_1, pPriv->src_offset + pPriv->planeu_offset);
+	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, pPriv->src_bo);
 	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_1,
 		      RADEON_COLOR_ARG_A_ZERO |
 		      RADEON_COLOR_ARG_B_ZERO |
@@ -266,7 +310,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		      RADEON_CLAMP_S_CLAMP_LAST |
 		      RADEON_CLAMP_T_CLAMP_LAST);
 	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1);
-	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_2, pPriv->src_offset + pPriv->planev_offset);
+	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, pPriv->src_bo);
 	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_2,
 		      RADEON_COLOR_ARG_A_ZERO |
 		      RADEON_COLOR_ARG_B_ZERO |
@@ -299,7 +343,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		      RADEON_CLAMP_T_CLAMP_LAST |
 		      RADEON_YUV_TO_RGB);
 	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0);
-	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, pPriv->src_offset);
+	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, pPriv->src_bo);
 	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
 		      RADEON_COLOR_ARG_A_ZERO |
 		      RADEON_COLOR_ARG_B_ZERO |
@@ -448,9 +492,10 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     PixmapPtr pPixmap = pPriv->pPixmap;
+    struct radeon_exa_pixmap_priv *driver_priv;
     uint32_t txformat;
-    uint32_t txfilter, txsize, txpitch;
-    uint32_t dst_offset, dst_pitch, dst_format;
+    uint32_t txfilter, txsize, txpitch, txoffset;
+    uint32_t dst_pitch, dst_format;
     uint32_t colorpitch;
     Bool isplanar = FALSE;
     int dstxoff, dstyoff, pixel_shift, vtx_count;
@@ -473,15 +518,12 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 
 #ifdef USE_EXA
     if (info->useEXA) {
-	dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
 	dst_pitch = exaGetPixmapPitch(pPixmap);
     } else
 #endif
-	{
-	    dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
-		info->fbLocation + pScrn->fbOffset;
-	    dst_pitch = pPixmap->devKind;
-	}
+    {
+	dst_pitch = pPixmap->devKind;
+    }
 
 #ifdef COMPOSITE
     dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
@@ -546,7 +588,7 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
-    BEGIN_ACCEL(4);
+    BEGIN_ACCEL_RELOC(4, 1);
 
     OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format);
-    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
+    EMIT_WRITE_OFFSET(RADEON_RB3D_COLOROFFSET, 0, pPixmap);
     OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
 
     OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL,
@@ -590,6 +632,8 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	vcscale = 0.125;
     }
 
+    txoffset = info->cs ? 0 : pPriv->src_offset;
+
     if (isplanar) {
 	/* need 2 texcoord sets (even though they are identical) due
 	   to denormalization! hw apparently can't premultiply
@@ -621,21 +665,21 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 		      (pPriv->w - 1) |
 		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
 	OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
-	OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+	OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, pPriv->src_bo);
 
 	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
 	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
 	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
 	OUT_ACCEL_REG(R200_PP_TXSIZE_1, txsize);
 	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch);
-	OUT_ACCEL_REG(R200_PP_TXOFFSET_1, pPriv->src_offset + pPriv->planeu_offset);
+	OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, pPriv->src_bo);
 
 	OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter);
 	OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
 	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0);
 	OUT_ACCEL_REG(R200_PP_TXSIZE_2, txsize);
 	OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch);
-	OUT_ACCEL_REG(R200_PP_TXOFFSET_2, pPriv->src_offset + pPriv->planev_offset);
+	OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, pPriv->src_bo);
 
 	/* similar to r300 code. Note the big problem is that hardware constants
 	 * are 8 bits only, representing 0.0-1.0. We can get that up (using bias
@@ -777,7 +821,7 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 		      (pPriv->w - 1) |
 		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
 	OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
-	OUT_ACCEL_REG(R200_PP_TXOFFSET_0, pPriv->src_offset);
+	OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, pPriv->src_bo);
 
 	/* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */
 	OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
@@ -980,9 +1024,10 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     PixmapPtr pPixmap = pPriv->pPixmap;
+    struct radeon_exa_pixmap_priv *driver_priv;
     uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
-    uint32_t dst_offset, dst_pitch, dst_format;
-    uint32_t txenable, colorpitch;
+    uint32_t dst_pitch, dst_format;
+    uint32_t txenable, colorpitch, bicubic_offset;
     uint32_t output_fmt;
     Bool isplanar = FALSE;
     int dstxoff, dstyoff, pixel_shift, vtx_count;
@@ -994,15 +1039,12 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 
 #ifdef USE_EXA
     if (info->useEXA) {
-	dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
 	dst_pitch = exaGetPixmapPitch(pPixmap);
     } else
 #endif
-	{
-	    dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
-		info->fbLocation + pScrn->fbOffset;
-	    dst_pitch = pPixmap->devKind;
-	}
+    {
+	dst_pitch = pPixmap->devKind;
+    }
 
 #ifdef COMPOSITE
     dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
@@ -1095,9 +1137,9 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 		R300_TX_MIN_FILTER_LINEAR |
 		(0 << R300_TX_ID_SHIFT));
 
-    txoffset = pPriv->src_offset;
+    txoffset = info->cs ? 0 : pPriv->src_offset;
 
-    BEGIN_ACCEL(6);
+    BEGIN_ACCEL_RELOC(6, 1);
     OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
     OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
     OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
@@ -1106,7 +1148,7 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     else
 	OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
     OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
-    OUT_ACCEL_REG(R300_TX_OFFSET_0, txoffset);
+    OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, pPriv->src_bo);
     FINISH_ACCEL();
 
     txenable = R300_TEX_0_ENABLE;
@@ -1122,19 +1164,19 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 		    R300_TX_MIN_FILTER_LINEAR |
 		    R300_TX_MAG_FILTER_LINEAR);
 
-	BEGIN_ACCEL(12);
+	BEGIN_ACCEL_RELOC(12, 2);
 	OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter | (1 << R300_TX_ID_SHIFT));
 	OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
 	OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
 	OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
 	OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
-	OUT_ACCEL_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset);
+	OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, pPriv->src_bo);
 	OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
 	OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
 	OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
 	OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
 	OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
-	OUT_ACCEL_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset);
+	OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, pPriv->src_bo);
 	FINISH_ACCEL();
 	txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
     }
@@ -1155,13 +1197,18 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 		    R300_TX_MAG_FILTER_NEAREST |
 		    (1 << R300_TX_ID_SHIFT));
 
-	BEGIN_ACCEL(6);
+	if (info->cs)
+	    bicubic_offset = 0;
+	else
+	    bicubic_offset = pPriv->bicubic_src_offset;
+       
+	BEGIN_ACCEL_RELOC(6, 1);
 	OUT_ACCEL_REG(R300_TX_FILTER0_1, txfilter);
 	OUT_ACCEL_REG(R300_TX_FILTER1_1, 0);
 	OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
 	OUT_ACCEL_REG(R300_TX_FORMAT1_1, txformat1);
 	OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
-	OUT_ACCEL_REG(R300_TX_OFFSET_1, pPriv->bicubic_src_offset);
+	OUT_TEXTURE_REG(R300_TX_OFFSET_1, bicubic_offset, pPriv->bicubic_bo);
 	FINISH_ACCEL();
 
 	/* Enable tex 1 */
@@ -2205,11 +2252,11 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	FINISH_ACCEL();
     }
 
-    BEGIN_ACCEL(6);
+    BEGIN_ACCEL_RELOC(6, 1);
     OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
     OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
 
-    OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
+    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap);
     OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
 
     /* no need to enable blending */
@@ -2407,8 +2454,9 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     PixmapPtr pPixmap = pPriv->pPixmap;
+    struct radeon_exa_pixmap_priv *driver_priv;
     uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
-    uint32_t dst_offset, dst_pitch, dst_format;
+    uint32_t dst_pitch, dst_format;
     uint32_t txenable, colorpitch;
     uint32_t output_fmt;
     Bool isplanar = FALSE;
@@ -2421,15 +2469,12 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 
 #ifdef USE_EXA
     if (info->useEXA) {
-	dst_offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
 	dst_pitch = exaGetPixmapPitch(pPixmap);
     } else
 #endif
-	{
-	    dst_offset = (pPixmap->devPrivate.ptr - info->FB) +
-		info->fbLocation + pScrn->fbOffset;
-	    dst_pitch = pPixmap->devKind;
-	}
+    {
+	dst_pitch = pPixmap->devKind;
+    }
 
 #ifdef COMPOSITE
     dstxoff = -pPixmap->screen_x + pPixmap->drawable.x;
@@ -2528,15 +2573,15 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     if ((pPriv->h - 1) & 0x800)
 	txpitch |= R500_TXHEIGHT_11;
 
-    txoffset = pPriv->src_offset;
+    txoffset = info->cs ? 0 : pPriv->src_offset;
 
-    BEGIN_ACCEL(6);
+    BEGIN_ACCEL_RELOC(6, 1);
     OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
     OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
     OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
     OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
     OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
-    OUT_ACCEL_REG(R300_TX_OFFSET_0, txoffset);
+    OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, pPriv->src_bo);
     FINISH_ACCEL();
 
     txenable = R300_TEX_0_ENABLE;
@@ -3758,11 +3803,11 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	FINISH_ACCEL();
     }
 
-    BEGIN_ACCEL(6);
+    BEGIN_ACCEL_RELOC(6, 1);
     OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
     OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
 
-    OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
+    EMIT_WRITE_OFFSET(R300_RB3D_COLOROFFSET0, 0, pPixmap);
     OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
 
     /* no need to enable blending */
diff --git a/src/radeon_video.h b/src/radeon_video.h
index 0cf8168..aeb6441 100644
--- a/src/radeon_video.h
+++ b/src/radeon_video.h
@@ -120,6 +120,9 @@ typedef struct {
     int drw_x, drw_y;
     int src_x, src_y;
     int vsync;
+
+    struct radeon_bo *src_bo;
+    struct radeon_bo *bicubic_bo;
 } RADEONPortPrivRec, *RADEONPortPrivPtr;
 
 /* Reference color space transform data */


More information about the xorg-commit mailing list