[PATCH] EXA: add PrepareAccess() & FinishAccess() hooks

Benjamin Herrenschmidt benh at kernel.crashing.org
Sat Sep 3 02:26:34 PDT 2005


On Sat, 2005-09-03 at 19:22 +1000, Benjamin Herrenschmidt wrote:

> A very basic implementation for radeon follows with no surface control
> (using the fallback to RAM).

And here is the updated radeon EXA patch, based on Eric's patch #15, with
a very simplistic implementation of the PrepareAccess() hook that just
triggers the fallback to RAM when the bit depth doesn't match the main
swappers. I'll do a better implementation using the surface registers
ASAP.

diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/Imakefile xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/Imakefile
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/Imakefile	2005-08-30 13:44:21.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/Imakefile	2005-09-03 08:37:58.000000000 +1000
@@ -24,6 +24,14 @@
 #define IHaveModules
 #include <Server.tmpl>
 
+
+XAA_DEFINES = -DUSE_XAA
+
+EXA_DIR = -I$(XF86SRC)/exa
+EXA_DEFINES = -DUSE_EXA
+RADEON_EXA_SOURCE = radeon_exa.c
+RADEON_EXA_OBJS = radeon_exa.o
+
 #ifdef ATIDriverCCOptions
 CCOPTIONS = ATIDriverCCOptions
 #endif
@@ -186,7 +194,7 @@
 #endif
 
 DEFINES = $(CPIODEFINES) $(DGADEFINES) $(NONPCIDEFINES) $(DRIDEFINES) \
-	  $(TVOUTDEFINES) $(VGAHWDEFINES)
+	  $(TVOUTDEFINES) $(VGAHWDEFINES) $(EXA_DEFINES) $(XAA_DEFINES)
 
 SRCS1 = ati.c atiadapter.c atibus.c atichip.c atiident.c atioption.c \
 	atiprobe.c atividmem.c $(CPIOSRCS1) $(MODSRCS1) \
@@ -201,7 +209,7 @@
         r128_video.c $(DRISRCS3) $(MODSRCS3)
 SRCS4 = radeon_accel.c radeon_mergedfb.c radeon_cursor.c radeon_dga.c radeon_driver.c \
         radeon_video.c radeon_bios.c radeon_mm_i2c.c radeon_vip.c \
-	$(DRISRCS4) $(MODSRCS4)
+	$(RADEON_EXA_SOURCE) $(DRISRCS4) $(MODSRCS4)
 SRCS_THEATRE_DETECT = theatre.c $(MODSRC_THEATRE_DETECT)
 SRCS_THEATRE = theatre.c $(MODSRC_THEATRE)
 SRCS_THEATRE200 = theatre200.c $(MODSRC_THEATRE200)
@@ -219,7 +227,7 @@
         r128_video.o $(DRIOBJS3) $(MODOBJS3)
 OBJS4 = radeon_accel.o radeon_mergedfb.o radeon_cursor.o radeon_dga.o radeon_driver.o \
         radeon_video.o radeon_bios.o radeon_mm_i2c.o radeon_vip.o \
-	$(DRIOBJS4) $(MODOBJS4)
+	$(RADEON_EXA_OBJS) $(DRIOBJS4) $(MODOBJS4)
 OBJS_THEATRE_DETECT = theatre_detect.o $(MODOBJ_THEATRE_DETECT)
 OBJS_THEATRE = theatre.o $(MODOBJ_THEATRE)
 OBJS_THEATRE200 = theatre200.o $(MODOBJ_THEATRE200)
@@ -234,7 +242,7 @@
            -I$(XF86SRC)/vbe -I$(XF86SRC)/int10 \
            -I$(XF86SRC)/ddc -I$(XF86SRC)/i2c \
            -I$(XF86SRC)/rac -I$(XF86SRC)/ramdac \
-           -I$(XF86SRC)/shadowfb -I$(XF86SRC)/xaa \
+           -I$(XF86SRC)/shadowfb -I$(XF86SRC)/xaa $(EXA_DIR) \
            -I$(XF86SRC)/xf4bpp -I$(XF86SRC)/xf1bpp \
            -I$(XF86SRC)/vgahw -I$(XF86SRC)/fbdevhw \
            -I$(SERVERSRC)/mfb -I$(SERVERSRC)/fb -I$(SERVERSRC)/mi \
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon.h xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon.h
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon.h	2005-08-17 11:55:36.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon.h	2005-09-03 08:37:58.000000000 +1000
@@ -43,8 +43,15 @@
 				/* PCI support */
 #include "xf86Pci.h"
 
-				/* XAA and Cursor Support */
+#ifdef USE_EXA
+#include "exa.h"
+#endif
+#ifdef USE_XAA
 #include "xaa.h"
+#include "xf86fbman.h"
+#endif
+
+				/* Exa and Cursor Support */
 #include "vbe.h"
 #include "xf86Cursor.h"
 
@@ -122,6 +129,7 @@
 #define RADEON_ALIGN(x,bytes) (((x) + ((bytes) - 1)) & ~((bytes) - 1))
 #define RADEONPTR(pScrn)      ((RADEONInfoPtr)(pScrn)->driverPrivate)
 
+
 typedef struct {
 				/* Common registers */
     CARD32            ovr_clr;
@@ -382,11 +390,21 @@
 
     Bool              PaletteSavedOnVT; /* Palette saved on last VT switch   */
 
+#ifdef USE_EXA
+    ExaDriverRec      exa;
+#endif
+#ifdef USE_XAA
     XAAInfoRecPtr     accel;
+#endif
     Bool              accelOn;
     xf86CursorInfoPtr cursor;
-    unsigned long     cursor_start;
+#ifdef USE_EXA
+    ExaOffscreenArea   *cursorArea;
+#endif
+    unsigned long     cursor_offset;
+#ifdef USE_XAA
     unsigned long     cursor_end;
+#endif
     Bool              allowColorTiling;
     Bool              tilingEnabled; /* mirror of sarea->tiling_enabled */
 #ifdef ARGB_CURSOR
@@ -395,11 +413,13 @@
     int               cursor_fg;
     int               cursor_bg;
 
+#ifdef USE_XAA
     /*
      * XAAForceTransBlit is used to change the behavior of the XAA
      * SetupForScreenToScreenCopy function, to make it DGA-friendly.
      */
     Bool              XAAForceTransBlit;
+#endif
 
     int               fifo_slots;       /* Free slots in the FIFO (64 max)   */
     int               pix24bpp;         /* Depth of pixmap for 24bpp fb      */
@@ -416,6 +436,12 @@
     int               xdir;
     int               ydir;
 
+#ifdef USE_EXA
+    int		scratch_offset;
+    int		scratch_next;
+    ExaOffscreenArea *scratch_area;
+#endif
+#ifdef USE_XAA
 				/* ScanlineScreenToScreenColorExpand support */
     unsigned char     *scratch_buffer[1];
     unsigned char     *scratch_save;
@@ -432,7 +458,7 @@
     int               scanline_hpass;
     int               scanline_x1clip;
     int               scanline_x2clip;
-
+#endif
 				/* Saved values for DashedTwoPointLine */
     int               dashLen;
     CARD32            dashPattern;
@@ -539,6 +565,7 @@
     int               textureSize;
     int               log2TexGran;
 
+#ifdef USE_XAA
     CARD32            frontPitchOffset;
     CARD32            backPitchOffset;
     CARD32            depthPitchOffset;
@@ -548,6 +575,7 @@
     FBAreaPtr         backArea;
     int               depthTexLines;
     FBAreaPtr         depthTexArea;
+#endif
 
 				/* Saved scissor values */
     CARD32            sc_left;
@@ -577,7 +605,9 @@
 				/* XVideo */
     XF86VideoAdaptorPtr adaptor;
     void              (*VideoTimerCallback)(ScrnInfoPtr, Time);
+#ifdef USE_XAA
     FBLinearPtr       videoLinear;
+#endif
     int               videoKey;
     int		      RageTheatreCrystal;
     int               RageTheatreTunerPort;
@@ -600,19 +630,31 @@
         CARD8 input[5];
     	} MM_TABLE;
     CARD16 video_decoder_type;
-				/* Render */
+
+    /* Render */
     Bool              RenderAccel;
-    Bool              RenderInited3D;
+#ifdef USE_XAA
     FBLinearPtr       RenderTex;
     void              (*RenderCallback)(ScrnInfoPtr);
     Time              RenderTimeout;
+#endif
 
-				/* general */
+    /* general */
     Bool              showCache;
     OptionInfoPtr     Options;
+
+    Bool              useEXA;
 #ifdef XFree86LOADER
+#ifdef USE_EXA
+    XF86ModReqInfo    exaReq;
+#endif
+#ifdef USE_XAA
     XF86ModReqInfo    xaaReq;
 #endif
+#endif
+
+    /* X itself has the 3D context */
+    Bool              XInited3D;
 
     /* merged fb stuff, also covers clone modes */
     Bool		MergedFB;
@@ -689,11 +731,22 @@
 extern void        RADEONChangeSurfaces(ScrnInfoPtr pScrn);
 
 extern Bool        RADEONAccelInit(ScreenPtr pScreen);
+#ifdef USE_EXA
+extern Bool        RADEONSetupMemEXA (ScreenPtr pScreen);
+extern Bool        RADEONDrawInitMMIO(ScreenPtr pScreen);
+#ifdef XF86DRI
+extern Bool        RADEONDrawInitCP(ScreenPtr pScreen);
+#endif
+#endif
+#ifdef USE_XAA
 extern void        RADEONAccelInitMMIO(ScreenPtr pScreen, XAAInfoRecPtr a);
+#endif
 extern void        RADEONEngineInit(ScrnInfoPtr pScrn);
 extern Bool        RADEONCursorInit(ScreenPtr pScreen);
 extern Bool        RADEONDGAInit(ScreenPtr pScreen);
 
+extern void        RADEONInit3DEngine(ScrnInfoPtr pScrn);
+
 extern int         RADEONMinBits(int val);
 
 extern void        RADEONInitVideo(ScreenPtr pScreen);
@@ -704,7 +757,9 @@
 extern void        RADEONPllErrataAfterData(RADEONInfoPtr info);
 
 #ifdef XF86DRI
+#ifdef USE_XAA
 extern void        RADEONAccelInitCP(ScreenPtr pScreen, XAAInfoRecPtr a);
+#endif
 extern Bool        RADEONDRIScreenInit(ScreenPtr pScreen);
 extern void        RADEONDRICloseScreen(ScreenPtr pScreen);
 extern void        RADEONDRIResume(ScreenPtr pScreen);
@@ -719,10 +774,14 @@
 				      unsigned int w, CARD32 dstPitch,
 				      CARD32 *bufPitch, CARD8 **dst,
 				      unsigned int *h, unsigned int *hpass);
-extern void        RADEONHostDataBlitCopyPass(CARD8 *dst, CARD8 *src,
+extern void        RADEONHostDataBlitCopyPass(ScrnInfoPtr pScrn,
+					      unsigned int bpp,
+					      CARD8 *dst, CARD8 *src,
 					      unsigned int hpass,
 					      unsigned int dstPitch,
 					      unsigned int srcPitch);
+extern void        RADEONCopySwap(CARD8 *dst, CARD8 *src, unsigned int size,
+				  int swap);
 
 extern Bool        RADEONGetBIOSInfo(ScrnInfoPtr pScrn, xf86Int10InfoPtr pInt10);
 extern Bool        RADEONGetConnectorInfoFromBIOS (ScrnInfoPtr pScrn);
@@ -903,19 +962,43 @@
 #define RADEON_FLUSH_CACHE()						\
 do {									\
     BEGIN_RING(2);							\
-    OUT_RING(CP_PACKET0(RADEON_RB2D_DSTCACHE_CTLSTAT, 0));		\
-    OUT_RING(RADEON_RB2D_DC_FLUSH);					\
+    OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));		\
+    OUT_RING(RADEON_RB3D_DC_FLUSH);					\
     ADVANCE_RING();							\
 } while (0)
 
 #define RADEON_PURGE_CACHE()						\
 do {									\
     BEGIN_RING(2);							\
-    OUT_RING(CP_PACKET0(RADEON_RB2D_DSTCACHE_CTLSTAT, 0));		\
-    OUT_RING(RADEON_RB2D_DC_FLUSH_ALL);					\
+    OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));		\
+    OUT_RING(RADEON_RB3D_DC_FLUSH_ALL);					\
     ADVANCE_RING();							\
 } while (0)
 
 #endif /* XF86DRI */
 
+static __inline__ void RADEON_MARK_SYNC(RADEONInfoPtr info, ScrnInfoPtr pScrn)
+{
+#ifdef USE_EXA
+    if (info->useEXA)
+	exaMarkSync(pScrn->pScreen);
+#endif
+#ifdef USE_XAA
+    if (!info->useEXA)
+	SET_SYNC_FLAG(info->accel);
+#endif
+}
+
+static __inline__ void RADEON_SYNC(RADEONInfoPtr info, ScrnInfoPtr pScrn)
+{
+#ifdef USE_EXA
+    if (info->useEXA)
+	exaWaitSync(pScrn->pScreen);
+#endif
+#ifdef USE_XAA
+    if (!info->useEXA && info->accel)
+	info->accel->Sync(pScrn);
+#endif
+}
+
 #endif /* _RADEON_H_ */
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon.man xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon.man
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon.man	2005-08-17 11:55:36.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon.man	2005-09-03 08:37:58.000000000 +1000
@@ -501,6 +501,17 @@
 .B enable
 Render acceleration.
 .TP
+.BI "Option \*qAccelMethod\*q \*q" "string" \*q
+Chooses between available acceleration architectures.  Valid options are
+.B XAA
+and
+.B EXA.
+XAA is the traditional acceleration architecture and support for it is very
+stable.  EXA is a newer acceleration architecture with better performance for
+the Render and Composite extensions, but the rendering code for it is newer and
+possibly unstable.  The default is
+.B XAA.
+.TP
 .BI "Option \*qDMAForXv\*q \*q" boolean \*q
 Try or don't try to use DMA for Xv image transfers. This will reduce CPU
 usage when playing big videos like DVDs, but may cause instabilities.
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_accel.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_accel.c
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_accel.c	2005-08-17 11:55:36.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_accel.c	2005-09-03 08:37:58.000000000 +1000
@@ -93,6 +93,7 @@
 				/* X and server generic header files */
 #include "xf86.h"
 
+
 static struct {
     int rop;
     int pattern;
@@ -160,6 +161,10 @@
 	if (!(INREG(RADEON_RB2D_DSTCACHE_CTLSTAT) & RADEON_RB2D_DC_BUSY))
 	    break;
     }
+    if (i == RADEON_TIMEOUT) {
+	RADEONTRACE(("DC flush timeout: %x\n",
+		    INREG(RADEON_RB2D_DSTCACHE_CTLSTAT)));
+    }
 }
 
 /* Reset graphics card to known state */
@@ -342,9 +347,7 @@
 
     RADEONWaitForIdleMMIO(pScrn);
 
-#ifdef RENDER
-    info->RenderInited3D = FALSE;
-#endif
+    info->XInited3D = FALSE;
 }
 
 /* Initialize the acceleration hardware */
@@ -399,13 +402,15 @@
     RADEONEngineRestore(pScrn);
 }
 
+
 #define ACCEL_MMIO
 #define ACCEL_PREAMBLE()        unsigned char *RADEONMMIO = info->MMIO
 #define BEGIN_ACCEL(n)          RADEONWaitForFifo(pScrn, (n))
 #define OUT_ACCEL_REG(reg, val) OUTREG(reg, val)
 #define FINISH_ACCEL()
 
-#ifdef RENDER
+#include "radeon_commonfuncs.c"
+#if defined(RENDER) && defined(USE_XAA)
 #include "radeon_render.c"
 #endif
 #include "radeon_accelfuncs.c"
@@ -426,7 +431,9 @@
 #define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val)
 #define FINISH_ACCEL()          ADVANCE_RING()
 
-#ifdef RENDER
+
+#include "radeon_commonfuncs.c"
+#if defined(RENDER) && defined(USE_XAA)
 #include "radeon_render.c"
 #endif
 #include "radeon_accelfuncs.c"
@@ -656,14 +663,22 @@
     }
 
 #if X_BYTE_ORDER == X_BIG_ENDIAN
-    BEGIN_RING(2);
-    if (bpp == 2)
-	OUT_RING_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_16BIT);
-    else if (bpp == 1)
-	OUT_RING_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_32BIT);
-    else
-	OUT_RING_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_NONE);
-    ADVANCE_RING();
+    /* Swap doesn't work on R300 and later, it's handled during the
+     * copy to ind. buffer pass
+     */
+    if (info->ChipFamily < CHIP_FAMILY_R300) {
+        BEGIN_RING(2);
+	if (bpp == 2)
+	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
+			 RADEON_HOST_DATA_SWAP_HDW);
+	else if (bpp == 1)
+	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
+			 RADEON_HOST_DATA_SWAP_32BIT);
+	else
+	    OUT_RING_REG(RADEON_RBBM_GUICNTL,
+			 RADEON_HOST_DATA_SWAP_NONE);
+	ADVANCE_RING();
+    }
 #endif
 
     /*RADEON_PURGE_CACHE();
@@ -704,11 +719,61 @@
     return ret;
 }
 
+/* Copy size bytes from src to dst (dst may equal src), swapping per swap:
+ * SWAP_HDW exchanges 16-bit halves of each 32-bit word; SWAP_32BIT/_16BIT
+ * reverse the bytes of each 32/16-bit unit; anything else is a memmove(). */
+void RADEONCopySwap(CARD8 *dst, CARD8 *src, unsigned int size, int swap)
+{
+    switch(swap) {
+    case RADEON_HOST_DATA_SWAP_HDW:
+        {
+	    unsigned int *d = (unsigned int *)dst;
+	    unsigned int *s = (unsigned int *)src;
+	    unsigned int nwords = size >> 2;
+	    for (; nwords > 0; --nwords, ++d, ++s)
+		*d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff);
+	    return;
+        }
+    case RADEON_HOST_DATA_SWAP_32BIT:
+        {
+	    unsigned int *d = (unsigned int *)dst;
+	    unsigned int *s = (unsigned int *)src;
+	    unsigned int nwords = size >> 2;
+	    for (; nwords > 0; --nwords, ++d, ++s)
+#ifdef __powerpc__
+		asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d) : "memory");
+#else
+		*d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00)
+			| ((*s & 0xff00) << 8) | ((*s & 0xff) << 24);
+#endif
+	    return;
+        }
+    case RADEON_HOST_DATA_SWAP_16BIT:
+        {
+	    unsigned short *d = (unsigned short *)dst;
+	    unsigned short *s = (unsigned short *)src;
+	    unsigned int nwords = size >> 1;
+	    /* Halfword units: a 32-bit store/swap here would overrun or zero *d. */
+	    for (; nwords > 0; --nwords, ++d, ++s)
+#ifdef __powerpc__
+		asm volatile("sthbrx %0,0,%1" : : "r" (*s), "r" (d) : "memory");
+#else
+		*d = ((*s >> 8) & 0xff) | ((*s & 0xff) << 8);
+#endif
+	    return;
+	}
+    }
+    if (src != dst)
+	    memmove(dst, src, size);
+}
+
 /* Copies a single pass worth of data for a hostdata blit set up by
  * RADEONHostDataBlit().
  */
 void
 RADEONHostDataBlitCopyPass(
+    ScrnInfoPtr pScrn,
+    unsigned int bpp,
     CARD8 *dst,
     CARD8 *src,
     unsigned int hpass,
@@ -716,11 +781,27 @@
     unsigned int srcPitch
 ){
 
+    RADEONInfoPtr info = RADEONPTR( pScrn );
+
     /* RADEONHostDataBlitCopy can return NULL ! */
     if( (dst==NULL) || (src==NULL)) return;
 
     if ( dstPitch == srcPitch )
     {
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+        if (info->ChipFamily >= CHIP_FAMILY_R300) {
+	    switch(bpp) {
+	    case 1:
+		RADEONCopySwap(dst, src, hpass * dstPitch,
+			       RADEON_HOST_DATA_SWAP_32BIT);
+		return;
+	    case 2:
+	        RADEONCopySwap(dst, src, hpass * dstPitch,
+			       RADEON_HOST_DATA_SWAP_HDW);
+		return;
+	    }
+	}
+#endif
 	memcpy( dst, src, hpass * dstPitch );
     }
     else
@@ -728,7 +809,22 @@
 	unsigned int minPitch = min( dstPitch, srcPitch );
 	while ( hpass-- )
 	{
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+            if (info->ChipFamily >= CHIP_FAMILY_R300) {
+		switch(bpp) {
+		case 1:
+		    RADEONCopySwap(dst, src, minPitch,
+				   RADEON_HOST_DATA_SWAP_32BIT);
+		    goto next;
+		case 2:
+	            RADEONCopySwap(dst, src, minPitch,
+				   RADEON_HOST_DATA_SWAP_HDW);
+		    goto next;
+		}
+	    }
+#endif
 	    memcpy( dst, src, minPitch );
+	next:
 	    src += srcPitch;
 	    dst += dstPitch;
 	}
@@ -737,33 +833,67 @@
 
 #endif
 
-/* Initialize XAA for supported acceleration and also initialize the
- * graphics hardware for acceleration
- */
 Bool RADEONAccelInit(ScreenPtr pScreen)
 {
     ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
     RADEONInfoPtr  info  = RADEONPTR(pScrn);
-    XAAInfoRecPtr  a;
 
-    if (!(a = info->accel = XAACreateInfoRec())) {
-	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
-	return FALSE;
+#ifdef USE_EXA
+    if (info->useEXA) {
+# ifdef XF86DRI
+	if (info->directRenderingEnabled) {
+	    if (!RADEONDrawInitCP(pScreen))
+		return FALSE;
+	} else
+# endif /* XF86DRI */
+	{
+	    if (!RADEONDrawInitMMIO(pScreen))
+		return FALSE;
+	}
     }
+#endif /* USE_EXA */
+#ifdef USE_XAA
+    if (!info->useEXA) {
+	XAAInfoRecPtr  a;
+
+	if (!(a = info->accel = XAACreateInfoRec())) {
+	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n");
+	    return FALSE;
+	}
 
 #ifdef XF86DRI
-    if (info->directRenderingEnabled)
-	RADEONAccelInitCP(pScreen, a);
-    else
-#endif
-	RADEONAccelInitMMIO(pScreen, a);
+	if (info->directRenderingEnabled)
+	    RADEONAccelInitCP(pScreen, a);
+	else
+#endif /* XF86DRI */
+	    RADEONAccelInitMMIO(pScreen, a);
+
+	RADEONEngineInit(pScrn);
+
+	if (!XAAInit(pScreen, a)) {
+	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
+	    return FALSE;
+	}
+    }
+#endif /* USE_XAA */
+    return TRUE;
+}
 
-    RADEONEngineInit(pScrn);
+void RADEONInit3DEngine(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr info = RADEONPTR (pScrn);
 
-    if (!XAAInit(pScreen, a)) {
-	xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n");
-	return FALSE;
-    }
+#ifdef XF86DRI
+    if (info->directRenderingEnabled) {
+	RADEONSAREAPrivPtr pSAREAPriv;
 
-    return TRUE;
+	pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
+	pSAREAPriv->ctxOwner = DRIGetContext(pScrn->pScreen);
+	RADEONInit3DEngineCP(pScrn);
+    } else
+#endif
+	RADEONInit3DEngineMMIO(pScrn);
+
+    info->XInited3D = TRUE;
 }
+
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_accelfuncs.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_accelfuncs.c
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_accelfuncs.c	2005-08-17 11:55:36.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_accelfuncs.c	2005-09-03 08:37:58.000000000 +1000
@@ -95,89 +95,14 @@
 #endif
 #endif
 
-/* MMIO:
- *
- * Wait for the graphics engine to be completely idle: the FIFO has
- * drained, the Pixel Cache is flushed, and the engine is idle.  This is
- * a standard "sync" function that will make the hardware "quiescent".
- *
- * CP:
- *
- * Wait until the CP is completely idle: the FIFO has drained and the CP
- * is idle.
- */
-void
-FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn)
-{
-    RADEONInfoPtr  info = RADEONPTR(pScrn);
-    unsigned char *RADEONMMIO = info->MMIO;
-    int            i    = 0;
-
-#ifdef ACCEL_CP
-    /* Make sure the CP is idle first */
-    if (info->CPStarted) {
-	int  ret;
-	FLUSH_RING();
-
-	for (;;) {
-	    do {
-		ret = drmCommandNone(info->drmFD, DRM_RADEON_CP_IDLE);
-		if (ret && ret != -EBUSY) {
-		    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
-			       "%s: CP idle %d\n", __FUNCTION__, ret);
-		}
-	    } while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
-
-	    if (ret == 0) return;
-
-	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
-		       "Idle timed out, resetting engine...\n");
-	    RADEONEngineReset(pScrn);
-	    RADEONEngineRestore(pScrn);
-
-	    /* Always restart the engine when doing CP 2D acceleration */
-	    RADEONCP_RESET(pScrn, info);
-	    RADEONCP_START(pScrn, info);
-	}
-    }
-#endif
-
-    RADEONTRACE(("WaitForIdle (entering): %d entries, stat=0x%08x\n",
-		     INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
-		     INREG(RADEON_RBBM_STATUS)));
-
-    /* Wait for the engine to go idle */
-    RADEONWaitForFifoFunction(pScrn, 64);
-
-    for (;;) {
-	for (i = 0; i < RADEON_TIMEOUT; i++) {
-	    if (!(INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)) {
-		RADEONEngineFlush(pScrn);
-		return;
-	    }
-	}
-	RADEONTRACE(("Idle timed out: %d entries, stat=0x%08x\n",
-		     INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
-		     INREG(RADEON_RBBM_STATUS)));
-	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
-		   "Idle timed out, resetting engine...\n");
-	RADEONEngineReset(pScrn);
-	RADEONEngineRestore(pScrn);
-#ifdef XF86DRI
-	if (info->directRenderingEnabled) {
-	    RADEONCP_RESET(pScrn, info);
-	    RADEONCP_START(pScrn, info);
-	}
-#endif
-    }
-}
+#ifdef USE_XAA
 
 /* This callback is required for multiheader cards using XAA */
 static void
 FUNC_NAME(RADEONRestoreAccelState)(ScrnInfoPtr pScrn)
 {
-    RADEONInfoPtr  info       = RADEONPTR(pScrn);
-    unsigned char *RADEONMMIO = info->MMIO;
+    /*RADEONInfoPtr  info       = RADEONPTR(pScrn);
+    unsigned char *RADEONMMIO = info->MMIO;*/
 
 #ifdef ACCEL_MMIO
 
@@ -775,6 +700,8 @@
     RADEONInfoPtr  info = RADEONPTR(pScrn);
     ACCEL_PREAMBLE();
 
+    info->scanline_bpp = 0;
+
     /* Save for later clipping */
     info->dp_gui_master_cntl_clip = (info->dp_gui_master_cntl
 				     | RADEON_GMC_DST_CLIPPING
@@ -812,9 +739,12 @@
 #if X_BYTE_ORDER == X_LITTLE_ENDIAN
     BEGIN_ACCEL(1);
 #else
-    BEGIN_ACCEL(2);
+    if (info->ChipFamily < CHIP_FAMILY_R300) {
+	BEGIN_ACCEL(2);
 
-    OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,       RADEON_HOST_DATA_SWAP_32BIT);
+	OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_32BIT);
+    } else
+	BEGIN_ACCEL(1);
 #endif
     OUT_ACCEL_REG(RADEON_DP_WRITE_MASK,      planemask);
 
@@ -939,6 +869,22 @@
 
 #else /* ACCEL_CP */
 
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    if (info->ChipFamily >= CHIP_FAMILY_R300) {
+	if (info->scanline_bpp == 16) {
+	    RADEONCopySwap(info->scratch_buffer[bufno],
+			   info->scratch_buffer[bufno],
+			   info->scanline_words << 2,
+			   RADEON_HOST_DATA_SWAP_HDW);
+	} else if (info->scanline_bpp < 15) {
+	    RADEONCopySwap(info->scratch_buffer[bufno],
+			   info->scratch_buffer[bufno],
+			   info->scanline_words << 2,
+			   RADEON_HOST_DATA_SWAP_32BIT);
+	}
+    }
+#endif
+
     if (--info->scanline_hpass) {
 	info->scratch_buffer[bufno] += 4 * info->scanline_words;
     } else if (info->scanline_h) {
@@ -994,12 +940,15 @@
 #if X_BYTE_ORDER == X_LITTLE_ENDIAN
     BEGIN_ACCEL(1);
 #else
-    BEGIN_ACCEL(2);
+    if (info->ChipFamily < CHIP_FAMILY_R300) {
+        BEGIN_ACCEL(2);
 
-    if (bpp == 16)
-	OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_HDW);
-    else
-	OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_NONE);
+	if (bpp == 16)
+	    OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_HDW);
+	else
+	    OUT_ACCEL_REG(RADEON_RBBM_GUICNTL,   RADEON_HOST_DATA_SWAP_NONE);
+    } else
+	BEGIN_ACCEL(1);
 #endif
 #endif
     OUT_ACCEL_REG(RADEON_DP_WRITE_MASK,      planemask);
@@ -1225,7 +1174,8 @@
 	= FUNC_NAME(RADEONSetupForScanlineCPUToScreenColorExpandFill);
     a->SubsequentScanlineCPUToScreenColorExpandFill
 	= FUNC_NAME(RADEONSubsequentScanlineCPUToScreenColorExpandFill);
-    a->SubsequentColorExpandScanline    = FUNC_NAME(RADEONSubsequentScanline);
+    a->SubsequentColorExpandScanline
+        = FUNC_NAME(RADEONSubsequentScanline);
 
 				/* Solid Lines */
     a->SetupForSolidLine
@@ -1394,4 +1344,6 @@
 #endif /* RENDER */
 }
 
+#endif /* USE_XAA */
+
 #undef FUNC_NAME
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_commonfuncs.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_commonfuncs.c
--- /dev/null	2005-08-31 14:46:40.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_commonfuncs.c	2005-09-03 08:37:58.000000000 +1000
@@ -0,0 +1,185 @@
+/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_accelfuncs.c,v 1.7tsi Exp $ */
+/*
+ * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and
+ *                VA Linux Systems Inc., Fremont, California.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation on the rights to use, copy, modify, merge,
+ * publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT.  IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR
+ * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
+#error Cannot define both MMIO and CP acceleration!
+#endif
+
+#if !defined(UNIXCPP) || defined(ANSICPP)
+#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
+#else
+#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
+#endif
+
+#ifdef ACCEL_MMIO
+#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
+#else
+#ifdef ACCEL_CP
+#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
+#else
+#error No accel type defined!
+#endif
+#endif
+
+static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr  info       = RADEONPTR(pScrn);
+    ACCEL_PREAMBLE();
+
+    if (info->ChipFamily >= CHIP_FAMILY_R300) {
+	/* Unimplemented */
+    } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || 
+	       (info->ChipFamily == CHIP_FAMILY_RV280) || 
+	       (info->ChipFamily == CHIP_FAMILY_RS300) || 
+	       (info->ChipFamily == CHIP_FAMILY_R200)) {
+
+	BEGIN_ACCEL(7);
+        if (info->ChipFamily == CHIP_FAMILY_RS300) {
+            OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS);
+        } else {
+            OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, 0);
+        }
+	OUT_ACCEL_REG(R200_PP_CNTL_X, 0);
+	OUT_ACCEL_REG(R200_PP_TXMULTI_CTL_0, 0);
+	OUT_ACCEL_REG(R200_SE_VTX_STATE_CNTL, 0);
+	OUT_ACCEL_REG(R200_RE_CNTL, 0x0);
+	/* XXX: correct?  Want it to be like RADEON_VTX_ST?_NONPARAMETRIC */
+	OUT_ACCEL_REG(R200_SE_VTE_CNTL, R200_VTX_ST_DENORMALIZED);
+	OUT_ACCEL_REG(R200_SE_VAP_CNTL, R200_VAP_FORCE_W_TO_ONE |
+	    R200_VAP_VF_MAX_VTX_NUM);
+	FINISH_ACCEL();
+    } else {
+	BEGIN_ACCEL(2);
+        if ((info->ChipFamily == CHIP_FAMILY_RADEON) ||
+            (info->ChipFamily == CHIP_FAMILY_RV200))
+            OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, 0);
+        else
+            OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS);
+	OUT_ACCEL_REG(RADEON_SE_COORD_FMT,
+	    RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
+	    RADEON_VTX_ST0_NONPARAMETRIC |
+	    RADEON_VTX_ST1_NONPARAMETRIC |
+	    RADEON_TEX1_W_ROUTING_USE_W0);
+	FINISH_ACCEL();
+    }
+
+    BEGIN_ACCEL(4);
+    OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0);
+    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, 0x07ff07ff);
+    OUT_ACCEL_REG(RADEON_RB3D_PLANEMASK, 0xffffffff);
+    OUT_ACCEL_REG(RADEON_SE_CNTL, RADEON_DIFFUSE_SHADE_GOURAUD |
+				  RADEON_BFACE_SOLID | 
+				  RADEON_FFACE_SOLID |
+				  RADEON_VTX_PIX_CENTER_OGL |
+				  RADEON_ROUND_MODE_ROUND |
+				  RADEON_ROUND_PREC_4TH_PIX);
+    FINISH_ACCEL();
+}
+
+
+/* MMIO:
+ *
+ * Wait for the graphics engine to be completely idle: the FIFO has
+ * drained, the Pixel Cache is flushed, and the engine is idle.  This is
+ * a standard "sync" function that will make the hardware "quiescent".
+ *
+ * CP:
+ *
+ * Wait until the CP is completely idle: the FIFO has drained and the CP
+ * is idle.
+ */
+void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr  info = RADEONPTR(pScrn);
+    unsigned char *RADEONMMIO = info->MMIO;
+    int            i    = 0;
+
+#ifdef ACCEL_CP
+    /* Make sure the CP is idle first */
+    if (info->CPStarted) {
+	int  ret;
+
+	FLUSH_RING();
+
+	for (;;) {
+	    do {
+		ret = drmCommandNone(info->drmFD, DRM_RADEON_CP_IDLE);
+		if (ret && ret != -EBUSY) {
+		    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+			       "%s: CP idle %d\n", __FUNCTION__, ret);
+		}
+	    } while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT));
+
+	    if (ret == 0) return;
+
+	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+		       "Idle timed out, resetting engine...\n");
+	    RADEONEngineReset(pScrn);
+	    RADEONEngineRestore(pScrn);
+
+	    /* Always restart the engine when doing CP 2D acceleration */
+	    RADEONCP_RESET(pScrn, info);
+	    RADEONCP_START(pScrn, info);
+	}
+    }
+#endif
+
+    RADEONTRACE(("WaitForIdle (entering): %d entries, stat=0x%08x\n",
+		     INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
+		     INREG(RADEON_RBBM_STATUS)));
+
+    /* Wait for the engine to go idle */
+    RADEONWaitForFifoFunction(pScrn, 64);
+
+    for (;;) {
+	for (i = 0; i < RADEON_TIMEOUT; i++) {
+	    if (!(INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)) {
+		RADEONEngineFlush(pScrn);
+		return;
+	    }
+	}
+	RADEONTRACE(("Idle timed out: %d entries, stat=0x%08x\n",
+		     INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK,
+		     INREG(RADEON_RBBM_STATUS)));
+	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+		   "Idle timed out, resetting engine...\n");
+	RADEONEngineReset(pScrn);
+	RADEONEngineRestore(pScrn);
+#ifdef XF86DRI
+	if (info->directRenderingEnabled) {
+	    RADEONCP_RESET(pScrn, info);
+	    RADEONCP_START(pScrn, info);
+	}
+#endif
+    }
+}
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_cursor.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_cursor.c
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_cursor.c	2005-08-17 11:55:36.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_cursor.c	2005-09-03 08:37:58.000000000 +1000
@@ -68,9 +68,7 @@
 #define CURSOR_WIDTH	64
 #define CURSOR_HEIGHT	64
 
-#define COMMON_CURSOR_SWAPPING_START()	  \
-    if (info->accel && info->accel->Sync) \
-	info->accel->Sync(pScrn);
+#define COMMON_CURSOR_SWAPPING_START()	 RADEON_SYNC(info, pScrn)
 
 /*
  * The cursor bits are always 32bpp.  On MSBFirst buses,
@@ -108,7 +106,7 @@
 static void RADEONSetCursorColors(ScrnInfoPtr pScrn, int bg, int fg)
 {
     RADEONInfoPtr  info       = RADEONPTR(pScrn);
-    CARD32        *pixels     = (CARD32 *)(pointer)(info->FB + info->cursor_start);
+    CARD32        *pixels     = (CARD32 *)(pointer)(info->FB + info->cursor_offset);
     int            pixel, i;
     CURSOR_SWAPPING_DECL_MMIO
 
@@ -173,7 +171,7 @@
 	OUTREG(RADEON_CUR_HORZ_VERT_POSN, (RADEON_CUR_LOCK
 					   | ((xorigin ? 0 : x) << 16)
 					   | (yorigin ? 0 : y)));
-	OUTREG(RADEON_CUR_OFFSET, info->cursor_start + yorigin * stride);
+	OUTREG(RADEON_CUR_OFFSET, info->cursor_offset + yorigin * stride);
     } else {
 	OUTREG(RADEON_CUR2_HORZ_VERT_OFF,  (RADEON_CUR2_LOCK
 					    | (xorigin << 16)
@@ -182,7 +180,7 @@
 					    | ((xorigin ? 0 : x) << 16)
 					    | (yorigin ? 0 : y)));
 	OUTREG(RADEON_CUR2_OFFSET,
-	       info->cursor_start + pScrn->fbOffset + yorigin * stride);
+	       info->cursor_offset + pScrn->fbOffset + yorigin * stride);
     }
 
 }
@@ -195,7 +193,7 @@
     RADEONInfoPtr  info       = RADEONPTR(pScrn);
     unsigned char *RADEONMMIO = info->MMIO;
     CARD8         *s          = (CARD8 *)(pointer)image;
-    CARD32        *d          = (CARD32 *)(pointer)(info->FB + info->cursor_start);
+    CARD32        *d          = (CARD32 *)(pointer)(info->FB + info->cursor_offset);
     CARD32         save1      = 0;
     CARD32         save2      = 0;
     CARD8	   chunk;
@@ -279,7 +277,7 @@
     ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
     RADEONInfoPtr  info  = RADEONPTR(pScrn);
 
-    return info->cursor_start ? TRUE : FALSE;
+    return info->cursor_offset ? TRUE : FALSE;
 }
 
 #ifdef ARGB_CURSOR
@@ -290,7 +288,7 @@
     ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
     RADEONInfoPtr  info  = RADEONPTR(pScrn);
 
-    if (info->cursor_start &&
+    if (info->cursor_offset &&
 	pCurs->bits->height <= CURSOR_HEIGHT && pCurs->bits->width <= CURSOR_WIDTH)
 	return TRUE;
     return FALSE;
@@ -300,7 +298,7 @@
 {
     RADEONInfoPtr  info       = RADEONPTR(pScrn);
     unsigned char *RADEONMMIO = info->MMIO;
-    CARD32        *d          = (CARD32 *)(pointer)(info->FB + info->cursor_start);
+    CARD32        *d          = (CARD32 *)(pointer)(info->FB + info->cursor_offset);
     int            x, y, w, h;
     CARD32         save1      = 0;
     CARD32         save2      = 0;
@@ -361,6 +359,18 @@
 
 #endif
 
+#ifdef USE_EXA
+/* Callback handed to exaOffscreenAlloc() for the cursor area: forget it. */
+static void ATICursorSave(ScreenPtr pScreen, ExaOffscreenArea *area)
+{
+    ScrnInfoPtr   scrn   = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr radeon = RADEONPTR(scrn);
+
+    radeon->cursor_offset = 0;	/* the HW cursor checks test this for zero */
+    radeon->cursorArea = NULL;
+}
+#endif
+
 
 /* Initialize hardware cursor support. */
 Bool RADEONCursorInit(ScreenPtr pScreen)
@@ -368,7 +378,6 @@
     ScrnInfoPtr        pScrn   = xf86Screens[pScreen->myNum];
     RADEONInfoPtr      info    = RADEONPTR(pScrn);
     xf86CursorInfoPtr  cursor;
-    FBAreaPtr          fbarea;
     int                width;
     int		       width_bytes;
     int                height;
@@ -405,29 +414,49 @@
     width                     = pScrn->displayWidth;
     width_bytes		      = width * (pScrn->bitsPerPixel / 8);
     height                    = (size_bytes + width_bytes - 1) / width_bytes;
-    fbarea                    = xf86AllocateOffscreenArea(pScreen,
-							  width,
-							  height,
-							  256,
-							  NULL,
-							  NULL,
-							  NULL);
-
-    if (!fbarea) {
-	info->cursor_start    = 0;
-	xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
+
+#ifdef USE_EXA
+    if (info->useEXA) {
+	info->cursorArea = exaOffscreenAlloc(pScreen, size_bytes,
+                                             128, TRUE, ATICursorSave, info);
+
+	if (!info->cursorArea) {
+	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
+		       "Hardware cursor disabled"
+		       " due to insufficient offscreen memory\n");
+	    info->cursor_offset = 0;
+	} else {
+	    info->cursor_offset = info->cursorArea->offset;
+	}
+	/* cursorArea is NULL if the allocation failed, so guard the size read. */
+	RADEONTRACE(("RADEONCursorInit (0x%08x-0x%08x)\n",
+		     info->cursor_offset, info->cursor_offset +
+		     (info->cursorArea ? info->cursorArea->size : 0)));
+    }
+#endif /* USE_EXA */
+#ifdef USE_XAA
+    if (!info->useEXA) {
+	FBAreaPtr          fbarea;
+
+	fbarea = xf86AllocateOffscreenArea(pScreen, width, height,
+					   256, NULL, NULL, NULL);
+
+	if (!fbarea) {
+	    info->cursor_offset    = 0;
+	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
 		   "Hardware cursor disabled"
 		   " due to insufficient offscreen memory\n");
-    } else {
-	info->cursor_start    = RADEON_ALIGN((fbarea->box.x1 +
-					      fbarea->box.y1 * width) *
-					     info->CurrentLayout.pixel_bytes,
-					     256);
-	info->cursor_end      = info->cursor_start + size_bytes;
+	} else {
+	    info->cursor_offset  = RADEON_ALIGN((fbarea->box.x1 +
+						fbarea->box.y1 * width) *
+						info->CurrentLayout.pixel_bytes,
+						256);
+	    info->cursor_end = info->cursor_offset + size_bytes;
+	}
+	RADEONTRACE(("RADEONCursorInit (0x%08x-0x%08x)\n",
+		    info->cursor_offset, info->cursor_end));
     }
-
-    RADEONTRACE(("RADEONCursorInit (0x%08x-0x%08x)\n",
-		 info->cursor_start, info->cursor_end));
+#endif
 
     return xf86InitCursor(pScreen, cursor);
 }
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_dga.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_dga.c
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_dga.c	2005-08-17 11:55:36.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_dga.c	2005-09-03 08:37:58.000000000 +1000
@@ -61,9 +61,10 @@
 static void RADEON_SetViewport(ScrnInfoPtr, int, int, int);
 static void RADEON_FillRect(ScrnInfoPtr, int, int, int, int, unsigned long);
 static void RADEON_BlitRect(ScrnInfoPtr, int, int, int, int, int, int);
+#ifdef USE_XAA
 static void RADEON_BlitTransRect(ScrnInfoPtr, int, int, int, int, int, int,
 				 unsigned long);
-
+#endif
 
 static DGAModePtr RADEONSetupDGAMode(ScrnInfoPtr pScrn,
 				     DGAModePtr modes,
@@ -112,7 +113,19 @@
 	    if (pixmap)
 		currentMode->flags     |= DGA_PIXMAP_AVAILABLE;
 
-	    if (info->accel) {
+#ifdef USE_EXA
+	    if (info->useEXA) {
+		/* We need to fill in RADEON_FillRect and RADEON_BlitRect and
+		 * connect them in RADEONDGAInit before turning these on.
+		 */
+		/*if (info->exa.accel.PrepareSolid && info->exa.accel.Solid)
+		    currentMode->flags    |= DGA_FILL_RECT;
+		if (info->exa.accel.PrepareCopy && info->exa.accel.Copy)
+		    currentMode->flags    |= DGA_BLIT_RECT | DGA_BLIT_RECT_TRANS;*/
+	    }
+#endif /* USE_EXA */
+#ifdef USE_XAA
+	    if (!info->useEXA && info->accel) {
 	      if (info->accel->SetupForSolidFill &&
 		  info->accel->SubsequentSolidFillRect)
 		 currentMode->flags    |= DGA_FILL_RECT;
@@ -124,6 +137,8 @@
 		   DGA_BLIT_RECT | DGA_BLIT_RECT_TRANS))
 		  currentMode->flags   &= ~DGA_CONCURRENT_ACCESS;
 	    }
+#endif /* USE_XAA */
+
 	    if (pMode->Flags & V_DBLSCAN)
 		currentMode->flags     |= DGA_DOUBLESCAN;
 	    if (pMode->Flags & V_INTERLACE)
@@ -237,7 +252,19 @@
     info->DGAFuncs.BlitRect              = NULL;
     info->DGAFuncs.BlitTransRect         = NULL;
 
-    if (info->accel) {
+#ifdef USE_EXA
+    /*info->DGAFuncs.Sync              = info->exa.accel->Sync;*/
+    if (info->useEXA) {
+	/*if (info->exa.accel.PrepareSolid && info->exa.accel.Solid) {
+	    info->DGAFuncs.FillRect      = RADEON_FillRect;
+	}
+	if (info->exa.accel.PrepareCopy && info->exa.accel.Copy) {
+	    info->DGAFuncs.BlitRect      = RADEON_BlitRect;
+	}*/
+    }
+#endif /* USE_EXA */
+#ifdef USE_XAA
+    if (!info->useEXA && info->accel) {
 	info->DGAFuncs.Sync              = info->accel->Sync;
 	if (info->accel->SetupForSolidFill &&
 	    info->accel->SubsequentSolidFillRect)
@@ -248,6 +275,7 @@
 	    info->DGAFuncs.BlitTransRect = RADEON_BlitTransRect;
 	}
     }
+#endif /* USE_XAA */
 
     return DGAInit(pScreen, &info->DGAFuncs, modes, num);
 }
@@ -332,17 +360,33 @@
     info->DGAViewportStatus = 0;  /* FIXME */
 }
 
+/* Solid-fill rectangle helper for DGA: (x, y) origin, w x h size, fill color. */
 static void RADEON_FillRect(ScrnInfoPtr pScrn,
 			    int x, int y, int w, int h,
 			    unsigned long color)
 {
     RADEONInfoPtr  info = RADEONPTR(pScrn);
 
-    (*info->accel->SetupForSolidFill)(pScrn, color, GXcopy, (CARD32)(~0));
-    (*info->accel->SubsequentSolidFillRect)(pScrn, x, y, w, h);
+#ifdef USE_EXA
+    /* XXX: the EXA fill is still commented out; only RADEON_MARK_SYNC runs. */
+    if (info->useEXA) {
+	/*
+	info->exa.accel.PrepareSolid(pScrn, color, GXcopy, (CARD32)(~0));
+	info->exa.accel.Solid(pScrn, x, y, x+w, y+h);
+	info->exa.accel.DoneSolid();
+	*/
+	RADEON_MARK_SYNC(info, pScrn);
+    }
+#endif /* USE_EXA */
+#ifdef USE_XAA
+    if (!info->useEXA) {
+	(*info->accel->SetupForSolidFill)(pScrn, color, GXcopy, (CARD32)(~0));
+	(*info->accel->SubsequentSolidFillRect)(pScrn, x, y, w, h);
+        if (pScrn->bitsPerPixel == info->CurrentLayout.bitsPerPixel)
+	    RADEON_MARK_SYNC(info, pScrn);
+    }
+#endif /* USE_XAA */
 
-    if (pScrn->bitsPerPixel == info->CurrentLayout.bitsPerPixel)
-	SET_SYNC_FLAG(info->accel);
 }
 
 static void RADEON_BlitRect(ScrnInfoPtr pScrn,
@@ -353,15 +397,30 @@
     int            xdir = ((srcx < dstx) && (srcy == dsty)) ? -1 : 1;
     int            ydir = (srcy < dsty) ? -1 : 1;
 
-    (*info->accel->SetupForScreenToScreenCopy)(pScrn, xdir, ydir,
-					       GXcopy, (CARD32)(~0), -1);
-    (*info->accel->SubsequentScreenToScreenCopy)(pScrn, srcx, srcy,
-						 dstx, dsty, w, h);
-
-    if (pScrn->bitsPerPixel == info->CurrentLayout.bitsPerPixel)
-	SET_SYNC_FLAG(info->accel);
+	#ifdef USE_EXA
+    /* XXX */
+    if (info->useEXA) {
+	/*
+	info->exa.accel.PrepareCopy(pScrn, color, GXcopy, (CARD32)(~0));
+	info->exa.accel.Copy(pScrn, srcx, srcy, dstx, dsty, w, h);
+	info->exa.accel.DoneCopy();
+	*/
+	RADEON_MARK_SYNC(info, pScrn);
+    }
+#endif /* USE_EXA */
+#ifdef USE_XAA
+    if (!info->useEXA) {
+	(*info->accel->SetupForScreenToScreenCopy)(pScrn, xdir, ydir,
+						   GXcopy, (CARD32)(~0), -1);
+	(*info->accel->SubsequentScreenToScreenCopy)(pScrn, srcx, srcy,
+						     dstx, dsty, w, h);
+        if (pScrn->bitsPerPixel == info->CurrentLayout.bitsPerPixel)
+	    RADEON_MARK_SYNC(info, pScrn);
+    }
+#endif /* USE_XAA */
 }
 
+#ifdef USE_XAA
 static void RADEON_BlitTransRect(ScrnInfoPtr pScrn,
 				 int srcx, int srcy, int w, int h,
 				 int dstx, int dsty, unsigned long color)
@@ -371,7 +430,6 @@
     int            ydir = (srcy < dsty) ? -1 : 1;
 
     info->XAAForceTransBlit = TRUE;
-
     (*info->accel->SetupForScreenToScreenCopy)(pScrn, xdir, ydir,
 					       GXcopy, (CARD32)(~0), color);
 
@@ -381,8 +439,9 @@
 						 dstx, dsty, w, h);
 
     if (pScrn->bitsPerPixel == info->CurrentLayout.bitsPerPixel)
-	SET_SYNC_FLAG(info->accel);
+        RADEON_MARK_SYNC(info, pScrn);
 }
+#endif /* USE_XAA */
 
 static Bool RADEON_OpenFramebuffer(ScrnInfoPtr pScrn,
 				   char **name,
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_dri.c
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_dri.c	2005-08-30 13:44:21.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_dri.c	2005-09-03 08:37:58.000000000 +1000
@@ -347,17 +347,15 @@
 {
     ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
     RADEONInfoPtr  info  = RADEONPTR(pScrn);
-#ifdef RENDER
     RADEONSAREAPrivPtr pSAREAPriv;
-#endif
 
-    if (info->accel) info->accel->NeedToSync = TRUE;
 
-#ifdef RENDER
+    RADEON_MARK_SYNC(info, pScrn);
+
     pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
     if (pSAREAPriv->ctxOwner != DRIGetContext(pScrn->pScreen))
-	info->RenderInited3D = FALSE;
-#endif
+	info->XInited3D = FALSE;
+
 
     /* TODO: Fix this more elegantly.
      * Sometimes (especially with multiple DRI clients), this code
@@ -528,6 +526,7 @@
 static void RADEONDRIMoveBuffers(WindowPtr pParent, DDXPointRec ptOldOrg,
 				 RegionPtr prgnSrc, CARD32 indx)
 {
+#ifdef USE_XAA
     ScreenPtr      pScreen  = pParent->drawable.pScreen;
     ScrnInfoPtr    pScrn    = xf86Screens[pScreen->myNum];
     RADEONInfoPtr  info     = RADEONPTR(pScrn);
@@ -551,6 +550,10 @@
     int            dx       = pParent->drawable.x - ptOldOrg.x;
     int            dy       = pParent->drawable.y - ptOldOrg.y;
 
+    /* XXX: Fix in EXA case. */
+    if (info->useEXA)
+	return;
+
     /* If the copy will overlap in Y, reverse the order */
     if (dy > 0) {
 	ydir = -1;
@@ -683,6 +686,7 @@
     DEALLOCATE_LOCAL(pboxNew1);
 
     info->accel->NeedToSync = TRUE;
+#endif /* USE_XAA */
 }
 
 static void RADEONDRIInitGARTValues(RADEONInfoPtr info)
@@ -1183,7 +1187,10 @@
 
 				/* Make sure the CP is on for the X server */
     RADEONCP_START(pScrn, info);
-    info->dst_pitch_offset = info->frontPitchOffset;
+#ifdef USE_XAA
+    if (!info->useEXA) /* XXX? */
+	info->dst_pitch_offset = info->frontPitchOffset;
+#endif
 }
 
 
@@ -1276,7 +1283,7 @@
     				RADEON_VERSION_MAJOR_TILED : RADEON_VERSION_MAJOR;
     pDRIInfo->ddxDriverMinorVersion      = RADEON_VERSION_MINOR;
     pDRIInfo->ddxDriverPatchVersion      = RADEON_VERSION_PATCH;
-    pDRIInfo->frameBufferPhysicalAddress = info->LinearAddr;
+    pDRIInfo->frameBufferPhysicalAddress = (void *)info->LinearAddr;
     pDRIInfo->frameBufferSize            = info->FbMapSize;
     pDRIInfo->frameBufferStride          = (pScrn->displayWidth *
 					    info->CurrentLayout.pixel_bytes);
@@ -1602,7 +1609,7 @@
 #endif
 
     /* Have shadowfb run only while there is 3d active. */
-    if (info->allowPageFlip /* && info->drmMinor >= 3 */) {
+    if (!info->useEXA && info->allowPageFlip /* && info->drmMinor >= 3 */) {
 	ShadowFBInit( pScreen, RADEONDRIRefreshArea );
     } else {
        info->allowPageFlip = 0;
@@ -1795,6 +1802,8 @@
     if (!pSAREAPriv->pfAllowPageFlip && pSAREAPriv->pfCurrentPage == 0)
 	return;
 
+#ifdef USE_XAA
+    /* XXX: implement for EXA */
     /* pretty much a hack. */
     if (info->tilingEnabled)
        info->dst_pitch_offset |= RADEON_DST_TILE_MACRO;
@@ -1815,6 +1824,7 @@
 	}
     }
     info->dst_pitch_offset &= ~RADEON_DST_TILE_MACRO;
+#endif /* USE_XAA */
 }
 
 static void RADEONEnablePageFlip(ScreenPtr pScreen)
@@ -1823,6 +1833,8 @@
     RADEONInfoPtr       info       = RADEONPTR(pScrn);
     RADEONSAREAPrivPtr  pSAREAPriv = DRIGetSAREAPrivate(pScreen);
 
+#ifdef USE_XAA
+    /* XXX: Fix in EXA case */
     if (info->allowPageFlip) {
         /* pretty much a hack. */
 	if (info->tilingEnabled)
@@ -1843,6 +1855,7 @@
 	info->dst_pitch_offset &= ~RADEON_DST_TILE_MACRO;
 	pSAREAPriv->pfAllowPageFlip = 1;
     }
+#endif /* USE_XAA */
 }
 
 static void RADEONDisablePageFlip(ScreenPtr pScreen)
@@ -1871,9 +1884,14 @@
 {
     ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
     RADEONInfoPtr  info  = RADEONPTR(pScrn);
+#ifdef USE_XAA
     FBAreaPtr      fbarea;
     int            width, height;
 
+    /* XXX: Fix in EXA case. */
+    if (info->useEXA)
+	return;
+
     /* reserve offscreen area for back and depth buffers and textures */
 
     /* If we still have an area for the back buffer reserved, free it
@@ -1929,13 +1947,14 @@
 		   "experience screen corruption\n");
 
     xf86FreeOffscreenArea(fbarea);
+#endif
 
     info->have3DWindows = 1;
 
     RADEONChangeSurfaces(pScrn);
     RADEONEnablePageFlip(pScreen);
 
-    if (info->cursor_start)
+    if (info->cursor_offset != 0)
 	xf86ForceHWCursor (pScreen, TRUE);
 }
 
@@ -1952,20 +1971,27 @@
     /* Shut down shadowing if we've made it back to the front page */
     if (pSAREAPriv->pfCurrentPage == 0) {
 	RADEONDisablePageFlip(pScreen);
-	xf86FreeOffscreenArea(info->backArea);
-	info->backArea = NULL;
+#ifdef USE_XAA
+	if (!info->useEXA) {
+	    xf86FreeOffscreenArea(info->backArea);
+	    info->backArea = NULL;
+	}
+#endif
     } else {
 	xf86DrvMsg(pScreen->myNum, X_WARNING,
 		   "[dri] RADEONDRITransitionTo2d: "
 		   "kernel failed to unflip buffers.\n");
     }
 
-    xf86FreeOffscreenArea(info->depthTexArea);
+#ifdef USE_XAA
+    if (!info->useEXA)
+	xf86FreeOffscreenArea(info->depthTexArea);
+#endif
 
     info->have3DWindows = 0;
 
     RADEONChangeSurfaces(pScrn);
 
-    if (info->cursor_start)
-	    xf86ForceHWCursor (pScreen, FALSE);
+    if (info->cursor_offset != 0)
+	xf86ForceHWCursor (pScreen, FALSE);
 }
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_driver.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_driver.c
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_driver.c	2005-08-30 13:44:21.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_driver.c	2005-09-03 08:37:58.000000000 +1000
@@ -96,13 +96,12 @@
 #include "xf86cmap.h"
 #include "vbe.h"
 
-				/* fbdevhw & vgaHW definitions */
+				/* fbdevhw & vgaHW definitions */
 #ifdef WITH_VGAHW
 #include "vgaHW.h"
 #endif
 #include "fbdevhw.h"
 
-				/* DPMS support. */
 #define DPMS_SERVER
 #include <X11/extensions/dpms.h>
 
@@ -193,7 +192,8 @@
     OPTION_BIOS_HOTKEYS,
     OPTION_VGA_ACCESS,
     OPTION_REVERSE_DDC,
-    OPTION_LVDS_PROBE_PLL
+    OPTION_LVDS_PROBE_PLL,
+    OPTION_ACCELMETHOD
 } RADEONOpts;
 
 static const OptionInfoRec RADEONOptions[] = {
@@ -255,6 +255,7 @@
     { OPTION_VGA_ACCESS,     "VGAAccess",        OPTV_BOOLEAN, {0}, TRUE  },
     { OPTION_REVERSE_DDC,    "ReverseDDC",       OPTV_BOOLEAN, {0}, FALSE },
     { OPTION_LVDS_PROBE_PLL, "LVDSProbePLL",     OPTV_BOOLEAN, {0}, FALSE },
+    { OPTION_ACCELMETHOD,    "AccelMethod",      OPTV_STRING,  {0}, FALSE },
     { -1,                    NULL,               OPTV_NONE,    {0}, FALSE }
 };
 
@@ -315,12 +316,25 @@
     NULL
 };
 
+/* EXA entry points requested from the loader once the "exa" submodule loads. */
+#ifdef USE_EXA
+static const char *exaSymbols[] = {
+    "exaDriverInit",
+    "exaDriverFini",
+    "exaOffscreenAlloc",
+    "exaOffscreenFree",
+    NULL
+};
+#endif /* USE_EXA */
+
+#ifdef USE_XAA
 static const char *xaaSymbols[] = {
     "XAACreateInfoRec",
     "XAADestroyInfoRec",
     "XAAInit",
     NULL
 };
+#endif /* USE_XAA */
 
 #if 0
 static const char *xf8_32bppSymbols[] = {
@@ -434,7 +448,12 @@
 			  vgahwSymbols,
 #endif
 			  fbSymbols,
+#ifdef USE_EXA
+			  exaSymbols,
+#endif
+#ifdef USE_XAA
 			  xaaSymbols,
+#endif
 #if 0
 			  xf8_32bppSymbols,
 #endif
@@ -3963,7 +3982,7 @@
 			      info->allowColorTiling ? 2048 :
 			          64 * pScrn->bitsPerPixel, /* pitchInc */
 			      128,                   /* minHeight */
-			      info->MaxLines,      /* maxHeight */
+			      info->MaxLines,        /* maxHeight */
 			      pScrn->display->virtualX,
 			      pScrn->display->virtualY,
 			      info->FbMapSize,
@@ -4032,7 +4051,7 @@
 					  info->allowColorTiling ? 2048 :
 					      64 * pScrn->bitsPerPixel, /* pitchInc */
 					  128,                   /* minHeight */
-					  info->MaxLines,      /* maxHeight */
+					  info->MaxLines,        /* maxHeight */
 					  pScrn->display->virtualX,
 					  pScrn->display->virtualY,
 					  info->FbMapSize,
@@ -4199,31 +4218,72 @@
 {
 #ifdef XFree86LOADER
     RADEONInfoPtr  info = RADEONPTR(pScrn);
+    MessageType from;
+#if defined(USE_EXA) && defined(USE_XAA)
+    char *optstr;
+#endif
 
     if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) {
 	int errmaj = 0, errmin = 0;
 
-	info->xaaReq.majorversion = 1;
-	info->xaaReq.minorversion = 2;
+	from = X_DEFAULT;
+#if defined(USE_EXA)
+#if defined(USE_XAA)
+	optstr = (char *)xf86GetOptValString(info->Options, OPTION_ACCELMETHOD);
+	if (optstr != NULL) {
+	    if (xf86NameCmp(optstr, "EXA") == 0) {
+		from = X_CONFIG;
+		info->useEXA = TRUE;
+	    } else if (xf86NameCmp(optstr, "XAA") == 0) {
+		from = X_CONFIG;
+	    }
+	}
+#else /* USE_XAA */
+	info->useEXA = TRUE;
+#endif /* !USE_XAA */
+#endif /* USE_EXA */
+	xf86DrvMsg(pScrn->scrnIndex, from,
+	    "Using %s acceleration architecture\n",
+	    info->useEXA ? "EXA" : "XAA");
 
-	if (!LoadSubModule(pScrn->module, "xaa", NULL, NULL, NULL,
-			   &info->xaaReq, &errmaj, &errmin)) {
-	    info->xaaReq.minorversion = 1;
+#ifdef USE_EXA
+	if (info->useEXA) {
+	    info->exaReq.majorversion = 1;
+	    info->exaReq.minorversion = 0;
+
+	    if (!LoadSubModule(pScrn->module, "exa", NULL, NULL, NULL,
+			       &info->exaReq, &errmaj, &errmin)) {
+		LoaderErrorMsg(NULL, "exa", errmaj, errmin);
+		return FALSE;
+	    }
+	    xf86LoaderReqSymLists(exaSymbols, NULL);
+	}
+#endif /* USE_EXA */
+#ifdef USE_XAA
+	if (!info->useEXA) {
+	    info->xaaReq.majorversion = 1;
+	    info->xaaReq.minorversion = 2;
 
 	    if (!LoadSubModule(pScrn->module, "xaa", NULL, NULL, NULL,
-			       &info->xaaReq, &errmaj, &errmin)) {
-		info->xaaReq.minorversion = 0;
+			   &info->xaaReq, &errmaj, &errmin)) {
+		info->xaaReq.minorversion = 1;
 
 		if (!LoadSubModule(pScrn->module, "xaa", NULL, NULL, NULL,
 			       &info->xaaReq, &errmaj, &errmin)) {
-		    LoaderErrorMsg(NULL, "xaa", errmaj, errmin);
-		    return FALSE;
+		    info->xaaReq.minorversion = 0;
+
+		    if (!LoadSubModule(pScrn->module, "xaa", NULL, NULL, NULL,
+			       &info->xaaReq, &errmaj, &errmin)) {
+			LoaderErrorMsg(NULL, "xaa", errmaj, errmin);
+			return FALSE;
+		    }
 		}
 	    }
+	    xf86LoaderReqSymLists(xaaSymbols, NULL);
 	}
-	xf86LoaderReqSymLists(xaaSymbols, NULL);
+#endif /* USE_XAA */
     }
-#endif
+#endif /* XFree86Loader */
 
     return TRUE;
 }
@@ -4876,7 +4936,8 @@
     if (info->CPStarted) DRILock(pScrn->pScreen, 0);
 #endif
 
-    if (info->accelOn && pScrn->pScreen) info->accel->Sync(pScrn);
+    if (info->accelOn && pScrn->pScreen)
+        RADEON_SYNC(info, pScrn);
 
     if (info->FBDev) {
 	fbdevHWLoadPalette(pScrn, numColors, indices, colors, pVisual);
@@ -4999,8 +5060,9 @@
     RADEONInfoPtr  info    = RADEONPTR(pScrn);
 
 #ifdef XF86DRI
-    if (info->directRenderingEnabled)
+    if (info->directRenderingEnabled) {
 	FLUSH_RING();
+    }
 #endif
 
     pScreen->BlockHandler = info->BlockHandler;
@@ -5010,12 +5072,319 @@
     if (info->VideoTimerCallback)
 	(*info->VideoTimerCallback)(pScrn, currentTime.milliseconds);
 
-#ifdef RENDER
+#if defined(RENDER) && defined(USE_XAA)
     if(info->RenderCallback)
 	(*info->RenderCallback)(pScrn);
 #endif
+}
+
+
+#ifdef USE_XAA
+#ifdef XF86DRI
+/* Lay out the front, back, depth and texture regions of video memory for
+ * DRI and initialise the XAA framebuffer manager.  Returns FALSE when the
+ * manager cannot be initialised or queried, and TRUE otherwise. */
+Bool RADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen)
+{
+    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr  info  = RADEONPTR(pScrn);
+    int            cpp = info->CurrentLayout.pixel_bytes;
+    int            width_bytes = pScrn->displayWidth * cpp;
+    int            bufferSize;
+    int            depthSize;
+    int            l;
+    int            scanlines;
+    BoxRec         MemBox;
+    FBAreaPtr      fbarea;
+
+    info->frontOffset = 0;
+    info->frontPitch = pScrn->displayWidth;
+    info->backPitch = pScrn->displayWidth;
+
+    /* make sure we use 16 line alignment for tiling (8 might be enough).
+     * Might need that for non-XF86DRI too? */
+    if (info->allowColorTiling) {
+	bufferSize = (((pScrn->virtualY + 15) & ~15) * width_bytes
+		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
+    } else {
+        bufferSize = (pScrn->virtualY * width_bytes
+		      + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
+    }
+
+    /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
+     * which is always the case if color tiling is used due to color pitch
+     * but not necessarily otherwise, and its height a multiple of 16 lines.
+     */
+    info->depthPitch = (pScrn->displayWidth + 31) & ~31;
+    depthSize = ((((pScrn->virtualY + 15) & ~15) * info->depthPitch
+		  * cpp + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN);
+
+    switch (info->CPMode) {
+    case RADEON_DEFAULT_CP_PIO_MODE:
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in PIO mode\n");
+	break;
+    case RADEON_DEFAULT_CP_BM_MODE:
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in BM mode\n");
+	break;
+    default:
+        xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in UNKNOWN mode\n");
+	break;
+    }
+
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+	       "Using %d MB GART aperture\n", info->gartSize);
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+	       "Using %d MB for the ring buffer\n", info->ringSize);
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+	       "Using %d MB for vertex/indirect buffers\n", info->bufSize);
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+	       "Using %d MB for GART textures\n", info->gartTexSize);
+
+    /* Try for front, back, depth, and three framebuffers worth of
+     * pixmap cache.  Should be enough for a fullscreen background
+     * image plus some leftovers.
+     */
+    info->textureSize = info->FbMapSize - 5 * bufferSize - depthSize;
+
+    /* If that gives us less than half the available memory, let's
+     * be greedy and grab some more.  Sorry, I care more about 3D
+     * performance than playing nicely, and you'll get around a full
+     * framebuffer's worth of pixmap cache anyway.
+     */
+    if (info->textureSize < (int)info->FbMapSize / 2) {
+        info->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
+    }
+    if (info->textureSize < (int)info->FbMapSize / 2) {
+        info->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
+    }
+
+    /* If there's still no space for textures, try without pixmap cache, but never use
+     * the reserved space and the space hw cursor might use
+     */
+    if (info->textureSize < 0) {
+	info->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
+		- 2 * width_bytes - 16384;
+    }
+
+    /* Check to see if there is more room available after the 8192nd
+     * scanline for textures
+     */
+    /* FIXME: what's this good for? condition is pretty much impossible to meet */
+    if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
+	> info->textureSize) {
+	info->textureSize =
+		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
+    }
+
+    /* If backbuffer is disabled, don't allocate memory for it */
+    if (info->noBackBuffer) {
+	info->textureSize += bufferSize;
+    }
+
+    /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
+       At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
+       otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
+       it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
+       area otherwise).
+       This might cause some space at the end of the video memory to be unused, since it
+       can't be used (?) due to that log_tex_granularity thing???
+       Could use different copyscreentoscreen function for the pageflip copies
+       (which would use different src and dst offsets) to avoid this. */
+    if (info->allowColorTiling && !info->noBackBuffer) {
+	info->textureSize = info->FbMapSize - ((info->FbMapSize - info->textureSize +
+			  width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
+    }
+    if (info->textureSize > 0) {
+	l = RADEONMinBits((info->textureSize-1) / RADEON_NR_TEX_REGIONS);
+	if (l < RADEON_LOG_TEX_GRANULARITY)
+	    l = RADEON_LOG_TEX_GRANULARITY;
+	/* Round the texture size up to the nearest whole number of
+	 * texture regions.  Again, be greedy about this, don't
+	 * round down.
+	 */
+	info->log2TexGran = l;
+	info->textureSize = (info->textureSize >> l) << l;
+    } else {
+	info->textureSize = 0;
+    }
+
+    /* Set a minimum usable local texture heap size.  This will fit
+     * two 256x256x32bpp textures.
+     */
+    if (info->textureSize < 512 * 1024) {
+	info->textureOffset = 0;
+	info->textureSize = 0;
+    }
+
+    if (info->allowColorTiling && !info->noBackBuffer) {
+	info->textureOffset = ((info->FbMapSize - info->textureSize) /
+			       (width_bytes * 16)) * (width_bytes * 16);
+    }
+    else {
+	/* Reserve space for textures */
+	info->textureOffset = ((info->FbMapSize - info->textureSize +
+				RADEON_BUFFER_ALIGN) &
+			       ~(CARD32)RADEON_BUFFER_ALIGN);
+    }
+
+    /* Reserve space for the shared depth buffer. */
+    info->depthOffset = ((info->textureOffset - depthSize +
+			  RADEON_BUFFER_ALIGN) &
+			 ~(CARD32)RADEON_BUFFER_ALIGN);
+
+    /* Reserve space for the shared back buffer */
+    if (info->noBackBuffer) {
+       info->backOffset = info->depthOffset;
+    } else {
+       info->backOffset = ((info->depthOffset - bufferSize +
+			    RADEON_BUFFER_ALIGN) &
+			   ~(CARD32)RADEON_BUFFER_ALIGN);
+    }
+
+    info->backY = info->backOffset / width_bytes;
+    info->backX = (info->backOffset - (info->backY * width_bytes)) / cpp;
+
+    scanlines = info->FbMapSize / width_bytes;
+    if (scanlines > 8191)
+	scanlines = 8191;
+
+    MemBox.x1 = 0;
+    MemBox.y1 = 0;
+    MemBox.x2 = pScrn->displayWidth;
+    MemBox.y2 = scanlines;
+
+    if (!xf86InitFBManager(pScreen, &MemBox)) {
+        xf86DrvMsg(scrnIndex, X_ERROR,
+		   "Memory manager initialization to "
+		   "(%d,%d) (%d,%d) failed\n",
+		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	return FALSE;
+    } else {
+	int  width, height;
+
+	xf86DrvMsg(scrnIndex, X_INFO,
+		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
+		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	/* why oh why can't we just request modes which are guaranteed to be 16 lines
+	   aligned... sigh */
+	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
+						pScrn->displayWidth,
+						info->allowColorTiling ?
+						((pScrn->virtualY + 15) & ~15)
+						- pScrn->virtualY + 2 : 2,
+						0, NULL, NULL,
+						NULL))) {
+	    xf86DrvMsg(scrnIndex, X_INFO,
+		       "Reserved area from (%d,%d) to (%d,%d)\n",
+		       fbarea->box.x1, fbarea->box.y1,
+		       fbarea->box.x2, fbarea->box.y2);
+	} else {
+	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
+	}
+	if (xf86QueryLargestOffscreenArea(pScreen, &width,
+					  &height, 0, 0, 0)) {
+	    xf86DrvMsg(scrnIndex, X_INFO,
+		       "Largest offscreen area available: %d x %d\n",
+		       width, height);
+
+	    /* Lines in offscreen area needed for depth buffer and
+	     * textures
+	     */
+	    info->depthTexLines = (scanlines
+				   - info->depthOffset / width_bytes);
+	    info->backLines	    = (scanlines
+				       - info->backOffset / width_bytes
+				       - info->depthTexLines);
+	    info->backArea	    = NULL;
+	} else {
+	    xf86DrvMsg(scrnIndex, X_ERROR,
+		       "Unable to determine largest offscreen area "
+		       "available\n");
+	    return FALSE;
+	}
+    }
+
+    xf86DrvMsg(scrnIndex, X_INFO,
+	       "Will use back buffer at offset 0x%x\n",
+	       info->backOffset);
+    xf86DrvMsg(scrnIndex, X_INFO,
+	       "Will use depth buffer at offset 0x%x\n",
+	       info->depthOffset);
+    xf86DrvMsg(scrnIndex, X_INFO,
+	       "Will use %d kb for textures at offset 0x%x\n",
+	       info->textureSize/1024, info->textureOffset);
+
+    info->frontPitchOffset = (((info->frontPitch * cpp / 64) << 22) |
+			      ((info->frontOffset + info->fbLocation) >> 10));
+    info->backPitchOffset = (((info->backPitch * cpp / 64) << 22) |
+			     ((info->backOffset + info->fbLocation) >> 10));
+    info->depthPitchOffset = (((info->depthPitch * cpp / 64) << 22) |
+			      ((info->depthOffset + info->fbLocation) >> 10));
+
+    return TRUE;
+}
+#endif /* XF86DRI */
+
+/* Initialise the XAA framebuffer manager over the mapped framebuffer.
+ * Returns FALSE if xf86InitFBManager() fails and TRUE otherwise. */
+Bool RADEONSetupMemXAA(int scrnIndex, ScreenPtr pScreen)
+{
+    ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr  info  = RADEONPTR(pScrn);
+    BoxRec         MemBox;
+    int            y2;
+    int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
+
+    MemBox.x1 = 0;
+    MemBox.y1 = 0;
+    MemBox.x2 = pScrn->displayWidth;
+    y2 = info->FbMapSize / width_bytes;
+    if (y2 >= 32768)
+	y2 = 32767; /* because MemBox.y2 is signed short */
+    MemBox.y2 = y2;
+
+    /* The acceleration engine uses 14 bit signed coordinates, so we can't
+     * have any drawable caches beyond this region. */
+    if (MemBox.y2 > 8191)
+	MemBox.y2 = 8191;
+
+    if (!xf86InitFBManager(pScreen, &MemBox)) {
+	xf86DrvMsg(scrnIndex, X_ERROR,
+		   "Memory manager initialization to "
+		   "(%d,%d) (%d,%d) failed\n",
+		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	return FALSE;
+    } else {
+	int       width, height;
+	FBAreaPtr fbarea;
 
+	xf86DrvMsg(scrnIndex, X_INFO,
+		   "Memory manager initialized to (%d,%d) (%d,%d)\n",
+		   MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
+						pScrn->displayWidth,
+						info->allowColorTiling ?
+						((pScrn->virtualY + 15) & ~15)
+						- pScrn->virtualY + 2 : 2,
+						0, NULL, NULL,
+						NULL))) {
+	    xf86DrvMsg(scrnIndex, X_INFO,
+		       "Reserved area from (%d,%d) to (%d,%d)\n",
+		       fbarea->box.x1, fbarea->box.y1,
+		       fbarea->box.x2, fbarea->box.y2);
+	} else {
+	    xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
+	}
+	if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
+					      0, 0, 0)) {
+	    xf86DrvMsg(scrnIndex, X_INFO,
+		       "Largest offscreen area available: %d x %d\n",
+		       width, height);
+	}
+    }
+    return TRUE;
 }
+#endif /* USE_XAA */
 
 /* Called at the start of each server generation. */
 _X_EXPORT Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen,
@@ -5023,8 +5392,7 @@
 {
     ScrnInfoPtr    pScrn = xf86Screens[pScreen->myNum];
     RADEONInfoPtr  info  = RADEONPTR(pScrn);
-    BoxRec         MemBox;
-    int            y2;
+    int            hasDRI = 0;
 #ifdef RENDER
     int            subPixelOrder = SubPixelUnknown;
     char*          s;
@@ -5061,6 +5429,11 @@
         }
     }
 
+    if (info->allowColorTiling && info->useEXA) {
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		   "Color tiling not supported yet with EXA, disabling\n");
+	info->allowColorTiling = FALSE;
+    }
     if (info->allowColorTiling && (pScrn->virtualX > info->MaxSurfaceWidth)) {
 	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
 		   "Color tiling not supported with virtual x resolutions larger than %d, disabling\n",
@@ -5179,6 +5552,8 @@
 	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
 		   "Depth moves disabled by default\n");
     }
+
+    hasDRI = info->directRenderingEnabled;
 #endif
 
     RADEONSetFBLocation(pScrn);
@@ -5229,294 +5604,25 @@
     }
 #endif
 				/* Memory manager setup */
-#ifdef XF86DRI
-    if (info->directRenderingEnabled) {
-	FBAreaPtr  fbarea;
-	int        cpp = info->CurrentLayout.pixel_bytes;
-	int        width_bytes = pScrn->displayWidth * cpp;
-	int        bufferSize;
-	int        depthSize;
-	int        l;
-	int        scanlines;
-
-	info->frontOffset = 0;
-	info->frontPitch = pScrn->displayWidth;
-	info->backPitch = pScrn->displayWidth;
-	/* make sure we use 16 line alignment for tiling (8 might be enough).
-	   Might need that for non-XF86DRI too? */
-	if (info->allowColorTiling) {
-	    bufferSize = (((pScrn->virtualY + 15) & ~15) * width_bytes
-			+ RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
-	}
-	else {
-	    bufferSize = (pScrn->virtualY * width_bytes
-			+ RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
-	}
-	/* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels,
-	 * which is always the case if color tiling is used due to color pitch
-	 * but not necessarily otherwise, and its height a multiple of 16 lines.
-	 */
-	info->depthPitch = (pScrn->displayWidth + 31) & ~31;
-	depthSize = ((((pScrn->virtualY + 15) & ~15) * info->depthPitch
-		* cpp + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN);
-
-	switch (info->CPMode) {
-	case RADEON_DEFAULT_CP_PIO_MODE:
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in PIO mode\n");
-	    break;
-	case RADEON_DEFAULT_CP_BM_MODE:
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in BM mode\n");
-	    break;
-	default:
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in UNKNOWN mode\n");
-	    break;
-	}
-
-	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		   "Using %d MB GART aperture\n", info->gartSize);
-	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		   "Using %d MB for the ring buffer\n", info->ringSize);
-	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		   "Using %d MB for vertex/indirect buffers\n", info->bufSize);
-	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		   "Using %d MB for GART textures\n", info->gartTexSize);
-
-	/* Try for front, back, depth, and three framebuffers worth of
-	 * pixmap cache.  Should be enough for a fullscreen background
-	 * image plus some leftovers.
-	 */
-	info->textureSize = info->FbMapSize - 5 * bufferSize - depthSize;
-
-	/* If that gives us less than half the available memory, let's
-	 * be greedy and grab some more.  Sorry, I care more about 3D
-	 * performance than playing nicely, and you'll get around a full
-	 * framebuffer's worth of pixmap cache anyway.
-	 */
-	if (info->textureSize < (int)info->FbMapSize / 2) {
-	    info->textureSize = info->FbMapSize - 4 * bufferSize - depthSize;
-	}
-	if (info->textureSize < (int)info->FbMapSize / 2) {
-	    info->textureSize = info->FbMapSize - 3 * bufferSize - depthSize;
-	}
-	/* If there's still no space for textures, try without pixmap cache, but never use
-	   the reserved space and the space hw cursor might use */
-	if (info->textureSize < 0) {
-	    info->textureSize = info->FbMapSize - 2 * bufferSize - depthSize
-				- 2 * width_bytes - 16384;
-	}
-
-	/* Check to see if there is more room available after the 8192nd
-	   scanline for textures */
-	/* FIXME: what's this good for? condition is pretty much impossible to meet */
-	if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize
-	    > info->textureSize) {
-	    info->textureSize =
-		info->FbMapSize - 8192*width_bytes - bufferSize - depthSize;
-	}
-
-	/* If backbuffer is disabled, don't allocate memory for it */
-	if (info->noBackBuffer) {
-	   info->textureSize += bufferSize;
-	}
-
-	/* RADEON_BUFFER_ALIGN is not sufficient for backbuffer!
-	   At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned,
-	   otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee
-	   it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor
-	   area otherwise).
-	   This might cause some space at the end of the video memory to be unused, since it
-	   can't be used (?) due to that log_tex_granularity thing???
-	   Could use different copyscreentoscreen function for the pageflip copies
-	   (which would use different src and dst offsets) to avoid this. */   
-	if (info->allowColorTiling && !info->noBackBuffer) {
-	    info->textureSize = info->FbMapSize - ((info->FbMapSize - info->textureSize +
-		width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16);
-	}
-	if (info->textureSize > 0) {
-	    l = RADEONMinBits((info->textureSize-1) / RADEON_NR_TEX_REGIONS);
-	    if (l < RADEON_LOG_TEX_GRANULARITY) l = RADEON_LOG_TEX_GRANULARITY;
-
-	    /* Round the texture size up to the nearest whole number of
-	     * texture regions.  Again, be greedy about this, don't
-	     * round down.
-	     */
-	    info->log2TexGran = l;
-	    info->textureSize = (info->textureSize >> l) << l;
-	} else {
-	    info->textureSize = 0;
-	}
-
-	/* Set a minimum usable local texture heap size.  This will fit
-	 * two 256x256x32bpp textures.
-	 */
-	if (info->textureSize < 512 * 1024) {
-	    info->textureOffset = 0;
-	    info->textureSize = 0;
-	}
-
-	if (info->allowColorTiling && !info->noBackBuffer) {
-	    info->textureOffset = ((info->FbMapSize - info->textureSize) /
-		(width_bytes * 16)) * (width_bytes * 16);
-	}
-	else {
-				/* Reserve space for textures */
-	    info->textureOffset = ((info->FbMapSize - info->textureSize +
-				RADEON_BUFFER_ALIGN) &
-			       ~(CARD32)RADEON_BUFFER_ALIGN);
-	}
-
-				/* Reserve space for the shared depth
-                                 * buffer.
-				 */
-	info->depthOffset = ((info->textureOffset - depthSize +
-			      RADEON_BUFFER_ALIGN) &
-			     ~(CARD32)RADEON_BUFFER_ALIGN);
-
-				/* Reserve space for the shared back buffer */
-	if (info->noBackBuffer) {
-	   info->backOffset = info->depthOffset;
-	} else {
-	   info->backOffset = ((info->depthOffset - bufferSize +
-				RADEON_BUFFER_ALIGN) &
-			       ~(CARD32)RADEON_BUFFER_ALIGN);
-	}
-
-	info->backY = info->backOffset / width_bytes;
-	info->backX = (info->backOffset - (info->backY * width_bytes)) / cpp;
-
-	scanlines = info->FbMapSize / width_bytes;
-	if (scanlines > 8191) scanlines = 8191;
-
-	MemBox.x1 = 0;
-	MemBox.y1 = 0;
-	MemBox.x2 = pScrn->displayWidth;
-	MemBox.y2 = scanlines;
-
-	if (!xf86InitFBManager(pScreen, &MemBox)) {
-	    xf86DrvMsg(scrnIndex, X_ERROR,
-		       "Memory manager initialization to "
-		       "(%d,%d) (%d,%d) failed\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    return FALSE;
-	} else {
-	    int  width, height;
-
-	    xf86DrvMsg(scrnIndex, X_INFO,
-		       "Memory manager initialized to (%d,%d) (%d,%d)\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    /* why oh why can't we just request modes which are guaranteed to be 16 lines
-	       aligned... sigh */
-	    if ((fbarea = xf86AllocateOffscreenArea(pScreen,
-						    pScrn->displayWidth,
-						    info->allowColorTiling ? 
-						    ((pScrn->virtualY + 15) & ~15)
-						        - pScrn->virtualY + 2 : 2,
-						    0, NULL, NULL,
-						    NULL))) {
-		xf86DrvMsg(scrnIndex, X_INFO,
-			   "Reserved area from (%d,%d) to (%d,%d)\n",
-			   fbarea->box.x1, fbarea->box.y1,
-			   fbarea->box.x2, fbarea->box.y2);
-	    } else {
-		xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
-	    }
-	    if (xf86QueryLargestOffscreenArea(pScreen, &width,
-					      &height, 0, 0, 0)) {
-		xf86DrvMsg(scrnIndex, X_INFO,
-			   "Largest offscreen area available: %d x %d\n",
-			   width, height);
-
-		/* Lines in offscreen area needed for depth buffer and
-		 * textures
-		 */
-		info->depthTexLines = (scanlines
-				       - info->depthOffset / width_bytes);
-		info->backLines	    = (scanlines
-				       - info->backOffset / width_bytes
-				       - info->depthTexLines);
-		info->backArea	    = NULL;
-	    } else {
-		xf86DrvMsg(scrnIndex, X_ERROR,
-			   "Unable to determine largest offscreen area "
-			   "available\n");
-		return FALSE;
-	    }
-	}
-
-	xf86DrvMsg(scrnIndex, X_INFO,
-		   "Will use back buffer at offset 0x%x\n",
-		   info->backOffset);
-	xf86DrvMsg(scrnIndex, X_INFO,
-		   "Will use depth buffer at offset 0x%x\n",
-		   info->depthOffset);
-	xf86DrvMsg(scrnIndex, X_INFO,
-		   "Will use %d kb for textures at offset 0x%x\n",
-		   info->textureSize/1024, info->textureOffset);
-
-	info->frontPitchOffset = (((info->frontPitch * cpp / 64) << 22) |
-				  ((info->frontOffset + info->fbLocation) >> 10));
-
-	info->backPitchOffset = (((info->backPitch * cpp / 64) << 22) |
-				 ((info->backOffset + info->fbLocation) >> 10));
-
-	info->depthPitchOffset = (((info->depthPitch * cpp / 64) << 22) |
-				  ((info->depthOffset + info->fbLocation) >> 10));
-    } else
+#ifdef USE_EXA
+    if (info->useEXA && !RADEONSetupMemEXA(pScreen))
+	return FALSE;
 #endif
-    {
-	int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes;
-	MemBox.x1 = 0;
-	MemBox.y1 = 0;
-	MemBox.x2 = pScrn->displayWidth;
-	y2 = info->FbMapSize / width_bytes;
-	if (y2 >= 32768) y2 = 32767; /* because MemBox.y2 is signed short */
-	MemBox.y2 = y2;
-
-				/* The acceleration engine uses 14 bit
-				   signed coordinates, so we can't have any
-				   drawable caches beyond this region. */
-	if (MemBox.y2 > 8191) MemBox.y2 = 8191;
 
-	if (!xf86InitFBManager(pScreen, &MemBox)) {
-	    xf86DrvMsg(scrnIndex, X_ERROR,
-		       "Memory manager initialization to "
-		       "(%d,%d) (%d,%d) failed\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    return FALSE;
-	} else {
-	    int       width, height;
-	    FBAreaPtr fbarea;
+#if defined(XF86DRI) && defined(USE_XAA)
+    if (!info->useEXA && hasDRI && !RADEONSetupMemXAA_DRI(scrnIndex, pScreen))
+	return FALSE;
+#endif
 
-	    xf86DrvMsg(scrnIndex, X_INFO,
-		       "Memory manager initialized to (%d,%d) (%d,%d)\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    if ((fbarea = xf86AllocateOffscreenArea(pScreen,
-						    pScrn->displayWidth,
-						    info->allowColorTiling ? 
-						    ((pScrn->virtualY + 15) & ~15)
-						        - pScrn->virtualY + 2 : 2,
-						    0, NULL, NULL,
-						    NULL))) {
-		xf86DrvMsg(scrnIndex, X_INFO,
-			   "Reserved area from (%d,%d) to (%d,%d)\n",
-			   fbarea->box.x1, fbarea->box.y1,
-			   fbarea->box.x2, fbarea->box.y2);
-	    } else {
-		xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n");
-	    }
-	    if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
-					      0, 0, 0)) {
-		xf86DrvMsg(scrnIndex, X_INFO,
-			   "Largest offscreen area available: %d x %d\n",
-			   width, height);
-	    }
-	}
-    }
+#ifdef USE_XAA
+    if (!info->useEXA && !hasDRI && !RADEONSetupMemXAA(scrnIndex, pScreen))
+	return FALSE;
+#endif
 
-    info->dst_pitch_offset = (((pScrn->displayWidth * info->CurrentLayout.pixel_bytes / 64) << 22) |
-				  ((info->fbLocation + pScrn->fbOffset) >> 10));
+    info->dst_pitch_offset = (((pScrn->displayWidth * info->CurrentLayout.pixel_bytes / 64)
+			       << 22) | ((info->fbLocation + pScrn->fbOffset) >> 10));
 
-				/* Acceleration setup */
+    /* Acceleration setup */
     if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) {
 	if (RADEONAccelInit(pScreen)) {
 	    xf86DrvMsg(scrnIndex, X_INFO, "Acceleration enabled\n");
@@ -5532,41 +5638,53 @@
 	info->accelOn = FALSE;
     }
 
-				/* DGA setup */
-    RADEONDGAInit(pScreen);
+    /* DGA setup XXX FIXME EXA */
+    if (!info->useEXA)
+	RADEONDGAInit(pScreen);
 
-				/* Backing store setup */
+    /* Backing store setup */
     miInitializeBackingStore(pScreen);
     xf86SetBackingStore(pScreen);
 
-				/* Set Silken Mouse */
+    /* Set Silken Mouse */
     xf86SetSilkenMouse(pScreen);
 
-				/* Cursor setup */
+    /* Cursor setup */
     miDCInitialize(pScreen, xf86GetPointerScreenFuncs());
 
-				/* Hardware cursor setup */
+    /* Hardware cursor setup */
     if (!xf86ReturnOptValBool(info->Options, OPTION_SW_CURSOR, FALSE)) {
 	if (RADEONCursorInit(pScreen)) {
-	    int  width, height;
+#ifdef USE_EXA
+	    if (info->useEXA) {
+		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+			   "Using hardware cursor (offset %d)\n",
+			   info->cursor_offset);
+	    }
+#endif /* USE_EXA */
+#ifdef USE_XAA
+	    if (!info->useEXA) {
+		int  width, height;
 
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		       "Using hardware cursor (scanline %ld)\n",
-		       info->cursor_start / pScrn->displayWidth
-		       / info->CurrentLayout.pixel_bytes);
-	    if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
+		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+			   "Using hardware cursor (scanline %ld)\n",
+			   info->cursor_offset / pScrn->displayWidth
+			   / info->CurrentLayout.pixel_bytes);
+		if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
 					      0, 0, 0)) {
-		xf86DrvMsg(scrnIndex, X_INFO,
-			   "Largest offscreen area available: %d x %d\n",
-			   width, height);
+		    xf86DrvMsg(scrnIndex, X_INFO,
+			       "Largest offscreen area available: %d x %d\n",
+			       width, height);
+		}
 	    }
+#endif /* USE_XAA */
 	} else {
 	    xf86DrvMsg(scrnIndex, X_ERROR,
 		       "Hardware cursor initialization failed\n");
 	    xf86DrvMsg(scrnIndex, X_INFO, "Using software cursor\n");
 	}
     } else {
-	info->cursor_start = 0;
+	info->cursor_offset = 0;
 	xf86DrvMsg(scrnIndex, X_INFO, "Using software cursor\n");
     }
 
@@ -5616,7 +5734,15 @@
 				   done their thing, complete the DRI
 				   setup. */
 	if (!(info->directRenderingEnabled = RADEONDRIFinishScreenInit(pScreen))) {
-	    RADEONAccelInitMMIO(pScreen, info->accel);
+#ifdef USE_EXA
+	    if (info->useEXA) {
+		RADEONDrawInitMMIO(pScreen);
+	    }
+#endif /* USE_EXA */
+#ifdef USE_XAA
+	    if (!info->useEXA)
+		RADEONAccelInitMMIO(pScreen, info->accel);
+#endif /* USE_XAA */
 	}
     }
     if (info->directRenderingEnabled) {
@@ -7974,7 +8100,8 @@
 #endif
     }
 
-    if (info->accelOn) info->accel->Sync(pScrn);
+    if (info->accelOn)
+        RADEON_SYNC(info, pScrn);
 
     if (info->FBDev) {
 	RADEONSaveFBDevRegisters(pScrn, &info->ModeReg);
@@ -7997,7 +8124,7 @@
     }
 
     if (info->accelOn) {
-	info->accel->Sync(pScrn);
+        RADEON_SYNC(info, pScrn);
 	RADEONEngineRestore(pScrn);
     }
 
@@ -8167,7 +8294,8 @@
     if (info->CPStarted) DRILock(pScrn->pScreen, 0);
 #endif
 
-    if (info->accelOn) info->accel->Sync(pScrn);
+    if (info->accelOn)
+        RADEON_SYNC(info, pScrn);
 
     if(info->MergedFB) {
     	RADEONAdjustFrameMerged(scrnIndex, x, y, flags);
@@ -8294,10 +8422,12 @@
     }
 #endif
 
-    if(info->RenderTex) {
+#ifdef USE_XAA
+    if(!info->useEXA && info->RenderTex) {
         xf86FreeOffscreenLinear(info->RenderTex);
         info->RenderTex = NULL;
     }
+#endif /* USE_XAA */
 
     if (pScrn->vtSema) {
 	RADEONDisplayPowerManagementSet(pScrn, DPMSModeOn, 0);
@@ -8305,11 +8435,21 @@
     }
     RADEONUnmapMem(pScrn);
 
-    if (info->accel) XAADestroyInfoRec(info->accel);
-    info->accel = NULL;
-
-    if (info->scratch_save) xfree(info->scratch_save);
-    info->scratch_save = NULL;
+#ifdef USE_EXA
+    if (info->useEXA && info->accelOn)
+	exaDriverFini(pScreen);
+#endif /* USE_EXA */
+#ifdef USE_XAA
+    if (!info->useEXA) {
+	if (info->accel)
+		XAADestroyInfoRec(info->accel);
+	info->accel = NULL;
+
+	if (info->scratch_save)
+	    xfree(info->scratch_save);
+	info->scratch_save = NULL;
+    }
+#endif /* USE_XAA */
 
     if (info->cursor) xf86DestroyCursorInfoRec(info->cursor);
     info->cursor = NULL;
@@ -8458,7 +8598,8 @@
     if (info->CPStarted) DRILock(pScrn->pScreen, 0);
 #endif
 
-    if (info->accelOn) info->accel->Sync(pScrn);
+    if (info->accelOn)
+        RADEON_SYNC(info, pScrn);
 
     if (info->FBDev) {
 	fbdevHWDPMSSet(pScrn, PowerManagementMode, flags);
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_exa.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_exa.c
--- /dev/null	2005-08-31 14:46:40.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_exa.c	2005-09-03 18:59:08.000000000 +1000
@@ -0,0 +1,392 @@
+/*
+ * Copyright 2005 Eric Anholt
+ * Copyright 2005 Benjamin Herrenschmidt
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <anholt at FreeBSD.org>
+ *    Zack Rusin <zrusin at trolltech.com>
+ *    Benjamin Herrenschmidt <benh at kernel.crashing.org>
+ *
+ */
+
+#include "radeon.h"
+#include "radeon_reg.h"
+#include "radeon_dri.h"
+#include "radeon_macros.h"
+#include "radeon_probe.h"
+#include "radeon_version.h"
+#include "radeon_sarea.h"
+
+#include "xf86.h"
+
+
+/***********************************************************************/
+#define RINFO_FROM_SCREEN(pScr) ScrnInfoPtr pScrn =  xf86Screens[pScr->myNum]; \
+    RADEONInfoPtr info   = RADEONPTR(pScrn)	/* declares locals pScrn and info */
+
+#define RADEON_TRACE_FALL 1	/* non-zero: RADEON_FALLBACK logs before returning */
+#define RADEON_TRACE_DRAW 0	/* non-zero: TRACE prints the function name */
+
+#if RADEON_TRACE_FALL
+#define RADEON_FALLBACK(x)     		\
+do {					\
+	ErrorF("%s: ", __FUNCTION__);	\
+	ErrorF x;			\
+	return FALSE;			\
+} while (0)	/* x is a parenthesized ErrorF argument list; returns FALSE */
+#else	/* silent variant: just return FALSE from the calling function */
+#define RADEON_FALLBACK(x) return FALSE
+#endif /* RADEON_TRACE_FALL */
+
+#if RADEON_TRACE_DRAW
+#define TRACE do { ErrorF("TRACE: %s\n", __FUNCTION__); } while(0)
+#else	/* tracing disabled: TRACE expands to nothing */
+#define TRACE
+#endif /* RADEON_TRACE_DRAW */
+
+static struct {	/* rows follow the GX* raster-op order named at right */
+    int rop;		/* ROP3 code built on the source operand (S variants) */
+    int pattern;	/* ROP3 code built on the pattern operand (P variants) */
+} RADEON_ROP[] = {
+    { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear        */
+    { RADEON_ROP3_DSa,  RADEON_ROP3_DPa  }, /* GXand          */
+    { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse   */
+    { RADEON_ROP3_S,    RADEON_ROP3_P    }, /* GXcopy         */
+    { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted  */
+    { RADEON_ROP3_D,    RADEON_ROP3_D    }, /* GXnoop         */
+    { RADEON_ROP3_DSx,  RADEON_ROP3_DPx  }, /* GXxor          */
+    { RADEON_ROP3_DSo,  RADEON_ROP3_DPo  }, /* GXor           */
+    { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor          */
+    { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv        */
+    { RADEON_ROP3_Dn,   RADEON_ROP3_Dn   }, /* GXinvert       */
+    { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse    */
+    { RADEON_ROP3_Sn,   RADEON_ROP3_Pn   }, /* GXcopyInverted */
+    { RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted   */
+    { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand         */
+    { RADEON_ROP3_ONE,  RADEON_ROP3_ONE  }  /* GXset          */
+};
+
+/* Compute floor(log2(val)); returns -1 when val is zero or negative. */
+static __inline__ int
+RADEONLog2(int val)
+{
+	int bits = -1;
+	/* Stop at val <= 0: right-shifting a negative int may never reach 0. */
+	for (; val > 0; val >>= 1)
+		++bits;
+	return bits;
+}
+
+/* Return the raw bit pattern of a float as a CARD32. */
+static __inline__ CARD32 F_TO_DW(float val)
+{
+    union { float as_float; CARD32 as_dword; } pun;
+
+    /* Write one member, read the other: union-based type punning. */
+    pun.as_float = val;
+    return pun.as_dword;
+}
+
+/* Translate a pixmap bpp into an ATI 2D datatype.  Depths 15 and 16 share
+ * RGB565, which is okay since we require src and dest datatypes to be equal.
+ * 24bpp is given the same CI8 datatype as 8bpp.
+ * Returns TRUE on success, FALSE (via RADEON_FALLBACK) for any other bpp.
+ */
+static Bool RADEONGetDatatypeBpp(int bpp, CARD32 *type)
+{
+	CARD32 datatype;
+
+	if (bpp == 8 || bpp == 24) {
+		datatype = ATI_DATATYPE_CI8;
+	} else if (bpp == 16) {
+		datatype = ATI_DATATYPE_RGB565;
+	} else if (bpp == 32) {
+		datatype = ATI_DATATYPE_ARGB8888;
+	} else {
+		/* RADEON_FALLBACK returns FALSE itself; *type stays untouched. */
+		RADEON_FALLBACK(("Unsupported bpp: %d\n", bpp));
+	}
+
+	*type = datatype;
+	return TRUE;
+}
+
+/* Report whether pPix is color tiled: only the pixmap at offset 0 can be,
+ * and only while tiling is enabled.  The back buffer is not accounted for,
+ * which matters if it ever gets wrapped in a pixmap for DRI buffer moves.
+ */
+static Bool RADEONPixmapIsColortiled(PixmapPtr pPix)
+{
+    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
+
+    if (!info->tilingEnabled)
+	return FALSE;
+    return (exaGetPixmapOffset(pPix) == 0) ? TRUE : FALSE;
+}
+
+/* Pack pitch and offset into *pitch_offset: (pitch >> 6) at bit 22 over
+ * (offset >> 10).  Falls back on misaligned input; bpp is not consulted. */
+static Bool RADEONGetOffsetPitch(PixmapPtr pPix, int bpp, CARD32 *pitch_offset,
+				 unsigned int offset, unsigned int pitch)
+{
+	RINFO_FROM_SCREEN(pPix->drawable.pScreen);
+	CARD32 packed;
+
+	if (pitch % info->exa.card.pixmapPitchAlign != 0)
+		RADEON_FALLBACK(("Bad pitch 0x%08x\n", pitch));
+	if (offset % info->exa.card.pixmapOffsetAlign != 0)
+		RADEON_FALLBACK(("Bad offset 0x%08x\n", offset));
+
+	packed = ((pitch >> 6) << 22) | (offset >> 10);
+	if (RADEONPixmapIsColortiled(pPix))	/* tiled front buffer? */
+		packed |= RADEON_DST_TILE_MACRO;
+	*pitch_offset = packed;
+	return TRUE;
+}
+
+/* Build the pitch/offset word for pPix from its EXA offset (rebased by
+ * info->fbLocation) and EXA pitch; 24bpp pixmaps are passed on as 8bpp.
+ */
+static Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, CARD32 *pitch_offset)
+{
+	RINFO_FROM_SCREEN(pPix->drawable.pScreen);
+	int bpp = pPix->drawable.bitsPerPixel;
+	CARD32 offset = exaGetPixmapOffset(pPix) + info->fbLocation;
+	CARD32 pitch = exaGetPixmapPitch(pPix);
+
+	if (bpp == 24)
+		bpp = 8;
+
+	return RADEONGetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch);
+}
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+
+static int swapper_surfaces[3] = { -1, -1, -1} ;
+
+/* Floor of log2(val) for positive val; 0 is mapped to 0 rather than -1. */
+static int
+blahLog2(int val)
+{
+    int bits = -1;
+    if (val == 0)
+	return 0;
+    for (; val != 0; val >>= 1)
+	bits++;
+    return bits;
+}
+
+/* EXA PrepareAccess hook (big-endian builds only).  Returns TRUE when the
+ * main swappers already suit pPix, FALSE while other depths are unhandled. */
+static Bool RADEONPrepareAccess(PixmapPtr pPix, int index)
+{
+    drmRadeonSurfaceAlloc drmsurfalloc;
+    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
+    CARD32 offset = exaGetPixmapOffset(pPix);
+    int bpp;
+
+    /* Front buffer is always set with proper swappers */
+    if (offset == 0)
+        return TRUE;
+
+    /* If same bpp as front buffer, just do nothing as the main
+     * swappers will apply
+     */
+    bpp = pPix->drawable.bitsPerPixel;
+    if (bpp == pScrn->bitsPerPixel)
+        return TRUE;
+
+    /* TODO: We need to setup a separate swapper, let's request a
+     * surface.  Format specifiers must match the two arguments passed.
+     */
+    ErrorF("NYI surface change, bpp: %d vs. %d\n",
+	   bpp, pScrn->bitsPerPixel);
+    return FALSE;
+}
+
+static void RADEONFinishAccess(PixmapPtr pPix, int index)
+{	/* Intentionally empty: this version has nothing to do on finish. */
+}
+
+#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */
+
+#define RADEON_SWITCH_TO_2D()						\
+do {									\
+	BEGIN_ACCEL(2);							\
+	OUT_ACCEL_REG(RADEON_RB2D_DSTCACHE_CTLSTAT,  RADEON_RB2D_DC_FLUSH); \
+	OUT_ACCEL_REG(RADEON_WAIT_UNTIL,				\
+		RADEON_WAIT_HOST_IDLECLEAN |				\
+		RADEON_WAIT_3D_IDLECLEAN);				\
+	FINISH_ACCEL();							\
+} while (0);	/* NOTE(review): trailing ';' adds an empty statement after "MACRO();" */
+
+#define RADEON_SWITCH_TO_3D()						\
+do {									\
+	BEGIN_ACCEL(2);							\
+	OUT_ACCEL_REG(RADEON_RB2D_DSTCACHE_CTLSTAT,  RADEON_RB2D_DC_FLUSH); \
+	OUT_ACCEL_REG(RADEON_WAIT_UNTIL,				\
+		RADEON_WAIT_HOST_IDLECLEAN |				\
+		RADEON_WAIT_2D_IDLECLEAN |				\
+		RADEON_WAIT_3D_IDLECLEAN);				\
+	FINISH_ACCEL();							\
+} while (0);	/* NOTE(review): same trailing ';' as RADEON_SWITCH_TO_2D */
+
+#define ENTER_DRAW(x) TRACE	/* argument is ignored */
+#define LEAVE_DRAW(x) TRACE	/* argument is ignored */
+/***********************************************************************/
+
+#define ACCEL_MMIO
+#define ACCEL_PREAMBLE()	unsigned char *RADEONMMIO = info->MMIO
+#define BEGIN_ACCEL(n)		RADEONWaitForFifo(pScrn, (n))
+#define OUT_ACCEL_REG(reg, val)	OUTREG(reg, val)
+#define FINISH_ACCEL()
+
+#ifdef RENDER
+#include "radeon_exa_render.c"
+#endif
+#include "radeon_exa_funcs.c"
+
+#undef ACCEL_MMIO
+#undef ACCEL_PREAMBLE
+#undef BEGIN_ACCEL
+#undef OUT_ACCEL_REG
+#undef FINISH_ACCEL
+
+#ifdef XF86DRI
+
+#define ACCEL_CP
+#define ACCEL_PREAMBLE()						\
+    RING_LOCALS;							\
+    RADEONCP_REFRESH(pScrn, info)
+#define BEGIN_ACCEL(n)		BEGIN_RING(2*(n))
+#define OUT_ACCEL_REG(reg, val)	OUT_RING_REG(reg, val)
+#define FINISH_ACCEL()		ADVANCE_RING()
+
+#define OUT_RING_F(x) OUT_RING(F_TO_DW(x))
+
+#ifdef RENDER
+#include "radeon_exa_render.c"
+#endif
+#include "radeon_exa_funcs.c"
+
+#endif /* XF86DRI */
+
+/*
+ * Lay out video memory for EXA: the visible screen first, then (with DRI)
+ * back buffer, depth buffer and local textures.  Always returns TRUE.
+ */
+Bool RADEONSetupMemEXA (ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    int cpp = info->CurrentLayout.pixel_bytes;
+    int l;	/* only used inside the XF86DRI section below */
+    int screen_size;
+    int byteStride = pScrn->displayWidth * cpp;
+
+    /* Need to adjust screen size for 16 line tiles, and then make it
+     * meet the buffer alignment requirement.
+     */
+    if (info->allowColorTiling)
+	screen_size = ((pScrn->virtualY + 15) & ~15) * byteStride;
+    else
+	screen_size = pScrn->virtualY * byteStride;
+    screen_size = (screen_size + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
+
+    info->exa.card.memoryBase = info->FB + pScrn->fbOffset;
+    info->exa.card.memorySize = info->FbMapSize;
+    info->exa.card.offScreenBase = screen_size;
+
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Allocating from a screen of %ld kb\n",
+	       info->exa.card.memorySize / 1024);
+
+#if defined(XF86DRI)
+    if (info->directRenderingEnabled) {
+	int depth_size;
+
+	info->frontOffset = 0;
+	info->frontPitch = pScrn->displayWidth;
+
+	/* Reserve a static area for the back buffer the same size as the
+	 * visible screen.  XXX: This would be better initialized in ati_dri.c
+	 * when GLX is set up, but the offscreen memory manager's allocations
+	 * don't last through VT switches, while the kernel's understanding of
+	 * offscreen locations does.
+	 */
+	info->backPitch = pScrn->displayWidth;
+	if (!info->noBackBuffer &&
+	    info->exa.card.offScreenBase + screen_size <=
+	    info->exa.card.memorySize)
+	{
+	    info->backOffset = info->exa.card.offScreenBase;
+	    info->exa.card.offScreenBase += screen_size;
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Will use back buffer at offset 0x%x\n",
+		       info->backOffset);
+	}	/* NOTE(review): backOffset is left as-is when this is skipped */
+	
+	/* Reserve the static depth buffer, and adjust pitch and height to
+	 * handle tiling.
+	 */
+	info->depthPitch = (pScrn->displayWidth + 31) & ~31;
+	depth_size = ((pScrn->virtualY + 15) & ~15) * info->depthPitch * cpp;
+	depth_size = (depth_size + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN;
+	if (info->exa.card.offScreenBase + depth_size <=
+	    info->exa.card.memorySize)
+	{
+	    info->depthOffset = info->exa.card.offScreenBase;
+	    info->exa.card.offScreenBase += depth_size;
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Will use depth buffer at offset 0x%x\n",
+		       info->depthOffset);
+	}	/* NOTE(review): depthOffset is left as-is when it does not fit */
+	
+	/* Reserve approx. half of remaining offscreen memory for local
+	 * textures.  Round down to a whole number of texture regions.
+	 */
+	info->textureSize = (info->exa.card.memorySize - info->exa.card.offScreenBase) / 2;
+	l = RADEONLog2(info->textureSize / RADEON_NR_TEX_REGIONS);
+	if (l < RADEON_LOG_TEX_GRANULARITY)
+	    l = RADEON_LOG_TEX_GRANULARITY;
+	info->textureSize = (info->textureSize >> l) << l;
+	if (info->textureSize >= 512 * 1024) {
+	    info->textureOffset = info->exa.card.offScreenBase;
+	    info->exa.card.offScreenBase += info->textureSize;
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Will use %d kb for textures at offset 0x%x\n",
+		       info->textureSize/1024, info->textureOffset);
+	} else {
+	    /* Below 512 kb (two 256x256x32bpp textures): no local textures */
+	    info->textureSize = 0;
+	}
+    }
+#endif /* XF86DRI */
+	
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+	       "Will use %ld kb for X Server offscreen at offset 0x%lx\n",
+	       (info->exa.card.memorySize - info->exa.card.offScreenBase) /
+	       1024, info->exa.card.offScreenBase);
+
+    return TRUE;
+}
+
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_exa_funcs.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_exa_funcs.c
--- /dev/null	2005-08-31 14:46:40.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_exa_funcs.c	2005-09-03 17:25:01.000000000 +1000
@@ -0,0 +1,426 @@
+/*
+ * Copyright 2005 Eric Anholt
+ * Copyright 2005 Benjamin Herrenschmidt
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <anholt at FreeBSD.org>
+ *    Zack Rusin <zrusin at trolltech.com>
+ *    Benjamin Herrenschmidt <benh at kernel.crashing.org>
+ *
+ */
+
+#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
+#error Cannot define both MMIO and CP acceleration!
+#endif
+/* Paste tokens with ANSI '##' unless only UNIXCPP (and not ANSICPP) is defined. */
+#if !defined(UNIXCPP) || defined(ANSICPP)
+#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
+#else
+#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
+#endif
+/* FUNC_NAME(x) yields xMMIO or xCP, chosen by whichever ACCEL_* macro is defined. */
+#ifdef ACCEL_MMIO
+#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
+#else
+#ifdef ACCEL_CP
+#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
+#else
+#error No accel type defined!
+#endif
+#endif
+
+#include "radeon.h"
+#include "atidri.h"
+
+#include "exa.h"
+
+#include "fbdevhw.h"
+
+static void
+FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker)
+{
+    TRACE;
+    /* Calls the MMIO/CP RADEONWaitForIdle on this screen's xf86Screens[] entry; 'marker' is not consulted. */
+    FUNC_NAME(RADEONWaitForIdle)(xf86Screens[pScreen->myNum]);
+}
+
+static Bool
+FUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
+{
+    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
+    CARD32 datatype, dst_pitch_offset;
+    ACCEL_PREAMBLE();
+    /* Set up a solid fill on pPix: colour fg, RADEON_ROP[] index alu, write mask pm.  Returns TRUE once programmed. */
+    TRACE;
+    /* Each failed check below leaves through RADEON_FALLBACK (macro defined outside this chunk; presumably returns FALSE). */
+    if (pPix->drawable.bitsPerPixel == 24)
+	RADEON_FALLBACK(("24bpp unsupported\n"));
+    if (!RADEONGetDatatypeBpp(pPix->drawable.bitsPerPixel, &datatype))
+	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
+    if (!RADEONGetPixmapOffsetPitch(pPix, &dst_pitch_offset))
+	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch failed\n"));
+
+    RADEON_SWITCH_TO_2D();
+    /* BEGIN_ACCEL(5) is followed by exactly five OUT_ACCEL_REG writes. */
+    BEGIN_ACCEL(5);
+    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
+	    RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+	    RADEON_GMC_BRUSH_SOLID_COLOR |
+	    (datatype << 8) |
+	    RADEON_GMC_SRC_DATATYPE_COLOR |
+	    RADEON_ROP[alu].pattern |
+	    RADEON_GMC_CLR_CMP_CNTL_DIS);
+    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
+    OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, fg);
+    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, pm);
+    OUT_ACCEL_REG(RADEON_DP_CNTL,
+	(RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM));
+    FINISH_ACCEL();
+
+    return TRUE;
+}
+
+
+static void
+FUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2)
+{
+    /* Emit one fill rectangle: origin (x1,y1), width x2 - x1, height y2 - y1. */
+    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
+    ACCEL_PREAMBLE();
+
+    TRACE;
+    /* Both registers pack y (or height) into the upper 16 bits and x (or width) into the lower bits. */
+    BEGIN_ACCEL(2);
+    OUT_ACCEL_REG(RADEON_DST_Y_X, (y1 << 16) | x1);
+    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, ((y2 - y1) << 16) | (x2 - x1));
+    FINISH_ACCEL();
+}
+
+static void
+FUNC_NAME(RADEONDoneSolid)(PixmapPtr pPix)
+{
+    TRACE; /* installed as the DoneSolid hook; pPix is unused and no other work is done */
+}
+
+static Bool
+FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc,   PixmapPtr pDst,
+			     int xdir, int ydir,
+			     int rop,
+			     Pixel planemask)
+{
+    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
+    CARD32 datatype, src_pitch_offset, dst_pitch_offset;
+    ACCEL_PREAMBLE();
+    /* Set up a blit from pSrc to pDst with RADEON_ROP[] index rop and write mask planemask.  Returns TRUE once programmed. */
+    TRACE;
+    /* Remember the direction: RADEONCopy reads info->xdir/ydir to choose its start corner. */
+    info->xdir = xdir;
+    info->ydir = ydir;
+
+    if (pDst->drawable.bitsPerPixel == 24)
+	RADEON_FALLBACK(("24bpp unsupported\n"));
+    if (!RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype))
+	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
+    if (!RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset))
+	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch source failed\n"));
+    if (!RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset))
+	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch dest failed\n"));
+
+    RADEON_SWITCH_TO_2D();
+    /* BEGIN_ACCEL(5) is followed by exactly five OUT_ACCEL_REG writes. */
+    BEGIN_ACCEL(5);
+    OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL,
+	RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+	RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+	RADEON_GMC_BRUSH_NONE |
+	(datatype << 8) |
+	RADEON_GMC_SRC_DATATYPE_COLOR |
+	RADEON_ROP[rop].rop |
+	RADEON_DP_SRC_SOURCE_MEMORY |
+	RADEON_GMC_CLR_CMP_CNTL_DIS);
+    OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset);
+    OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset);
+    OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask);
+    OUT_ACCEL_REG(RADEON_DP_CNTL,
+	((xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) |
+	 (ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0)));
+    FINISH_ACCEL();
+
+    return TRUE;
+}
+
+static void
+FUNC_NAME(RADEONCopy)(PixmapPtr pDst,
+		      int srcX, int srcY,
+		      int dstX, int dstY,
+		      int w, int h)
+{
+    /* Emit one w x h blit from (srcX,srcY) to (dstX,dstY) using the direction saved by RADEONPrepareCopy. */
+    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
+    ACCEL_PREAMBLE();
+
+    TRACE;
+    /* For a negative direction, start from the last column/row of the rectangle instead of the first. */
+    if (info->xdir < 0) {
+	srcX += w - 1;
+	dstX += w - 1;
+    }
+    if (info->ydir < 0) {
+	srcY += h - 1;
+	dstY += h - 1;
+    }
+
+    BEGIN_ACCEL(3);
+    /* Each register packs y (or height) into the upper 16 bits and x (or width) into the lower bits. */
+    OUT_ACCEL_REG(RADEON_SRC_Y_X,	   (srcY << 16) | srcX);
+    OUT_ACCEL_REG(RADEON_DST_Y_X,	   (dstY << 16) | dstX);
+    OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h  << 16) | w);
+
+    FINISH_ACCEL();
+}
+
+static void
+FUNC_NAME(RADEONDoneCopy)(PixmapPtr pDst)
+{
+    TRACE; /* installed as the DoneCopy hook; pDst is unused and no other work is done */
+}
+
+static Bool
+FUNC_NAME(RADEONUploadToScreen)(PixmapPtr pDst, char *src, int src_pitch)
+{
+#if X_BYTE_ORDER == X_BIG_ENDIAN || defined(ACCEL_CP)
+    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
+#endif
+    CARD8	   *dst	     = pDst->devPrivate.ptr;
+    unsigned int   dst_pitch = exaGetPixmapPitch(pDst);
+    unsigned int   w	     = pDst->drawable.width;
+    unsigned int   h	     = pDst->drawable.height;
+    unsigned int   bpp	     = pDst->drawable.bitsPerPixel;
+#ifdef ACCEL_CP
+    unsigned int   hpass;
+    CARD32	   buf_pitch;
+#endif
+#if X_BYTE_ORDER == X_BIG_ENDIAN 
+    unsigned char *RADEONMMIO = info->MMIO;
+    unsigned int swapper = info->ModeReg.surface_cntl &
+	    ~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP |
+	      RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP);
+#endif
+    /* Copy the full width x height of pDst from 'src', whose rows are src_pitch bytes apart.  Always returns TRUE. */
+    TRACE;
+
+#ifdef ACCEL_CP
+    if (info->directRenderingEnabled) {
+	CARD8 *buf;
+	int cpp = bpp / 8;
+	ACCEL_PREAMBLE();
+
+	RADEON_SWITCH_TO_2D();
+	while ((buf = RADEONHostDataBlit(pScrn,
+					cpp, w, dst_pitch, &buf_pitch,
+					&dst, &h, &hpass)) != 0) {
+	    RADEONHostDataBlitCopyPass(pScrn, cpp, buf, (unsigned char *)src,
+				       hpass, buf_pitch, src_pitch);
+	    src += hpass * src_pitch;
+	}
+	
+	exaMarkSync(pDst->drawable.pScreen);
+	return TRUE;
+  }
+#endif
+    /* CPU copy path: reached in the MMIO build, or in the CP build when direct rendering is off. */
+    /* Do we need that sync here ? probably not .... */
+    exaWaitSync(pDst->drawable.pScreen);
+    /* Big-endian hosts: OR in the 16BPP/32BPP NONSURF swap bits matching bpp before touching the pixmap. */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    switch(bpp) {
+    case 15:
+    case 16:
+	swapper |= RADEON_NONSURF_AP0_SWP_16BPP
+		|  RADEON_NONSURF_AP1_SWP_16BPP;
+	break;
+    case 24:
+    case 32:
+	swapper |= RADEON_NONSURF_AP0_SWP_32BPP
+		|  RADEON_NONSURF_AP1_SWP_32BPP;
+	break;
+    }
+    OUTREG(RADEON_SURFACE_CNTL, swapper);
+#endif
+    w *= bpp / 8;
+    /* w now counts bytes per row; copy row by row since the two pitches differ. */
+    while (h--) {
+	memcpy(dst, src, w);
+	src += src_pitch;
+	dst += dst_pitch;
+    }
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    /* restore byte swapping */
+    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl);
+#endif
+
+    return TRUE;
+}
+
+static Bool
+FUNC_NAME(RADEONDownloadFromScreen)(PixmapPtr pSrc, int x, int y, int w, int h,
+				    char *dst, int dst_pitch)
+{
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    RINFO_FROM_SCREEN(pSrc->drawable.pScreen);
+    unsigned char *RADEONMMIO = info->MMIO;
+    unsigned int swapper = info->ModeReg.surface_cntl &
+	    ~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP |
+	      RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP);
+#endif
+    unsigned char *src	     = pSrc->devPrivate.ptr;
+    int		   src_pitch = exaGetPixmapPitch(pSrc);
+    int		   bpp	     = pSrc->drawable.bitsPerPixel;
+    /* Copy the w x h pixel rectangle at (x,y) of pSrc into 'dst', whose rows are dst_pitch bytes apart.  Always returns TRUE. */
+    TRACE;
+
+    /*
+     * This is currently done without DMA until I have ironed out the
+     * various endian issues with R300 among others
+     */
+    exaWaitSync(pSrc->drawable.pScreen);
+    /* Big-endian hosts: OR in the 16BPP/32BPP NONSURF swap bits matching bpp before reading the pixmap. */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    switch(bpp) {
+    case 15:
+    case 16:
+	swapper |= RADEON_NONSURF_AP0_SWP_16BPP
+		|  RADEON_NONSURF_AP1_SWP_16BPP;
+	break;
+    case 24:
+    case 32:
+	swapper |= RADEON_NONSURF_AP0_SWP_32BPP
+		|  RADEON_NONSURF_AP1_SWP_32BPP;
+	break;
+    }
+    OUTREG(RADEON_SURFACE_CNTL, swapper);
+#endif
+    /* Advance src to the first byte of the rectangle, then turn w into a byte count. */
+    src += (x * bpp / 8) + (y * src_pitch);
+    w *= bpp / 8;
+
+    while (h--) {
+	memcpy(dst, src, w);
+	src += src_pitch;
+	dst += dst_pitch;
+    }
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    /* restore byte swapping */
+    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl);
+#endif
+
+    return TRUE;
+}
+
+Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen)
+{
+    RINFO_FROM_SCREEN(pScreen);
+    /* Fill in info->exa (hooks and card limits) for this MMIO/CP variant and hand it to exaDriverInit. */
+    memset(&info->exa.accel, 0, sizeof(ExaAccelInfoRec));
+    /* Solid fill hooks */
+    info->exa.accel.PrepareSolid = FUNC_NAME(RADEONPrepareSolid);
+    info->exa.accel.Solid = FUNC_NAME(RADEONSolid);
+    info->exa.accel.DoneSolid = FUNC_NAME(RADEONDoneSolid);
+    /* Copy hooks */
+    info->exa.accel.PrepareCopy = FUNC_NAME(RADEONPrepareCopy);
+    info->exa.accel.Copy = FUNC_NAME(RADEONCopy);
+    info->exa.accel.DoneCopy = FUNC_NAME(RADEONDoneCopy);
+    /* Sync and upload/download hooks */
+    info->exa.accel.WaitMarker = FUNC_NAME(RADEONSync);
+    info->exa.accel.UploadToScreen = FUNC_NAME(RADEONUploadToScreen);
+    info->exa.accel.DownloadFromScreen = FUNC_NAME(RADEONDownloadFromScreen);
+    /* PrepareAccess/FinishAccess are only installed on big-endian builds. */
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    info->exa.accel.PrepareAccess = RADEONPrepareAccess;
+    info->exa.accel.FinishAccess = RADEONFinishAccess;
+#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */
+
+    info->exa.card.flags = EXA_OFFSCREEN_PIXMAPS;
+    info->exa.card.pixmapOffsetAlign = 1024;
+    info->exa.card.pixmapPitchAlign = 64;
+
+    info->exa.card.maxX = 2047;
+    info->exa.card.maxY = 2047;
+    /* Composite hooks: none for R300 and newer, the R200 set for RV250/RV280/RS300/R200, the R100 set otherwise. */
+    if (info->RenderAccel) {
+	if (info->ChipFamily >= CHIP_FAMILY_R300) {
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
+			       "unsupported on R300 type cards and newer.\n");
+	} else if ((info->ChipFamily == CHIP_FAMILY_RV250) || 
+		   (info->ChipFamily == CHIP_FAMILY_RV280) || 
+		   (info->ChipFamily == CHIP_FAMILY_RS300) || 
+		   (info->ChipFamily == CHIP_FAMILY_R200)) {
+		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
+			       "enabled for R200 type cards.\n");
+		info->exa.accel.CheckComposite = R200CheckComposite;
+		info->exa.accel.PrepareComposite =
+		    FUNC_NAME(R200PrepareComposite);
+		info->exa.accel.Composite = FUNC_NAME(RadeonComposite);
+		info->exa.accel.DoneComposite = RadeonDoneComposite;
+	} else {
+		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration "
+			       "enabled for R100 type cards.\n");
+		info->exa.accel.CheckComposite = R100CheckComposite;
+		info->exa.accel.PrepareComposite =
+		    FUNC_NAME(R100PrepareComposite);
+		info->exa.accel.Composite = FUNC_NAME(RadeonComposite);
+		info->exa.accel.DoneComposite = RadeonDoneComposite;
+	}
+    }
+
+    RADEONEngineInit(pScrn);
+    /* Returns FALSE only when exaDriverInit rejects the record. */
+    if (!exaDriverInit(pScreen, &info->exa)) {
+	return FALSE;
+    }
+    exaMarkSync(pScreen);
+
+    info->scratch_area = NULL;
+    /* The scratch-area allocation below is compiled out, so scratch_area stays NULL. */
+#if 0
+    /* Reserve a scratch area.  It'll be used for storing glyph data during
+     * Composite operations, because glyphs aren't in real pixmaps and thus
+     * can't be migrated.
+     */
+    info->scratch_area = exaOffscreenAlloc(pScreen, 131072,
+					   info->exa.card.pixmapOffsetAlign,
+					   TRUE,
+					   FUNC_NAME(RADEONScratchSave),
+					   info);
+    if (info->scratch_area != NULL) {
+	info->scratch_next = info->scratch_area->offset;
+	info->exa.accel.UploadToScratch = FUNC_NAME(RADEONUploadToScratch);
+    }
+#endif
+
+    return TRUE;
+}
+
+#undef FUNC_NAME
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_exa_render.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_exa_render.c
--- /dev/null	2005-08-31 14:46:40.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_exa_render.c	2005-09-03 08:37:58.000000000 +1000
@@ -0,0 +1,796 @@
+/*
+ * Copyright 2005 Eric Anholt
+ * Copyright 2005 Benjamin Herrenschmidt
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <anholt at FreeBSD.org>
+ *    Zack Rusin <zrusin at trolltech.com>
+ *    Benjamin Herrenschmidt <benh at kernel.crashing.org>
+ *
+ */
+
+#if defined(ACCEL_MMIO) && defined(ACCEL_CP)
+#error Cannot define both MMIO and CP acceleration!
+#endif
+/* Paste tokens with ANSI '##' unless only UNIXCPP (and not ANSICPP) is defined. */
+#if !defined(UNIXCPP) || defined(ANSICPP)
+#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix
+#else
+#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix
+#endif
+/* FUNC_NAME(x) yields xMMIO or xCP, chosen by whichever ACCEL_* macro is defined. */
+#ifdef ACCEL_MMIO
+#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO)
+#else
+#ifdef ACCEL_CP
+#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP)
+#else
+#error No accel type defined!
+#endif
+#endif
+/* ONLY_ONCE is defined whenever ACCEL_CP is not, i.e. for the MMIO variant of this file. */
+#ifndef ACCEL_CP
+#define ONLY_ONCE
+#endif
+
+/* Only include the following (generic) bits once. */
+#ifdef ONLY_ONCE
+static Bool is_transform[2];	/* per texture unit: set by the TextureSetup functions when the picture has a transform */
+static PictTransform *transform[2];	/* per texture unit: that picture's transform, valid when is_transform[] is TRUE */
+/* One blend table row: whether the op reads dst/src alpha, and the source/destination blend factor bits. */
+struct blendinfo {
+    Bool dst_alpha;
+    Bool src_alpha;
+    CARD32 blend_cntl;
+};
+/* Indexed by the composite op (see RADEONGetBlendCntl); ops at or beyond the table size are rejected by the CheckComposite functions. */
+static struct blendinfo RadeonBlendOp[] = {
+    /* Clear */
+    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ZERO},
+    /* Src */
+    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ZERO},
+    /* Dst */
+    {0, 0, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE},
+    /* Over */
+    {0, 1, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
+    /* OverReverse */
+    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE},
+    /* In */
+    {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ZERO},
+    /* InReverse */
+    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_SRC_ALPHA},
+    /* Out */
+    {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO},
+    /* OutReverse */
+    {0, 1, RADEON_SRC_BLEND_GL_ZERO	      | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
+    /* Atop */
+    {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA     | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
+    /* AtopReverse */
+    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA},
+    /* Xor */
+    {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA},
+    /* Add */
+    {0, 0, RADEON_SRC_BLEND_GL_ONE	      | RADEON_DST_BLEND_GL_ONE},
+};
+
+struct formatinfo {
+    int fmt;		/* Render picture format (PICT_*) this row matches */
+    Bool byte_swap;	/* when set, TextureSetup ORs the *_TXO_ENDIAN_BYTE_SWAP bit into the texture offset */
+    CARD32 card_fmt;	/* bits used as the starting value of the TXFORMAT register */
+};
+/* Lookups scan a table from the top and stop at the first row whose fmt matches. */
+/* Note on texture formats:
+ * TXFORMAT_Y8 expands to (Y,Y,Y,1).  TXFORMAT_I8 expands to (I,I,I,I)
+ */
+static struct formatinfo R100TexFormats[] = {
+	{PICT_a8r8g8b8,	0, RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP},
+	{PICT_x8r8g8b8,	0, RADEON_TXFORMAT_ARGB8888},
+	{PICT_a8b8g8r8,	1, RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP},
+	{PICT_x8b8g8r8,	1, RADEON_TXFORMAT_RGBA8888},
+	{PICT_r5g6b5,	0, RADEON_TXFORMAT_RGB565},
+	{PICT_a1r5g5b5,	0, RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP},
+	{PICT_x1r5g5b5,	0, RADEON_TXFORMAT_ARGB1555},
+	{PICT_a8,	0, RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP},
+};
+
+static struct formatinfo R200TexFormats[] = { /* R200 texture formats; scanned top-down, first fmt match wins */
+    {PICT_a8r8g8b8,	0, R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP},
+    {PICT_x8r8g8b8,	0, R200_TXFORMAT_ARGB8888},
+    {PICT_a8b8g8r8,	1, R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP}, /* byte-swapped RGBA8888, as in R100TexFormats */
+    {PICT_x8b8g8r8,	1, R200_TXFORMAT_RGBA8888}, /* byte-swapped RGBA8888, as in R100TexFormats */
+    {PICT_r5g6b5,	0, R200_TXFORMAT_RGB565},
+    {PICT_a1r5g5b5,	0, R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP},
+    {PICT_x1r5g5b5,	0, R200_TXFORMAT_ARGB1555},
+    {PICT_a8,		0, R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP},
+};
+
+/* Common Radeon setup code */
+
+static Bool RADEONGetDestFormat(PicturePtr pDstPicture, CARD32 *dst_format)
+{
+    switch (pDstPicture->format) { /* map the picture format to the RADEON_COLOR_FORMAT_* value later ORed into RB3D_CNTL */
+    case PICT_a8r8g8b8:
+    case PICT_x8r8g8b8:
+	*dst_format = RADEON_COLOR_FORMAT_ARGB8888;
+	break;
+    case PICT_r5g6b5:
+	*dst_format = RADEON_COLOR_FORMAT_RGB565;
+	break;
+    case PICT_a1r5g5b5:
+    case PICT_x1r5g5b5:
+	*dst_format = RADEON_COLOR_FORMAT_ARGB1555;
+	break;
+    case PICT_a8:
+	*dst_format = RADEON_COLOR_FORMAT_RGB8;
+	break;
+    default: /* *dst_format is left unwritten on this path */
+	RADEON_FALLBACK(("Unsupported dest format 0x%x\n",
+			(int)pDstPicture->format));
+    }
+    /* Every supported format reaches this point with *dst_format set. */
+    return TRUE;
+}
+static CARD32 RADEONGetBlendCntl(int op, CARD32 dst_format) /* callers pass the destination picture's PICT_* format */
+{
+    CARD32 blendcntl = RadeonBlendOp[op].blend_cntl; /* op is not range-checked here */
+    /* If there's no dst alpha channel, adjust the blend op so that we'll treat
+     * it as always 1.
+     */
+    if (PICT_FORMAT_A(dst_format) == 0 && RadeonBlendOp[op].dst_alpha) {
+	if ((blendcntl & RADEON_SRC_BLEND_MASK) ==
+	    RADEON_SRC_BLEND_GL_DST_ALPHA) {
+	    blendcntl = (blendcntl & ~RADEON_SRC_BLEND_MASK) |
+			 RADEON_SRC_BLEND_GL_ONE;
+	} else if ((blendcntl & RADEON_SRC_BLEND_MASK) ==
+		 RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA) {
+	    blendcntl = (blendcntl & ~RADEON_SRC_BLEND_MASK) |
+			 RADEON_SRC_BLEND_GL_ZERO;
+	}
+    }
+    /* Only the source blend factor is ever rewritten; the destination factor is returned as tabulated. */
+    return blendcntl;
+}
+
+union intfloat { /* overlays a float and a CARD32 in the same storage */
+    float f;
+    CARD32 i;
+};
+
+/* R100-specific code */
+
+static Bool R100CheckCompositeTexture(PicturePtr pPict, int unit)
+{
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    int i;
+    /* Returns TRUE only if pPict's size, format, repeat mode and filter are all usable as a texture; 'unit' is unused. */
+    if ((w > 0x7ff) || (h > 0x7ff))
+	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
+    /* The format must have a row in R100TexFormats. */
+    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) {
+	if (R100TexFormats[i].fmt == pPict->format)
+	    break;
+    }
+    if (i == sizeof(R100TexFormats) / sizeof(R100TexFormats[0]))
+	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
+			(int)pPict->format));
+    /* Repeat is only accepted when both dimensions are powers of two. */
+    if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0))
+	RADEON_FALLBACK(("NPOT repeat unsupported (%dx%d)\n", w, h));
+
+    if (pPict->filter != PictFilterNearest &&
+	pPict->filter != PictFilterBilinear)
+    {
+	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
+    }
+
+    return TRUE;
+}
+
+#endif /* ONLY_ONCE */
+
+static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
+					int unit)
+{
+    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
+    CARD32 txfilter, txformat, txoffset, txpitch;
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    int i;
+    ACCEL_PREAMBLE();
+    /* Program texture unit 'unit' (0, or anything else for unit 1) from pPict/pPix and record pPict's transform. */
+    txpitch = exaGetPixmapPitch(pPix);
+    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation;
+    /* NOTE(review): i is not checked against the table size after this loop; presumably R100CheckCompositeTexture has already accepted the format - confirm. */
+    for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++)
+    {
+	if (R100TexFormats[i].fmt == pPict->format)
+	    break;
+    }
+    txformat = R100TexFormats[i].card_fmt;
+    if (R100TexFormats[i].byte_swap)
+	txoffset |= RADEON_TXO_ENDIAN_BYTE_SWAP;
+    /* Repeating textures encode log2 of width/height; all others are flagged NON_POWER2. */
+    if (pPict->repeat) {
+	txformat |= RADEONLog2(w) << RADEON_TXFORMAT_WIDTH_SHIFT;
+	txformat |= RADEONLog2(h) << RADEON_TXFORMAT_HEIGHT_SHIFT;
+    } else 
+	txformat |= RADEON_TXFORMAT_NON_POWER2;
+    txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */
+ 
+    if ((txoffset & 0x1f) != 0)
+	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
+    if ((txpitch & 0x1f) != 0)
+	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
+
+    switch (pPict->filter) {
+    case PictFilterNearest:
+	txfilter = (RADEON_MAG_FILTER_NEAREST | RADEON_MIN_FILTER_NEAREST);
+	break;
+    case PictFilterBilinear:
+	txfilter = (RADEON_MAG_FILTER_LINEAR | RADEON_MIN_FILTER_LINEAR);
+	break;
+    default:
+	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
+    }
+    /* Each branch below issues the five writes announced by BEGIN_ACCEL(5). */
+    BEGIN_ACCEL(5);
+    if (unit == 0) {
+	OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter);
+	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat);
+	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, txoffset);
+	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0,
+	    (pPix->drawable.width - 1) |
+	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
+	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32);
+    } else {
+	OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, txfilter);
+	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat);
+	OUT_ACCEL_REG(RADEON_PP_TXOFFSET_1, txoffset);
+	OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1,
+	    (pPix->drawable.width - 1) |
+	    ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
+	OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32);
+    }
+    FINISH_ACCEL();
+    /* transform[unit] is only updated when a transform exists; is_transform[unit] says whether it is current. */
+    if (pPict->transform != 0) {
+	is_transform[unit] = TRUE;
+	transform[unit] = pPict->transform;
+    } else {
+	is_transform[unit] = FALSE;
+    }
+
+    return TRUE;
+}
+
+#ifdef ONLY_ONCE
+static Bool R100CheckComposite(int op, PicturePtr pSrcPicture,
+			       PicturePtr pMaskPicture, PicturePtr pDstPicture)
+{
+    CARD32 tmp1; /* receives the destination format; the value itself is not used here */
+
+    /* Check for unsupported compositing operations. */
+    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
+	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
+    if (pMaskPicture != NULL && pMaskPicture->componentAlpha &&
+	RadeonBlendOp[op].src_alpha)
+    {
+	RADEON_FALLBACK(("Component alpha not supported with source "
+			"alpha blending.\n"));
+    }
+    if (pDstPicture->pDrawable->width >= (1 << 11) ||
+	pDstPicture->pDrawable->height >= (1 << 11))
+    {
+	RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n",
+			pDstPicture->pDrawable->width,
+			pDstPicture->pDrawable->height));
+    }
+    /* Source goes to texture unit 0, the optional mask to unit 1. */
+    if (!R100CheckCompositeTexture(pSrcPicture, 0))
+	return FALSE;
+    if (pMaskPicture != NULL && !R100CheckCompositeTexture(pMaskPicture, 1))
+	return FALSE;
+
+    if (pDstPicture->componentAlpha)
+	return FALSE;
+    /* The destination format must be one RADEONGetDestFormat can map. */
+    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
+	return FALSE;
+
+    return TRUE;
+}
+#endif /* ONLY_ONCE */
+
+static Bool FUNC_NAME(R100PrepareComposite)(int op,
+					    PicturePtr pSrcPicture,
+					    PicturePtr pMaskPicture,
+					    PicturePtr pDstPicture,
+					    PixmapPtr pSrc,
+					    PixmapPtr pMask,
+					    PixmapPtr pDst)
+{
+    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
+    CARD32 dst_format, dst_offset, dst_pitch, colorpitch;
+    CARD32 pp_cntl, blendcntl, cblend, ablend;
+    int pixel_shift;
+    ACCEL_PREAMBLE();
+    /* Program destination, texture units and blend state for composite 'op'; returns FALSE if any part is unusable. */
+    TRACE;
+
+    if (!info->XInited3D)
+	RADEONInit3DEngine(pScrn);
+    /* Bail out rather than program RB3D_CNTL with an unset dst_format. */
+    if (!RADEONGetDestFormat(pDstPicture, &dst_format))
+	return FALSE;
+    pixel_shift = pDst->drawable.bitsPerPixel >> 4; /* 8bpp -> 0, 16bpp -> 1, 32bpp -> 2 */
+
+    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation;
+    dst_pitch = exaGetPixmapPitch(pDst);
+    colorpitch = dst_pitch >> pixel_shift;
+    if (RADEONPixmapIsColortiled(pDst))
+	colorpitch |= RADEON_COLOR_TILE_ENABLE;
+
+    /* The offset must be 16-byte aligned and the shifted pitch a multiple of 8. */
+    if ((dst_offset & 0x0f) != 0)
+	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
+    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
+	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
+
+    if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0))
+	return FALSE;
+    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
+
+    if (pMask != NULL) {
+	if (!FUNC_NAME(R100TextureSetup)(pMaskPicture, pMask, 1))
+	    return FALSE;
+	pp_cntl |= RADEON_TEX_1_ENABLE;
+    } else {
+	is_transform[1] = FALSE;
+    }
+
+    RADEON_SWITCH_TO_3D();
+    /* Eight register writes follow, matching BEGIN_ACCEL(8). */
+    BEGIN_ACCEL(8);
+    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
+    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
+    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
+    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+
+    /* IN operator: Multiply src by mask components or mask alpha.
+     * BLEND_CTL_ADD is A * B + C.
+     * If a picture is a8, we have to explicitly zero its color values.
+     * If the destination is a8, we have to route the alpha to red, I think.
+     */
+    cblend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_COLOR_ARG_C_ZERO;
+    ablend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_ALPHA_ARG_C_ZERO;
+
+    if (pDstPicture->format == PICT_a8)
+	cblend |= RADEON_COLOR_ARG_A_T0_ALPHA;
+    else if (pSrcPicture->format == PICT_a8)
+	cblend |= RADEON_COLOR_ARG_A_ZERO;
+    else
+	cblend |= RADEON_COLOR_ARG_A_T0_COLOR;
+    ablend |= RADEON_ALPHA_ARG_A_T0_ALPHA;
+
+    if (pMask) {
+	if (pMaskPicture->componentAlpha &&
+	    pDstPicture->format != PICT_a8)
+	    cblend |= RADEON_COLOR_ARG_B_T1_COLOR;
+	else
+	    cblend |= RADEON_COLOR_ARG_B_T1_ALPHA;
+	ablend |= RADEON_ALPHA_ARG_B_T1_ALPHA;
+    } else {
+	cblend |= RADEON_COLOR_ARG_B_ZERO | RADEON_COMP_ARG_B;
+	ablend |= RADEON_ALPHA_ARG_B_ZERO | RADEON_COMP_ARG_B;
+    }
+
+    OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, cblend);
+    OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend);
+    OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY |
+				     RADEON_SE_VTX_FMT_ST0 |
+				     RADEON_SE_VTX_FMT_ST1);
+    /* Op operator. */
+    blendcntl = RADEONGetBlendCntl(op, pDstPicture->format);
+    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
+    FINISH_ACCEL();
+
+    return TRUE;
+}
+
+#ifdef ONLY_ONCE
+
+static Bool R200CheckCompositeTexture(PicturePtr pPict, int unit)
+{
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    int i;
+    /* Returns TRUE only if pPict's size, format, repeat mode and filter are all usable as a texture; 'unit' is unused. */
+    if ((w > 0x7ff) || (h > 0x7ff))
+	RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
+    /* The format must have a row in R200TexFormats. */
+    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
+    {
+	if (R200TexFormats[i].fmt == pPict->format)
+	    break;
+    }
+    if (i == sizeof(R200TexFormats) / sizeof(R200TexFormats[0]))
+	RADEON_FALLBACK(("Unsupported picture format 0x%x\n",
+			 (int)pPict->format));
+    /* Repeat is only accepted when both dimensions are powers of two. */
+    if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0))
+	RADEON_FALLBACK(("NPOT repeat unsupported (%dx%d)\n", w, h));
+
+    if (pPict->filter != PictFilterNearest &&
+	pPict->filter != PictFilterBilinear)
+	RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter));
+
+    return TRUE;
+}
+
+#endif /* ONLY_ONCE */
+
+static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
+					int unit)
+{
+    RINFO_FROM_SCREEN(pPix->drawable.pScreen);
+    CARD32 txfilter, txformat, txoffset, txpitch;
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    int i;
+    ACCEL_PREAMBLE();
+    /* Program texture unit 'unit' (0, or anything else for unit 1) from pPict/pPix and record pPict's transform. */
+    txpitch = exaGetPixmapPitch(pPix);
+    txoffset = exaGetPixmapOffset(pPix) + info->fbLocation;
+    /* NOTE(review): i is not checked against the table size after this loop; presumably R200CheckCompositeTexture has already accepted the format - confirm. */
+    for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++)
+    {
+	if (R200TexFormats[i].fmt == pPict->format)
+	    break;
+    }
+    txformat = R200TexFormats[i].card_fmt;
+    if (R200TexFormats[i].byte_swap)
+	txoffset |= R200_TXO_ENDIAN_BYTE_SWAP;
+    /* Repeating textures encode log2 of width/height; all others are flagged NON_POWER2. */
+    if (pPict->repeat) {
+	txformat |= RADEONLog2(w) << R200_TXFORMAT_WIDTH_SHIFT;
+	txformat |= RADEONLog2(h) << R200_TXFORMAT_HEIGHT_SHIFT;
+    } else
+	txformat |= R200_TXFORMAT_NON_POWER2;
+    txformat |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT;
+
+    if ((txoffset & 0x1f) != 0)
+	RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset));
+    if ((txpitch & 0x1f) != 0)
+	RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch));
+
+    switch (pPict->filter) {
+    case PictFilterNearest:
+	txfilter = (R200_MAG_FILTER_NEAREST |
+		    R200_MIN_FILTER_NEAREST);
+	break;
+    case PictFilterBilinear:
+	txfilter = (R200_MAG_FILTER_LINEAR |
+		    R200_MIN_FILTER_LINEAR);
+	break;
+    default:
+	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
+    }
+    /* Each branch below issues the six writes announced by BEGIN_ACCEL(6). */
+    BEGIN_ACCEL(6);
+    if (unit == 0) {
+	OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter);
+	OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat);
+	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0);
+	OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) |
+		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
+	OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32);
+	OUT_ACCEL_REG(R200_PP_TXOFFSET_0, txoffset);
+    } else {
+	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
+	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat);
+	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
+	OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) |
+		      ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT));
+	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32);
+	OUT_ACCEL_REG(R200_PP_TXOFFSET_1, txoffset);
+    }
+    FINISH_ACCEL();
+    /* transform[unit] is only updated when a transform exists; is_transform[unit] says whether it is current. */
+    if (pPict->transform != 0) {
+	is_transform[unit] = TRUE;
+	transform[unit] = pPict->transform;
+    } else {
+	is_transform[unit] = FALSE;
+    }
+
+    return TRUE;
+}
+
+#ifdef ONLY_ONCE
+static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
+			       PicturePtr pDstPicture)
+{
+    CARD32 tmp1; /* receives the destination format; the value itself is not used here */
+
+    TRACE;
+
+    /* Check for unsupported compositing operations. */
+    if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0]))
+	RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op));
+    if (pMaskPicture != NULL && pMaskPicture->componentAlpha &&
+	RadeonBlendOp[op].src_alpha)
+	RADEON_FALLBACK(("Component alpha not supported with source "
+			"alpha blending.\n"));
+    /* Source goes to texture unit 0, the optional mask to unit 1. */
+    if (!R200CheckCompositeTexture(pSrcPicture, 0))
+	return FALSE;
+    if (pMaskPicture != NULL && !R200CheckCompositeTexture(pMaskPicture, 1))
+	return FALSE;
+    /* The destination format must be one RADEONGetDestFormat can map. */
+    if (!RADEONGetDestFormat(pDstPicture, &tmp1))
+	return FALSE;
+
+    return TRUE;
+}
+#endif /* ONLY_ONCE */
+
+/* Program the R200 3D engine for a Composite with operator `op`: validate the
+ * destination layout, set up texture unit 0 (source) and, when a mask is given,
+ * unit 1, then write the combiner and blend state.  Returns FALSE when a texture
+ * setup fails (RADEON_FALLBACK presumably bails out as well); TRUE otherwise. */
+static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture,
+				PicturePtr pMaskPicture, PicturePtr pDstPicture,
+				PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
+{
+    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
+    CARD32 dst_format, dst_offset, dst_pitch;
+    CARD32 pp_cntl, blendcntl, cblend, ablend, colorpitch;
+    int pixel_shift;
+    ACCEL_PREAMBLE();
+
+    TRACE;
+    if (!info->XInited3D)
+	RADEONInit3DEngine(pScrn);
+    /* NOTE(review): return value ignored, presumably because R200CheckComposite already vetted the format -- confirm. */
+    RADEONGetDestFormat(pDstPicture, &dst_format);
+    pixel_shift = pDst->drawable.bitsPerPixel >> 4;	/* 0, 1, 2 for 8, 16, 32 bpp */
+
+    dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation;
+    dst_pitch = exaGetPixmapPitch(pDst);
+    colorpitch = dst_pitch >> pixel_shift;	/* pitch divided by 1, 2 or 4 */
+    if (RADEONPixmapIsColortiled(pDst))
+	colorpitch |= RADEON_COLOR_TILE_ENABLE;
+    /* Fall back unless the offset is a multiple of 16 and the shifted pitch a multiple of 8. */
+    if ((dst_offset & 0x0f) != 0)
+	RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset));
+    if (((dst_pitch >> pixel_shift) & 0x7) != 0)
+	RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch));
+    /* Texture unit 0 always carries the source picture. */
+    if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0))
+	return FALSE;
+    pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE;
+    /* Unit 1 is enabled only when a mask pixmap is supplied; otherwise its transform flag is cleared. */
+    if (pMask != NULL) {
+	if (!FUNC_NAME(R200TextureSetup)(pMaskPicture, pMask, 1))
+	    return FALSE;
+	pp_cntl |= RADEON_TEX_1_ENABLE;
+    } else {
+	is_transform[1] = FALSE;
+    }
+
+    RADEON_SWITCH_TO_3D();
+    BEGIN_ACCEL(11);	/* one slot per OUT_ACCEL_REG up to FINISH_ACCEL; keep in sync */
+    OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl);
+    OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE);
+    OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset);
+
+    OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY);
+    OUT_ACCEL_REG(R200_SE_VTX_FMT_1,
+		 (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) |
+		 (2 << R200_VTX_TEX1_COMP_CNT_SHIFT));
+
+    OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch);
+    /* IN operator: Multiply src by mask components or mask alpha.
+     * BLEND_CTL_ADD is A * B + C.
+     * If a picture is a8, we have to explicitly zero its color values.
+     * If the destination is a8, we have to route the alpha to red, I think.
+     */
+    cblend = R200_TXC_OP_MADD | R200_TXC_ARG_C_ZERO;
+    ablend = R200_TXA_OP_MADD | R200_TXA_ARG_C_ZERO;
+
+    if (pDstPicture->format == PICT_a8)
+	cblend |= R200_TXC_ARG_A_R0_ALPHA;
+    else if (pSrcPicture->format == PICT_a8)
+	cblend |= R200_TXC_ARG_A_ZERO;
+    else
+	cblend |= R200_TXC_ARG_A_R0_COLOR;
+    ablend |= R200_TXA_ARG_A_R0_ALPHA;
+
+    if (pMask) {
+	if (pMaskPicture->componentAlpha &&
+	    pDstPicture->format != PICT_a8)
+	    cblend |= R200_TXC_ARG_B_R1_COLOR;
+	else
+	    cblend |= R200_TXC_ARG_B_R1_ALPHA;
+	ablend |= R200_TXA_ARG_B_R1_ALPHA;
+    } else {
+	cblend |= R200_TXC_ARG_B_ZERO | R200_TXC_COMP_ARG_B;
+	ablend |= R200_TXA_ARG_B_ZERO | R200_TXA_COMP_ARG_B;
+    }
+
+    OUT_ACCEL_REG(R200_PP_TXCBLEND_0, cblend);
+    OUT_ACCEL_REG(R200_PP_TXCBLEND2_0,
+	R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0);
+    OUT_ACCEL_REG(R200_PP_TXABLEND_0, ablend);
+    OUT_ACCEL_REG(R200_PP_TXABLEND2_0,
+	R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0);
+
+    /* Op operator. */
+    blendcntl = RADEONGetBlendCntl(op, pDstPicture->format);
+    OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl);
+    FINISH_ACCEL();
+
+    return TRUE;
+}
+
+#ifdef ACCEL_CP
+/* CP path: one vertex is VTX_DWORD_COUNT ring dwords (dst XY, src XY, mask XY). */
+#define VTX_DWORD_COUNT 6
+#define VTX_OUT(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
+do {								\
+    OUT_RING_F(_dstX);						\
+    OUT_RING_F(_dstY);						\
+    OUT_RING_F(_srcX);						\
+    OUT_RING_F(_srcY);						\
+    OUT_RING_F(_maskX);						\
+    OUT_RING_F(_maskY);						\
+} while (0)
+
+#else /* ACCEL_CP */
+/* MMIO path: one vertex is VTX_REG_COUNT writes to SE_PORT_DATA0.  VTX_OUT must
+ * issue exactly that many, because RadeonComposite sizes its BEGIN_ACCEL() as
+ * 1 + VTX_REG_COUNT * 4.  NOTE(review): the CP variant converts with OUT_RING_F;
+ * confirm OUT_ACCEL_REG hands these coordinates to the hardware as floats too. */
+#define VTX_REG_COUNT 6
+#define VTX_OUT(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)	\
+do {								\
+    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _dstX);			\
+    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _dstY);			\
+    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _srcX);			\
+    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _srcY);			\
+    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _maskX);		\
+    OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _maskY);		\
+} while (0)
+
+#endif /* !ACCEL_CP */
+
+/* Draw one w x h composite rectangle at (dstX, dstY) as a four-vertex primitive,
+ * with texture coordinates starting at (srcX, srcY) and (maskX, maskY).  A unit
+ * flagged in is_transform[] has its coordinates run through transform[] first. */
+static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
+				     int srcX, int srcY,
+				     int maskX, int maskY,
+				     int dstX, int dstY,
+				     int w, int h)
+{
+    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
+    int srcXend, srcYend, maskXend, maskYend;
+    PictVector v;
+    ACCEL_PREAMBLE();
+    ENTER_DRAW(0);
+    /*ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
+	   srcX, srcY, maskX, maskY,dstX, dstY, w, h);*/
+    /* Far corners of the source and mask rectangles, before any transform. */
+    srcXend = srcX + w;
+    srcYend = srcY + h;
+    maskXend = maskX + w;
+    maskYend = maskY + h;
+    if (is_transform[0]) {
+	v.vector[0] = IntToxFixed(srcX);
+	v.vector[1] = IntToxFixed(srcY);
+	v.vector[2] = xFixed1;
+	PictureTransformPoint(transform[0], &v);
+	srcX = xFixedToInt(v.vector[0]);
+	srcY = xFixedToInt(v.vector[1]);
+	v.vector[0] = IntToxFixed(srcXend);
+	v.vector[1] = IntToxFixed(srcYend);
+	v.vector[2] = xFixed1;
+	PictureTransformPoint(transform[0], &v);
+	srcXend = xFixedToInt(v.vector[0]);
+	srcYend = xFixedToInt(v.vector[1]);
+    }
+    if (is_transform[1]) {
+	v.vector[0] = IntToxFixed(maskX);
+	v.vector[1] = IntToxFixed(maskY);
+	v.vector[2] = xFixed1;
+	PictureTransformPoint(transform[1], &v);
+	maskX = xFixedToInt(v.vector[0]);
+	maskY = xFixedToInt(v.vector[1]);
+	v.vector[0] = IntToxFixed(maskXend);
+	v.vector[1] = IntToxFixed(maskYend);
+	v.vector[2] = xFixed1;
+	PictureTransformPoint(transform[1], &v);
+	maskXend = xFixedToInt(v.vector[0]);
+	maskYend = xFixedToInt(v.vector[1]);
+    }
+    /* NOTE(review): only two corners are transformed; exact for scale/translate, not for rotation/shear. */
+#ifdef ACCEL_CP
+    if (info->ChipFamily < CHIP_FAMILY_R200) {
+	BEGIN_RING(4 * VTX_DWORD_COUNT + 3);	/* packet header, vertex format, VC_CNTL + 4 vertices */
+	OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD,
+			    4 * VTX_DWORD_COUNT + 1));
+	OUT_RING(RADEON_CP_VC_FRMT_XY |
+		 RADEON_CP_VC_FRMT_ST0 |
+		 RADEON_CP_VC_FRMT_ST1);
+	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
+		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+		 RADEON_CP_VC_CNTL_MAOS_ENABLE |
+		 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
+		 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+    } else {
+	BEGIN_RING(4 * VTX_DWORD_COUNT + 2);	/* packet header, VC_CNTL + 4 vertices */
+	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+			    4 * VTX_DWORD_COUNT));
+	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN |
+		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+		 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+    }
+
+    VTX_OUT(dstX,     dstY,     srcX,    srcY,    maskX,    maskY);
+    VTX_OUT(dstX,     dstY + h, srcX,    srcYend, maskX,    maskYend);
+    VTX_OUT(dstX + w, dstY + h, srcXend, srcYend, maskXend, maskYend);
+    VTX_OUT(dstX + w, dstY,     srcXend, srcY,    maskXend, maskY);
+    ADVANCE_RING();
+#else /* ACCEL_CP */
+    BEGIN_ACCEL(1 + VTX_REG_COUNT * 4);	/* SE_VF_CNTL + 4 vertices of VTX_REG_COUNT writes each */
+    if (info->ChipFamily < CHIP_FAMILY_R200) {
+	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_FAN |
+					  RADEON_VF_PRIM_WALK_DATA |
+					  RADEON_VF_RADEON_MODE |
+					  4 << RADEON_VF_NUM_VERTICES_SHIFT));
+    } else {
+	OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST |
+					  RADEON_VF_PRIM_WALK_DATA |
+					  4 << RADEON_VF_NUM_VERTICES_SHIFT));
+    }
+
+    VTX_OUT(dstX,     dstY,     srcX,    srcY,    maskX,    maskY);
+    VTX_OUT(dstX,     dstY + h, srcX,    srcYend, maskX,    maskYend);
+    VTX_OUT(dstX + w, dstY + h, srcXend, srcYend, maskXend, maskYend);
+    VTX_OUT(dstX + w, dstY,     srcXend, srcY,    maskXend, maskY);
+    FINISH_ACCEL();
+#endif /* !ACCEL_CP */
+    LEAVE_DRAW(0);
+}
+#undef VTX_OUT
+
+#ifdef ONLY_ONCE
+/* DoneComposite: the body does nothing beyond the ENTER_DRAW/LEAVE_DRAW bracket. */
+static void RadeonDoneComposite(PixmapPtr pDst)
+{
+    ENTER_DRAW(0);
+    LEAVE_DRAW(0);
+}
+#endif /* ONLY_ONCE */
+#undef ONLY_ONCE
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_mergedfb.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_mergedfb.c
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_mergedfb.c	2005-08-17 11:55:37.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_mergedfb.c	2005-09-03 08:37:58.000000000 +1000
@@ -1961,7 +1961,14 @@
     OUTREG(RADEON_CUR_HORZ_VERT_POSN, (RADEON_CUR_LOCK
 				   | ((xorigin ? 0 : x1) << 16)
 				   | (yorigin ? 0 : y1)));
-    OUTREG(RADEON_CUR_OFFSET, info->cursor_start + yorigin * stride);
+#ifdef USE_EXA
+    if (info->useEXA)
+	OUTREG(RADEON_CUR_OFFSET, info->cursorArea->offset + yorigin * stride);
+#endif /* USE_EXA */
+#ifdef USE_XAA
+    if (!info->useEXA)
+	OUTREG(RADEON_CUR_OFFSET, info->cursor_offset + yorigin * stride);
+#endif /* USE_XAA */
 		/* cursor2 */
     OUTREG(RADEON_CUR2_HORZ_VERT_OFF,  (RADEON_CUR2_LOCK
 				    | (xorigin << 16)
@@ -1969,9 +1976,14 @@
     OUTREG(RADEON_CUR2_HORZ_VERT_POSN, (RADEON_CUR2_LOCK
 				    | ((xorigin ? 0 : x2) << 16)
 				    | (yorigin ? 0 : y2)));
-    OUTREG(RADEON_CUR2_OFFSET,
-	       info->cursor_start + yorigin * stride);
-
+#ifdef USE_EXA
+    if (info->useEXA)
+	OUTREG(RADEON_CUR2_OFFSET, info->cursorArea->offset + yorigin * stride);
+#endif /* USE_EXA */
+#ifdef USE_XAA
+    if (!info->useEXA)
+	OUTREG(RADEON_CUR2_OFFSET, info->cursor_offset + yorigin * stride);
+#endif /* USE_XAA */
 }
 
 /* radeon Xv helpers */
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_reg.h xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_reg.h
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_reg.h	2005-08-17 11:55:37.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_reg.h	2005-09-03 08:37:58.000000000 +1000
@@ -53,6 +53,22 @@
 #ifndef _RADEON_REG_H_
 #define _RADEON_REG_H_
 
+/* ATI_DATATYPE_*: numeric pixel-format codes; 13 is left unassigned here (NOTE(review): presumably the hardware datatype encoding -- confirm). */
+#define ATI_DATATYPE_VQ				0
+#define ATI_DATATYPE_CI4			1
+#define ATI_DATATYPE_CI8			2
+#define ATI_DATATYPE_ARGB1555			3
+#define ATI_DATATYPE_RGB565			4
+#define ATI_DATATYPE_RGB888			5
+#define ATI_DATATYPE_ARGB8888			6
+#define ATI_DATATYPE_RGB332			7
+#define ATI_DATATYPE_Y8				8
+#define ATI_DATATYPE_RGB8			9
+#define ATI_DATATYPE_CI16			10
+#define ATI_DATATYPE_VYUY_422			11
+#define ATI_DATATYPE_YVYU_422			12
+#define ATI_DATATYPE_AYUV_444			14
+#define ATI_DATATYPE_ARGB4444			15
 				/* Registers for 2D/Video/Overlay */
 #define RADEON_ADAPTER_ID                   0x0f2c /* PCI */
 #define RADEON_AGP_BASE                     0x0170
@@ -1226,6 +1242,28 @@
 #       define RADEON_RB2D_DC_FLUSH_ALL     0xf
 #       define RADEON_RB2D_DC_BUSY          (1 << 31)
 #define RADEON_RB2D_DSTCACHE_MODE           0x3428
+/* RB3D counterparts of the RB2D_DSTCACHE_* definitions: register offsets, each followed by its field values. */
+#define RADEON_RB3D_DSTCACHE_MODE           0x3258
+# define RADEON_RB3D_DC_CACHE_ENABLE            (0)
+# define RADEON_RB3D_DC_2D_CACHE_DISABLE        (1)
+# define RADEON_RB3D_DC_3D_CACHE_DISABLE        (2)
+# define RADEON_RB3D_DC_CACHE_DISABLE           (3)
+# define RADEON_RB3D_DC_2D_CACHE_LINESIZE_128   (1 << 2)
+# define RADEON_RB3D_DC_3D_CACHE_LINESIZE_128   (2 << 2)
+# define RADEON_RB3D_DC_2D_CACHE_AUTOFLUSH      (1 << 8)
+# define RADEON_RB3D_DC_3D_CACHE_AUTOFLUSH      (2 << 8)
+# define R200_RB3D_DC_2D_CACHE_AUTOFREE         (1 << 10)
+# define R200_RB3D_DC_3D_CACHE_AUTOFREE         (2 << 10)
+# define RADEON_RB3D_DC_FORCE_RMW               (1 << 16)
+# define RADEON_RB3D_DC_DISABLE_RI_FILL         (1 << 24)
+# define RADEON_RB3D_DC_DISABLE_RI_READ         (1 << 25)
+/* CTLSTAT: DC_FLUSH_ALL (0xf) equals DC_FLUSH | DC_FREE; DC_BUSY is bit 31. */
+#define RADEON_RB3D_DSTCACHE_CTLSTAT            0x325C
+# define RADEON_RB3D_DC_FLUSH                   (3 << 0)
+# define RADEON_RB3D_DC_FREE                    (3 << 2)
+# define RADEON_RB3D_DC_FLUSH_ALL               0xf
+# define RADEON_RB3D_DC_BUSY                    (1 << 31)
+
 #define RADEON_REG_BASE                     0x0f18 /* PCI */
 #define RADEON_REGPROG_INF                  0x0f09 /* PCI */
 #define RADEON_REVISION_ID                  0x0f08 /* PCI */
@@ -2305,6 +2343,11 @@
 #       define R200_VC_16BIT_SWAP		(1 << 0)
 #       define R200_VC_32BIT_SWAP		(2 << 0)
 #define R200_PP_TXFILTER_0			0x2c00 
+#define R200_PP_TXFILTER_1			0x2c20
+#define R200_PP_TXFILTER_2			0x2c40
+#define R200_PP_TXFILTER_3			0x2c60
+#define R200_PP_TXFILTER_4			0x2c80
+#define R200_PP_TXFILTER_5			0x2ca0
 #       define R200_MAG_FILTER_NEAREST		(0  <<  0)
 #       define R200_MAG_FILTER_LINEAR		(1  <<  0)
 #       define R200_MAG_FILTER_MASK		(1  <<  0)
@@ -2355,6 +2398,11 @@
 #       define R200_BORDER_MODE_OGL		(0  << 31)
 #       define R200_BORDER_MODE_D3D		(1  << 31)
 #define R200_PP_TXFORMAT_0			0x2c04
+#define R200_PP_TXFORMAT_1			0x2c24
+#define R200_PP_TXFORMAT_2			0x2c44
+#define R200_PP_TXFORMAT_3			0x2c64
+#define R200_PP_TXFORMAT_4			0x2c84
+#define R200_PP_TXFORMAT_5			0x2ca4
 #       define R200_TXFORMAT_I8			(0 << 0)
 #       define R200_TXFORMAT_AI88		(1 << 0)
 #       define R200_TXFORMAT_RGB332		(2 << 0)
@@ -2394,8 +2442,26 @@
 #       define R200_TXFORMAT_CHROMA_KEY_ENABLE	(1 << 29)
 #       define R200_TXFORMAT_CUBIC_MAP_ENABLE		(1 << 30)
 #define R200_PP_TXFORMAT_X_0                    0x2c08
+#define R200_PP_TXFORMAT_X_1                    0x2c28
+#define R200_PP_TXFORMAT_X_2                    0x2c48
+#define R200_PP_TXFORMAT_X_3                    0x2c68
+#define R200_PP_TXFORMAT_X_4                    0x2c88
+#define R200_PP_TXFORMAT_X_5                    0x2ca8
+
 #define R200_PP_TXSIZE_0			0x2c0c /* NPOT only */
+#define R200_PP_TXSIZE_1			0x2c2c /* NPOT only */
+#define R200_PP_TXSIZE_2			0x2c4c /* NPOT only */
+#define R200_PP_TXSIZE_3			0x2c6c /* NPOT only */
+#define R200_PP_TXSIZE_4			0x2c8c /* NPOT only */
+#define R200_PP_TXSIZE_5			0x2cac /* NPOT only */
+
 #define R200_PP_TXPITCH_0                       0x2c10 /* NPOT only */
+#define R200_PP_TXPITCH_1			0x2c30 /* NPOT only */
+#define R200_PP_TXPITCH_2			0x2c50 /* NPOT only */
+#define R200_PP_TXPITCH_3			0x2c70 /* NPOT only */
+#define R200_PP_TXPITCH_4			0x2c90 /* NPOT only */
+#define R200_PP_TXPITCH_5			0x2cb0 /* NPOT only */
+
 #define R200_PP_TXOFFSET_0			0x2d00
 #       define R200_TXO_ENDIAN_NO_SWAP		(0 << 0)
 #       define R200_TXO_ENDIAN_BYTE_SWAP	(1 << 0)
@@ -2403,6 +2469,11 @@
 #       define R200_TXO_ENDIAN_HALFDW_SWAP	(3 << 0)
 #       define R200_TXO_OFFSET_MASK		0xffffffe0
 #       define R200_TXO_OFFSET_SHIFT		5
+#define R200_PP_TXOFFSET_1			0x2d18
+#define R200_PP_TXOFFSET_2			0x2d30
+#define R200_PP_TXOFFSET_3			0x2d48
+#define R200_PP_TXOFFSET_4			0x2d60
+#define R200_PP_TXOFFSET_5			0x2d78
 
 #define R200_PP_TFACTOR_0			0x2ee0
 #define R200_PP_TFACTOR_1			0x2ee4
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_render.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_render.c
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_render.c	2005-08-17 11:55:37.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_render.c	2005-09-03 08:37:58.000000000 +1000
@@ -1,7 +1,38 @@
+/*
+ * Copyright 2004 Eric Anholt
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <anholt at FreeBSD.org>
+ *    Hui Yu <hyu at ati.com>
+ *
+ */
+
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
+#ifdef USE_XAA
+
 #include "dixstruct.h"
 
 #include "xaa.h"
@@ -10,11 +41,6 @@
 #ifndef RENDER_GENERIC_HELPER
 #define RENDER_GENERIC_HELPER
 
-static void RadeonInit3DEngineMMIO(ScrnInfoPtr pScrn);
-#ifdef XF86DRI
-static void RadeonInit3DEngineCP(ScrnInfoPtr pScrn);
-#endif
-
 struct blendinfo {
 	Bool dst_alpha;
 	Bool src_alpha;
@@ -228,24 +254,6 @@
 	return bits - 1;
 }
 
-static void RadeonInit3DEngine(ScrnInfoPtr pScrn)
-{
-    RADEONInfoPtr info = RADEONPTR (pScrn);
-
-#ifdef XF86DRI
-    if (info->directRenderingEnabled) {
-	RADEONSAREAPrivPtr pSAREAPriv;
-
-	pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
-	pSAREAPriv->ctxOwner = DRIGetContext(pScrn->pScreen);
-	RadeonInit3DEngineCP(pScrn);
-    } else
-#endif
-	RadeonInit3DEngineMMIO(pScrn);
-
-    info->RenderInited3D = TRUE;
-}
-
 static void
 RemoveLinear (FBLinearPtr linear)
 {
@@ -357,61 +365,6 @@
 #endif
 #endif
 
-
-static void FUNC_NAME(RadeonInit3DEngine)(ScrnInfoPtr pScrn)
-{
-    RADEONInfoPtr  info       = RADEONPTR(pScrn);
-    ACCEL_PREAMBLE();
-
-    if (info->ChipFamily >= CHIP_FAMILY_R300) {
-	/* Unimplemented */
-    } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || 
-	       (info->ChipFamily == CHIP_FAMILY_RV280) || 
-	       (info->ChipFamily == CHIP_FAMILY_RS300) || 
-	       (info->ChipFamily == CHIP_FAMILY_R200)) {
-
-	BEGIN_ACCEL(7);
-        if (info->ChipFamily == CHIP_FAMILY_RS300) {
-            OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS);
-        } else {
-            OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, 0);
-        }
-	OUT_ACCEL_REG(R200_PP_CNTL_X, 0);
-	OUT_ACCEL_REG(R200_PP_TXMULTI_CTL_0, 0);
-	OUT_ACCEL_REG(R200_SE_VTX_STATE_CNTL, 0);
-	OUT_ACCEL_REG(R200_RE_CNTL, 0x0);
-	/* XXX: correct?  Want it to be like RADEON_VTX_ST?_NONPARAMETRIC */
-	OUT_ACCEL_REG(R200_SE_VTE_CNTL, R200_VTX_ST_DENORMALIZED);
-	OUT_ACCEL_REG(R200_SE_VAP_CNTL, R200_VAP_FORCE_W_TO_ONE |
-	    R200_VAP_VF_MAX_VTX_NUM);
-	FINISH_ACCEL();
-    } else {
-	BEGIN_ACCEL(2);
-        if ((info->ChipFamily == CHIP_FAMILY_RADEON) ||
-            (info->ChipFamily == CHIP_FAMILY_RV200))
-            OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, 0);
-        else
-            OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS);
-	OUT_ACCEL_REG(RADEON_SE_COORD_FMT,
-	    RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
-	    RADEON_VTX_ST0_NONPARAMETRIC |
-	    RADEON_VTX_ST1_NONPARAMETRIC |
-	    RADEON_TEX1_W_ROUTING_USE_W0);
-	FINISH_ACCEL();
-    }
-
-    BEGIN_ACCEL(3);
-    OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0);
-    OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, 0x07ff07ff);
-    OUT_ACCEL_REG(RADEON_SE_CNTL, RADEON_DIFFUSE_SHADE_GOURAUD |
-				  RADEON_BFACE_SOLID | 
-				  RADEON_FFACE_SOLID |
-				  RADEON_VTX_PIX_CENTER_OGL |
-				  RADEON_ROUND_MODE_ROUND |
-				  RADEON_ROUND_PREC_4TH_PIX);
-    FINISH_ACCEL();
-}
-
 static Bool FUNC_NAME(R100SetupTexture)(
 	ScrnInfoPtr pScrn,
 	CARD32 format,
@@ -474,9 +427,10 @@
     while ( height )
     {
     	tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
-							dst_pitch, &buf_pitch,
-							&dst, &height, &hpass );
-	RADEONHostDataBlitCopyPass( tmp_dst, src, hpass, buf_pitch, src_pitch );
+				      dst_pitch, &buf_pitch,
+				      &dst, &height, &hpass);
+	RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
+				    hpass, buf_pitch, src_pitch );
 	src += hpass * src_pitch;
     }
 
@@ -542,8 +496,8 @@
     if (blend_cntl == 0)
 	return FALSE;
 
-    if (!info->RenderInited3D)
-	RadeonInit3DEngine(pScrn);
+    if (!info->XInited3D)
+	RADEONInit3DEngine(pScrn);
 
     if (!FUNC_NAME(R100SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
 				     width, height, flags))
@@ -593,8 +547,8 @@
     if (blend_cntl == 0)
 	return FALSE;
     
-    if (!info->RenderInited3D)
-	RadeonInit3DEngine(pScrn);
+    if (!info->XInited3D)
+	RADEONInit3DEngine(pScrn);
 
     if (!FUNC_NAME(R100SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
 				     height, flags))
@@ -805,9 +759,10 @@
     while ( height )
     {
         tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width,
-							dst_pitch, &buf_pitch,
-							&dst, &height, &hpass );
-	RADEONHostDataBlitCopyPass( tmp_dst, src, hpass, buf_pitch, src_pitch );
+				      dst_pitch, &buf_pitch,
+				      &dst, &height, &hpass );
+	RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src,
+				    hpass, buf_pitch, src_pitch );
 	src += hpass * src_pitch;
     }
 
@@ -873,8 +828,8 @@
     if (blend_cntl == 0)
 	return FALSE;
 
-    if (!info->RenderInited3D)
-	RadeonInit3DEngine(pScrn);
+    if (!info->XInited3D)
+	RADEONInit3DEngine(pScrn);
 
     if (!FUNC_NAME(R200SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch,
 				     width, height, flags))
@@ -925,8 +880,8 @@
     if (blend_cntl == 0)
 	return FALSE;
 
-    if (!info->RenderInited3D)
-	RadeonInit3DEngine(pScrn);
+    if (!info->XInited3D)
+	RADEONInit3DEngine(pScrn);
 
     if (!FUNC_NAME(R200SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width,
 				     height, flags))
@@ -1074,4 +1029,4 @@
 }
 
 #undef FUNC_NAME
-
+#endif /* USE_XAA */
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_video.c xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_video.c
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_video.c	2005-08-17 11:55:37.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_video.c	2005-09-03 08:37:58.000000000 +1000
@@ -14,6 +14,7 @@
 #include "xf86.h"
 #include "dixstruct.h"
 #include "xf86PciInfo.h"
+#include "xf86fbman.h"
 
 #include <X11/extensions/Xv.h>
 #include "fourcc.h"
@@ -24,6 +25,13 @@
 #include "msp3430.h"
 #include "tda9885.h"
 
+#ifdef USE_EXA
+/* FIXME : the video code hasn't been ported so this is a hack to make
+ * it compile at all without too much ifdefing */
+#include "xaa.h"
+#include "xf86fbman.h"
+#endif
+
 #define OFF_DELAY       250  /* milliseconds */
 #define FREE_DELAY      15000
 
@@ -114,6 +122,19 @@
    (RADEONPortPrivPtr)((RADEONPTR(pScrn))->adaptor->pPortPrivates[0].ptr)
 
 
+#ifdef USE_EXA
+/* Save callback handed to exaOffscreenAlloc(): forget `area` if it is our Xv buffer. */
+static void
+ATIVideoSave(ScreenPtr pScreen, ExaOffscreenArea *area)
+{
+    RADEONInfoPtr info = RADEONPTR(xf86Screens[pScreen->myNum]);
+    RADEONPortPrivPtr port = info->adaptor->pPortPrivates[0].ptr;
+
+    if (area == port->off_screen)
+        port->off_screen = NULL;
+}
+#endif
+
 void RADEONInitVideo(ScreenPtr pScreen)
 {
     ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
@@ -1038,7 +1059,8 @@
     /* this function is called from ScreenInit. pScreen is used 
        by XAA internally, but not valid until ScreenInit finishs.
     */
-    if (info->accelOn && pScrn->pScreen) info->accel->Sync(pScrn);
+    if (info->accelOn && pScrn->pScreen)
+	RADEON_SYNC(info, pScrn);
 
     /* this is done here because each time the server is reset these
        could change.. Otherwise they remain constant */
@@ -1204,20 +1226,20 @@
                                    } else {
                                    t->wComp0Connector=RT_COMP1;
                                    }
-                                xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Composite connector is port %d\n", t->wComp0Connector);
+                xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Composite connector is port %ld\n", t->wComp0Connector);
                                   break;
                         case 3:  if(a & 0x4){
                                    t->wSVideo0Connector=RT_YCR_COMP4;
                                    } else {
                                    t->wSVideo0Connector=RT_YCF_COMP4;
                                    }
-                                xf86DrvMsg(pScrn->scrnIndex, X_INFO, "SVideo connector is port %d\n", t->wSVideo0Connector);
+                xf86DrvMsg(pScrn->scrnIndex, X_INFO, "SVideo connector is port %ld\n", t->wSVideo0Connector);
                                    break;
                         default:
                                 break;
                         }
                 }
-         xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Rage Theatre: Connectors (detected): tuner=%d, composite=%d, svideo=%d\n",
+        xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Rage Theatre: Connectors (detected): tuner=%ld, composite=%ld, svideo=%ld\n",
     	     t->wTunerConnector, t->wComp0Connector, t->wSVideo0Connector);
         
          }
@@ -1226,7 +1248,7 @@
     if(info->RageTheatreCompositePort>=0)t->wComp0Connector=info->RageTheatreCompositePort;
     if(info->RageTheatreSVideoPort>=0)t->wSVideo0Connector=info->RageTheatreSVideoPort;
         
-    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "RageTheatre: Connectors (using): tuner=%d, composite=%d, svideo=%d\n",
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "RageTheatre: Connectors (using): tuner=%ld, composite=%ld, svideo=%ld\n",
     	t->wTunerConnector, t->wComp0Connector, t->wSVideo0Connector);
 
     switch((info->RageTheatreCrystal>=0)?info->RageTheatreCrystal:pll->reference_freq){
@@ -1253,7 +1275,6 @@
     XF86VideoAdaptorPtr adapt;
     RADEONInfoPtr info = RADEONPTR(pScrn);
     RADEONPortPrivPtr pPriv;
-    unsigned char *RADEONMMIO = info->MMIO;
     CARD32 dot_clock;
 
     if(!(adapt = xf86XVAllocateVideoAdaptorRec(pScrn)))
@@ -1531,10 +1552,18 @@
 		if(pPriv->uda1380 != NULL) xf86_uda1380_mute(pPriv->uda1380, TRUE);
         if(pPriv->i2c != NULL) RADEON_board_setmisc(pPriv);
      }
+#ifdef USE_EXA
+     if (pPriv->off_screen) {
+         exaOffscreenFree (pScrn->pScreen, pPriv->off_screen);
+         pPriv->off_screen = 0;
+     }
+#endif
+#ifdef USE_XAA
      if(info->videoLinear) {
 	xf86FreeOffscreenLinear(info->videoLinear);
 	info->videoLinear = NULL;
      }
+#endif
      pPriv->videoStatus = 0;
   } else {
      if(pPriv->videoStatus & CLIENT_VIDEO_ON) {
@@ -1556,7 +1585,7 @@
     Bool		setAlpha = FALSE;
     unsigned char *RADEONMMIO = info->MMIO;
 
-    info->accel->Sync(pScrn);
+    RADEON_SYNC(info, pScrn);
 
 #define RTFSaturation(a)   (1.0 + ((a)*1.0)/1000.0)
 #define RTFBrightness(a)   (((a)*1.0)/2000.0)
@@ -1785,7 +1814,7 @@
    else if(attribute == xvAdjustment) 
    {
   	pPriv->adjustment=value;
-        xf86DrvMsg(pScrn->scrnIndex,X_ERROR,"Setting pPriv->adjustment to %d\n", pPriv->adjustment);
+        xf86DrvMsg(pScrn->scrnIndex,X_ERROR,"Setting pPriv->adjustment to %ld\n", pPriv->adjustment);
   	if(pPriv->tda9885!=0){
 		pPriv->tda9885->top_adjustment=value;
 		RADEON_TDA9885_SetEncoding(pPriv);
@@ -1825,7 +1854,7 @@
     RADEONInfoPtr	info = RADEONPTR(pScrn);
     RADEONPortPrivPtr	pPriv = (RADEONPortPrivPtr)data;
 
-    if (info->accelOn) info->accel->Sync(pScrn);
+    if (info->accelOn) RADEON_SYNC(info, pScrn);
 
     if(attribute == xvAutopaintColorkey)
 	*value = pPriv->autopaint_colorkey;
@@ -2008,11 +2037,11 @@
   ScrnInfoPtr pScrn,
   unsigned char *src,
   unsigned char *dst,
-  int srcPitch,
-  int dstPitch,
-  int h,
-  int w,
-  int bpp
+  unsigned int srcPitch,
+  unsigned int dstPitch,
+  unsigned int h,
+  unsigned int w,
+  unsigned int bpp
 ){
     RADEONInfoPtr info = RADEONPTR(pScrn);
 #ifdef XF86DRI
@@ -2033,7 +2062,8 @@
 	while ( buf = RADEONHostDataBlit( pScrn, bpp, w, dstPitch,
 					  &bufPitch, &dst, &h, &hpass ) )
 	{
-	    RADEONHostDataBlitCopyPass( buf, src, hpass, bufPitch, srcPitch );
+	    RADEONHostDataBlitCopyPass( pScrn, bpp, buf, src, hpass, bufPitch,
+					srcPitch );
 	    src += hpass * srcPitch;
 	}
 
@@ -2046,24 +2076,25 @@
     {
 #if X_BYTE_ORDER == X_BIG_ENDIAN
 	unsigned char *RADEONMMIO = info->MMIO;
-	if ( bpp == 2 )
-	{
-	    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl
-					& ~(RADEON_NONSURF_AP0_SWP_32BPP
-					| RADEON_NONSURF_AP0_SWP_16BPP));
-	}
-	else /* bpp == 4 */
-	{
-	    OUTREG(RADEON_SURFACE_CNTL, (info->ModeReg.surface_cntl
-					| RADEON_NONSURF_AP0_SWP_32BPP)
-					& ~RADEON_NONSURF_AP0_SWP_16BPP);
+	unsigned int swapper = info->ModeReg.surface_cntl &
+		~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP |
+		  RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP);
+
+	switch(bpp) {
+	case 2:
+	    swapper |= RADEON_NONSURF_AP0_SWP_16BPP
+		    |  RADEON_NONSURF_AP1_SWP_16BPP;
+	    break;
+	case 4:
+	    swapper |= RADEON_NONSURF_AP0_SWP_32BPP
+		    |  RADEON_NONSURF_AP1_SWP_32BPP;
+	    break;
 	}
+	OUTREG(RADEON_SURFACE_CNTL, swapper);
 #endif
+	w *= bpp;
 
-	w *= 2;
-
-	while (h--)
-	{
+	while (h--) {
 	    memcpy(dst, src, w);
 	    src += srcPitch;
 	    dst += dstPitch;
@@ -2097,13 +2128,13 @@
   ScrnInfoPtr pScrn,
   unsigned char *src,
   unsigned char *dst,
-  int srcPitch,
-  int dstPitch,
-  int h,
-  int w
+  unsigned int srcPitch,
+  unsigned int dstPitch,
+  unsigned int h,
+  unsigned int w
 ){
     CARD32 *dptr;
-    CARD8 *sptr;
+    CARD8 *sptr = 0;
     int i,j;
     RADEONInfoPtr info = RADEONPTR(pScrn);
 #ifdef XF86DRI
@@ -2113,6 +2144,8 @@
 	CARD32 bufPitch;
 	unsigned int hpass;
 
+	/* XXX Fix endian flip on R300 */
+
 	while ( dptr = ( CARD32* )RADEONHostDataBlit( pScrn, 4, w, dstPitch,
 						      &bufPitch, &dst, &h,
 						      &hpass ) )
@@ -2171,11 +2204,11 @@
    unsigned char *src2,
    unsigned char *src3,
    unsigned char *dst1,
-   int srcPitch,
-   int srcPitch2,
-   int dstPitch,
-   int h,
-   int w
+   unsigned int srcPitch,
+   unsigned int srcPitch2,
+   unsigned int dstPitch,
+   unsigned int h,
+   unsigned int w
 ){
     RADEONInfoPtr info = RADEONPTR(pScrn);
 #ifdef XF86DRI
@@ -2186,6 +2219,8 @@
 	CARD32 y = 0, bufPitch;
 	unsigned int hpass;
 
+	/* XXX Fix endian flip on R300 */
+
 	while ( buf = RADEONHostDataBlit( pScrn, 4, w/2, dstPitch,
 					  &bufPitch, &dst1, &h, &hpass ) )
 	{
@@ -2249,9 +2284,8 @@
 	    {
 		src2 += srcPitch2;
 		src3 += srcPitch2;
-	    }
+	    }	
 	}
-
 #if X_BYTE_ORDER == X_BIG_ENDIAN
 	/* restore byte swapping */
 	OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl);
@@ -2259,7 +2293,6 @@
     }
 }
 
-
 static FBLinearPtr
 RADEONAllocateMemory(
    ScrnInfoPtr pScrn,
@@ -2267,6 +2300,7 @@
    int size
 ){
    ScreenPtr pScreen;
+   RADEONInfoPtr info = RADEONPTR(pScrn);
    FBLinearPtr new_linear;
 
    if(linear) {
@@ -2280,7 +2314,10 @@
    }
 
    pScreen = screenInfo.screens[pScrn->scrnIndex];
-
+#ifdef USE_EXA
+   if (info->useEXA)
+	ErrorF("XVideo is not finished with Exa\n");
+#endif
    new_linear = xf86AllocateOffscreenLinear(pScreen, size, 16,
 						NULL, NULL, NULL);
 
@@ -2478,7 +2515,7 @@
 
     RADEONWaitForFifo(pScrn, 2);
     OUTREG(RADEON_OV0_REG_LOAD_CNTL, 1);
-    if (info->accelOn) info->accel->Sync(pScrn);
+    if (info->accelOn) RADEON_SYNC(info, pScrn);
     while(!(INREG(RADEON_OV0_REG_LOAD_CNTL) & (1 << 3)));
 
     dsr=(double)(1<<0xC)/h_inc;
@@ -2721,11 +2758,38 @@
 	break;
    }
 
-   if(!(info->videoLinear = RADEONAllocateMemory(pScrn, info->videoLinear,
-		pPriv->doubleBuffer ? (new_size << 1) : new_size)))
-   {
+
+#ifdef USE_EXA
+   if (info->useEXA) {
+      int real_size = pPriv->doubleBuffer ? (new_size << 1) : new_size;
+
+      if (pPriv->off_screen != NULL && real_size != pPriv->size) {
+	 exaOffscreenFree(pScrn->pScreen, pPriv->off_screen);
+	 pPriv->off_screen = 0;
+      }
+
+      if (pPriv->off_screen == NULL) {
+	 pPriv->off_screen = exaOffscreenAlloc(pScrn->pScreen, real_size,
+					       64, TRUE, ATIVideoSave, pPriv);
+	 if (pPriv->off_screen == NULL)
+	    return BadAlloc;
+	 pPriv->size = real_size;
+      }
+      /* The copy offset computed below is based on video_offset, so record it here. */
+      pPriv->video_offset = pPriv->off_screen->offset;
+   }
+#endif
+#ifdef USE_XAA
+   if (!info->useEXA) {
+      if(!(info->videoLinear = RADEONAllocateMemory(pScrn, info->videoLinear,
+	 pPriv->doubleBuffer ? (new_size << 1) : new_size)))
+      {
 	return BadAlloc;
+      }
+      /* Same `offset * bpp` scaling the replaced offset computation used. */
+      pPriv->video_offset = info->videoLinear->offset * bpp;
    }
+#endif
 
    pPriv->currentBuffer ^= 1;
 
@@ -2734,7 +2794,8 @@
    left = (xa >> 16) & ~1;
    npixels = ((((xb + 0xffff) >> 16) + 1) & ~1) - left;
 
-   offset = (info->videoLinear->offset * bpp) + (top * dstPitch);
+   offset = (pPriv->video_offset) + (top * dstPitch);
+
    if(pPriv->doubleBuffer)
 	offset += pPriv->currentBuffer * new_size * bpp;
 
@@ -2861,10 +2922,18 @@
 	    }
 	} else {  /* FREE_TIMER */
 	    if(pPriv->freeTime < now) {
+#ifdef USE_EXA
+                if (pPriv->off_screen) {
+                    exaOffscreenFree (pScrn->pScreen, pPriv->off_screen);
+                    pPriv->off_screen = 0;
+                }
+#endif /* USE_EXA */
+#ifdef USE_XAA
 		if(info->videoLinear) {
 		   xf86FreeOffscreenLinear(info->videoLinear);
 		   info->videoLinear = NULL;
 		}
+#endif /* USE_XAA */
 		pPriv->videoStatus = 0;
 		info->VideoTimerCallback = NULL;
 	    }
@@ -3116,7 +3185,7 @@
    int mult;
    int vbi_line_width, vbi_start, vbi_end;
 
-   info->accel->Sync(pScrn);
+    RADEON_SYNC(info, pScrn);
    /*
     * s2offset, s3offset - byte offsets into U and V plane of the
     *                      source where copying starts.  Y plane is
@@ -3216,10 +3285,36 @@
    }
 
    new_size = new_size + 0x1f; /* for aligning */
-   if(!(info->videoLinear = RADEONAllocateMemory(pScrn, info->videoLinear, new_size*mult+(pPriv->capture_vbi_data?2*2*vbi_line_width*21:0))))
-   {
-        return BadAlloc;
-   }
+
+#ifdef USE_EXA
+    if (info->useEXA) {
+	new_size = new_size*mult+(pPriv->capture_vbi_data?2*2*vbi_line_width*21:0);
+	if (pPriv->off_screen != NULL && new_size != pPriv->size) {
+	    exaOffscreenFree(pScrn->pScreen, pPriv->off_screen);
+	    pPriv->off_screen = NULL;
+	}
+
+	if (pPriv->off_screen == NULL) {
+	    pPriv->off_screen = exaOffscreenAlloc(pScrn->pScreen, new_size,
+						  64, TRUE, ATIVideoSave,
+						  pPriv);
+	}
+        if (pPriv->off_screen == NULL)
+            return BadAlloc;
+
+	pPriv->video_offset = pPriv->off_screen->offset;
+        pPriv->size = new_size;
+    }
+#endif
+#ifdef USE_XAA
+    if (!info->useEXA) {
+	info->videoLinear = RADEONAllocateMemory(pScrn, info->videoLinear,
+						 new_size*mult+(pPriv->capture_vbi_data?2*2*vbi_line_width*21:0));
+	if (info->videoLinear == NULL)
+	    return BadAlloc;
+	pPriv->video_offset = info->videoLinear->offset * bpp;
+    }
+#endif
 
 /* I have suspicion that capture engine must be active _before_ Rage Theatre
    is being manipulated with.. */
@@ -3232,20 +3327,20 @@
    switch(pPriv->overlay_deinterlacing_method){
         case METHOD_BOB:
         case METHOD_SINGLE:
-           offset1 = (info->videoLinear->offset*bpp+0xf) & (~0xf);
-           offset2 = ((info->videoLinear->offset+new_size)*bpp + 0xf) & (~0xf);
+           offset1 = (pPriv->video_offset + 0xf) & (~0xf);
+           offset2 = (pPriv->video_offset + new_size * bpp + 0xf) & (~0xf);
            offset3 = offset1;
            offset4 = offset2;
            break;
         case METHOD_WEAVE:
-           offset1 = (info->videoLinear->offset*bpp+0xf) & (~0xf);
+           offset1 = (pPriv->video_offset + 0xf) & (~0xf);
            offset2 = offset1+dstPitch;
-           offset3 = ((info->videoLinear->offset+2*new_size)*bpp + 0xf) & (~0xf);
+           offset3 = (pPriv->video_offset + 2 * new_size * bpp + 0xf) & (~0xf);
            offset4 = offset3+dstPitch;
            break;
         default:
-           offset1 = (info->videoLinear->offset*bpp+0xf) & (~0xf);
-           offset2 = ((info->videoLinear->offset+new_size)*bpp + 0xf) & (~0xf);
+           offset1 = (pPriv->video_offset + 0xf) & (~0xf);
+           offset2 = (pPriv->video_offset + new_size * bpp + 0xf) & (~0xf);
            offset3 = offset1;
            offset4 = offset2;
         }
@@ -3268,8 +3363,7 @@
             vbi_end = 20;
         }
 
-
-        vbi_offset0 = ((info->videoLinear->offset+mult*new_size)*bpp+0xf) & (~0xf);
+        vbi_offset0 = (pPriv->video_offset + mult * new_size * bpp + 0xf) & (~0xf);
         vbi_offset1 = vbi_offset0 + dstPitch*20;
         OUTREG(RADEON_CAP0_VBI0_OFFSET, vbi_offset0+display_base);
         OUTREG(RADEON_CAP0_VBI1_OFFSET, vbi_offset1+display_base);
diff -urN xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_video.h xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_video.h
--- xc-HEAD.orig/programs/Xserver/hw/xfree86/drivers/ati/radeon_video.h	2005-08-17 11:55:37.000000000 +1000
+++ xc-HEAD/programs/Xserver/hw/xfree86/drivers/ati.anholt-15/radeon_video.h	2005-09-03 08:37:58.000000000 +1000
@@ -81,6 +81,13 @@
    Bool          autopaint_colorkey;
    Bool		 crt2; /* 0=CRT1, 1=CRT2 */
 
+#ifdef USE_EXA
+   int              size;
+   ExaOffscreenArea *off_screen;
+#endif
+
+   int           video_offset;
+
    Atom          device_id, location_id, instance_id;
 } RADEONPortPrivRec, *RADEONPortPrivPtr;
 





More information about the xorg mailing list