[PATCH xf86-video-r128] Add EXA support

Connor Behan connor.behan at gmail.com
Mon Jul 16 12:56:00 PDT 2012


This introduces EXA acceleration which can improve performance and allow
2D acceleration to be used without XAA. Implemented hooks are Solid,
Copy and Composite. They appear to pass all rendercheck tests, except
the gradient test which XAA also fails. Tested on multiple color depths,
with and without DRI, with and without the composite extension. Hardware
cursor, Xvideo and page flipping are supported as well.
Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=47866

Signed-off-by: Connor Behan <connor.behan at gmail.com>
---
 configure.ac          |  98 +++++++
 man/r128.man          |  21 ++
 src/Makefile.am       |   6 +-
 src/r128.h            |  84 ++++++
 src/r128_accel.c      |  37 ++-
 src/r128_cursor.c     |  50 ++--
 src/r128_dri.c        |  64 ++++-
 src/r128_driver.c     | 361 ++++++++++++++++++--------
 src/r128_exa.c        | 493 +++++++++++++++++++++++++++++++++++
 src/r128_exa_render.c | 695 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/r128_video.c      | 120 ++++++---
 11 files changed, 1858 insertions(+), 171 deletions(-)
 create mode 100644 src/r128_exa.c
 create mode 100644 src/r128_exa_render.c

diff --git a/configure.ac b/configure.ac
index 6c4f5d9..c841b46 100644
--- a/configure.ac
+++ b/configure.ac
@@ -63,6 +63,11 @@ AC_ARG_ENABLE(dri, AS_HELP_STRING([--disable-dri],
               [DRI="$enableval"],
               [DRI=auto])
 
+AC_ARG_ENABLE(exa, AS_HELP_STRING([--disable-exa],
+				  [Disable EXA support [[default=enabled]]]),
+              [EXA="$enableval"],
+              [EXA=yes])
+
 # Store the list of server defined optional extensions in REQUIRED_MODULES
 XORG_DRIVER_CHECK_EXT(RANDR, randrproto)
 XORG_DRIVER_CHECK_EXT(RENDER, renderproto)
@@ -112,6 +117,39 @@ fi
 
 SAVE_CPPFLAGS="$CPPFLAGS"
 CPPFLAGS="$CPPFLAGS $XORG_CFLAGS"
+# Properly handle EXA.  CPPFLAGS already includes $XORG_CFLAGS here (set just
+# above), so do not save/restore it again: that would clobber SAVE_CPPFLAGS.
+AC_MSG_CHECKING([whether to enable EXA support])
+if test "x$EXA" = xyes; then
+        AC_MSG_RESULT(yes)
+        AC_CHECK_HEADER(exa.h,
+                       [have_exa_h="yes"], [have_exa_h="no"])
+else
+        AC_MSG_RESULT(no)
+        have_exa_h="no"
+fi
+
+USE_EXA=no
+if test "x$have_exa_h" = xyes; then
+        AC_MSG_CHECKING([whether EXA version is at least 2.0.0])
+        AC_PREPROC_IFELSE([AC_LANG_PROGRAM([[
+#include "exa.h"
+#if EXA_VERSION_MAJOR < 2
+#error OLD EXA!
+#endif
+                          ]])],
+                          [USE_EXA=yes],
+                          [USE_EXA=no])
+        AC_MSG_RESULT($USE_EXA)
+
+        if test "x$USE_EXA" = xyes; then
+                AC_DEFINE(USE_EXA, 1, [Build support for Exa])
+        fi
+fi
+# src/Makefile.am tests "if USE_EXA", which automake only accepts when the
+# conditional is declared in configure.ac.
+AM_CONDITIONAL(USE_EXA, test "x$USE_EXA" = xyes)
+
 AC_CHECK_DECL(XSERVER_LIBPCIACCESS,
 	      [XSERVER_LIBPCIACCESS=yes],[XSERVER_LIBPCIACCESS=no],
 	      [#include "xorg-server.h"])
@@ -143,6 +181,66 @@ if test "x$XSERVER_LIBPCIACCESS" = xyes; then
 fi
 AM_CONDITIONAL(XSERVER_LIBPCIACCESS, test "x$XSERVER_LIBPCIACCESS" = xyes)
 
+# Checks for headers/macros for byte swapping
+# Known variants:
+#	<byteswap.h> bswap_16, bswap_32, bswap_64  (glibc)
+#	<sys/endian.h> __swap16, __swap32, __swap64 (OpenBSD)
+#	<sys/endian.h> bswap16, bswap32, bswap64 (other BSD's)
+#	and a fallback to local macros if none of the above are found
+
+# if <byteswap.h> is found, assume it's the correct version
+AC_CHECK_HEADERS([byteswap.h])
+
+# if <sys/endian.h> is found, have to check which version
+AC_CHECK_HEADER([sys/endian.h], [HAVE_SYS_ENDIAN_H="yes"], [HAVE_SYS_ENDIAN_H="no"])
+
+if test "x$HAVE_SYS_ENDIAN_H" = "xyes" ; then
+	AC_MSG_CHECKING([for __swap16 variant of <sys/endian.h> byteswapping macros])
+	AC_LINK_IFELSE([AC_LANG_PROGRAM([
+#include <sys/types.h>
+#include <sys/endian.h>
+ ], [
+int a = 1, b;
+b = __swap16(a);
+ ])
+], [SYS_ENDIAN__SWAP='yes'], [SYS_ENDIAN__SWAP='no'])
+	AC_MSG_RESULT([$SYS_ENDIAN__SWAP])
+
+	AC_MSG_CHECKING([for bswap16 variant of <sys/endian.h> byteswapping macros])
+	AC_LINK_IFELSE([AC_LANG_PROGRAM([
+#include <sys/types.h>
+#include <sys/endian.h>
+ ], [
+int a = 1, b;
+b = bswap16(a);
+ ])
+], [SYS_ENDIAN_BSWAP='yes'], [SYS_ENDIAN_BSWAP='no'])
+	AC_MSG_RESULT([$SYS_ENDIAN_BSWAP])
+
+	if test "$SYS_ENDIAN_BSWAP" = "yes" ; then
+		USE_SYS_ENDIAN_H=yes
+		BSWAP=bswap
+	else
+		if test "$SYS_ENDIAN__SWAP" = "yes" ; then
+			USE_SYS_ENDIAN_H=yes
+			BSWAP=__swap
+		else
+			USE_SYS_ENDIAN_H=no
+		fi
+	fi
+
+	if test "$USE_SYS_ENDIAN_H" = "yes" ; then
+	    AC_DEFINE([USE_SYS_ENDIAN_H], 1,
+		[Define to use byteswap macros from <sys/endian.h>])
+	    AC_DEFINE_UNQUOTED([bswap_16], ${BSWAP}16,
+			[Define to 16-bit byteswap macro])
+	    AC_DEFINE_UNQUOTED([bswap_32], ${BSWAP}32,
+			[Define to 32-bit byteswap macro])
+	    AC_DEFINE_UNQUOTED([bswap_64], ${BSWAP}64,
+			[Define to 64-bit byteswap macro])
+	fi
+fi
+
 AC_SUBST([moduledir])
 
 DRIVER_NAME=r128
diff --git a/man/r128.man b/man/r128.man
index 4ba933d..d490f93 100644
--- a/man/r128.man
+++ b/man/r128.man
@@ -57,6 +57,27 @@ Enables or disables all hardware acceleration.  The default is to
 .B enable
 hardware acceleration.
 .TP
+.BI "Option \*qEnablePageFlip\*q \*q" boolean \*q
+Enable page flipping for 3D acceleration. This will increase performance
+but may not work correctly in some rare cases, hence the default is
+.B off.
+.TP
+.BI "Option \*qRenderAccel\*q \*q" boolean \*q
+Enables or disables hardware Render acceleration.  It is only supported when
+using EXA acceleration and DRI.  The default is to
+.B enable
+Render acceleration.
+.TP
+.BI "Option \*qAccelMethod\*q \*q" "string" \*q
+Chooses between available acceleration architectures.  Valid options are
+.B XAA
+and
+.B EXA.
+XAA is the traditional acceleration architecture and support for it is very
+stable.  EXA is a newer acceleration architecture with better performance for
+the Render and Composite extensions.  The default is
+.B XAA.
+.TP
 .BI "Option \*qDac6Bit\*q \*q" boolean \*q
 Enables or disables the use of 6 bits per color component when in 8 bpp
 mode (emulates VGA mode).  By default, all 8 bits per color component
diff --git a/src/Makefile.am b/src/Makefile.am
index f8bc8d1..4ee9725 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -30,6 +30,10 @@ if DRI
 R128_DRI_SRCS = r128_dri.c
 endif
 
+if USE_EXA
+R128_EXA_SRCS = r128_exa.c
+endif
+
 AM_CFLAGS = @XORG_CFLAGS@ @DRI_CFLAGS@
 
 r128_drv_la_LTLIBRARIES = r128_drv.la
@@ -37,7 +41,7 @@ r128_drv_la_LDFLAGS = -module -avoid-version
 r128_drv_ladir = @moduledir@/drivers
 r128_drv_la_SOURCES = \
 	r128_accel.c r128_cursor.c r128_dga.c r128_driver.c \
-	r128_video.c r128_misc.c r128_probe.c $(R128_DRI_SRCS)
+	r128_video.c r128_misc.c r128_probe.c $(R128_EXA_SRCS) $(R128_DRI_SRCS)
 
 EXTRA_DIST = \
         compat-api.h \
diff --git a/src/r128.h b/src/r128.h
index bee1562..9b2556f 100644
--- a/src/r128.h
+++ b/src/r128.h
@@ -43,6 +43,11 @@
 				/* PCI support */
 #include "xf86Pci.h"
 
+				/* EXA support */
+#ifdef USE_EXA
+#include "exa.h"
+#endif
+
 				/* XAA and Cursor Support */
 #ifdef HAVE_XAA_H
 #include "xaa.h"
@@ -75,6 +80,36 @@
 
 #include "r128_probe.h"
 
+#if HAVE_BYTESWAP_H
+#include <byteswap.h>
+#elif defined(USE_SYS_ENDIAN_H)
+#include <sys/endian.h>
+#else	/* no system byteswap header: local fallbacks; bswap_16 masks both bytes so the result fits in 16 bits */
+#define bswap_16(value)  \
+        ((((value) & 0xff) << 8) | (((value) >> 8) & 0xff))
+
+#define bswap_32(value) \
+        (((uint32_t)bswap_16((uint16_t)((value) & 0xffff)) << 16) | \
+        (uint32_t)bswap_16((uint16_t)((value) >> 16)))
+
+#define bswap_64(value) \
+        (((uint64_t)bswap_32((uint32_t)((value) & 0xffffffff)) \
+            << 32) | \
+        (uint64_t)bswap_32((uint32_t)((value) >> 32)))
+#endif
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+#define le32_to_cpu(x) bswap_32(x)
+#define le16_to_cpu(x) bswap_16(x)
+#define cpu_to_le32(x) bswap_32(x)
+#define cpu_to_le16(x) bswap_16(x)
+#else
+#define le32_to_cpu(x) (x)
+#define le16_to_cpu(x) (x)
+#define cpu_to_le32(x) (x)
+#define cpu_to_le16(x) (x)
+#endif
+
 #define R128_DEBUG          0   /* Turn off debugging output               */
 #define R128_IDLE_RETRY    32   /* Fall out of idle loops after this count */
 #define R128_TIMEOUT  2000000   /* Fall out of wait loops after this count */
@@ -83,6 +118,8 @@
 #define R128_VBIOS_SIZE 0x00010000
 
 #if R128_DEBUG
+#include "r128_version.h"
+
 #define R128TRACE(x)                                          \
     do {                                                      \
 	ErrorF("(**) %s(%d): ", R128_NAME, pScrn->scrnIndex); \
@@ -233,6 +270,36 @@ typedef enum
     MT_STV
 } R128MonitorType;
 
+#ifdef USE_EXA
+struct r128_2d_state {	/* embedded in the driver info record as state_2d */
+    Bool in_use;
+    Bool composite_setup;	/* reset to FALSE by R128EnterServer() */
+    uint32_t dst_pitch_offset;
+    uint32_t src_pitch_offset;
+    uint32_t dp_gui_master_cntl;
+    uint32_t dp_cntl;
+    uint32_t dp_write_mask;
+    uint32_t dp_brush_frgd_clr;
+    uint32_t dp_brush_bkgd_clr;
+    uint32_t dp_src_frgd_clr;
+    uint32_t dp_src_bkgd_clr;
+    uint32_t default_sc_bottom_right;
+#ifdef R128DRI	/* the fields below exist only when both R128DRI and RENDER are defined */
+#ifdef RENDER
+    Bool has_mask;
+    int x_offset;
+    int y_offset;
+    int widths[2];	/* NOTE(review): the [2] arrays look like per-source/per-mask slots - confirm in r128_exa_render.c */
+    int heights[2];
+    Bool is_transform[2];
+    PictTransform *transform[2];
+    PixmapPtr src_pix;
+    PixmapPtr msk_pix;
+#endif
+#endif
+};
+#endif
+
 typedef struct {
     EntityInfoPtr     pEnt;
     pciVideoPtr       PciInfo;
@@ -285,6 +352,15 @@ typedef struct {
     XAAInfoRecPtr     accel;
 #endif
     Bool              accelOn;
+
+    Bool	      useEXA;
+    Bool	      RenderAccel;
+#ifdef USE_EXA
+    ExaDriverPtr      ExaDriver;
+    XF86ModReqInfo    exaReq;
+    struct r128_2d_state state_2d;
+#endif
+
     xf86CursorInfoPtr cursor;
     unsigned long     cursor_start;
     unsigned long     cursor_end;
@@ -529,6 +605,14 @@ extern void        R128CCEReleaseIndirect(ScrnInfoPtr pScrn);
 extern void        R128CCEWaitForIdle(ScrnInfoPtr pScrn);
 extern int         R128CCEStop(ScrnInfoPtr pScrn);
 
+#ifdef USE_EXA
+extern Bool	   R128EXAInit(ScreenPtr pScreen);
+extern Bool	   R128GetDatatypeBpp(int bpp, uint32_t *type);
+extern Bool	   R128GetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset);
+extern void	   R128DoPrepareCopy(ScrnInfoPtr pScrn, uint32_t src_pitch_offset,
+				    uint32_t dst_pitch_offset, uint32_t datatype, int alu, Pixel planemask);
+#endif
+
 
 #define CCE_PACKET0( reg, n )						\
 	(R128_CCE_PACKET0 | ((n) << 16) | ((reg) >> 2))
diff --git a/src/r128_accel.c b/src/r128_accel.c
index defc076..1df4eb8 100644
--- a/src/r128_accel.c
+++ b/src/r128_accel.c
@@ -1641,7 +1641,6 @@ void R128CCEFlushIndirect( ScrnInfoPtr pScrn, int discard )
     info->indirectStart = buffer->used;
 }
 
-#ifdef HAVE_XAA_H
 /* Flush and release the indirect buffer.
  */
 void R128CCEReleaseIndirect( ScrnInfoPtr pScrn )
@@ -1666,6 +1665,7 @@ void R128CCEReleaseIndirect( ScrnInfoPtr pScrn )
                          &indirect, sizeof(drmR128Indirect));
 }
 
+#ifdef HAVE_XAA_H
 /* This callback is required for multihead cards using XAA */
 static
 void R128RestoreCCEAccelState(ScrnInfoPtr pScrn)
@@ -1873,15 +1873,40 @@ static void R128MMIOAccelInit(ScrnInfoPtr pScrn, XAAInfoRecPtr a)
    graphics hardware for acceleration. */
 Bool R128AccelInit(ScreenPtr pScreen)
 {
-#ifndef HAVE_XAA_H
-    return FALSE;
-#else
     ScrnInfoPtr   pScrn = xf86ScreenToScrn(pScreen);
     R128InfoPtr   info  = R128PTR(pScrn);
     XAAInfoRecPtr a;
 
-    if (!xf86LoadSubModule(pScrn, "xaa"))
-	return FALSE;
+#ifdef USE_EXA
+    if (info->useEXA) {
+        int errmaj = 0, errmin = 0;
+
+        info->exaReq.majorversion = EXA_VERSION_MAJOR;
+        info->exaReq.minorversion = EXA_VERSION_MINOR;
+
+        xf86DrvMsg(pScrn->scrnIndex,X_INFO,"Loading EXA module...\n");
+        if (!LoadSubModule(pScrn->module, "exa", NULL, NULL, NULL, &info->exaReq, &errmaj, &errmin)) {
+            LoaderErrorMsg(NULL, "exa", errmaj, errmin);
+            return FALSE;
+        }
+
+	/* Don't init EXA here because it'll be taken care of in mm init */
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Allocating EXA driver...\n");
+	info->ExaDriver = exaDriverAlloc();
+	if (!info->ExaDriver) {
+	    /* Report failure: info->accel is the XAA record, not an EXA flag. */
+	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Could not allocate EXA driver...\n");
+	    return FALSE;
+	}
+	return TRUE;
+    }
+#endif
+#ifndef HAVE_XAA_H
+    return FALSE;
+#else
+    if (!info->useEXA) {
+        if (!xf86LoadSubModule(pScrn, "xaa")) return FALSE;
+    }
 
     if (!(a = info->accel = XAACreateInfoRec())) return FALSE;
 
diff --git a/src/r128_cursor.c b/src/r128_cursor.c
index 62d277d..974a6d5 100644
--- a/src/r128_cursor.c
+++ b/src/r128_cursor.c
@@ -54,6 +54,11 @@
 				/* X and server generic header files */
 #include "xf86.h"
 
+				/* Because for EXA we need to use a different allocator */
+#ifdef USE_EXA
+#include "exa.h"
+#endif
+
 #if X_BYTE_ORDER == X_BIG_ENDIAN
 #define P_SWAP32( a , b )                \
        ((char *)a)[0] = ((char *)b)[3];  \
@@ -253,11 +258,15 @@ Bool R128CursorInit(ScreenPtr pScreen)
     ScrnInfoPtr           pScrn   = xf86ScreenToScrn(pScreen);
     R128InfoPtr           info    = R128PTR(pScrn);
     xf86CursorInfoPtr     cursor;
-    FBAreaPtr             fbarea;
+    FBAreaPtr             fbarea  = NULL;
+    /* Declared by struct tag so it exists even when USE_EXA is undefined and
+     * exa.h is not included: the allocation check below tests it unconditionally. */
+    struct _ExaOffscreenArea *osArea = NULL;
     int                   width;
     int                   height;
     int                   size;
 
+    int                   cpp = info->CurrentLayout.pixel_bytes;
 
     if (!(cursor = info->cursor = xf86CreateCursorInfoRec())) return FALSE;
 
@@ -284,24 +293,35 @@ Bool R128CursorInit(ScreenPtr pScreen)
     size                      = (cursor->MaxWidth/4) * cursor->MaxHeight;
     width                     = pScrn->displayWidth;
     height                    = (size*2 + 1023) / pScrn->displayWidth;
-    fbarea                    = xf86AllocateOffscreenArea(pScreen,
-							  width,
-							  height,
-							  16,
-							  NULL,
-							  NULL,
-							  NULL);
-
-    if (!fbarea) {
+
+    if(!info->useEXA) {
+	fbarea = xf86AllocateOffscreenArea(pScreen, width, height,
+					   16, NULL, NULL, NULL);
+
+	if (fbarea) {
+	    info->cursor_start    = R128_ALIGN((fbarea->box.x1
+					    + width * fbarea->box.y1)
+					    * cpp, 16);
+	    info->cursor_end      = info->cursor_start + size;
+	}
+    }
+#ifdef USE_EXA
+    else {
+	osArea = exaOffscreenAlloc(pScreen, width * height, 16,
+				   TRUE, NULL, NULL);
+
+	if (osArea) {
+	    info->cursor_start	  = osArea->offset;
+	    info->cursor_end	  = osArea->offset + osArea->size;
+	}
+    }
+#endif
+
+    if ((!info->useEXA && !fbarea) || (info->useEXA && !osArea)) {
 	info->cursor_start    = 0;
 	xf86DrvMsg(pScrn->scrnIndex, X_WARNING,
 		   "Hardware cursor disabled"
 		   " due to insufficient offscreen memory\n");
-    } else {
-	info->cursor_start    = R128_ALIGN((fbarea->box.x1
-					    + width * fbarea->box.y1)
-					   * info->CurrentLayout.pixel_bytes, 16);
-	info->cursor_end      = info->cursor_start + size;
     }
 
     R128TRACE(("R128CursorInit (0x%08x-0x%08x)\n",
diff --git a/src/r128_dri.c b/src/r128_dri.c
index 09b3cff..67e8d1d 100644
--- a/src/r128_dri.c
+++ b/src/r128_dri.c
@@ -301,11 +301,16 @@ static void R128DestroyContext(ScreenPtr pScreen, drm_context_t hwContext,
    can start/stop the engine. */
 static void R128EnterServer(ScreenPtr pScreen)
 {
-#ifdef HAVE_XAA_H
     ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
     R128InfoPtr info = R128PTR(pScrn);
+
+#ifdef HAVE_XAA_H
     if (info->accel) info->accel->NeedToSync = TRUE;
 #endif
+#ifdef USE_EXA
+    if (info->ExaDriver) exaMarkSync(pScreen);
+    info->state_2d.composite_setup = FALSE;
+#endif
 }
 
 /* Called when the X server goes to sleep to allow the X server's
@@ -1390,11 +1395,10 @@ void R128DRICloseScreen(ScreenPtr pScreen)
 
 static void R128DRIRefreshArea(ScrnInfoPtr pScrn, int num, BoxPtr pbox)
 {
-#ifdef HAVE_XAA_H
     R128InfoPtr         info       = R128PTR(pScrn);
     int                 i;
-#endif
     R128SAREAPrivPtr    pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen);
+    PixmapPtr		pPix	   = pScrn->pScreen->GetScreenPixmap(pScrn->pScreen);
 
     /* Don't want to do this when no 3d is active and pages are
      * right-way-round
@@ -1403,49 +1407,89 @@ static void R128DRIRefreshArea(ScrnInfoPtr pScrn, int num, BoxPtr pbox)
 	return;
 
 #ifdef HAVE_XAA_H
-    (*info->accel->SetupForScreenToScreenCopy)(pScrn,
+    if (!info->useEXA) {
+	(*info->accel->SetupForScreenToScreenCopy)(pScrn,
 					       1, 1, GXcopy,
 					       (CARD32)(-1), -1);
+    }
+#endif
+#ifdef USE_EXA
+    if (info->useEXA) {
+	/* uint32_t, not CARD32: matches the R128Get*() prototypes in r128.h
+	 * (CARD32 is unsigned long on ILP32, an incompatible pointer type). */
+	uint32_t src_pitch_offset, dst_pitch_offset, datatype;
+	R128GetPixmapOffsetPitch(pPix, &src_pitch_offset);
+	dst_pitch_offset = src_pitch_offset + (info->backOffset >> 5);
+	R128GetDatatypeBpp(pScrn->bitsPerPixel, &datatype);
+	info->xdir = info->ydir = 1;
+	R128DoPrepareCopy(pScrn, src_pitch_offset, dst_pitch_offset, datatype, GXcopy, ~0);
+    }
+#endif
 
     for (i = 0 ; i < num ; i++, pbox++) {
 	int xa = max(pbox->x1, 0), xb = min(pbox->x2, pScrn->virtualX-1);
 	int ya = max(pbox->y1, 0), yb = min(pbox->y2, pScrn->virtualY-1);
 
 	if (xa <= xb && ya <= yb) {
-	    (*info->accel->SubsequentScreenToScreenCopy)(pScrn, xa, ya,
+#ifdef HAVE_XAA_H
+	    if (!info->useEXA) {
+	        (*info->accel->SubsequentScreenToScreenCopy)(pScrn, xa, ya,
 							 xa + info->backX,
 							 ya + info->backY,
 							 xb - xa + 1,
 							 yb - ya + 1);
+	    }
+#endif
+#ifdef USE_EXA
+	    if (info->useEXA) {
+		(*info->ExaDriver->Copy)(pPix, xa, ya, xa, ya, xb - xa + 1, yb - ya + 1);
+	    }
+#endif
 	}
     }
-#endif
 }
 
 static void R128EnablePageFlip(ScreenPtr pScreen)
 {
-#ifdef HAVE_XAA_H
     ScrnInfoPtr         pScrn      = xf86ScreenToScrn(pScreen);
     R128InfoPtr         info       = R128PTR(pScrn);
     R128SAREAPrivPtr    pSAREAPriv = DRIGetSAREAPrivate(pScreen);
+    PixmapPtr		pPix	   = pScreen->GetScreenPixmap(pScreen);
 
     if (info->allowPageFlip) {
 	/* Duplicate the frontbuffer to the backbuffer */
-	(*info->accel->SetupForScreenToScreenCopy)(pScrn,
+#ifdef HAVE_XAA_H
+	if (!info->useEXA) {
+	    (*info->accel->SetupForScreenToScreenCopy)(pScrn,
 						   1, 1, GXcopy,
 						   (CARD32)(-1), -1);
 
-	(*info->accel->SubsequentScreenToScreenCopy)(pScrn,
+	    (*info->accel->SubsequentScreenToScreenCopy)(pScrn,
 						     0,
 						     0,
 						     info->backX,
 						     info->backY,
 						     pScrn->virtualX,
 						     pScrn->virtualY);
+	}
+#endif
+#ifdef USE_EXA
+	if (info->useEXA) {
+	    /* uint32_t (not CARD32) to match the R128Get*() prototypes in r128.h */
+	    uint32_t src_pitch_offset, dst_pitch_offset, datatype;
+	    R128GetPixmapOffsetPitch(pPix, &src_pitch_offset);
+	    dst_pitch_offset = src_pitch_offset + (info->backOffset >> 5);
+	    R128GetDatatypeBpp(pScrn->bitsPerPixel, &datatype);
+	    info->xdir = info->ydir = 1;
+
+            R128DoPrepareCopy(pScrn, src_pitch_offset, dst_pitch_offset, datatype, GXcopy, ~0);
+
+	    (*info->ExaDriver->Copy)(pPix, 0, 0, 0, 0, pScrn->virtualX, pScrn->virtualY);
+	}
+#endif
 
 	pSAREAPriv->pfAllowPageFlip = 1;
     }
-#endif
 }
 
 static void R128DisablePageFlip(ScreenPtr pScreen)
diff --git a/src/r128_driver.c b/src/r128_driver.c
index 9714896..bb03e80 100644
--- a/src/r128_driver.c
+++ b/src/r128_driver.c
@@ -154,7 +154,9 @@ typedef enum {
   OPTION_FBDEV,
   OPTION_VIDEO_KEY,
   OPTION_SHOW_CACHE,
-  OPTION_VGA_ACCESS
+  OPTION_VGA_ACCESS,
+  OPTION_ACCELMETHOD,
+  OPTION_RENDERACCEL
 } R128Opts;
 
 static const OptionInfoRec R128Options[] = {
@@ -182,6 +184,8 @@ static const OptionInfoRec R128Options[] = {
   { OPTION_VIDEO_KEY,    "VideoKey",         OPTV_INTEGER, {0}, FALSE },
   { OPTION_SHOW_CACHE,   "ShowCache",        OPTV_BOOLEAN, {0}, FALSE },
   { OPTION_VGA_ACCESS,   "VGAAccess",        OPTV_BOOLEAN, {0}, TRUE  },
+  { OPTION_ACCELMETHOD,  "AccelMethod",      OPTV_STRING,  {0}, FALSE },
+  { OPTION_RENDERACCEL,  "RenderAccel",      OPTV_BOOLEAN, {0}, FALSE },
   { -1,                  NULL,               OPTV_NONE,    {0}, FALSE }
 };
 
@@ -2183,16 +2187,85 @@ R128BlockHandler(BLOCKHANDLER_ARGS_DECL)
     }
 }
 
+#ifdef USE_EXA
+Bool R128VerboseInitEXA(ScreenPtr pScreen)	/* returns TRUE iff R128EXAInit() succeeded */
+{
+    ScrnInfoPtr pScrn  = xf86ScreenToScrn(pScreen);
+    R128InfoPtr info   = R128PTR(pScrn);
+    /* Run R128EXAInit(), log the outcome and mirror it in info->accelOn. */
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Going to init EXA...\n");
+
+    if (R128EXAInit(pScreen)) {
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Acceleration enabled\n");
+	info->accelOn = TRUE;
+
+	return TRUE;
+    } else {
+	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+		   "EXA Acceleration initialization failed\n");
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "EXA Acceleration disabled\n");
+	info->accelOn = FALSE;
+
+	return FALSE;
+    }
+}
+#endif
+
+void R128VerboseInitAccel(Bool noAccel, ScreenPtr pScreen)	/* sets info->accelOn and logs why */
+{
+    ScrnInfoPtr pScrn  = xf86ScreenToScrn(pScreen);
+    R128InfoPtr info   = R128PTR(pScrn);
+    /* noAccel skips R128AccelInit() entirely; otherwise its result decides accelOn. */
+    if (!noAccel) {
+	if (R128AccelInit(pScreen)) {
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration enabled\n");
+	    info->accelOn = TRUE;
+	} else {
+	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+		       "Acceleration initialization failed\n");
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration disabled\n");
+	    info->accelOn = FALSE;
+	}
+    } else {
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration disabled\n");
+	info->accelOn = FALSE;
+    }
+}
+
 /* Called at the start of each server generation. */
 Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 {
     ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
     R128InfoPtr info   = R128PTR(pScrn);
     BoxRec      MemBox;
-    int		y2;
+    int width_bytes = (pScrn->displayWidth *
+			   info->CurrentLayout.pixel_bytes);
+    int         x1 = 0, x2 = 0, y1 = 0, y2 = 0;
     Bool	noAccel;
+    /* Declared by struct tag so it exists even when USE_EXA is undefined: the
+     * back/depth buffer checks further down test osArea unconditionally. */
+    struct _ExaOffscreenArea *osArea = NULL;
+    char *optstr;
 
     R128TRACE(("R128ScreenInit %x %d\n", pScrn->memPhysBase, pScrn->fbOffset));
+    info->useEXA = FALSE;
+
+#ifdef USE_EXA
+    optstr = (char *)xf86GetOptValString(info->Options, OPTION_ACCELMETHOD);
+    if (optstr != NULL) {
+	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "AccelMethod option found\n");
+	if (xf86NameCmp(optstr, "EXA") == 0) {
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "AccelMethod is set to EXA, turning EXA on\n");
+	    info->useEXA = TRUE;
+	}
+    }
+#ifdef RENDER
+    info->RenderAccel = xf86ReturnOptValBool(info->Options, OPTION_RENDERACCEL, TRUE);
+    if (info->RenderAccel)	/* the two literals below concatenate: keep the separating space */
+        xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration of RENDER operations will be enabled "
+					     "upon successful loading of DRI and EXA\n");
+#endif
+#endif
 
 #ifdef R128DRI
 				/* Turn off the CCE for now. */
@@ -2242,8 +2315,6 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 	/* FIXME: When we move to dynamic allocation of back and depth
 	   buffers, we will want to revisit the following check for 3
 	   times the virtual size of the screen below. */
-	int width_bytes = (pScrn->displayWidth *
-			   info->CurrentLayout.pixel_bytes);
 	int maxy        = info->FbMapSize / width_bytes;
 
 	if (noAccel) {
@@ -2319,9 +2390,7 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 				/* Memory manager setup */
 #ifdef R128DRI
     if (info->directRenderingEnabled) {
-	FBAreaPtr fbarea;
-	int width_bytes = (pScrn->displayWidth *
-			   info->CurrentLayout.pixel_bytes);
+	FBAreaPtr fbarea = NULL;
 	int cpp = info->CurrentLayout.pixel_bytes;
 	int bufferSize = pScrn->virtualY * width_bytes;
 	int l, total;
@@ -2399,50 +2468,101 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 	MemBox.x2 = pScrn->displayWidth;
 	MemBox.y2 = scanlines;
 
-	if (!xf86InitFBManager(pScreen, &MemBox)) {
-	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
-		       "Memory manager initialization to (%d,%d) (%d,%d) failed\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    return FALSE;
-	} else {
-	    int width, height;
-
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		       "Memory manager initialized to (%d,%d) (%d,%d)\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    if ((fbarea = xf86AllocateOffscreenArea(pScreen,
-						    pScrn->displayWidth,
-						    2, 0, NULL, NULL, NULL))) {
-		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-			   "Reserved area from (%d,%d) to (%d,%d)\n",
-			   fbarea->box.x1, fbarea->box.y1,
-			   fbarea->box.x2, fbarea->box.y2);
+	if (!info->useEXA) {
+	    if (!xf86InitFBManager(pScreen, &MemBox)) {
+	        xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+		           "Memory manager initialization to (%d,%d) (%d,%d) failed\n",
+		           MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	        return FALSE;
 	    } else {
-		xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
-	    }
-	    if (xf86QueryLargestOffscreenArea(pScreen, &width,
-					      &height, 0, 0, 0)) {
-		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-			   "Largest offscreen area available: %d x %d\n",
-			   width, height);
+	        int width, height;
+
+	        xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		           "Memory manager initialized to (%d,%d) (%d,%d)\n",
+		           MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	        if ((fbarea = xf86AllocateOffscreenArea(pScreen,
+						        pScrn->displayWidth,
+						        2, 0, NULL, NULL, NULL))) {
+		    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+			       "Reserved area from (%d,%d) to (%d,%d)\n",
+			       fbarea->box.x1, fbarea->box.y1,
+			       fbarea->box.x2, fbarea->box.y2);
+	        } else {
+		    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
+	        }
+	        if (xf86QueryLargestOffscreenArea(pScreen, &width,
+						  &height, 0, 0, 0)) {
+		    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+			       "Largest offscreen area available: %d x %d\n",
+				width, height);
+	        }
+
+		R128VerboseInitAccel(noAccel, pScreen);
 	    }
 	}
+#ifdef USE_EXA
+	else {
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Filling in EXA memory info\n");
+
+	    R128VerboseInitAccel(noAccel, pScreen);
+	    info->ExaDriver->offScreenBase = pScrn->virtualY * width_bytes;
+
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Filled in offs\n");
+
+	    /* Don't give EXA the true full memory size, because the
+	       textureSize sized chunk on the end is handled by DRI */
+	    info->ExaDriver->memorySize = total;
+
+	    R128VerboseInitEXA(pScreen);
+	}
+#endif
 
 				/* Allocate the shared back buffer */
-	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
-						pScrn->virtualX,
-						pScrn->virtualY,
-						32, NULL, NULL, NULL))) {
+	if(!info->useEXA) {
+	    fbarea = xf86AllocateOffscreenArea(pScreen,
+					       pScrn->virtualX,
+					       pScrn->virtualY,
+					       32, NULL, NULL, NULL);
+
+	    if (fbarea) {
+		x1 = fbarea->box.x1;
+		x2 = fbarea->box.x2;
+		y1 = fbarea->box.y1;
+		y2 = fbarea->box.y2;
+	    }
+	}
+#ifdef USE_EXA
+	else {
 	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		       "Reserved back buffer from (%d,%d) to (%d,%d)\n",
-		       fbarea->box.x1, fbarea->box.y1,
-		       fbarea->box.x2, fbarea->box.y2);
-
-	    info->backX = fbarea->box.x1;
-	    info->backY = fbarea->box.y1;
-	    info->backOffset = (fbarea->box.y1 * width_bytes +
-				fbarea->box.x1 * cpp);
+		       "Actually trying an EXA allocation...\n");
+	    osArea = exaOffscreenAlloc(pScreen,
+				       pScrn->virtualY * width_bytes,
+				       32, TRUE, NULL, NULL);
+
+	    if (osArea) {
+		/* x in pixels, like fbarea->box: the code below scales x1 by cpp */
+		x1 = (osArea->offset % width_bytes) / cpp;
+		x2 = ((osArea->offset + osArea->size) % width_bytes) / cpp;
+		y1 = osArea->offset / width_bytes;
+		y2 = (osArea->offset + osArea->size) / width_bytes;
+		xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Went swimmingly...\n");
+	    }
+	}
+#endif
+
+	if ((!info->useEXA && fbarea) || (info->useEXA && osArea)) {
+	    /* info->backOffset = y1 * width_bytes + x1 * cpp; */
+	    info->backOffset = R128_ALIGN(y1 * width_bytes + x1 * cpp, 16);
+	    info->backX = info->backOffset % width_bytes;
+	    info->backY = info->backOffset / width_bytes;
 	    info->backPitch = pScrn->displayWidth;
+
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Reserved back buffer from (%d,%d) to (%d,%d) offset: %x\n",
+		       x1, y1,
+		       x2, y2, info->backOffset);
 	} else {
 	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve back buffer\n");
 	    info->backX = -1;
@@ -2452,25 +2572,49 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 	}
 
 				/* Allocate the shared depth buffer */
-	if ((fbarea = xf86AllocateOffscreenArea(pScreen,
-						pScrn->virtualX,
-						pScrn->virtualY + 1,
-						32, NULL, NULL, NULL))) {
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		       "Reserved depth buffer from (%d,%d) to (%d,%d)\n",
-		       fbarea->box.x1, fbarea->box.y1,
-		       fbarea->box.x2, fbarea->box.y2);
-
-	    info->depthX = fbarea->box.x1;
-	    info->depthY = fbarea->box.y1;
-	    info->depthOffset = (fbarea->box.y1 * width_bytes +
-				 fbarea->box.x1 * cpp);
+	if(!info->useEXA) {
+	    fbarea = xf86AllocateOffscreenArea(pScreen,
+					       pScrn->virtualX,
+					       pScrn->virtualY + 1,
+					       32, NULL, NULL, NULL);
+	    if (fbarea) {
+		x1 = fbarea->box.x1;
+		x2 = fbarea->box.x2;
+		y1 = fbarea->box.y1;
+		y2 = fbarea->box.y2;
+	    }
+	}
+#ifdef USE_EXA
+	else {
+	    osArea = exaOffscreenAlloc(pScreen,
+				       (pScrn->virtualY + 1) * width_bytes,
+				       32, TRUE, NULL, NULL);
+
+	    if (osArea) {
+		x1 = (osArea->offset % width_bytes) / cpp;	/* pixels: scaled by cpp below */
+		x2 = ((osArea->offset + osArea->size) % width_bytes) / cpp;
+		y1 = osArea->offset / width_bytes;
+		y2 = (osArea->offset + osArea->size) / width_bytes;
+	    }
+	}
+#endif
+
+	if ((!info->useEXA && fbarea) || (info->useEXA && osArea)) {
+	    /* info->depthOffset = y1 * width_bytes + x1 * cpp; */
+	    info->depthOffset = R128_ALIGN(y1 * width_bytes + x1 * cpp, 16);
+	    info->depthX = info->depthOffset % width_bytes;
+	    info->depthY = info->depthOffset / width_bytes;
 	    info->depthPitch = pScrn->displayWidth;
-	    info->spanOffset = ((fbarea->box.y2 - 1) * width_bytes +
-				fbarea->box.x1 * cpp);
+	    info->spanOffset = (y2 - 1) * width_bytes + x1 * cpp;
+
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Reserved depth buffer from (%d,%d) to (%d,%d) offset: %x\n",
+		       x1, y1,
+		       x2, y2, info->depthOffset);
+
 	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
 		       "Reserved depth span from (%d,%d) offset 0x%x\n",
-		       fbarea->box.x1, fbarea->box.y2 - 1, info->spanOffset);
+		       x1, y2 - 1, info->spanOffset);
 	} else {
 	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve depth buffer\n");
 	    info->depthX = -1;
@@ -2485,7 +2629,7 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 		   info->textureSize/1024, info->textureOffset);
     }
     else
-#endif
+#endif /* R128DRI */
     {
 	MemBox.x1 = 0;
 	MemBox.y1 = 0;
@@ -2499,50 +2643,51 @@ Bool R128ScreenInit(SCREEN_INIT_ARGS_DECL)
 	if (y2 > 8191) y2 = 8191;
 	MemBox.y2 = y2;
 
-	if (!xf86InitFBManager(pScreen, &MemBox)) {
-	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
-		       "Memory manager initialization to (%d,%d) (%d,%d) failed\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    return FALSE;
-	} else {
-	    int       width, height;
-	    FBAreaPtr fbarea;
-
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-		       "Memory manager initialized to (%d,%d) (%d,%d)\n",
-		       MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
-	    if ((fbarea = xf86AllocateOffscreenArea(pScreen, pScrn->displayWidth,
-						    2, 0, NULL, NULL, NULL))) {
-		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-			   "Reserved area from (%d,%d) to (%d,%d)\n",
-			   fbarea->box.x1, fbarea->box.y1,
-			   fbarea->box.x2, fbarea->box.y2);
+	if (!info->useEXA) {
+	    if (!xf86InitFBManager(pScreen, &MemBox)) {
+	        xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
+		           "Memory manager initialization to (%d,%d) (%d,%d) failed\n",
+		           MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	        return FALSE;
 	    } else {
-		xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
-	    }
-	    if (xf86QueryLargestOffscreenArea(pScreen, &width, &height,
-					      0, 0, 0)) {
-		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
-			   "Largest offscreen area available: %d x %d\n",
-			   width, height);
+	        int       width, height;
+	        FBAreaPtr fbarea;
+
+	        xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		           "Memory manager initialized to (%d,%d) (%d,%d)\n",
+		           MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2);
+	        if ((fbarea = xf86AllocateOffscreenArea(pScreen, pScrn->displayWidth, 2, 0, NULL, NULL, NULL))) {
+		    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+			       "Reserved area from (%d,%d) to (%d,%d)\n",
+			       fbarea->box.x1, fbarea->box.y1,
+			       fbarea->box.x2, fbarea->box.y2);
+	        } else {
+		    xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "Unable to reserve area\n");
+	        }
+	        if (xf86QueryLargestOffscreenArea(pScreen, &width, &height, 0, 0, 0)) {
+		    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+			       "Largest offscreen area available: %d x %d\n",
+				width, height);
+	        }
+
+		R128VerboseInitAccel(noAccel, pScreen);
 	    }
 	}
-    }
+#ifdef USE_EXA
+	else {
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Filling in EXA memory info\n");
 
-				/* Acceleration setup */
-    if (!noAccel) {
-	if (R128AccelInit(pScreen)) {
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration enabled\n");
-	    info->accelOn = TRUE;
-	} else {
-	    xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
-		       "Acceleration initialization failed\n");
-	    xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration disabled\n");
-	    info->accelOn = FALSE;
+	    R128VerboseInitAccel(noAccel, pScreen);
+	    info->ExaDriver->offScreenBase = pScrn->virtualY * width_bytes;
+
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "Filled in offs\n");
+
+	    info->ExaDriver->memorySize = info->FbMapSize;
+	    R128VerboseInitEXA(pScreen);
 	}
-    } else {
-	xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Acceleration disabled\n");
-	info->accelOn = FALSE;
+#endif
     }
 
 				/* DGA setup */
@@ -4263,6 +4408,10 @@ void R128LeaveVT(VT_FUNC_ARGS_DECL)
 	DRILock(pScrn->pScreen, 0);
 	R128CCE_STOP(pScrn, info);
     }
+#ifdef USE_EXA
+    if (info->useEXA)
+        info->state_2d.composite_setup = FALSE;
+#endif
 #endif
     R128SavePalette(pScrn, save);
     info->PaletteSavedOnVT = TRUE;
@@ -4296,9 +4445,17 @@ static Bool R128CloseScreen(CLOSE_SCREEN_ARGS_DECL)
 	R128UnmapMem(pScrn);
     }
 
+#ifdef USE_EXA
+        if (info->useEXA) {
+	    exaDriverFini(pScreen);
+	    free(info->ExaDriver);
+	} else
+#endif
 #ifdef HAVE_XAA_H
-    if (info->accel)             XAADestroyInfoRec(info->accel);
-    info->accel                  = NULL;
+	{
+            if (info->accel)             XAADestroyInfoRec(info->accel);
+	    info->accel                  = NULL;
+        }
 #endif
 
     if (info->scratch_save)      free(info->scratch_save);
diff --git a/src/r128_exa.c b/src/r128_exa.c
new file mode 100644
index 0000000..8fb8b64
--- /dev/null
+++ b/src/r128_exa.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright 2006 Joseph Garvin
+ * Copyright 2012 Connor Behan
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Joseph Garvin <joseph.h.garvin at gmail.com>
+ *    Connor Behan <connor.behan at gmail.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "r128.h"
+#include "exa.h"
+
+#include "r128_reg.h"
+
+#include "xf86.h"
+
+static struct {		/* ROP3 codes, one row per X11 GX* raster op (used as R128_ROP[alu]) */
+    int rop;		/* code used by the copy path (source operand) */
+    int pattern;	/* code used by the solid-fill path (brush operand) */
+} R128_ROP[] = {
+    { R128_ROP3_ZERO, R128_ROP3_ZERO }, /* GXclear        */
+    { R128_ROP3_DSa,  R128_ROP3_DPa  }, /* GXand          */
+    { R128_ROP3_SDna, R128_ROP3_PDna }, /* GXandReverse   */
+    { R128_ROP3_S,    R128_ROP3_P    }, /* GXcopy         */
+    { R128_ROP3_DSna, R128_ROP3_DPna }, /* GXandInverted  */
+    { R128_ROP3_D,    R128_ROP3_D    }, /* GXnoop         */
+    { R128_ROP3_DSx,  R128_ROP3_DPx  }, /* GXxor          */
+    { R128_ROP3_DSo,  R128_ROP3_DPo  }, /* GXor           */
+    { R128_ROP3_DSon, R128_ROP3_DPon }, /* GXnor          */
+    { R128_ROP3_DSxn, R128_ROP3_PDxn }, /* GXequiv        */
+    { R128_ROP3_Dn,   R128_ROP3_Dn   }, /* GXinvert       */
+    { R128_ROP3_SDno, R128_ROP3_PDno }, /* GXorReverse    */
+    { R128_ROP3_Sn,   R128_ROP3_Pn   }, /* GXcopyInverted */
+    { R128_ROP3_DSno, R128_ROP3_DPno }, /* GXorInverted   */
+    { R128_ROP3_DSan, R128_ROP3_DPan }, /* GXnand         */
+    { R128_ROP3_ONE,  R128_ROP3_ONE  }  /* GXset          */
+};
+
+/* Map bits per pixel to an R128_DATATYPE_* code; TRUE on success, FALSE (and
+ * *type untouched) for unsupported depths.  Depth 15 and 16 are both used as
+ * depth 16, which is okay since src and dest datatypes must be equal.
+ */
+Bool R128GetDatatypeBpp(int bpp, uint32_t *type)
+{
+    uint32_t datatype;
+
+    if (bpp == 8)
+        datatype = R128_DATATYPE_CI8;
+    else if (bpp == 16)
+        datatype = R128_DATATYPE_RGB565;
+    else if (bpp == 24)
+        datatype = R128_DATATYPE_RGB888;
+    else if (bpp == 32)
+        datatype = R128_DATATYPE_ARGB8888;
+    else
+        return FALSE;
+
+    *type = datatype;
+    return TRUE;
+}
+
+static Bool R128GetOffsetPitch(PixmapPtr pPix, int bpp, uint32_t *pitch_offset,
+				 unsigned int offset, unsigned int pitch)
+{
+    /* Validate pitch/offset against the EXA limits, then pack both into one word. */
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pPix->drawable.pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    uint32_t      packed;
+
+    if (pitch > 16320 || (pitch % info->ExaDriver->pixmapPitchAlign) != 0) {
+        R128TRACE(("Bad pitch 0x%08x\n", pitch));
+	return FALSE;
+    }
+    if ((offset % info->ExaDriver->pixmapOffsetAlign) != 0) {
+        R128TRACE(("Bad offset 0x%08x\n", offset));
+	return FALSE;
+    }
+    packed  = (pitch / bpp) << 21;	/* pitch field, starting at bit 21 */
+    packed |= offset >> 5;		/* offset field, in units of 32 bytes */
+    *pitch_offset = packed;
+    return TRUE;
+}
+
+/* Look up the EXA offset and pitch of pPix and pack them into *pitch_offset
+ * through R128GetOffsetPitch, whose TRUE/FALSE result is returned unchanged.
+ * A 24 bpp pixmap is handed on as 8 bpp.
+ */
+Bool R128GetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset)
+{
+    int      bits      = pPix->drawable.bitsPerPixel;
+    uint32_t pix_off   = exaGetPixmapOffset(pPix);
+    uint32_t pix_pitch = exaGetPixmapPitch(pPix);
+
+    if (bits == 24) bits = 8;
+
+    return R128GetOffsetPitch(pPix, bits, pitch_offset, pix_off, pix_pitch);
+}
+
+static void Emit2DState(ScrnInfoPtr pScrn)
+{
+    R128InfoPtr   info      = R128PTR(pScrn);
+    int has_src		    = info->state_2d.src_pitch_offset; /* non-zero when a source pitch/offset is set */
+    unsigned char *R128MMIO = info->MMIO;
+    /* Push the cached info->state_2d register values out through OUTREG. */
+    R128WaitForFifo(pScrn, (has_src ? 10 : 9)); /* count matches the OUTREG writes below */
+
+    OUTREG(R128_DEFAULT_SC_BOTTOM_RIGHT, info->state_2d.default_sc_bottom_right);
+    OUTREG(R128_DP_GUI_MASTER_CNTL, info->state_2d.dp_gui_master_cntl);
+    OUTREG(R128_DP_BRUSH_FRGD_CLR, info->state_2d.dp_brush_frgd_clr);
+    OUTREG(R128_DP_BRUSH_BKGD_CLR, info->state_2d.dp_brush_bkgd_clr);
+    OUTREG(R128_DP_SRC_FRGD_CLR,   info->state_2d.dp_src_frgd_clr);
+    OUTREG(R128_DP_SRC_BKGD_CLR,   info->state_2d.dp_src_bkgd_clr);
+    OUTREG(R128_DP_WRITE_MASK, info->state_2d.dp_write_mask);
+    OUTREG(R128_DP_CNTL, info->state_2d.dp_cntl);
+    /* SRC_PITCH_OFFSET is skipped when no source offset was cached (value 0). */
+    OUTREG(R128_DST_PITCH_OFFSET, info->state_2d.dst_pitch_offset);
+    if (has_src) OUTREG(R128_SRC_PITCH_OFFSET, info->state_2d.src_pitch_offset);
+}
+
+static void EmitCCE2DState(ScrnInfoPtr pScrn)
+{
+    R128InfoPtr   info      = R128PTR(pScrn);
+    int has_src		    = info->state_2d.src_pitch_offset; /* non-zero when a source pitch/offset is set */
+    RING_LOCALS;
+    /* Same register set as Emit2DState, queued with OUT_RING_REG instead of OUTREG. */
+    R128CCE_REFRESH( pScrn, info );
+
+    BEGIN_RING( (has_src ? 20 : 18) ); /* twice the number of OUT_RING_REG calls below */
+
+    OUT_RING_REG( R128_DEFAULT_SC_BOTTOM_RIGHT, info->state_2d.default_sc_bottom_right );
+    OUT_RING_REG( R128_DP_GUI_MASTER_CNTL, info->state_2d.dp_gui_master_cntl );
+    OUT_RING_REG( R128_DP_BRUSH_FRGD_CLR, info->state_2d.dp_brush_frgd_clr );
+    OUT_RING_REG( R128_DP_BRUSH_BKGD_CLR, info->state_2d.dp_brush_bkgd_clr );
+    OUT_RING_REG( R128_DP_SRC_FRGD_CLR,   info->state_2d.dp_src_frgd_clr );
+    OUT_RING_REG( R128_DP_SRC_BKGD_CLR,   info->state_2d.dp_src_bkgd_clr );
+    OUT_RING_REG( R128_DP_WRITE_MASK, info->state_2d.dp_write_mask );
+    OUT_RING_REG( R128_DP_CNTL, info->state_2d.dp_cntl );
+    /* SRC_PITCH_OFFSET is skipped when no source offset was cached (value 0). */
+    OUT_RING_REG( R128_DST_PITCH_OFFSET, info->state_2d.dst_pitch_offset );
+    if (has_src) OUT_RING_REG( R128_SRC_PITCH_OFFSET, info->state_2d.src_pitch_offset );
+
+    ADVANCE_RING();
+}
+
+/* EXA Callbacks */
+
+static Bool
+R128PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
+{
+    ScreenPtr     pScreen   = pPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    /* Set up a solid fill of pPixmap with colour fg, raster op alu and planemask. */
+    int bpp = pPixmap->drawable.bitsPerPixel;
+    uint32_t datatype, dst_pitch_offset;
+    /* FALSE is returned for an unsupported bpp, a bad pitch/offset, or busy 2D state. */
+    if (!R128GetDatatypeBpp(bpp, &datatype)) {
+        R128TRACE(("R128GetDatatypeBpp failed\n"));
+	return FALSE;
+    }
+    if (!R128GetPixmapOffsetPitch(pPixmap, &dst_pitch_offset)) {
+        R128TRACE(("R128GetPixmapOffsetPitch failed\n"));
+	return FALSE;
+    }
+    if (info->state_2d.in_use) return FALSE;
+
+    info->state_2d.in_use = TRUE;
+    info->state_2d.default_sc_bottom_right = (R128_DEFAULT_SC_RIGHT_MAX | R128_DEFAULT_SC_BOTTOM_MAX);
+    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
+    info->state_2d.dp_src_frgd_clr = 0xffffffff;
+    info->state_2d.dp_src_bkgd_clr = 0x00000000;
+    info->state_2d.dp_gui_master_cntl = (R128_GMC_DST_PITCH_OFFSET_CNTL |
+					  R128_GMC_BRUSH_SOLID_COLOR |
+					  (datatype >> 8) |
+					  R128_GMC_SRC_DATATYPE_COLOR |
+					  R128_ROP[alu].pattern |
+					  R128_GMC_CLR_CMP_CNTL_DIS);
+    info->state_2d.dp_brush_frgd_clr = fg;
+    info->state_2d.dp_cntl = (R128_DST_X_LEFT_TO_RIGHT | R128_DST_Y_TOP_TO_BOTTOM);
+    info->state_2d.dp_write_mask = planemask;
+    info->state_2d.dst_pitch_offset = dst_pitch_offset;
+    info->state_2d.src_pitch_offset = 0; /* 0: no source, so the emit helpers skip SRC_PITCH_OFFSET */
+    /* Emit via the CCE ring when direct rendering is enabled, else via MMIO. */
+#ifdef R128DRI
+    if (info->directRenderingEnabled) {
+        EmitCCE2DState(pScrn);
+    } else
+#endif
+    {
+        Emit2DState(pScrn);
+    }
+    return TRUE;
+}
+
+static void
+R128Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
+{
+    ScreenPtr     pScreen   = pPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    unsigned char *R128MMIO = info->MMIO;
+    /* Fill the rectangle (x1,y1)-(x2,y2): destination origin, then its width and height. */
+    R128WaitForFifo(pScrn, 2);
+    OUTREG(R128_DST_Y_X,          (y1 << 16) | x1);
+    OUTREG(R128_DST_WIDTH_HEIGHT, ((x2-x1) << 16) | (y2-y1));
+}
+
+#define R128DoneSolid R128Done
+
+void
+R128DoPrepareCopy(ScrnInfoPtr pScrn, uint32_t src_pitch_offset,
+			uint32_t dst_pitch_offset, uint32_t datatype, int alu, Pixel planemask)
+{
+    R128InfoPtr   info      = R128PTR(pScrn);
+    /* Cache the blit state for a copy and emit it; the direction is read from info->xdir/ydir. */
+    info->state_2d.in_use = TRUE;
+    info->state_2d.dp_gui_master_cntl = (R128_GMC_DST_PITCH_OFFSET_CNTL |
+					  R128_GMC_SRC_PITCH_OFFSET_CNTL |
+					  R128_GMC_BRUSH_NONE |
+					  (datatype >> 8) |
+					  R128_GMC_SRC_DATATYPE_COLOR |
+					  R128_ROP[alu].rop |
+					  R128_DP_SRC_SOURCE_MEMORY |
+					  R128_GMC_CLR_CMP_CNTL_DIS);
+    info->state_2d.dp_cntl = ((info->xdir >= 0 ? R128_DST_X_LEFT_TO_RIGHT : 0) |
+			       (info->ydir >= 0 ? R128_DST_Y_TOP_TO_BOTTOM : 0));
+    info->state_2d.dp_brush_frgd_clr = 0xffffffff;
+    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
+    info->state_2d.dp_src_frgd_clr = 0xffffffff;
+    info->state_2d.dp_src_bkgd_clr = 0x00000000;
+    info->state_2d.dp_write_mask = planemask;
+    info->state_2d.dst_pitch_offset = dst_pitch_offset;
+    info->state_2d.src_pitch_offset = src_pitch_offset;
+    info->state_2d.default_sc_bottom_right = (R128_DEFAULT_SC_RIGHT_MAX | R128_DEFAULT_SC_BOTTOM_MAX);
+    /* Emit via the CCE ring when direct rendering is enabled, else via MMIO. */
+#ifdef R128DRI
+    if (info->directRenderingEnabled) {
+        EmitCCE2DState(pScrn);
+    } else
+#endif
+    {
+        Emit2DState(pScrn);
+    }
+}
+
+static Bool
+R128PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, int ydir, int alu, Pixel planemask)
+{
+    ScreenPtr     pScreen   = pSrcPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    /* EXA PrepareCopy hook: validate both pixmaps, then program the copy state. */
+    int bpp = pDstPixmap->drawable.bitsPerPixel;
+    uint32_t datatype, src_pitch_offset, dst_pitch_offset;
+    /* The datatype is derived from the destination's bits per pixel only. */
+    if (!R128GetDatatypeBpp(bpp, &datatype)) {
+        R128TRACE(("R128GetDatatypeBpp failed\n"));
+	return FALSE;
+    }
+    if (!R128GetPixmapOffsetPitch(pSrcPixmap, &src_pitch_offset)) {
+        R128TRACE(("R128GetPixmapOffsetPitch source failed\n"));
+	return FALSE;
+    }
+    if (!R128GetPixmapOffsetPitch(pDstPixmap, &dst_pitch_offset)) {
+        R128TRACE(("R128GetPixmapOffsetPitch dest failed\n"));
+	return FALSE;
+    }
+    if (info->state_2d.in_use) return FALSE;
+    /* Remember the blit direction; R128DoPrepareCopy and the Copy hooks read it back. */
+    info->xdir = xdir;
+    info->ydir = ydir;
+
+    R128DoPrepareCopy(pScrn, src_pitch_offset, dst_pitch_offset, datatype, alu, planemask);
+
+    return TRUE;
+}
+
+static void
+R128Copy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, int width, int height)
+{
+    ScreenPtr     pScreen   = pDstPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    unsigned char *R128MMIO = info->MMIO;
+    /* For right-to-left or bottom-to-top blits, start from the far edge of the rectangle. */
+    if (info->xdir < 0) srcX += width - 1, dstX += width - 1;
+    if (info->ydir < 0) srcY += height - 1, dstY += height - 1;
+
+    R128WaitForFifo(pScrn, 3);
+    OUTREG(R128_SRC_Y_X,          (srcY << 16) | srcX);
+    OUTREG(R128_DST_Y_X,          (dstY << 16) | dstX);
+    OUTREG(R128_DST_HEIGHT_WIDTH, (height << 16) | width);
+}
+
+#define R128DoneCopy R128Done
+
+static void
+R128Sync(ScreenPtr pScreen, int marker) /* installed as the EXA WaitMarker hook on the non-DRI path */
+{
+    R128WaitForIdle(xf86ScreenToScrn(pScreen)); /* marker is not used */
+}
+
+static void
+R128Done(PixmapPtr pPixmap)
+{
+    ScreenPtr     pScreen   = pPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    /* Shared Done hook: release the 2D state and destroy any pixmaps held in state_2d. */
+    info->state_2d.in_use = FALSE;
+#ifdef R128DRI
+#ifdef RENDER
+    if (info->state_2d.src_pix) {
+        pScreen->DestroyPixmap(info->state_2d.src_pix);
+	info->state_2d.src_pix = NULL;
+    }
+    if (info->state_2d.msk_pix) {
+        pScreen->DestroyPixmap(info->state_2d.msk_pix);
+	info->state_2d.msk_pix = NULL;
+    }
+#endif
+#endif
+}
+
+#ifdef R128DRI
+
+#define R128CCEPrepareSolid R128PrepareSolid
+
+static void
+R128CCESolid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
+{
+    ScreenPtr     pScreen   = pPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    RING_LOCALS;
+    /* Ring-buffer version of R128Solid: queue the same two register writes. */
+    R128CCE_REFRESH( pScrn, info );
+
+    BEGIN_RING( 4 ); /* twice the number of OUT_RING_REG calls below */
+
+    OUT_RING_REG( R128_DST_Y_X,          (y1 << 16) | x1 );
+    OUT_RING_REG( R128_DST_WIDTH_HEIGHT, ((x2-x1) << 16) | (y2-y1) );
+
+    ADVANCE_RING();
+}
+
+#define R128CCEDoneSolid R128Done
+
+#define R128CCEPrepareCopy R128PrepareCopy
+
+static void
+R128CCECopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY,
+	 int width, int height)
+{
+    ScreenPtr     pScreen   = pDstPixmap->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    RING_LOCALS;
+    /* Ring-buffer version of R128Copy: queue the same three register writes. */
+    R128CCE_REFRESH( pScrn, info );
+    /* For right-to-left or bottom-to-top blits, start from the far edge of the rectangle. */
+    if (info->xdir < 0) srcX += width - 1, dstX += width - 1;
+    if (info->ydir < 0) srcY += height - 1, dstY += height - 1;
+
+    BEGIN_RING( 6 ); /* twice the number of OUT_RING_REG calls below */
+
+    OUT_RING_REG( R128_SRC_Y_X,          (srcY << 16) | srcX );
+    OUT_RING_REG( R128_DST_Y_X,          (dstY << 16) | dstX );
+    OUT_RING_REG( R128_DST_HEIGHT_WIDTH, (height << 16) | width );
+
+    ADVANCE_RING();
+}
+
+#define R128CCEDoneCopy R128Done
+
+static void
+R128CCESync(ScreenPtr pScreen, int marker) /* installed as the EXA WaitMarker hook on the DRI path */
+{
+    R128CCEWaitForIdle(xf86ScreenToScrn(pScreen)); /* marker is not used */
+}
+
+#ifdef RENDER
+#include "r128_exa_render.c"
+#endif
+
+#endif
+
+Bool
+R128EXAInit(ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
+    R128InfoPtr info  = R128PTR(pScrn);
+    /* Fill in info->ExaDriver, pick CCE or MMIO hooks, init the engine, register with EXA. */
+    info->ExaDriver->exa_major = EXA_VERSION_MAJOR;
+    info->ExaDriver->exa_minor = EXA_VERSION_MINOR;
+
+    info->ExaDriver->memoryBase = info->FB + pScrn->fbOffset;
+    info->ExaDriver->flags = EXA_OFFSCREEN_PIXMAPS | EXA_OFFSCREEN_ALIGN_POT;
+
+#if EXA_VERSION_MAJOR > 2 || (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 3)
+    info->ExaDriver->maxPitchBytes = 16320;
+#endif
+    /* Pitch alignment is in sets of 8 pixels, and we need to cover 32bpp, so it's 32 bytes */
+    info->ExaDriver->pixmapPitchAlign = 32;
+    info->ExaDriver->pixmapOffsetAlign = 32;
+    info->ExaDriver->maxX = 2048;
+    info->ExaDriver->maxY = 2048;
+
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+	       "Setting up EXA callbacks\n");
+
+#ifdef R128DRI
+    if (info->directRenderingEnabled) {
+	info->ExaDriver->PrepareSolid = R128CCEPrepareSolid;
+	info->ExaDriver->Solid = R128CCESolid;
+	info->ExaDriver->DoneSolid = R128CCEDoneSolid;
+
+	info->ExaDriver->PrepareCopy = R128CCEPrepareCopy;
+	info->ExaDriver->Copy = R128CCECopy;
+	info->ExaDriver->DoneCopy = R128CCEDoneCopy;
+
+#ifdef RENDER
+	if (info->RenderAccel) {
+	    info->ExaDriver->CheckComposite = R128CCECheckComposite;
+	    info->ExaDriver->PrepareComposite = R128CCEPrepareComposite;
+	    info->ExaDriver->Composite = R128CCEComposite;
+	    info->ExaDriver->DoneComposite = R128CCEDoneComposite;
+	}
+#endif
+
+	info->ExaDriver->WaitMarker = R128CCESync;
+    } else
+#endif
+    {
+	info->ExaDriver->PrepareSolid = R128PrepareSolid;
+	info->ExaDriver->Solid = R128Solid;
+	info->ExaDriver->DoneSolid = R128DoneSolid;
+
+	info->ExaDriver->PrepareCopy = R128PrepareCopy;
+	info->ExaDriver->Copy = R128Copy;
+	info->ExaDriver->DoneCopy = R128DoneCopy;
+
+	/* The registers used for r128 compositing are CCE specific, just like the
+	 * registers used for radeon compositing are CP specific. The radeon driver
+	 * falls back to different registers when there is no DRI. The equivalent
+	 * registers on the r128 (if they even exist) are not listed in the register
+	 * file so I can't implement compositing without DRI.
+	 */
+
+	info->ExaDriver->WaitMarker = R128Sync;
+    }
+
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+	       "Initalizing 2D acceleration engine...\n");
+    R128EngineInit(pScrn);
+
+    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+	       "Initializing EXA driver...\n");
+
+    if (!exaDriverInit(pScreen, info->ExaDriver)) {
+        free(info->ExaDriver);
+	info->ExaDriver = NULL; /* no dangling pointer: teardown also frees info->ExaDriver */
+	return FALSE;
+    }
+
+    info->state_2d.composite_setup = FALSE;
+    return TRUE;
+}
diff --git a/src/r128_exa_render.c b/src/r128_exa_render.c
new file mode 100644
index 0000000..db14bb1
--- /dev/null
+++ b/src/r128_exa_render.c
@@ -0,0 +1,695 @@
+/*
+ * Copyright 2003 Eric Anholt
+ * Copyright 2003 Anders Carlsson
+ * Copyright 2012 Connor Behan
+ * Copyright 2012 Michel Dänzer
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Anders Carlsson <andersca at gnome.org>
+ *    Eric Anholt <anholt at FreeBSD.org>
+ *    Connor Behan <connor.behan at gmail.com>
+ *    Michel Dänzer <michel.daenzer at amd.com>
+ *
+ */
+
+/* The following is based on the kdrive ATI driver. */
+
+#include <stdio.h>
+#include <string.h>
+
+static struct {		/* blend setup per Render operator; indexed by the composite op */
+    Bool dst_alpha;	/* set in the rows whose sblend refers to destination alpha */
+    Bool src_alpha;	/* set in the rows whose dblend refers to source alpha */
+    CARD32 sblend;	/* source blend factor */
+    CARD32 dblend;	/* destination blend factor */
+} R128BlendOp[] = {
+    /* Clear */
+    {0, 0, R128_ALPHA_BLEND_ZERO        , R128_ALPHA_BLEND_ZERO},
+    /* Src */
+    {0, 0, R128_ALPHA_BLEND_ONE         , R128_ALPHA_BLEND_ZERO},
+    /* Dst */
+    {0, 0, R128_ALPHA_BLEND_ZERO        , R128_ALPHA_BLEND_ONE},
+    /* Over */
+    {0, 1, R128_ALPHA_BLEND_ONE         , R128_ALPHA_BLEND_INVSRCALPHA},
+    /* OverReverse */
+    {1, 0, R128_ALPHA_BLEND_INVDSTALPHA , R128_ALPHA_BLEND_ONE},
+    /* In */
+    {1, 0, R128_ALPHA_BLEND_DSTALPHA    , R128_ALPHA_BLEND_ZERO},
+    /* InReverse */
+    {0, 1, R128_ALPHA_BLEND_ZERO        , R128_ALPHA_BLEND_SRCALPHA},
+    /* Out */
+    {1, 0, R128_ALPHA_BLEND_INVDSTALPHA , R128_ALPHA_BLEND_ZERO},
+    /* OutReverse */
+    {0, 1, R128_ALPHA_BLEND_ZERO        , R128_ALPHA_BLEND_INVSRCALPHA},
+    /* Atop */
+    {1, 1, R128_ALPHA_BLEND_DSTALPHA    , R128_ALPHA_BLEND_INVSRCALPHA},
+    /* AtopReverse */
+    {1, 1, R128_ALPHA_BLEND_INVDSTALPHA , R128_ALPHA_BLEND_SRCALPHA},
+    /* Xor */
+    {1, 1, R128_ALPHA_BLEND_INVDSTALPHA , R128_ALPHA_BLEND_INVSRCALPHA},
+    /* Add */
+    {0, 0, R128_ALPHA_BLEND_ONE         , R128_ALPHA_BLEND_ONE},
+};
+
+static Bool
+R128TransformAffineOrScaled(PictTransformPtr t)
+{
+    /* No transform at all is accepted; otherwise the bottom row of the matrix
+     * must be exactly (0, 0, 1) in fixed point. */
+    if (!t) return TRUE;
+    return !(t->matrix[2][0] != 0 || t->matrix[2][1] != 0 || t->matrix[2][2] != IntToxFixed(1));
+}
+
+static PixmapPtr
+R128GetDrawablePixmap(DrawablePtr pDrawable)
+{
+    /* Windows resolve to their window pixmap; anything else is cast to a pixmap. */
+    if (pDrawable->type != DRAWABLE_WINDOW)
+	return (PixmapPtr)pDrawable;
+    return pDrawable->pScreen->GetWindowPixmap((WindowPtr)pDrawable);
+}
+
+static PixmapPtr
+R128SolidPixmap(ScreenPtr pScreen, uint32_t solid)
+{
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    PixmapPtr	  pPix	    = pScreen->CreatePixmap(pScreen, 1, 1, 32, 0);
+    /* Build a 1x1 32bpp offscreen pixmap holding `solid`; NULL on any failure. */
+    if (!pPix) return NULL;	/* CreatePixmap failed: nothing to migrate or destroy */
+    exaMoveInPixmap(pPix);
+    if (!exaDrawableIsOffscreen(&pPix->drawable)) {
+        pScreen->DestroyPixmap(pPix);
+	return NULL;
+    }
+    info->ExaDriver->WaitMarker(pScreen, 0);	/* call the sync hook before the CPU write */
+    memcpy(info->ExaDriver->memoryBase + exaGetPixmapOffset(pPix), &solid, 4);
+    return pPix;
+}
+
+static Bool
+R128GetDatatypePict1(uint32_t format, uint32_t *type)
+{
+    uint32_t datatype;
+
+    /* Only these three formats are recognised; *type is untouched otherwise. */
+    if (format == PICT_r5g6b5)
+	datatype = R128_DATATYPE_RGB565;
+    else if (format == PICT_x1r5g5b5)
+	datatype = R128_DATATYPE_ARGB1555;
+    else if (format == PICT_x8r8g8b8)
+	datatype = R128_DATATYPE_ARGB8888;
+    else
+	return FALSE;
+    *type = datatype;
+    return TRUE;
+}
+
+static Bool
+R128GetDatatypePict2(uint32_t format, uint32_t *type)
+{
+    uint32_t datatype;
+
+    /* Only these three formats are recognised; *type is untouched otherwise. */
+    if (format == PICT_a8)
+	datatype = R128_DATATYPE_RGB8;
+    else if (format == PICT_r5g6b5)
+	datatype = R128_DATATYPE_RGB565;
+    else if (format == PICT_a8r8g8b8)
+	datatype = R128_DATATYPE_ARGB8888;
+    else
+	return FALSE;
+    *type = datatype;
+    return TRUE;
+}
+
+static Bool
+R128CheckCompositeTexture(PicturePtr pPict, PicturePtr pDstPict, int op)
+{
+    ScreenPtr     pScreen   = pDstPict->pDrawable->pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    /* Decide whether pPict is usable as a source or mask texture for this op. */
+    unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
+    uint32_t tmp1;
+    /* tmp1 only receives the datatype; the call is made for its format check. */
+    if (!R128GetDatatypePict2(pPict->format, &tmp1)) return FALSE;
+    /* A repeating drawable must have power-of-two width and height. */
+    if (pPict->pDrawable) {
+        int w = pPict->pDrawable->width;
+        int h = pPict->pDrawable->height;
+
+        if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0)) {
+            R128TRACE(("NPOT repeat unsupported (%dx%d)\n", w, h));
+	    return FALSE;
+        }
+    }
+    /* Only nearest and bilinear filtering are accepted. */
+    if (pPict->filter != PictFilterNearest && pPict->filter != PictFilterBilinear) {
+	R128TRACE(("Unsupported filter 0x%x\n", pPict->filter));
+	return FALSE;
+    }
+
+    /* The radeon driver has a long explanation about this part that I don't really understand */
+    if (pPict->transform != 0 && repeatType == RepeatNone && PICT_FORMAT_A(pPict->format) == 0) {
+	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0))) {
+	    R128TRACE(("REPEAT_NONE unsupported for transformed xRGB source\n"));
+	    return FALSE;
+	}
+    }
+    if (!R128TransformAffineOrScaled(pPict->transform)) {
+	R128TRACE(("Non-affine transforms not supported\n"));
+	return FALSE;
+    }
+
+    return TRUE;
+}
+
+static Bool
+R128CCECheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture)
+{
+    ScreenPtr     pScreen   = pDstPicture->pDrawable->pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    /* EXA CheckComposite hook: return FALSE for anything the checks below reject. */
+    PixmapPtr pSrcPixmap, pDstPixmap;
+    uint32_t tmp1;
+
+    /* Check for unsupported compositing operations. */
+    if (op >= sizeof(R128BlendOp) / sizeof(R128BlendOp[0])) {
+	R128TRACE(("Unsupported Composite op 0x%x\n", op));
+	return FALSE;
+    }
+    /* Destination, source and mask pixmaps are each limited to 1024x1024. */
+    pDstPixmap = R128GetDrawablePixmap(pDstPicture->pDrawable);
+    if (pDstPixmap->drawable.width > 1024 || pDstPixmap->drawable.height > 1024) {
+	R128TRACE(("Dest w/h too large (%d,%d).\n", pDstPixmap->drawable.width, pDstPixmap->drawable.height));
+	return FALSE;
+    }
+    /* A source without a drawable is only accepted when it is a solid fill. */
+    if (pSrcPicture->pDrawable) {
+        pSrcPixmap = R128GetDrawablePixmap(pSrcPicture->pDrawable);
+        if (pSrcPixmap->drawable.width > 1024 || pSrcPixmap->drawable.height > 1024) {
+	    R128TRACE(("Source w/h too large (%d,%d).\n", pSrcPixmap->drawable.width, pSrcPixmap->drawable.height));
+	    return FALSE;
+        }
+    } else if (pSrcPicture->pSourcePict->type != SourcePictTypeSolidFill) {
+        R128TRACE(("Gradient pictures not supported yet\n"));
+	return FALSE;
+    }
+    /* An a8 destination allows neither alpha blending nor a mask; others must map to a datatype. */
+    if (pDstPicture->format == PICT_a8) {
+        if (R128BlendOp[op].src_alpha || R128BlendOp[op].dst_alpha || pMaskPicture != NULL) {
+	    R128TRACE(("Alpha blending unsupported with A8 dst?\n"));
+	    return FALSE;
+	}
+    } else {
+        if (!R128GetDatatypePict1(pDstPicture->format, &tmp1)) return FALSE;
+    }
+
+    if (pMaskPicture) {
+        PixmapPtr pMaskPixmap;
+
+        if (pMaskPicture->pDrawable) {
+	    pMaskPixmap = R128GetDrawablePixmap(pMaskPicture->pDrawable);
+            if (pMaskPixmap->drawable.width > 1024 || pMaskPixmap->drawable.height > 1024) {
+	        R128TRACE(("Mask w/h too large (%d,%d).\n", pMaskPixmap->drawable.width, pMaskPixmap->drawable.height));
+	        return FALSE;
+            }
+	} else if (pMaskPicture->pSourcePict->type != SourcePictTypeSolidFill) {
+	    R128TRACE(("Gradient pictures not supported yet\n"));
+	    return FALSE;
+	}
+
+	if (pMaskPicture->componentAlpha && R128BlendOp[op].src_alpha) {
+	    R128TRACE(("Component alpha not supported with source alpha blending\n"));
+	    return FALSE;
+	}
+
+	if (!R128CheckCompositeTexture(pMaskPicture, pDstPicture, op)) return FALSE;
+    }
+
+    if (!R128CheckCompositeTexture(pSrcPicture, pDstPicture, op)) return FALSE;
+    return TRUE;
+}
+
+static Bool
+R128TextureSetup(PicturePtr pPict, PixmapPtr pPix, int unit, uint32_t *txsize, uint32_t *tex_cntl_c, Bool trying_solid)
+{
+    ScreenPtr     pScreen   = pPix->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    /* Compute the *txsize bits and the *tex_cntl_c word for one texture unit. */
+    int w, h, bytepp, shift, l2w, l2h, l2p, pitch;
+    /* Pictures without a drawable are treated as 1x1. */
+    if (pPict->pDrawable) {
+	w = pPict->pDrawable->width;
+	h = pPict->pDrawable->height;
+    } else {
+	w = h = 1;
+    }
+    /* The pixmap pitch must be a power of two. */
+    pitch = exaGetPixmapPitch(pPix);
+    if ((pitch & (pitch - 1)) != 0) {
+        R128TRACE(("NPOT pitch 0x%x unsupported\n", pitch));
+	return FALSE;
+    }
+
+    if (!R128GetDatatypePict2(pPict->format, tex_cntl_c)) return FALSE;
+
+    bytepp = PICT_FORMAT_BPP(pPict->format) / 8;
+    *tex_cntl_c |= R128_MIP_MAP_DISABLE;
+
+    if (pPict->filter == PictFilterBilinear) {
+        *tex_cntl_c |= R128_MIN_BLEND_LINEAR | R128_MAG_BLEND_LINEAR;
+    } else if (pPict->filter == PictFilterNearest) {
+	*tex_cntl_c |= R128_MIN_BLEND_NEAREST | R128_MAG_BLEND_NEAREST;
+    } else {
+	R128TRACE(("Bad filter 0x%x\n", pPict->filter));
+	return FALSE;
+    }
+    /* Unit 0 uses the low fields of *txsize; any other unit is shifted up by 16 bits. */
+    if (unit == 0) {
+        shift = 0;
+    } else {
+        shift = 16;
+        *tex_cntl_c |= R128_SEC_SELECT_SEC_ST;
+    }
+    /* l2w/l2h/l2p: R128MinBits() - 1 of width, height and pitch in pixels (presumably log2). */
+    l2w = R128MinBits(w) - 1;
+    l2h = R128MinBits(h) - 1;
+    l2p = R128MinBits(pitch / bytepp) - 1;
+
+    if (pPict->repeat && w == 1 && h == 1) {
+        l2p = 0;
+    } else if (pPict->repeat && l2p != l2w) {
+        R128TRACE(("Repeat not supported for pitch != width\n"));
+	return FALSE;
+    }
+
+    l2w = l2p;
+    /* This is required to handle NPOT height */
+    if ((unit == 1) || (unit == 0 && !pPict->repeat && !trying_solid)) l2h++;
+
+    info->state_2d.widths[unit] = 1 << l2w;
+    info->state_2d.heights[unit] = 1 << l2h;
+    *txsize |= l2p << (R128_TEX_PITCH_SHIFT + shift);
+    *txsize |= ((w > h) ? l2w : l2h) << (R128_TEX_SIZE_SHIFT + shift);
+    *txsize |= l2h << (R128_TEX_HEIGHT_SHIFT + shift);
+
+    if (pPict->transform != 0) {
+        info->state_2d.is_transform[unit] = TRUE;
+        info->state_2d.transform[unit] = pPict->transform;
+    } else {
+        info->state_2d.is_transform[unit] = FALSE;
+    }
+
+    return TRUE;
+}
+
+/* The composite preparation commands that are the same every time can
+ * just be written once: five register writes, i.e. BEGIN_RING( 10 ).
+ */
+#define COMPOSITE_SETUP()				\
+do {							\
+    BEGIN_RING( 10 );					\
+							\
+    OUT_RING_REG(R128_SCALE_3D_CNTL,			\
+		    R128_SCALE_3D_TEXMAP_SHADE |	\
+		    R128_SCALE_PIX_REPLICATE |		\
+		    R128_TEX_CACHE_SPLIT |		\
+		    R128_TEX_MAP_ALPHA_IN_TEXTURE |	\
+		    R128_TEX_CACHE_LINE_SIZE_4QW);	\
+    OUT_RING_REG(R128_SETUP_CNTL,			\
+		    R128_COLOR_SOLID_COLOR |		\
+		    R128_PRIM_TYPE_TRI |		\
+		    R128_TEXTURE_ST_MULT_W |		\
+		    R128_STARTING_VERTEX_1 |		\
+		    R128_ENDING_VERTEX_3 |		\
+		    R128_SUB_PIX_4BITS);		\
+    OUT_RING_REG(R128_PM4_VC_FPU_SETUP,			\
+		    R128_FRONT_DIR_CCW |		\
+		    R128_BACKFACE_CULL |		\
+		    R128_FRONTFACE_SOLID |		\
+		    R128_FPU_COLOR_SOLID |		\
+		    R128_FPU_SUB_PIX_4BITS |		\
+		    R128_FPU_MODE_3D |			\
+		    R128_TRAP_BITS_DISABLE |		\
+		    R128_XFACTOR_2 |			\
+		    R128_YFACTOR_2 |			\
+		    R128_FLAT_SHADE_VERTEX_OGL |	\
+		    R128_FPU_ROUND_TRUNCATE |		\
+		    R128_WM_SEL_8DW);			\
+    OUT_RING_REG(R128_PLANE_3D_MASK_C, 0xffffffff);	\
+    OUT_RING_REG(R128_CONSTANT_COLOR_C, 0xff000000);	\
+							\
+    ADVANCE_RING();					\
+} while(0)
+
+/* EXA PrepareComposite hook for the CCE path.
+ *
+ * Chooses the destination datatype, substitutes scratch pixmaps from
+ * R128SolidPixmap() for a source or mask picture passed without a pixmap,
+ * sets up texture unit 0 (source) and unit 1 (mask), then records the blend
+ * and 2D state in info->state_2d and emits it to the ring.
+ *
+ * Returns TRUE once the operation is programmed, FALSE if a check fails or a
+ * scratch pixmap cannot be created.  Scratch pixmaps created here are
+ * destroyed on failure; state_2d only takes them after the last check.
+ */
+static Bool
+R128CCEPrepareComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture,
+    PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst)
+{
+    ScreenPtr     pScreen   = pDst->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    unsigned char *R128MMIO = info->MMIO;
+    RING_LOCALS;
+
+    Bool add_src = FALSE;   /* pSrc is a scratch pixmap created here */
+    Bool add_msk = FALSE;   /* pMask is a scratch pixmap created here */
+    uint32_t txsize = 0, prim_tex_cntl_c, sec_tex_cntl_c = 0, dstDatatype;
+    uint32_t src_pitch_offset, dst_pitch_offset, color_factor, in_color_factor, alpha_comb;
+    uint32_t sblend, dblend, blend_cntl, window_offset;
+    int i;
+
+    if (pDstPicture->format == PICT_a8) {
+        if (R128BlendOp[op].dst_alpha) {
+            R128TRACE(("Can't dst alpha blend A8\n"));
+            return FALSE;
+        }
+        dstDatatype = R128_DATATYPE_Y8;
+    } else {
+        if (!R128GetDatatypePict1(pDstPicture->format, &dstDatatype)) return FALSE;
+    }
+
+    if (!pSrc) {
+        pSrc = R128SolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color));
+        if (!pSrc) {
+            R128TRACE(("Failed to create solid scratch pixmap\n"));
+            return FALSE;
+        }
+        add_src = TRUE;
+    }
+    if (pMaskPicture) {
+        info->state_2d.has_mask = TRUE;
+        if (!pMask) {
+            pMask = R128SolidPixmap(pScreen, cpu_to_le32(pMaskPicture->pSourcePict->solidFill.color));
+            if (!pMask) {
+                R128TRACE(("Failed to create solid scratch pixmap\n"));
+                goto fail;
+            }
+            add_msk = TRUE;
+        }
+    } else {
+        info->state_2d.has_mask = FALSE;
+    }
+
+    if (!R128TextureSetup(pSrcPicture, pSrc, 0, &txsize, &prim_tex_cntl_c, (add_src || add_msk))) goto fail;
+
+    if (pMask != NULL) {
+        info->state_2d.has_mask = TRUE;
+        if (!R128TextureSetup(pMaskPicture, pMask, 1, &txsize, &sec_tex_cntl_c, (add_src || add_msk))) goto fail;
+    } else {
+        info->state_2d.has_mask = FALSE;
+        info->state_2d.is_transform[1] = FALSE;
+    }
+
+    if (!R128GetPixmapOffsetPitch(pDst, &dst_pitch_offset)) goto fail;
+    if (!R128GetPixmapOffsetPitch(pSrc, &src_pitch_offset)) goto fail;
+
+    /* Nothing below can fail, so the scratch pixmaps can be recorded now. */
+    info->state_2d.in_use = TRUE;
+    if (add_src) info->state_2d.src_pix = pSrc;
+    if (add_msk) info->state_2d.msk_pix = pMask;
+    sblend = R128BlendOp[op].sblend;
+    dblend = R128BlendOp[op].dblend;
+    if (PICT_FORMAT_A(pDstPicture->format) == 0 && R128BlendOp[op].dst_alpha) {
+        if (sblend == R128_ALPHA_BLEND_DSTALPHA)
+            sblend = R128_ALPHA_BLEND_ONE;
+        else if (sblend == R128_ALPHA_BLEND_INVDSTALPHA)
+            sblend = R128_ALPHA_BLEND_ZERO;
+    }
+    blend_cntl = (sblend << R128_ALPHA_BLEND_SRC_SHIFT) |
+                 (dblend << R128_ALPHA_BLEND_DST_SHIFT);
+
+    R128CCE_REFRESH( pScrn, info );
+
+    if (!info->state_2d.composite_setup) {
+        COMPOSITE_SETUP();
+        /* DRI and EXA are fighting over control of the texture hardware.
+         * That means we need to set up the compositing hardware every time
+         * while a 3D app is running and once after it closes.
+         */
+        if (!info->have3DWindows)
+            info->state_2d.composite_setup = TRUE;
+    }
+
+    /* We cannot guarantee that this register will stay zero - DRI needs it too. */
+    if (info->have3DWindows)
+        info->ExaDriver->WaitMarker(pScreen, 0);
+    window_offset = INREG(R128_WINDOW_XY_OFFSET);
+    info->state_2d.x_offset = (window_offset & 0xfff00000) >> R128_WINDOW_X_SHIFT;
+    info->state_2d.y_offset = (window_offset & 0x000fffff) >> R128_WINDOW_Y_SHIFT;
+
+    info->state_2d.dp_gui_master_cntl = (R128_GMC_DST_PITCH_OFFSET_CNTL |
+        R128_GMC_BRUSH_SOLID_COLOR | (dstDatatype >> 8) |
+        R128_GMC_SRC_DATATYPE_COLOR | R128_ROP[3].rop |
+        R128_DP_SRC_SOURCE_MEMORY | R128_GMC_3D_FCN_EN |
+        R128_GMC_CLR_CMP_CNTL_DIS | R128_GMC_AUX_CLIP_DIS |
+        R128_GMC_WR_MSK_DIS);
+    info->state_2d.dp_cntl = (R128_DST_X_LEFT_TO_RIGHT | R128_DST_Y_TOP_TO_BOTTOM);
+    info->state_2d.dp_brush_frgd_clr = 0xffffffff;
+    info->state_2d.dp_brush_bkgd_clr = 0x00000000;
+    info->state_2d.dp_src_frgd_clr = 0xffffffff;
+    info->state_2d.dp_src_bkgd_clr = 0x00000000;
+    info->state_2d.dp_write_mask = 0xffffffff;
+    info->state_2d.dst_pitch_offset = dst_pitch_offset;
+    info->state_2d.src_pitch_offset = src_pitch_offset;
+    info->state_2d.default_sc_bottom_right = (R128_DEFAULT_SC_RIGHT_MAX | R128_DEFAULT_SC_BOTTOM_MAX);
+    EmitCCE2DState(pScrn);
+
+    BEGIN_RING( 6 );
+    OUT_RING_REG(R128_MISC_3D_STATE_CNTL_REG,
+        R128_MISC_SCALE_3D_TEXMAP_SHADE | R128_MISC_SCALE_PIX_REPLICATE |
+        R128_ALPHA_COMB_ADD_CLAMP | blend_cntl);
+    OUT_RING_REG(R128_TEX_CNTL_C,
+        R128_TEXMAP_ENABLE | ((pMask != NULL) ? R128_SEC_TEXMAP_ENABLE : 0) |
+        R128_ALPHA_ENABLE | R128_TEX_CACHE_FLUSH);
+    OUT_RING_REG(R128_PC_GUI_CTLSTAT, R128_PC_FLUSH_GUI);
+    ADVANCE_RING();
+
+    /* IN operator: Without a mask, only the first texture unit is enabled.
+     * With a mask, we put the source in the first unit and have it pass
+     * through as input to the 2nd.  The 2nd unit takes the incoming source
+     * pixel and modulates it with either the alpha or each of the channels
+     * in the mask, depending on componentAlpha.
+     */
+    BEGIN_RING( 15 );
+    /* R128_PRIM_TEX_CNTL_C,
+     * R128_PRIM_TEXTURE_COMBINE_CNTL_C,
+     * R128_TEX_SIZE_PITCH_C,
+     * R128_PRIM_TEX_0_OFFSET_C - R128_PRIM_TEX_10_OFFSET_C
+     */
+    OUT_RING(CCE_PACKET0(R128_PRIM_TEX_CNTL_C, 13));
+    OUT_RING(prim_tex_cntl_c);
+
+    /* If this is the only stage and the dest is a8, route the alpha result
+     * to the color (red channel, in particular), too.  Otherwise, be sure
+     * to zero out color channels of an a8 source.
+     */
+    if (pMaskPicture == NULL && pDstPicture->format == PICT_a8)
+        color_factor = R128_COLOR_FACTOR_ALPHA;
+    else if (pSrcPicture->format == PICT_a8)
+        color_factor = R128_COLOR_FACTOR_CONST_COLOR;
+    else
+        color_factor = R128_COLOR_FACTOR_TEX;
+
+    if (PICT_FORMAT_A(pSrcPicture->format) == 0)
+        alpha_comb = R128_COMB_ALPHA_COPY_INP;
+    else
+        alpha_comb = R128_COMB_ALPHA_DIS;
+
+    OUT_RING(R128_COMB_COPY | color_factor | R128_INPUT_FACTOR_INT_COLOR |
+        alpha_comb | R128_ALPHA_FACTOR_TEX_ALPHA | R128_INP_FACTOR_A_CONST_ALPHA);
+    OUT_RING(txsize);
+    /* Writing only the offset register actually used would save output,
+     * but writing all eleven is easy. */
+    for (i = 0; i <= 10; i++)
+        OUT_RING(exaGetPixmapOffset(pSrc));
+    ADVANCE_RING();
+
+    if (pMask != NULL) {
+        BEGIN_RING( 14 );
+        /* R128_SEC_TEX_CNTL_C,
+         * R128_SEC_TEXTURE_COMBINE_CNTL_C,
+         * R128_SEC_TEX_0_OFFSET_C - R128_SEC_TEX_10_OFFSET_C
+         */
+        OUT_RING(CCE_PACKET0(R128_SEC_TEX_CNTL_C, 12));
+        OUT_RING(sec_tex_cntl_c);
+
+        if (pDstPicture->format == PICT_a8) {
+            color_factor = R128_COLOR_FACTOR_ALPHA;
+            in_color_factor = R128_INPUT_FACTOR_PREV_ALPHA;
+        } else if (pMaskPicture->componentAlpha) {
+            color_factor = R128_COLOR_FACTOR_TEX;
+            in_color_factor = R128_INPUT_FACTOR_PREV_COLOR;
+        } else {
+            color_factor = R128_COLOR_FACTOR_ALPHA;
+            in_color_factor = R128_INPUT_FACTOR_PREV_COLOR;
+        }
+
+        OUT_RING(R128_COMB_MODULATE | color_factor | in_color_factor |
+            R128_COMB_ALPHA_MODULATE | R128_ALPHA_FACTOR_TEX_ALPHA |
+            R128_INP_FACTOR_A_PREV_ALPHA);
+        for (i = 0; i <= 10; i++)
+            OUT_RING(exaGetPixmapOffset(pMask));
+        ADVANCE_RING();
+    }
+
+    return TRUE;
+
+fail:
+    /* Scratch pixmaps are not in state_2d yet, so free them here or they leak. */
+    if (add_src) pScreen->DestroyPixmap(pSrc);
+    if (add_msk) pScreen->DestroyPixmap(pMask);
+    info->state_2d.has_mask = FALSE;
+    return FALSE;
+}
+
+typedef union { float f; CARD32 i; } fi_type;
+
+/* Reinterpret a float's bits as a CARD32 via the fi_type union. */
+static inline CARD32
+R128FloatAsInt(float val)
+{
+    fi_type pun;
+    pun.f = val;
+    return pun.i;
+}
+/* Vertex with mask: x, y + 0.125, 0.0, 1.0, then (coord + 0.5) / info->state_2d size for unit 0 and unit 1. */
+#define VTX_OUT_MASK(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY)			\
+do {											\
+    OUT_RING(R128FloatAsInt((_dstX)));							\
+    OUT_RING(R128FloatAsInt(((float)(_dstY)) + 0.125));					\
+    OUT_RING(R128FloatAsInt(0.0));							\
+    OUT_RING(R128FloatAsInt(1.0));							\
+    OUT_RING(R128FloatAsInt((((float)(_srcX)) + 0.5) / (info->state_2d.widths[0])));	\
+    OUT_RING(R128FloatAsInt((((float)(_srcY)) + 0.5) / (info->state_2d.heights[0])));	\
+    OUT_RING(R128FloatAsInt((((float)(_maskX)) + 0.5) / (info->state_2d.widths[1])));	\
+    OUT_RING(R128FloatAsInt((((float)(_maskY)) + 0.5) / (info->state_2d.heights[1])));	\
+} while (0)
+/* Vertex without mask: the same six leading values, no unit 1 coordinates. */
+#define VTX_OUT(_dstX, _dstY, _srcX, _srcY)						\
+do {								       			\
+    OUT_RING(R128FloatAsInt((_dstX)));							\
+    OUT_RING(R128FloatAsInt(((float)(_dstY)) + 0.125));					\
+    OUT_RING(R128FloatAsInt(0.0));							\
+    OUT_RING(R128FloatAsInt(1.0));							\
+    OUT_RING(R128FloatAsInt((((float)(_srcX)) + 0.5) / (info->state_2d.widths[0])));	\
+    OUT_RING(R128FloatAsInt((((float)(_srcY)) + 0.5) / (info->state_2d.heights[0])));	\
+} while (0)
+/* EXA Composite hook (CCE): emit one w x h rectangle at dstX,dstY as a 4-vertex triangle fan. */
+static void
+R128CCEComposite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, int dstX, int dstY, int w, int h)
+{
+    ScreenPtr     pScreen   = pDst->drawable.pScreen;
+    ScrnInfoPtr   pScrn     = xf86ScreenToScrn(pScreen);
+    R128InfoPtr   info      = R128PTR(pScrn);
+    RING_LOCALS;
+
+    int srcXend, srcYend, maskXend, maskYend;
+    PictVector v;
+    /* Opposite corners of the source and mask rectangles. */
+    srcXend = srcX + w;
+    srcYend = srcY + h;
+    maskXend = maskX + w;
+    maskYend = maskY + h;
+    if (info->state_2d.is_transform[0]) { /* map both source corners through the transform */
+        v.vector[0] = IntToxFixed(srcX);
+        v.vector[1] = IntToxFixed(srcY);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(info->state_2d.transform[0], &v);
+        srcX = xFixedToInt(v.vector[0]);
+        srcY = xFixedToInt(v.vector[1]);
+        v.vector[0] = IntToxFixed(srcXend);
+        v.vector[1] = IntToxFixed(srcYend);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(info->state_2d.transform[0], &v);
+        srcXend = xFixedToInt(v.vector[0]);
+        srcYend = xFixedToInt(v.vector[1]);
+    }
+    if (info->state_2d.is_transform[1]) { /* same for both mask corners */
+        v.vector[0] = IntToxFixed(maskX);
+        v.vector[1] = IntToxFixed(maskY);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(info->state_2d.transform[1], &v);
+        maskX = xFixedToInt(v.vector[0]);
+        maskY = xFixedToInt(v.vector[1]);
+        v.vector[0] = IntToxFixed(maskXend);
+        v.vector[1] = IntToxFixed(maskYend);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(info->state_2d.transform[1], &v);
+        maskXend = xFixedToInt(v.vector[0]);
+        maskYend = xFixedToInt(v.vector[1]);
+    }
+    /* Subtract the window offset that PrepareComposite read from R128_WINDOW_XY_OFFSET. */
+    dstX -= info->state_2d.x_offset;
+    dstY -= info->state_2d.y_offset;
+
+    R128CCE_REFRESH( pScrn, info );
+    /* Packet header and vertex format: 8 dwords per vertex with a mask, 6 without. */
+    if (info->state_2d.has_mask) {
+        BEGIN_RING( 3 + 4 * 8 );
+        OUT_RING(CCE_PACKET3(R128_CCE_PACKET3_3D_RNDR_GEN_PRIM, 1 + 4 * 8));
+
+	OUT_RING(R128_CCE_VC_FRMT_RHW |
+            R128_CCE_VC_FRMT_S_T |
+            R128_CCE_VC_FRMT_S2_T2);
+    } else {
+        BEGIN_RING( 3 + 4 * 6 );
+        OUT_RING(CCE_PACKET3(R128_CCE_PACKET3_3D_RNDR_GEN_PRIM, 1 + 4 * 6));
+
+	OUT_RING(R128_CCE_VC_FRMT_RHW |
+            R128_CCE_VC_FRMT_S_T);
+    }
+
+    OUT_RING(R128_CCE_VC_CNTL_PRIM_TYPE_TRI_FAN |
+        R128_CCE_VC_CNTL_PRIM_WALK_RING |
+        (4 << R128_CCE_VC_CNTL_NUM_SHIFT));
+    /* Vertex order: (x, y), (x, y + h), (x + w, y + h), (x + w, y). */
+    if (info->state_2d.has_mask) {
+	VTX_OUT_MASK(dstX,     dstY,     srcX,    srcY,    maskX,    maskY);
+	VTX_OUT_MASK(dstX,     dstY + h, srcX,    srcYend, maskX,    maskYend);
+	VTX_OUT_MASK(dstX + w, dstY + h, srcXend, srcYend, maskXend, maskYend);
+	VTX_OUT_MASK(dstX + w, dstY,     srcXend, srcY,    maskXend, maskY);
+    } else {
+	VTX_OUT(dstX,     dstY,     srcX,    srcY);
+	VTX_OUT(dstX,     dstY + h, srcX,    srcYend);
+	VTX_OUT(dstX + w, dstY + h, srcXend, srcYend);
+	VTX_OUT(dstX + w, dstY,     srcXend, srcY);
+    }
+
+    ADVANCE_RING();
+}
+
+#define R128CCEDoneComposite R128Done /* the CCE DoneComposite hook is R128Done itself */
diff --git a/src/r128_video.c b/src/r128_video.c
index 81b2ab6..4507b30 100644
--- a/src/r128_video.c
+++ b/src/r128_video.c
@@ -56,7 +56,8 @@ typedef struct {
    int           saturation;
    Bool          doubleBuffer;
    unsigned char currentBuffer;
-   FBLinearPtr   linear;
+   void*         BufferHandle;
+   int		 videoOffset;
    RegionRec     clip;
    CARD32        colorKey;
    CARD32        videoStatus;
@@ -270,9 +271,16 @@ R128StopVideo(ScrnInfoPtr pScrn, pointer data, Bool cleanup)
      if(pPriv->videoStatus & CLIENT_VIDEO_ON) {
 	OUTREG(R128_OV0_SCALE_CNTL, 0);
      }
-     if(pPriv->linear) {
-	xf86FreeOffscreenLinear(pPriv->linear);
-	pPriv->linear = NULL;
+     if(pPriv->BufferHandle) {
+        if (!info->useEXA) {
+	   xf86FreeOffscreenLinear((FBLinearPtr) pPriv->BufferHandle);
+	}
+#ifdef USE_EXA
+	else {
+	   exaOffscreenFree(pScrn->pScreen, (ExaOffscreenArea *) pPriv->BufferHandle);
+	}
+#endif
+	pPriv->BufferHandle = NULL;
      }
      pPriv->videoStatus = 0;
   } else {
@@ -381,7 +389,7 @@ R128QueryBestSize(
  *
  */
 
-static Bool
+Bool
 R128DMA(
   R128InfoPtr info,
   unsigned char *src,
@@ -564,45 +572,90 @@ R128CopyData420(
 }
 
 
-static FBLinearPtr
+/* Return the byte offset of an offscreen video buffer of at least `size`
+ * bytes, or 0 on failure.
+ *
+ * *mem_struct is the allocation handle kept between calls: an FBLinearPtr
+ * when EXA is off, an ExaOffscreenArea * when it is on.  An existing
+ * allocation that is large enough (or, without EXA, can be resized) is
+ * reused; otherwise it is freed and a new one is made.  *mem_struct is
+ * set to the new handle, or to NULL if that allocation fails.
+ */
+static CARD32
 R128AllocateMemory(
    ScrnInfoPtr pScrn,
-   FBLinearPtr linear,
+   void **mem_struct,
    int size
 ){
-   ScreenPtr pScreen;
-   FBLinearPtr new_linear;
+   R128InfoPtr info = R128PTR(pScrn);
+   ScreenPtr pScreen = xf86ScrnToScreen(pScrn);
+   int offset = 0;
 
-   if(linear) {
-	if(linear->size >= size)
-	   return linear;
+   if(!info->useEXA) {
+        FBLinearPtr linear = *mem_struct;
+        int cpp = info->CurrentLayout.pixel_bytes;
 
-	if(xf86ResizeOffscreenLinear(linear, size))
-	   return linear;
+	/* XAA allocates in units of pixels at the screen bpp, so adjust size appropriately. */
+	size = (size + cpp - 1) / cpp;
 
-	xf86FreeOffscreenLinear(linear);
-   }
+        if(linear) {
+	     if(linear->size >= size)
+	        return linear->offset * cpp;
 
-   pScreen = xf86ScrnToScreen(pScrn);
+	     if(xf86ResizeOffscreenLinear(linear, size))
+	        return linear->offset * cpp;
 
-   new_linear = xf86AllocateOffscreenLinear(pScreen, size, 8,
+	     xf86FreeOffscreenLinear(linear);
+        }
+
+
+        linear = xf86AllocateOffscreenLinear(pScreen, size, 8,
 						NULL, NULL, NULL);
+	*mem_struct = linear;
 
-   if(!new_linear) {
-	int max_size;
+        if(!linear) {
+	     int max_size;
 
-	xf86QueryLargestOffscreenLinear(pScreen, &max_size, 8,
+	     xf86QueryLargestOffscreenLinear(pScreen, &max_size, 8,
 						PRIORITY_EXTREME);
 
-	if(max_size < size)
-	   return NULL;
+	     if(max_size < size)
+	        return 0;
 
-	xf86PurgeUnlockedOffscreenAreas(pScreen);
-	new_linear = xf86AllocateOffscreenLinear(pScreen, size, 8,
+	     xf86PurgeUnlockedOffscreenAreas(pScreen);
+	     linear = xf86AllocateOffscreenLinear(pScreen, size, 8,
 						NULL, NULL, NULL);
+	     /* Publish the retried allocation; otherwise the caller's handle
+	      * stays NULL and this buffer could never be reused or freed. */
+	     *mem_struct = linear;
+
+	     if(!linear) return 0;
+        }
+
+	offset = linear->offset * cpp;
    }
+#ifdef USE_EXA
+   else {
+        /* EXA support based on mga driver */
+	ExaOffscreenArea *area = *mem_struct;
+
+	if(area) {
+	     if(area->size >= size)
+	        return area->offset;
 
-   return new_linear;
+	     exaOffscreenFree(pScrn->pScreen, area);
+	}
+
+	area = exaOffscreenAlloc(pScrn->pScreen, size, 64, TRUE, NULL, NULL);
+	*mem_struct = area;
+
+	if(!area) return 0;
+
+	offset = area->offset;
+   }
+#endif
+
+   return offset;
 }
 
 static void
@@ -781,7 +822,7 @@ R128PutImage(
    int new_size, offset, s1offset, s2offset, s3offset;
    int srcPitch, srcPitch2, dstPitch;
    int d1line, d2line, d3line, d1offset, d2offset, d3offset;
-   int top, left, npixels, nlines, bpp;
+   int top, left, npixels, nlines;
    BoxRec dstBox;
    CARD32 tmp;
 #if X_BYTE_ORDER == X_BIG_ENDIAN
@@ -833,15 +874,13 @@ R128PutImage(
    dstBox.y1 -= pScrn->frameY0;
    dstBox.y2 -= pScrn->frameY0;
 
-   bpp = pScrn->bitsPerPixel >> 3;
-
    switch(id) {
    case FOURCC_YV12:
    case FOURCC_I420:
 	srcPitch = (width + 3) & ~3;
 	srcPitch2 = ((width >> 1) + 3) & ~3;
 	dstPitch = (width + 31) & ~31;  /* of luma */
-	new_size = ((dstPitch * (height + (height >> 1))) + bpp - 1) / bpp;
+	new_size = dstPitch * (height + (height >> 1));
 	s1offset = 0;
 	s2offset = srcPitch * height;
 	s3offset = (srcPitch2 * (height >> 1)) + s2offset;
@@ -852,14 +891,14 @@ R128PutImage(
 	srcPitch = width << 1;
 	srcPitch2 = 0;
 	dstPitch = ((width << 1) + 15) & ~15;
-	new_size = ((dstPitch * height) + bpp - 1) / bpp;
+	new_size = dstPitch * height;
 	s1offset = 0;
 	s2offset = 0;
 	s3offset = 0;
 	break;
    }
 
-   if(!(pPriv->linear = R128AllocateMemory(pScrn, pPriv->linear,
+   if(!(pPriv->videoOffset = R128AllocateMemory(pScrn, &(pPriv->BufferHandle),
 		pPriv->doubleBuffer ? (new_size << 1) : new_size)))
    {
 	return BadAlloc;
@@ -872,9 +911,9 @@ R128PutImage(
    left = (xa >> 16) & ~1;
    npixels = ((((xb + 0xffff) >> 16) + 1) & ~1) - left;
 
-   offset = pPriv->linear->offset * bpp;
+   offset = pPriv->videoOffset;
    if(pPriv->doubleBuffer)
-	offset += pPriv->currentBuffer * new_size * bpp;
+	offset += pPriv->currentBuffer * new_size;
 
    switch(id) {
     case FOURCC_YV12:
@@ -1015,9 +1054,16 @@ R128VideoTimerCallback(ScrnInfoPtr pScrn, Time now)
 	    }
 	} else {  /* FREE_TIMER */
 	    if(pPriv->freeTime < now) {
-		if(pPriv->linear) {
-		   xf86FreeOffscreenLinear(pPriv->linear);
-		   pPriv->linear = NULL;
+		if(pPriv->BufferHandle) {
+		   if (!info->useEXA) {
+		      xf86FreeOffscreenLinear((FBLinearPtr) pPriv->BufferHandle);
+		   }
+#ifdef USE_EXA
+		   else {
+		      exaOffscreenFree(pScrn->pScreen, (ExaOffscreenArea *) pPriv->BufferHandle);
+		   }
+#endif
+		   pPriv->BufferHandle = NULL;
 		}
 		pPriv->videoStatus = 0;
 		info->VideoTimerCallback = NULL;
-- 
1.7.11.1



More information about the xorg-driver-ati mailing list