xf86-video-ati: Branch 'master' - 4 commits

George Sapountzis gsap7 at kemper.freedesktop.org
Sat Aug 5 03:19:56 EEST 2006


 src/Makefile.am      |    3 
 src/atiaccel.c       |   31 --
 src/aticonfig.c      |   22 +
 src/atidga.c         |   15 +
 src/atidri.c         |   67 ++++
 src/atiload.c        |   71 ++++-
 src/atiload.h        |   15 +
 src/atimach64.c      |   35 +-
 src/atimach64accel.c |   15 -
 src/atimach64accel.h |   10 
 src/atimach64exa.c   |  684 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/atimach64io.h    |   70 ++++-
 src/atimach64xv.c    |  139 +++++++++-
 src/atimisc.c        |   12 
 src/atioption.c      |    7 
 src/atioption.h      |    3 
 src/atiscreen.c      |  442 ++++++++++++++++++++------------
 src/atistruct.h      |   18 +
 18 files changed, 1394 insertions(+), 265 deletions(-)

New commits:
diff-tree a1da2b5e0a38feae01107676a76bdc5c185ad34b (from a66c8d2f9855caa10142b39f155dd8232b1f2389)
Author: George Sapountzis <gsap7 at yahoo.gr>
Date:   Sat Aug 5 03:18:34 2006 +0300

    [mach64] Halve max w/h for hardware-accelerated operations.
    
    EXA hits these limits for some operations on offscreen pixmaps and GTPRO seems
    to support the reduced limits.

diff --git a/src/atimach64.c b/src/atimach64.c
index 5cb991d..cc3a3cb 100644
--- a/src/atimach64.c
+++ b/src/atimach64.c
@@ -84,7 +84,6 @@ ATIMach64PreInit
 )
 {
     CARD32 bus_cntl, config_cntl;
-    int    tmp;
 
 #ifndef AVOID_CPIO
 
@@ -258,12 +257,29 @@ ATIMach64PreInit
         pATIHW->src_cntl = SRC_LINE_X_DIR;
 
         /* Initialise scissor, allowing for offscreen areas */
-        pATIHW->sc_right = (pATI->displayWidth * pATI->XModifier) - 1;
-        tmp = pATI->displayWidth * pATI->bitsPerPixel;
-        tmp = (((pScreenInfo->videoRam * (1024 * 8)) + tmp - 1) / tmp) - 1;
-        if (tmp > ATIMach64MaxY)
-            tmp = ATIMach64MaxY;
-        pATIHW->sc_bottom = tmp;
+#ifdef USE_XAA
+        if (!pATI->useEXA)
+        {
+            int width, height, total;
+
+            pATIHW->sc_right = (pATI->displayWidth * pATI->XModifier) - 1;
+            width = pATI->displayWidth * pATI->bitsPerPixel;
+            total = pScreenInfo->videoRam * (1024 * 8);
+            height = (total + width - 1) / width;
+            if (height > ATIMach64MaxY + 1)
+                height = ATIMach64MaxY + 1;
+            pATIHW->sc_bottom = height - 1;
+        }
+#endif /* USE_XAA */
+
+#ifdef USE_EXA
+        if (pATI->useEXA)
+        {
+            pATIHW->sc_right = ATIMach64MaxX;
+            pATIHW->sc_bottom = ATIMach64MaxY;
+        }
+#endif /* USE_EXA */
+
         pATI->sc_left_right = SetWord(pATI->NewHW.sc_right, 1) |
             SetWord(pATI->NewHW.sc_left, 0);
         pATI->sc_top_bottom = SetWord(pATI->NewHW.sc_bottom, 1) |
diff --git a/src/atimach64accel.h b/src/atimach64accel.h
index 2917b7a..973f2ba 100644
--- a/src/atimach64accel.h
+++ b/src/atimach64accel.h
@@ -29,8 +29,8 @@
 #include "xaa.h"
 #include "exa.h"
 
-#define ATIMach64MaxX  8191
-#define ATIMach64MaxY 32767
+#define ATIMach64MaxX  4095
+#define ATIMach64MaxY 16383
 
 #ifdef USE_EXA
 extern Bool ATIMach64ExaInit(ScreenPtr);
diff-tree a66c8d2f9855caa10142b39f155dd8232b1f2389 (from 0fb8503d60cf646c91a806f9ed322f9ceff18baa)
Author: George Sapountzis <gsap7 at yahoo.gr>
Date:   Sat Aug 5 03:16:56 2006 +0300

    [mach64] EXA: add memcpy-based UTS/DFS.
    
    EXA hits more optimized paths when it does not have to fall back because of
    missing UTS/DFS.

diff --git a/src/atimach64exa.c b/src/atimach64exa.c
index c68495b..90cc2af 100644
--- a/src/atimach64exa.c
+++ b/src/atimach64exa.c
@@ -56,6 +56,8 @@
 #include "config.h"
 #endif
 
+#include <string.h>
+
 #include "ati.h"
 #include "atichip.h"
 #include "atidri.h"
@@ -385,6 +387,60 @@ Mach64Solid
 
 static void Mach64DoneSolid(PixmapPtr pPixmap) { }
 
+/*
+ * Memcpy-based UTS (UploadToScreen): copy w x h pixels from src to (x, y) in pDst.
+ */
+static Bool
+Mach64UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
+    char *src, int src_pitch)
+{
+    char  *dst        = pDst->devPrivate.ptr;
+    int    dst_pitch  = exaGetPixmapPitch(pDst);
+
+    int bpp    = pDst->drawable.bitsPerPixel;
+    int cpp    = (bpp + 7) / 8;    /* bytes per pixel, rounded up */
+    int wBytes = w * cpp;          /* bytes copied per row */
+    /* NOTE(review): presumably waits for pending accelerator work - confirm */
+    exaWaitSync(pDst->drawable.pScreen);
+    /* Step to the first destination byte of pixel (x, y) */
+    dst += (x * cpp) + (y * dst_pitch);
+    /* Row-by-row copy: source and destination advance by their own pitch */
+    while (h--) {
+        memcpy(dst, src, wBytes);
+        src += src_pitch;
+        dst += dst_pitch;
+    }
+
+    return TRUE;    /* no failure path */
+}
+
+/*
+ * Memcpy-based DFS (DownloadFromScreen): copy w x h pixels at (x, y) in pSrc to dst.
+ */
+static Bool
+Mach64DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
+    char *dst, int dst_pitch)
+{
+    char  *src        = pSrc->devPrivate.ptr;
+    int    src_pitch  = exaGetPixmapPitch(pSrc);
+
+    int bpp    = pSrc->drawable.bitsPerPixel;
+    int cpp    = (bpp + 7) / 8;    /* bytes per pixel, rounded up */
+    int wBytes = w * cpp;          /* bytes copied per row */
+    /* NOTE(review): presumably waits for pending accelerator work - confirm */
+    exaWaitSync(pSrc->drawable.pScreen);
+    /* Step to the first source byte of pixel (x, y) */
+    src += (x * cpp) + (y * src_pitch);
+    /* Row-by-row copy: source and destination advance by their own pitch */
+    while (h--) {
+        memcpy(dst, src, wBytes);
+        src += src_pitch;
+        dst += dst_pitch;
+    }
+
+    return TRUE;    /* no failure path */
+}
+
 /* Compute log base 2 of val. */
 static __inline__ int Mach64Log2(int val)
 {
@@ -611,6 +667,12 @@ Bool ATIMach64ExaInit(ScreenPtr pScreen)
     pExa->Copy = Mach64Copy;
     pExa->DoneCopy = Mach64DoneCopy;
 
+    /* EXA hits more optimized paths when it does not have to fall back because
+     * of missing UTS/DFS, so hook memcpy-based UTS/DFS.
+     */
+    pExa->UploadToScreen = Mach64UploadToScreen;
+    pExa->DownloadFromScreen = Mach64DownloadFromScreen;
+
     if (!exaDriverInit(pScreen, pATI->pExa)) {
 	xfree(pATI->pExa);
 	pATI->pExa = NULL;
diff-tree 0fb8503d60cf646c91a806f9ed322f9ceff18baa (from e203d86643d5d70bf18248712d05b72b79aee705)
Author: George Sapountzis <gsap7 at yahoo.gr>
Date:   Sat Aug 5 03:16:15 2006 +0300

    [mach64] XAA: minor refactoring of memory manager setup.
    
    - DRI: factor out to separate function and add comments on layout
    - no DRI: factor out to separate function, maxScanlines is always equal to
      ATIMach64MaxY

diff --git a/src/atiaccel.c b/src/atiaccel.c
index f573a63..99efae9 100644
--- a/src/atiaccel.c
+++ b/src/atiaccel.c
@@ -48,8 +48,6 @@ ATIInitializeAcceleration
     ATIPtr      pATI
 )
 {
-    int maxScanlines = 32767, maxPixelArea, PixelArea;
-
     if (pATI->OptionAccel)
     {
         if (!(pATI->pXAAInfo = XAACreateInfoRec()))
@@ -58,7 +56,7 @@ ATIInitializeAcceleration
         switch (pATI->Adapter)
         {
             case ATI_ADAPTER_MACH64:
-                maxScanlines = ATIMach64AccelInit(pATI, pATI->pXAAInfo);
+                ATIMach64AccelInit(pATI, pATI->pXAAInfo);
                 break;
 
             default:
@@ -66,31 +64,6 @@ ATIInitializeAcceleration
         }
     }
 
-#ifndef AVOID_CPIO
-
-    if (!pATI->BankInfo.BankSize)
-
-#endif /* AVOID_CPIO */
-
-#ifdef XF86DRI_DEVEL
-
-        /* If DRI is enabled, we've already set up the FB manager in ATIScreenInit */
-        if (!pATI->directRenderingEnabled)
-
-#endif /* XF86DRI */
-    {
-        /*
-         * Note:  If PixelArea exceeds the engine's maximum, the excess is
-         *        never used, even though it would be useful for such things
-         *        as XVideo buffers.
-         */
-        maxPixelArea = maxScanlines * pScreenInfo->displayWidth;
-        PixelArea = pScreenInfo->videoRam * 1024 * 8 / pATI->bitsPerPixel;
-        if (PixelArea > maxPixelArea)
-            PixelArea = maxPixelArea;
-        xf86InitFBManagerArea(pScreen, PixelArea, 2);
-    }
-
     if (!pATI->OptionAccel || XAAInit(pScreen, pATI->pXAAInfo))
         return TRUE;
 
diff --git a/src/atiscreen.c b/src/atiscreen.c
index 7dff827..3297df5 100644
--- a/src/atiscreen.c
+++ b/src/atiscreen.c
@@ -125,6 +125,235 @@ ATIMinBits
     for (bits = 0; val; val >>= 1, ++bits);
     return bits;
 }
+
+#ifdef USE_XAA
+static Bool
+ATIMach64SetupMemXAA_NoDRI
+(
+    int       iScreen,
+    ScreenPtr pScreen
+)
+{
+    ScrnInfoPtr  pScreenInfo = xf86Screens[iScreen];
+    ATIPtr       pATI        = ATIPTR(pScreenInfo);
+    /* XAA memory manager setup for the non-DRI case; always returns TRUE */
+    int maxScanlines = ATIMach64MaxY;
+    int maxPixelArea, PixelArea;
+
+#ifndef AVOID_CPIO
+    /* The manager is only set up when no bank size is configured */
+    if (!pATI->BankInfo.BankSize)
+
+#endif /* AVOID_CPIO */
+    {
+        /*
+         * Note:  If PixelArea exceeds the engine's maximum, the excess is
+         *        never used, even though it would be useful for such things
+         *        as XVideo buffers.
+         */
+        maxPixelArea = maxScanlines * pScreenInfo->displayWidth;
+        PixelArea = pScreenInfo->videoRam * 1024 * 8 / pATI->bitsPerPixel;
+        if (PixelArea > maxPixelArea)
+            PixelArea = maxPixelArea;
+        xf86InitFBManagerArea(pScreen, PixelArea, 2); /* result is not checked */
+    }
+
+    return TRUE;
+}
+
+#ifdef XF86DRI_DEVEL
+/*
+ * ATIMach64SetupMemXAA --
+ *
+ * Lay out video memory for XAA when DRI is enabled and initialise the
+ * framebuffer manager.  Returns FALSE if the manager cannot be set up.
+ *
+ * Memory layout for XAA with DRI (no local_textures):
+ * | front  | pixmaps, xv | back   | depth  | textures | c |
+ *
+ * 1024x768 at 16bpp with 8 MB:
+ * | 1.5 MB | ~3.5 MB     | 1.5 MB | 1.5 MB | 0        | c |
+ *
+ * 1024x768 at 32bpp with 8 MB:
+ * | 3.0 MB | ~0.5 MB     | 3.0 MB | 1.5 MB | 0        | c |
+ *
+ * "c" is the hw cursor which occupies 1KB
+ */
+static Bool
+ATIMach64SetupMemXAA
+(
+    int       iScreen,
+    ScreenPtr pScreen
+)
+{
+	ScrnInfoPtr  pScreenInfo = xf86Screens[iScreen];
+	ATIPtr       pATI        = ATIPTR(pScreenInfo);
+
+	ATIDRIServerInfoPtr pATIDRIServer = pATI->pDRIServerInfo;
+	int cpp = pATI->bitsPerPixel >> 3;
+	int widthBytes = pScreenInfo->displayWidth * cpp;
+	int zWidthBytes = pScreenInfo->displayWidth * 2; /* always 16-bit z-buffer */
+	int fbSize = pScreenInfo->videoRam * 1024;
+	int bufferSize = pScreenInfo->virtualY * widthBytes;
+	int zBufferSize = pScreenInfo->virtualY * zWidthBytes;
+	int offscreenBytes, total, scanlines;
+
+	pATIDRIServer->fbX = 0;
+	pATIDRIServer->fbY = 0;
+	pATIDRIServer->frontOffset = 0;
+	pATIDRIServer->frontPitch = pScreenInfo->displayWidth;
+
+	/* Calculate memory remaining for pixcache and textures after
+	 * front, back, and depth buffers
+	 */
+	offscreenBytes = fbSize - ( 2 * bufferSize + zBufferSize );
+
+	if ( !pATIDRIServer->IsPCI && !pATI->OptionLocalTextures ) {
+	    /* Don't allocate a local texture heap for AGP unless requested */
+	    pATIDRIServer->textureSize = 0;
+	} else {
+	    int l, maxPixcache;
+
+#ifdef XvExtension
+	    int xvBytes;
+	    /* Try for enough pixmap cache for DVD and a full viewport */
+	    xvBytes = 720*480*cpp; /* enough for single-buffered DVD */
+	    maxPixcache = xvBytes > bufferSize ? xvBytes : bufferSize;
+
+#else /* XvExtension */
+	    /* Try for one viewport */
+	    maxPixcache = bufferSize;
+#endif /* XvExtension */
+
+	    pATIDRIServer->textureSize = offscreenBytes - maxPixcache;
+
+	    /* If that gives us less than half the offscreen mem available for textures, split
+	     * the available mem between textures and pixmap cache
+	     */
+	    if (pATIDRIServer->textureSize < (offscreenBytes/2)) {
+		pATIDRIServer->textureSize = offscreenBytes/2;
+	    }
+
+	    if (pATIDRIServer->textureSize <= 0)
+		pATIDRIServer->textureSize = 0;
+
+	    l = ATIMinBits((pATIDRIServer->textureSize-1) / MACH64_NR_TEX_REGIONS);
+	    if (l < MACH64_LOG_TEX_GRANULARITY) l = MACH64_LOG_TEX_GRANULARITY;
+
+	    /* Truncate the texture size to a whole number of texture regions
+	     * (the shift pair below rounds down). */
+	    pATIDRIServer->logTextureGranularity = l;
+	    pATIDRIServer->textureSize =
+		(pATIDRIServer->textureSize >> l) << l;
+	}
+
+	total = fbSize - pATIDRIServer->textureSize;
+	scanlines = total / widthBytes;
+	if (scanlines > ATIMach64MaxY) scanlines = ATIMach64MaxY;
+
+	/* Recalculate the texture offset and size to accommodate any
+	 * rounding to a whole number of scanlines.
+	 * FIXME: Is this actually needed?
+	 */
+	pATIDRIServer->textureOffset = scanlines * widthBytes;
+	pATIDRIServer->textureSize = fbSize - pATIDRIServer->textureOffset;
+
+	/* Set a minimum usable local texture heap size.  This will fit
+	 * two 256x256 textures.  We check this after any rounding of
+	 * the texture area.
+	 */
+	if (pATIDRIServer->textureSize < 256*256 * cpp * 2) {
+	    pATIDRIServer->textureOffset = 0;
+	    pATIDRIServer->textureSize = 0;
+	    scanlines = fbSize / widthBytes;
+	    if (scanlines > ATIMach64MaxY) scanlines = ATIMach64MaxY;
+	}
+
+	pATIDRIServer->depthOffset = scanlines * widthBytes - zBufferSize;
+	pATIDRIServer->depthPitch = pScreenInfo->displayWidth;
+	pATIDRIServer->depthY = pATIDRIServer->depthOffset/widthBytes;
+	pATIDRIServer->depthX =  (pATIDRIServer->depthOffset -
+				  (pATIDRIServer->depthY * widthBytes)) / cpp;
+
+	pATIDRIServer->backOffset = pATIDRIServer->depthOffset - bufferSize;
+	pATIDRIServer->backPitch = pScreenInfo->displayWidth;
+	pATIDRIServer->backY = pATIDRIServer->backOffset/widthBytes;
+	pATIDRIServer->backX =  (pATIDRIServer->backOffset -
+				  (pATIDRIServer->backY * widthBytes)) / cpp;
+
+	scanlines = fbSize / widthBytes;
+	if (scanlines > ATIMach64MaxY) scanlines = ATIMach64MaxY;
+
+	if ( pATIDRIServer->IsPCI && pATIDRIServer->textureSize == 0 ) {
+	    xf86DrvMsg(iScreen, X_WARNING,
+		       "Not enough memory for local textures, disabling DRI\n");
+	    ATIDRICloseScreen(pScreen);
+	    pATI->directRenderingEnabled = FALSE;
+	    /* DRI is now off, so fall back to the non-DRI layout; otherwise the
+	     * framebuffer manager would never be initialised on this path. */
+	    return ATIMach64SetupMemXAA_NoDRI(iScreen, pScreen);
+	} else {
+	    BoxRec ScreenArea;
+
+	    ScreenArea.x1 = 0;
+	    ScreenArea.y1 = 0;
+	    ScreenArea.x2 = pATI->displayWidth;
+	    ScreenArea.y2 = scanlines;
+
+	    if (!xf86InitFBManager(pScreen, &ScreenArea)) {
+		xf86DrvMsg(pScreenInfo->scrnIndex, X_ERROR,
+			   "Memory manager initialization to (%d,%d) (%d,%d) failed\n",
+			   ScreenArea.x1, ScreenArea.y1,
+			   ScreenArea.x2, ScreenArea.y2);
+		return FALSE;
+	    } else {
+		int width, height;
+
+		xf86DrvMsg(pScreenInfo->scrnIndex, X_INFO,
+			   "Memory manager initialized to (%d,%d) (%d,%d)\n",
+			   ScreenArea.x1, ScreenArea.y1, ScreenArea.x2, ScreenArea.y2);
+
+		if (xf86QueryLargestOffscreenArea(pScreen, &width, &height, 0, 0, 0)) {
+		    xf86DrvMsg(pScreenInfo->scrnIndex, X_INFO,
+			       "Largest offscreen area available: %d x %d\n",
+			       width, height);
+
+		    /* lines in offscreen area needed for depth buffer and textures */
+		    pATI->depthTexLines = scanlines
+			- pATIDRIServer->depthOffset / widthBytes;
+		    pATI->backLines     = scanlines
+			- pATIDRIServer->backOffset / widthBytes
+			- pATI->depthTexLines;
+		    pATI->depthTexArea  = NULL;
+		    pATI->backArea      = NULL;
+		} else {
+		    xf86DrvMsg(pScreenInfo->scrnIndex, X_ERROR,
+			       "Unable to determine largest offscreen area available\n");
+		    return FALSE;
+		}
+	    }
+
+	    xf86DrvMsg(iScreen, X_INFO, "Will use %d kB of offscreen memory for XAA\n",
+		       (offscreenBytes - pATIDRIServer->textureSize)/1024);
+
+	    xf86DrvMsg(iScreen, X_INFO, "Will use back buffer at offset 0x%x\n",
+		       pATIDRIServer->backOffset);
+
+	    xf86DrvMsg(iScreen, X_INFO, "Will use depth buffer at offset 0x%x\n",
+		       pATIDRIServer->depthOffset);
+
+	    if (pATIDRIServer->textureSize > 0) {
+		xf86DrvMsg(pScreenInfo->scrnIndex, X_INFO,
+			   "Will use %d kB for local textures at offset 0x%x\n",
+			   pATIDRIServer->textureSize/1024,
+			   pATIDRIServer->textureOffset);
+	    }
+	}
+
+	return TRUE;
+}
+#endif /* XF86DRI_DEVEL */
+#endif /* USE_XAA */
   	 
 /*
  * ATIScreenInit --
@@ -144,9 +373,6 @@ ATIScreenInit
     ATIPtr       pATI        = ATIPTR(pScreenInfo);
     pointer      pFB;
     int          VisualMask;
-#ifdef XF86DRI_DEVEL
-    BoxRec       ScreenArea;
-#endif
 
     /* Set video hardware state */
     if (!ATIEnterGraphics(pScreen, pScreenInfo, pATI))
@@ -333,183 +559,23 @@ ATIScreenInit
 
     if (!pATI->useEXA) {
 
-	/* Memory manager setup */
+    /* Memory manager setup */
 
 #ifdef XF86DRI_DEVEL
-
     if (pATI->directRenderingEnabled)
     {
-	ATIDRIServerInfoPtr pATIDRIServer = pATI->pDRIServerInfo;
-	int cpp = pATI->bitsPerPixel >> 3;
-	int widthBytes = pScreenInfo->displayWidth * cpp;
-	int zWidthBytes = pScreenInfo->displayWidth * 2; /* always 16-bit z-buffer */
-	int fbSize = pScreenInfo->videoRam * 1024;
-	int bufferSize = pScreenInfo->virtualY * widthBytes;
-	int zBufferSize = pScreenInfo->virtualY * zWidthBytes;
-	int offscreenBytes, total, scanlines;
-
-	pATIDRIServer->fbX = 0;
-	pATIDRIServer->fbY = 0;
-	pATIDRIServer->frontOffset = 0;
-	pATIDRIServer->frontPitch = pScreenInfo->displayWidth;
-
-	/* Calculate memory remaining for pixcache and textures after 
-	 * front, back, and depth buffers
-	 */
-	offscreenBytes = fbSize - ( 2 * bufferSize + zBufferSize );
-
-	if ( !pATIDRIServer->IsPCI && !pATI->OptionLocalTextures ) {
-	    /* Don't allocate a local texture heap for AGP unless requested */
-	    pATIDRIServer->textureSize = 0;
-	} else {
-	    int l, maxPixcache;
-
-#ifdef XvExtension
-
-	    int xvBytes;
-
-	    /* Try for enough pixmap cache for DVD and a full viewport
-	     */
-	    xvBytes = 720*480*cpp; /* enough for single-buffered DVD */
-	    maxPixcache = xvBytes > bufferSize ? xvBytes : bufferSize;
-
-#else /* XvExtension */
-
-	    /* Try for one viewport */
-	    maxPixcache = bufferSize;
-
-#endif /* XvExtension */
-
-	    pATIDRIServer->textureSize = offscreenBytes - maxPixcache;
-
-	    /* If that gives us less than half the offscreen mem available for textures, split 
-	     * the available mem between textures and pixmap cache
-	     */
-	    if (pATIDRIServer->textureSize < (offscreenBytes/2)) {
-		pATIDRIServer->textureSize = offscreenBytes/2;
-	    }
-
-	    if (pATIDRIServer->textureSize <= 0)
-		pATIDRIServer->textureSize = 0;
-
-	    l = ATIMinBits((pATIDRIServer->textureSize-1) / MACH64_NR_TEX_REGIONS);
-	    if (l < MACH64_LOG_TEX_GRANULARITY) l = MACH64_LOG_TEX_GRANULARITY;
-
-	    /* Round the texture size up to the nearest whole number of
-	     * texture regions.  Again, be greedy about this, don't round
-	     * down.
-	     */
-	    pATIDRIServer->logTextureGranularity = l;
-	    pATIDRIServer->textureSize =
-		(pATIDRIServer->textureSize >> l) << l;
-	}
-
-	total = fbSize - pATIDRIServer->textureSize;
-	scanlines = total / widthBytes;
-	if (scanlines > ATIMach64MaxY) scanlines = ATIMach64MaxY;
-
-	/* Recalculate the texture offset and size to accomodate any
-	 * rounding to a whole number of scanlines.
-	 * FIXME: Is this actually needed?
-	 */
-	pATIDRIServer->textureOffset = scanlines * widthBytes;
-	pATIDRIServer->textureSize = fbSize - pATIDRIServer->textureOffset;
-
-	/* Set a minimum usable local texture heap size.  This will fit
-	 * two 256x256 textures.  We check this after any rounding of
-	 * the texture area.
-	 */
-	if (pATIDRIServer->textureSize < 256*256 * cpp * 2) {
-	    pATIDRIServer->textureOffset = 0;
-	    pATIDRIServer->textureSize = 0;
-	    scanlines = fbSize / widthBytes;
-	    if (scanlines > ATIMach64MaxY) scanlines = ATIMach64MaxY;
-	}
-
-	pATIDRIServer->depthOffset = scanlines * widthBytes - zBufferSize;
-	pATIDRIServer->depthPitch = pScreenInfo->displayWidth;
-	pATIDRIServer->depthY = pATIDRIServer->depthOffset/widthBytes;
-	pATIDRIServer->depthX =  (pATIDRIServer->depthOffset - 
-				  (pATIDRIServer->depthY * widthBytes)) / cpp;
-
-	pATIDRIServer->backOffset = pATIDRIServer->depthOffset - bufferSize;
-	pATIDRIServer->backPitch = pScreenInfo->displayWidth;
-	pATIDRIServer->backY = pATIDRIServer->backOffset/widthBytes;
-	pATIDRIServer->backX =  (pATIDRIServer->backOffset - 
-				  (pATIDRIServer->backY * widthBytes)) / cpp;
-
-	scanlines = fbSize / widthBytes;
-	if (scanlines > ATIMach64MaxY) scanlines = ATIMach64MaxY;
-
-	if ( pATIDRIServer->IsPCI && pATIDRIServer->textureSize == 0 ) {
-	    xf86DrvMsg(iScreen, X_WARNING,
-		       "Not enough memory for local textures, disabling DRI\n");
-	    ATIDRICloseScreen(pScreen);
-	    pATI->directRenderingEnabled = FALSE;
-	} else {
-
-	    ScreenArea.x1 = 0;
-	    ScreenArea.y1 = 0;
-	    ScreenArea.x2 = pATI->displayWidth;
-	    ScreenArea.y2 = scanlines;
-
-	    if (!xf86InitFBManager(pScreen, &ScreenArea)) {
-		xf86DrvMsg(pScreenInfo->scrnIndex, X_ERROR,
-			   "Memory manager initialization to (%d,%d) (%d,%d) failed\n",
-			   ScreenArea.x1, ScreenArea.y1,
-			   ScreenArea.x2, ScreenArea.y2);
-		return FALSE;
-	    } else {
-		int width, height;
-
-		xf86DrvMsg(pScreenInfo->scrnIndex, X_INFO,
-			   "Memory manager initialized to (%d,%d) (%d,%d)\n",
-			   ScreenArea.x1, ScreenArea.y1, ScreenArea.x2, ScreenArea.y2);
-
-		if (xf86QueryLargestOffscreenArea(pScreen, &width, &height, 0, 0, 0)) {
-		    xf86DrvMsg(pScreenInfo->scrnIndex, X_INFO,
-			       "Largest offscreen area available: %d x %d\n",
-			       width, height);
-
-		    /* lines in offscreen area needed for depth buffer and textures */
-		    pATI->depthTexLines = scanlines
-			- pATIDRIServer->depthOffset / widthBytes;
-		    pATI->backLines     = scanlines
-			- pATIDRIServer->backOffset / widthBytes
-			- pATI->depthTexLines;
-		    pATI->depthTexArea  = NULL;
-		    pATI->backArea      = NULL;
-		} else {
-		    xf86DrvMsg(pScreenInfo->scrnIndex, X_ERROR, 
-			       "Unable to determine largest offscreen area available\n");
-		    return FALSE;
-		}
-
-	    }
-
-	    xf86DrvMsg(iScreen, X_INFO, "Will use %d kB of offscreen memory for XAA\n", 
-		       (offscreenBytes - pATIDRIServer->textureSize)/1024);
-
-	    xf86DrvMsg(iScreen, X_INFO, "Will use back buffer at offset 0x%x\n",
-		       pATIDRIServer->backOffset);
-
-	    xf86DrvMsg(iScreen, X_INFO, "Will use depth buffer at offset 0x%x\n",
-		       pATIDRIServer->depthOffset);
-
-	    if (pATIDRIServer->textureSize > 0) {
-		xf86DrvMsg(pScreenInfo->scrnIndex, X_INFO,
-			   "Will use %d kB for local textures at offset 0x%x\n",
-			   pATIDRIServer->textureSize/1024,
-			   pATIDRIServer->textureOffset);
-	    }
-	}
+        if (!ATIMach64SetupMemXAA(iScreen, pScreen))
+            return FALSE;
     }
-
+    else
 #endif /* XF86DRI_DEVEL */
+    {
+        if (!ATIMach64SetupMemXAA_NoDRI(iScreen, pScreen))
+            return FALSE;
+    }
 
     /* Setup acceleration */
-    /* If direct rendering is not enabled, the framebuffer memory 
-     * manager is initialized by this function call */
+
     if (!ATIInitializeAcceleration(pScreen, pScreenInfo, pATI))
         return FALSE;
 
diff-tree e203d86643d5d70bf18248712d05b72b79aee705 (from dc1e289a611a17090e6dc7ae8a8d3f26d20df4eb)
Author: George Sapountzis <gsap7 at yahoo.gr>
Date:   Sat Aug 5 03:15:42 2006 +0300

    [mach64] EXA support.

diff --git a/src/Makefile.am b/src/Makefile.am
index 22172a7..d31ac24 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -42,6 +42,7 @@ ATIMISC_DGA_SOURCES = atidga.c
 endif
 
 if USE_EXA
+ATIMISC_EXA_SOURCES = atimach64exa.c
 RADEON_EXA_SOURCES = radeon_exa.c
 endif
 
@@ -65,7 +66,7 @@ atimisc_drv_la_SOURCES = \
 	atimach64i2c.c atimach64io.c atimach64xv.c atimode.c atipreinit.c \
 	atiprint.c atirgb514.c atiscreen.c atituner.c atiutil.c ativalid.c \
 	atixv.c atiload.c atimisc.c $(ATIMISC_DRI_SRCS) $(ATIMISC_DGA_SOURCES) \
-	$(ATIMISC_CPIO_SOURCES)
+	$(ATIMISC_CPIO_SOURCES) $(ATIMISC_EXA_SOURCES)
 
 r128_drv_la_LTLIBRARIES = r128_drv.la
 r128_drv_la_LDFLAGS = -module -avoid-version
diff --git a/src/atiaccel.c b/src/atiaccel.c
index 050019d..f573a63 100644
--- a/src/atiaccel.c
+++ b/src/atiaccel.c
@@ -33,6 +33,7 @@
 #include "atimach64accel.h"
 #include "atistruct.h"
 
+#ifdef USE_XAA
 /*
  * ATIInitializeAcceleration --
  *
@@ -144,3 +145,4 @@ ATIResizeOffscreenLinear
 
     return pLinear;
 }
+#endif /* USE_XAA */
diff --git a/src/aticonfig.c b/src/aticonfig.c
index 33b9519..dffabc7 100644
--- a/src/aticonfig.c
+++ b/src/aticonfig.c
@@ -153,6 +153,7 @@ ATIProcessOptions
 #   define ProbeClocks   PublicOption[ATI_OPTION_PROBE_CLOCKS].value.bool
 #   define ShadowFB      PublicOption[ATI_OPTION_SHADOW_FB].value.bool
 #   define SWCursor      PublicOption[ATI_OPTION_SWCURSOR].value.bool
+#   define AccelMethod   PublicOption[ATI_OPTION_ACCELMETHOD].value.str
 #   define LCDSync       PrivateOption[ATI_OPTION_LCDSYNC].value.bool
 
 #   define ReferenceClock \
@@ -340,5 +341,26 @@ ATIProcessOptions
         }
     }
 
+    pATI->useEXA = FALSE;
+    if (pATI->OptionAccel)
+    {
+        MessageType from = X_DEFAULT;
+#if defined(USE_EXA)
+#if defined(USE_XAA)
+        if (AccelMethod != NULL)
+        {
+            from = X_CONFIG;
+            if (xf86NameCmp(AccelMethod, "EXA") == 0)
+                pATI->useEXA = TRUE;
+        }
+#else /* USE_XAA */
+        pATI->useEXA = TRUE;
+#endif /* !USE_XAA */
+#endif /* USE_EXA */
+        xf86DrvMsg(pScreenInfo->scrnIndex, from,
+            "Using %s acceleration architecture\n",
+            pATI->useEXA ? "EXA" : "XAA");
+    }
+
     xfree(PublicOption);
 }
diff --git a/src/atidga.c b/src/atidga.c
index fff28f5..1c652a7 100644
--- a/src/atidga.c
+++ b/src/atidga.c
@@ -188,6 +188,8 @@ ATIDGAFillRect
 )
 {
     ATIPtr        pATI     = ATIPTR(pScreenInfo);
+/*FIXME : use EXA if available */
+#ifdef USE_XAA
     XAAInfoRecPtr pXAAInfo = pATI->pXAAInfo;
 
     (*pXAAInfo->SetupForSolidFill)(pScreenInfo, (int)colour, GXcopy,
@@ -196,6 +198,7 @@ ATIDGAFillRect
 
     if (pScreenInfo->bitsPerPixel == pATI->bitsPerPixel)
         SET_SYNC_FLAG(pXAAInfo);
+#endif
 }
 
 /*
@@ -217,6 +220,8 @@ ATIDGABlitRect
 )
 {
     ATIPtr        pATI     = ATIPTR(pScreenInfo);
+/*FIXME : use EXA if available */
+#ifdef USE_XAA
     XAAInfoRecPtr pXAAInfo = pATI->pXAAInfo;
     int           xdir     = ((xSrc < xDst) && (ySrc == yDst)) ? -1 : 1;
     int           ydir     = (ySrc < yDst) ? -1 : 1;
@@ -228,6 +233,7 @@ ATIDGABlitRect
 
     if (pScreenInfo->bitsPerPixel == pATI->bitsPerPixel)
         SET_SYNC_FLAG(pXAAInfo);
+#endif
 }
 
 /*
@@ -250,6 +256,8 @@ ATIDGABlitTransRect
 )
 {
     ATIPtr        pATI     = ATIPTR(pScreenInfo);
+/*FIXME : use EXA if available */
+#ifdef USE_XAA
     XAAInfoRecPtr pXAAInfo = pATI->pXAAInfo;
     int           xdir     = ((xSrc < xDst) && (ySrc == yDst)) ? -1 : 1;
     int           ydir     = (ySrc < yDst) ? -1 : 1;
@@ -266,6 +274,7 @@ ATIDGABlitTransRect
 
     if (pScreenInfo->bitsPerPixel == pATI->bitsPerPixel)
         SET_SYNC_FLAG(pXAAInfo);
+#endif
 }
 
 /*
@@ -335,8 +344,10 @@ ATIDGAAddModes
                     pDGAMode->flags |= DGA_PIXMAP_AVAILABLE;
                     pDGAMode->address = pATI->pMemory;
 
+#ifdef USE_XAA
                     if (pATI->pXAAInfo)
                         pDGAMode->flags &= ~DGA_CONCURRENT_ACCESS;
+#endif
                 }
                 if ((pMode->Flags & V_DBLSCAN) || (pMode->VScan > 1))
                     pDGAMode->flags |= DGA_DOUBLESCAN;
@@ -397,7 +408,9 @@ ATIDGAInit
     ATIPtr      pATI
 )
 {
+#ifdef USE_XAA
     XAAInfoRecPtr pXAAInfo;
+#endif
     int           flags;
 
     if (!pATI->nDGAMode)
@@ -422,6 +435,7 @@ ATIDGAInit
         pATI->ATIDGAFunctions.GetViewport     = ATIDGAGetViewport;
 
         flags = 0;
+#ifdef USE_XAA
         if ((pXAAInfo = pATI->pXAAInfo))
         {
             pATI->ATIDGAFunctions.Sync = pXAAInfo->Sync;
@@ -439,6 +453,7 @@ ATIDGAInit
                 pATI->ATIDGAFunctions.BlitTransRect = ATIDGABlitTransRect;
             }
         }
+#endif
         if (!flags)
             flags = DGA_CONCURRENT_ACCESS;
 
diff --git a/src/atidri.c b/src/atidri.c
index 5155820..720c96a 100644
--- a/src/atidri.c
+++ b/src/atidri.c
@@ -293,9 +293,9 @@ static void ATIEnterServer( ScreenPtr pS
    ScrnInfoPtr pScreenInfo = xf86Screens[pScreen->myNum];
    ATIPtr pATI = ATIPTR(pScreenInfo);
 
-   if ( pATI->directRenderingEnabled && pATI->pXAAInfo ) { 
-      pATI->pXAAInfo->NeedToSync = TRUE;
-      pATI->NeedDRISync = TRUE;
+   if ( pATI->directRenderingEnabled ) { 
+      ATIDRIMarkSyncInt(pScreenInfo);
+      ATIDRIMarkSyncExt(pScreenInfo);
    }
 }
 
@@ -333,6 +333,7 @@ static void ATIDRISwapContext( ScreenPtr
    }
 }
 
+#ifdef USE_XAA
 static void ATIDRITransitionTo2d(ScreenPtr pScreen)
 {
    ScrnInfoPtr pScreenInfo = xf86Screens[pScreen->myNum];
@@ -400,10 +401,49 @@ static void ATIDRITransitionTo3d(ScreenP
 
    pATI->have3DWindows = TRUE;
 }
+#endif /* USE_XAA */
+
+#ifdef USE_EXA
+static void ATIDRITransitionTo2d_EXA(ScreenPtr pScreen)
+{
+   ScrnInfoPtr pScreenInfo = xf86Screens[pScreen->myNum];
+   ATIPtr pATI = ATIPTR(pScreenInfo);
+#if 0 /* compiled out: would re-base EXA offscreen memory at the back buffer */
+   ATIDRIServerInfoPtr pATIDRIServer = pATI->pDRIServerInfo;
+
+   exaEnableDisableFBAccess(pScreen->myNum, FALSE);
+
+   pATI->pExa->offScreenBase = pATIDRIServer->backOffset;
+
+   exaEnableDisableFBAccess(pScreen->myNum, TRUE);
+#endif
+   /* EXA variant of the transition to 2D: only have3DWindows is cleared */
+   pATI->have3DWindows = FALSE;
+}
+
+static void ATIDRITransitionTo3d_EXA(ScreenPtr pScreen)
+{
+   ScrnInfoPtr pScreenInfo = xf86Screens[pScreen->myNum];
+   ATIPtr pATI = ATIPTR(pScreenInfo);
+#if 0 /* compiled out: would re-base EXA offscreen memory past the textures */
+   ATIDRIServerInfoPtr pATIDRIServer = pATI->pDRIServerInfo;
+
+   exaEnableDisableFBAccess(pScreen->myNum, FALSE);
+
+   pATI->pExa->offScreenBase = pATIDRIServer->textureOffset +
+			       pATIDRIServer->textureSize;
+
+   exaEnableDisableFBAccess(pScreen->myNum, TRUE);
+#endif
+   /* EXA variant of the transition to 3D: only have3DWindows is set */
+   pATI->have3DWindows = TRUE;
+}
+#endif /* USE_EXA */
 
 /* Initialize the state of the back and depth buffers. */
 static void ATIDRIInitBuffers( WindowPtr pWin, RegionPtr prgn, CARD32 indx )
 {
+#ifdef USE_XAA
    ScreenPtr   pScreen = pWin->drawable.pScreen;
    ScrnInfoPtr pScreenInfo   = xf86Screens[pScreen->myNum];
    ATIPtr pATI = ATIPTR(pScreenInfo);
@@ -454,7 +494,8 @@ static void ATIDRIInitBuffers( WindowPtr
 					      pbox->x2 - pbox->x1,
 					      pbox->y2 - pbox->y1);
 
-   pXAAInfo->NeedToSync = TRUE;
+   ATIDRIMarkSyncInt(pScreenInfo);
+#endif
 }
 
 /* Copy the back and depth buffers when the X server moves a window.
@@ -469,6 +510,7 @@ static void ATIDRIInitBuffers( WindowPtr
 static void ATIDRIMoveBuffers( WindowPtr pWin, DDXPointRec ptOldOrg,
 			       RegionPtr prgnSrc, CARD32 indx )
 {
+#ifdef USE_XAA
     ScreenPtr pScreen = pWin->drawable.pScreen;
     ScrnInfoPtr pScreenInfo = xf86Screens[pScreen->myNum];
     ATIPtr pATI = ATIPTR(pScreenInfo);
@@ -632,7 +674,8 @@ static void ATIDRIMoveBuffers( WindowPtr
     DEALLOCATE_LOCAL(pptNew1);
     DEALLOCATE_LOCAL(pboxNew1);
 
-    pXAAInfo->NeedToSync = TRUE;
+    ATIDRIMarkSyncInt(pScreenInfo);
+#endif
 }
 
 /* Compute log base 2 of val. */
@@ -1204,8 +1247,18 @@ Bool ATIDRIScreenInit( ScreenPtr pScreen
    pDRIInfo->SwapContext	= ATIDRISwapContext;
    pDRIInfo->InitBuffers	= ATIDRIInitBuffers;
    pDRIInfo->MoveBuffers	= ATIDRIMoveBuffers;
-   pDRIInfo->TransitionTo2d     = ATIDRITransitionTo2d;
-   pDRIInfo->TransitionTo3d     = ATIDRITransitionTo3d;
+#ifdef USE_XAA
+   if (!pATI->useEXA) {
+      pDRIInfo->TransitionTo2d  = ATIDRITransitionTo2d;
+      pDRIInfo->TransitionTo3d  = ATIDRITransitionTo3d;
+   }
+#endif /* USE_XAA */
+#ifdef USE_EXA
+   if (pATI->useEXA) {
+      pDRIInfo->TransitionTo2d  = ATIDRITransitionTo2d_EXA;
+      pDRIInfo->TransitionTo3d  = ATIDRITransitionTo3d_EXA;
+   }
+#endif /* USE_EXA */
    pDRIInfo->bufferRequests	= DRI_ALL_WINDOWS;
 
    pDRIInfo->createDummyCtx     = TRUE;
diff --git a/src/atiload.c b/src/atiload.c
index 7c901f6..83165bc 100644
--- a/src/atiload.c
+++ b/src/atiload.c
@@ -144,6 +144,19 @@ const char *ATIshadowfbSymbols[] =
     NULL
 };
 
+#ifdef USE_EXA
+const char *ATIexaSymbols[] = /* NULL-terminated; given to xf86LoaderReqSymLists() */
+{
+    "exaDriverAlloc",
+    "exaDriverInit",
+    "exaDriverFini",
+    "exaOffscreenAlloc",
+    "exaOffscreenFree",
+    NULL
+};
+#endif
+
+#ifdef USE_XAA
 const char *ATIxaaSymbols[] =
 {
     "XAACreateInfoRec",
@@ -151,6 +164,7 @@ const char *ATIxaaSymbols[] =
     "XAAInit",
     NULL
 };
+#endif
 
 const char *ATIramdacSymbols[] =
 {
@@ -205,21 +219,13 @@ ATILoadModules
     ATIPtr      pATI
 )
 {
+    pointer fbPtr = NULL;
+
     /* Load shadow frame buffer code if needed */
     if (pATI->OptionShadowFB &&
         !ATILoadModule(pScreenInfo, "shadowfb", ATIshadowfbSymbols))
         return NULL;
 
-    /* Load XAA if needed */
-    if (pATI->OptionAccel &&
-        !ATILoadModule(pScreenInfo, "xaa", ATIxaaSymbols))
-        return NULL;
-
-    /* Load ramdac module if needed */
-    if ((pATI->Cursor > ATI_CURSOR_SOFTWARE) &&
-        !ATILoadModule(pScreenInfo, "ramdac", ATIramdacSymbols))
-        return NULL;
-
     /* Load depth-specific entry points */
     switch (pATI->bitsPerPixel)
     {
@@ -227,10 +233,12 @@ ATILoadModules
 #ifndef AVOID_CPIO
 
         case 1:
-            return ATILoadModule(pScreenInfo, "xf1bpp", ATIxf1bppSymbols);
+            fbPtr = ATILoadModule(pScreenInfo, "xf1bpp", ATIxf1bppSymbols);
+            break;
 
         case 4:
-            return ATILoadModule(pScreenInfo, "xf4bpp", ATIxf4bppSymbols);
+            fbPtr = ATILoadModule(pScreenInfo, "xf4bpp", ATIxf4bppSymbols);
+            break;
 
 #endif /* AVOID_CPIO */
 
@@ -238,11 +246,48 @@ ATILoadModules
         case 16:
         case 24:
         case 32:
-            return ATILoadModule(pScreenInfo, "fb", ATIfbSymbols);
+            fbPtr = ATILoadModule(pScreenInfo, "fb", ATIfbSymbols);
+            break;
 
         default:
             return NULL;
     }
+    if (!fbPtr)
+        return NULL;
+
+    /* Load ramdac module if needed */
+    if ((pATI->Cursor > ATI_CURSOR_SOFTWARE) &&
+        !ATILoadModule(pScreenInfo, "ramdac", ATIramdacSymbols))
+        return NULL;
+
+#ifdef USE_EXA
+    /* Load EXA if needed */
+    if (pATI->useEXA && pATI->OptionAccel)
+    {
+        /* Cannot use ATILoadModule(), because of version checking */
+        XF86ModReqInfo req;
+        int errmaj, errmin;
+
+        memset(&req, 0, sizeof(XF86ModReqInfo));
+        req.majorversion = 2;
+        req.minorversion = 0;
+        if (!LoadSubModule(pScreenInfo->module, "exa", NULL, NULL, NULL, &req,
+            &errmaj, &errmin))
+        {
+            LoaderErrorMsg(NULL, "exa", errmaj, errmin);
+            return NULL;
+        }
+        xf86LoaderReqSymLists(ATIexaSymbols, NULL);
+    }
+#endif
+#ifdef USE_XAA
+    /* Load XAA if needed */
+    if (!pATI->useEXA && pATI->OptionAccel &&
+        !ATILoadModule(pScreenInfo, "xaa", ATIxaaSymbols))
+        return NULL;
+#endif
+
+    return fbPtr;
 }
 
 #endif /* XFree86LOADER */
diff --git a/src/atiload.h b/src/atiload.h
index 96606e3..49f2bf2 100644
--- a/src/atiload.h
+++ b/src/atiload.h
@@ -47,7 +47,20 @@ extern const char *ATIint10Symbols[], *A
  
 #endif /* XF86DRI_DEVEL */
 
-                  *ATIfbSymbols[], *ATIshadowfbSymbols[], *ATIxaaSymbols[],
+                  *ATIfbSymbols[], *ATIshadowfbSymbols[],
+
+#ifdef USE_EXA
+ 
+                  *ATIexaSymbols[],
+
+#endif /* USE_EXA */
+
+#ifdef USE_XAA
+ 
+                  *ATIxaaSymbols[],
+
+#endif /* USE_XAA */
+
                   *ATIramdacSymbols[], *ATIi2cSymbols[];
 
 extern pointer ATILoadModule(ScrnInfoPtr, const char *, const char **);
diff --git a/src/atimach64.c b/src/atimach64.c
index 2cdec78..5cb991d 100644
--- a/src/atimach64.c
+++ b/src/atimach64.c
@@ -854,9 +854,7 @@ ATIMach64Set
         outf(HOST_CNTL, pATIHW->host_cntl);
 
         /* Set host transfer window address and size clamp */
-        pATI->pHOST_DATA =
-            (CARD8 *)pATI->pBlock[GetBits(HOST_DATA_0, BLOCK_SELECT)] +
-            (HOST_DATA_0 & MM_IO_SELECT);
+        pATI->pHOST_DATA = ATIHostDataAddr(HOST_DATA_0);
         pATI->nHostFIFOEntries = pATI->nFIFOEntries >> 1;
         if (pATI->nHostFIFOEntries > 16)
             pATI->nHostFIFOEntries = 16;
@@ -980,6 +978,7 @@ ATIMach64Set
             CacheRegister(DP_BKGD_CLR);
             CacheRegister(DP_FRGD_CLR);
             CacheRegister(DP_WRITE_MASK);
+            CacheRegister(DP_PIX_WIDTH);
             CacheRegister(DP_MIX);
 
             CacheRegister(CLR_CMP_CLR);
diff --git a/src/atimach64accel.c b/src/atimach64accel.c
index 272de3d..b9d312e 100644
--- a/src/atimach64accel.c
+++ b/src/atimach64accel.c
@@ -79,7 +79,7 @@
 /*
  * X-to-Mach64 mix translation table.
  */
-static CARD8 ATIMach64ALU[16] =
+CARD8 ATIMach64ALU[16] =
 {
     MIX_0,                       /* GXclear */
     MIX_AND,                     /* GXand */
@@ -105,7 +105,7 @@ static CARD8 ATIMach64ALU[16] =
  * This function ensures the current scissor settings do not interfere with
  * the current draw request.
  */
-static void
+void
 ATIMach64ValidateClip
 (
     ATIPtr pATI,
@@ -162,6 +162,7 @@ ATIMach64Sync
 	    UncacheRegister(DP_BKGD_CLR);
 	    UncacheRegister(DP_FRGD_CLR);
 	    UncacheRegister(DP_WRITE_MASK);
+	    UncacheRegister(DP_PIX_WIDTH);
 	    UncacheRegister(DP_MIX);
 	    UncacheRegister(CLR_CMP_CNTL);
 	}
@@ -207,6 +208,7 @@ ATIMach64Sync
 	    CacheRegister(DP_BKGD_CLR);
 	    CacheRegister(DP_FRGD_CLR);
 	    CacheRegister(DP_WRITE_MASK);
+	    CacheRegister(DP_PIX_WIDTH);
 	    CacheRegister(DP_MIX);
 	    CacheRegister(CLR_CMP_CNTL);
 	}
@@ -245,8 +247,14 @@ ATIMach64Sync
       }
     }
 
+#ifdef USE_EXA
+    /* EXA sets pEXA->needsSync to FALSE on its own */
+#endif
+
+#ifdef USE_XAA
     if (pATI->pXAAInfo)
         pATI->pXAAInfo->NeedToSync = FALSE;
+#endif
 
     if (pATI->Chip >= ATI_CHIP_264VTB)
     {
@@ -307,6 +315,7 @@ TestRegisterCachingDP(ScrnInfoPtr pScree
     TestRegisterCaching(DP_BKGD_CLR);
     TestRegisterCaching(DP_FRGD_CLR);
     TestRegisterCaching(DP_WRITE_MASK);
+    TestRegisterCaching(DP_PIX_WIDTH);
     TestRegisterCaching(DP_MIX);
 
     TestRegisterCaching(CLR_CMP_CLR);
@@ -373,6 +382,7 @@ TestRegisterCachingXV(ScrnInfoPtr pScree
     TestRegisterCaching(SCALER_BUF1_OFFSET_V);
 }
 
+#ifdef USE_XAA
 /*
  * ATIMach64SetupForScreenToScreenCopy --
  *
@@ -1031,3 +1041,4 @@ ATIMach64AccelInit
 
     return ATIMach64MaxY;
 }
+#endif /* USE_XAA */
diff --git a/src/atimach64accel.h b/src/atimach64accel.h
index a1b9426..2917b7a 100644
--- a/src/atimach64accel.h
+++ b/src/atimach64accel.h
@@ -27,11 +27,17 @@
 #include "atipriv.h"
 
 #include "xaa.h"
+#include "exa.h"
 
 #define ATIMach64MaxX  8191
 #define ATIMach64MaxY 32767
 
+#ifdef USE_EXA
+extern Bool ATIMach64ExaInit(ScreenPtr);
+#endif
+#ifdef USE_XAA
 extern int  ATIMach64AccelInit(ATIPtr, XAAInfoRecPtr);
+#endif
 extern void ATIMach64Sync(ScrnInfoPtr);
 
 #endif /* ___ATIMACH64ACCEL_H___ */
diff --git a/src/atimach64exa.c b/src/atimach64exa.c
new file mode 100644
index 0000000..c68495b
--- /dev/null
+++ b/src/atimach64exa.c
@@ -0,0 +1,622 @@
+/*
+ * Copyright 2003 through 2004 by Marc Aurele La France (TSI @ UQV), tsi at xfree86.org
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of Marc Aurele La France not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  Marc Aurele La France makes no representations
+ * about the suitability of this software for any purpose.  It is provided
+ * "as-is" without express or implied warranty.
+ *
+ * MARC AURELE LA FRANCE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.  IN NO
+ * EVENT SHALL MARC AURELE LA FRANCE BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Copyright 1999-2000 Precision Insight, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+/* 
+ * DRI support by:
+ *    Manuel Teira
+ *    Leif Delgass <ldelgass at retinalburn.net>
+ *
+ * EXA support by:
+ *    Jakub Stachowski <qbast at go2.pl>
+ *    George Sapountzis <gsap7 at yahoo.gr>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "ati.h"
+#include "atichip.h"
+#include "atidri.h"
+#include "atimach64accel.h"
+#include "atimach64io.h"
+#include "atipriv.h"
+#include "atiregs.h"
+
+#ifdef XF86DRI_DEVEL
+#include "mach64_dri.h"
+#include "mach64_sarea.h"
+#endif
+
+#ifdef USE_EXA
+extern CARD8 ATIMach64ALU[];
+
+extern void
+ATIMach64ValidateClip
+(
+    ATIPtr pATI,
+    int sc_left,
+    int sc_right,
+    int sc_top,
+    int sc_bottom
+);
+
+#if 0
+#define MACH64_TRACE(x)				\
+do {						\
+    ErrorF("Mach64(%s): ", __FUNCTION__);	\
+    ErrorF x;					\
+} while(0)
+#else
+#define MACH64_TRACE(x) do { } while(0)
+#endif
+
+#if 0
+#define MACH64_FALLBACK(x)			\
+do {						\
+    ErrorF("Fallback(%s): ", __FUNCTION__);	\
+    ErrorF x;					\
+    return FALSE;				\
+} while (0)
+#else
+#define MACH64_FALLBACK(x) return FALSE
+#endif
+
+static void
+Mach64WaitMarker(ScreenPtr pScreenInfo, int Marker)
+{
+    ATIMach64Sync(xf86Screens[pScreenInfo->myNum]);
+}
+
+static Bool
+Mach64GetDatatypeBpp(PixmapPtr pPix, CARD32 *pix_width)
+{
+	int bpp = pPix->drawable.bitsPerPixel;
+
+	switch (bpp) {
+	case 8:
+		*pix_width =
+			SetBits(PIX_WIDTH_8BPP, DP_DST_PIX_WIDTH) |
+			SetBits(PIX_WIDTH_8BPP, DP_SRC_PIX_WIDTH) |
+			SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH);
+		break;
+	case 16:
+		*pix_width =
+			SetBits(PIX_WIDTH_16BPP, DP_DST_PIX_WIDTH) |
+			SetBits(PIX_WIDTH_16BPP, DP_SRC_PIX_WIDTH) |
+			SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH);
+		break;
+	case 24:
+		*pix_width =
+			SetBits(PIX_WIDTH_8BPP, DP_DST_PIX_WIDTH) |
+			SetBits(PIX_WIDTH_8BPP, DP_SRC_PIX_WIDTH) |
+			SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH);
+		break;
+	case 32:
+		*pix_width =
+			SetBits(PIX_WIDTH_32BPP, DP_DST_PIX_WIDTH) |
+			SetBits(PIX_WIDTH_32BPP, DP_SRC_PIX_WIDTH) |
+			SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH);
+		break;
+	default:
+		MACH64_FALLBACK(("Unsupported bpp: %d\n", bpp));
+	}
+
+#if X_BYTE_ORDER == X_LITTLE_ENDIAN
+
+        *pix_width |= DP_BYTE_PIX_ORDER;
+
+#endif /* X_BYTE_ORDER */
+
+	return TRUE;
+}
+
+static Bool
+Mach64GetOffsetPitch(PixmapPtr pPix, int bpp, CARD32 *pitch_offset,
+		     unsigned int offset, unsigned int pitch)
+{
+#if 0
+    ScrnInfoPtr pScreenInfo = xf86Screens[pPix->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+
+    if (pitch % pATI->pExa->pixmapPitchAlign != 0)
+        MACH64_FALLBACK(("Bad pitch 0x%08x\n", pitch));
+
+    if (offset % pATI->pExa->pixmapOffsetAlign != 0)
+        MACH64_FALLBACK(("Bad offset 0x%08x\n", offset));
+#endif
+
+    /* pixels / 8 = ((bytes * 8) / bpp) / 8 = bytes / bpp */
+    pitch = pitch / bpp;
+
+    /* bytes / 8 */
+    offset = offset >> 3;
+
+    *pitch_offset = ((pitch  << 22) | (offset <<  0));
+
+    return TRUE;
+}
+
+static Bool
+Mach64GetPixmapOffsetPitch(PixmapPtr pPix, CARD32 *pitch_offset)
+{
+    CARD32 pitch, offset;
+    int bpp;
+
+    bpp = pPix->drawable.bitsPerPixel;
+    if (bpp == 24)
+        bpp = 8;
+
+    pitch = exaGetPixmapPitch(pPix);
+    offset = exaGetPixmapOffset(pPix);
+
+    return Mach64GetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch);
+}
+
+static Bool
+Mach64PrepareCopy
+(
+    PixmapPtr pSrcPixmap,
+    PixmapPtr pDstPixmap,
+    int       xdir,
+    int       ydir,
+    int       alu,
+    Pixel     planemask
+)
+{
+    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+    CARD32 src_pitch_offset, dst_pitch_offset, dp_pix_width;
+
+    ATIDRISync(pScreenInfo);
+
+    if (!Mach64GetDatatypeBpp(pDstPixmap, &dp_pix_width))
+        return FALSE;
+    if (!Mach64GetPixmapOffsetPitch(pSrcPixmap, &src_pitch_offset))
+        return FALSE;
+    if (!Mach64GetPixmapOffsetPitch(pDstPixmap, &dst_pitch_offset))
+        return FALSE;
+
+    ATIMach64WaitForFIFO(pATI, 7);
+    outf(DP_WRITE_MASK, planemask);
+    outf(DP_PIX_WIDTH, dp_pix_width);
+    outf(SRC_OFF_PITCH, src_pitch_offset);
+    outf(DST_OFF_PITCH, dst_pitch_offset); 
+    
+    outf(DP_SRC, DP_MONO_SRC_ALLONES |
+        SetBits(SRC_BLIT, DP_FRGD_SRC) | SetBits(SRC_BKGD, DP_BKGD_SRC));
+    outf(DP_MIX, SetBits(ATIMach64ALU[alu], DP_FRGD_MIX));
+
+    outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
+
+    pATI->dst_cntl = 0;
+
+    if (ydir > 0)
+        pATI->dst_cntl |= DST_Y_DIR;
+    if (xdir > 0)
+        pATI->dst_cntl |= DST_X_DIR;
+
+    if (pATI->XModifier == 1)
+        outf(DST_CNTL, pATI->dst_cntl);
+    else
+        pATI->dst_cntl |= DST_24_ROT_EN;
+
+    return TRUE;
+}
+
+static void
+Mach64Copy
+(
+    PixmapPtr pDstPixmap,
+    int       srcX,
+    int       srcY,
+    int       dstX,
+    int       dstY,
+    int       w,
+    int       h
+)
+{
+    ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+    /* Scale X-axis values by XModifier (as Mach64Solid does); Y is unscaled */
+    srcX *= pATI->XModifier;
+    dstX *= pATI->XModifier;
+    w    *= pATI->XModifier;
+
+    ATIDRISync(pScreenInfo);
+
+    /* Disable clipping if it gets in the way */
+    ATIMach64ValidateClip(pATI, dstX, dstX + w - 1, dstY, dstY + h - 1);
+
+    if (!(pATI->dst_cntl & DST_X_DIR))
+    {
+        srcX += w - 1;
+        dstX += w - 1;
+    }
+
+    if (!(pATI->dst_cntl & DST_Y_DIR))
+    {
+        srcY += h - 1;
+        dstY += h - 1;
+    }
+
+    if (pATI->XModifier != 1)
+        outf(DST_CNTL, pATI->dst_cntl | SetBits((dstX / 4) % 6, DST_24_ROT));
+
+    ATIMach64WaitForFIFO(pATI, 4);
+    outf(SRC_Y_X, SetWord(srcX, 1) | SetWord(srcY, 0));
+    outf(SRC_WIDTH1, w);
+    outf(DST_Y_X, SetWord(dstX, 1) | SetWord(dstY, 0));
+    outf(DST_HEIGHT_WIDTH, SetWord(w, 1) | SetWord(h, 0));
+
+    /*
+     * On VTB's and later, the engine will randomly not wait for a copy
+     * operation to commit its results to video memory before starting the next
+     * one.  The probability of such occurrences increases with GUI_WB_FLUSH
+     * (or GUI_WB_FLUSH_P) setting, bitsPerPixel and/or CRTC clock.  This
+     * would point to some kind of video memory bandwidth problem were it not
+     * for the fact that the problem occurs less often (but still occurs) when
+     * copying larger rectangles.
+     */
+    if ((pATI->Chip >= ATI_CHIP_264VTB) && !pATI->OptionDevel)
+    {
+        exaMarkSync(pScreenInfo->pScreen); /* Force sync. */
+        exaWaitSync(pScreenInfo->pScreen); /* Sync and notify EXA. */
+    }
+}
+
+static void Mach64DoneCopy(PixmapPtr pDstPixmap) { }
+
+static Bool
+Mach64PrepareSolid
+(
+    PixmapPtr pPixmap,
+    int       alu, 
+    Pixel     planemask, 
+    Pixel     fg
+)
+{
+    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+    CARD32 dst_pitch_offset, dp_pix_width;
+
+    ATIDRISync(pScreenInfo);
+
+    if (!Mach64GetDatatypeBpp(pPixmap, &dp_pix_width))
+        return FALSE;
+    if (!Mach64GetPixmapOffsetPitch(pPixmap, &dst_pitch_offset))
+        return FALSE;
+
+    ATIMach64WaitForFIFO(pATI, 7);
+    outf(DP_WRITE_MASK, planemask);
+    outf(DP_PIX_WIDTH, dp_pix_width);
+    outf(DST_OFF_PITCH, dst_pitch_offset); 
+
+    outf(DP_SRC, DP_MONO_SRC_ALLONES |
+        SetBits(SRC_FRGD, DP_FRGD_SRC) | SetBits(SRC_BKGD, DP_BKGD_SRC));
+    outf(DP_FRGD_CLR, fg);
+    outf(DP_MIX, SetBits(ATIMach64ALU[alu], DP_FRGD_MIX));
+
+    outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
+
+    if (pATI->XModifier == 1)
+        outf(DST_CNTL, DST_X_DIR | DST_Y_DIR);
+
+    return TRUE;
+}
+
+static void
+Mach64Solid
+(
+    PixmapPtr pPixmap, 
+    int       x1, 
+    int       y1, 
+    int       x2, 
+    int       y2
+)
+{
+    ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+
+    int x = x1;
+    int y = y1;
+    int w = x2-x1;
+    int h = y2-y1;
+
+    ATIDRISync(pScreenInfo);
+
+    if (pATI->XModifier != 1)
+    {
+        x *= pATI->XModifier;
+        w *= pATI->XModifier;
+
+        outf(DST_CNTL, SetBits((x / 4) % 6, DST_24_ROT) |
+            (DST_X_DIR | DST_Y_DIR | DST_24_ROT_EN));
+    }
+
+    /* Disable clipping if it gets in the way */
+    ATIMach64ValidateClip(pATI, x, x + w - 1, y, y + h - 1);
+
+    ATIMach64WaitForFIFO(pATI, 2);
+    outf(DST_Y_X, SetWord(x, 1) | SetWord(y, 0));
+    outf(DST_HEIGHT_WIDTH, SetWord(w, 1) | SetWord(h, 0));
+}
+
+static void Mach64DoneSolid(PixmapPtr pPixmap) { }
+
+/* Compute log base 2 of val. */
+static __inline__ int Mach64Log2(int val)
+{
+    int bits;
+
+    for (bits = 0; val != 0; val >>= 1, ++bits)
+        ;
+    return bits - 1;
+}
+
+/*
+ * Memory layout for EXA with DRI (no local_textures):
+ * | front  | back   | depth  | textures | pixmaps, xv | c |
+ *
+ * 1024x768 at 16bpp with 8 MB:
+ * | 1.5 MB | 1.5 MB | 1.5 MB | 0        | ~3.5 MB     | c |
+ *
+ * 1024x768 at 32bpp with 8 MB:
+ * | 3.0 MB | 3.0 MB | 1.5 MB | 0        | ~0.5 MB     | c |
+ *
+ * "c" is the hw cursor which occupies 1KB
+ */
+static void
+Mach64SetupMemEXA(ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScreenInfo = xf86Screens[pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+
+    int cpp = (pScreenInfo->bitsPerPixel + 7) / 8;
+    /* front and back buffer */
+    int bufferSize = pScreenInfo->virtualY * pScreenInfo->displayWidth * cpp;
+    /* always 16-bit z-buffer */
+    int depthSize  = pScreenInfo->virtualY * pScreenInfo->displayWidth * 2;
+
+    ExaDriverPtr pExa = pATI->pExa;
+
+    pExa->memoryBase = pATI->pMemory;
+    pExa->memorySize = pScreenInfo->videoRam * 1024;
+    pExa->offScreenBase = bufferSize;
+
+#ifdef XF86DRI_DEVEL
+    if (pATI->directRenderingEnabled)
+    {
+	ATIDRIServerInfoPtr pATIDRIServer = pATI->pDRIServerInfo;
+	Bool is_pci = pATIDRIServer->IsPCI;
+
+	int textureSize = 0;
+	int pixmapCache = 0;
+	int next = 0;
+
+	/* front buffer */
+	pATIDRIServer->frontOffset = 0;
+	pATIDRIServer->frontPitch = pScreenInfo->displayWidth;
+	next += bufferSize;
+
+	/* back buffer */
+	pATIDRIServer->backOffset = next;
+	pATIDRIServer->backPitch = pScreenInfo->displayWidth;
+	next += bufferSize;
+
+	/* depth buffer */
+	pATIDRIServer->depthOffset = next;
+	pATIDRIServer->depthPitch = pScreenInfo->displayWidth;
+	next += depthSize;
+
+	/* ATIScreenInit does check for this condition. */
+	if (next > pExa->memorySize)
+	{
+	    xf86DrvMsg(pScreen->myNum, X_WARNING,
+		"DRI static buffer allocation failed, disabling DRI --"
+		"need at least %d kB video memory\n", next / 1024 );
+	    ATIDRICloseScreen(pScreen);
+	    pATI->directRenderingEnabled = FALSE;
+	}
+
+	/* local textures */
+
+	/* Reserve approx. half of offscreen memory for local textures */
+	textureSize = (pExa->memorySize - next) / 2;
+
+	/* In case DRI requires more offscreen memory than available,
+	 * should not happen as ATIScreenInit would not have enabled DRI */
+	if (textureSize < 0)
+	    textureSize = 0;
+
+	/* Try for enough pixmap cache for a full viewport */
+	pixmapCache = (pExa->memorySize - next) - textureSize;
+	if (pixmapCache < bufferSize)
+	    textureSize = 0;
+
+	/* Don't allocate a local texture heap for AGP unless requested */
+	if ( !is_pci && !pATI->OptionLocalTextures )
+	    textureSize = 0;
+
+	if (textureSize > 0)
+	{
+	    int l = Mach64Log2(textureSize / MACH64_NR_TEX_REGIONS);
+	    if (l < MACH64_LOG_TEX_GRANULARITY)
+		l = MACH64_LOG_TEX_GRANULARITY;
+	    pATIDRIServer->logTextureGranularity = l;
+
+	    /* Round the texture size down to the nearest whole number of
+	     * texture regions.
+	     */
+	    textureSize = (textureSize >> l) << l;
+	}
+
+	/* Set a minimum usable local texture heap size.  This will fit
+	 * two 256x256 textures.  We check this after any rounding of
+	 * the texture area.
+	 */
+	if (textureSize < 256*256 * cpp * 2)
+	    textureSize = 0;
+
+	/* Disable DRI for PCI if cannot allocate a local texture heap */
+	if ( is_pci && textureSize == 0 )
+	{
+	    xf86DrvMsg(pScreen->myNum, X_WARNING,
+		"Not enough memory for local textures, disabling DRI\n");
+	    ATIDRICloseScreen(pScreen);
+	    pATI->directRenderingEnabled = FALSE;
+	}
+
+	pATIDRIServer->textureOffset = next;
+	pATIDRIServer->textureSize = textureSize;
+	next += textureSize;
+
+	if (pATI->directRenderingEnabled)
+	    pExa->offScreenBase = next;
+    }
+#endif /* XF86DRI_DEVEL */
+
+    xf86DrvMsg(pScreen->myNum, X_INFO,
+        "EXA memory management initialized\n"
+        "\t base     :  %10p\n"
+        "\t offscreen: +%10lx\n"
+        "\t size     : +%10lx\n"
+        "\t cursor   :  %10p\n",
+        pExa->memoryBase,
+        pExa->offScreenBase,
+        pExa->memorySize,
+        pATI->pCursorImage);
+
+    if (TRUE || xf86GetVerbosity() > 1)
+    {
+        int offscreen = pExa->memorySize - pExa->offScreenBase;
+        int viewport = bufferSize;
+        int dvdframe = 720*480*cpp; /* enough for single-buffered DVD */
+
+        xf86DrvMsg(pScreen->myNum, X_INFO,
+            "Will use %d kB of offscreen memory for EXA\n"
+            "\t\t or %5.2f viewports (composite)\n"
+            "\t\t or %5.2f dvdframes (xvideo)\n",
+            offscreen / 1024,
+            1.0 * offscreen / viewport,
+            1.0 * offscreen / dvdframe);
+    }
+
+#ifdef XF86DRI_DEVEL
+    if (pATI->directRenderingEnabled)
+    {
+        ATIDRIServerInfoPtr pATIDRIServer = pATI->pDRIServerInfo;
+
+        xf86DrvMsg(pScreen->myNum, X_INFO,
+                   "Will use back  buffer at offset 0x%x\n",
+                   pATIDRIServer->backOffset);
+
+        xf86DrvMsg(pScreen->myNum, X_INFO,
+                   "Will use depth buffer at offset 0x%x\n",
+                   pATIDRIServer->depthOffset);
+
+        if (pATIDRIServer->textureSize > 0)
+        {
+            xf86DrvMsg(pScreen->myNum, X_INFO,
+                   "Will use %d kB for local textures at offset 0x%x\n",
+                   pATIDRIServer->textureSize/1024,
+                   pATIDRIServer->textureOffset);
+        }
+    }
+#endif /* XF86DRI_DEVEL */
+
+    pExa->pixmapOffsetAlign = 64;
+    pExa->pixmapPitchAlign = 64;
+
+    pExa->flags = EXA_OFFSCREEN_PIXMAPS;
+
+    pExa->maxX = ATIMach64MaxX;
+    pExa->maxY = ATIMach64MaxY;
+}
+
+Bool ATIMach64ExaInit(ScreenPtr pScreen)
+{
+    ScrnInfoPtr pScreenInfo = xf86Screens[pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+    ExaDriverPtr pExa;
+
+    /* FIXME: which chips support EXA ? */
+    if (pATI->Chip < ATI_CHIP_264CT)
+    {
+        xf86DrvMsg(pScreenInfo->scrnIndex, X_ERROR,
+            "EXA is not supported for ATI chips earlier than "
+            "the ATI Mach64.\n");
+        return FALSE;
+    }
+
+    pExa = exaDriverAlloc();
+    if (!pExa)
+        return FALSE;
+
+    pATI->pExa = pExa;
+
+    pExa->exa_major = 2;
+    pExa->exa_minor = 0;
+
+    Mach64SetupMemEXA(pScreen);
+
+    pExa->WaitMarker = Mach64WaitMarker;
+
+    pExa->PrepareSolid = Mach64PrepareSolid;
+    pExa->Solid = Mach64Solid;
+    pExa->DoneSolid = Mach64DoneSolid;
+
+    pExa->PrepareCopy = Mach64PrepareCopy;
+    pExa->Copy = Mach64Copy;
+    pExa->DoneCopy = Mach64DoneCopy;
+
+    if (!exaDriverInit(pScreen, pATI->pExa)) {
+	xfree(pATI->pExa);
+	pATI->pExa = NULL;
+	return FALSE;
+    }
+
+    return TRUE;
+}
+#endif
diff --git a/src/atimach64io.h b/src/atimach64io.h
index 9dbf244..e2d3c7c 100644
--- a/src/atimach64io.h
+++ b/src/atimach64io.h
@@ -206,6 +206,10 @@ extern void ATIMach64PollEngineStatus(AT
 
 #ifdef XF86DRI_DEVEL
  
+/*
+ * DRI Sync and Lock definitions.
+ */
+
 #define ATIDRIWaitForIdle(_pATI)                                \
 do {                                                            \
     ATIDRIServerInfoPtr pATIDRIServer = _pATI->pDRIServerInfo;  \
@@ -226,6 +230,53 @@ do {                                    
     }                                                           \
 } while (0)
 
+/*
+ * Set upon DRISwapContext and when DRI accesses the GPU engine
+ * from within the server, see DRIInitBuffers/DRIMoveBuffers.
+ *
+ * Forces the EXA/XAA software paths to sync before accessing the FB memory.
+ */
+static __inline__ void ATIDRIMarkSyncInt(ScrnInfoPtr _pScrInfo)
+{
+    ATIPtr _pATI=ATIPTR(_pScrInfo);
+#ifdef USE_EXA
+    if (_pATI->useEXA)
+        exaMarkSync(_pScrInfo->pScreen);
+#endif
+#ifdef USE_XAA
+    if (!_pATI->useEXA)
+        SET_SYNC_FLAG(_pATI->pXAAInfo); /* NeedToSync = TRUE */
+#endif
+}
+
+/*
+ * Set upon DRISwapContext and when the server acquires the DRI lock.
+ *
+ * Forces the EXA/XAA accelerated paths to sync before accessing the GPU engine.
+ */
+static __inline__ void ATIDRIMarkSyncExt(ScrnInfoPtr _pScrInfo)
+{
+    ATIPtr _pATI=ATIPTR(_pScrInfo);
+    _pATI->NeedDRISync = TRUE;
+}
+
+static __inline__ void ATIDRISync(ScrnInfoPtr _pScrInfo)
+{
+    ATIPtr _pATI=ATIPTR(_pScrInfo);
+#ifdef USE_EXA
+    if (_pATI->directRenderingEnabled && _pATI->pExa)
+    {
+        if (_pATI->NeedDRISync) exaWaitSync(_pScrInfo->pScreen);
+    }
+#endif
+#ifdef USE_XAA
+    if (_pATI->directRenderingEnabled && _pATI->pXAAInfo)
+    {
+        if (_pATI->NeedDRISync) (*_pATI->pXAAInfo->Sync)(_pScrInfo);
+    }
+#endif
+}
+
 #define ATIDRILock(_pScrInfo)                   \
 do                                              \
 {                                               \
@@ -233,7 +284,7 @@ do                                      
     if (_pATI->directRenderingEnabled)          \
     {                                           \
         DRILock(_pScrInfo->pScreen, 0);         \
-        pATI->NeedDRISync = TRUE;               \
+        ATIDRIMarkSyncExt(_pScrInfo);           \
     }                                           \
 } while (0)
                                                                                 
@@ -247,16 +298,6 @@ do                                      
     }                                           \
 } while (0)
 
-#define ATIDRISync(_pScrInfo)                                                   \
-do                                                                              \
-{                                                                               \
-    ATIPtr _pATI=ATIPTR(_pScrInfo);                                             \
-    if (_pATI->directRenderingEnabled && _pATI->pXAAInfo)                       \
-    {                                                                           \
-        if (_pATI->NeedDRISync) (*_pATI->pXAAInfo->Sync)(_pScrInfo);            \
-    }                                                                           \
-} while (0)
-                                                                                               
 #else /* XF86DRI_DEVEL */
 
                                                                                                
@@ -368,4 +409,11 @@ extern void ATIMach64AccessPLLReg(ATIPtr
 
 #endif
 
+/*
+ * Return the MMIO address of register, used for HOST_DATA_X only.
+ */
+#define ATIHostDataAddr(_Register)                             \
+    ((CARD8 *)pATI->pBlock[GetBits(_Register, BLOCK_SELECT)] + \
+              ((_Register) & MM_IO_SELECT))
+
 #endif /* ___ATIMACH64IO_H___ */
diff --git a/src/atimach64xv.c b/src/atimach64xv.c
index 75cda6c..b72f88f 100644
--- a/src/atimach64xv.c
+++ b/src/atimach64xv.c
@@ -501,6 +501,25 @@ ATIMach64GetPortAttribute
     return Success;
 }
 
+static pointer
+ATIMach64XVMemAlloc
+(
+    ScreenPtr pScreen,
+    pointer   pVideo,
+    int       size,
+    int       *offset,
+    ATIPtr    pATI
+);
+
+static void
+ATIMach64XVMemFree
+(
+    ScreenPtr pScreen,
+    pointer   pVideo,
+    ATIPtr    pATI
+);
+
+#ifdef USE_XAA
 /*
  * ATIMach64RemoveLinearCallback --
  *
@@ -519,6 +538,7 @@ ATIMach64RemoveLinearCallback
     pATI->pXVBuffer = NULL;
     outf(OVERLAY_SCALE_CNTL, SCALE_EN);
 }
+#endif /* USE_XAA */
 
 /*
  * ATIMach64StopVideo --
@@ -543,19 +563,23 @@ ATIMach64StopVideo
 
     REGION_EMPTY(pScreen, &pATI->VideoClip);
 
-    if (!Cleanup)
+#ifdef USE_XAA
+    if (!pATI->useEXA && !Cleanup)
     {
         /*
          * Free offscreen buffer if/when its allocation is needed by XAA's
          * pixmap cache.
          */
-        if (pATI->pXVBuffer)
-            pATI->pXVBuffer->RemoveLinearCallback =
+        FBLinearPtr linear = (FBLinearPtr)pATI->pXVBuffer;
+        if (linear)
+            linear->RemoveLinearCallback =
                 ATIMach64RemoveLinearCallback;
         return;
     }
+#endif /* USE_XAA */
 
-    pATI->pXVBuffer = ATIResizeOffscreenLinear(pScreen, pATI->pXVBuffer, 0);
+    ATIMach64XVMemFree(pScreen, pATI->pXVBuffer, pATI);
+    pATI->pXVBuffer = NULL;
     outf(OVERLAY_SCALE_CNTL, SCALE_EN);
 }
 
@@ -956,6 +980,7 @@ ATIMach64PutImage
     int       SrcTop, SrcLeft, DstWidth, DstHeight;
     int       Top, Bottom, Left, Right, nLine, nPixel, Offset;
     int       OffsetV, OffsetU;
+    int       XVOffset;
     int       tmp;
     CARD8     *pDst;
 
@@ -981,11 +1006,11 @@ ATIMach64PutImage
      */
     DstPitch = /* bytes */
         (DstWidth + DstWidth + 15) & ~15;
-    DstSize =  /* pixels */
-        ((DstPitch * DstHeight) + pATI->AdjustDepth - 1) / pATI->AdjustDepth;
+    DstSize =  /* bytes */
+        (DstPitch * DstHeight);
 
-    pATI->pXVBuffer = ATIResizeOffscreenLinear(pScreen, pATI->pXVBuffer,
-        (pATI->DoubleBuffer + 1) * DstSize);
+    pATI->pXVBuffer = ATIMach64XVMemAlloc(pScreen, pATI->pXVBuffer,
+        (pATI->DoubleBuffer + 1) * DstSize, &XVOffset, pATI);
 
     if (!pATI->pXVBuffer)
     {
@@ -993,7 +1018,7 @@ ATIMach64PutImage
             return BadAlloc;
 
         pATI->pXVBuffer =
-            ATIResizeOffscreenLinear(pScreen, pATI->pXVBuffer, DstSize);
+            ATIMach64XVMemAlloc(pScreen, pATI->pXVBuffer, DstSize, &XVOffset, pATI);
 
         if (!pATI->pXVBuffer)
             return BadAlloc;
@@ -1012,8 +1037,7 @@ ATIMach64PutImage
     /* Synchronise video memory accesses */
     ATIMach64Sync(pScreenInfo);
 
-    Offset = (pATI->pXVBuffer->offset * pATI->AdjustDepth) +
-             (pATI->CurrentBuffer * DstSize * pATI->AdjustDepth);
+    Offset = XVOffset + pATI->CurrentBuffer * DstSize;
     pDst = pATI->pMemoryLE;
     pDst += Offset;
 
@@ -1113,6 +1137,7 @@ ATIMach64AllocateSurface
 {
     ScreenPtr pScreen;
     ATIPtr    pATI = ATIPTR(pScreenInfo);
+    int       XVOffset;
 
     if (pATI->ActiveSurface)
         return BadAlloc;
@@ -1126,13 +1151,12 @@ ATIMach64AllocateSurface
 
     pScreen = pScreenInfo->pScreen;
 
-    pATI->pXVBuffer = ATIResizeOffscreenLinear(pScreen, pATI->pXVBuffer,
-        ((Height * pATI->SurfacePitch) + pATI->AdjustDepth - 1) /
-        pATI->AdjustDepth);
+    pATI->pXVBuffer = ATIMach64XVMemAlloc(pScreen, pATI->pXVBuffer,
+        Height * pATI->SurfacePitch, &XVOffset, pATI);
     if (!pATI->pXVBuffer)
         return BadAlloc;
 
-    pATI->SurfaceOffset = pATI->pXVBuffer->offset * pATI->AdjustDepth;
+    pATI->SurfaceOffset = XVOffset;
 
     pSurface->pScrn = pScreenInfo;
     pSurface->id = ImageID;
@@ -1167,8 +1191,8 @@ ATIMach64FreeSurface
         return Success;
 
     outf(OVERLAY_SCALE_CNTL, SCALE_EN);
-    pATI->pXVBuffer = ATIResizeOffscreenLinear(pSurface->pScrn->pScreen,
-        pATI->pXVBuffer, 0);
+    ATIMach64XVMemFree(pSurface->pScrn->pScreen, pATI->pXVBuffer, pATI);
+    pATI->pXVBuffer = NULL;
     pATI->ActiveSurface = FALSE;
 
     return Success;
@@ -1498,3 +1522,84 @@ ATIMach64CloseXVideo
 
     REGION_UNINIT(pScreen, &pATI->VideoClip);
 }
+
+static pointer
+ATIMach64XVMemAlloc    /* Obtain an offscreen XVideo buffer through EXA or XAA */
+(
+    ScreenPtr pScreen,  /* handed to the EXA/XAA offscreen allocator */
+    pointer   pVideo,   /* current buffer to reuse or replace; may be NULL */
+    int       size,     /* requested size; presumably bytes -- see XAA path */
+    int       *offset,  /* out: offset of the returned buffer, 0 on failure */
+    ATIPtr    pATI      /* supplies useEXA and AdjustDepth */
+)
+{
+#ifdef USE_EXA
+    if (pATI->useEXA) {
+        ExaOffscreenArea *area = (ExaOffscreenArea *)pVideo;
+
+        if (area != NULL) {
+            if (area->size >= size) {   /* existing area is large enough: reuse */
+                *offset = area->offset;
+                return area;
+            }
+
+            exaOffscreenFree(pScreen, area);    /* too small: drop it first */
+        }
+
+        area = exaOffscreenAlloc(pScreen, size, 64, TRUE, NULL, NULL);  /* NOTE(review): 64/TRUE look like align/locked -- confirm in exa.h */
+        if (area != NULL) {
+            *offset = area->offset;
+            return area;
+        }
+    }
+#endif /* USE_EXA */
+
+#ifdef USE_XAA
+    if (!pATI->useEXA) {
+        FBLinearPtr linear = (FBLinearPtr)pVideo;
+        int cpp = pATI->AdjustDepth;
+
+        /* XAA allocates in units of pixels at the screen bpp, so convert the
+         * requested size to that unit, rounding up to a whole pixel.
+         */
+        size = (size + cpp - 1) / cpp;
+
+        linear = ATIResizeOffscreenLinear(pScreen, linear, size);
+        if (linear != NULL) {
+            *offset = linear->offset * cpp;     /* scale the offset back up by cpp */
+            return linear;
+        }
+    }
+#endif /* USE_XAA */
+
+    *offset = 0;        /* failure; an undersized EXA area was already freed above */
+    return NULL;
+}
+
+static void
+ATIMach64XVMemFree     /* Release an XVideo buffer through EXA or XAA */
+(
+    ScreenPtr pScreen,  /* handed to the EXA/XAA release call */
+    pointer   pVideo,   /* buffer to release; NULL is ignored */
+    ATIPtr    pATI      /* useEXA selects how pVideo is interpreted */
+)
+{       /* pVideo is passed by value, so the caller must clear its own pointer */
+#ifdef USE_EXA
+    if (pATI->useEXA) {
+        ExaOffscreenArea *area = (ExaOffscreenArea *)pVideo;
+
+        if (area != NULL)
+            exaOffscreenFree(pScreen, area);
+    }
+#endif /* USE_EXA */
+
+#ifdef USE_XAA
+    if (!pATI->useEXA) {
+        FBLinearPtr linear = (FBLinearPtr)pVideo;
+
+        if (linear != NULL)
+            ATIResizeOffscreenLinear(pScreen, linear, 0);   /* presumably size 0 frees the area -- confirm */
+    }
+#endif /* USE_XAA */
+}
+
diff --git a/src/atimisc.c b/src/atimisc.c
index de118c0..ee4b518 100644
--- a/src/atimisc.c
+++ b/src/atimisc.c
@@ -127,7 +127,19 @@ ATISetup
 
             ATIfbSymbols,
             ATIshadowfbSymbols,
+
+#ifdef USE_EXA
+
+            ATIexaSymbols,
+
+#endif /* USE_EXA */
+
+#ifdef USE_XAA
+
             ATIxaaSymbols,
+
+#endif /* USE_XAA */
+
             ATIramdacSymbols,
             ATIi2cSymbols,
             NULL);
diff --git a/src/atioption.c b/src/atioption.c
index 6c862ed..28bac3a 100644
--- a/src/atioption.c
+++ b/src/atioption.c
@@ -219,6 +219,13 @@ const OptionInfoRec ATIPublicOptions[] =
         FALSE,
     },
     {
+        ATI_OPTION_ACCELMETHOD,
+        "AccelMethod",
+        OPTV_STRING,
+        {0, },
+        FALSE
+    },
+    {
         -1,
         NULL,
         OPTV_NONE,
diff --git a/src/atioption.h b/src/atioption.h
index 51778a4..836e911 100644
--- a/src/atioption.h
+++ b/src/atioption.h
@@ -69,7 +69,8 @@ typedef enum
     ATI_OPTION_PROBE_CLOCKS,
     ATI_OPTION_REFERENCE_CLOCK,
     ATI_OPTION_SHADOW_FB,
-    ATI_OPTION_SWCURSOR
+    ATI_OPTION_SWCURSOR,
+    ATI_OPTION_ACCELMETHOD
 } ATIPublicOptionType;
 
 #ifdef TV_OUT
diff --git a/src/atiscreen.c b/src/atiscreen.c
index 134129c..7dff827 100644
--- a/src/atiscreen.c
+++ b/src/atiscreen.c
@@ -329,6 +329,10 @@ ATIScreenInit
 
 #endif /* AVOID_CPIO */
 
+#ifdef USE_XAA
+
+    if (!pATI->useEXA) {
+
 	/* Memory manager setup */
 
 #ifdef XF86DRI_DEVEL
@@ -509,6 +513,21 @@ ATIScreenInit
     if (!ATIInitializeAcceleration(pScreen, pScreenInfo, pATI))
         return FALSE;
 
+    }
+
+#endif /* USE_XAA */
+
+#ifdef USE_EXA
+
+    if (pATI->useEXA) {
+        /* EXA sets up both the memory manager and acceleration here */
+
+        if (pATI->OptionAccel && !ATIMach64ExaInit(pScreen))
+            return FALSE;
+    }
+
+#endif /* USE_EXA */
+
 #ifndef AVOID_DGA
 
     /* Initialise DGA support */
@@ -623,11 +642,21 @@ ATICloseScreen
 
     ATICloseXVideo(pScreen, pScreenInfo, pATI);
 
+#ifdef USE_EXA
+    if (pATI->pExa)
+    {
+        exaDriverFini(pScreen);
+        xfree(pATI->pExa);
+        pATI->pExa = NULL;
+    }
+#endif
+#ifdef USE_XAA
     if (pATI->pXAAInfo)
     {
         XAADestroyInfoRec(pATI->pXAAInfo);
         pATI->pXAAInfo = NULL;
     }
+#endif
 
     if ((pScreen->CloseScreen = pATI->CloseScreen))
     {
@@ -645,9 +674,14 @@ ATICloseScreen
 
     ATILeaveGraphics(pScreenInfo, pATI);
 
-    xfree(pATI->ExpansionBitmapScanlinePtr[1]);
-    pATI->ExpansionBitmapScanlinePtr[0] =
+#ifdef USE_XAA
+    if (!pATI->useEXA)
+    {
+        xfree(pATI->ExpansionBitmapScanlinePtr[1]);
+        pATI->ExpansionBitmapScanlinePtr[0] = NULL;
         pATI->ExpansionBitmapScanlinePtr[1] = NULL;
+    }
+#endif
 
     xfree(pATI->pShadow);
     pATI->pShadow = NULL;
diff --git a/src/atistruct.h b/src/atistruct.h
index a84b0e8..2cb6625 100644
--- a/src/atistruct.h
+++ b/src/atistruct.h
@@ -52,7 +52,12 @@
 
 #endif /* TV_OUT */
 
+#ifdef USE_EXA
+#include "exa.h"
+#endif
+#ifdef USE_XAA
 #include "xaa.h"
+#endif
 #include "xf86Cursor.h"
 #include "xf86Pci.h"
 #include "xf86Resources.h"
@@ -296,15 +301,23 @@ typedef struct _ATIRec
     /*
      * XAA interface.
      */
+    Bool useEXA;
+#ifdef USE_EXA
+    ExaDriverPtr pExa;
+#endif
+#ifdef USE_XAA
     XAAInfoRecPtr pXAAInfo;
+#endif
     int nAvailableFIFOEntries, nFIFOEntries, nHostFIFOEntries;
     CARD8 EngineIsBusy, EngineIsLocked, XModifier;
     CARD32 dst_cntl;    /* For SetupFor/Subsequent communication */
     CARD32 sc_left_right, sc_top_bottom;
     CARD16 sc_left, sc_right, sc_top, sc_bottom;        /* Current scissors */
     pointer pHOST_DATA; /* Current HOST_DATA_* transfer window address */
+#ifdef USE_XAA
     CARD32 *ExpansionBitmapScanlinePtr[2];
     int ExpansionBitmapWidth;
+#endif
 
     /*
      * Cursor-related definitions.
@@ -382,7 +395,8 @@ typedef struct _ATIRec
      * XVideo-related data.
      */
     DevUnion XVPortPrivate[1];
-    FBLinearPtr pXVBuffer;
+    pointer pXVBuffer;		/* USE_EXA: ExaOffscreenArea*
+				   USE_XAA: FBLinearPtr */
     RegionRec VideoClip;
     int SurfacePitch, SurfaceOffset;
     CARD8 AutoPaint, DoubleBuffer, CurrentBuffer, ActiveSurface;
@@ -485,10 +499,12 @@ typedef struct _ATIRec
     Bool have3DWindows;
                                                                                 
     /* offscreen memory management */
+#ifdef USE_XAA
     int               backLines;
     FBAreaPtr         backArea;
     int               depthTexLines;
     FBAreaPtr         depthTexArea;
+#endif
     CARD8 OptionIsPCI;           /* Force PCI mode */
     CARD8 OptionDMAMode;         /* async, sync, mmio */
     CARD8 OptionAGPMode;         /* AGP mode */



More information about the xorg-commit mailing list