xf86-video-ati: Branch 'master' - 3 commits

George Sapountzis gsap7 at kemper.freedesktop.org
Sat Aug 12 22:27:43 EEST 2006


 src/Makefile.am       |    1 
 src/aticonfig.c       |    7 
 src/atimach64.c       |   53 ++
 src/atimach64accel.c  |   29 +
 src/atimach64exa.c    |   22 +
 src/atimach64render.c |  891 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/atioption.c       |    7 
 src/atioption.h       |    3 
 src/atiregs.h         |  128 +++++++
 src/atistruct.h       |   38 ++
 10 files changed, 1172 insertions(+), 7 deletions(-)

New commits:
diff-tree 750ed51bc3d5119e2d2a91a05ae5a971dcd3b88a (from b2beea2fa8949874d3d57fb9b43fe85cc08a8bff)
Author: George Sapountzis <gsap7 at yahoo.gr>
Date:   Mon Aug 7 17:09:02 2006 +0300

    [mach64] RENDER support: drop caching DP_WRITE_MASK.
    
    Caching DP_WRITE_MASK causes artifacts, e.g. with evas_xrender_x11_test.

diff --git a/src/atimach64.c b/src/atimach64.c
index 401c426..ba2ac8e 100644
--- a/src/atimach64.c
+++ b/src/atimach64.c
@@ -1040,7 +1040,6 @@ ATIMach64Set
 
             CacheRegister(DP_BKGD_CLR);
             CacheRegister(DP_FRGD_CLR);
-            CacheRegister(DP_WRITE_MASK);
             CacheRegister(DP_PIX_WIDTH);
             CacheRegister(DP_MIX);
 
diff --git a/src/atimach64accel.c b/src/atimach64accel.c
index 21e9fd7..442dcf7 100644
--- a/src/atimach64accel.c
+++ b/src/atimach64accel.c
@@ -163,7 +163,6 @@ ATIMach64Sync
 	    UncacheRegister(SC_TOP_BOTTOM);
 	    UncacheRegister(DP_BKGD_CLR);
 	    UncacheRegister(DP_FRGD_CLR);
-	    UncacheRegister(DP_WRITE_MASK);
 	    UncacheRegister(DP_PIX_WIDTH);
 	    UncacheRegister(DP_MIX);
 	    UncacheRegister(CLR_CMP_CNTL);
@@ -218,7 +217,6 @@ ATIMach64Sync
 	    CacheRegister(SC_TOP_BOTTOM);
 	    CacheRegister(DP_BKGD_CLR);
 	    CacheRegister(DP_FRGD_CLR);
-	    CacheRegister(DP_WRITE_MASK);
 	    CacheRegister(DP_PIX_WIDTH);
 	    CacheRegister(DP_MIX);
 	    CacheRegister(CLR_CMP_CNTL);
@@ -331,7 +329,6 @@ TestRegisterCachingDP(ScrnInfoPtr pScree
 
     TestRegisterCaching(DP_BKGD_CLR);
     TestRegisterCaching(DP_FRGD_CLR);
-    TestRegisterCaching(DP_WRITE_MASK);
     TestRegisterCaching(DP_PIX_WIDTH);
     TestRegisterCaching(DP_MIX);
 
diff-tree b2beea2fa8949874d3d57fb9b43fe85cc08a8bff (from 43aaed99950640c3695b3c2b91faabf00c6338a7)
Author: George Sapountzis <gsap7 at yahoo.gr>
Date:   Sat Aug 5 18:36:24 2006 +0300

    [mach64] RENDER support: save/restore, cache texture registers.

diff --git a/src/atimach64.c b/src/atimach64.c
index cc3a3cb..401c426 100644
--- a/src/atimach64.c
+++ b/src/atimach64.c
@@ -483,6 +483,15 @@ ATIMach64Save
         pATIHW->src_height2 = inm(SRC_HEIGHT2);
         pATIHW->src_cntl = inm(SRC_CNTL);
 
+        if (pATI->Chip >= ATI_CHIP_264GTPRO)
+        {
+            CARD32 offset = TEX_LEVEL(inm(TEX_SIZE_PITCH));
+
+            /* Save 3D control & texture registers */
+            pATIHW->tex_offset = inm(TEX_0_OFF + offset);
+            pATIHW->scale_3d_cntl = inm(SCALE_3D_CNTL);
+        }
+
         /* Save host data register */
         pATIHW->host_cntl = inm(HOST_CNTL);
 
@@ -514,6 +523,13 @@ ATIMach64Save
         /* Save context */
         pATIHW->context_mask = inm(CONTEXT_MASK);
 
+        if (pATI->Chip >= ATI_CHIP_264GTPRO)
+        {
+            /* Save texture setup registers */
+            pATIHW->tex_size_pitch = inm(TEX_SIZE_PITCH);
+            pATIHW->tex_cntl = inm(TEX_CNTL);
+        }
+
         if (pATI->Block1Base)
         {
             /* Save overlay & scaler registers */
@@ -853,6 +869,14 @@ ATIMach64Set
         outf(DST_BRES_DEC, pATIHW->dst_bres_dec);
         outf(DST_CNTL, pATIHW->dst_cntl);
 
+        if (pATI->Chip >= ATI_CHIP_264GTPRO)
+        {
+            /* Load ROP unit registers */
+            ATIMach64WaitForFIFO(pATI, 2);
+            outf(Z_CNTL, 0);
+            outf(ALPHA_TST_CNTL, 0);
+        }
+
         /* Load source registers */
         ATIMach64WaitForFIFO(pATI, 6);
         outf(SRC_OFF_PITCH, pATIHW->src_off_pitch);
@@ -865,6 +889,16 @@ ATIMach64Set
             SetWord(pATIHW->src_width2, 1) | SetWord(pATIHW->src_height2, 0));
         outf(SRC_CNTL, pATIHW->src_cntl);
 
+        if (pATI->Chip >= ATI_CHIP_264GTPRO)
+        {
+            CARD32 offset = TEX_LEVEL(pATIHW->tex_size_pitch);
+
+            /* Load 3D control & texture registers */
+            ATIMach64WaitForFIFO(pATI, 2);
+            outf(TEX_0_OFF + offset, pATIHW->tex_offset);
+            outf(SCALE_3D_CNTL, pATIHW->scale_3d_cntl);
+        }
+
         /* Load host data register */
         ATIMach64WaitForFIFO(pATI, 1);
         outf(HOST_CNTL, pATIHW->host_cntl);
@@ -912,6 +946,14 @@ ATIMach64Set
         ATIMach64WaitForFIFO(pATI, 1);
         outf(CONTEXT_MASK, pATIHW->context_mask);
 
+        if (pATI->Chip >= ATI_CHIP_264GTPRO)
+        {
+            /* Load texture setup registers */
+            ATIMach64WaitForFIFO(pATI, 2);
+            outf(TEX_SIZE_PITCH, pATIHW->tex_size_pitch);
+            outf(TEX_CNTL, pATIHW->tex_cntl);
+        }
+
         if (pATI->Block1Base)
         {
             /* Load overlay & scaler registers */
@@ -982,6 +1024,11 @@ ATIMach64Set
 
             CacheRegister(SRC_CNTL);
 
+            if (pATI->Chip >= ATI_CHIP_264GTPRO)
+            {
+                CacheRegister(SCALE_3D_CNTL);
+            }
+
             CacheRegister(HOST_CNTL);
 
             CacheRegister(PAT_REG0);
@@ -1001,6 +1048,11 @@ ATIMach64Set
             CacheRegister(CLR_CMP_MSK);
             CacheRegister(CLR_CMP_CNTL);
 
+            if (pATI->Chip >= ATI_CHIP_264GTPRO)
+            {
+                CacheRegister(TEX_SIZE_PITCH);
+            }
+
             if (pATI->Block1Base)
             {
                 CacheRegister(OVERLAY_Y_X_START);
diff --git a/src/atimach64accel.c b/src/atimach64accel.c
index b9d312e..21e9fd7 100644
--- a/src/atimach64accel.c
+++ b/src/atimach64accel.c
@@ -151,10 +151,12 @@ ATIMach64Sync
     if ( pATI->directRenderingEnabled && pATI->NeedDRISync )
     {
 	ATIHWPtr pATIHW = &pATI->NewHW;
+	CARD32 offset;
 
 	if (pATI->OptionMMIOCache) {
 	    /* "Invalidate" the MMIO cache so the cache slots get updated */
 	    UncacheRegister(SRC_CNTL);
+	    UncacheRegister(SCALE_3D_CNTL);
 	    UncacheRegister(HOST_CNTL);
 	    UncacheRegister(PAT_CNTL);
 	    UncacheRegister(SC_LEFT_RIGHT);
@@ -165,6 +167,7 @@ ATIMach64Sync
 	    UncacheRegister(DP_PIX_WIDTH);
 	    UncacheRegister(DP_MIX);
 	    UncacheRegister(CLR_CMP_CNTL);
+	    UncacheRegister(TEX_SIZE_PITCH);
 	}
 
 	ATIDRIWaitForIdle(pATI);
@@ -185,12 +188,19 @@ ATIMach64Sync
 	outf( DP_MIX, pATIHW->dp_mix );
 	outf( DP_FRGD_CLR,  pATIHW->dp_frgd_clr );
 	outf( DP_WRITE_MASK, pATIHW->dp_write_mask );
-	
 	outf( DP_PIX_WIDTH, pATIHW->dp_pix_width );
+
 	outf( CLR_CMP_CNTL, pATIHW->clr_cmp_cntl );
+
+	offset = TEX_LEVEL(pATIHW->tex_size_pitch);
+
+	ATIMach64WaitForFIFO(pATI, 6);
 	outf( ALPHA_TST_CNTL, 0 );
 	outf( Z_CNTL, 0 );
-	outf( SCALE_3D_CNTL, 0 );
+	outf( SCALE_3D_CNTL, pATIHW->scale_3d_cntl );
+	outf( TEX_0_OFF + offset, pATIHW->tex_offset );
+	outf( TEX_SIZE_PITCH, pATIHW->tex_size_pitch );
+	outf( TEX_CNTL, pATIHW->tex_cntl );
 
 	ATIMach64WaitForFIFO(pATI, 2);
 	outf( SC_LEFT_RIGHT,
@@ -201,6 +211,7 @@ ATIMach64Sync
 	if (pATI->OptionMMIOCache) {
 	    /* Now that the cache slots reflect the register state, re-enable MMIO cache */
 	    CacheRegister(SRC_CNTL);
+	    CacheRegister(SCALE_3D_CNTL);
 	    CacheRegister(HOST_CNTL);
 	    CacheRegister(PAT_CNTL);
 	    CacheRegister(SC_LEFT_RIGHT);
@@ -211,6 +222,7 @@ ATIMach64Sync
 	    CacheRegister(DP_PIX_WIDTH);
 	    CacheRegister(DP_MIX);
 	    CacheRegister(CLR_CMP_CNTL);
+	    CacheRegister(TEX_SIZE_PITCH);
 	}
 
 	ATIMach64WaitForIdle(pATI);
@@ -288,6 +300,11 @@ TestRegisterCachingDP(ScrnInfoPtr pScree
 
     TestRegisterCaching(SRC_CNTL);
 
+    if (pATI->Chip >= ATI_CHIP_264GTPRO)
+    {
+        TestRegisterCaching(SCALE_3D_CNTL);
+    }
+
     TestRegisterCaching(HOST_CNTL);
 
     TestRegisterCaching(PAT_REG0);
@@ -321,6 +338,11 @@ TestRegisterCachingDP(ScrnInfoPtr pScree
     TestRegisterCaching(CLR_CMP_CLR);
     TestRegisterCaching(CLR_CMP_MSK);
     TestRegisterCaching(CLR_CMP_CNTL);
+
+    if (pATI->Chip >= ATI_CHIP_264GTPRO)
+    {
+        TestRegisterCaching(TEX_SIZE_PITCH);
+    }
 }
 
 static __inline__ void
diff --git a/src/atistruct.h b/src/atistruct.h
index 12cf61e..08f6a5b 100644
--- a/src/atistruct.h
+++ b/src/atistruct.h
@@ -147,6 +147,8 @@ typedef struct _ATIHWRec
     CARD32 clr_cmp_clr, clr_cmp_msk, clr_cmp_cntl;
     CARD32 context_mask, context_load_cntl;
 
+    CARD32 scale_3d_cntl, tex_size_pitch, tex_cntl, tex_offset;
+
     /* Mach64 MMIO Block 1 registers */
     CARD32 overlay_y_x_start, overlay_y_x_end, overlay_graphics_key_clr,
            overlay_graphics_key_msk, overlay_key_cntl, overlay_scale_inc,
diff-tree 43aaed99950640c3695b3c2b91faabf00c6338a7 (from d113f40fd97408661193895ff3aa82262f100143)
Author: George Sapountzis <gsap7 at yahoo.gr>
Date:   Sat Aug 12 22:12:07 2006 +0300

    [mach64] RENDER support (bug #6877).
    
    RENDER accleration is disabled by default. It is strongly recommended that the
    patch from bug #6772 and the 'exa-damagetrack branch' are merged in the xserver
    before enabling RENDER acceleration. To enable RENDER acceleration, add the
    following in xorg.conf:
    
    Section "Device"
            [...]
    
            Option          "AccelMethod"           "exa"
            Option          "RenderAccel"           "true"
    EndSection

diff --git a/src/Makefile.am b/src/Makefile.am
index d31ac24..7e6af9c 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -138,6 +138,7 @@ EXTRA_DIST = \
 	atimach64.h \
 	atimach64i2c.h \
 	atimach64io.h \
+	atimach64render.c \
 	atimach64xv.h \
 	atimode.h \
 	atimodule.h \
diff --git a/src/aticonfig.c b/src/aticonfig.c
index dffabc7..d0a3677 100644
--- a/src/aticonfig.c
+++ b/src/aticonfig.c
@@ -154,6 +154,7 @@ ATIProcessOptions
 #   define ShadowFB      PublicOption[ATI_OPTION_SHADOW_FB].value.bool
 #   define SWCursor      PublicOption[ATI_OPTION_SWCURSOR].value.bool
 #   define AccelMethod   PublicOption[ATI_OPTION_ACCELMETHOD].value.str
+#   define RenderAccel   PublicOption[ATI_OPTION_RENDER_ACCEL].value.bool
 #   define LCDSync       PrivateOption[ATI_OPTION_LCDSYNC].value.bool
 
 #   define ReferenceClock \
@@ -360,6 +361,12 @@ ATIProcessOptions
         xf86DrvMsg(pScreenInfo->scrnIndex, from,
             "Using %s acceleration architecture\n",
             pATI->useEXA ? "EXA" : "XAA");
+
+        pATI->RenderAccelEnabled = FALSE;
+#if defined(USE_EXA)
+        if (pATI->useEXA && RenderAccel)
+            pATI->RenderAccelEnabled = TRUE;
+#endif
     }
 
     xfree(PublicOption);
diff --git a/src/atimach64exa.c b/src/atimach64exa.c
index 90cc2af..e2b2540 100644
--- a/src/atimach64exa.c
+++ b/src/atimach64exa.c
@@ -441,6 +441,8 @@ Mach64DownloadFromScreen(PixmapPtr pSrc,
     return TRUE;
 }
 
+#include "atimach64render.c"
+
 /* Compute log base 2 of val. */
 static __inline__ int Mach64Log2(int val)
 {
@@ -673,6 +675,26 @@ Bool ATIMach64ExaInit(ScreenPtr pScreen)
     pExa->UploadToScreen = Mach64UploadToScreen;
     pExa->DownloadFromScreen = Mach64DownloadFromScreen;
 
+    if (pATI->RenderAccelEnabled) {
+	if (pATI->Chip >= ATI_CHIP_264GTPRO) {
+	    /* 3D Rage Pro does not support NPOT textures. */
+	    pExa->flags |= EXA_OFFSCREEN_ALIGN_POT;
+
+	    pExa->CheckComposite = Mach64CheckComposite;
+	    pExa->PrepareComposite = Mach64PrepareComposite;
+	    pExa->Composite = Mach64Composite;
+	    pExa->DoneComposite = Mach64DoneComposite;
+	} else {
+	    xf86DrvMsg(pScreen->myNum, X_INFO,
+		       "Render acceleration is not supported for ATI chips "
+		       "earlier than the ATI 3D Rage Pro.\n");
+	    pATI->RenderAccelEnabled = FALSE;
+	}
+    }
+
+    xf86DrvMsg(pScreen->myNum, X_INFO, "Render acceleration %s\n",
+	       pATI->RenderAccelEnabled ? "enabled" : "disabled");
+
     if (!exaDriverInit(pScreen, pATI->pExa)) {
 	xfree(pATI->pExa);
 	pATI->pExa = NULL;
diff --git a/src/atimach64render.c b/src/atimach64render.c
new file mode 100644
index 0000000..8a6ebbc
--- /dev/null
+++ b/src/atimach64render.c
@@ -0,0 +1,891 @@
+/*
+ * Copyright 2006 George Sapountzis
+ * All Rights Reserved.
+ *
+ * Based on the mach64 DRI and DRM drivers:
+ * Copyright 2000 Gareth Hughes
+ * Copyright 2002-2003 Leif Delgass
+ * All Rights Reserved.
+ *
+ * Based on the ati hw/kdrive driver:
+ * Copyright 2003 Eric Anholt, Anders Carlsson
+ *
+ * Based on the via hw/xfree86 driver:
+ * Copyright 2006 Thomas Hellstrom. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    George Sapountzis <gsap7 at yahoo.gr>
+ */
+
+/*
+ * Interesting cases for RENDER acceleration:
+ *
+ * cursor      : ARGB8888 (24x24)   Over
+ *               RGB565
+ *
+ * glyph       : A8       (9x10)    Add
+ *               A8       (420x13)
+ * glyph set   : ARGB8888 (1x1 R)   In
+ *               A8       (420x13)  Over
+ *               RGB565
+ *
+ * shadow      : ARGB8888 (1x1 R)   In
+ *               A8       (670x362) Over
+ *               RGB565
+ * translucent : RGB565   (652x344) In
+ *               A8       (1x1 R)   Over
+ *               RGB565
+ *
+ * In all interesting cases one of src/mask is "1x1 R".
+ */
+
+/*
+ * Assumptions and limitations of mach64 RENDER acceleration:
+ *
+ * RENDER acceleration is supported for GTPRO and later chips using the 3D
+ * triangle setup, i.e. the VERTEX_? registers (see the dri driver). According
+ * to atiregs.h, SCALE_3D_CNTL and TEX_?_OFF appear in GT, thus chips as old
+ * as GT should be capable of RENDER acceleration, using the S_?_INC, T_?_INC
+ * registers for texture mapping (see the directfb driver).
+ *
+ * GTPRO added a triangle setup engine and multitexturing. However, it seems
+ * that none of the 8bpp mach64 formats expands the 8bit value to the alpha
+ * channel in texture mapping, RGB8 appears to expand to (I,I,I,0). This makes
+ * GTPRO multitexturing unsuitable for emulating the IN operation. Moreover,
+ * it seems that GT/GTPRO has a muxltiplexer instead of a blender for computing
+ * the final alpha channel which forbids destinations with an alpha channel and
+ * generic two-pass compositing.
+ *
+ * A texture unit combines the fragment color (VERTEX_?_ARGB) coming in from
+ * triangle rasterization with the texel from the texture according to the
+ * texture environment (TEX_LIGHT_FCN_). "1x1 R" textures may come in as frag-
+ * ment colors, eliminating the need for multitexturing in all interesting
+ * cases (via also uses this optimization).
+ *
+ * Texture registers are saved/restored and cached (see atimach64.c). TEX_CNTL
+ * cannot be cached because it flushes the texture cache. TEX_?_OFF are also
+ * not cached because I am not sure whether writing at some offset register
+ * affects the value at another offset.
+ *
+ * Vertex registers are not saved/restored. This shouldn't be a problem though
+ * either for DRI or VT switch because vertex registers are set and used within
+ * a signle acceleration hook. Synchronization between the DDX and DRI is based
+ * on calling ATIDRISync() at the beginning of each DDX acceleration hook,
+ * which suggests the assumption that individual acceleration hooks are not
+ * interrupted.
+ */
+
+#include <string.h>
+#include <stdio.h>
+
+/*
+ * Helper functions copied from exa and via.
+ */
+
+#if 0
+static void
+Mach64ExaCompositePictDesc(PicturePtr pict, char *string, int n)
+{
+    char format[20];
+    char size[20];
+
+    if (!pict) {
+        snprintf(string, n, "None");
+        return;
+    }
+
+    switch (pict->format) {
+    case PICT_x8r8g8b8:
+        snprintf(format, 20, "RGB8888 ");
+        break;
+    case PICT_x8b8g8r8:
+        snprintf(format, 20, "BGR8888 ");
+        break;
+    case PICT_a8r8g8b8:
+        snprintf(format, 20, "ARGB8888");
+        break;
+    case PICT_a8b8g8r8:
+        snprintf(format, 20, "ABGR8888");
+        break;
+    case PICT_r5g6b5:
+        snprintf(format, 20, "RGB565  ");
+        break;
+    case PICT_x1r5g5b5:
+        snprintf(format, 20, "RGB555  ");
+        break;
+    case PICT_a8:
+        snprintf(format, 20, "A8      ");
+        break;
+    case PICT_a1:
+        snprintf(format, 20, "A1      ");
+        break;
+    default:
+        snprintf(format, 20, "0x%x", (int)pict->format);
+        break;
+    }
+
+    snprintf(size, 20, "%dx%d%s%s",
+        pict->pDrawable->width,
+        pict->pDrawable->height,
+        pict->repeat ? " R" : "",
+        pict->componentAlpha ? " C" : ""
+    );
+
+    snprintf(string, n, "%-10p: fmt %s (%s)", (void *)pict->pDrawable, format, size);
+}
+
+static void
+Mach64ExaPrintComposite(CARD8 op,
+    PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst, char *string)
+{
+    char sop[20];
+    char srcdesc[40], maskdesc[40], dstdesc[40];
+
+    switch (op) {
+    case PictOpSrc:
+        sprintf(sop, "Src");
+        break;
+    case PictOpOver:
+        sprintf(sop, "Over");
+        break;
+    case PictOpInReverse:
+        sprintf(sop, "InR");
+        break;
+    case PictOpOutReverse:
+        sprintf(sop, "OutR");
+        break;
+    case PictOpAdd:
+        sprintf(sop, "Add");
+        break;
+    default:
+        sprintf(sop, "0x%x", (int)op);
+        break;
+    }
+
+    Mach64ExaCompositePictDesc(pSrc, srcdesc, 40);
+    Mach64ExaCompositePictDesc(pMask, maskdesc, 40);
+    Mach64ExaCompositePictDesc(pDst, dstdesc, 40);
+
+    sprintf(string, "op %s, \n"
+        "                src  %s\n"
+        "                mask %s\n"
+        "                dst  %s\n", sop, srcdesc, maskdesc, dstdesc);
+}
+#endif
+
+static __inline__ CARD32
+viaBitExpandHelper(CARD32 component, CARD32 bits)
+{
+    CARD32 tmp, mask;
+
+    mask = (1 << (8 - bits)) - 1;
+    tmp = component << (8 - bits);
+    return ((component & 1) ? tmp | mask : tmp);
+}
+
+static __inline__ void
+Mach64PixelARGB(PixmapPtr pPixmap, CARD32 format, CARD32 *argb)
+{
+    CARD32 pixel;
+    CARD8  comp;
+    int    bits, shift;
+
+    /* Ensure that texture drawing has completed. */
+    exaWaitSync(pPixmap->drawable.pScreen);
+
+    /* exaGetPixmapFirstPixel() */
+
+    switch (pPixmap->drawable.bitsPerPixel) {
+    case 32:
+        pixel = *(CARD32 *)(pPixmap->devPrivate.ptr);
+        break;
+    case 16:
+        pixel = *(CARD16 *)(pPixmap->devPrivate.ptr);
+        break;
+    default:
+        pixel = *(CARD8 *)(pPixmap->devPrivate.ptr);
+        break;
+    }
+
+    /* exaGetRGBAFromPixel()/viaPixelARGB8888() */
+
+    switch (PICT_FORMAT_TYPE(format)) {
+    case PICT_TYPE_A:
+        shift = 0;
+        bits = PICT_FORMAT_A(format);
+        comp = (pixel >> shift) & ((1 << bits) - 1);
+        comp = viaBitExpandHelper(comp, bits);
+        *argb = comp << 24;
+        break;
+    case PICT_TYPE_ARGB:
+        shift = 0;
+        bits = PICT_FORMAT_B(format);
+        comp = (pixel >> shift) & ((1 << bits) - 1);
+        comp = viaBitExpandHelper(comp, bits);
+        *argb = comp;
+
+        shift += bits;
+        bits = PICT_FORMAT_G(format);
+        comp = (pixel >> shift) & ((1 << bits) - 1);
+        comp = viaBitExpandHelper(comp, bits);
+        *argb |= comp << 8;
+
+        shift += bits;
+        bits = PICT_FORMAT_R(format);
+        comp = (pixel >> shift) & ((1 << bits) - 1);
+        comp = viaBitExpandHelper(comp, bits);
+        *argb |= comp << 16;
+
+        shift += bits;
+        bits = PICT_FORMAT_A(format);
+        if (bits) {
+            comp = (pixel >> shift) & ((1 << bits) - 1);
+            comp = viaBitExpandHelper(comp, bits);
+        } else {
+            comp = 0xff;
+        }
+        *argb |= comp << 24;
+        break;
+    case PICT_TYPE_ABGR:
+        break;
+    default:
+        break;
+    }
+}
+
+/*
+ * RENDER acceleration for mach64
+ */
+
+typedef struct {
+    Bool supported;
+    CARD32 scale_3d_cntl;
+} Mach64BlendOp;
+
+static Mach64BlendOp Mach64BlendOps[] = {
+    /* Clear */
+    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_ZERO},
+    /* Src */
+    {1, MACH64_ALPHA_BLEND_SRC_ONE         | MACH64_ALPHA_BLEND_DST_ZERO},
+    /* Dst */
+    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_ONE},
+    /* Over */
+    {1, MACH64_ALPHA_BLEND_SRC_ONE         | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
+    /* OverReverse */
+    {1, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_ONE},
+    /* In */
+    {1, MACH64_ALPHA_BLEND_SRC_DSTALPHA    | MACH64_ALPHA_BLEND_DST_ZERO},
+    /* InReverse */
+    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_SRCALPHA},
+    /* Out */
+    {1, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_ZERO},
+    /* OutReverse */
+    {1, MACH64_ALPHA_BLEND_SRC_ZERO        | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
+    /* Atop */
+    {0, MACH64_ALPHA_BLEND_SRC_DSTALPHA    | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
+    /* AtopReverse */
+    {0, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_SRCALPHA},
+    /* Xor */
+    {1, MACH64_ALPHA_BLEND_SRC_INVDSTALPHA | MACH64_ALPHA_BLEND_DST_INVSRCALPHA},
+    /* Add */
+    {1, MACH64_ALPHA_BLEND_SRC_ONE         | MACH64_ALPHA_BLEND_DST_ONE}
+};
+
+#define MACH64_NR_BLEND_OPS \
+    (sizeof(Mach64BlendOps) / sizeof(Mach64BlendOps[0]))
+
+typedef struct {
+    CARD32 pictFormat;
+    CARD32 dstFormat;
+    CARD32 texFormat;
+} Mach64TexFormat;
+
+static Mach64TexFormat Mach64TexFormats[] = {
+    {PICT_a8r8g8b8, -1,                       MACH64_DATATYPE_ARGB8888},
+    {PICT_x8r8g8b8, MACH64_DATATYPE_ARGB8888, MACH64_DATATYPE_ARGB8888},
+    {PICT_a1r5g5b5, -1,                       MACH64_DATATYPE_ARGB1555},
+    {PICT_x1r5g5b5, MACH64_DATATYPE_ARGB1555, MACH64_DATATYPE_ARGB1555},
+    {PICT_r5g6b5,   MACH64_DATATYPE_RGB565,   MACH64_DATATYPE_RGB565  },
+    {PICT_a8,       MACH64_DATATYPE_RGB8,     MACH64_DATATYPE_RGB8    }
+};
+
+#define MACH64_NR_TEX_FORMATS \
+    (sizeof(Mach64TexFormats) / sizeof(Mach64TexFormats[0]))
+
+#define MACH64_PICT_IS_1x1R(_pPict)      \
+    ((_pPict) &&                         \
+     (_pPict)->pDrawable->width == 1 &&  \
+     (_pPict)->pDrawable->height == 1 && \
+     (_pPict)->repeat)
+
+/*
+ * CheckComposite hook helper functions.
+ */
+static __inline__ Bool
+Mach64GetOrder(int val, int *shift)
+{
+    *shift = 0;
+
+    while (val > (1 << *shift))
+        (*shift)++;
+
+    return (val == (1 << *shift));
+}
+
+static Bool
+Mach64CheckTexture(PicturePtr pPict)
+{
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    int l2w, l2h, level, i;
+
+    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
+        if (Mach64TexFormats[i].pictFormat == pPict->format)
+            break;
+    }
+
+    if (i == MACH64_NR_TEX_FORMATS)
+        MACH64_FALLBACK(("Unsupported picture format 0x%x\n",
+                        (int)pPict->format));
+
+    /* l2w equals l2p (pitch) for all interesting cases (w >= 64) */
+    Mach64GetOrder(w, &l2w);
+    Mach64GetOrder(h, &l2h);
+
+    level = (l2w > l2h) ? l2w : l2h;
+
+    if (level > 10)
+        MACH64_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h));
+
+    return TRUE;
+}
+
+/*
+ * CheckComposite acceleration hook.
+ */
+Bool
+Mach64CheckComposite
+(
+    int        op,
+    PicturePtr pSrcPicture,
+    PicturePtr pMaskPicture,
+    PicturePtr pDstPicture
+)
+{
+    Bool src_solid, mask_solid, mask_comp, op_comp;
+    int i;
+
+    if (op >= MACH64_NR_BLEND_OPS || !Mach64BlendOps[op].supported)
+        return FALSE;
+
+    if (!Mach64CheckTexture(pSrcPicture))
+        return FALSE;
+
+    if (pMaskPicture && !Mach64CheckTexture(pMaskPicture))
+        return FALSE;
+
+    /* Check destination format */
+
+    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
+        if (Mach64TexFormats[i].pictFormat == pDstPicture->format)
+            break;
+    }
+
+    if (i == MACH64_NR_TEX_FORMATS || Mach64TexFormats[i].dstFormat == -1)
+        MACH64_FALLBACK(("Unsupported dst format 0x%x\n",
+                        (int)pDstPicture->format));
+
+    /* Check that A8 src/dst appears only as "A8 ADD A8" */
+
+    if (pDstPicture->format == PICT_a8) {
+        if (pMaskPicture || pSrcPicture->format != PICT_a8 || op != PictOpAdd)
+            MACH64_FALLBACK(("A8 dst with mask or non-A8 src.\n"));
+    }
+
+    if (pDstPicture->format != PICT_a8) {
+        if (pSrcPicture->format == PICT_a8)
+            MACH64_FALLBACK(("A8 src with non-A8 dst.\n"));
+    }
+
+    /* Check that one of src/mask can come in as the fragment color. */
+
+    src_solid = MACH64_PICT_IS_1x1R(pSrcPicture);
+
+    mask_solid = MACH64_PICT_IS_1x1R(pMaskPicture);
+
+    mask_comp = pMaskPicture && pMaskPicture->componentAlpha;
+
+    op_comp = op == PictOpAdd ||
+              op == PictOpInReverse ||
+              op == PictOpOutReverse;
+
+    if (mask_solid && src_solid)
+        MACH64_FALLBACK(("Bad one-pixel IN composite operation.\n"));
+
+    if (pMaskPicture) {
+        if (!mask_solid && !src_solid)
+            MACH64_FALLBACK(("Multitexturing required.\n"));
+
+        if (!mask_solid && !op_comp)
+            MACH64_FALLBACK(("Non-solid mask.\n"));
+
+        if (mask_comp && !src_solid)
+            MACH64_FALLBACK(("Component-alpha mask.\n"));
+
+        if (!mask_comp && pMaskPicture->format != PICT_a8)
+            MACH64_FALLBACK(("Non-A8 mask.\n"));
+
+        if (mask_comp && pMaskPicture->format != PICT_a8r8g8b8)
+            MACH64_FALLBACK(("Non-ARGB mask.\n"));
+    }
+
+    return TRUE;
+}
+
+/*
+ * This function setups the fragment color from a solid pixmap in the presence
+ * of a mask.
+ */
+static __inline__ Bool
+Mach64PrepareMask
+(
+    Mach64ContextRegs3D *m3d,
+    int        op,
+    PicturePtr pSrcPicture,
+    PicturePtr pMaskPicture,
+    PixmapPtr  pSrc,
+    PixmapPtr  pMask
+)
+{
+    Bool mask_solid, src_solid;
+    CARD32 argb = 0;
+
+    mask_solid = MACH64_PICT_IS_1x1R(pMaskPicture);
+
+    src_solid = MACH64_PICT_IS_1x1R(pSrcPicture);
+
+    if (mask_solid) {
+        Mach64PixelARGB(pMask, pMaskPicture->format, &argb);
+        argb >>= 24;
+        argb &= 0xff;
+
+        m3d->frag_mask = TRUE;
+        m3d->frag_color = (argb << 24) | (argb << 16) | (argb << 8) | argb;
+        return TRUE;
+    }
+
+    if (src_solid) {
+        /* We can only handle cases where either the src color (e.g. ADD) or
+         * the src alpha (e.g. IN_REV, OUT_REV) is used but not both.
+         *
+         * (ARGB8888 IN A8) OVER RGB565 is implemented as:
+         * (ARGB8888 IN A8) ADD ((ARGB8888 IN A8) OUT_REV RGB565).
+         */
+        if (op == PictOpInReverse || op == PictOpOutReverse) {
+            Mach64PixelARGB(pSrc, pSrcPicture->format, &argb);
+            argb >>= 24;
+            argb &= 0xff;
+
+            m3d->frag_src = TRUE;
+            m3d->frag_color = (argb << 24) | (argb << 16) | (argb << 8) | argb;
+            m3d->color_alpha = TRUE;
+            return TRUE;
+        }
+
+        if (op == PictOpAdd) {
+            Mach64PixelARGB(pSrc, pSrcPicture->format, &argb);
+
+            m3d->frag_src = TRUE;
+            m3d->frag_color = argb;
+            return TRUE;
+        }
+    }
+
+    return FALSE;
+}
+
+/*
+ * This function setups the texturing and blending environments. It also
+ * manipulates blend control for non-solid masks.
+ */
+static void __inline__
+Mach64BlendCntl(Mach64ContextRegs3D *m3d, int op)
+{
+    m3d->scale_3d_cntl |= MACH64_SCALE_PIX_EXPAND_DYNAMIC_RANGE |
+                          MACH64_SCALE_DITHER_2D_TABLE |
+                          MACH64_DITHER_INIT_RESET;
+
+    m3d->scale_3d_cntl |= Mach64BlendOps[op].scale_3d_cntl;
+
+    if (m3d->color_alpha) {
+        /* A8 uses RGB8 which expands to (I,I,I,0). Thus, we use the color
+         * channels instead of the alpha channel as the alpha factor. We also
+         * use the color channels for ARGB8888 masks with component-alpha.
+         */
+        CARD32 Ad = m3d->scale_3d_cntl & MACH64_ALPHA_BLEND_DST_MASK;
+
+        /* InReverse */
+        if (Ad == MACH64_ALPHA_BLEND_DST_SRCALPHA) {
+            m3d->scale_3d_cntl &= ~MACH64_ALPHA_BLEND_DST_MASK;
+            m3d->scale_3d_cntl |=  MACH64_ALPHA_BLEND_DST_SRCCOLOR;
+        }
+
+        /* OutReverse */
+        if (Ad == MACH64_ALPHA_BLEND_DST_INVSRCALPHA) {
+            m3d->scale_3d_cntl &= ~MACH64_ALPHA_BLEND_DST_MASK;
+            m3d->scale_3d_cntl |=  MACH64_ALPHA_BLEND_DST_INVSRCCOLOR;
+        }
+    }
+
+    /* Can't color mask and blend at the same time */
+    m3d->dp_write_mask = 0xffffffff;
+
+    /* Can't fog and blend at the same time */
+    m3d->scale_3d_cntl |= MACH64_ALPHA_FOG_EN_ALPHA;
+
+    /* Enable texture mapping mode */
+    m3d->scale_3d_cntl |= MACH64_SCALE_3D_FCN_TEXTURE;
+    m3d->scale_3d_cntl |= MACH64_MIP_MAP_DISABLE;
+
+    /* Setup the texture environment */
+    m3d->scale_3d_cntl |= MACH64_TEX_LIGHT_FCN_MODULATE;
+
+    /* Initialize texture unit */
+    m3d->tex_cntl |= MACH64_TEX_ST_DIRECT |
+                     MACH64_TEX_SRC_LOCAL |
+                     MACH64_TEX_UNCOMPRESSED |
+                     MACH64_TEX_CACHE_FLUSH |
+                     MACH64_TEX_CACHE_SIZE_4K;
+}
+
+/*
+ * This function setups the texture unit.
+ */
+static Bool
+Mach64PrepareTexture(PicturePtr pPict, PixmapPtr pPix)
+{
+    ScrnInfoPtr pScreenInfo = xf86Screens[pPix->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+    Mach64ContextRegs3D *m3d = &pATI->m3d;
+
+    CARD32 texFormat;
+
+    int w = pPict->pDrawable->width;
+    int h = pPict->pDrawable->height;
+    int l2w, l2h, l2p, level, pitch, cpp, i;
+
+    /* Prepare picture format */
+    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
+        if (Mach64TexFormats[i].pictFormat == pPict->format)
+            break;
+    }
+    texFormat = Mach64TexFormats[i].texFormat;
+
+    /* Prepare picture size */
+    cpp = PICT_FORMAT_BPP(pPict->format) / 8;
+    pitch = exaGetPixmapPitch(pPix) / cpp;
+
+    Mach64GetOrder(w, &l2w);
+    Mach64GetOrder(h, &l2h);
+    Mach64GetOrder(pitch, &l2p);
+
+    if (pPict->repeat && w == 1 && h == 1)
+        l2p = 0;
+    else if (pPict->repeat)
+        MACH64_FALLBACK(("Repeat not supported for w,h != 1,1\n"));
+
+    l2w = l2p;
+
+    level = (l2w > l2h) ? l2w : l2h;
+
+    m3d->tex_width = (1 << l2w);
+    m3d->tex_height = (1 << l2h);
+
+    /* Update hw state */
+    m3d->dp_pix_width |= SetBits(texFormat, DP_SCALE_PIX_WIDTH);
+
+    m3d->tex_size_pitch = (l2w   << 0) |
+                          (level << 4) |
+                          (l2h   << 8);
+
+    m3d->tex_offset = exaGetPixmapOffset(pPix);
+
+    if (PICT_FORMAT_A(pPict->format))
+        m3d->scale_3d_cntl |= MACH64_TEX_MAP_AEN;
+
+    switch (pPict->filter) {
+    case PictFilterNearest:
+        m3d->scale_3d_cntl |= MACH64_TEX_BLEND_FCN_NEAREST;
+        break;
+    case PictFilterBilinear:
+        /* FIXME */
+#if 0
+        m3d->scale_3d_cntl |= MACH64_TEX_BLEND_FCN_LINEAR;
+        m3d->scale_3d_cntl |= MACH64_BILINEAR_TEX_EN;
+#endif
+        MACH64_FALLBACK(("Bilinear filter 0x%x\n", pPict->filter));
+        break;
+    default:
+        MACH64_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
+    }
+
+    m3d->transform = pPict->transform;
+
+    return TRUE;
+}
+
+/*
+ * PrepareComposite acceleration hook.
+ */
+Bool
+Mach64PrepareComposite
+(
+    int        op,
+    PicturePtr pSrcPicture,
+    PicturePtr pMaskPicture,
+    PicturePtr pDstPicture,
+    PixmapPtr  pSrc,
+    PixmapPtr  pMask,
+    PixmapPtr  pDst
+)
+{
+    ScrnInfoPtr pScreenInfo = xf86Screens[pDst->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+    Mach64ContextRegs3D *m3d = &pATI->m3d;
+
+    CARD32 dstFormat;
+    int offset, i;
+
+    ATIDRISync(pScreenInfo);
+
+    /* Initialize state */
+    m3d->dp_mix = SetBits(MIX_SRC, DP_BKGD_MIX) |
+                  SetBits(MIX_SRC, DP_FRGD_MIX);
+
+    m3d->dp_src = SetBits(SRC_SCALER_3D, DP_BKGD_SRC) |
+                  SetBits(SRC_SCALER_3D, DP_FRGD_SRC) |
+                  DP_MONO_SRC_ALLONES;
+
+    Mach64GetPixmapOffsetPitch(pDst, &m3d->dst_pitch_offset);
+
+    m3d->scale_3d_cntl = 0;
+    m3d->tex_cntl = 0;
+
+    m3d->frag_src = FALSE;
+    m3d->frag_mask = FALSE;
+    m3d->frag_color = 0xffffffff;
+
+    m3d->color_alpha = FALSE;
+
+    m3d->transform = NULL;
+
+    /* Compute state */
+    if (pMaskPicture && !Mach64PrepareMask(m3d, op, pSrcPicture, pMaskPicture,
+                                           pSrc, pMask))
+        return FALSE;
+
+    Mach64BlendCntl(m3d, op);
+
+    for (i = 0; i < MACH64_NR_TEX_FORMATS; i++) {
+        if (Mach64TexFormats[i].pictFormat == pDstPicture->format)
+            break;
+    }
+    dstFormat = Mach64TexFormats[i].dstFormat;
+
+    m3d->dp_pix_width = SetBits(dstFormat, DP_DST_PIX_WIDTH) |
+                        SetBits(dstFormat, DP_SRC_PIX_WIDTH) |
+                        SetBits(dstFormat, DP_HOST_PIX_WIDTH);
+
+    if (!m3d->frag_src) {
+        if (!Mach64PrepareTexture(pSrcPicture, pSrc))
+            return FALSE;
+    }
+
+    if (pMaskPicture && !m3d->frag_mask) {
+        if (!Mach64PrepareTexture(pMaskPicture, pMask))
+            return FALSE;
+    }
+
+    offset = TEX_LEVEL(m3d->tex_size_pitch);
+
+    /* Emit state */
+    ATIMach64WaitForFIFO(pATI, 12);
+    outf(DP_SRC, m3d->dp_src);
+    outf(DP_MIX, m3d->dp_mix);
+
+    outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE);
+    outf(DST_CNTL, DST_X_DIR | DST_Y_DIR);
+    outf(DST_OFF_PITCH, m3d->dst_pitch_offset);
+
+    outf(SCALE_3D_CNTL, m3d->scale_3d_cntl);
+    outf(DP_WRITE_MASK, m3d->dp_write_mask);
+    outf(DP_PIX_WIDTH, m3d->dp_pix_width);
+
+    outf(SETUP_CNTL, 0);
+
+    outf(TEX_SIZE_PITCH, m3d->tex_size_pitch);
+    outf(TEX_CNTL, m3d->tex_cntl);
+    outf(TEX_0_OFF + offset, m3d->tex_offset);
+
+    return TRUE;
+}
+
+/*
+ * Vertex format, setup and emission.
+ */
+typedef struct {
+    float s0;    /* normalized texture coords */
+    float t0;
+    float x;     /* quarter-pixels */
+    float y;
+    CARD32 argb; /* fragment color */
+} Mach64Vertex;
+
+#define VTX_SET(_v, _col, _dstX, _dstY, _srcX, _dx, _srcY, _dy) \
+do {                                                            \
+    _v.s0 = ((float)(_srcX) + _dx) / m3d->tex_width;            \
+    _v.t0 = ((float)(_srcY) + _dy) / m3d->tex_height;           \
+    _v.x  = ((float)(_dstX) * 4.0);                             \
+    _v.y  = ((float)(_dstY) * 4.0);                             \
+    _v.argb = _col;                                             \
+} while (0)
+
+#define FVAL(_fval) (*(CARD32 *)&(_fval))
+
+#define VTX_OUT(_v, n)                    \
+do {                                      \
+    float w = 1.0;                        \
+    CARD32 z = 0xffff << 15;              \
+    CARD32 x_y = ((CARD16)_v.x << 16) |   \
+                 ((CARD16)_v.y & 0xffff); \
+                                          \
+    ATIMach64WaitForFIFO(pATI, 6);        \
+    outf(VERTEX_##n##_S, FVAL(_v.s0));    \
+    outf(VERTEX_##n##_T, FVAL(_v.t0));    \
+    outf(VERTEX_##n##_W, FVAL(w));        \
+                                          \
+    outf(VERTEX_##n##_Z, z);              \
+    outf(VERTEX_##n##_ARGB, _v.argb);     \
+    outf(VERTEX_##n##_X_Y, x_y);          \
+} while (0)
+
+/*
+ * Composite acceleration hook.
+ */
+void
+Mach64Composite
+(
+    PixmapPtr pDst,
+    int       srcX,
+    int       srcY,
+    int       maskX,
+    int       maskY,
+    int       dstX,
+    int       dstY,
+    int       w,
+    int       h
+)
+{
+    ScrnInfoPtr pScreenInfo = xf86Screens[pDst->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+    Mach64ContextRegs3D *m3d = &pATI->m3d;
+
+    Mach64Vertex v0, v1, v2, v3;
+    float ooa;
+    CARD32 col;
+    PictVector v;
+    int srcXend, srcYend;
+    float dxy = 0.0, dwh = 0.0;
+
+    ATIDRISync(pScreenInfo);
+
+    /* Disable clipping if it gets in the way */
+    ATIMach64ValidateClip(pATI, dstX, dstX + w - 1, dstY, dstY + h - 1);
+
+    /* Handle solid textures which come in as fragment color */
+    col = m3d->frag_color;
+    if (m3d->frag_src) {
+        srcX = maskX;
+        srcY = maskY;
+    }
+
+    /* Handle transform */
+    srcXend = srcX + w;
+    srcYend = srcY + h;
+    if (m3d->transform) {
+        v.vector[0] = IntToxFixed(srcX);
+        v.vector[1] = IntToxFixed(srcY);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(m3d->transform, &v);
+        srcX = xFixedToInt(v.vector[0]);
+        srcY = xFixedToInt(v.vector[1]);
+
+        v.vector[0] = IntToxFixed(srcXend);
+        v.vector[1] = IntToxFixed(srcYend);
+        v.vector[2] = xFixed1;
+        PictureTransformPoint(m3d->transform, &v);
+        srcXend = xFixedToInt(v.vector[0]);
+        srcYend = xFixedToInt(v.vector[1]);
+
+#if 0
+        /* Bilinear needs manipulation of texture coordinates */
+        if (m3d->scale_3d_cntl & MACH64_BILINEAR_TEX_EN) {
+            dxy =  0.5;
+            dwh = -1.0;
+        }
+#endif
+    }
+
+    /* Create vertices in clock-wise order */
+    VTX_SET(v0, col, dstX,     dstY,     srcX, dxy,    srcY, dxy);
+    VTX_SET(v1, col, dstX + w, dstY,     srcXend, dwh, srcY, dxy);
+    VTX_SET(v2, col, dstX + w, dstY + h, srcXend, dwh, srcYend, dwh);
+    VTX_SET(v3, col, dstX,     dstY + h, srcX, dxy,    srcYend, dwh);
+
+    /* Setup upper triangle (v0, v1, v3) */
+    VTX_OUT(v0, 1);
+    VTX_OUT(v1, 2);
+    VTX_OUT(v3, 3);
+
+    ooa = 1.0 / (w * h);
+    outf(ONE_OVER_AREA, FVAL(ooa));
+
+    /* Setup lower triangle (v2, v1, v3) */
+    VTX_OUT(v2, 1);
+
+    ooa = -ooa;
+    outf(ONE_OVER_AREA, FVAL(ooa));
+}
+
+/*
+ * DoneComposite acceleration hook.
+ */
+void
+Mach64DoneComposite(PixmapPtr pDst)
+{
+    ScrnInfoPtr pScreenInfo = xf86Screens[pDst->drawable.pScreen->myNum];
+    ATIPtr pATI = ATIPTR(pScreenInfo);
+
+    ATIDRISync(pScreenInfo);
+
+    outf(SCALE_3D_CNTL, 0);
+}
diff --git a/src/atioption.c b/src/atioption.c
index 28bac3a..bc1129b 100644
--- a/src/atioption.c
+++ b/src/atioption.c
@@ -226,6 +226,13 @@ const OptionInfoRec ATIPublicOptions[] =
         FALSE
     },
     {
+        ATI_OPTION_RENDER_ACCEL,
+        "RenderAccel",
+        OPTV_BOOLEAN,
+        {0, },
+        FALSE
+    },
+    {
         -1,
         NULL,
         OPTV_NONE,
diff --git a/src/atioption.h b/src/atioption.h
index 836e911..808d90d 100644
--- a/src/atioption.h
+++ b/src/atioption.h
@@ -70,7 +70,8 @@ typedef enum
     ATI_OPTION_REFERENCE_CLOCK,
     ATI_OPTION_SHADOW_FB,
     ATI_OPTION_SWCURSOR,
-    ATI_OPTION_ACCELMETHOD
+    ATI_OPTION_ACCELMETHOD,
+    ATI_OPTION_RENDER_ACCEL
 } ATIPublicOptionType;
 
 #ifdef TV_OUT
diff --git a/src/atiregs.h b/src/atiregs.h
index 102347b..8ab8340 100644
--- a/src/atiregs.h
+++ b/src/atiregs.h
@@ -2751,4 +2751,132 @@
 /*	?				0x06u */
 /*	?				0x07u */
 
+/* 3D Engine for render acceleration (from Mach64 DRI driver) */
+
+/* SCALE_3D_CNTL */
+#define MACH64_SCALE_PIX_EXPAND_ZERO_EXTEND	(0 << 0)
+#define MACH64_SCALE_PIX_EXPAND_DYNAMIC_RANGE	(1 << 0)
+#define MACH64_SCALE_DITHER_ERROR_DIFFUSE	(0 << 1)
+#define MACH64_SCALE_DITHER_2D_TABLE		(1 << 1)
+#define MACH64_DITHER_EN			(1 << 2)
+#define MACH64_DITHER_INIT_CURRENT		(O << 3)
+#define MACH64_DITHER_INIT_RESET		(1 << 3)
+#define MACH64_ROUND_EN				(1 << 4)
+
+#define MACH64_TEX_CACHE_DIS			(1 << 5)
+
+#define MACH64_SCALE_3D_FCN_MASK		(3 << 6)
+#define MACH64_SCALE_3D_FCN_NOP			(0 << 6)
+#define MACH64_SCALE_3D_FCN_SCALE		(1 << 6)
+#define MACH64_SCALE_3D_FCN_TEXTURE		(2 << 6)
+#define MACH64_SCALE_3D_FCN_SHADE		(3 << 6)
+#define MACH64_TEXTURE_DISABLE			(1 << 6)
+
+#define MACH64_EDGE_ANTI_ALIAS			(1 << 8)
+#define MACH64_TEX_CACHE_SPLIT			(1 << 9)
+#define MACH64_APPLE_YUV_MODE			(1 << 10)
+
+#define MACH64_ALPHA_FOG_EN_MASK		(3 << 11)
+#define MACH64_ALPHA_FOG_DIS			(0 << 11)
+#define MACH64_ALPHA_FOG_EN_ALPHA		(1 << 11)
+#define MACH64_ALPHA_FOG_EN_FOG			(2 << 11)
+
+#define MACH64_ALPHA_BLEND_SAT			(1 << 13)
+#define MACH64_RED_DITHER_MAX			(1 << 14)
+#define MACH64_SIGNED_DST_CLAMP			(1 << 15)
+
+#define MACH64_ALPHA_BLEND_SRC_MASK		(7 << 16)
+#define MACH64_ALPHA_BLEND_SRC_ZERO		(0 << 16)
+#define MACH64_ALPHA_BLEND_SRC_ONE		(1 << 16)
+#define MACH64_ALPHA_BLEND_SRC_DSTCOLOR		(2 << 16)
+#define MACH64_ALPHA_BLEND_SRC_INVDSTCOLOR	(3 << 16)
+#define MACH64_ALPHA_BLEND_SRC_SRCALPHA		(4 << 16)
+#define MACH64_ALPHA_BLEND_SRC_INVSRCALPHA	(5 << 16)
+#define MACH64_ALPHA_BLEND_SRC_DSTALPHA		(6 << 16)
+#define MACH64_ALPHA_BLEND_SRC_INVDSTALPHA	(7 << 16)
+#define MACH64_ALPHA_BLEND_DST_MASK		(7 << 19)
+#define MACH64_ALPHA_BLEND_DST_ZERO		(0 << 19)
+#define MACH64_ALPHA_BLEND_DST_ONE		(1 << 19)
+#define MACH64_ALPHA_BLEND_DST_SRCCOLOR		(2 << 19)
+#define MACH64_ALPHA_BLEND_DST_INVSRCCOLOR	(3 << 19)
+#define MACH64_ALPHA_BLEND_DST_SRCALPHA		(4 << 19)
+#define MACH64_ALPHA_BLEND_DST_INVSRCALPHA	(5 << 19)
+#define MACH64_ALPHA_BLEND_DST_DSTALPHA		(6 << 19)
+#define MACH64_ALPHA_BLEND_DST_INVDSTALPHA	(7 << 19)
+
+#define MACH64_TEX_LIGHT_FCN_MASK		(3 << 22)
+#define MACH64_TEX_LIGHT_FCN_REPLACE		(0 << 22)
+#define MACH64_TEX_LIGHT_FCN_MODULATE		(1 << 22)
+#define MACH64_TEX_LIGHT_FCN_ALPHA_DECAL	(2 << 22)
+
+#define MACH64_MIP_MAP_DISABLE			(1 << 24)
+
+#define MACH64_BILINEAR_TEX_EN			(1 << 25)
+#define MACH64_TEX_BLEND_FCN_MASK		(3 << 26)
+#define MACH64_TEX_BLEND_FCN_NEAREST		(0 << 26)
+#define MACH64_TEX_BLEND_FCN_LINEAR		(2 << 26)
+#define MACH64_TEX_BLEND_FCN_TRILINEAR		(3 << 26)
+
+#define MACH64_TEX_AMASK_AEN			(1 << 28)
+#define MACH64_TEX_AMASK_BLEND_EDGE		(1 << 29)
+#define MACH64_TEX_MAP_AEN			(1 << 30)
+#define MACH64_SRC_3D_HOST_FIFO			(1 << 31)
+
+/* TEX_CNTL */
+#define MACH64_LOD_BIAS_SHIFT			0
+#define MACH64_LOD_BIAS_MASK			(0xf << 0)
+#define MACH64_COMP_FACTOR_SHIFT		4
+#define MACH64_COMP_FACTOR_MASK			(0xf << 4)
+
+#define MACH64_TEXTURE_COMPOSITE		(1 << 8)
+
+#define MACH64_COMP_COMBINE_BLEND		(0 << 9)
+#define MACH64_COMP_COMBINE_MODULATE		(1 << 9)
+#define MACH64_COMP_BLEND_NEAREST		(0 << 11)
+#define MACH64_COMP_BLEND_BILINEAR		(1 << 11)
+#define MACH64_COMP_FILTER_NEAREST		(0 << 12)
+#define MACH64_COMP_FILTER_BILINEAR		(1 << 12)
+#define MACH64_COMP_ALPHA			(1 << 13)
+
+#define MACH64_TEXTURE_TILING			(1 << 14)
+#define MACH64_COMPOSITE_TEX_TILING		(1 << 15)
+#define MACH64_TEX_COLLISION_DISABLE		(1 << 16)
+
+#define MACH64_TEXTURE_CLAMP_S			(1 << 17)
+#define MACH64_TEXTURE_CLAMP_T			(1 << 18)
+#define MACH64_TEX_ST_MULT_W			(0 << 19)
+#define MACH64_TEX_ST_DIRECT			(1 << 19)
+#define MACH64_TEX_SRC_LOCAL			(0 << 20)
+#define MACH64_TEX_SRC_AGP			(1 << 20)
+#define MACH64_TEX_UNCOMPRESSED			(0 << 21)
+#define MACH64_TEX_VQ_COMPRESSED		(1 << 21)
+#define MACH64_COMP_TEX_UNCOMPRESSED		(0 << 22)
+#define MACH64_COMP_TEX_VQ_COMPRESSED		(1 << 22)
+#define MACH64_TEX_CACHE_FLUSH			(1 << 23)
+#define MACH64_SEC_TEX_CLAMP_S			(1 << 24)
+#define MACH64_SEC_TEX_CLAMP_T			(1 << 25)
+#define MACH64_TEX_WRAP_S			(1 << 28)
+#define MACH64_TEX_WRAP_T			(1 << 29)
+#define MACH64_TEX_CACHE_SIZE_4K		(1 << 30)
+#define MACH64_TEX_CACHE_SIZE_2K		(1 << 30)
+#define MACH64_SECONDARY_STW			(1 << 31)
+
+/* DP_PIX_WIDTH (superset of PIX_WIDTH_?BPP) */
+#define MACH64_DATATYPE_CI8                             2
+#define MACH64_DATATYPE_ARGB1555                        3
+#define MACH64_DATATYPE_RGB565                          4
+#define MACH64_DATATYPE_ARGB8888                        6
+#define MACH64_DATATYPE_RGB332                          7
+#define MACH64_DATATYPE_Y8                              8
+#define MACH64_DATATYPE_RGB8                            9
+#define MACH64_DATATYPE_VYUY422                         11
+#define MACH64_DATATYPE_YVYU422                         12
+#define MACH64_DATATYPE_AYUV444                         14
+#define MACH64_DATATYPE_ARGB4444                        15
+
+/* Extract texture level from TEX_SIZE_PITCH and shift appropriately for
+ * addition to TEX_0_OFF.
+ */
+#define TEX_LEVEL(_tex_size_pitch) (((_tex_size_pitch) & 0xf0) >> 2)
+
 #endif /* ___ATIREGS_H___ */
diff --git a/src/atistruct.h b/src/atistruct.h
index 2cb6625..12cf61e 100644
--- a/src/atistruct.h
+++ b/src/atistruct.h
@@ -176,6 +176,38 @@ typedef struct _ATIHWRec
 
 } ATIHWRec;
 
+#ifdef USE_EXA
+/*
+ * Card engine state for communication across RENDER acceleration hooks.
+ */
+typedef struct _Mach64ContextRegs3D
+{
+    CARD32	dp_mix;
+    CARD32	dp_src;
+    CARD32	dp_write_mask;
+    CARD32	dp_pix_width;
+    CARD32	dst_pitch_offset;
+
+    CARD32	scale_3d_cntl;
+
+    CARD32	tex_cntl;
+    CARD32	tex_size_pitch;
+    CARD32	tex_offset;
+
+    int		tex_width;	/* src/mask texture width (pixels) */
+    int		tex_height;	/* src/mask texture height (pixels) */
+
+    Bool	frag_src;	/* solid src uses fragment color */
+    Bool	frag_mask;	/* solid mask uses fragment color */
+    CARD32	frag_color;	/* solid src/mask color */
+
+    Bool	color_alpha;	/* the alpha value is contained in the color
+				   channels instead of the alpha channel */
+
+    PictTransform *transform;
+} Mach64ContextRegs3D;
+#endif /* USE_EXA */
+
 /*
  * This structure defines the driver's private area.
  */
@@ -318,6 +350,10 @@ typedef struct _ATIRec
     CARD32 *ExpansionBitmapScanlinePtr[2];
     int ExpansionBitmapWidth;
 #endif
+#ifdef USE_EXA
+    Bool RenderAccelEnabled;
+    Mach64ContextRegs3D m3d;
+#endif
 
     /*
      * Cursor-related definitions.



More information about the xorg-commit mailing list