xf86-video-ati: Branch 'master' - 8 commits

Michel Dänzer daenzer at kemper.freedesktop.org
Sat Oct 3 07:39:24 PDT 2009


 src/drmmode_display.c            |   17 ++++++
 src/r600_textured_videofuncs.c   |    2 
 src/radeon.h                     |   12 +++-
 src/radeon_drm.h                 |    7 +-
 src/radeon_exa.c                 |   13 +++-
 src/radeon_exa_funcs.c           |  109 ++++++++++++++++++++++++++++++---------
 src/radeon_exa_render.c          |   66 +++++++++++++++++++----
 src/radeon_kms.c                 |   30 +++++++++-
 src/radeon_textured_video.c      |   32 ++++++++---
 src/radeon_textured_videofuncs.c |   36 +++++++-----
 src/radeon_video.c               |   87 ++++++++++++++++---------------
 src/radeon_video.h               |    2 
 12 files changed, 302 insertions(+), 111 deletions(-)

New commits:
commit 9460ea864b12ec1fbd11c5d9a20bb5a4279d9d3d
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Sat Oct 3 16:33:33 2009 +0200

    Fix KMS on big endian machines.
    
    Requires at least xserver 1.7 to work properly.
    
    Also make sure the front buffer is and stays tiled if colour tiling is enabled.

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index c9037b4..ecfc629 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -915,6 +915,7 @@ drmmode_xf86crtc_resize (ScrnInfoPtr scrn, int width, int height)
 	int screen_size;
 	int cpp = info->CurrentLayout.pixel_bytes;
 	struct radeon_bo *front_bo;
+	uint32_t tiling_flags = 0;
 
 	if (scrn->virtualX == width && scrn->virtualY == height)
 		return TRUE;
@@ -948,6 +949,22 @@ drmmode_xf86crtc_resize (ScrnInfoPtr scrn, int width, int height)
 	if (!info->front_bo)
 		goto fail;
 
+	if (info->allowColorTiling)
+	    tiling_flags |= RADEON_TILING_MACRO;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	switch (cpp) {
+	case 4:
+	    tiling_flags |= RADEON_TILING_SWAP_32BIT;
+	    break;
+	case 2:
+	    tiling_flags |= RADEON_TILING_SWAP_16BIT;
+	    break;
+	}
+#endif
+	if (tiling_flags)
+	    radeon_bo_set_tiling(info->front_bo,
+				 tiling_flags | RADEON_TILING_SURFACE, pitch * cpp);
+
 	ret = drmModeAddFB(drmmode->fd, width, height, scrn->depth,
 			   scrn->bitsPerPixel, pitch * cpp,
 			   info->front_bo->handle,
diff --git a/src/radeon.h b/src/radeon.h
index 0322bf0..9d283bb 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -463,6 +463,7 @@ typedef struct _atomBiosHandle *atomBiosHandlePtr;
 struct radeon_exa_pixmap_priv {
     struct radeon_bo *bo;
     int flags;
+    Bool bo_mapped;
 };
 
 typedef struct {
diff --git a/src/radeon_drm.h b/src/radeon_drm.h
index f974e19..49a5f81 100644
--- a/src/radeon_drm.h
+++ b/src/radeon_drm.h
@@ -802,9 +802,10 @@ struct drm_radeon_gem_create {
 
 #define RADEON_TILING_MACRO 0x1
 #define RADEON_TILING_MICRO 0x2
-#define RADEON_TILING_SWAP  0x4
-#define RADEON_TILING_SURFACE  0x8 /* this object requires a surface
-				    * when mapped - i.e. front buffer */
+#define RADEON_TILING_SWAP_16BIT  0x4
+#define RADEON_TILING_SWAP_32BIT  0x8
+#define RADEON_TILING_SURFACE     0x10 /* this object requires a surface
+					* when mapped - i.e. front buffer */
 
 struct drm_radeon_gem_set_tiling {
 	uint32_t	handle;
diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index 6cf9598..99a93a4 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -339,10 +339,18 @@ static void RADEONFinishAccess_BE(PixmapPtr pPix, int index)
 #ifdef XF86DRM_MODE
 Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index)
 {
-    ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum];
+    ScreenPtr pScreen = pPix->drawable.pScreen;
+    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
     struct radeon_exa_pixmap_priv *driver_priv;
     int ret;
 
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    /* May need to handle byte swapping in DownloadFrom/UploadToScreen */
+    if (pPix->drawable.bitsPerPixel > 8 &&
+	pPix != pScreen->GetScreenPixmap(pScreen))
+	return FALSE;
+#endif
+
     driver_priv = exaGetPixmapDriverPrivate(pPix);
     if (!driver_priv)
       return FALSE;
@@ -357,6 +365,7 @@ Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index)
       FatalError("failed to map pixmap %d\n", ret);
       return FALSE;
     }
+    driver_priv->bo_mapped = TRUE;
 
     pPix->devPrivate.ptr = driver_priv->bo->ptr;
 
@@ -368,7 +377,7 @@ void RADEONFinishAccess_CS(PixmapPtr pPix, int index)
     struct radeon_exa_pixmap_priv *driver_priv;
 
     driver_priv = exaGetPixmapDriverPrivate(pPix);
-    if (!driver_priv)
+    if (!driver_priv || !driver_priv->bo_mapped)
         return;
 
     radeon_bo_unmap(driver_priv->bo);
diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index 98aca93..d5a3103 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -450,15 +450,18 @@ static Bool
 RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
 		       char *src, int src_pitch)
 {
-    RINFO_FROM_SCREEN(pDst->drawable.pScreen);
+    ScreenPtr pScreen = pDst->drawable.pScreen;
+    RINFO_FROM_SCREEN(pScreen);
     struct radeon_exa_pixmap_priv *driver_priv;
     struct radeon_bo *scratch;
+    unsigned char *dst;
     unsigned size;
     uint32_t datatype = 0;
     uint32_t dst_domain;
     uint32_t dst_pitch_offset;
     unsigned bpp = pDst->drawable.bitsPerPixel;
     uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63;
+    uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
     Bool r;
     int i;
 
@@ -466,11 +469,34 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
 	return FALSE;
 
     driver_priv = exaGetPixmapDriverPrivate(pDst);
+    if (!driver_priv || !driver_priv->bo)
+	return FALSE;
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    switch (bpp) {
+    case 32:
+	swap = RADEON_HOST_DATA_SWAP_32BIT;
+	break;
+    case 16:
+	swap = RADEON_HOST_DATA_SWAP_16BIT;
+	break;
+    }
+#endif
 
     /* If we know the BO won't be busy, don't bother */
     if (driver_priv->bo->cref == 1 &&
-	!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
+	!radeon_bo_is_busy(driver_priv->bo, &dst_domain)) {
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	/* Can't return FALSE here if we need to swap bytes */
+	if (swap != RADEON_HOST_DATA_SWAP_NONE &&
+	    driver_priv->bo != info->front_bo) {
+	    scratch = driver_priv->bo;
+	    scratch_pitch = pDst->devKind;
+	    goto copy;
+	}
+#endif
 	return FALSE;
+    }
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
@@ -486,6 +512,7 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
         goto out;
     }
 
+copy:
     r = radeon_bo_map(scratch, 0);
     if (r) {
         r = FALSE;
@@ -493,22 +520,28 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     }
     r = TRUE;
     size = w * bpp / 8;
+    dst = scratch->ptr;
+    if (scratch == driver_priv->bo)
+	dst += y * scratch_pitch + x * bpp / 8;
     for (i = 0; i < h; i++) {
-        memcpy(scratch->ptr + i * scratch_pitch, src, size);
+        RADEONCopySwap(dst + i * scratch_pitch, (uint8_t*)src, size, swap);
         src += src_pitch;
     }
     radeon_bo_unmap(scratch);
 
-    RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype);
-    RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset);
-    ACCEL_PREAMBLE();
-    RADEON_SWITCH_TO_2D();
-    RADEONBlitChunk(pScrn, scratch, driver_priv->bo, datatype, scratch_pitch << 16,
-                    dst_pitch_offset, 0, 0, x, y, w, h,
-                    RADEON_GEM_DOMAIN_GTT, RADEON_GEM_DOMAIN_VRAM);
+    if (scratch != driver_priv->bo) {
+	RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype);
+	RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset);
+	ACCEL_PREAMBLE();
+	RADEON_SWITCH_TO_2D();
+	RADEONBlitChunk(pScrn, scratch, driver_priv->bo, datatype, scratch_pitch << 16,
+			dst_pitch_offset, 0, 0, x, y, w, h,
+			RADEON_GEM_DOMAIN_GTT, RADEON_GEM_DOMAIN_VRAM);
+    }
 
 out:
-    radeon_bo_unref(scratch);
+    if (scratch != driver_priv->bo)
+	radeon_bo_unref(scratch);
     return r;
 }
 
@@ -525,12 +558,26 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     uint32_t src_pitch_offset;
     unsigned bpp = pSrc->drawable.bitsPerPixel;
     uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63;
+    uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
     Bool r;
 
     if (bpp < 8)
 	return FALSE;
 
     driver_priv = exaGetPixmapDriverPrivate(pSrc);
+    if (!driver_priv || !driver_priv->bo)
+	return FALSE;
+
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+    switch (bpp) {
+    case 32:
+	swap = RADEON_HOST_DATA_SWAP_32BIT;
+	break;
+    case 16:
+	swap = RADEON_HOST_DATA_SWAP_16BIT;
+	break;
+    }
+#endif
 
     /* If we know the BO won't end up in VRAM anyway, don't bother */
     if (driver_priv->bo->cref > 1) {
@@ -546,8 +593,17 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     if (!src_domain)
 	radeon_bo_is_busy(driver_priv->bo, &src_domain);
 
-    if (src_domain != RADEON_GEM_DOMAIN_VRAM)
+    if (src_domain != RADEON_GEM_DOMAIN_VRAM) {
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	/* Can't return FALSE here if we need to swap bytes */
+	if (swap != RADEON_HOST_DATA_SWAP_NONE) {
+	    scratch = driver_priv->bo;
+	    scratch_pitch = pSrc->devKind;
+	    goto copy;
+	}
+#endif
 	return FALSE;
+    }
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
@@ -572,6 +628,7 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
                     RADEON_GEM_DOMAIN_GTT);
     FLUSH_RING();
 
+copy:
     r = radeon_bo_map(scratch, 0);
     if (r) {
         r = FALSE;
@@ -579,15 +636,19 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     }
     r = TRUE;
     w *= bpp / 8;
-    size = 0;
+    if (scratch == driver_priv->bo)
+	size = y * scratch_pitch + x * bpp / 8;
+    else
+	size = 0;
     while (h--) {
-        memcpy(dst, scratch->ptr + size, w);
+        RADEONCopySwap((uint8_t*)dst, scratch->ptr + size, w, swap);
         size += scratch_pitch;
         dst += dst_pitch;
     }
     radeon_bo_unmap(scratch);
 out:
-    radeon_bo_unref(scratch);
+    if (scratch != driver_priv->bo)
+	radeon_bo_unref(scratch);
     return r;
 }
 #endif
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index bd6020b..432cee7 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -887,6 +887,11 @@ static Bool radeon_setup_kernel_mem(ScreenPtr pScreen)
                     return FALSE;
                 }
 
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+		radeon_bo_set_tiling(info->cursor_bo[c], RADEON_TILING_SWAP_32BIT |
+				     RADEON_TILING_SURFACE, stride);
+#endif
+
                 if (radeon_bo_map(info->cursor_bo[c], 1)) {
                     ErrorF("Failed to map cursor buffer memory\n");
                 }
@@ -907,6 +912,8 @@ static Bool radeon_setup_kernel_mem(ScreenPtr pScreen)
     info->dri->textureSize = 0;
 
     if (info->front_bo == NULL) {
+	uint32_t tiling_flags = 0;
+
         info->front_bo = radeon_bo_open(info->bufmgr, 0, screen_size,
                                         0, RADEON_GEM_DOMAIN_VRAM, 0);
         if (info->r600_shadow_fb == TRUE) {
@@ -915,8 +922,22 @@ static Bool radeon_setup_kernel_mem(ScreenPtr pScreen)
             }
         }
         if (info->allowColorTiling) {
-            radeon_bo_set_tiling(info->front_bo, RADEON_TILING_MACRO, stride);
+	    tiling_flags |= RADEON_TILING_MACRO;
         }
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	switch (cpp) {
+	case 4:
+	    tiling_flags |= RADEON_TILING_SWAP_32BIT;
+	    break;
+	case 2:
+	    tiling_flags |= RADEON_TILING_SWAP_16BIT;
+	    break;
+	}
+#endif
+	if (tiling_flags) {
+            radeon_bo_set_tiling(info->front_bo,
+				 tiling_flags | RADEON_TILING_SURFACE, stride);
+	}
     }
 
     xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Front buffer size: %dK\n", info->front_bo->size/1024);
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index 04a2401..d6b221f 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -709,7 +709,13 @@ Bool radeon_load_bicubic_texture(ScrnInfoPtr pScrn)
 	} else
 	    bicubic_addr = (uint8_t *)(info->FB + info->bicubic_offset);
 
-	RADEONCopyData(pScrn, (uint8_t *)bicubic_tex_512, bicubic_addr, 1024, 1024, 1, 512, 2);
+	RADEONCopySwap(bicubic_addr, (uint8_t *)bicubic_tex_512, 1024,
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+		       RADEON_HOST_DATA_SWAP_16BIT
+#else
+		       RADEON_HOST_DATA_SWAP_NONE
+#endif
+);
 	if (info->cs)
 	    radeon_bo_unmap(info->bicubic_bo);
     }
diff --git a/src/radeon_video.c b/src/radeon_video.c
index f1fe72b..7aaa266 100644
--- a/src/radeon_video.c
+++ b/src/radeon_video.c
@@ -2198,36 +2198,37 @@ RADEONCopyData(
     else
 #endif /* XF86DRI */
     {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	unsigned char *RADEONMMIO = info->MMIO;
-	unsigned int swapper = info->ModeReg->surface_cntl &
-		~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP |
-		  RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP);
+	int swap = RADEON_HOST_DATA_SWAP_NONE;
 
-	switch(bpp) {
-	case 2:
-	    swapper |= RADEON_NONSURF_AP0_SWP_16BPP
-		    |  RADEON_NONSURF_AP1_SWP_16BPP;
-	    break;
-	case 4:
-	    swapper |= RADEON_NONSURF_AP0_SWP_32BPP
-		    |  RADEON_NONSURF_AP1_SWP_32BPP;
-	    break;
+#if X_BYTE_ORDER == X_BIG_ENDIAN
+	if (info->kms_enabled) {
+	    switch(bpp) {
+	    case 2:
+		swap = RADEON_HOST_DATA_SWAP_16BIT;
+		break;
+	    case 4:
+		swap = RADEON_HOST_DATA_SWAP_32BIT;
+		break;
+	    }
+	} else if (bpp != pScrn->bitsPerPixel) {
+	    if (bpp == 8)
+		swap = RADEON_HOST_DATA_SWAP_32BIT;
+	    else
+		swap = RADEON_HOST_DATA_SWAP_HDW;
 	}
-	OUTREG(RADEON_SURFACE_CNTL, swapper);
 #endif
+
 	w *= bpp;
 
-	while (h--) {
-	    memcpy(dst, src, w);
-	    src += srcPitch;
-	    dst += dstPitch;
+	if (dstPitch == w && dstPitch == srcPitch)
+	    RADEONCopySwap(dst, src, h * dstPitch, swap);
+	else {
+	    while (h--) {
+		RADEONCopySwap(dst, src, w, swap);
+		src += srcPitch;
+		dst += dstPitch;
+	    }
 	}
-
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	/* restore byte swapping */
-	OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
-#endif
     }
 }
 
@@ -2282,9 +2283,10 @@ RADEONCopyRGB24Data(
     {
 #if X_BYTE_ORDER == X_BIG_ENDIAN
 	unsigned char *RADEONMMIO = info->MMIO;
-	OUTREG(RADEON_SURFACE_CNTL, (info->ModeReg->surface_cntl
-				   | RADEON_NONSURF_AP0_SWP_32BPP)
-				  & ~RADEON_NONSURF_AP0_SWP_16BPP);
+
+	if (!info->kms_enabled)
+	    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl &
+		   ~(RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP0_SWP_32BPP));
 #endif
 
 	for (j = 0; j < h; j++) {
@@ -2292,13 +2294,15 @@ RADEONCopyRGB24Data(
 	    sptr = src + j * srcPitch;
 
 	    for (i = 0; i < w; i++, sptr += 3) {
-		dptr[i] = (sptr[2] << 16) | (sptr[1] << 8) | sptr[0];
+		dptr[i] = cpu_to_le32((sptr[2] << 16) | (sptr[1] << 8) | sptr[0]);
 	    }
 	}
 
 #if X_BYTE_ORDER == X_BIG_ENDIAN
-	/* restore byte swapping */
-	OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
+	if (!info->kms_enabled) {
+	    /* restore byte swapping */
+	    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
+	}
 #endif
     }
 }
@@ -2377,9 +2381,10 @@ RADEONCopyMungedData(
 
 #if X_BYTE_ORDER == X_BIG_ENDIAN
 	unsigned char *RADEONMMIO = info->MMIO;
-	OUTREG(RADEON_SURFACE_CNTL, (info->ModeReg->surface_cntl
-				   | RADEON_NONSURF_AP0_SWP_32BPP)
-				  & ~RADEON_NONSURF_AP0_SWP_16BPP);
+
+	if (!info->kms_enabled)
+	    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl &
+		   ~(RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP0_SWP_32BPP));
 #endif
 
 	w /= 2;
@@ -2391,16 +2396,16 @@ RADEONCopyMungedData(
 	    i = w;
 	    while( i > 4 )
 	    {
-		dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
-		dst[1] = s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24);
-		dst[2] = s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24);
-		dst[3] = s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24);
+		dst[0] = cpu_to_le32(s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24));
+		dst[1] = cpu_to_le32(s1[2] | (s1[3] << 16) | (s3[1] << 8) | (s2[1] << 24));
+		dst[2] = cpu_to_le32(s1[4] | (s1[5] << 16) | (s3[2] << 8) | (s2[2] << 24));
+		dst[3] = cpu_to_le32(s1[6] | (s1[7] << 16) | (s3[3] << 8) | (s2[3] << 24));
 		dst += 4; s2 += 4; s3 += 4; s1 += 8;
 		i -= 4;
 	    }
 	    while( i-- )
 	    {
-		dst[0] = s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24);
+		dst[0] = cpu_to_le32(s1[0] | (s1[1] << 16) | (s3[0] << 8) | (s2[0] << 24));
 		dst++; s2++; s3++;
 		s1 += 2;
 	    }
@@ -2414,8 +2419,10 @@ RADEONCopyMungedData(
 	    }	
 	}
 #if X_BYTE_ORDER == X_BIG_ENDIAN
-	/* restore byte swapping */
-	OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
+	if (!info->kms_enabled) {
+	    /* restore byte swapping */
+	    OUTREG(RADEON_SURFACE_CNTL, info->ModeReg->surface_cntl);
+	}
 #endif
     }
 }
commit aee6b6f2c120baf477b4845ddc1a94637c31db2f
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Sat Oct 3 16:33:33 2009 +0200

    EXA: Bail earlier from PrepareCopy hook on trivially unsupportable conditions.

diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index c74b9d9..98aca93 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -275,6 +275,15 @@ FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc,   PixmapPtr pDst,
     uint32_t datatype, src_pitch_offset, dst_pitch_offset;
     TRACE;
 
+    if (pDst->drawable.bitsPerPixel == 24)
+	RADEON_FALLBACK(("24bpp unsupported"));
+    if (!RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype))
+	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
+    if (!RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset))
+	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch source failed\n"));
+    if (!RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset))
+	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch dest failed\n"));
+
     RADEON_SWITCH_TO_2D();
 
 #ifdef XF86DRM_MODE
@@ -302,15 +311,6 @@ FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc,   PixmapPtr pDst,
     info->accel_state->ydir = ydir;
     info->accel_state->dst_pix = pDst;
 
-    if (pDst->drawable.bitsPerPixel == 24)
-	RADEON_FALLBACK(("24bpp unsupported"));
-    if (!RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype))
-	RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n"));
-    if (!RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset))
-	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch source failed\n"));
-    if (!RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset))
-	RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch dest failed\n"));
-
     FUNC_NAME(RADEONDoPrepareCopy)(pScrn, src_pitch_offset, dst_pitch_offset,
 				   datatype, rop, planemask);
 
commit 30031b169def5caadb394225919a6079a0957142
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Sat Oct 3 16:33:33 2009 +0200

    R3/5xx EXA: Add support for BGRA picture formats.
    
    Also add mysteriously missing case statements for ABGR destination formats.

diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index c266fb7..7bc8ef0 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -125,6 +125,10 @@ static struct formatinfo R300TexFormats[] = {
     {PICT_x8r8g8b8,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)},
     {PICT_a8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)},
     {PICT_x8b8g8r8,	R300_EASY_TX_FORMAT(Z, Y, X, ONE, W8Z8Y8X8)},
+#ifdef PICT_TYPE_BGRA
+    {PICT_b8g8r8a8,	R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)},
+    {PICT_b8g8r8x8,	R300_EASY_TX_FORMAT(W, Z, Y, ONE, W8Z8Y8X8)},
+#endif
     {PICT_r5g6b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)},
     {PICT_a1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)},
     {PICT_x1r5g5b5,	R300_EASY_TX_FORMAT(X, Y, Z, ONE, W1Z5Y5X5)},
@@ -163,6 +167,12 @@ static Bool R300GetDestFormat(PicturePtr pDstPicture, uint32_t *dst_format)
     switch (pDstPicture->format) {
     case PICT_a8r8g8b8:
     case PICT_x8r8g8b8:
+    case PICT_a8b8g8r8:
+    case PICT_x8b8g8r8:
+#ifdef PICT_TYPE_BGRA
+    case PICT_b8g8r8a8:
+    case PICT_b8g8r8x8:
+#endif
 	*dst_format = R300_COLORFORMAT_ARGB8888;
 	break;
     case PICT_r5g6b5:
@@ -1559,6 +1569,16 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 		      R300_OUT_FMT_C2_SEL_BLUE |
 		      R300_OUT_FMT_C3_SEL_ALPHA);
 	break;
+#ifdef PICT_TYPE_BGRA
+    case PICT_b8g8r8a8:
+    case PICT_b8g8r8x8:
+	output_fmt = (R300_OUT_FMT_C4_8 |
+		      R300_OUT_FMT_C0_SEL_ALPHA |
+		      R300_OUT_FMT_C1_SEL_RED |
+		      R300_OUT_FMT_C2_SEL_GREEN |
+		      R300_OUT_FMT_C3_SEL_BLUE);
+	break;
+#endif
     case PICT_a8:
 	output_fmt = (R300_OUT_FMT_C4_8 |
 		      R300_OUT_FMT_C0_SEL_ALPHA);
commit 4b4ce36081ca151c24e028c54b59986f41731a73
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Sat Oct 3 16:33:32 2009 +0200

    R3/5xx EXA: Minimise number of draw primitives used for Composite operations.
    
    This should reduce the kernel CS checker overhead, if nothing else.
    
    I'll leave porting this to other chipset families to others who can test it.

diff --git a/src/radeon.h b/src/radeon.h
index db7b26b..0322bf0 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -687,6 +687,9 @@ struct radeon_accel_state {
     /* Size of tiles ... set to 65536x65536 if not tiling in that direction */
     Bool              src_tile_width;
     Bool              src_tile_height;
+    uint32_t          *draw_header;
+    unsigned          vtx_count;
+    unsigned          num_vtx;
 
     Bool              vsync;
 
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 6053eef..c266fb7 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -2041,6 +2041,16 @@ static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst)
     ENTER_DRAW(0);
 
     if (IS_R300_3D || IS_R500_3D) {
+	if (info->accel_state->draw_header) {
+		info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+							       info->accel_state->num_vtx *
+							       info->accel_state->vtx_count);
+		info->accel_state->draw_header[1] = RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
+		    RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+		    (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT);
+		info->accel_state->draw_header = NULL;
+	}
+
 	BEGIN_ACCEL(3);
 	OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
 	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
@@ -2126,8 +2136,10 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
     /* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
        srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
 
-#if defined(ACCEL_CP) && defined(XF86DRM_MODE)
-    if (info->cs && CS_FULL(info->cs)) {
+#if defined(ACCEL_CP)
+    if ((info->cs && CS_FULL(info->cs)) ||
+	(!info->cs && (info->cp->indirectBuffer->used + 4 * 32) >
+	 info->cp->indirectBuffer->total)) {
 	FUNC_NAME(RadeonDoneComposite)(info->accel_state->dst_pix);
 	radeon_cs_flush_indirect(pScrn);
 	info->accel_state->exa->PrepareComposite(info->accel_state->composite_op,
@@ -2202,12 +2214,26 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
 		 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
 		 (3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
     } else if (IS_R300_3D || IS_R500_3D) {
-	BEGIN_RING(4 * vtx_count + 4);
-	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
-			    4 * vtx_count));
-	OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
-		 RADEON_CP_VC_CNTL_PRIM_WALK_RING |
-		 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+	if (!info->accel_state->draw_header) {
+	    BEGIN_RING(2);
+
+	    if (info->cs)
+		info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
+	    else
+		info->accel_state->draw_header = __head;
+	    info->accel_state->num_vtx = 0;
+	    info->accel_state->vtx_count = vtx_count;
+
+	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+				4 * vtx_count));
+	    OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
+		     RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+		     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+	    ADVANCE_RING();
+	}
+
+	info->accel_state->num_vtx += 4;
+	BEGIN_RING(4 * vtx_count);
     } else {
 	BEGIN_RING(3 * vtx_count + 2);
 	OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
@@ -2267,10 +2293,6 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
 		xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0],     xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]);
     }
 
-    if (IS_R300_3D || IS_R500_3D)
-	/* flushing is pipelined, free/finish is not */
-	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
-
 #ifdef ACCEL_CP
     ADVANCE_RING();
 #else
commit eade1e5be159c9f2965d611925596d33cab11d6d
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Sat Oct 3 16:33:32 2009 +0200

    EXA: Don't always flush when switching between 2D/3D engines with KMS.
    
    This seems to work fine here now and help 2D performance quite a bit; let's see
    if it breaks anybody else's setup...

diff --git a/src/radeon.h b/src/radeon.h
index e4f5334..db7b26b 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -1532,9 +1532,13 @@ do {									\
 	uint32_t flush = 0;                                             \
 	switch (info->accel_state->engineMode) {			\
 	case EXA_ENGINEMODE_UNKNOWN:					\
-	case EXA_ENGINEMODE_3D:						\
 	    flush = 1;                                                  \
+	    break;							\
+	case EXA_ENGINEMODE_3D:						\
+	    flush = !info->cs || CS_FULL(info->cs);			\
+	    break;							\
 	case EXA_ENGINEMODE_2D:						\
+	    flush = info->cs && CS_FULL(info->cs);			\
 	    break;							\
 	}								\
 	if (flush) {							\
@@ -1555,7 +1559,9 @@ do {									\
 	    break;							\
 	case EXA_ENGINEMODE_2D:						\
 	    flush = !info->cs || CS_FULL(info->cs);			\
+	    break;							\
 	case EXA_ENGINEMODE_3D:						\
+	    flush = info->cs && CS_FULL(info->cs);			\
 	    break;							\
 	}								\
 	if (flush) {							\
commit 926e414fc835ace141c066830d11d8ce32dbb06c
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Sat Oct 3 16:33:32 2009 +0200

    KMS: Double-buffer textured video source image upload.
    
    This avoids stalling on the previous frame.
    
    Without KMS, on the other hand, we can't do this and have to wait for the
    previous frame to finish rendering.

diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 5e6c561..ab3c498 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -224,7 +224,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     if (info->cs) {
 	accel_state->dst_mc_addr = 0;
 	accel_state->src_mc_addr[0] = 0;
-	accel_state->src_bo[0] = pPriv->src_bo;
+	accel_state->src_bo[0] = pPriv->src_bo[pPriv->currentBuffer];
 	accel_state->src_bo[1] = NULL;
 	accel_state->dst_bo = radeon_get_pixmap_bo(pPixmap);
 
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index b708f52..04a2401 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -319,16 +319,19 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
 	if (info->ChipFamily >= CHIP_FAMILY_R600)
 	    pPriv->video_offset = radeon_legacy_allocate_memory(pScrn,
 								&pPriv->video_memory,
-								size * 2, 256);
+								size, 256);
 	else
 	    pPriv->video_offset = radeon_legacy_allocate_memory(pScrn,
 								&pPriv->video_memory,
-								size * 2, 64);
+								size, 64);
 	if (pPriv->video_offset == 0)
 	    return BadAlloc;
 
-	if (info->cs)
-	    pPriv->src_bo = pPriv->video_memory;
+	if (info->cs) {
+	    pPriv->src_bo[0] = pPriv->video_memory;
+	    radeon_legacy_allocate_memory(pScrn, (void*)&pPriv->src_bo[1], size,
+					  info->ChipFamily >= CHIP_FAMILY_R600 ? 256 : 64);
+	}
     }
 
     /* Bicubic filter loading */
@@ -366,14 +369,21 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
 
     pPriv->src_offset = pPriv->video_offset;
     if (info->cs) {
+	struct radeon_bo *src_bo;
 	int ret;
-	ret = radeon_bo_map(pPriv->src_bo, 1);
+
+	pPriv->currentBuffer ^= 1;
+
+	src_bo = pPriv->src_bo[pPriv->currentBuffer];
+
+	ret = radeon_bo_map(src_bo, 1);
 	if (ret)
 	    return BadAlloc;
 
-	pPriv->src_addr = pPriv->src_bo->ptr;
+	pPriv->src_addr = src_bo->ptr;
     } else {
 	pPriv->src_addr = (uint8_t *)(info->FB + pPriv->video_offset);
+	RADEONWaitForIdleMMIO(pScrn);
     }
     pPriv->src_pitch = dstPitch;
 
@@ -460,7 +470,7 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
 
 #if defined(XF86DRM_MODE)
     if (info->cs)
-	radeon_bo_unmap(pPriv->src_bo);
+	radeon_bo_unmap(pPriv->src_bo[pPriv->currentBuffer]);
 #endif
 #ifdef XF86DRI
     if (info->directRenderingEnabled) {
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index d26e7c0..8b55efc 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -93,6 +93,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
     RADEONInfoPtr info = RADEONPTR(pScrn);
     PixmapPtr pPixmap = pPriv->pPixmap;
     struct radeon_exa_pixmap_priv *driver_priv;
+    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
     uint32_t txformat, txsize, txpitch, txoffset;
     uint32_t dst_pitch, dst_format;
     uint32_t colorpitch;
@@ -107,7 +108,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	int ret;
 
 	radeon_cs_space_reset_bos(info->cs);
-        radeon_cs_space_add_persistent_bo(info->cs, pPriv->src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+        radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
 
 	if (pPriv->bicubic_enabled)
 	    radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
@@ -237,7 +238,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		      RADEON_CLAMP_T_CLAMP_LAST |
 		      RADEON_YUV_TO_RGB);
 	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0);
-	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, pPriv->src_bo);
+	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo);
 	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
 		      RADEON_COLOR_ARG_A_ZERO |
 		      RADEON_COLOR_ARG_B_ZERO |
@@ -264,7 +265,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		      RADEON_CLAMP_S_CLAMP_LAST |
 		      RADEON_CLAMP_T_CLAMP_LAST);
 	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1);
-	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, pPriv->src_bo);
+	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo);
 	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_1,
 		      RADEON_COLOR_ARG_A_ZERO |
 		      RADEON_COLOR_ARG_B_ZERO |
@@ -288,7 +289,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		      RADEON_CLAMP_S_CLAMP_LAST |
 		      RADEON_CLAMP_T_CLAMP_LAST);
 	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_2, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ1);
-	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, pPriv->src_bo);
+	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo);
 	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_2,
 		      RADEON_COLOR_ARG_A_ZERO |
 		      RADEON_COLOR_ARG_B_ZERO |
@@ -321,7 +322,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		      RADEON_CLAMP_T_CLAMP_LAST |
 		      RADEON_YUV_TO_RGB);
 	OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat | RADEON_TXFORMAT_ST_ROUTE_STQ0);
-	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, pPriv->src_bo);
+	OUT_TEXTURE_REG(RADEON_PP_TXOFFSET_0, txoffset, src_bo);
 	OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0,
 		      RADEON_COLOR_ARG_A_ZERO |
 		      RADEON_COLOR_ARG_B_ZERO |
@@ -468,6 +469,7 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     RADEONInfoPtr info = RADEONPTR(pScrn);
     PixmapPtr pPixmap = pPriv->pPixmap;
     struct radeon_exa_pixmap_priv *driver_priv;
+    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
     uint32_t txformat;
     uint32_t txfilter, txsize, txpitch, txoffset;
     uint32_t dst_pitch, dst_format;
@@ -493,7 +495,7 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	int ret;
 
 	radeon_cs_space_reset_bos(info->cs);
-        radeon_cs_space_add_persistent_bo(info->cs, pPriv->src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+        radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
 
 	if (pPriv->bicubic_enabled)
 	    radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
@@ -660,21 +662,21 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 		      (pPriv->w - 1) |
 		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
 	OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
-	OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, pPriv->src_bo);
+	OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo);
 
 	OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter);
 	OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
 	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0);
 	OUT_ACCEL_REG(R200_PP_TXSIZE_1, txsize);
 	OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch);
-	OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, pPriv->src_bo);
+	OUT_TEXTURE_REG(R200_PP_TXOFFSET_1, txoffset + pPriv->planeu_offset, src_bo);
 
 	OUT_ACCEL_REG(R200_PP_TXFILTER_2, txfilter);
 	OUT_ACCEL_REG(R200_PP_TXFORMAT_2, txformat | R200_TXFORMAT_ST_ROUTE_STQ1);
 	OUT_ACCEL_REG(R200_PP_TXFORMAT_X_2, 0);
 	OUT_ACCEL_REG(R200_PP_TXSIZE_2, txsize);
 	OUT_ACCEL_REG(R200_PP_TXPITCH_2, txpitch);
-	OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, pPriv->src_bo);
+	OUT_TEXTURE_REG(R200_PP_TXOFFSET_2, txoffset + pPriv->planev_offset, src_bo);
 
 	/* similar to r300 code. Note the big problem is that hardware constants
 	 * are 8 bits only, representing 0.0-1.0. We can get that up (using bias
@@ -816,7 +818,7 @@ FUNC_NAME(R200DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 		      (pPriv->w - 1) |
 		      ((pPriv->h - 1) << RADEON_TEX_VSIZE_SHIFT));
 	OUT_ACCEL_REG(R200_PP_TXPITCH_0, pPriv->src_pitch - 32);
-	OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, pPriv->src_bo);
+	OUT_TEXTURE_REG(R200_PP_TXOFFSET_0, txoffset, src_bo);
 
 	/* MAD temp1 / 2, const0.a * 2, temp0.ggg, -const0.rgb */
 	OUT_ACCEL_REG(R200_PP_TXCBLEND_0,
@@ -1017,6 +1019,7 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     RADEONInfoPtr info = RADEONPTR(pScrn);
     PixmapPtr pPixmap = pPriv->pPixmap;
     struct radeon_exa_pixmap_priv *driver_priv;
+    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
     uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
     uint32_t dst_pitch, dst_format;
     uint32_t txenable, colorpitch, bicubic_offset;
@@ -1032,7 +1035,7 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	int ret;
 
 	radeon_cs_space_reset_bos(info->cs);
-	radeon_cs_space_add_persistent_bo(info->cs, pPriv->src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+	radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
 
 	if (pPriv->bicubic_enabled)
 	  radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
@@ -1161,7 +1164,7 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     else
 	OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
     OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
-    OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, pPriv->src_bo);
+    OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo);
     FINISH_ACCEL();
 
     txenable = R300_TEX_0_ENABLE;
@@ -1183,13 +1186,13 @@ FUNC_NAME(R300DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	OUT_ACCEL_REG(R300_TX_FORMAT0_1, txformat0);
 	OUT_ACCEL_REG(R300_TX_FORMAT1_1, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_2);
 	OUT_ACCEL_REG(R300_TX_FORMAT2_1, txpitch);
-	OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, pPriv->src_bo);
+	OUT_TEXTURE_REG(R300_TX_OFFSET_1, txoffset + pPriv->planeu_offset, src_bo);
 	OUT_ACCEL_REG(R300_TX_FILTER0_2, txfilter | (2 << R300_TX_ID_SHIFT));
 	OUT_ACCEL_REG(R300_TX_FILTER1_2, 0);
 	OUT_ACCEL_REG(R300_TX_FORMAT0_2, txformat0);
 	OUT_ACCEL_REG(R300_TX_FORMAT1_2, R300_TX_FORMAT_X8 | R300_TX_FORMAT_CACHE_FOURTH_REGION_3);
 	OUT_ACCEL_REG(R300_TX_FORMAT2_2, txpitch);
-	OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, pPriv->src_bo);
+	OUT_TEXTURE_REG(R300_TX_OFFSET_2, txoffset + pPriv->planev_offset, src_bo);
 	FINISH_ACCEL();
 	txenable |= R300_TEX_1_ENABLE | R300_TEX_2_ENABLE;
     }
@@ -2465,6 +2468,7 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     RADEONInfoPtr info = RADEONPTR(pScrn);
     PixmapPtr pPixmap = pPriv->pPixmap;
     struct radeon_exa_pixmap_priv *driver_priv;
+    struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
     uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
     uint32_t dst_pitch, dst_format;
     uint32_t txenable, colorpitch, bicubic_offset;
@@ -2480,7 +2484,7 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	int ret;
 
 	radeon_cs_space_reset_bos(info->cs);
-	radeon_cs_space_add_persistent_bo(info->cs, pPriv->src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+	radeon_cs_space_add_persistent_bo(info->cs, src_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
 
 	if (pPriv->bicubic_enabled)
 	    radeon_cs_space_add_persistent_bo(info->cs, info->bicubic_bo, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
@@ -2612,7 +2616,7 @@ FUNC_NAME(R500DisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
     OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
     OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
-    OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, pPriv->src_bo);
+    OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo);
     FINISH_ACCEL();
 
     txenable = R300_TEX_0_ENABLE;
diff --git a/src/radeon_video.h b/src/radeon_video.h
index 989942c..1b8f684 100644
--- a/src/radeon_video.h
+++ b/src/radeon_video.h
@@ -119,7 +119,7 @@ typedef struct {
     int src_x, src_y;
     int vsync;
 
-    struct radeon_bo *src_bo;
+    struct radeon_bo *src_bo[2];
 } RADEONPortPrivRec, *RADEONPortPrivPtr;
 
 /* Reference color space transform data */
commit 6a3b75fae4147244212298ff55c7ab36c3d80d30
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Sat Oct 3 16:33:31 2009 +0200

    KMS: Don't hide HW cursor when it's updated.
    
    Inspired by the intel driver; might avoid/reduce HW cursor flicker in some
    cases.

diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index 94e6272..bd6020b 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -479,6 +479,7 @@ static Bool RADEONCursorInit_KMS(ScreenPtr pScreen)
 			      (HARDWARE_CURSOR_TRUECOLOR_AT_8BPP |
 			       HARDWARE_CURSOR_AND_SOURCE_WITH_MASK |
 			       HARDWARE_CURSOR_SOURCE_MASK_INTERLEAVE_1 |
+			       HARDWARE_CURSOR_UPDATE_UNHIDDEN |
 			       HARDWARE_CURSOR_ARGB));
 }
 
commit c4ba15b4871c70cced7f1761e24be692cdb28d2b
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Sat Oct 3 16:33:31 2009 +0200

    Fix some compiler warnings.

diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index fa49392..94e6272 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -181,7 +181,7 @@ static Bool RADEONIsAccelWorking(ScrnInfoPtr pScrn)
 
     memset(&ginfo, 0, sizeof(ginfo));
     ginfo.request = 0x3;
-    ginfo.value = (uint64_t)&tmp;
+    ginfo.value = (uintptr_t)&tmp;
     r = drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &ginfo, sizeof(ginfo));
     if (r)
         return FALSE;
@@ -434,7 +434,9 @@ Bool RADEONPreInit_KMS(ScrnInfoPtr pScrn, int flags)
 	    info->gart_size = mminfo.gart_size;
 	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
 		       "mem size init: gart size :%llx vram size: s:%llx visible:%llx\n",
-		       mminfo.gart_size, mminfo.vram_size, mminfo.vram_visible);
+		       (unsigned long long)mminfo.gart_size,
+		       (unsigned long long)mminfo.vram_size,
+		       (unsigned long long)mminfo.vram_visible);
 	}
     }
 #if 0


More information about the xorg-commit mailing list