[PATCH 5/6] radeon: complete UTS and DFS even when a scratch BO is not necessary

Karl Tomlinson karlt+ at karlt.net
Sun Aug 22 03:28:06 PDT 2010


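Enables the direct-copy paths (map the pixmap BO and memcpy to/from it),
which were previously compiled only for big-endian builds, on
little-endian systems as well, and adds similar paths to the r6xx/r7xx
functions.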

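This makes UploadToScreen (UTS) and DownloadFromScreen (DFS) reliable:
they no longer return FALSE merely because a scratch BO is unnecessary
or cannot be set up, and instead fall back to copying directly to or
from the pixmap BO. That will let PrepareAccess (with mixed pixmaps)
choose to fail based on whether the pixmap is in VRAM (to avoid CPU
reads).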
---
 src/r600_exa.c         |  110 ++++++++++++++++++++++++++++++++----------------
 src/radeon_exa_funcs.c |   72 ++++---------------------------
 2 files changed, 82 insertions(+), 100 deletions(-)

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 9b7a0c9..8544034 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1772,13 +1772,18 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
     struct radeon_exa_pixmap_priv *driver_priv;
-    struct radeon_bo *scratch;
+    struct radeon_bo *scratch = NULL;
+    struct radeon_bo *copy_dst;
+    unsigned char *dst;
     unsigned size;
     uint32_t dst_domain;
     int bpp = pDst->drawable.bitsPerPixel;
     uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+    uint32_t copy_pitch;
     uint32_t src_pitch_hw = scratch_pitch / (bpp / 8);
     uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
+    int ret;
+    Bool flush = TRUE;
     Bool r;
     int i;
     struct r600_accel_object src_obj, dst_obj;
@@ -1788,15 +1793,19 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
 
     driver_priv = exaGetPixmapDriverPrivate(pDst);
 
-    /* If we know the BO won't be busy, don't bother */
-    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs) &&
-	!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
-	return FALSE;
+    /* If we know the BO won't be busy, don't bother with a scratch */
+    copy_dst = driver_priv->bo;
+    copy_pitch = pDst->devKind;
+    if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
+	flush = FALSE;
+	if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
+	    goto copy;
+    }
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
-	return FALSE;
+	goto copy;
     }
 
     src_obj.pitch = src_pitch_hw;
@@ -1821,33 +1830,45 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
 			   &dst_obj,
 			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
 			   3, 0xffffffff)) {
-        r = FALSE;
-        goto out;
+        goto copy;
     }
+    copy_dst = scratch;
+    copy_pitch = scratch_pitch;
+    flush = FALSE;
+
+copy:
+    if (flush)
+	radeon_cs_flush_indirect(pScrn);
 
-    r = radeon_bo_map(scratch, 0);
-    if (r) {
+    ret = radeon_bo_map(copy_dst, 0);
+    if (ret) {
         r = FALSE;
         goto out;
     }
     r = TRUE;
     size = w * bpp / 8;
+    dst = copy_dst->ptr;
+    if (copy_dst == driver_priv->bo)
+	dst += y * copy_pitch + x * bpp / 8;
     for (i = 0; i < h; i++) {
-        memcpy(scratch->ptr + i * scratch_pitch, src, size);
+        memcpy(dst + i * copy_pitch, src, size);
         src += src_pitch;
     }
-    radeon_bo_unmap(scratch);
+    radeon_bo_unmap(copy_dst);
 
-    if (info->accel_state->vsync)
-	RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
+    if (copy_dst == scratch) {
+	if (info->accel_state->vsync)
+	    RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
 
-    /* blit from gart to vram */
-    R600DoPrepareCopy(pScrn);
-    R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
-    R600DoCopyVline(pDst);
+	/* blit from gart to vram */
+	R600DoPrepareCopy(pScrn);
+	R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
+	R600DoCopyVline(pDst);
+    }
 
 out:
-    radeon_bo_unref(scratch);
+    if (scratch)
+	radeon_bo_unref(scratch);
     return r;
 }
 
@@ -1859,13 +1880,17 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
     struct radeon_exa_pixmap_priv *driver_priv;
-    struct radeon_bo *scratch;
+    struct radeon_bo *scratch = NULL;
+    struct radeon_bo *copy_src;
     unsigned size;
     uint32_t src_domain = 0;
     int bpp = pSrc->drawable.bitsPerPixel;
     uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+    uint32_t copy_pitch;
     uint32_t dst_pitch_hw = scratch_pitch / (bpp / 8);
     uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
+    int ret;
+    Bool flush = FALSE;
     Bool r;
     struct r600_accel_object src_obj, dst_obj;
 
@@ -1874,24 +1899,28 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
 
     driver_priv = exaGetPixmapDriverPrivate(pSrc);
 
-    /* If we know the BO won't end up in VRAM anyway, don't bother */
+    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
+    copy_src = driver_priv->bo;
+    copy_pitch = pSrc->devKind;
     if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	src_domain = radeon_bo_get_src_domain(driver_priv->bo);
 	if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
 	    (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
 	    src_domain = 0;
+	else /* A write may be scheduled */
+	    flush = TRUE;
     }
 
     if (!src_domain)
 	radeon_bo_is_busy(driver_priv->bo, &src_domain);
 
     if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
-	return FALSE;
+	goto copy;
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
-	return FALSE;
+	goto copy;
     }
     radeon_cs_space_reset_bos(info->cs);
     radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
@@ -1900,10 +1929,9 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
     accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
     radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
-    r = radeon_cs_space_check(info->cs);
-    if (r) {
-        r = FALSE;
-        goto out;
+    ret = radeon_cs_space_check(info->cs);
+    if (ret) {
+        goto copy;
     }
 
     src_obj.pitch = src_pitch_hw;
@@ -1928,34 +1956,42 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
 			   &dst_obj,
 			   accel_state->copy_vs_offset, accel_state->copy_ps_offset,
 			   3, 0xffffffff)) {
-        r = FALSE;
-        goto out;
+        goto copy;
     }
 
     /* blit from vram to gart */
     R600DoPrepareCopy(pScrn);
     R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
     R600DoCopy(pScrn);
+    copy_src = scratch;
+    copy_pitch = scratch_pitch;
+    flush = TRUE;
 
-    if (info->cs)
+copy:
+    if (flush && info->cs)
 	radeon_cs_flush_indirect(pScrn);
 
-    r = radeon_bo_map(scratch, 0);
-    if (r) {
+    ret = radeon_bo_map(copy_src, 0);
+    if (ret) {
+	ErrorF("failed to map pixmap: %d\n", ret);
         r = FALSE;
         goto out;
     }
     r = TRUE;
     w *= bpp / 8;
-    size = 0;
+    if (copy_src == driver_priv->bo)
+	size = y * copy_pitch + x * bpp / 8;
+    else
+	size = 0;
     while (h--) {
-        memcpy(dst, scratch->ptr + size, w);
-        size += scratch_pitch;
+        memcpy(dst, copy_src->ptr + size, w);
+        size += copy_pitch;
         dst += dst_pitch;
     }
-    radeon_bo_unmap(scratch);
+    radeon_bo_unmap(copy_src);
 out:
-    radeon_bo_unref(scratch);
+    if (scratch)
+	radeon_bo_unref(scratch);
     return r;
 }
 #endif
diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index f629c8e..e80a996 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -471,9 +471,7 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     uint32_t copy_pitch;
     uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
     int ret;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
     Bool flush = TRUE;
-#endif
     Bool r;
     int i;
 
@@ -495,61 +493,34 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     }
 #endif
 
-    /* If we know the BO won't be busy, don't bother */
-#if X_BYTE_ORDER == X_BIG_ENDIAN
+    /* If we know the BO won't be busy, don't bother with a scratch */
     copy_dst = driver_priv->bo;
     copy_pitch = pDst->devKind;
-#endif
     if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
 	flush = FALSE;
-#endif
-	if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain)) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	    /* Can't return FALSE here if we need to swap bytes */
-	    if (swap != RADEON_HOST_DATA_SWAP_NONE &&
-		driver_priv->bo != info->front_bo) {
-		goto copy;
-	    }
-#endif
-	    return FALSE;
-	}
+	if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
+	    goto copy;
     }
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	if (swap != RADEON_HOST_DATA_SWAP_NONE &&
-	    driver_priv->bo != info->front_bo) {
-	    goto copy;
-	}
-#endif
-	return FALSE;
+	goto copy;
     }
     radeon_cs_space_reset_bos(info->cs);
     radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
     radeon_cs_space_add_persistent_bo(info->cs, scratch, RADEON_GEM_DOMAIN_GTT, 0);
     ret = radeon_cs_space_check(info->cs);
     if (ret) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	if (swap != RADEON_HOST_DATA_SWAP_NONE &&
-	    driver_priv->bo != info->front_bo) {
-	    goto copy;
-	}
-#endif
-        r = FALSE;
-        goto out;
+	goto copy;
     }
     copy_dst = scratch;
     copy_pitch = scratch_pitch;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
     flush = FALSE;
 
 copy:
     if (flush)
 	radeon_cs_flush_indirect(pScrn);
-#endif
 
     ret = radeon_bo_map(copy_dst, 0);
     if (ret) {
@@ -600,9 +571,7 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     uint32_t copy_pitch;
     uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
     int ret;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
     Bool flush = FALSE;
-#endif
     Bool r;
 
     if (bpp < 8)
@@ -623,57 +592,36 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     }
 #endif
 
-    /* If we know the BO won't end up in VRAM anyway, don't bother */
-#if X_BYTE_ORDER == X_BIG_ENDIAN
+    /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
     copy_src = driver_priv->bo;
     copy_pitch = pSrc->devKind;
-#endif
     if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	src_domain = radeon_bo_get_src_domain(driver_priv->bo);
 	if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
 	    (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
 	    src_domain = 0;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
 	else /* A write may be scheduled */
 	    flush = TRUE;
-#endif
     }
 
     if (!src_domain)
 	radeon_bo_is_busy(driver_priv->bo, &src_domain);
 
     if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	/* Can't return FALSE here if we need to swap bytes */
-	if (swap != RADEON_HOST_DATA_SWAP_NONE) {
-	    goto copy;
-	}
-#endif
-	return FALSE;
+	goto copy;
     }
 
     size = scratch_pitch * h;
     scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
     if (scratch == NULL) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	if (swap != RADEON_HOST_DATA_SWAP_NONE) {
-	    goto copy;
-	}
-#endif
-	return FALSE;
+	goto copy;
     }
     radeon_cs_space_reset_bos(info->cs);
     radeon_add_pixmap(info->cs, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
     radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, RADEON_GEM_DOMAIN_GTT);
     ret = radeon_cs_space_check(info->cs);
     if (ret) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
-	if (swap != RADEON_HOST_DATA_SWAP_NONE) {
-	    goto copy;
-	}
-#endif
-        r = FALSE;
-        goto out;
+	goto copy;
     }
     RADEONGetDatatypeBpp(pSrc->drawable.bitsPerPixel, &datatype);
     RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset);
@@ -685,12 +633,10 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
                     RADEON_GEM_DOMAIN_GTT);
     copy_src = scratch;
     copy_pitch = scratch_pitch;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
     flush = TRUE;
 
 copy:
     if (flush)
-#endif
 	FLUSH_RING();
 
     ret = radeon_bo_map(copy_src, 0);
-- 
1.7.1


--=-=-=--


More information about the xorg-driver-ati mailing list