[PATCH 5/6] radeon: complete UTS and DFS even when a scratch BO is not necessary
Karl Tomlinson
karlt+ at karlt.net
Sun Aug 22 03:28:06 PDT 2010
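Turns on the direct-copy fallback paths, which were previously compiled
only for big-endian builds (X_BYTE_ORDER == X_BIG_ENDIAN), on little-endian
systems as well, and adds similar paths to the r6xx/r7xx functions.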
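This makes UTS (UploadToScreen) and DFS (DownloadFromScreen) reliable:
instead of returning FALSE when a scratch BO is unnecessary or cannot be
set up, they now complete the transfer by copying directly to or from the
pixmap's BO. That in turn will let PrepareAccess (with mixed pixmaps)
choose to fail based on whether the pixmap is in VRAM (to avoid CPU reads).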
---
src/r600_exa.c | 110 ++++++++++++++++++++++++++++++++----------------
src/radeon_exa_funcs.c | 72 ++++---------------------------
2 files changed, 82 insertions(+), 100 deletions(-)
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 9b7a0c9..8544034 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1772,13 +1772,18 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
RADEONInfoPtr info = RADEONPTR(pScrn);
struct radeon_accel_state *accel_state = info->accel_state;
struct radeon_exa_pixmap_priv *driver_priv;
- struct radeon_bo *scratch;
+ struct radeon_bo *scratch = NULL;
+ struct radeon_bo *copy_dst;
+ unsigned char *dst;
unsigned size;
uint32_t dst_domain;
int bpp = pDst->drawable.bitsPerPixel;
uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+ uint32_t copy_pitch;
uint32_t src_pitch_hw = scratch_pitch / (bpp / 8);
uint32_t dst_pitch_hw = exaGetPixmapPitch(pDst) / (bpp / 8);
+ int ret;
+ Bool flush = TRUE;
Bool r;
int i;
struct r600_accel_object src_obj, dst_obj;
@@ -1788,15 +1793,19 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
driver_priv = exaGetPixmapDriverPrivate(pDst);
- /* If we know the BO won't be busy, don't bother */
- if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs) &&
- !radeon_bo_is_busy(driver_priv->bo, &dst_domain))
- return FALSE;
+ /* If we know the BO won't be busy, don't bother with a scratch */
+ copy_dst = driver_priv->bo;
+ copy_pitch = pDst->devKind;
+ if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
+ flush = FALSE;
+ if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
+ goto copy;
+ }
size = scratch_pitch * h;
scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
if (scratch == NULL) {
- return FALSE;
+ goto copy;
}
src_obj.pitch = src_pitch_hw;
@@ -1821,33 +1830,45 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
&dst_obj,
accel_state->copy_vs_offset, accel_state->copy_ps_offset,
3, 0xffffffff)) {
- r = FALSE;
- goto out;
+ goto copy;
}
+ copy_dst = scratch;
+ copy_pitch = scratch_pitch;
+ flush = FALSE;
+
+copy:
+ if (flush)
+ radeon_cs_flush_indirect(pScrn);
- r = radeon_bo_map(scratch, 0);
- if (r) {
+ ret = radeon_bo_map(copy_dst, 0);
+ if (ret) {
r = FALSE;
goto out;
}
r = TRUE;
size = w * bpp / 8;
+ dst = copy_dst->ptr;
+ if (copy_dst == driver_priv->bo)
+ dst += y * copy_pitch + x * bpp / 8;
for (i = 0; i < h; i++) {
- memcpy(scratch->ptr + i * scratch_pitch, src, size);
+ memcpy(dst + i * copy_pitch, src, size);
src += src_pitch;
}
- radeon_bo_unmap(scratch);
+ radeon_bo_unmap(copy_dst);
- if (info->accel_state->vsync)
- RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
+ if (copy_dst == scratch) {
+ if (info->accel_state->vsync)
+ RADEONVlineHelperSet(pScrn, x, y, x + w, y + h);
- /* blit from gart to vram */
- R600DoPrepareCopy(pScrn);
- R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
- R600DoCopyVline(pDst);
+ /* blit from gart to vram */
+ R600DoPrepareCopy(pScrn);
+ R600AppendCopyVertex(pScrn, 0, 0, x, y, w, h);
+ R600DoCopyVline(pDst);
+ }
out:
- radeon_bo_unref(scratch);
+ if (scratch)
+ radeon_bo_unref(scratch);
return r;
}
@@ -1859,13 +1880,17 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
RADEONInfoPtr info = RADEONPTR(pScrn);
struct radeon_accel_state *accel_state = info->accel_state;
struct radeon_exa_pixmap_priv *driver_priv;
- struct radeon_bo *scratch;
+ struct radeon_bo *scratch = NULL;
+ struct radeon_bo *copy_src;
unsigned size;
uint32_t src_domain = 0;
int bpp = pSrc->drawable.bitsPerPixel;
uint32_t scratch_pitch = RADEON_ALIGN(w * bpp / 8, 256);
+ uint32_t copy_pitch;
uint32_t dst_pitch_hw = scratch_pitch / (bpp / 8);
uint32_t src_pitch_hw = exaGetPixmapPitch(pSrc) / (bpp / 8);
+ int ret;
+ Bool flush = FALSE;
Bool r;
struct r600_accel_object src_obj, dst_obj;
@@ -1874,24 +1899,28 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
driver_priv = exaGetPixmapDriverPrivate(pSrc);
- /* If we know the BO won't end up in VRAM anyway, don't bother */
+ /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
+ copy_src = driver_priv->bo;
+ copy_pitch = pSrc->devKind;
if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
src_domain = radeon_bo_get_src_domain(driver_priv->bo);
if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
src_domain = 0;
+ else /* A write may be scheduled */
+ flush = TRUE;
}
if (!src_domain)
radeon_bo_is_busy(driver_priv->bo, &src_domain);
if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM)
- return FALSE;
+ goto copy;
size = scratch_pitch * h;
scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
if (scratch == NULL) {
- return FALSE;
+ goto copy;
}
radeon_cs_space_reset_bos(info->cs);
radeon_cs_space_add_persistent_bo(info->cs, info->accel_state->shaders_bo,
@@ -1900,10 +1929,9 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
radeon_add_pixmap(info->cs, pSrc, info->accel_state->src_obj[0].domain, 0);
accel_state->dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, accel_state->dst_obj.domain);
- r = radeon_cs_space_check(info->cs);
- if (r) {
- r = FALSE;
- goto out;
+ ret = radeon_cs_space_check(info->cs);
+ if (ret) {
+ goto copy;
}
src_obj.pitch = src_pitch_hw;
@@ -1928,34 +1956,42 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
&dst_obj,
accel_state->copy_vs_offset, accel_state->copy_ps_offset,
3, 0xffffffff)) {
- r = FALSE;
- goto out;
+ goto copy;
}
/* blit from vram to gart */
R600DoPrepareCopy(pScrn);
R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h);
R600DoCopy(pScrn);
+ copy_src = scratch;
+ copy_pitch = scratch_pitch;
+ flush = TRUE;
- if (info->cs)
+copy:
+ if (flush && info->cs)
radeon_cs_flush_indirect(pScrn);
- r = radeon_bo_map(scratch, 0);
- if (r) {
+ ret = radeon_bo_map(copy_src, 0);
+ if (ret) {
+ ErrorF("failed to map pixmap: %d\n", ret);
r = FALSE;
goto out;
}
r = TRUE;
w *= bpp / 8;
- size = 0;
+ if (copy_src == driver_priv->bo)
+ size = y * copy_pitch + x * bpp / 8;
+ else
+ size = 0;
while (h--) {
- memcpy(dst, scratch->ptr + size, w);
- size += scratch_pitch;
+ memcpy(dst, copy_src->ptr + size, w);
+ size += copy_pitch;
dst += dst_pitch;
}
- radeon_bo_unmap(scratch);
+ radeon_bo_unmap(copy_src);
out:
- radeon_bo_unref(scratch);
+ if (scratch)
+ radeon_bo_unref(scratch);
return r;
}
#endif
diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index f629c8e..e80a996 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -471,9 +471,7 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
uint32_t copy_pitch;
uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
int ret;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
Bool flush = TRUE;
-#endif
Bool r;
int i;
@@ -495,61 +493,34 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
}
#endif
- /* If we know the BO won't be busy, don't bother */
-#if X_BYTE_ORDER == X_BIG_ENDIAN
+ /* If we know the BO won't be busy, don't bother with a scratch */
copy_dst = driver_priv->bo;
copy_pitch = pDst->devKind;
-#endif
if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
flush = FALSE;
-#endif
- if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain)) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- /* Can't return FALSE here if we need to swap bytes */
- if (swap != RADEON_HOST_DATA_SWAP_NONE &&
- driver_priv->bo != info->front_bo) {
- goto copy;
- }
-#endif
- return FALSE;
- }
+ if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
+ goto copy;
}
size = scratch_pitch * h;
scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
if (scratch == NULL) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- if (swap != RADEON_HOST_DATA_SWAP_NONE &&
- driver_priv->bo != info->front_bo) {
- goto copy;
- }
-#endif
- return FALSE;
+ goto copy;
}
radeon_cs_space_reset_bos(info->cs);
radeon_add_pixmap(info->cs, pDst, 0, RADEON_GEM_DOMAIN_VRAM);
radeon_cs_space_add_persistent_bo(info->cs, scratch, RADEON_GEM_DOMAIN_GTT, 0);
ret = radeon_cs_space_check(info->cs);
if (ret) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- if (swap != RADEON_HOST_DATA_SWAP_NONE &&
- driver_priv->bo != info->front_bo) {
- goto copy;
- }
-#endif
- r = FALSE;
- goto out;
+ goto copy;
}
copy_dst = scratch;
copy_pitch = scratch_pitch;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
flush = FALSE;
copy:
if (flush)
radeon_cs_flush_indirect(pScrn);
-#endif
ret = radeon_bo_map(copy_dst, 0);
if (ret) {
@@ -600,9 +571,7 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
uint32_t copy_pitch;
uint32_t swap = RADEON_HOST_DATA_SWAP_NONE;
int ret;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
Bool flush = FALSE;
-#endif
Bool r;
if (bpp < 8)
@@ -623,57 +592,36 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
}
#endif
- /* If we know the BO won't end up in VRAM anyway, don't bother */
-#if X_BYTE_ORDER == X_BIG_ENDIAN
+ /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
copy_src = driver_priv->bo;
copy_pitch = pSrc->devKind;
-#endif
if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
src_domain = radeon_bo_get_src_domain(driver_priv->bo);
if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
(RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
src_domain = 0;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
else /* A write may be scheduled */
flush = TRUE;
-#endif
}
if (!src_domain)
radeon_bo_is_busy(driver_priv->bo, &src_domain);
if (src_domain & ~(uint32_t)RADEON_GEM_DOMAIN_VRAM) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- /* Can't return FALSE here if we need to swap bytes */
- if (swap != RADEON_HOST_DATA_SWAP_NONE) {
- goto copy;
- }
-#endif
- return FALSE;
+ goto copy;
}
size = scratch_pitch * h;
scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
if (scratch == NULL) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- if (swap != RADEON_HOST_DATA_SWAP_NONE) {
- goto copy;
- }
-#endif
- return FALSE;
+ goto copy;
}
radeon_cs_space_reset_bos(info->cs);
radeon_add_pixmap(info->cs, pSrc, RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
radeon_cs_space_add_persistent_bo(info->cs, scratch, 0, RADEON_GEM_DOMAIN_GTT);
ret = radeon_cs_space_check(info->cs);
if (ret) {
-#if X_BYTE_ORDER == X_BIG_ENDIAN
- if (swap != RADEON_HOST_DATA_SWAP_NONE) {
- goto copy;
- }
-#endif
- r = FALSE;
- goto out;
+ goto copy;
}
RADEONGetDatatypeBpp(pSrc->drawable.bitsPerPixel, &datatype);
RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset);
@@ -685,12 +633,10 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
RADEON_GEM_DOMAIN_GTT);
copy_src = scratch;
copy_pitch = scratch_pitch;
-#if X_BYTE_ORDER == X_BIG_ENDIAN
flush = TRUE;
copy:
if (flush)
-#endif
FLUSH_RING();
ret = radeon_bo_map(copy_src, 0);
--
1.7.1
--=-=-=--
More information about the xorg-driver-ati
mailing list