xf86-video-ati: Branch 'master' - 2 commits
Michel Dänzer
michel at daenzer.net
Sat Aug 22 08:01:41 PDT 2009
On Sat, 2009-08-22 at 04:18 -0700, Dave Airlie wrote:
> src/radeon_exa.c | 3 +--
> src/radeon_exa_funcs.c | 35 +++++++++++++++++++++++++++++++++++
> 2 files changed, 36 insertions(+), 2 deletions(-)
>
> New commits:
> commit bac224912c750dc1c85ff2d9b8526dad6c23b572
> Author: Dave Airlie <airlied at redhat.com>
> Date: Sat Aug 22 21:17:59 2009 +1000
>
> radeon: don't spec any initial placement for pixmaps.
>
> allow the first use to decide placement.
>
> diff --git a/src/radeon_exa.c b/src/radeon_exa.c
> index ec65722..3f3c9ba 100644
> --- a/src/radeon_exa.c
> +++ b/src/radeon_exa.c
> @@ -391,8 +391,7 @@ void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align)
> return new_priv;
>
> new_priv->bo = radeon_bo_open(info->bufmgr, 0, size,
> - align, RADEON_GEM_DOMAIN_VRAM |
> - RADEON_GEM_DOMAIN_GTT, 0);
> + align, 0, 0);
> if (!new_priv->bo) {
> xfree(new_priv);
> ErrorF("Failed to alloc memory\n");
IME this has a significant impact on 2D performance (e.g. x11perf
-aa10text drops from about 325K/s to about 225K/s, and this is
noticeable in real apps), until the kernel side avoids unnecessary BO
copies.
Also, current EXA (with EXA_MIXED_PIXMAPS) actually only calls the
driver CreatePixmap(2) hook just before the first accelerated operation
on the pixmap. So I'd suggest setting RADEON_GEM_DOMAIN_VRAM |
RADEON_GEM_DOMAIN_GTT in both hooks.
> commit 77f98717d825162da106c6898cdbcbdf5c984ae6
> Author: Dave Airlie <airlied at redhat.com>
> Date: Sat Aug 22 21:16:25 2009 +1000
>
> exa/cs: add DFS from GTT optimisation
>
> This uses the new libdrm busy interface, once I had this in place
> I added a error if this happened and it does on my desktop here,
> so may as well add the optimisation that used to be in my old KMS tree.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
>
> diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
> index e48317f..f937c4e 100644
> --- a/src/radeon_exa_funcs.c
> +++ b/src/radeon_exa_funcs.c
> @@ -507,6 +507,33 @@ out:
> }
>
> static Bool
> +RADEONDownloadFromScreenGTT(PixmapPtr pSrc, int x, int y, int w,
> + int h, char *dst, int dst_pitch)
> +{
> + struct radeon_exa_pixmap_priv *driver_priv;
> + int src_pitch = exaGetPixmapPitch(pSrc);
> + int bpp = pSrc->drawable.bitsPerPixel;
> + int src_offset;
> + int r;
> +
> + driver_priv = exaGetPixmapDriverPrivate(pSrc);
> + r = radeon_bo_map(driver_priv->bo, 0);
> + if (r)
> + return FALSE;
> +
> + src_offset = (x * bpp / 8) + (y * src_pitch);
> + w *= bpp / 8;
> +
> + while (h--) {
> + memcpy(dst, driver_priv->bo->ptr + src_offset, w);
> + src_offset += src_pitch;
> + dst += dst_pitch;
> + }
> + radeon_bo_unmap(driver_priv->bo);
> + return TRUE;
> +}
> +
> +static Bool
> RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
> int h, char *dst, int dst_pitch)
> {
> @@ -519,11 +546,19 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
> unsigned bpp = pSrc->drawable.bitsPerPixel;
> uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63;
> Bool r;
> + uint32_t src_domain;
> + int busy;
>
> if (bpp < 8)
> return FALSE;
>
> driver_priv = exaGetPixmapDriverPrivate(pSrc);
> +
> + busy = radeon_bo_is_busy(driver_priv->bo, &src_domain);
> +
> + if (src_domain == RADEON_GEM_DOMAIN_GTT)
> + return RADEONDownloadFromScreenGTT(pSrc, x, y, w, h,
> + dst, dst_pitch);
RADEONDownloadFromScreenGTT is superfluous, just return FALSE from
RADEONDownloadFromScreenCS. Also, this doesn't take into account any
unflushed operations involving the BO.
Below is what I've been testing for DFS / UTS.
diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index bada4fb..abb8be3 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -459,6 +459,7 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
struct radeon_bo *scratch;
unsigned size;
uint32_t datatype = 0;
+ uint32_t dst_domain;
uint32_t dst_pitch_offset;
unsigned bpp = pDst->drawable.bitsPerPixel;
uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63;
@@ -470,6 +471,18 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
driver_priv = exaGetPixmapDriverPrivate(pDst);
+ /* If we know the BO won't be busy, don't bother */
+ if (driver_priv->bo->cref == 1 &&
+ !radeon_bo_is_busy(driver_priv->bo, &dst_domain))
+{
+ static Bool printed;
+ if (!printed) {
+ ErrorF("%s: Bailing because BO is idle\n", __func__);
+ printed = TRUE;
+ }
+ return FALSE;
+}
+
size = scratch_pitch * h;
scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
if (scratch == NULL) {
@@ -519,6 +532,7 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
struct radeon_bo *scratch;
unsigned size;
uint32_t datatype = 0;
+ uint32_t src_domain = 0;
uint32_t src_pitch_offset;
unsigned bpp = pSrc->drawable.bitsPerPixel;
uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63;
@@ -529,6 +543,30 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
driver_priv = exaGetPixmapDriverPrivate(pSrc);
+ /* If we know the BO will end up in GTT anyway, don't bother */
+ if (driver_priv->bo->cref > 1) {
+ src_domain = driver_priv->bo->space_accounted & 0xffff;
+ if (!src_domain)
+ src_domain = driver_priv->bo->space_accounted >> 16;
+
+ if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
+ (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
+ src_domain = 0;
+ }
+
+ if (!src_domain)
+ radeon_bo_is_busy(driver_priv->bo, &src_domain);
+
+ if (src_domain == RADEON_GEM_DOMAIN_GTT)
+{
+ static Bool printed;
+ if (!printed) {
+ ErrorF("%s: Bailing because BO is in GTT\n", __func__);
+ printed = TRUE;
+ }
+ return FALSE;
+}
+
size = scratch_pitch * h;
scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
if (scratch == NULL) {
--
Earthling Michel Dänzer | http://www.vmware.com
Libre software enthusiast | Debian, X and DRI developer
More information about the xorg-driver-ati
mailing list