xf86-video-ati: Branch 'master' - 2 commits

Dave Airlie airlied at csn.ul.ie
Sat Aug 22 16:42:37 PDT 2009


> On Sat, 2009-08-22 at 04:18 -0700, Dave Airlie wrote:
> > src/radeon_exa.c       |    3 +--
> >  src/radeon_exa_funcs.c |   35 +++++++++++++++++++++++++++++++++++
> >  2 files changed, 36 insertions(+), 2 deletions(-)
> > 
> > New commits:
> > commit bac224912c750dc1c85ff2d9b8526dad6c23b572
> > Author: Dave Airlie <airlied at redhat.com>
> > Date:   Sat Aug 22 21:17:59 2009 +1000
> > 
> >     radeon: don't spec any initial placement for pixmaps.
> >     
> >     allow the first use to decide placement.
> > 
> > diff --git a/src/radeon_exa.c b/src/radeon_exa.c
> > index ec65722..3f3c9ba 100644
> > --- a/src/radeon_exa.c
> > +++ b/src/radeon_exa.c
> > @@ -391,8 +391,7 @@ void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align)
> >  	return new_priv;
> >  
> >      new_priv->bo = radeon_bo_open(info->bufmgr, 0, size,
> > -				  align, RADEON_GEM_DOMAIN_VRAM |
> > -				  RADEON_GEM_DOMAIN_GTT, 0);
> > +				  align, 0, 0);
> >      if (!new_priv->bo) {
> >  	xfree(new_priv);
> >  	ErrorF("Failed to alloc memory\n");
> 
> IME this has a significant impact on 2D performance (e.g. x11perf
> -aa10text drops from about 325K/s to about 225K/s, and this is
> noticeable in real apps), until the kernel side avoids unnecessary BO
> copies.


The problem is the API is specced as initial_domain, not domains, so
its incorrect. I'm writing a TTM patch at the moment to avoid BO copies
to VRAM where nothing has yet mapped or touched the BO.

Dave.

> 
> Also, current EXA (with EXA_MIXED_PIXMAPS) actually only calls the
> driver CreatePixmap(2) hook just before the first accelerated operation
> on the pixmap. So I'd suggest setting RADEON_GEM_DOMAIN_VRAM |
> RADEON_GEM_DOMAIN_GTT in both hooks.
> 
> 
> > commit 77f98717d825162da106c6898cdbcbdf5c984ae6
> > Author: Dave Airlie <airlied at redhat.com>
> > Date:   Sat Aug 22 21:16:25 2009 +1000
> > 
> >     exa/cs: add DFS from GTT optimisation
> >     
> >     This uses the new libdrm busy interface, once I had this in place
> >     I added a error if this happened and it does on my desktop here,
> >     so may as well add the optimisation that used to be in my old KMS tree.
> >     
> >     Signed-off-by: Dave Airlie <airlied at redhat.com>
> > 
> > diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
> > index e48317f..f937c4e 100644
> > --- a/src/radeon_exa_funcs.c
> > +++ b/src/radeon_exa_funcs.c
> > @@ -507,6 +507,33 @@ out:
> >  }
> >  
> >  static Bool
> > +RADEONDownloadFromScreenGTT(PixmapPtr pSrc, int x, int y, int w,
> > +			    int h, char *dst, int dst_pitch)
> > +{
> > +    struct radeon_exa_pixmap_priv *driver_priv;
> > +    int src_pitch = exaGetPixmapPitch(pSrc);
> > +    int bpp = pSrc->drawable.bitsPerPixel;
> > +    int src_offset;
> > +    int r;
> > +
> > +    driver_priv = exaGetPixmapDriverPrivate(pSrc);
> > +    r = radeon_bo_map(driver_priv->bo, 0);
> > +    if (r)
> > +	return FALSE;
> > +
> > +    src_offset = (x * bpp / 8) + (y * src_pitch);
> > +    w *= bpp / 8;
> > +
> > +    while (h--) {
> > +        memcpy(dst, driver_priv->bo->ptr + src_offset, w);
> > +        src_offset += src_pitch;
> > +        dst += dst_pitch;
> > +    }
> > +    radeon_bo_unmap(driver_priv->bo);
> > +    return TRUE;
> > +}
> > +
> > +static Bool
> >  RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
> >                             int h, char *dst, int dst_pitch)
> >  {
> > @@ -519,11 +546,19 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
> >      unsigned bpp = pSrc->drawable.bitsPerPixel;
> >      uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63;
> >      Bool r;
> > +    uint32_t src_domain;
> > +    int busy;
> >  
> >      if (bpp < 8)
> >  	return FALSE;
> >  
> >      driver_priv = exaGetPixmapDriverPrivate(pSrc);
> > + 
> > +    busy = radeon_bo_is_busy(driver_priv->bo, &src_domain);
> > +
> > +    if (src_domain == RADEON_GEM_DOMAIN_GTT)
> > +	return RADEONDownloadFromScreenGTT(pSrc, x, y, w, h,
> > +					   dst, dst_pitch);
> 
> RADEONDownloadFromScreenGTT is superfluous, just return FALSE from
> RADEONDownloadFromScreenCS. Also, this doesn't take into account any
> unflushed operations involving the BO.
> 
> Below is what I've been testing for DFS / UTS.
> 
> 
> diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
> index bada4fb..abb8be3 100644
> --- a/src/radeon_exa_funcs.c
> +++ b/src/radeon_exa_funcs.c
> @@ -459,6 +459,7 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
>      struct radeon_bo *scratch;
>      unsigned size;
>      uint32_t datatype = 0;
> +    uint32_t dst_domain;
>      uint32_t dst_pitch_offset;
>      unsigned bpp = pDst->drawable.bitsPerPixel;
>      uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63;
> @@ -470,6 +471,18 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
>  
>      driver_priv = exaGetPixmapDriverPrivate(pDst);
>  
> +    /* If we know the BO won't be busy, don't bother */
> +    if (driver_priv->bo->cref == 1 &&
> +	!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
> +{
> + static Bool printed;
> + if (!printed) {
> +  ErrorF("%s: Bailing because BO is idle\n", __func__);
> +  printed = TRUE;
> + }
> +	return FALSE;
> +}
> +
>      size = scratch_pitch * h;
>      scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
>      if (scratch == NULL) {
> @@ -519,6 +532,7 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
>      struct radeon_bo *scratch;
>      unsigned size;
>      uint32_t datatype = 0;
> +    uint32_t src_domain = 0;
>      uint32_t src_pitch_offset;
>      unsigned bpp = pSrc->drawable.bitsPerPixel;
>      uint32_t scratch_pitch = (w * bpp / 8 + 63) & ~63;
> @@ -529,6 +543,30 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
>  
>      driver_priv = exaGetPixmapDriverPrivate(pSrc);
>  
> +    /* If we know the BO will end up in GTT anyway, don't bother */
> +    if (driver_priv->bo->cref > 1) {
> +	src_domain = driver_priv->bo->space_accounted & 0xffff;
> +	if (!src_domain)
> +	    src_domain = driver_priv->bo->space_accounted >> 16;
> +
> +	if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
> +	    (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM))
> +	    src_domain = 0;
> +    }
> +
> +    if (!src_domain)
> +	radeon_bo_is_busy(driver_priv->bo, &src_domain);
> +
> +    if (src_domain == RADEON_GEM_DOMAIN_GTT)
> +{
> + static Bool printed;
> + if (!printed) {
> +  ErrorF("%s: Bailing because BO is in GTT\n", __func__);
> +  printed = TRUE;
> + }
> +	return FALSE;
> +}
> +
>      size = scratch_pitch * h;
>      scratch = radeon_bo_open(info->bufmgr, 0, size, 0, RADEON_GEM_DOMAIN_GTT, 0);
>      if (scratch == NULL) {
> 
> 
> 


More information about the xorg-driver-ati mailing list