xf86-video-intel: 5 commits - src/intel.h src/intel_memory.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/kgem.c src/sna/kgem.h

Chris Wilson ickle at kemper.freedesktop.org
Fri Dec 2 02:32:55 PST 2011


 src/intel.h           |    1 
 src/intel_memory.c    |   82 +++++++++++++++++---------------------------------
 src/sna/gen5_render.c |   21 +++++++++---
 src/sna/gen6_render.c |    5 +--
 src/sna/kgem.c        |   76 ++++++++++++++++++++++++++++++----------------
 src/sna/kgem.h        |    1 
 6 files changed, 97 insertions(+), 89 deletions(-)

New commits:
commit 85d3dc5910a2eea3a10b822e01443e11eaae9291
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Dec 2 10:22:51 2011 +0000

    uxa: Reset size limits based on AGP size
    
    The basis for the constraints is what we can map into the aperture for
    direct writing with the CPU, so use the size of the mappable region
    rather than the size of the total GTT.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
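
As a worked illustration of the new limits (the 256 MiB mappable
aperture is a hypothetical value, not one taken from the patch), the
arithmetic in intel_set_gem_max_sizes() below comes out to 128 MiB for
max_gtt_map_size (and hence max_bo_size, which aliases it) and 64 MiB
for max_tiling_size:

    /* Illustrative arithmetic only: agp_size stands in for the size of
     * the mappable BAR (regions[0] on gen2, regions[2] on gen3+). */
    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
            size_t agp_size = 256u * 1024 * 1024;  /* hypothetical */
            printf("max_gtt_map_size = %zu MiB\n", agp_size / 2 >> 20);
            printf("max_tiling_size  = %zu MiB\n", agp_size / 4 >> 20);
            return 0;
    }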

diff --git a/src/intel.h b/src/intel.h
index 1f004a8..28f049e 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -542,7 +542,6 @@ int intel_crtc_to_pipe(xf86CrtcPtr crtc);
 unsigned long intel_get_fence_size(intel_screen_private *intel, unsigned long size);
 unsigned long intel_get_fence_pitch(intel_screen_private *intel, unsigned long pitch,
 				   uint32_t tiling_mode);
-void intel_set_gem_max_sizes(ScrnInfoPtr scrn);
 
 drm_intel_bo *intel_allocate_framebuffer(ScrnInfoPtr scrn,
 					int w, int h, int cpp,
diff --git a/src/intel_memory.c b/src/intel_memory.c
index 763a6ad..7e0a6dd 100644
--- a/src/intel_memory.c
+++ b/src/intel_memory.c
@@ -169,6 +169,35 @@ static inline int intel_pad_drawable_width(int width)
 	return ALIGN(width, 64);
 }
 
+
+static size_t
+agp_aperture_size(struct pci_device *dev, int gen)
+{
+	return dev->regions[gen < 30 ? 0 : 2].size;
+}
+
+static void intel_set_gem_max_sizes(ScrnInfoPtr scrn)
+{
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	size_t agp_size = agp_aperture_size(intel->PciInfo,
+					    INTEL_INFO(intel)->gen);
+
+	/* The chances of being able to mmap an object larger than this
+	 * are slim, so don't try. */
+	intel->max_gtt_map_size = agp_size / 2;
+
+	/* Let objects be tiled up to the size where only 4 would fit in
+	 * the aperture, presuming best case alignment.  */
+	intel->max_tiling_size = agp_size / 4;
+
+	/* Large BOs will tend to hit SW fallbacks frequently, and also will
+	 * tend to fail to successfully map when doing SW fallbacks because we
+	 * overcommit address space for BO access, or worse cause aperture
+	 * thrashing.
+	 */
+	intel->max_bo_size = intel->max_gtt_map_size;
+}
+
 /**
  * Allocates a framebuffer for a screen.
  *
@@ -249,56 +278,3 @@ retry:
 
 	return front_buffer;
 }
-
-static void intel_set_max_bo_size(intel_screen_private *intel,
-				 const struct drm_i915_gem_get_aperture *aperture)
-{
-	if (aperture->aper_available_size)
-		/* Large BOs will tend to hit SW fallbacks frequently, and also will
-		 * tend to fail to successfully map when doing SW fallbacks because we
-		 * overcommit address space for BO access, or worse cause aperture
-		 * thrashing.
-		 */
-		intel->max_bo_size = aperture->aper_available_size / 2;
-	else
-		intel->max_bo_size = 64 * 1024 * 1024;
-}
-
-static void intel_set_max_gtt_map_size(intel_screen_private *intel,
-				      const struct drm_i915_gem_get_aperture *aperture)
-{
-	if (aperture->aper_available_size)
-		/* Let objects up get bound up to the size where only 2 would fit in
-		 * the aperture, but then leave slop to account for alignment like
-		 * libdrm does.
-		 */
-		intel->max_gtt_map_size =
-			aperture->aper_available_size * 3 / 4 / 2;
-	else
-		intel->max_gtt_map_size = 16 * 1024 * 1024;
-}
-
-static void intel_set_max_tiling_size(intel_screen_private *intel,
-				     const struct drm_i915_gem_get_aperture *aperture)
-{
-	if (aperture->aper_available_size)
-		/* Let objects be tiled up to the size where only 4 would fit in
-		 * the aperture, presuming worst case alignment.
-		 */
-		intel->max_tiling_size = aperture->aper_available_size / 4;
-	else
-		intel->max_tiling_size = 4 * 1024 * 1024;
-}
-
-void intel_set_gem_max_sizes(ScrnInfoPtr scrn)
-{
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-	struct drm_i915_gem_get_aperture aperture;
-
-	aperture.aper_available_size = 0;
-	drmIoctl(intel->drmSubFD, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
-
-	intel_set_max_bo_size(intel, &aperture);
-	intel_set_max_gtt_map_size(intel, &aperture);
-	intel_set_max_tiling_size(intel, &aperture);
-}
commit e55198746102afb7427f577bd5bfc76667438da9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Dec 1 13:49:03 2011 +0000

    sna: Reuse the full size of an old handle for io
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
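
The hunks below defer the malloc of the CPU shadow until the final
buffer size is known, so that a handle recovered from the cache via
search_linear_cache() can be used at its full allocated size. A minimal
sketch of just the sizing decision, with a stand-in type rather than
the driver's real structures:

    #include <stddef.h>

    /* Stand-in for a cached allocation; illustration only. */
    struct cached { size_t size; };

    static size_t buffer_alloc_size(const struct cached *old,
                                    size_t requested)
    {
            /* Adopt the full size of the cached allocation so none of
             * the kernel's pages go to waste; otherwise use the
             * requested (already aligned) size. */
            return old ? old->size : requested;
    }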

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index ad08c6f..db3a9c0 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2116,14 +2116,23 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 	alloc = (flags & KGEM_BUFFER_LAST) ? 4096 : 32 * 1024;
 	alloc = ALIGN(size, alloc);
 
-	bo = malloc(sizeof(*bo) + alloc);
-	if (bo == NULL)
-		return NULL;
-
 	handle = 0;
-	if (kgem->has_vmap)
+	if (kgem->has_vmap) {
+		bo = malloc(sizeof(*bo) + alloc);
+		if (bo == NULL)
+			return NULL;
+
 		handle = gem_vmap(kgem->fd, bo+1, alloc, write);
-	if (handle == 0) {
+		if (handle) {
+			__kgem_bo_init(&bo->base, handle, alloc);
+			bo->base.vmap = true;
+			bo->need_io = 0;
+			goto init;
+		} else
+			free(bo);
+	}
+
+	{
 		struct kgem_bo *old;
 
 		old = NULL;
@@ -2132,6 +2141,11 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		if (old == NULL)
 			old = search_linear_cache(kgem, alloc, false);
 		if (old) {
+			alloc = old->size;
+			bo = malloc(sizeof(*bo) + alloc);
+			if (bo == NULL)
+				return NULL;
+
 			memcpy(&bo->base, old, sizeof(*old));
 			if (old->rq)
 				list_replace(&old->request,
@@ -2141,6 +2155,10 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 			free(old);
 			bo->base.refcnt = 1;
 		} else {
+			bo = malloc(sizeof(*bo) + alloc);
+			if (bo == NULL)
+				return NULL;
+
 			if (!__kgem_bo_init(&bo->base,
 					    gem_create(kgem->fd, alloc),
 					    alloc)) {
@@ -2150,11 +2168,8 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		}
 		bo->need_io = write;
 		bo->base.io = write;
-	} else {
-		__kgem_bo_init(&bo->base, handle, alloc);
-		bo->base.vmap = true;
-		bo->need_io = 0;
 	}
+init:
 	bo->base.reusable = false;
 
 	bo->alloc = alloc;
commit c5632369cbd6473304c06e4230347abbe46513ec
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Dec 1 13:23:56 2011 +0000

    sna: Move the preservation of the io handle into the common destroy path
    
    In order to capture and reuse all io buffers.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
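
A minimal sketch of the "transfer the handle to a minimum bo" idea,
with simplified stand-in types rather than the driver's definitions:
the large wrapper carrying the CPU shadow is freed, while a bare
struct keeping only the kernel handle survives into the bo cache.

    #include <stdlib.h>
    #include <string.h>

    /* Simplified stand-ins for the driver structures. */
    struct kgem_bo { unsigned handle; int reusable; };
    struct kgem_partial_bo { struct kgem_bo base; char mem[]; };

    static struct kgem_bo *shrink_to_minimum(struct kgem_partial_bo *bo)
    {
            struct kgem_bo *base = malloc(sizeof(*base));
            if (base == NULL)
                    return &bo->base;  /* on OOM, keep the big wrapper */
            memcpy(base, &bo->base, sizeof(*base));
            base->reusable = 1;        /* handle may re-enter the cache */
            free(bo);                  /* drop the CPU shadow */
            return base;
    }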

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 959f97c..ad08c6f 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -611,7 +611,20 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	if (NO_CACHE)
 		goto destroy;
 
-	if(!bo->reusable)
+	if (bo->io) {
+		/* transfer the handle to a minimum bo */
+		struct kgem_bo *base = malloc(sizeof(*base));
+		if (base) {
+			memcpy(base, bo, sizeof (*base));
+			base->reusable = true;
+			list_init(&base->list);
+			list_replace(&bo->request, &base->request);
+			free(bo);
+			bo = base;
+		}
+	}
+
+	if (!bo->reusable)
 		goto destroy;
 
 	if (!bo->rq && !bo->needs_flush) {
@@ -820,18 +833,6 @@ static void kgem_finish_partials(struct kgem *kgem)
 			bo->need_io = 0;
 		}
 
-		/* transfer the handle to a minimum bo */
-		if (bo->base.refcnt == 1 && !bo->base.vmap) {
-			struct kgem_bo *base = malloc(sizeof(*base));
-			if (base) {
-				memcpy(base, &bo->base, sizeof (*base));
-				base->reusable = true;
-				list_init(&base->list);
-				list_replace(&bo->base.request, &base->request);
-				free(bo);
-				bo = (struct kgem_partial_bo *)base;
-			}
-		}
 		kgem_bo_unref(kgem, &bo->base);
 	}
 }
@@ -2148,6 +2149,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 			}
 		}
 		bo->need_io = write;
+		bo->base.io = write;
 	} else {
 		__kgem_bo_init(&bo->base, handle, alloc);
 		bo->base.vmap = true;
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 112a91c..59e64cf 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -73,6 +73,7 @@ struct kgem_bo {
 	uint32_t cpu_read : 1;
 	uint32_t cpu_write : 1;
 	uint32_t vmap : 1;
+	uint32_t io : 1;
 	uint32_t flush : 1;
 	uint32_t sync : 1;
 	uint32_t purged : 1;
commit 95f4da647a4055545b09cae0834df0fa2127a458
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Nov 30 11:59:31 2011 +0000

    sna: Align pwrite to transfer whole cachelines
    
    Daniel claims that this will be faster, or will be once he has
    completed rewriting pwrite!
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
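
A worked example of the rounding below, with hypothetical values
(offset = 100, length = 200): the ioctl is issued with offset 64 and
size 256, and data_ptr moves back by 36 bytes, so the copy starts
inside the malloc'ed block that precedes the data. Presumably that is
what the "fortuitously safe" comment refers to: the widened source
range is still readable memory owned by the caller, and the extra
bytes written into the object land in ranges the caller does not rely
on.

    #include <stdio.h>
    #include <stdint.h>

    #define ALIGN(v, a) (((v) + (a) - 1) & ~((a) - 1))

    int main(void)
    {
            uint32_t offset = 100, length = 200;  /* hypothetical */
            uint32_t off = offset & ~63u;
            uint32_t size = ALIGN(offset + length, 64) - off;
            printf("offset %u -> %u, size %u -> %u, data_ptr shift %d\n",
                   offset, off, length, size, (int)off - (int)offset);
            return 0;
    }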

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 58b9b67..959f97c 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -164,9 +164,16 @@ static int gem_write(int fd, uint32_t handle,
 
 	VG_CLEAR(pwrite);
 	pwrite.handle = handle;
-	pwrite.offset = offset;
-	pwrite.size = length;
-	pwrite.data_ptr = (uintptr_t)src;
+	/* align the transfer to cachelines; fortuitously this is safe! */
+	if ((offset | length) & 63) {
+		pwrite.offset = offset & ~63;
+		pwrite.size = ALIGN(offset+length, 64) - pwrite.offset;
+		pwrite.data_ptr = (uintptr_t)src + pwrite.offset - offset;
+	} else {
+		pwrite.offset = offset;
+		pwrite.size = length;
+		pwrite.data_ptr = (uintptr_t)src;
+	}
 	return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
 }
 
commit ecd6cca617ac29cf2b1b2a4d33fca19b84fea2a9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Nov 29 19:27:46 2011 +0000

    sna/gen5: Handle cpu-bo for render targets
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
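
A condensed sketch of the new target selection, with stand-in types
(the real fallback calls sna_pixmap_force_to_gpu(), which can migrate
the pixmap rather than merely read priv->gpu_bo):

    #include <stddef.h>

    /* Stand-in types for illustration only. */
    struct kgem_bo;
    struct sna_pixmap { struct kgem_bo *gpu_bo, *cpu_bo; };

    static struct kgem_bo *pick_target(struct sna_pixmap *priv)
    {
            /* Render to the CPU bo while nothing lives on the GPU... */
            if (priv && priv->gpu_bo == NULL && priv->cpu_bo)
                    return priv->cpu_bo;
            /* ...otherwise use (or, in the driver, force) the GPU bo. */
            return priv ? priv->gpu_bo : NULL;
    }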

diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index d4870b5..8b683a8 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1856,20 +1856,29 @@ gen5_composite_set_target(PicturePtr dst, struct sna_composite_op *op)
 	}
 
 	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
+	priv = sna_pixmap(op->dst.pixmap);
+
 	op->dst.width  = op->dst.pixmap->drawable.width;
 	op->dst.height = op->dst.pixmap->drawable.height;
 	op->dst.format = dst->format;
-	priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
-	if (priv == NULL)
-		return FALSE;
 
 	DBG(("%s: pixmap=%p, format=%08x\n", __FUNCTION__,
 	     op->dst.pixmap, (unsigned int)op->dst.format));
 
+	op->dst.bo = NULL;
+	if (priv && priv->gpu_bo == NULL) {
+		op->dst.bo = priv->cpu_bo;
+		op->damage = &priv->cpu_damage;
+	}
+	if (op->dst.bo == NULL) {
+		priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
+		if (priv == NULL)
+			return FALSE;
 
-	op->dst.bo = priv->gpu_bo;
-	op->damage = &priv->gpu_damage;
-	if (sna_damage_is_all(&priv->gpu_damage, op->dst.width, op->dst.height))
+		op->dst.bo = priv->gpu_bo;
+		op->damage = &priv->gpu_damage;
+	}
+	if (sna_damage_is_all(op->damage, op->dst.width, op->dst.height))
 		op->damage = NULL;
 
 	DBG(("%s: bo=%p, damage=%p\n", __FUNCTION__, op->dst.bo, op->damage));
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index de6ede9..5a889e7 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2048,10 +2048,9 @@ gen6_composite_set_target(struct sna_composite_op *op, PicturePtr dst)
 
 		op->dst.bo = priv->gpu_bo;
 		op->damage = &priv->gpu_damage;
-		if (sna_damage_is_all(&priv->gpu_damage,
-				      op->dst.width, op->dst.height))
-			op->damage = NULL;
 	}
+	if (sna_damage_is_all(op->damage, op->dst.width, op->dst.height))
+		op->damage = NULL;
 
 	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
 			    &op->dst.x, &op->dst.y);

