xf86-video-intel: 3 commits - configure.ac src/sna/kgem.c src/sna/kgem.h

Sun Dec 16 14:56:23 PST 2012

configure.ac   |    9 ++
 src/sna/kgem.c |  194 +++++++++++++++++++++++++++++++++++----------------------
 src/sna/kgem.h |    2 
 3 files changed, 131 insertions(+), 74 deletions(-)

New commits:
commit 5b0572503eab235bc7eff20d369241330c41e630
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 16 23:04:55 2012 +0000

    sna: Enable support for opting out of the kernel CS workaround
    
    Keeping a set of pinned batches in userspace is considerably faster as
    we can avoid the blit overhead. However, combining the two approaches
    yields even greater performance, as fast as without either workaround,
    and yet stable.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/configure.ac b/configure.ac
index 9354437..e153019 100644
--- a/configure.ac
+++ b/configure.ac
@@ -282,6 +282,15 @@ if test "x$accel" = xnone -a "x$UMS_ONLY" != "xyes"; then
 	AC_MSG_ERROR([No default acceleration option])
 fi
 
+AC_ARG_ENABLE(pinned-batches,
+	      AS_HELP_STRING([--enable-pinned-batches],
+			     [Enable use of "pinned batches" (experimental) [default=no]]),
+	      [PINNED="$enableval"],
+	      [PINNED=no])
+if test "x$PINNED" = xyes; then
+	AC_DEFINE(USE_PINNED_BATCHES,1,[Assume "pinned batches" support])
+fi
+
 AC_ARG_ENABLE(userptr,
 	      AS_HELP_STRING([--enable-userptr],
 			     [Enable use of userptr (experimental) [default=no]]),
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 36eab9b..ad967eb 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -70,12 +70,18 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 #define DBG_NO_MAP_UPLOAD 0
 #define DBG_NO_RELAXED_FENCING 0
 #define DBG_NO_SECURE_BATCHES 0
+#define DBG_NO_PINNED_BATCHES 0
 #define DBG_NO_FAST_RELOC 0
 #define DBG_NO_HANDLE_LUT 0
 #define DBG_DUMP 0
 
 #define SHOW_BATCH 0
 
+#ifndef USE_PINNED_BATCHES
+#undef DBG_NO_PINNED_BATCHES
+#define DBG_NO_PINNED_BATCHES 1
+#endif
+
 #ifndef USE_FASTRELOC
 #undef DBG_NO_FAST_RELOC
 #define DBG_NO_FAST_RELOC 1
@@ -110,11 +116,13 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
 
 #define LOCAL_I915_PARAM_HAS_SEMAPHORES		20
 #define LOCAL_I915_PARAM_HAS_SECURE_BATCHES	23
-#define LOCAL_I915_PARAM_HAS_NO_RELOC		24
-#define LOCAL_I915_PARAM_HAS_HANDLE_LUT		25
+#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES	24
+#define LOCAL_I915_PARAM_HAS_NO_RELOC		25
+#define LOCAL_I915_PARAM_HAS_HANDLE_LUT		26
 
-#define LOCAL_I915_EXEC_NO_RELOC		(1<<10)
-#define LOCAL_I915_EXEC_HANDLE_LUT		(1<<11)
+#define LOCAL_I915_EXEC_IS_PINNED		(1<<10)
+#define LOCAL_I915_EXEC_NO_RELOC		(1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT		(1<<12)
 
 #define LOCAL_I915_GEM_USERPTR       0x32
 #define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
@@ -828,6 +836,14 @@ static bool test_has_secure_batches(struct kgem *kgem)
 	return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
 }
 
+static bool test_has_pinned_batches(struct kgem *kgem)
+{
+	if (DBG_NO_PINNED_BATCHES)
+		return false;
+
+	return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
+}
+
 static int kgem_get_screen_index(struct kgem *kgem)
 {
 	struct sna *sna = container_of(kgem, struct sna, kgem);
@@ -943,7 +959,6 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 	kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
 	kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
 
-
 	kgem->has_blt = gem_param(kgem, I915_PARAM_HAS_BLT) > 0;
 	DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
 	     kgem->has_blt));
@@ -991,6 +1006,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 	DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
 	     kgem->has_secure_batches));
 
+	kgem->has_pinned_batches = test_has_pinned_batches(kgem);
+	DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
+	     kgem->has_pinned_batches));
+
 	if (!is_hw_supported(kgem, dev)) {
 		xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
 			   "Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
@@ -1002,7 +1021,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 	}
 
 	kgem->batch_size = ARRAY_SIZE(kgem->batch);
-	if (gen == 020)
+	if (gen == 020 && !kgem->has_pinned_batches)
 		/* Limited to what we can pin */
 		kgem->batch_size = 4*1024;
 	if (gen == 022)
@@ -1144,6 +1163,8 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 		kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
 	if (kgem->has_handle_lut)
 		kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
+	if (kgem->has_pinned_batches)
+		kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
 }
 
 /* XXX hopefully a good approximation */
@@ -2395,7 +2416,7 @@ out_16384:
 		}
 	}
 
-	if (kgem->gen == 020) {
+	if (kgem->gen == 020 && !kgem->has_pinned_batches) {
 		assert(size <= 16384);
 
 		bo = list_first_entry(&kgem->pinned_batches[size > 4096],
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 59be858..c23b9e3 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -170,6 +170,7 @@ struct kgem {
 	uint32_t has_relaxed_delta :1;
 	uint32_t has_semaphores :1;
 	uint32_t has_secure_batches :1;
+	uint32_t has_pinned_batches :1;
 	uint32_t has_cacheing :1;
 	uint32_t has_llc :1;
 	uint32_t has_no_reloc :1;
commit 805f78addf3ffb36c736df680806cf722b18fea9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 16 22:04:54 2012 +0000

    sna: Try to reuse pinned batches by inspecting the kernel busy status
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 15d5642..36eab9b 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1830,94 +1830,103 @@ static bool kgem_retire__flushing(struct kgem *kgem)
 	return retired;
 }
 
-static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
+
+static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
 {
 	bool retired = false;
 
-	while (!list_is_empty(&kgem->requests[ring])) {
-		struct kgem_request *rq;
+	DBG(("%s: request %d complete\n",
+	     __FUNCTION__, rq->bo->handle));
 
-		rq = list_first_entry(&kgem->requests[ring],
-				      struct kgem_request,
-				      list);
-		if (kgem_busy(kgem, rq->bo->handle))
-			break;
+	while (!list_is_empty(&rq->buffers)) {
+		struct kgem_bo *bo;
 
-		DBG(("%s: request %d complete\n",
-		     __FUNCTION__, rq->bo->handle));
+		bo = list_first_entry(&rq->buffers,
+				      struct kgem_bo,
+				      request);
 
-		while (!list_is_empty(&rq->buffers)) {
-			struct kgem_bo *bo;
+		assert(RQ(bo->rq) == rq);
+		assert(bo->exec == NULL);
+		assert(bo->domain == DOMAIN_GPU);
 
-			bo = list_first_entry(&rq->buffers,
-					      struct kgem_bo,
-					      request);
+		list_del(&bo->request);
 
-			assert(RQ(bo->rq) == rq);
-			assert(bo->exec == NULL);
-			assert(bo->domain == DOMAIN_GPU);
+		if (bo->needs_flush)
+			bo->needs_flush = kgem_busy(kgem, bo->handle);
+		if (bo->needs_flush) {
+			DBG(("%s: moving %d to flushing\n",
+			     __FUNCTION__, bo->handle));
+			list_add(&bo->request, &kgem->flushing);
+			bo->rq = &_kgem_static_request;
+		} else {
+			bo->domain = DOMAIN_NONE;
+			bo->rq = NULL;
+		}
 
-			list_del(&bo->request);
+		if (bo->refcnt)
+			continue;
 
-			if (bo->needs_flush)
-				bo->needs_flush = kgem_busy(kgem, bo->handle);
+		if (bo->snoop) {
 			if (bo->needs_flush) {
-				DBG(("%s: moving %d to flushing\n",
-				     __FUNCTION__, bo->handle));
 				list_add(&bo->request, &kgem->flushing);
 				bo->rq = &_kgem_static_request;
 			} else {
-				bo->domain = DOMAIN_NONE;
-				bo->rq = NULL;
+				kgem_bo_move_to_snoop(kgem, bo);
 			}
+			continue;
+		}
 
-			if (bo->refcnt)
-				continue;
-
-			if (bo->snoop) {
-				if (bo->needs_flush) {
-					list_add(&bo->request, &kgem->flushing);
-					bo->rq = &_kgem_static_request;
-				} else {
-					kgem_bo_move_to_snoop(kgem, bo);
-				}
-				continue;
-			}
+		if (!bo->reusable) {
+			DBG(("%s: closing %d\n",
+			     __FUNCTION__, bo->handle));
+			kgem_bo_free(kgem, bo);
+			continue;
+		}
 
-			if (!bo->reusable) {
+		if (!bo->needs_flush) {
+			if (kgem_bo_set_purgeable(kgem, bo)) {
+				kgem_bo_move_to_inactive(kgem, bo);
+				retired = true;
+			} else {
 				DBG(("%s: closing %d\n",
 				     __FUNCTION__, bo->handle));
 				kgem_bo_free(kgem, bo);
-				continue;
-			}
-
-			if (!bo->needs_flush) {
-				if (kgem_bo_set_purgeable(kgem, bo)) {
-					kgem_bo_move_to_inactive(kgem, bo);
-					retired = true;
-				} else {
-					DBG(("%s: closing %d\n",
-					     __FUNCTION__, bo->handle));
-					kgem_bo_free(kgem, bo);
-				}
 			}
 		}
+	}
 
-		assert(rq->bo->rq == NULL);
-		assert(list_is_empty(&rq->bo->request));
+	assert(rq->bo->rq == NULL);
+	assert(list_is_empty(&rq->bo->request));
 
-		if (--rq->bo->refcnt == 0) {
-			if (kgem_bo_set_purgeable(kgem, rq->bo)) {
-				kgem_bo_move_to_inactive(kgem, rq->bo);
-				retired = true;
-			} else {
-				DBG(("%s: closing %d\n",
-				     __FUNCTION__, rq->bo->handle));
-				kgem_bo_free(kgem, rq->bo);
-			}
+	if (--rq->bo->refcnt == 0) {
+		if (kgem_bo_set_purgeable(kgem, rq->bo)) {
+			kgem_bo_move_to_inactive(kgem, rq->bo);
+			retired = true;
+		} else {
+			DBG(("%s: closing %d\n",
+			     __FUNCTION__, rq->bo->handle));
+			kgem_bo_free(kgem, rq->bo);
 		}
+	}
+
+	__kgem_request_free(rq);
+	return retired;
+}
+
+static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
+{
+	bool retired = false;
 
-		__kgem_request_free(rq);
+	while (!list_is_empty(&kgem->requests[ring])) {
+		struct kgem_request *rq;
+
+		rq = list_first_entry(&kgem->requests[ring],
+				      struct kgem_request,
+				      list);
+		if (kgem_busy(kgem, rq->bo->handle))
+			break;
+
+		retired |= __kgem_retire_rq(kgem, rq);
 	}
 
 #if HAS_DEBUG_FULL
@@ -2357,9 +2366,16 @@ kgem_create_batch(struct kgem *kgem, int size)
 				      struct kgem_bo,
 				      list);
 		if (!bo->rq) {
+out_4096:
 			list_move_tail(&bo->list, &kgem->pinned_batches[0]);
 			return kgem_bo_reference(bo);
 		}
+
+		if (!kgem_busy(kgem, bo->handle)) {
+			assert(RQ(bo->rq)->bo == bo);
+			__kgem_retire_rq(kgem, RQ(bo->rq));
+			goto out_4096;
+		}
 	}
 
 	if (size <= 16384) {
@@ -2367,9 +2383,16 @@ kgem_create_batch(struct kgem *kgem, int size)
 				      struct kgem_bo,
 				      list);
 		if (!bo->rq) {
+out_16384:
 			list_move_tail(&bo->list, &kgem->pinned_batches[1]);
 			return kgem_bo_reference(bo);
 		}
+
+		if (!kgem_busy(kgem, bo->handle)) {
+			assert(RQ(bo->rq)->bo == bo);
+			__kgem_retire_rq(kgem, RQ(bo->rq));
+			goto out_16384;
+		}
 	}
 
 	if (kgem->gen == 020) {
commit f1aec676810c4a4c180b342d9a83254e08dd55da
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 16 17:37:32 2012 +0000

    sna: Precompute the base set of batch-flags
    
    This is to make it easier to extend in future.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index fc97737..15d5642 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1138,6 +1138,12 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
 	if ((int)kgem->fence_max < 0)
 		kgem->fence_max = 5; /* minimum safe value for all hw */
 	DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
+
+	kgem->batch_flags_base = 0;
+	if (kgem->has_no_reloc)
+		kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
+	if (kgem->has_handle_lut)
+		kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
 }
 
 /* XXX hopefully a good approximation */
@@ -2303,11 +2309,7 @@ void kgem_reset(struct kgem *kgem)
 	kgem->surface = kgem->batch_size;
 	kgem->mode = KGEM_NONE;
 	kgem->flush = 0;
-	kgem->batch_flags = 0;
-	if (kgem->has_no_reloc)
-		kgem->batch_flags |= LOCAL_I915_EXEC_NO_RELOC;
-	if (kgem->has_handle_lut)
-		kgem->batch_flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+	kgem->batch_flags = kgem->batch_flags_base;
 
 	kgem->next_request = __kgem_request_alloc();
 
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 53e252d..59be858 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -144,6 +144,7 @@ struct kgem {
 	} vma[NUM_MAP_TYPES];
 
 	uint32_t batch_flags;
+	uint32_t batch_flags_base;
 #define I915_EXEC_SECURE (1<<9)
 #define LOCAL_EXEC_OBJECT_WRITE (1<<2)