xf86-video-intel: 3 commits - configure.ac src/sna/kgem.c src/sna/kgem.h
Chris Wilson
ickle at kemper.freedesktop.org
Sun Dec 16 14:56:23 PST 2012
configure.ac | 9 ++
src/sna/kgem.c | 194 +++++++++++++++++++++++++++++++++++----------------------
src/sna/kgem.h | 2
3 files changed, 131 insertions(+), 74 deletions(-)
New commits:
commit 5b0572503eab235bc7eff20d369241330c41e630
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Dec 16 23:04:55 2012 +0000
sna: Enable support for opting out of the kernel CS workaround
Keeping a set of pinned batches in userspace is considerably faster as
we can avoid the blit overhead. However, combining the two approaches
yields even greater performance, as fast as without either workaround,
and yet stable.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/configure.ac b/configure.ac
index 9354437..e153019 100644
--- a/configure.ac
+++ b/configure.ac
@@ -282,6 +282,15 @@ if test "x$accel" = xnone -a "x$UMS_ONLY" != "xyes"; then
AC_MSG_ERROR([No default acceleration option])
fi
+AC_ARG_ENABLE(pinned-batches,
+ AS_HELP_STRING([--enable-pinned-batches],
+ [Enable use of "pinned batches" (experimental) [default=no]]),
+ [PINNED="$enableval"],
+ [PINNED=no])
+if test "x$PINNED" = xyes; then
+ AC_DEFINE(USE_PINNED_BATCHES,1,[Assume "pinned batches" support])
+fi
+
AC_ARG_ENABLE(userptr,
AS_HELP_STRING([--enable-userptr],
[Enable use of userptr (experimental) [default=no]]),
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 36eab9b..ad967eb 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -70,12 +70,18 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
+#define DBG_NO_PINNED_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 0
#define DBG_DUMP 0
#define SHOW_BATCH 0
+#ifndef USE_PINNED_BATCHES
+#undef DBG_NO_PINNED_BATCHES
+#define DBG_NO_PINNED_BATCHES 1
+#endif
+
#ifndef USE_FASTRELOC
#undef DBG_NO_FAST_RELOC
#define DBG_NO_FAST_RELOC 1
@@ -110,11 +116,13 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
-#define LOCAL_I915_PARAM_HAS_NO_RELOC 24
-#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 25
+#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
+#define LOCAL_I915_PARAM_HAS_NO_RELOC 25
+#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26
-#define LOCAL_I915_EXEC_NO_RELOC (1<<10)
-#define LOCAL_I915_EXEC_HANDLE_LUT (1<<11)
+#define LOCAL_I915_EXEC_IS_PINNED (1<<10)
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
#define LOCAL_I915_GEM_USERPTR 0x32
#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
@@ -828,6 +836,14 @@ static bool test_has_secure_batches(struct kgem *kgem)
return gem_param(kgem, LOCAL_I915_PARAM_HAS_SECURE_BATCHES) > 0;
}
+static bool test_has_pinned_batches(struct kgem *kgem)
+{
+ if (DBG_NO_PINNED_BATCHES)
+ return false;
+
+ return gem_param(kgem, LOCAL_I915_PARAM_HAS_PINNED_BATCHES) > 0;
+}
+
static int kgem_get_screen_index(struct kgem *kgem)
{
struct sna *sna = container_of(kgem, struct sna, kgem);
@@ -943,7 +959,6 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
kgem->vma[MAP_GTT].count = -MAX_GTT_VMA_CACHE;
kgem->vma[MAP_CPU].count = -MAX_CPU_VMA_CACHE;
-
kgem->has_blt = gem_param(kgem, I915_PARAM_HAS_BLT) > 0;
DBG(("%s: has BLT ring? %d\n", __FUNCTION__,
kgem->has_blt));
@@ -991,6 +1006,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
DBG(("%s: can use privileged batchbuffers? %d\n", __FUNCTION__,
kgem->has_secure_batches));
+ kgem->has_pinned_batches = test_has_pinned_batches(kgem);
+ DBG(("%s: can use pinned batchbuffers (to avoid CS w/a)? %d\n", __FUNCTION__,
+ kgem->has_pinned_batches));
+
if (!is_hw_supported(kgem, dev)) {
xf86DrvMsg(kgem_get_screen_index(kgem), X_WARNING,
"Detected unsupported/dysfunctional hardware, disabling acceleration.\n");
@@ -1002,7 +1021,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
}
kgem->batch_size = ARRAY_SIZE(kgem->batch);
- if (gen == 020)
+ if (gen == 020 && !kgem->has_pinned_batches)
/* Limited to what we can pin */
kgem->batch_size = 4*1024;
if (gen == 022)
@@ -1144,6 +1163,8 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
if (kgem->has_handle_lut)
kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
+ if (kgem->has_pinned_batches)
+ kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
}
/* XXX hopefully a good approximation */
@@ -2395,7 +2416,7 @@ out_16384:
}
}
- if (kgem->gen == 020) {
+ if (kgem->gen == 020 && !kgem->has_pinned_batches) {
assert(size <= 16384);
bo = list_first_entry(&kgem->pinned_batches[size > 4096],
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 59be858..c23b9e3 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -170,6 +170,7 @@ struct kgem {
uint32_t has_relaxed_delta :1;
uint32_t has_semaphores :1;
uint32_t has_secure_batches :1;
+ uint32_t has_pinned_batches :1;
uint32_t has_cacheing :1;
uint32_t has_llc :1;
uint32_t has_no_reloc :1;
commit 805f78addf3ffb36c736df680806cf722b18fea9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Dec 16 22:04:54 2012 +0000
sna: Try to reuse pinned batches by inspecting the kernel busy status
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 15d5642..36eab9b 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1830,94 +1830,103 @@ static bool kgem_retire__flushing(struct kgem *kgem)
return retired;
}
-static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
+
+static bool __kgem_retire_rq(struct kgem *kgem, struct kgem_request *rq)
{
bool retired = false;
- while (!list_is_empty(&kgem->requests[ring])) {
- struct kgem_request *rq;
+ DBG(("%s: request %d complete\n",
+ __FUNCTION__, rq->bo->handle));
- rq = list_first_entry(&kgem->requests[ring],
- struct kgem_request,
- list);
- if (kgem_busy(kgem, rq->bo->handle))
- break;
+ while (!list_is_empty(&rq->buffers)) {
+ struct kgem_bo *bo;
- DBG(("%s: request %d complete\n",
- __FUNCTION__, rq->bo->handle));
+ bo = list_first_entry(&rq->buffers,
+ struct kgem_bo,
+ request);
- while (!list_is_empty(&rq->buffers)) {
- struct kgem_bo *bo;
+ assert(RQ(bo->rq) == rq);
+ assert(bo->exec == NULL);
+ assert(bo->domain == DOMAIN_GPU);
- bo = list_first_entry(&rq->buffers,
- struct kgem_bo,
- request);
+ list_del(&bo->request);
- assert(RQ(bo->rq) == rq);
- assert(bo->exec == NULL);
- assert(bo->domain == DOMAIN_GPU);
+ if (bo->needs_flush)
+ bo->needs_flush = kgem_busy(kgem, bo->handle);
+ if (bo->needs_flush) {
+ DBG(("%s: moving %d to flushing\n",
+ __FUNCTION__, bo->handle));
+ list_add(&bo->request, &kgem->flushing);
+ bo->rq = &_kgem_static_request;
+ } else {
+ bo->domain = DOMAIN_NONE;
+ bo->rq = NULL;
+ }
- list_del(&bo->request);
+ if (bo->refcnt)
+ continue;
- if (bo->needs_flush)
- bo->needs_flush = kgem_busy(kgem, bo->handle);
+ if (bo->snoop) {
if (bo->needs_flush) {
- DBG(("%s: moving %d to flushing\n",
- __FUNCTION__, bo->handle));
list_add(&bo->request, &kgem->flushing);
bo->rq = &_kgem_static_request;
} else {
- bo->domain = DOMAIN_NONE;
- bo->rq = NULL;
+ kgem_bo_move_to_snoop(kgem, bo);
}
+ continue;
+ }
- if (bo->refcnt)
- continue;
-
- if (bo->snoop) {
- if (bo->needs_flush) {
- list_add(&bo->request, &kgem->flushing);
- bo->rq = &_kgem_static_request;
- } else {
- kgem_bo_move_to_snoop(kgem, bo);
- }
- continue;
- }
+ if (!bo->reusable) {
+ DBG(("%s: closing %d\n",
+ __FUNCTION__, bo->handle));
+ kgem_bo_free(kgem, bo);
+ continue;
+ }
- if (!bo->reusable) {
+ if (!bo->needs_flush) {
+ if (kgem_bo_set_purgeable(kgem, bo)) {
+ kgem_bo_move_to_inactive(kgem, bo);
+ retired = true;
+ } else {
DBG(("%s: closing %d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
- continue;
- }
-
- if (!bo->needs_flush) {
- if (kgem_bo_set_purgeable(kgem, bo)) {
- kgem_bo_move_to_inactive(kgem, bo);
- retired = true;
- } else {
- DBG(("%s: closing %d\n",
- __FUNCTION__, bo->handle));
- kgem_bo_free(kgem, bo);
- }
}
}
+ }
- assert(rq->bo->rq == NULL);
- assert(list_is_empty(&rq->bo->request));
+ assert(rq->bo->rq == NULL);
+ assert(list_is_empty(&rq->bo->request));
- if (--rq->bo->refcnt == 0) {
- if (kgem_bo_set_purgeable(kgem, rq->bo)) {
- kgem_bo_move_to_inactive(kgem, rq->bo);
- retired = true;
- } else {
- DBG(("%s: closing %d\n",
- __FUNCTION__, rq->bo->handle));
- kgem_bo_free(kgem, rq->bo);
- }
+ if (--rq->bo->refcnt == 0) {
+ if (kgem_bo_set_purgeable(kgem, rq->bo)) {
+ kgem_bo_move_to_inactive(kgem, rq->bo);
+ retired = true;
+ } else {
+ DBG(("%s: closing %d\n",
+ __FUNCTION__, rq->bo->handle));
+ kgem_bo_free(kgem, rq->bo);
}
+ }
+
+ __kgem_request_free(rq);
+ return retired;
+}
+
+static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
+{
+ bool retired = false;
- __kgem_request_free(rq);
+ while (!list_is_empty(&kgem->requests[ring])) {
+ struct kgem_request *rq;
+
+ rq = list_first_entry(&kgem->requests[ring],
+ struct kgem_request,
+ list);
+ if (kgem_busy(kgem, rq->bo->handle))
+ break;
+
+ retired |= __kgem_retire_rq(kgem, rq);
}
#if HAS_DEBUG_FULL
@@ -2357,9 +2366,16 @@ kgem_create_batch(struct kgem *kgem, int size)
struct kgem_bo,
list);
if (!bo->rq) {
+out_4096:
list_move_tail(&bo->list, &kgem->pinned_batches[0]);
return kgem_bo_reference(bo);
}
+
+ if (!kgem_busy(kgem, bo->handle)) {
+ assert(RQ(bo->rq)->bo == bo);
+ __kgem_retire_rq(kgem, RQ(bo->rq));
+ goto out_4096;
+ }
}
if (size <= 16384) {
@@ -2367,9 +2383,16 @@ kgem_create_batch(struct kgem *kgem, int size)
struct kgem_bo,
list);
if (!bo->rq) {
+out_16384:
list_move_tail(&bo->list, &kgem->pinned_batches[1]);
return kgem_bo_reference(bo);
}
+
+ if (!kgem_busy(kgem, bo->handle)) {
+ assert(RQ(bo->rq)->bo == bo);
+ __kgem_retire_rq(kgem, RQ(bo->rq));
+ goto out_16384;
+ }
}
if (kgem->gen == 020) {
commit f1aec676810c4a4c180b342d9a83254e08dd55da
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Dec 16 17:37:32 2012 +0000
sna: Precompute the base set of batch-flags
This is to make it easier to extend in future.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index fc97737..15d5642 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1138,6 +1138,12 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
if ((int)kgem->fence_max < 0)
kgem->fence_max = 5; /* minimum safe value for all hw */
DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
+
+ kgem->batch_flags_base = 0;
+ if (kgem->has_no_reloc)
+ kgem->batch_flags_base |= LOCAL_I915_EXEC_NO_RELOC;
+ if (kgem->has_handle_lut)
+ kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
}
/* XXX hopefully a good approximation */
@@ -2303,11 +2309,7 @@ void kgem_reset(struct kgem *kgem)
kgem->surface = kgem->batch_size;
kgem->mode = KGEM_NONE;
kgem->flush = 0;
- kgem->batch_flags = 0;
- if (kgem->has_no_reloc)
- kgem->batch_flags |= LOCAL_I915_EXEC_NO_RELOC;
- if (kgem->has_handle_lut)
- kgem->batch_flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+ kgem->batch_flags = kgem->batch_flags_base;
kgem->next_request = __kgem_request_alloc();
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 53e252d..59be858 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -144,6 +144,7 @@ struct kgem {
} vma[NUM_MAP_TYPES];
uint32_t batch_flags;
+ uint32_t batch_flags_base;
#define I915_EXEC_SECURE (1<<9)
#define LOCAL_EXEC_OBJECT_WRITE (1<<2)
More information about the xorg-commit
mailing list