[PATCH 1/3] drm/radeon: improve sa allocator (add fence and make it self-contained)
j.glisse at gmail.com
Wed May 2 08:59:14 PDT 2012
From: Jerome Glisse <jglisse at redhat.com>
This patch is groundwork for having the sa allocator as a standalone,
self-contained helper. Each sa_bo can be associated with a fence, and
when allocating a new one you can ask to block until there is room to
satisfy your request.
It also changes the sa allocation logic. The sa manager now keeps a
last ptr that points to the last allocated sa bo. As sa bo are
allocated from beginning to end, and as the sa bo list is sorted in
offset order, the sa bo after the last one in the list is also the
oldest sa bo, i.e. the one that should finish first.
Thus the allocation algorithm is simple: check if there is enough
room after the last sa bo, and if so allocate the new sa bo there. If
there isn't, wait for the next sa bo to finish. The code also handles
wrap around, i.e. when last reaches the end offset of the sa manager,
the next sa bo is allocated from the beginning (offset 0).
The idea is that bo allocated through this helper have a lifetime
linked to one of the rings of the GPU, thus as the ring progresses sa
bo are progressively freed starting with last->next.
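For illustration only, the decision flow above can be summarised by the
following minimal standalone sketch. The struct layout and the helper
sa_find_offset() are simplified placeholders invented for this summary,
not code from this patch; the real implementation is radeon_sa_bo_new()
in radeon_sa.c below.

    #include <stddef.h>

    struct sa_bo {
            unsigned soffset, eoffset;
            struct sa_bo *next;   /* next in offset order, NULL at the tail */
    };

    struct sa_manager {
            unsigned size;        /* total size of the managed buffer */
            struct sa_bo *head;   /* lowest offset, start of the list */
            struct sa_bo *last;   /* most recently allocated sa_bo */
    };

    /* Return the start offset for a new allocation, or -1 when the caller
     * must reclaim or wait on the oldest sa_bo and then retry. */
    static int sa_find_offset(struct sa_manager *m, unsigned size, unsigned align)
    {
            unsigned hole_offset, wasted;

            if (m->last == NULL)
                    return 0;                       /* manager is empty */

            hole_offset = m->last->eoffset;
            wasted = (align - (hole_offset % align)) % align;

            if (m->last->next == NULL) {
                    /* last is the tail: the hole runs to the end of the buffer */
                    if (size + wasted <= m->size - hole_offset)
                            return (int)(hole_offset + wasted);
                    /* otherwise we may wrap around to the hole before the
                     * oldest sa_bo, which is the head of the list here */
                    if (size <= m->head->soffset)
                            return 0;
            } else {
                    /* the oldest sa_bo is last->next; the hole ends where it starts */
                    if (size + wasted <= m->last->next->soffset - hole_offset)
                            return (int)(hole_offset + wasted);
            }
            return -1;      /* no room: free or wait on the oldest, then retry */
    }

Keeping allocations in offset order like this means the hole after last
is the only place that needs checking, and the oldest allocation is the
natural one to reclaim or wait on.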
Signed-off-by: Christian König <deathsimple at vodafone.de>
Signed-off-by: Jerome Glisse <jglisse at redhat.com>
---
drivers/gpu/drm/radeon/radeon.h | 16 ++-
drivers/gpu/drm/radeon/radeon_cs.c | 4 +-
drivers/gpu/drm/radeon/radeon_gart.c | 11 +-
drivers/gpu/drm/radeon/radeon_object.h | 11 +-
drivers/gpu/drm/radeon/radeon_ring.c | 33 +++-
drivers/gpu/drm/radeon/radeon_sa.c | 266 ++++++++++++++++++++++++-----
drivers/gpu/drm/radeon/radeon_semaphore.c | 4 +-
7 files changed, 278 insertions(+), 67 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 729d332..acbb642 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -380,23 +380,29 @@ struct radeon_bo_list {
* Assumption is that there won't be hole (all object on same
* alignment).
*/
+
+struct radeon_sa_bo;
+
struct radeon_sa_manager {
+ spinlock_t lock;
struct radeon_bo *bo;
struct list_head sa_bo;
unsigned size;
+ struct radeon_sa_bo *last;
uint64_t gpu_addr;
void *cpu_ptr;
uint32_t domain;
};
-struct radeon_sa_bo;
-
/* sub-allocation buffer */
struct radeon_sa_bo {
struct list_head list;
struct radeon_sa_manager *manager;
- unsigned offset;
+ unsigned soffset;
+ unsigned eoffset;
unsigned size;
+ struct radeon_fence *fence;
+ bool free;
};
/*
@@ -628,7 +634,7 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
*/
struct radeon_ib {
- struct radeon_sa_bo sa_bo;
+ struct radeon_sa_bo *sa_bo;
unsigned idx;
uint32_t length_dw;
uint64_t gpu_addr;
@@ -684,7 +690,7 @@ struct radeon_vm {
unsigned last_pfn;
u64 pt_gpu_addr;
u64 *pt;
- struct radeon_sa_bo sa_bo;
+ struct radeon_sa_bo *sa_bo;
struct mutex mutex;
/* last fence for cs using this vm */
struct radeon_fence *fence;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index a0826bb..3989015 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -465,7 +465,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
/* ib pool is bind at 0 in virtual address space to gpu_addr is the
* offset inside the pool bo
*/
- parser->const_ib->gpu_addr = parser->const_ib->sa_bo.offset;
+ parser->const_ib->gpu_addr = parser->const_ib->sa_bo->soffset;
r = radeon_ib_schedule(rdev, parser->const_ib);
if (r)
goto out;
@@ -475,7 +475,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
/* ib pool is bind at 0 in virtual address space to gpu_addr is the
* offset inside the pool bo
*/
- parser->ib->gpu_addr = parser->ib->sa_bo.offset;
+ parser->ib->gpu_addr = parser->ib->sa_bo->soffset;
parser->ib->is_const_ib = false;
r = radeon_ib_schedule(rdev, parser->ib);
out:
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index c58a036..cc5036c 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -393,10 +393,13 @@ int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
}
retry:
- r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo,
+ r = radeon_sa_bo_new(rdev, &vm->sa_bo, &rdev->vm_manager.sa_manager,
RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8),
- RADEON_GPU_PAGE_SIZE);
+ RADEON_GPU_PAGE_SIZE, false, NULL);
if (r) {
+ if (r != -ENOMEM) {
+ return r;
+ }
if (list_empty(&rdev->vm_manager.lru_vm)) {
return r;
}
@@ -405,9 +408,9 @@ retry:
goto retry;
}
vm->pt = rdev->vm_manager.sa_manager.cpu_ptr;
- vm->pt += (vm->sa_bo.offset >> 3);
+ vm->pt += (vm->sa_bo->soffset >> 3);
vm->pt_gpu_addr = rdev->vm_manager.sa_manager.gpu_addr;
- vm->pt_gpu_addr += vm->sa_bo.offset;
+ vm->pt_gpu_addr += vm->sa_bo->soffset;
memset(vm->pt, 0, RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8));
retry_id:
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index f9104be..35e54da 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -156,10 +156,15 @@ extern int radeon_sa_bo_manager_start(struct radeon_device *rdev,
extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
struct radeon_sa_manager *sa_manager);
extern int radeon_sa_bo_new(struct radeon_device *rdev,
+ struct radeon_sa_bo **sa_bo,
struct radeon_sa_manager *sa_manager,
- struct radeon_sa_bo *sa_bo,
- unsigned size, unsigned align);
+ unsigned size, unsigned align,
+ bool block, struct radeon_fence *fence);
extern void radeon_sa_bo_free(struct radeon_device *rdev,
- struct radeon_sa_bo *sa_bo);
+ struct radeon_sa_bo **sa_bo);
+#if defined(CONFIG_DEBUG_FS)
+extern void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
+ struct seq_file *m);
+#endif
#endif
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 2eb4c6e..a7db890 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -122,15 +122,15 @@ retry:
for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
radeon_ib_try_free(rdev, &rdev->ib_pool.ibs[idx]);
if (rdev->ib_pool.ibs[idx].fence == NULL) {
- r = radeon_sa_bo_new(rdev, &rdev->ib_pool.sa_manager,
- &rdev->ib_pool.ibs[idx].sa_bo,
- size, 256);
+ r = radeon_sa_bo_new(rdev, &rdev->ib_pool.ibs[idx].sa_bo,
+ &rdev->ib_pool.sa_manager,
+ size, 256, false, NULL);
if (!r) {
*ib = &rdev->ib_pool.ibs[idx];
(*ib)->ptr = rdev->ib_pool.sa_manager.cpu_ptr;
- (*ib)->ptr += ((*ib)->sa_bo.offset >> 2);
+ (*ib)->ptr += ((*ib)->sa_bo->soffset >> 2);
(*ib)->gpu_addr = rdev->ib_pool.sa_manager.gpu_addr;
- (*ib)->gpu_addr += (*ib)->sa_bo.offset;
+ (*ib)->gpu_addr += (*ib)->sa_bo->soffset;
(*ib)->fence = fence;
(*ib)->vm_id = 0;
(*ib)->is_const_ib = false;
@@ -148,6 +148,7 @@ retry:
}
/* this should be rare event, ie all ib scheduled none signaled yet.
*/
+ r = -ENOMEM;
for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
if (rdev->ib_pool.ibs[idx].fence && rdev->ib_pool.ibs[idx].fence->emitted) {
r = radeon_fence_wait(rdev->ib_pool.ibs[idx].fence, false);
@@ -228,7 +229,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
rdev->ib_pool.ibs[i].fence = NULL;
rdev->ib_pool.ibs[i].idx = i;
rdev->ib_pool.ibs[i].length_dw = 0;
- INIT_LIST_HEAD(&rdev->ib_pool.ibs[i].sa_bo.list);
+ rdev->ib_pool.ibs[i].sa_bo = NULL;
}
rdev->ib_pool.head_id = 0;
rdev->ib_pool.ready = true;
@@ -600,12 +601,32 @@ static int radeon_debugfs_ib_info(struct seq_file *m, void *data)
static struct drm_info_list radeon_debugfs_ib_list[RADEON_IB_POOL_SIZE];
static char radeon_debugfs_ib_names[RADEON_IB_POOL_SIZE][32];
static unsigned radeon_debugfs_ib_idx[RADEON_IB_POOL_SIZE];
+
+static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = (struct drm_info_node *) m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct radeon_device *rdev = dev->dev_private;
+
+ radeon_sa_bo_dump_debug_info(&rdev->ib_pool.sa_manager, m);
+
+ return 0;
+}
+
+static struct drm_info_list radeon_debugfs_sa_list[] = {
+ {"radeon_sa_info", &radeon_debugfs_sa_info, 0, NULL},
+};
#endif
int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring)
{
#if defined(CONFIG_DEBUG_FS)
unsigned i;
+ int r;
+
+ r = radeon_debugfs_add_files(rdev, radeon_debugfs_sa_list, 1);
+ if (r)
+ return r;
for (i = 0; i < ARRAY_SIZE(radeon_debugfs_ring_info_list); ++i) {
struct drm_info_list *info = &radeon_debugfs_ring_info_list[i];
int ridx = *(int*)radeon_debugfs_ring_info_list[i].data;
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index 8fbfe69..e758aaa 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -27,6 +27,23 @@
* Authors:
* Jerome Glisse <glisse at freedesktop.org>
*/
+/* sa allocation logic:
+ *
+ * The sa manager keeps a last ptr that points to the last allocated
+ * sa bo. As sa bo are allocated from beginning to end and as the sa bo
+ * list is sorted in offset order, the sa bo after the last one in
+ * the list is also the oldest sa bo, i.e. the one that should finish first.
+ *
+ * Thus the allocation algorithm is simple: check if there is enough
+ * room after the last sa bo, and if so allocate the new sa bo there. If
+ * there isn't, wait for the next sa bo to finish. The code also handles
+ * wrap around, i.e. when last reaches the end offset of the sa manager,
+ * the next sa bo is allocated from the beginning (offset 0).
+ *
+ * The idea is that bo allocated through this helper have a lifetime
+ * linked to one of the rings of the GPU, thus as the ring progresses sa
+ * bo are progressively freed starting with last->next.
+ */
#include "drmP.h"
#include "drm.h"
#include "radeon.h"
@@ -37,9 +54,11 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev,
{
int r;
+ spin_lock_init(&sa_manager->lock);
sa_manager->bo = NULL;
sa_manager->size = size;
sa_manager->domain = domain;
+ sa_manager->last = NULL;
INIT_LIST_HEAD(&sa_manager->sa_bo);
r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
@@ -63,7 +82,9 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) {
list_del_init(&sa_bo->list);
}
- radeon_bo_unref(&sa_manager->bo);
+ if (sa_manager->bo) {
+ radeon_bo_unref(&sa_manager->bo);
+ }
sa_manager->size = 0;
}
@@ -113,77 +134,232 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
return r;
}
+static void radeon_sa_bo_free_locked(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo)
+{
+ struct radeon_sa_manager *sa_manager = sa_bo->manager;
+ struct list_head *prev;
+
+ if (sa_bo->fence) {
+ if (!radeon_fence_signaled(sa_bo->fence)) {
+ return;
+ }
+ radeon_fence_unref(&sa_bo->fence);
+ }
+ prev = sa_bo->list.prev;
+ list_del_init(&sa_bo->list);
+ if (list_empty(&sa_manager->sa_bo)) {
+ /* this bo was alone in the list */
+ sa_manager->last = NULL;
+ } else if (sa_manager->last == sa_bo) {
+ if (prev == &sa_manager->sa_bo) {
+ /* sa_bo is at the beginning of the list, the new last
+ * becomes the last entry of the list
+ */
+ sa_manager->last = list_entry(sa_manager->sa_bo.prev, struct radeon_sa_bo, list);
+ } else {
+ /* prev becomes the new last */
+ sa_manager->last = list_entry(prev, struct radeon_sa_bo, list);
+ }
+ }
+ /* in case try free already freed the sa_bo but radeon_sa_bo_free
+ * wasn't called yet, the free bool protects us from freeing the
+ * structure too early
+ */
+ if (sa_bo->free) {
+ kfree(sa_bo);
+ }
+}
+
+static bool radeon_sa_manager_try_free(struct radeon_device *rdev,
+ struct radeon_sa_bo *oldest)
+{
+ if (oldest->fence && oldest->fence->emitted) {
+ if (radeon_fence_signaled(oldest->fence)) {
+ radeon_sa_bo_free_locked(rdev, oldest);
+ return true;
+ }
+ }
+ return false;
+}
+
/*
* Principe is simple, we keep a list of sub allocation in offset
* order (first entry has offset == 0, last entry has the highest
* offset).
*
- * When allocating new object we first check if there is room at
- * the end total_size - (last_object_offset + last_object_size) >=
- * alloc_size. If so we allocate new object there.
- *
- * When there is not enough room at the end, we start waiting for
- * each sub object until we reach object_offset+object_size >=
- * alloc_size, this object then become the sub object we return.
+ * The last ptr serves as the equivalent of the read position in a cp ring.
+ * last->prev is the previous last, while last->next is the oldest
+ * sa_bo allocated.
*
* Alignment can't be bigger than page size
+ *
+ * Return value:
+ * -ENOMEM failure to allocate
+ * -ERESTARTSYS restart ioctl
+ * -EDEADLK when the fence wait reports a GPU lockup
*/
int radeon_sa_bo_new(struct radeon_device *rdev,
+ struct radeon_sa_bo **tmp,
struct radeon_sa_manager *sa_manager,
- struct radeon_sa_bo *sa_bo,
- unsigned size, unsigned align)
+ unsigned size, unsigned align,
+ bool block, struct radeon_fence *fence)
{
- struct radeon_sa_bo *tmp;
- struct list_head *head;
- unsigned offset = 0, wasted = 0;
+ struct radeon_sa_bo *sa_bo, *next, *oldest;
+ unsigned offset, wasted, hole_offset, hole_size;
+ bool try_begining = false, add_begining = false;
+ int r = -ENOMEM;
BUG_ON(align > RADEON_GPU_PAGE_SIZE);
BUG_ON(size > sa_manager->size);
- /* no one ? */
- head = sa_manager->sa_bo.prev;
- if (list_empty(&sa_manager->sa_bo)) {
+ *tmp = NULL;
+ sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL);
+ if (sa_bo == NULL) {
+ return -ENOMEM;
+ }
+ sa_bo->manager = sa_manager;
+ sa_bo->fence = NULL;
+ sa_bo->free = false;
+ sa_bo->soffset = 0;
+ sa_bo->eoffset = 0;
+ sa_bo->size = 0;
+ INIT_LIST_HEAD(&sa_bo->list);
+
+ spin_lock(&sa_manager->lock);
+retry:
+ if (sa_manager->last == NULL) {
+ offset = 0;
+ add_begining = true;
goto out;
}
- /* look for a hole big enough */
- offset = 0;
- list_for_each_entry(tmp, &sa_manager->sa_bo, list) {
- /* room before this object ? */
- if (offset < tmp->offset && (tmp->offset - offset) >= size) {
- head = tmp->list.prev;
+ hole_offset = sa_manager->last->eoffset;
+ wasted = (align - (hole_offset % align)) % align;
+ if (sa_manager->last->list.next == &sa_manager->sa_bo) {
+ /* no sa bo after that one */
+ hole_size = sa_manager->size - hole_offset;
+ try_begining = true;
+ oldest = list_entry(sa_manager->sa_bo.next, struct radeon_sa_bo, list);
+ } else {
+ next = list_entry(sa_manager->last->list.next, struct radeon_sa_bo, list);
+ hole_size = next->soffset - hole_offset;
+ oldest = next;
+ }
+ if ((size + wasted) <= hole_size) {
+ offset = hole_offset + wasted;
+ goto out;
+ } else if (try_begining) {
+ /* last was at the end of the list, so if we wrap over we might find
+ * room at the beginning of the list
+ */
+ offset = 0;
+ hole_size = oldest->soffset;
+ if (size <= hole_size) {
+ add_begining = true;
goto out;
}
- offset = tmp->offset + tmp->size;
- wasted = offset % align;
- if (wasted) {
- wasted = align - wasted;
+ }
+ /* try to be optimistic and free the oldest one */
+ if (radeon_sa_manager_try_free(rdev, oldest)) {
+ goto retry;
+ }
+
+ /* if block is used, all the sa_bo must be associated with a
+ * fence; we perform a sanity check but expect things to go
+ * berserk if you don't follow this
+ */
+ if (block) {
+ struct radeon_fence *fence = NULL;
+
+ if (oldest->fence) {
+ fence = radeon_fence_ref(oldest->fence);
}
- offset += wasted;
- }
- /* room at the end ? */
- head = sa_manager->sa_bo.prev;
- tmp = list_entry(head, struct radeon_sa_bo, list);
- offset = tmp->offset + tmp->size;
- wasted = offset % align;
- if (wasted) {
- wasted = align - wasted;
- }
- offset += wasted;
- if ((sa_manager->size - offset) < size) {
- /* failed to find somethings big enough */
- return -ENOMEM;
+ spin_unlock(&sa_manager->lock);
+
+ if (fence == NULL) {
+ /* this should never happen */
+ dev_warn(rdev->dev, "sa allocator nothing we can wait for\n");
+ goto out_err;
+ }
+
+ r = radeon_fence_wait(fence, false);
+ radeon_fence_unref(&fence);
+ if (r) {
+ goto out_err;
+ }
+
+ spin_lock(&sa_manager->lock);
+ goto retry;
}
+ spin_unlock(&sa_manager->lock);
+
+out_err:
+ kfree(sa_bo);
+ return r;
out:
- sa_bo->manager = sa_manager;
- sa_bo->offset = offset;
+ *tmp = sa_bo;
+ if (add_begining) {
+ list_add(&sa_bo->list, &sa_manager->sa_bo);
+ } else {
+ list_add(&sa_bo->list, &sa_manager->last->list);
+ }
+ sa_manager->last = sa_bo;
+ if (fence) {
+ sa_bo->fence = radeon_fence_ref(fence);
+ }
+ sa_bo->soffset = offset;
+ sa_bo->eoffset = offset + size;
sa_bo->size = size;
- list_add(&sa_bo->list, head);
+ spin_unlock(&sa_manager->lock);
return 0;
}
-void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo)
+void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **tmp)
{
- list_del_init(&sa_bo->list);
+ struct radeon_sa_bo *sa_bo;
+ struct radeon_sa_manager *sa_manager;
+
+ if (tmp == NULL || *tmp == NULL) {
+ return;
+ }
+
+ sa_bo = *tmp;
+ sa_manager = sa_bo->manager;
+ *tmp = NULL;
+ spin_lock(&sa_manager->lock);
+ sa_bo->free = true;
+ if (list_empty(&sa_bo->list)) {
+ /* it has already been freed */
+ kfree(sa_bo);
+ goto out;
+ }
+ if (sa_bo->fence && !sa_bo->fence->emitted) {
+ radeon_fence_unref(&sa_bo->fence);
+ }
+ radeon_sa_bo_free_locked(rdev, sa_bo);
+
+out:
+ spin_unlock(&sa_manager->lock);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
+ struct seq_file *m)
+{
+ struct radeon_sa_bo *i;
+
+ spin_lock(&sa_manager->lock);
+ seq_printf(m, "last [%p]\n", sa_manager->last);
+ if (sa_manager->last) {
+ i = sa_manager->last;
+ seq_printf(m, "[0x%08x 0x%08x]/0x%08x size %d [%p] LAST\n", i->soffset,
+ i->eoffset, sa_manager->size, i->size, i);
+ }
+ list_for_each_entry(i, &sa_manager->sa_bo, list) {
+ seq_printf(m, "[0x%08x 0x%08x]/0x%08x size %d [%p]\n", i->soffset,
+ i->eoffset, sa_manager->size, i->size, i);
+ }
+ spin_unlock(&sa_manager->lock);
}
+#endif
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index 930a08a..822723e 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -55,9 +55,9 @@ static int radeon_semaphore_add_bo(struct radeon_device *rdev)
return r;
}
gpu_addr = rdev->ib_pool.sa_manager.gpu_addr;
- gpu_addr += bo->ib->sa_bo.offset;
+ gpu_addr += bo->ib->sa_bo->soffset;
cpu_ptr = rdev->ib_pool.sa_manager.cpu_ptr;
- cpu_ptr += (bo->ib->sa_bo.offset >> 2);
+ cpu_ptr += (bo->ib->sa_bo->soffset >> 2);
for (i = 0; i < (RADEON_SEMAPHORE_BO_SIZE/8); i++) {
bo->semaphores[i].gpu_addr = gpu_addr;
bo->semaphores[i].cpu_ptr = cpu_ptr;
--
1.7.7.6