[PATCH 12/18] drm/radeon: multiple ring allocator
j.glisse at gmail.com
Fri May 4 15:43:53 PDT 2012
From: Christian König <deathsimple at vodafone.de>
A fresh start with a new idea for a multiple ring allocator. It
should perform as well as a normal ring allocator as long as only
one ring does something, but falls back to a more complex algorithm
as soon as several rings are active at once.

We store the last allocated bo in last, and we always try to allocate
after it. The principle is that in a linear GPU ring progression,
what is after last is the oldest bo we allocated and thus the first
one that should no longer be in use by the GPU.

If that is not the case, we skip over the bo after last to the closest
done bo, if one exists. If none exists and we are not asked to block,
we report failure to allocate.

If we are asked to block, we wait on the oldest fence of each ring,
and return as soon as any one of those fences completes.
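As a minimal standalone illustration of the wrap-around test
(hole_distance is an illustrative helper, not something this patch
adds), the closest done bo after last is the one with the smallest
modular distance from the end of the last allocation:

/* sketch only: modular distance on the ring of offsets */
#include <assert.h>

static unsigned hole_distance(unsigned last_eoffset, unsigned soffset,
                              unsigned manager_size)
{
	if (soffset < last_eoffset) {
		/* the candidate wrapped around, pretend it
		 * lies after the end of the buffer */
		soffset += manager_size;
	}
	return soffset - last_eoffset;
}

int main(void)
{
	/* 1024 byte manager, last allocation ends at offset 900 */
	assert(hole_distance(900, 950, 1024) == 50);  /* no wrap */
	assert(hole_distance(900, 100, 1024) == 224); /* wrapped */
	return 0;
}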
Signed-off-by: Christian König <deathsimple at vodafone.de>
Signed-off-by: Jerome Glisse <jglisse at redhat.com>
---
drivers/gpu/drm/radeon/radeon.h | 7 +-
drivers/gpu/drm/radeon/radeon_ring.c | 19 +--
drivers/gpu/drm/radeon/radeon_sa.c | 314 ++++++++++++++++++++++++----------
3 files changed, 233 insertions(+), 107 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 6f8d0f5..1c3eb06 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -385,7 +385,9 @@ struct radeon_bo_list {
struct radeon_sa_manager {
spinlock_t lock;
struct radeon_bo *bo;
- struct list_head sa_bo;
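+ /* olist keeps all allocations in offset order, flist keeps per ring
+ * the allocations still protected by a fence, last points at the most
+ * recently allocated bo */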
+ struct radeon_sa_bo *last;
+ struct list_head flist[RADEON_NUM_RINGS];
+ struct list_head olist;
unsigned size;
uint64_t gpu_addr;
void *cpu_ptr;
@@ -396,7 +398,8 @@ struct radeon_sa_bo;
/* sub-allocation buffer */
struct radeon_sa_bo {
- struct list_head list;
+ struct list_head olist;
+ struct list_head flist;
struct radeon_sa_manager *manager;
unsigned soffset;
unsigned eoffset;
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index c84af9c..2eed251 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -204,25 +204,22 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
int radeon_ib_pool_init(struct radeon_device *rdev)
{
- struct radeon_sa_manager tmp;
int i, r;
- r = radeon_sa_bo_manager_init(rdev, &tmp,
- RADEON_IB_POOL_SIZE*64*1024,
- RADEON_GEM_DOMAIN_GTT);
- if (r) {
- return r;
- }
-
radeon_mutex_lock(&rdev->ib_pool.mutex);
if (rdev->ib_pool.ready) {
radeon_mutex_unlock(&rdev->ib_pool.mutex);
- radeon_sa_bo_manager_fini(rdev, &tmp);
return 0;
}
- rdev->ib_pool.sa_manager = tmp;
- INIT_LIST_HEAD(&rdev->ib_pool.sa_manager.sa_bo);
+ r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
+ RADEON_IB_POOL_SIZE*64*1024,
+ RADEON_GEM_DOMAIN_GTT);
+ if (r) {
+ radeon_mutex_unlock(&rdev->ib_pool.mutex);
+ return r;
+ }
+
for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
rdev->ib_pool.ibs[i].fence = NULL;
rdev->ib_pool.ibs[i].idx = i;
diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
index bc4ef87..dc67cb4 100644
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@@ -27,21 +27,42 @@
* Authors:
* Jerome Glisse <glisse at freedesktop.org>
*/
+/* Algorithm:
+ *
+ * We store the last allocated bo in last, and we always try to
+ * allocate after it. The principle is that in a linear GPU ring
+ * progression, what is after last is the oldest bo we allocated and
+ * thus the first one that should no longer be in use by the GPU.
+ *
+ * If that is not the case, we skip over the bo after last to the
+ * closest done bo, if one exists. If none exists and we are not asked
+ * to block, we report failure to allocate.
+ *
+ * If we are asked to block, we wait on the oldest fence of each ring,
+ * and return as soon as any one of those fences completes.
+ */
#include "drmP.h"
#include "drm.h"
#include "radeon.h"
+static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo);
+static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager);
+
int radeon_sa_bo_manager_init(struct radeon_device *rdev,
struct radeon_sa_manager *sa_manager,
unsigned size, u32 domain)
{
- int r;
+ int i, r;
spin_lock_init(&sa_manager->lock);
sa_manager->bo = NULL;
sa_manager->size = size;
sa_manager->domain = domain;
- INIT_LIST_HEAD(&sa_manager->sa_bo);
+ INIT_LIST_HEAD(&sa_manager->olist);
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ INIT_LIST_HEAD(&sa_manager->flist[i]);
+ }
+ sa_manager->last = NULL;
r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true,
RADEON_GEM_DOMAIN_CPU, &sa_manager->bo);
@@ -58,11 +79,17 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
{
struct radeon_sa_bo *sa_bo, *tmp;
- if (!list_empty(&sa_manager->sa_bo)) {
- dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+ if (!list_empty(&sa_manager->olist)) {
+ sa_manager->last = list_entry(sa_manager->olist.next,
+ struct radeon_sa_bo, olist);
+ radeon_sa_bo_try_free(sa_manager);
+ radeon_sa_bo_remove_locked(sa_manager->last);
+ if (!list_empty(&sa_manager->olist)) {
+ dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
+ }
}
- list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) {
- list_del_init(&sa_bo->list);
+ list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
+ radeon_sa_bo_remove_locked(sa_bo);
}
radeon_bo_unref(&sa_manager->bo);
sa_manager->size = 0;
@@ -114,111 +141,204 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
return r;
}
-/*
- * Principe is simple, we keep a list of sub allocation in offset
- * order (first entry has offset == 0, last entry has the highest
- * offset).
- *
- * When allocating new object we first check if there is room at
- * the end total_size - (last_object_offset + last_object_size) >=
- * alloc_size. If so we allocate new object there.
- *
- * When there is not enough room at the end, we start waiting for
- * each sub object until we reach object_offset+object_size >=
- * alloc_size, this object then become the sub object we return.
- *
- * Alignment can't be bigger than page size
- */
-
static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo)
{
- list_del(&sa_bo->list);
+ struct radeon_sa_manager *sa_manager = sa_bo->manager;
+ struct list_head *prev = sa_bo->olist.prev;
+
+ list_del_init(&sa_bo->olist);
+ if (list_empty(&sa_manager->olist)) {
+ sa_manager->last = NULL;
+ } else if (sa_manager->last == sa_bo) {
+ if (prev != &sa_manager->olist) {
+ sa_manager->last = list_entry(prev,
+ struct radeon_sa_bo,
+ olist);
+ } else {
+ sa_manager->last = list_entry(sa_manager->olist.prev,
+ struct radeon_sa_bo,
+ olist);
+ }
+ }
+ list_del_init(&sa_bo->flist);
radeon_fence_unref(&sa_bo->fence);
kfree(sa_bo);
}
+static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager)
+{
+ struct radeon_sa_bo *sa_bo, *tmp;
+
+ if (sa_manager->last == NULL) {
+ return;
+ }
+ sa_bo = sa_manager->last;
+ list_for_each_entry_safe_continue(sa_bo, tmp, &sa_manager->olist, olist) {
+ if (sa_bo->fence == NULL || !radeon_fence_signaled(sa_bo->fence)) {
+ return;
+ }
+ radeon_sa_bo_remove_locked(sa_bo);
+ }
+}
+
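+/* compute the hole [*soffset, *eoffset) directly after sa_bo, bounded
+ * by the next bo in offset order or the end of the buffer */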
+static inline void radeon_sa_bo_hole_after(struct radeon_sa_bo *sa_bo,
+ unsigned *soffset, unsigned *eoffset)
+{
+ if (sa_bo == NULL) {
+ return;
+ }
+ *soffset = sa_bo->eoffset;
+ if (sa_bo->olist.next == &sa_bo->manager->olist) {
+ *eoffset = sa_bo->manager->size;
+ } else {
+ struct radeon_sa_bo *next;
+
+ next = list_entry(sa_bo->olist.next, struct radeon_sa_bo, olist);
+ *eoffset = next->soffset;
+ }
+}
+
+static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager,
+ struct radeon_sa_bo *sa_bo,
+ unsigned size, unsigned align)
+{
+ unsigned soffset, eoffset, wasted;
+
+ soffset = 0;
+ eoffset = sa_manager->size;
+ radeon_sa_bo_hole_after(sa_manager->last, &soffset, &eoffset);
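+ /* bytes needed to bump soffset up to the next multiple of align */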
+ wasted = (align - (soffset % align)) % align;
+
+ if ((eoffset - soffset) >= (size + wasted)) {
+ soffset += wasted;
+
+ sa_bo->manager = sa_manager;
+ sa_bo->soffset = soffset;
+ sa_bo->eoffset = soffset + size;
+ if (sa_manager->last) {
+ list_add(&sa_bo->olist, &sa_manager->last->olist);
+ } else {
+ list_add(&sa_bo->olist, &sa_manager->olist);
+ }
+ INIT_LIST_HEAD(&sa_bo->flist);
+ sa_manager->last = sa_bo;
+ return true;
+ }
+ return false;
+}
+
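+/* skip over still in-use allocations and move last to just before the
+ * closest bo whose fence has already signaled, so that the next
+ * radeon_sa_bo_try_free can reclaim it */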
+static void radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager)
+{
+ unsigned i, soffset, best, tmp;
+
+ /* last can't be NULL if we reach this function */
+ soffset = sa_manager->last->eoffset;
+ /* to handle wrap around we add sa_manager->size */
+ best = sa_manager->size * 2;
+ /* go over all fence lists and try to find the sa_bo
+ * closest to the current last
+ */
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ struct radeon_sa_bo *sa_bo;
+
+ if (list_empty(&sa_manager->flist[i]))
+ continue;
+ sa_bo = list_first_entry(&sa_manager->flist[i],
+ struct radeon_sa_bo, flist);
+
+ if (!radeon_fence_signaled(sa_bo->fence))
+ continue;
+
+ tmp = sa_bo->soffset;
+ if (tmp < soffset) {
+ /* wrap around, pretend it's after */
+ tmp += sa_manager->size;
+ }
+ tmp -= soffset;
+ if (tmp < best) {
+ /* this sa bo is the closest one */
+ best = tmp;
+ sa_manager->last = list_entry(sa_bo->olist.prev,
+ struct radeon_sa_bo,
+ olist);
+ }
+ }
+}
+
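+/* gather the oldest fence of each ring and wait for any one of them
+ * to signal, dropping the manager lock while waiting */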
+static int radeon_sa_bo_block(struct radeon_device *rdev,
+ struct radeon_sa_manager *sa_manager)
+{
+ struct radeon_fence *fences[RADEON_NUM_RINGS];
+ int i, r;
+
+ for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+ struct radeon_sa_bo *sa_bo;
+
+ fences[i] = NULL;
+ if (list_empty(&sa_manager->flist[i])) {
+ continue;
+ }
+ sa_bo = list_first_entry(&sa_manager->flist[i],
+ struct radeon_sa_bo, flist);
+ fences[i] = sa_bo->fence;
+ }
+
+ spin_unlock(&sa_manager->lock);
+ r = radeon_fence_wait_any(rdev, fences, false);
+ spin_lock(&sa_manager->lock);
+ return r;
+}
+
int radeon_sa_bo_new(struct radeon_device *rdev,
struct radeon_sa_manager *sa_manager,
struct radeon_sa_bo **sa_bo,
unsigned size, unsigned align, bool block)
{
- struct radeon_fence *fence = NULL;
- struct radeon_sa_bo *tmp, *next;
- struct list_head *head;
- unsigned offset = 0, wasted = 0;
- int r;
+ unsigned i;
+ int r = -ENOMEM;
BUG_ON(align > RADEON_GPU_PAGE_SIZE);
BUG_ON(size > sa_manager->size);
*sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL);
-
-retry:
+ if ((*sa_bo) == NULL) {
+ return -ENOMEM;
+ }
+ (*sa_bo)->manager = sa_manager;
+ (*sa_bo)->fence = NULL;
+ INIT_LIST_HEAD(&(*sa_bo)->olist);
+ INIT_LIST_HEAD(&(*sa_bo)->flist);
spin_lock(&sa_manager->lock);
+ do {
+ /* try to allocate a couple of times before going to wait */
+ for (i = 0; i < 2; ++i) {
+ radeon_sa_bo_try_free(sa_manager);
- /* no one ? */
- head = sa_manager->sa_bo.prev;
- if (list_empty(&sa_manager->sa_bo)) {
- goto out;
- }
-
- /* look for a hole big enough */
- offset = 0;
- list_for_each_entry_safe(tmp, next, &sa_manager->sa_bo, list) {
- /* try to free this object */
- if (tmp->fence) {
- if (radeon_fence_signaled(tmp->fence)) {
- radeon_sa_bo_remove_locked(tmp);
- continue;
- } else {
- fence = tmp->fence;
+ if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo,
+ size, align)) {
+ spin_unlock(&sa_manager->lock);
+ return 0;
}
- }
- /* room before this object ? */
- if (offset < tmp->soffset && (tmp->soffset - offset) >= size) {
- head = tmp->list.prev;
- goto out;
- }
- offset = tmp->eoffset;
- wasted = offset % align;
- if (wasted) {
- wasted = align - wasted;
+ /* see if we can skip over some allocations */
+ radeon_sa_bo_next_hole(sa_manager);
}
- offset += wasted;
- }
- /* room at the end ? */
- head = sa_manager->sa_bo.prev;
- tmp = list_entry(head, struct radeon_sa_bo, list);
- offset = tmp->eoffset;
- wasted = offset % align;
- if (wasted) {
- wasted = align - wasted;
- }
- offset += wasted;
- if ((sa_manager->size - offset) < size) {
- /* failed to find somethings big enough */
- spin_unlock(&sa_manager->lock);
- if (block && fence) {
- r = radeon_fence_wait(fence, false);
- if (r)
- return r;
-
- goto retry;
+
+ if (block) {
+ r = radeon_sa_bo_block(rdev, sa_manager);
+ if (r) {
+ goto out_err;
+ }
}
- kfree(*sa_bo);
- *sa_bo = NULL;
- return -ENOMEM;
- }
+ } while (block);
-out:
- (*sa_bo)->manager = sa_manager;
- (*sa_bo)->soffset = offset;
- (*sa_bo)->eoffset = offset + size;
- list_add(&(*sa_bo)->list, head);
+out_err:
spin_unlock(&sa_manager->lock);
- return 0;
+ kfree(*sa_bo);
+ *sa_bo = NULL;
+ return r;
}
void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
@@ -226,13 +346,16 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
{
struct radeon_sa_manager *sa_manager;
- if (!sa_bo || !*sa_bo)
+ if (sa_bo == NULL || *sa_bo == NULL) {
return;
+ }
sa_manager = (*sa_bo)->manager;
spin_lock(&sa_manager->lock);
if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
(*sa_bo)->fence = radeon_fence_ref(fence);
+ list_add_tail(&(*sa_bo)->flist,
+ &sa_manager->flist[fence->ring]);
} else {
radeon_sa_bo_remove_locked(*sa_bo);
}
@@ -247,15 +370,18 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
struct radeon_sa_bo *i;
spin_lock(&sa_manager->lock);
- list_for_each_entry(i, &sa_manager->sa_bo, list) {
- seq_printf(m, "[%08x %08x] size %4d (%p)",
- i->soffset, i->eoffset, i->eoffset - i->soffset, i);
- if (i->fence) {
- seq_printf(m, " protected by %Ld (%p)\n",
- i->fence->seq, i->fence);
+ list_for_each_entry(i, &sa_manager->olist, olist) {
+ if (i == sa_manager->last) {
+ seq_printf(m, ">");
} else {
- seq_printf(m, "\n");
+ seq_printf(m, " ");
+ }
+ seq_printf(m, "[0x%08x 0x%08x] size %8d",
+ i->soffset, i->eoffset, i->eoffset - i->soffset);
+ if (i->fence) {
+ seq_printf(m, " protected by 0x%016llx", i->fence->seq);
}
+ seq_printf(m, "\n");
}
spin_unlock(&sa_manager->lock);
}
--
1.7.7.6