[Intel-gfx] [PATCH 39/53] drm/i915/bdw: Two-stage execlist submit process
oscar.mateo at intel.com
oscar.mateo at intel.com
Fri Jun 13 17:37:57 CEST 2014
From: Michel Thierry <michel.thierry at intel.com>
A context switch (and the execlist submission that triggers it) should
happen only when no other context is active; otherwise pre-emption occurs.
To ensure this, we place context switch requests in a queue, and those
requests are later consumed when the right context switch interrupt is
received (still TODO).
v2: Use a spinlock, do not remove the requests on unqueue (wait for
context switch completion).
Signed-off-by: Thomas Daniel <thomas.daniel at intel.com>
v3: Several rebases and code changes. Use unique ID.
v4:
- Move the queue/lock init to the late ring initialization.
- Damien's kmalloc review comments: check return, use sizeof(*req),
do not cast.
v5:
- Do not reuse drm_i915_gem_request. Instead, create our own.
- New namespace.
Signed-off-by: Michel Thierry <michel.thierry at intel.com> (v1)
Signed-off-by: Oscar Mateo <oscar.mateo at intel.com> (v2-v5)
---
drivers/gpu/drm/i915/intel_lrc.c | 63 +++++++++++++++++++++++++++++++--
drivers/gpu/drm/i915/intel_lrc.h | 8 +++++
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++
3 files changed, 71 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index cd4512f..49d3c00 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -214,6 +214,63 @@ static int execlists_submit_context(struct intel_engine_cs *ring,
return 0;
}
+/*
+ * execlists_context_unqueue() - submit the request(s) at the head of the queue
+ * @ring: engine whose execlist_queue is consumed
+ *
+ * Picks up to two requests with different contexts from the head of
+ * ring->execlist_queue and hands them to execlists_submit_context().
+ * Consecutive requests for the same context are coalesced: the older one is
+ * unlinked and freed, since the newer tail already covers its workload.
+ * The requests that get submitted are left on the queue.
+ *
+ * Must be called with ring->execlist_lock held.
+ */
+static void execlists_context_unqueue(struct intel_engine_cs *ring)
+{
+	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
+	struct intel_ctx_submit_request *cursor, *tmp;
+	int ret;
+
+	assert_spin_locked(&ring->execlist_lock);
+
+	if (list_empty(&ring->execlist_queue))
+		return;
+
+	/* Try to read in pairs */
+	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
+				 execlist_link) {
+		if (!req0) {
+			req0 = cursor;
+		} else if (req0->ctx == cursor->ctx) {
+			/*
+			 * Same ctx: ignore first request, as second request
+			 * will update tail past first request's workload.
+			 *
+			 * NOTE(review): this drops a context reference while
+			 * the caller holds execlist_lock with IRQs disabled;
+			 * confirm the final unreference can never sleep.
+			 */
+			list_del(&req0->execlist_link);
+			i915_gem_context_unreference(req0->ctx);
+			kfree(req0);
+			req0 = cursor;
+		} else {
+			req1 = cursor;
+			break;
+		}
+	}
+
+	/*
+	 * Keep the submission itself out of the assertion macro, and warn
+	 * rather than BUG(): we are called with a spinlock held and IRQs
+	 * off, so taking the machine down here would leave nothing to debug.
+	 */
+	ret = execlists_submit_context(ring, req0->ctx, req0->tail,
+				       req1 ? req1->ctx : NULL,
+				       req1 ? req1->tail : 0);
+	WARN_ON(ret);
+}
+
+/*
+ * Queue a submission of context @to, with ring tail @tail, on @ring.
+ *
+ * A request is allocated and takes its own reference on @to, then it is
+ * appended to ring->execlist_queue under ring->execlist_lock. If the queue
+ * was empty beforehand, it is processed right away through
+ * execlists_context_unqueue().
+ *
+ * Returns 0 on success, or -ENOMEM if the request cannot be allocated.
+ */
+static int execlists_context_queue(struct intel_engine_cs *ring,
+				   struct intel_context *to,
+				   u32 tail)
+{
+	struct intel_ctx_submit_request *request;
+	unsigned long irqflags;
+	bool kick;
+
+	request = kzalloc(sizeof(*request), GFP_KERNEL);
+	if (!request)
+		return -ENOMEM;
+
+	/* The request keeps the context alive for as long as it is queued. */
+	request->ctx = to;
+	i915_gem_context_reference(to);
+	request->tail = tail;
+	request->ring = ring;
+
+	spin_lock_irqsave(&ring->execlist_lock, irqflags);
+	/* Sample emptiness before adding: only the first entry kicks the queue. */
+	kick = list_empty(&ring->execlist_queue);
+	list_add_tail(&request->execlist_link, &ring->execlist_queue);
+	if (kick)
+		execlists_context_unqueue(ring);
+	spin_unlock_irqrestore(&ring->execlist_lock, irqflags);
+
+	return 0;
+}
+
static inline struct intel_ringbuffer *
logical_ringbuf_get(struct intel_engine_cs *ring, struct intel_context *ctx)
{
@@ -891,8 +948,7 @@ static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
+/*
+ * Hand context @ctx, with @value as its ring tail, to the execlist queue of
+ * @ring instead of writing it to the hardware directly.
+ */
static void gen8_submit_ctx(struct intel_engine_cs *ring,
struct intel_context *ctx, u32 value)
{
- /* FIXME: too cheeky, we don't even check if the ELSP is ready */
- execlists_submit_context(ring, ctx, value, NULL, 0);
+	int ret = execlists_context_queue(ring, ctx, value);
+
+	/*
+	 * This hook returns void, so a failed queue attempt (-ENOMEM) cannot
+	 * be propagated to the caller; at least make it visible instead of
+	 * silently dropping the submission.
+	 */
+	WARN_ON(ret);
}
static int gen8_emit_request(struct intel_engine_cs *ring,
@@ -988,6 +1044,9 @@ static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *rin
INIT_LIST_HEAD(&ring->request_list);
init_waitqueue_head(&ring->irq_queue);
+ INIT_LIST_HEAD(&ring->execlist_queue);
+ spin_lock_init(&ring->execlist_lock);
+
ret = intel_lr_context_deferred_create(dctx, ring);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index eeb90ec..e1938a3 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -44,4 +44,12 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
bool intel_enable_execlists(struct drm_device *dev);
+/* One queued request to submit a context on an engine's execlist queue. */
+struct intel_ctx_submit_request {
+ /* Context to submit; execlists_context_queue() takes a reference on it */
+ struct intel_context *ctx;
+ /* Engine the request was queued on */
+ struct intel_engine_cs *ring;
+ /* Tail value handed to execlists_submit_context() together with ctx */
+ u32 tail;
+
+ /* Link in the engine's execlist_queue list */
+ struct list_head execlist_link;
+};
+
#endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ca02b5d..c3342e1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -156,6 +156,8 @@ struct intel_engine_cs {
} semaphore;
/* Execlists */
+ spinlock_t execlist_lock;
+ struct list_head execlist_queue;
u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */
void (*submit_ctx)(struct intel_engine_cs *ring,
struct intel_context *ctx, u32 value);
--
1.9.0
More information about the Intel-gfx
mailing list