Commit 80b204bc authored by Chris Wilson

drm/i915: Enable multiple timelines

With the infrastructure converted over to tracking multiple timelines in
the GEM API whilst preserving the efficiency of using a single execution
timeline internally, we can now assign a separate timeline to every
context with full-ppgtt.

v2: Add a comment to indicate the xfer between timelines upon submission.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-35-chris@chris-wilson.co.uk
parent f2d13290
...@@ -3549,6 +3549,16 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) ...@@ -3549,6 +3549,16 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx)
kref_put(&ctx->ref, i915_gem_context_free); kref_put(&ctx->ref, i915_gem_context_free);
} }
/*
 * Look up the per-engine timeline used for requests of @ctx on @engine.
 *
 * The timeline is taken from the context's own ppgtt address space when the
 * context has one, and from the device's global GTT address space otherwise.
 * Returns a pointer into that address space's timeline; never NULL.
 */
static inline struct intel_timeline *
i915_gem_context_lookup_timeline(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
struct i915_address_space *vm;
/* No ppgtt on this context: fall back to the global GTT address space. */
vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base;
/* The address space's timeline holds one entry per engine, indexed by id. */
return &vm->timeline.engine[engine->id];
}
static inline bool i915_gem_context_is_default(const struct i915_gem_context *c) static inline bool i915_gem_context_is_default(const struct i915_gem_context *c)
{ {
return c->user_handle == DEFAULT_CONTEXT_HANDLE; return c->user_handle == DEFAULT_CONTEXT_HANDLE;
......
...@@ -2564,12 +2564,9 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) ...@@ -2564,12 +2564,9 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
* not need an engine->irq_seqno_barrier() before the seqno reads. * not need an engine->irq_seqno_barrier() before the seqno reads.
*/ */
list_for_each_entry(request, &engine->timeline->requests, link) { list_for_each_entry(request, &engine->timeline->requests, link) {
if (i915_gem_request_completed(request)) if (__i915_gem_request_completed(request))
continue; continue;
if (!i915_sw_fence_done(&request->submit))
break;
return request; return request;
} }
...@@ -2597,6 +2594,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) ...@@ -2597,6 +2594,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
{ {
struct drm_i915_gem_request *request; struct drm_i915_gem_request *request;
struct i915_gem_context *incomplete_ctx; struct i915_gem_context *incomplete_ctx;
struct intel_timeline *timeline;
bool ring_hung; bool ring_hung;
if (engine->irq_seqno_barrier) if (engine->irq_seqno_barrier)
...@@ -2635,6 +2633,10 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) ...@@ -2635,6 +2633,10 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
list_for_each_entry_continue(request, &engine->timeline->requests, link) list_for_each_entry_continue(request, &engine->timeline->requests, link)
if (request->ctx == incomplete_ctx) if (request->ctx == incomplete_ctx)
reset_request(request); reset_request(request);
timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine);
list_for_each_entry(request, &timeline->requests, link)
reset_request(request);
} }
void i915_gem_reset(struct drm_i915_private *dev_priv) void i915_gem_reset(struct drm_i915_private *dev_priv)
......
...@@ -365,9 +365,9 @@ i915_gem_create_context(struct drm_device *dev, ...@@ -365,9 +365,9 @@ i915_gem_create_context(struct drm_device *dev,
return ctx; return ctx;
if (USES_FULL_PPGTT(dev)) { if (USES_FULL_PPGTT(dev)) {
struct i915_hw_ppgtt *ppgtt = struct i915_hw_ppgtt *ppgtt;
i915_ppgtt_create(to_i915(dev), file_priv);
ppgtt = i915_ppgtt_create(to_i915(dev), file_priv, ctx->name);
if (IS_ERR(ppgtt)) { if (IS_ERR(ppgtt)) {
DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
PTR_ERR(ppgtt)); PTR_ERR(ppgtt));
......
...@@ -33,14 +33,17 @@ ...@@ -33,14 +33,17 @@
#include "intel_drv.h" #include "intel_drv.h"
#include "i915_trace.h" #include "i915_trace.h"
static bool static bool ggtt_is_idle(struct drm_i915_private *dev_priv)
gpu_is_idle(struct drm_i915_private *dev_priv)
{ {
struct i915_ggtt *ggtt = &dev_priv->ggtt;
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
enum intel_engine_id id; enum intel_engine_id id;
for_each_engine(engine, dev_priv, id) { for_each_engine(engine, dev_priv, id) {
if (intel_engine_is_active(engine)) struct intel_timeline *tl;
tl = &ggtt->base.timeline.engine[engine->id];
if (i915_gem_active_isset(&tl->last_request))
return false; return false;
} }
...@@ -154,7 +157,7 @@ i915_gem_evict_something(struct i915_address_space *vm, ...@@ -154,7 +157,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK) if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK)
return -ENOSPC; return -ENOSPC;
if (gpu_is_idle(dev_priv)) { if (ggtt_is_idle(dev_priv)) {
/* If we still have pending pageflip completions, drop /* If we still have pending pageflip completions, drop
* back to userspace to give our workqueues time to * back to userspace to give our workqueues time to
* acquire our locks and unpin the old scanouts. * acquire our locks and unpin the old scanouts.
......
...@@ -2185,8 +2185,10 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, ...@@ -2185,8 +2185,10 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
} }
static void i915_address_space_init(struct i915_address_space *vm, static void i915_address_space_init(struct i915_address_space *vm,
struct drm_i915_private *dev_priv) struct drm_i915_private *dev_priv,
const char *name)
{ {
i915_gem_timeline_init(dev_priv, &vm->timeline, name);
drm_mm_init(&vm->mm, vm->start, vm->total); drm_mm_init(&vm->mm, vm->start, vm->total);
INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->active_list);
INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->inactive_list);
...@@ -2215,14 +2217,15 @@ static void gtt_write_workarounds(struct drm_device *dev) ...@@ -2215,14 +2217,15 @@ static void gtt_write_workarounds(struct drm_device *dev)
static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_private *dev_priv, struct drm_i915_private *dev_priv,
struct drm_i915_file_private *file_priv) struct drm_i915_file_private *file_priv,
const char *name)
{ {
int ret; int ret;
ret = __hw_ppgtt_init(ppgtt, dev_priv); ret = __hw_ppgtt_init(ppgtt, dev_priv);
if (ret == 0) { if (ret == 0) {
kref_init(&ppgtt->ref); kref_init(&ppgtt->ref);
i915_address_space_init(&ppgtt->base, dev_priv); i915_address_space_init(&ppgtt->base, dev_priv, name);
ppgtt->base.file = file_priv; ppgtt->base.file = file_priv;
} }
...@@ -2258,7 +2261,8 @@ int i915_ppgtt_init_hw(struct drm_device *dev) ...@@ -2258,7 +2261,8 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
struct i915_hw_ppgtt * struct i915_hw_ppgtt *
i915_ppgtt_create(struct drm_i915_private *dev_priv, i915_ppgtt_create(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *fpriv) struct drm_i915_file_private *fpriv,
const char *name)
{ {
struct i915_hw_ppgtt *ppgtt; struct i915_hw_ppgtt *ppgtt;
int ret; int ret;
...@@ -2267,7 +2271,7 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, ...@@ -2267,7 +2271,7 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv,
if (!ppgtt) if (!ppgtt)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv); ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name);
if (ret) { if (ret) {
kfree(ppgtt); kfree(ppgtt);
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -2290,6 +2294,7 @@ void i915_ppgtt_release(struct kref *kref) ...@@ -2290,6 +2294,7 @@ void i915_ppgtt_release(struct kref *kref)
WARN_ON(!list_empty(&ppgtt->base.inactive_list)); WARN_ON(!list_empty(&ppgtt->base.inactive_list));
WARN_ON(!list_empty(&ppgtt->base.unbound_list)); WARN_ON(!list_empty(&ppgtt->base.unbound_list));
i915_gem_timeline_fini(&ppgtt->base.timeline);
list_del(&ppgtt->base.global_link); list_del(&ppgtt->base.global_link);
drm_mm_takedown(&ppgtt->base.mm); drm_mm_takedown(&ppgtt->base.mm);
...@@ -3232,11 +3237,13 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) ...@@ -3232,11 +3237,13 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
/* Subtract the guard page before address space initialization to /* Subtract the guard page before address space initialization to
* shrink the range used by drm_mm. * shrink the range used by drm_mm.
*/ */
mutex_lock(&dev_priv->drm.struct_mutex);
ggtt->base.total -= PAGE_SIZE; ggtt->base.total -= PAGE_SIZE;
i915_address_space_init(&ggtt->base, dev_priv); i915_address_space_init(&ggtt->base, dev_priv, "[global]");
ggtt->base.total += PAGE_SIZE; ggtt->base.total += PAGE_SIZE;
if (!HAS_LLC(dev_priv)) if (!HAS_LLC(dev_priv))
ggtt->base.mm.color_adjust = i915_gtt_color_adjust; ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
mutex_unlock(&dev_priv->drm.struct_mutex);
if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
dev_priv->ggtt.mappable_base, dev_priv->ggtt.mappable_base,
......
...@@ -342,6 +342,7 @@ struct i915_pml4 { ...@@ -342,6 +342,7 @@ struct i915_pml4 {
struct i915_address_space { struct i915_address_space {
struct drm_mm mm; struct drm_mm mm;
struct i915_gem_timeline timeline;
struct drm_device *dev; struct drm_device *dev;
/* Every address space belongs to a struct file - except for the global /* Every address space belongs to a struct file - except for the global
* GTT that is owned by the driver (and so @file is set to NULL). In * GTT that is owned by the driver (and so @file is set to NULL). In
...@@ -613,7 +614,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv); ...@@ -613,7 +614,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv);
int i915_ppgtt_init_hw(struct drm_device *dev); int i915_ppgtt_init_hw(struct drm_device *dev);
void i915_ppgtt_release(struct kref *kref); void i915_ppgtt_release(struct kref *kref);
struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv, struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv,
struct drm_i915_file_private *fpriv); struct drm_i915_file_private *fpriv,
const char *name);
static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
{ {
if (ppgtt) if (ppgtt)
......
...@@ -34,12 +34,6 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence) ...@@ -34,12 +34,6 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence)
static const char *i915_fence_get_timeline_name(struct dma_fence *fence) static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{ {
/* Timelines are bound by eviction to a VM. However, since
* we only have a global seqno at the moment, we only have
* a single timeline. Note that each timeline will have
* multiple execution contexts (fence contexts) as we allow
* engines within a single timeline to execute in parallel.
*/
return to_request(fence)->timeline->common->name; return to_request(fence)->timeline->common->name;
} }
...@@ -64,18 +58,6 @@ static signed long i915_fence_wait(struct dma_fence *fence, ...@@ -64,18 +58,6 @@ static signed long i915_fence_wait(struct dma_fence *fence,
return i915_wait_request(to_request(fence), interruptible, timeout); return i915_wait_request(to_request(fence), interruptible, timeout);
} }
/*
 * Write the fence's own seqno into @str as an unsigned decimal string,
 * truncated to at most @size bytes by snprintf().
 */
static void i915_fence_value_str(struct dma_fence *fence, char *str, int size)
{
snprintf(str, size, "%u", fence->seqno);
}
/*
 * Write the seqno reported by intel_engine_get_seqno() for the engine of the
 * request backing @fence into @str as an unsigned decimal string, truncated
 * to at most @size bytes by snprintf().
 */
static void i915_fence_timeline_value_str(struct dma_fence *fence, char *str,
int size)
{
snprintf(str, size, "%u",
intel_engine_get_seqno(to_request(fence)->engine));
}
static void i915_fence_release(struct dma_fence *fence) static void i915_fence_release(struct dma_fence *fence)
{ {
struct drm_i915_gem_request *req = to_request(fence); struct drm_i915_gem_request *req = to_request(fence);
...@@ -90,8 +72,6 @@ const struct dma_fence_ops i915_fence_ops = { ...@@ -90,8 +72,6 @@ const struct dma_fence_ops i915_fence_ops = {
.signaled = i915_fence_signaled, .signaled = i915_fence_signaled,
.wait = i915_fence_wait, .wait = i915_fence_wait,
.release = i915_fence_release, .release = i915_fence_release,
.fence_value_str = i915_fence_value_str,
.timeline_value_str = i915_fence_timeline_value_str,
}; };
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
...@@ -147,7 +127,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) ...@@ -147,7 +127,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
GEM_BUG_ON(!i915_gem_request_completed(request)); GEM_BUG_ON(!i915_gem_request_completed(request));
trace_i915_gem_request_retire(request); trace_i915_gem_request_retire(request);
spin_lock_irq(&request->engine->timeline->lock);
list_del_init(&request->link); list_del_init(&request->link);
spin_unlock_irq(&request->engine->timeline->lock);
/* We know the GPU must have read the request to have /* We know the GPU must have read the request to have
* sent us the seqno + interrupt, so use the position * sent us the seqno + interrupt, so use the position
...@@ -313,6 +296,12 @@ static int reserve_global_seqno(struct drm_i915_private *i915) ...@@ -313,6 +296,12 @@ static int reserve_global_seqno(struct drm_i915_private *i915)
return 0; return 0;
} }
/*
 * Advance @tl's next_seqno and return the new value.
 *
 * This pre-increments the counter field inside next_seqno directly with a
 * plain (non-atomic) increment rather than going through an atomic helper.
 * NOTE(review): the mutex that makes this safe is not named here —
 * presumably struct_mutex held by the caller; confirm.
 */
static u32 __timeline_get_seqno(struct i915_gem_timeline *tl)
{
/* next_seqno only incremented under a mutex */
return ++tl->next_seqno.counter;
}
static u32 timeline_get_seqno(struct i915_gem_timeline *tl) static u32 timeline_get_seqno(struct i915_gem_timeline *tl)
{ {
return atomic_inc_return(&tl->next_seqno); return atomic_inc_return(&tl->next_seqno);
...@@ -325,16 +314,20 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) ...@@ -325,16 +314,20 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
container_of(fence, typeof(*request), submit); container_of(fence, typeof(*request), submit);
struct intel_engine_cs *engine = request->engine; struct intel_engine_cs *engine = request->engine;
struct intel_timeline *timeline; struct intel_timeline *timeline;
unsigned long flags;
u32 seqno; u32 seqno;
if (state != FENCE_COMPLETE) if (state != FENCE_COMPLETE)
return NOTIFY_DONE; return NOTIFY_DONE;
/* Will be called from irq-context when using foreign DMA fences */ /* Transfer from per-context onto the global per-engine timeline */
timeline = engine->timeline;
GEM_BUG_ON(timeline == request->timeline);
timeline = request->timeline; /* Will be called from irq-context when using foreign DMA fences */
spin_lock_irqsave(&timeline->lock, flags);
seqno = request->fence.seqno; seqno = timeline_get_seqno(timeline->common);
GEM_BUG_ON(!seqno); GEM_BUG_ON(!seqno);
GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));
...@@ -354,6 +347,12 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) ...@@ -354,6 +347,12 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
request->ring->vaddr + request->postfix); request->ring->vaddr + request->postfix);
engine->submit_request(request); engine->submit_request(request);
spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING);
list_move_tail(&request->link, &timeline->requests);
spin_unlock(&request->timeline->lock);
spin_unlock_irqrestore(&timeline->lock, flags);
return NOTIFY_DONE; return NOTIFY_DONE;
} }
...@@ -394,7 +393,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, ...@@ -394,7 +393,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
/* Move the oldest request to the slab-cache (if not in use!) */ /* Move the oldest request to the slab-cache (if not in use!) */
req = list_first_entry_or_null(&engine->timeline->requests, req = list_first_entry_or_null(&engine->timeline->requests,
typeof(*req), link); typeof(*req), link);
if (req && i915_gem_request_completed(req)) if (req && __i915_gem_request_completed(req))
i915_gem_request_retire(req); i915_gem_request_retire(req);
/* Beware: Dragons be flying overhead. /* Beware: Dragons be flying overhead.
...@@ -431,14 +430,15 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, ...@@ -431,14 +430,15 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
goto err_unreserve; goto err_unreserve;
} }
req->timeline = engine->timeline; req->timeline = i915_gem_context_lookup_timeline(ctx, engine);
GEM_BUG_ON(req->timeline == engine->timeline);
spin_lock_init(&req->lock); spin_lock_init(&req->lock);
dma_fence_init(&req->fence, dma_fence_init(&req->fence,
&i915_fence_ops, &i915_fence_ops,
&req->lock, &req->lock,
req->timeline->fence_context, req->timeline->fence_context,
timeline_get_seqno(req->timeline->common)); __timeline_get_seqno(req->timeline->common));
i915_sw_fence_init(&req->submit, submit_notify); i915_sw_fence_init(&req->submit, submit_notify);
...@@ -722,9 +722,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) ...@@ -722,9 +722,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
&request->submitq); &request->submitq);
spin_lock_irq(&timeline->lock);
list_add_tail(&request->link, &timeline->requests); list_add_tail(&request->link, &timeline->requests);
spin_unlock_irq(&timeline->lock);
GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno,
request->fence.seqno));
timeline->last_pending_seqno = request->fence.seqno; timeline->last_submitted_seqno = request->fence.seqno;
i915_gem_active_set(&timeline->last_request, request); i915_gem_active_set(&timeline->last_request, request);
list_add_tail(&request->ring_link, &ring->request_list); list_add_tail(&request->ring_link, &ring->request_list);
...@@ -991,7 +996,7 @@ static void engine_retire_requests(struct intel_engine_cs *engine) ...@@ -991,7 +996,7 @@ static void engine_retire_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe(request, next, list_for_each_entry_safe(request, next,
&engine->timeline->requests, link) { &engine->timeline->requests, link) {
if (!i915_gem_request_completed(request)) if (!__i915_gem_request_completed(request))
return; return;
i915_gem_request_retire(request); i915_gem_request_retire(request);
......
...@@ -48,6 +48,7 @@ int i915_gem_timeline_init(struct drm_i915_private *i915, ...@@ -48,6 +48,7 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
tl->fence_context = fences++; tl->fence_context = fences++;
tl->common = timeline; tl->common = timeline;
spin_lock_init(&tl->lock);
init_request_active(&tl->last_request, NULL); init_request_active(&tl->last_request, NULL);
INIT_LIST_HEAD(&tl->requests); INIT_LIST_HEAD(&tl->requests);
} }
......
...@@ -34,7 +34,8 @@ struct i915_gem_timeline; ...@@ -34,7 +34,8 @@ struct i915_gem_timeline;
struct intel_timeline { struct intel_timeline {
u64 fence_context; u64 fence_context;
u32 last_submitted_seqno; u32 last_submitted_seqno;
u32 last_pending_seqno;
spinlock_t lock;
/** /**
* List of breadcrumbs associated with GPU requests currently * List of breadcrumbs associated with GPU requests currently
......
...@@ -569,9 +569,4 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); ...@@ -569,9 +569,4 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
unsigned int intel_kick_waiters(struct drm_i915_private *i915); unsigned int intel_kick_waiters(struct drm_i915_private *i915);
unsigned int intel_kick_signalers(struct drm_i915_private *i915); unsigned int intel_kick_signalers(struct drm_i915_private *i915);
/*
 * Report whether @engine's timeline currently has a last_request tracked,
 * as determined by i915_gem_active_isset().
 */
static inline bool intel_engine_is_active(struct intel_engine_cs *engine)
{
return i915_gem_active_isset(&engine->timeline->last_request);
}
#endif /* _INTEL_RINGBUFFER_H_ */ #endif /* _INTEL_RINGBUFFER_H_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment