Commit 21950ee7 authored by Chris Wilson

drm/i915: Pull i915_gem_active into the i915_active family

Looking forward, we need to break the struct_mutex dependency on
i915_gem_active. In the meantime, external use of i915_gem_active is
quite beguiling, little do new users suspect that it implies a barrier
as each request it tracks must be ordered wrt the previous one. As one
of many, it can be used to track activity across multiple timelines, a
shared fence, which fits our unordered request submission much better. We
need to steer external users away from the singular, exclusive fence
imposed by i915_gem_active to i915_active instead. As part of that
process, we move i915_gem_active out of i915_request.c into
i915_active.c to start separating the two concepts, and rename it to
i915_active_request (both to tie it to the concept of tracking just one
request, and to give it a longer, less appealing name).
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190205130005.2807-5-chris@chris-wilson.co.uk
parent 5f5c139d
...@@ -21,7 +21,7 @@ static struct i915_global_active { ...@@ -21,7 +21,7 @@ static struct i915_global_active {
} global; } global;
struct active_node { struct active_node {
struct i915_gem_active base; struct i915_active_request base;
struct i915_active *ref; struct i915_active *ref;
struct rb_node node; struct rb_node node;
u64 timeline; u64 timeline;
...@@ -33,7 +33,7 @@ __active_park(struct i915_active *ref) ...@@ -33,7 +33,7 @@ __active_park(struct i915_active *ref)
struct active_node *it, *n; struct active_node *it, *n;
rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
GEM_BUG_ON(i915_gem_active_isset(&it->base)); GEM_BUG_ON(i915_active_request_isset(&it->base));
kmem_cache_free(global.slab_cache, it); kmem_cache_free(global.slab_cache, it);
} }
ref->tree = RB_ROOT; ref->tree = RB_ROOT;
...@@ -53,18 +53,18 @@ __active_retire(struct i915_active *ref) ...@@ -53,18 +53,18 @@ __active_retire(struct i915_active *ref)
} }
static void static void
node_retire(struct i915_gem_active *base, struct i915_request *rq) node_retire(struct i915_active_request *base, struct i915_request *rq)
{ {
__active_retire(container_of(base, struct active_node, base)->ref); __active_retire(container_of(base, struct active_node, base)->ref);
} }
static void static void
last_retire(struct i915_gem_active *base, struct i915_request *rq) last_retire(struct i915_active_request *base, struct i915_request *rq)
{ {
__active_retire(container_of(base, struct i915_active, last)); __active_retire(container_of(base, struct i915_active, last));
} }
static struct i915_gem_active * static struct i915_active_request *
active_instance(struct i915_active *ref, u64 idx) active_instance(struct i915_active *ref, u64 idx)
{ {
struct active_node *node; struct active_node *node;
...@@ -85,7 +85,7 @@ active_instance(struct i915_active *ref, u64 idx) ...@@ -85,7 +85,7 @@ active_instance(struct i915_active *ref, u64 idx)
* twice for the same timeline (as the older rbtree element will be * twice for the same timeline (as the older rbtree element will be
* retired before the new request added to last). * retired before the new request added to last).
*/ */
old = i915_gem_active_raw(&ref->last, BKL(ref)); old = i915_active_request_raw(&ref->last, BKL(ref));
if (!old || old->fence.context == idx) if (!old || old->fence.context == idx)
goto out; goto out;
...@@ -110,7 +110,7 @@ active_instance(struct i915_active *ref, u64 idx) ...@@ -110,7 +110,7 @@ active_instance(struct i915_active *ref, u64 idx)
node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL); node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
/* kmalloc may retire the ref->last (thanks shrinker)! */ /* kmalloc may retire the ref->last (thanks shrinker)! */
if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) { if (unlikely(!i915_active_request_raw(&ref->last, BKL(ref)))) {
kmem_cache_free(global.slab_cache, node); kmem_cache_free(global.slab_cache, node);
goto out; goto out;
} }
...@@ -118,7 +118,7 @@ active_instance(struct i915_active *ref, u64 idx) ...@@ -118,7 +118,7 @@ active_instance(struct i915_active *ref, u64 idx)
if (unlikely(!node)) if (unlikely(!node))
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
init_request_active(&node->base, node_retire); i915_active_request_init(&node->base, NULL, node_retire);
node->ref = ref; node->ref = ref;
node->timeline = idx; node->timeline = idx;
...@@ -133,7 +133,7 @@ active_instance(struct i915_active *ref, u64 idx) ...@@ -133,7 +133,7 @@ active_instance(struct i915_active *ref, u64 idx)
* callback not two, and so must undo the active counting for the * callback not two, and so must undo the active counting for the
* overwritten slot. * overwritten slot.
*/ */
if (i915_gem_active_isset(&node->base)) { if (i915_active_request_isset(&node->base)) {
/* Retire ourselves from the old rq->active_list */ /* Retire ourselves from the old rq->active_list */
__list_del_entry(&node->base.link); __list_del_entry(&node->base.link);
ref->count--; ref->count--;
...@@ -154,7 +154,7 @@ void i915_active_init(struct drm_i915_private *i915, ...@@ -154,7 +154,7 @@ void i915_active_init(struct drm_i915_private *i915,
ref->i915 = i915; ref->i915 = i915;
ref->retire = retire; ref->retire = retire;
ref->tree = RB_ROOT; ref->tree = RB_ROOT;
init_request_active(&ref->last, last_retire); i915_active_request_init(&ref->last, NULL, last_retire);
ref->count = 0; ref->count = 0;
} }
...@@ -162,15 +162,15 @@ int i915_active_ref(struct i915_active *ref, ...@@ -162,15 +162,15 @@ int i915_active_ref(struct i915_active *ref,
u64 timeline, u64 timeline,
struct i915_request *rq) struct i915_request *rq)
{ {
struct i915_gem_active *active; struct i915_active_request *active;
active = active_instance(ref, timeline); active = active_instance(ref, timeline);
if (IS_ERR(active)) if (IS_ERR(active))
return PTR_ERR(active); return PTR_ERR(active);
if (!i915_gem_active_isset(active)) if (!i915_active_request_isset(active))
ref->count++; ref->count++;
i915_gem_active_set(active, rq); __i915_active_request_set(active, rq);
GEM_BUG_ON(!ref->count); GEM_BUG_ON(!ref->count);
return 0; return 0;
...@@ -196,12 +196,12 @@ int i915_active_wait(struct i915_active *ref) ...@@ -196,12 +196,12 @@ int i915_active_wait(struct i915_active *ref)
if (i915_active_acquire(ref)) if (i915_active_acquire(ref))
goto out_release; goto out_release;
ret = i915_gem_active_retire(&ref->last, BKL(ref)); ret = i915_active_request_retire(&ref->last, BKL(ref));
if (ret) if (ret)
goto out_release; goto out_release;
rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
ret = i915_gem_active_retire(&it->base, BKL(ref)); ret = i915_active_request_retire(&it->base, BKL(ref));
if (ret) if (ret)
break; break;
} }
...@@ -211,11 +211,11 @@ int i915_active_wait(struct i915_active *ref) ...@@ -211,11 +211,11 @@ int i915_active_wait(struct i915_active *ref)
return ret; return ret;
} }
static int __i915_request_await_active(struct i915_request *rq, int i915_request_await_active_request(struct i915_request *rq,
struct i915_gem_active *active) struct i915_active_request *active)
{ {
struct i915_request *barrier = struct i915_request *barrier =
i915_gem_active_raw(active, &rq->i915->drm.struct_mutex); i915_active_request_raw(active, &rq->i915->drm.struct_mutex);
return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0; return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
} }
...@@ -225,12 +225,12 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) ...@@ -225,12 +225,12 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
struct active_node *it, *n; struct active_node *it, *n;
int ret; int ret;
ret = __i915_request_await_active(rq, &ref->last); ret = i915_request_await_active_request(rq, &ref->last);
if (ret) if (ret)
return ret; return ret;
rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) { rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
ret = __i915_request_await_active(rq, &it->base); ret = i915_request_await_active_request(rq, &it->base);
if (ret) if (ret)
return ret; return ret;
} }
...@@ -241,12 +241,32 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref) ...@@ -241,12 +241,32 @@ int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref) void i915_active_fini(struct i915_active *ref)
{ {
GEM_BUG_ON(i915_gem_active_isset(&ref->last)); GEM_BUG_ON(i915_active_request_isset(&ref->last));
GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree)); GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
GEM_BUG_ON(ref->count); GEM_BUG_ON(ref->count);
} }
#endif #endif
/**
 * i915_active_request_set - track a new request, ordered after the old one
 * @active: the active tracker
 * @rq: the request to watch
 *
 * Makes @rq await whatever request @active is currently tracking, and only
 * then switches the tracker over to @rq.
 *
 * Returns 0 on success. If the await fails its error is returned and
 * @active is left tracking its previous request.
 */
int i915_active_request_set(struct i915_active_request *active,
			    struct i915_request *rq)
{
	int ret;

	/* Must maintain ordering wrt previous active requests */
	ret = i915_request_await_active_request(rq, active);
	if (ret == 0)
		__i915_active_request_set(active, rq);

	return ret;
}
/*
* i915_active_retire_noop - retirement callback that does nothing
*
* Both arguments are ignored. This is the callback installed by
* i915_active_request_init() when the caller passes a NULL retire function.
*/
void i915_active_retire_noop(struct i915_active_request *active,
struct i915_request *request)
{
/* Space left intentionally blank */
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c" #include "selftests/i915_active.c"
#endif #endif
......
...@@ -7,7 +7,354 @@ ...@@ -7,7 +7,354 @@
#ifndef _I915_ACTIVE_H_ #ifndef _I915_ACTIVE_H_
#define _I915_ACTIVE_H_ #define _I915_ACTIVE_H_
#include <linux/lockdep.h>
#include "i915_active_types.h" #include "i915_active_types.h"
#include "i915_request.h"
/*
* We treat requests as fences. This is not to be confused with our
* "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
* We use the fences to synchronize access from the CPU with activity on the
* GPU, for example, we should not rewrite an object's PTE whilst the GPU
* is reading them. We also track fences at a higher level to provide
* implicit synchronisation around GEM objects, e.g. set-domain will wait
* for outstanding GPU rendering before marking the object ready for CPU
* access, or a pageflip will wait until the GPU is complete before showing
* the frame on the scanout.
*
* In order to use a fence, the object must track the fence it needs to
* serialise with. For example, GEM objects want to track both read and
* write access so that we can perform concurrent read operations between
* the CPU and GPU engines, as well as waiting for all rendering to
* complete, or waiting for the last GPU user of a "fence register". The
* object then embeds a #i915_active_request to track the most recent (in
* retirement order) request relevant for the desired mode of access.
* The #i915_active_request is updated with i915_active_request_set() to
* track the most recent fence request, typically this is done as part of
* i915_vma_move_to_active().
*
* When the #i915_active_request completes (is retired), it will
* signal its completion to the owner through a callback as well as mark
* itself as idle (i915_active_request.request == NULL). The owner
* can then perform any action, such as delayed freeing of an active
* resource including itself.
*/
/*
* No-op retirement callback; the inline helpers in this header substitute
* it whenever a NULL callback is supplied.
*/
void i915_active_retire_noop(struct i915_active_request *active,
struct i915_request *request);
/**
 * i915_active_request_init - prepares the activity tracker for use
 * @active: the active tracker
 * @rq: initial request to track, can be NULL
 * @retire: callback invoked when the tracker is retired (becomes idle),
 *          can be NULL
 *
 * i915_active_request_init() prepares the embedded @active struct for use as
 * an activity tracker, that is for tracking the last known active request
 * associated with it. When the last request becomes idle, i.e. when it is
 * retired after completion, the optional callback @retire is invoked.
 */
static inline void
i915_active_request_init(struct i915_active_request *active,
			 struct i915_request *rq,
			 i915_active_retire_fn retire)
{
	/* Substitute the no-op so that ->retire is never left NULL */
	if (!retire)
		retire = i915_active_retire_noop;

	active->retire = retire;
	INIT_LIST_HEAD(&active->link);
	RCU_INIT_POINTER(active->request, rq);
}
/* Initialise @name as an idle tracker: no request and the no-op retire callback */
#define INIT_ACTIVE_REQUEST(name) i915_active_request_init((name), NULL, NULL)
/**
* __i915_active_request_set - updates the tracker to watch the current request
* @active: the active tracker
* @request: the request to watch
*
* __i915_active_request_set() watches the given @request for completion. Whilst
* that @request is busy, the @active reports busy. When that @request is
* retired, the @active tracker is updated to report idle.
*
* This helper does not itself order @request after the request previously
* tracked by @active; it only moves the tracker.
*/
static inline void
__i915_active_request_set(struct i915_active_request *active,
struct i915_request *request)
{
/* Hook the tracker onto the new request's active_list ... */
list_move(&active->link, &request->active_list);
/* ... and then publish the new request pointer to RCU readers */
rcu_assign_pointer(active->request, request);
}
/*
* Update @active to track @rq. Returns an int status which, being
* __must_check, the caller may not ignore.
*/
int __must_check
i915_active_request_set(struct i915_active_request *active,
struct i915_request *rq);
/**
 * i915_active_request_set_retire_fn - updates the retirement callback
 * @active: the active tracker
 * @fn: the routine called when the request is retired, can be NULL
 * @mutex: struct_mutex used to guard retirements; must be held by the caller
 *
 * i915_active_request_set_retire_fn() replaces the function pointer that
 * is called when the final request associated with the @active tracker
 * is retired. Passing a NULL @fn installs the no-op callback.
 */
static inline void
i915_active_request_set_retire_fn(struct i915_active_request *active,
				  i915_active_retire_fn fn,
				  struct mutex *mutex)
{
	lockdep_assert_held(mutex);

	if (!fn)
		fn = i915_active_retire_noop;

	active->retire = fn;
}
/*
* __i915_active_request_peek - read the tracked request with no lock check
*
* Returns the request currently stored in @active, or NULL. The protection
* condition handed to rcu_dereference_protected() is the constant 1, so no
* lock is asserted here. No reference is taken on the returned request.
*/
static inline struct i915_request *
__i915_active_request_peek(const struct i915_active_request *active)
{
/*
* Inside the error capture (running with the driver in an unknown
* state), we want to bend the rules slightly (a lot).
*
* Work is in progress to make it safer, in the meantime this keeps
* the known issue from spamming the logs.
*/
return rcu_dereference_protected(active->request, 1);
}
/**
* i915_active_request_raw - return the active request
* @active: the active tracker
* @mutex: the mutex protecting @active (struct_mutex); checked with
*         lockdep_is_held()
*
* i915_active_request_raw() returns the current request being tracked, or NULL.
* It does not obtain a reference on the request for the caller, so the caller
* must hold struct_mutex. The request is returned whether or not it has
* already completed.
*/
static inline struct i915_request *
i915_active_request_raw(const struct i915_active_request *active,
struct mutex *mutex)
{
return rcu_dereference_protected(active->request,
lockdep_is_held(mutex));
}
/**
 * i915_active_request_peek - report the active request being monitored
 * @active: the active tracker
 * @mutex: the mutex protecting @active (struct_mutex)
 *
 * i915_active_request_peek() returns the current request being tracked if
 * still active, or NULL. It does not obtain a reference on the request
 * for the caller, so the caller must hold struct_mutex.
 */
static inline struct i915_request *
i915_active_request_peek(const struct i915_active_request *active,
			 struct mutex *mutex)
{
	struct i915_request *rq = i915_active_request_raw(active, mutex);

	/* A request that is still tracked but already completed counts as idle */
	if (rq && !i915_request_completed(rq))
		return rq;

	return NULL;
}
/**
* i915_active_request_get - return a reference to the active request
* @active: the active tracker
* @mutex: the mutex protecting @active (struct_mutex)
*
* i915_active_request_get() returns a reference to the active request, or NULL
* if the active tracker is idle. The caller must hold struct_mutex.
*/
static inline struct i915_request *
i915_active_request_get(const struct i915_active_request *active,
struct mutex *mutex)
{
/*
* peek() may yield NULL, which is passed straight to i915_request_get().
* NOTE(review): presumably i915_request_get() is NULL-safe - confirm.
*/
return i915_request_get(i915_active_request_peek(active, mutex));
}
/**
* __i915_active_request_get_rcu - return a reference to the active request
* @active: the active tracker
*
* __i915_active_request_get_rcu() returns a reference to the active request,
* or NULL if the active tracker is idle. The caller must hold the RCU read
* lock, but the returned pointer is safe to use outside of RCU.
*/
static inline struct i915_request *
__i915_active_request_get_rcu(const struct i915_active_request *active)
{
/*
* Performing a lockless retrieval of the active request is super
* tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
* slab of request objects will not be freed whilst we hold the
* RCU read lock. It does not guarantee that the request itself
* will not be freed and then *reused*. Viz,
*
* Thread A Thread B
*
* rq = active.request
* retire(rq) -> free(rq);
* (rq is now first on the slab freelist)
* active.request = NULL
*
* rq = new submission on a new object
* ref(rq)
*
* To prevent the request from being reused whilst the caller
* uses it, we take a reference like normal. Whilst acquiring
* the reference we check that it is not in a destroyed state
* (refcnt == 0). That prevents the request being reallocated
* whilst the caller holds on to it. To check that the request
* was not reallocated as we acquired the reference we have to
* check that our request remains the active request across
* the lookup, in the same manner as a seqlock. The visibility
* of the pointer versus the reference counting is controlled
* by using RCU barriers (rcu_dereference and rcu_assign_pointer).
*
* In the middle of all that, we inspect whether the request is
* complete. Retiring is lazy so the request may be completed long
* before the active tracker is updated. Querying whether the
* request is complete is far cheaper (as it involves no locked
* instructions setting cachelines to exclusive) than acquiring
* the reference, so we do it first. The RCU read lock ensures the
* pointer dereference is valid, but does not ensure that the
* seqno nor HWS is the right one! However, if the request was
* reallocated, that means the active tracker's request was complete.
* If the new request is also complete, then both are and we can
* just report the active tracker is idle. If the new request is
* incomplete, then we acquire a reference on it and check that
* it remained the active request.
*
* It is then imperative that we do not zero the request on
* reallocation, so that we can chase the dangling pointers!
* See i915_request_alloc().
*/
do {
struct i915_request *request;
request = rcu_dereference(active->request);
if (!request || i915_request_completed(request))
return NULL;
/*
* An especially silly compiler could decide to recompute the
* result of i915_request_completed, more specifically
* re-emit the load for request->fence.seqno. A race would catch
* a later seqno value, which could flip the result from true to
* false. Which means part of the instructions below might not
* be executed, while later on instructions are executed. Due to
* barriers within the refcounting the inconsistency can't reach
* past the call to i915_request_get_rcu, but not executing
* that while still executing i915_request_put() creates
* havoc enough. Prevent this with a compiler barrier.
*/
barrier();
request = i915_request_get_rcu(request);
/*
* What stops the following rcu_access_pointer() from occurring
* before the above i915_request_get_rcu()? If we were
* to read the value before pausing to get the reference to
* the request, we may not notice a change in the active
* tracker.
*
* The rcu_access_pointer() is a mere compiler barrier, which
* means both the CPU and compiler are free to perform the
* memory read without constraint. The compiler only has to
* ensure that any operations after the rcu_access_pointer()
* occur afterwards in program order. This means the read may
* be performed earlier by an out-of-order CPU, or adventurous
* compiler.
*
* The atomic operation at the heart of
* i915_request_get_rcu(), see dma_fence_get_rcu(), is
* atomic_inc_not_zero() which is only a full memory barrier
* when successful. That is, if i915_request_get_rcu()
* returns the request (and so with the reference counted
* incremented) then the following read for rcu_access_pointer()
* must occur after the atomic operation and so confirm
* that this request is the one currently being tracked.
*
* The corresponding write barrier is part of
* rcu_assign_pointer().
*/
if (!request || request == rcu_access_pointer(active->request))
return rcu_pointer_handoff(request);
/* The tracker moved on whilst we took the reference: drop it and retry */
i915_request_put(request);
} while (1);
}
/**
 * i915_active_request_get_unlocked - return a reference to the active request
 * @active: the active tracker
 *
 * i915_active_request_get_unlocked() returns a reference to the active
 * request, or NULL if the active tracker is idle. The RCU read lock is taken
 * and released internally around the lookup, so no locking is required of
 * the caller.
 *
 * The reference should be freed with i915_request_put().
 */
static inline struct i915_request *
i915_active_request_get_unlocked(const struct i915_active_request *active)
{
	struct i915_request *rq;

	rcu_read_lock();
	rq = __i915_active_request_get_rcu(active);
	rcu_read_unlock();

	return rq;
}
/**
* i915_active_request_isset - report whether the active tracker is assigned
* @active - the active tracker
*
* i915_active_request_isset() returns true if the active tracker is currently
* assigned to a request. Due to the lazy retiring, that request may be idle
* and this may report stale information.
*/
static inline bool
i915_active_request_isset(const struct i915_active_request *active)
{
return rcu_access_pointer(active->request);
}
/**
* i915_active_request_retire - waits until the request is retired
* @active: the active request on which to wait
* @mutex: the mutex protecting @active (struct_mutex)
*
* i915_active_request_retire() waits until the request is completed,
* and then ensures that at least the retirement handler for this
* @active tracker is called before returning. If the @active
* tracker is idle, the function returns immediately.
*
* Returns 0 if the tracker was idle or has been retired, or the negative
* value returned by i915_request_wait() if the wait failed.
*/
static inline int __must_check
i915_active_request_retire(struct i915_active_request *active,
struct mutex *mutex)
{
struct i915_request *request;
long ret;
request = i915_active_request_raw(active, mutex);
if (!request)
return 0;
/* Interruptible wait with no timeout */
ret = i915_request_wait(request,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);
if (ret < 0)
return ret;
/* Unlink and mark the tracker idle before invoking the callback */
list_del_init(&active->link);
RCU_INIT_POINTER(active->request, NULL);
active->retire(active, request);
return 0;
}
/* /*
* GPU activity tracking * GPU activity tracking
...@@ -47,6 +394,8 @@ int i915_active_wait(struct i915_active *ref); ...@@ -47,6 +394,8 @@ int i915_active_wait(struct i915_active *ref);
int i915_request_await_active(struct i915_request *rq, int i915_request_await_active(struct i915_request *rq,
struct i915_active *ref); struct i915_active *ref);
int i915_request_await_active_request(struct i915_request *rq,
struct i915_active_request *active);
bool i915_active_acquire(struct i915_active *ref); bool i915_active_acquire(struct i915_active *ref);
......
...@@ -8,16 +8,26 @@ ...@@ -8,16 +8,26 @@
#define _I915_ACTIVE_TYPES_H_ #define _I915_ACTIVE_TYPES_H_
#include <linux/rbtree.h> #include <linux/rbtree.h>
#include <linux/rcupdate.h>
#include "i915_request.h"
struct drm_i915_private; struct drm_i915_private;
struct i915_active_request;
struct i915_request;
/*
* Retirement callback type: receives the tracker and the request it was
* tracking.
*/
typedef void (*i915_active_retire_fn)(struct i915_active_request *,
struct i915_request *);
/* Tracks a single request on behalf of an owner */
struct i915_active_request {
/* RCU-annotated pointer to the tracked request */
struct i915_request __rcu *request;
/* List node for this tracker */
struct list_head link;
/* Retirement callback */
i915_active_retire_fn retire;
};
struct i915_active { struct i915_active {
struct drm_i915_private *i915; struct drm_i915_private *i915;
struct rb_root tree; struct rb_root tree;
struct i915_gem_active last; struct i915_active_request last;
unsigned int count; unsigned int count;
void (*retire)(struct i915_active *ref); void (*retire)(struct i915_active *ref);
......
...@@ -206,7 +206,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) ...@@ -206,7 +206,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
if (vma->fence) if (vma->fence)
seq_printf(m, " , fence: %d%s", seq_printf(m, " , fence: %d%s",
vma->fence->id, vma->fence->id,
i915_gem_active_isset(&vma->last_fence) ? "*" : ""); i915_active_request_isset(&vma->last_fence) ? "*" : "");
seq_puts(m, ")"); seq_puts(m, ")");
} }
if (obj->stolen) if (obj->stolen)
......
...@@ -3018,7 +3018,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915) ...@@ -3018,7 +3018,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915)
GEM_BUG_ON(i915->gt.active_requests); GEM_BUG_ON(i915->gt.active_requests);
for_each_engine(engine, i915, id) { for_each_engine(engine, i915, id) {
GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); GEM_BUG_ON(__i915_active_request_peek(&engine->timeline.last_request));
GEM_BUG_ON(engine->last_retired_context != GEM_BUG_ON(engine->last_retired_context !=
to_intel_context(i915->kernel_context, engine)); to_intel_context(i915->kernel_context, engine));
} }
...@@ -3264,7 +3264,7 @@ wait_for_timelines(struct drm_i915_private *i915, ...@@ -3264,7 +3264,7 @@ wait_for_timelines(struct drm_i915_private *i915,
list_for_each_entry(tl, &gt->active_list, link) { list_for_each_entry(tl, &gt->active_list, link) {
struct i915_request *rq; struct i915_request *rq;
rq = i915_gem_active_get_unlocked(&tl->last_request); rq = i915_active_request_get_unlocked(&tl->last_request);
if (!rq) if (!rq)
continue; continue;
...@@ -4165,7 +4165,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, ...@@ -4165,7 +4165,8 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
} }
static void static void
frontbuffer_retire(struct i915_gem_active *active, struct i915_request *request) frontbuffer_retire(struct i915_active_request *active,
struct i915_request *request)
{ {
struct drm_i915_gem_object *obj = struct drm_i915_gem_object *obj =
container_of(active, typeof(*obj), frontbuffer_write); container_of(active, typeof(*obj), frontbuffer_write);
...@@ -4192,7 +4193,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, ...@@ -4192,7 +4193,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->resv = &obj->__builtin_resv; obj->resv = &obj->__builtin_resv;
obj->frontbuffer_ggtt_origin = ORIGIN_GTT; obj->frontbuffer_ggtt_origin = ORIGIN_GTT;
init_request_active(&obj->frontbuffer_write, frontbuffer_retire); i915_active_request_init(&obj->frontbuffer_write,
NULL, frontbuffer_retire);
obj->mm.madv = I915_MADV_WILLNEED; obj->mm.madv = I915_MADV_WILLNEED;
INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
......
...@@ -322,7 +322,7 @@ static u32 default_desc_template(const struct drm_i915_private *i915, ...@@ -322,7 +322,7 @@ static u32 default_desc_template(const struct drm_i915_private *i915,
return desc; return desc;
} }
static void intel_context_retire(struct i915_gem_active *active, static void intel_context_retire(struct i915_active_request *active,
struct i915_request *rq) struct i915_request *rq)
{ {
struct intel_context *ce = struct intel_context *ce =
...@@ -344,7 +344,8 @@ intel_context_init(struct intel_context *ce, ...@@ -344,7 +344,8 @@ intel_context_init(struct intel_context *ce,
/* Use the whole device by default */ /* Use the whole device by default */
ce->sseu = intel_device_default_sseu(ctx->i915); ce->sseu = intel_device_default_sseu(ctx->i915);
init_request_active(&ce->active_tracker, intel_context_retire); i915_active_request_init(&ce->active_tracker,
NULL, intel_context_retire);
} }
static struct i915_gem_context * static struct i915_gem_context *
...@@ -668,7 +669,7 @@ last_request_on_engine(struct i915_timeline *timeline, ...@@ -668,7 +669,7 @@ last_request_on_engine(struct i915_timeline *timeline,
GEM_BUG_ON(timeline == &engine->timeline); GEM_BUG_ON(timeline == &engine->timeline);
rq = i915_gem_active_raw(&timeline->last_request, rq = i915_active_request_raw(&timeline->last_request,
&engine->i915->drm.struct_mutex); &engine->i915->drm.struct_mutex);
if (rq && rq->engine == engine) { if (rq && rq->engine == engine) {
GEM_TRACE("last request for %s on engine %s: %llx:%llu\n", GEM_TRACE("last request for %s on engine %s: %llx:%llu\n",
...@@ -1015,7 +1016,7 @@ gen8_modify_rpcs_gpu(struct intel_context *ce, ...@@ -1015,7 +1016,7 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
} }
/* Queue this switch after all other activity by this context. */ /* Queue this switch after all other activity by this context. */
prev = i915_gem_active_raw(&ce->ring->timeline->last_request, prev = i915_active_request_raw(&ce->ring->timeline->last_request,
&i915->drm.struct_mutex); &i915->drm.struct_mutex);
if (prev && !i915_request_completed(prev)) { if (prev && !i915_request_completed(prev)) {
ret = i915_request_await_dma_fence(rq, &prev->fence); ret = i915_request_await_dma_fence(rq, &prev->fence);
...@@ -1039,9 +1040,9 @@ gen8_modify_rpcs_gpu(struct intel_context *ce, ...@@ -1039,9 +1040,9 @@ gen8_modify_rpcs_gpu(struct intel_context *ce,
* But we only need to take one pin on the account of it. Or in other * But we only need to take one pin on the account of it. Or in other
* words transfer the pinned ce object to tracked active request. * words transfer the pinned ce object to tracked active request.
*/ */
if (!i915_gem_active_isset(&ce->active_tracker)) if (!i915_active_request_isset(&ce->active_tracker))
__intel_context_pin(ce); __intel_context_pin(ce);
i915_gem_active_set(&ce->active_tracker, rq); __i915_active_request_set(&ce->active_tracker, rq);
out_add: out_add:
i915_request_add(rq); i915_request_add(rq);
......
...@@ -187,7 +187,7 @@ struct i915_gem_context { ...@@ -187,7 +187,7 @@ struct i915_gem_context {
* active_tracker: Active tracker for the external rq activity * active_tracker: Active tracker for the external rq activity
* on this intel_context object. * on this intel_context object.
*/ */
struct i915_gem_active active_tracker; struct i915_active_request active_tracker;
const struct intel_context_ops *ops; const struct intel_context_ops *ops;
......
...@@ -223,7 +223,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, ...@@ -223,7 +223,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
i915_gem_object_get_tiling(vma->obj))) i915_gem_object_get_tiling(vma->obj)))
return -EINVAL; return -EINVAL;
ret = i915_gem_active_retire(&vma->last_fence, ret = i915_active_request_retire(&vma->last_fence,
&vma->obj->base.dev->struct_mutex); &vma->obj->base.dev->struct_mutex);
if (ret) if (ret)
return ret; return ret;
...@@ -232,7 +232,7 @@ static int fence_update(struct drm_i915_fence_reg *fence, ...@@ -232,7 +232,7 @@ static int fence_update(struct drm_i915_fence_reg *fence,
if (fence->vma) { if (fence->vma) {
struct i915_vma *old = fence->vma; struct i915_vma *old = fence->vma;
ret = i915_gem_active_retire(&old->last_fence, ret = i915_active_request_retire(&old->last_fence,
&old->obj->base.dev->struct_mutex); &old->obj->base.dev->struct_mutex);
if (ret) if (ret)
return ret; return ret;
......
...@@ -1918,7 +1918,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size) ...@@ -1918,7 +1918,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
i915_active_init(i915, &vma->active, NULL); i915_active_init(i915, &vma->active, NULL);
init_request_active(&vma->last_fence, NULL); INIT_ACTIVE_REQUEST(&vma->last_fence);
vma->vm = &ggtt->vm; vma->vm = &ggtt->vm;
vma->ops = &pd_vma_ops; vma->ops = &pd_vma_ops;
......
...@@ -175,7 +175,7 @@ struct drm_i915_gem_object { ...@@ -175,7 +175,7 @@ struct drm_i915_gem_object {
atomic_t frontbuffer_bits; atomic_t frontbuffer_bits;
unsigned int frontbuffer_ggtt_origin; /* write once */ unsigned int frontbuffer_ggtt_origin; /* write once */
struct i915_gem_active frontbuffer_write; struct i915_active_request frontbuffer_write;
/** Current tiling stride for the object, if it's tiled. */ /** Current tiling stride for the object, if it's tiled. */
unsigned int tiling_and_stride; unsigned int tiling_and_stride;
......
...@@ -1062,23 +1062,23 @@ i915_error_object_create(struct drm_i915_private *i915, ...@@ -1062,23 +1062,23 @@ i915_error_object_create(struct drm_i915_private *i915,
} }
/* The error capture is special as it tries to run underneath the normal /* The error capture is special as it tries to run underneath the normal
* locking rules - so we use the raw version of the i915_gem_active lookup. * locking rules - so we use the raw version of the i915_active_request lookup.
*/ */
static inline u32 static inline u32
__active_get_seqno(struct i915_gem_active *active) __active_get_seqno(struct i915_active_request *active)
{ {
struct i915_request *request; struct i915_request *request;
request = __i915_gem_active_peek(active); request = __i915_active_request_peek(active);
return request ? request->global_seqno : 0; return request ? request->global_seqno : 0;
} }
static inline int static inline int
__active_get_engine_id(struct i915_gem_active *active) __active_get_engine_id(struct i915_active_request *active)
{ {
struct i915_request *request; struct i915_request *request;
request = __i915_gem_active_peek(active); request = __i915_active_request_peek(active);
return request ? request->engine->id : -1; return request ? request->engine->id : -1;
} }
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_active.h"
#include "i915_reset.h" #include "i915_reset.h"
static const char *i915_fence_get_driver_name(struct dma_fence *fence) static const char *i915_fence_get_driver_name(struct dma_fence *fence)
...@@ -125,12 +126,6 @@ static void unreserve_gt(struct drm_i915_private *i915) ...@@ -125,12 +126,6 @@ static void unreserve_gt(struct drm_i915_private *i915)
i915_gem_park(i915); i915_gem_park(i915);
} }
/*
* i915_gem_retire_noop - retirement callback that deliberately does nothing
*
* Both arguments are ignored; the body is empty on purpose.
*/
void i915_gem_retire_noop(struct i915_gem_active *active,
struct i915_request *request)
{
/* Space left intentionally blank */
}
static void advance_ring(struct i915_request *request) static void advance_ring(struct i915_request *request)
{ {
struct intel_ring *ring = request->ring; struct intel_ring *ring = request->ring;
...@@ -244,7 +239,7 @@ static void __retire_engine_upto(struct intel_engine_cs *engine, ...@@ -244,7 +239,7 @@ static void __retire_engine_upto(struct intel_engine_cs *engine,
static void i915_request_retire(struct i915_request *request) static void i915_request_retire(struct i915_request *request)
{ {
struct i915_gem_active *active, *next; struct i915_active_request *active, *next;
GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n", GEM_TRACE("%s fence %llx:%lld, global=%d, current %d:%d\n",
request->engine->name, request->engine->name,
...@@ -278,10 +273,10 @@ static void i915_request_retire(struct i915_request *request) ...@@ -278,10 +273,10 @@ static void i915_request_retire(struct i915_request *request)
* we may spend an inordinate amount of time simply handling * we may spend an inordinate amount of time simply handling
* the retirement of requests and processing their callbacks. * the retirement of requests and processing their callbacks.
* Of which, this loop itself is particularly hot due to the * Of which, this loop itself is particularly hot due to the
* cache misses when jumping around the list of i915_gem_active. * cache misses when jumping around the list of
* So we try to keep this loop as streamlined as possible and * i915_active_request. So we try to keep this loop as
* also prefetch the next i915_gem_active to try and hide * streamlined as possible and also prefetch the next
* the likely cache miss. * i915_active_request to try and hide the likely cache miss.
*/ */
prefetchw(next); prefetchw(next);
...@@ -526,17 +521,9 @@ i915_request_alloc_slow(struct intel_context *ce) ...@@ -526,17 +521,9 @@ i915_request_alloc_slow(struct intel_context *ce)
return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL); return kmem_cache_alloc(ce->gem_context->i915->requests, GFP_KERNEL);
} }
/*
* add_barrier - make @rq wait for the request tracked by @active, if any
*
* The tracker is read with i915_gem_active_raw(), which checks (via lockdep)
* that rq->i915->drm.struct_mutex is held. Returns 0 when @active tracks no
* request, otherwise the result of i915_request_await_dma_fence().
*/
static int add_barrier(struct i915_request *rq, struct i915_gem_active *active)
{
struct i915_request *barrier =
i915_gem_active_raw(active, &rq->i915->drm.struct_mutex);
return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
}
static int add_timeline_barrier(struct i915_request *rq) static int add_timeline_barrier(struct i915_request *rq)
{ {
return add_barrier(rq, &rq->timeline->barrier); return i915_request_await_active_request(rq, &rq->timeline->barrier);
} }
/** /**
...@@ -595,7 +582,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) ...@@ -595,7 +582,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
* We use RCU to look up requests in flight. The lookups may * We use RCU to look up requests in flight. The lookups may
* race with the request being allocated from the slab freelist. * race with the request being allocated from the slab freelist.
* That is the request we are writing to here, may be in the process * That is the request we are writing to here, may be in the process
* of being read by __i915_gem_active_get_rcu(). As such, * of being read by __i915_active_request_get_rcu(). As such,
* we have to be very careful when overwriting the contents. During * we have to be very careful when overwriting the contents. During
* the RCU lookup, we chase the request->engine pointer, * the RCU lookup, we chase the request->engine pointer,
* read the request->global_seqno and increment the reference count. * read the request->global_seqno and increment the reference count.
...@@ -937,7 +924,7 @@ void i915_request_add(struct i915_request *request) ...@@ -937,7 +924,7 @@ void i915_request_add(struct i915_request *request)
* see a more recent value in the hws than we are tracking. * see a more recent value in the hws than we are tracking.
*/ */
prev = i915_gem_active_raw(&timeline->last_request, prev = i915_active_request_raw(&timeline->last_request,
&request->i915->drm.struct_mutex); &request->i915->drm.struct_mutex);
if (prev && !i915_request_completed(prev)) { if (prev && !i915_request_completed(prev)) {
i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
...@@ -954,7 +941,7 @@ void i915_request_add(struct i915_request *request) ...@@ -954,7 +941,7 @@ void i915_request_add(struct i915_request *request)
spin_unlock_irq(&timeline->lock); spin_unlock_irq(&timeline->lock);
GEM_BUG_ON(timeline->seqno != request->fence.seqno); GEM_BUG_ON(timeline->seqno != request->fence.seqno);
i915_gem_active_set(&timeline->last_request, request); __i915_active_request_set(&timeline->last_request, request);
list_add_tail(&request->ring_link, &ring->request_list); list_add_tail(&request->ring_link, &ring->request_list);
if (list_is_first(&request->ring_link, &ring->request_list)) { if (list_is_first(&request->ring_link, &ring->request_list)) {
......
...@@ -403,387 +403,4 @@ static inline void i915_request_mark_complete(struct i915_request *rq) ...@@ -403,387 +403,4 @@ static inline void i915_request_mark_complete(struct i915_request *rq)
void i915_retire_requests(struct drm_i915_private *i915); void i915_retire_requests(struct drm_i915_private *i915);
/*
* We treat requests as fences. This is not to be confused with our
* "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
* We use the fences to synchronize access from the CPU with activity on the
* GPU, for example, we should not rewrite an object's PTE whilst the GPU
* is reading them. We also track fences at a higher level to provide
* implicit synchronisation around GEM objects, e.g. set-domain will wait
* for outstanding GPU rendering before marking the object ready for CPU
* access, or a pageflip will wait until the GPU is complete before showing
* the frame on the scanout.
*
* In order to use a fence, the object must track the fence it needs to
* serialise with. For example, GEM objects want to track both read and
* write access so that we can perform concurrent read operations between
* the CPU and GPU engines, as well as waiting for all rendering to
* complete, or waiting for the last GPU user of a "fence register". The
* object then embeds a #i915_gem_active to track the most recent (in
* retirement order) request relevant for the desired mode of access.
* The #i915_gem_active is updated with i915_gem_active_set() to track the
* most recent fence request, typically this is done as part of
* i915_vma_move_to_active().
*
* When the #i915_gem_active completes (is retired), it will
* signal its completion to the owner through a callback as well as mark
* itself as idle (i915_gem_active.request == NULL). The owner
* can then perform any action, such as delayed freeing of an active
* resource including itself.
*/
struct i915_gem_active;
/*
* Retirement callback: called with the tracker and the request it was
* tracking, see i915_gem_active_retire().
*/
typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
struct i915_request *);
/* Tracker for a single request. */
struct i915_gem_active {
/*
* The tracked request, or NULL when idle. RCU annotated; see
* __i915_gem_active_get_rcu() for the lockless lookup.
*/
struct i915_request __rcu *request;
/* Node on the tracked request's active_list, see i915_gem_active_set(). */
struct list_head link;
/*
* Retirement callback; init_request_active() and
* i915_gem_active_set_retire_fn() store i915_gem_retire_noop here when
* the owner passes NULL.
*/
i915_gem_retire_fn retire;
};
/* Do-nothing callback stored in place of a NULL retire function. */
void i915_gem_retire_noop(struct i915_gem_active *,
struct i915_request *request);
/**
* init_request_active - prepares the activity tracker for use
* @active - the active tracker
* @retire - a callback invoked when the tracker is retired (becomes idle),
* can be NULL
*
* init_request_active() prepares the embedded @active struct for use as
* an activity tracker, that is for tracking the last known active request
* associated with it. When the last request becomes idle, when it is retired
* after completion, the optional callback @retire is invoked.
*/
static inline void
init_request_active(struct i915_gem_active *active,
i915_gem_retire_fn retire)
{
/* Start idle: no request tracked and not linked onto any list. */
RCU_INIT_POINTER(active->request, NULL);
INIT_LIST_HEAD(&active->link);
/* A NULL @retire is replaced by the no-op so ->retire is always callable. */
active->retire = retire ?: i915_gem_retire_noop;
}
/**
* i915_gem_active_set - updates the tracker to watch the current request
* @active - the active tracker
* @request - the request to watch
*
* i915_gem_active_set() watches the given @request for completion. Whilst
* that @request is busy, the @active reports busy. When that @request is
* retired, the @active tracker is updated to report idle.
*/
static inline void
i915_gem_active_set(struct i915_gem_active *active,
struct i915_request *request)
{
/* Unlink from whichever list we were on and join @request's active_list. */
list_move(&active->link, &request->active_list);
/* Publish the new request to lockless (RCU) readers. */
rcu_assign_pointer(active->request, request);
}
/**
* i915_gem_active_set_retire_fn - updates the retirement callback
* @active - the active tracker
* @fn - the routine called when the request is retired, can be NULL
* @mutex - struct_mutex used to guard retirements
*
* i915_gem_active_set_retire_fn() updates the function pointer that
* is called when the final request associated with the @active tracker
* is retired. The caller must hold @mutex.
*/
static inline void
i915_gem_active_set_retire_fn(struct i915_gem_active *active,
i915_gem_retire_fn fn,
struct mutex *mutex)
{
lockdep_assert_held(mutex);
/* A NULL @fn is replaced by the no-op so ->retire is always callable. */
active->retire = fn ?: i915_gem_retire_noop;
}
/**
* __i915_gem_active_peek - return the tracked request without a lock check
* @active - the active tracker
*
* __i915_gem_active_peek() returns the request currently tracked by @active,
* or NULL. It does not obtain a reference on the request and, unlike
* i915_gem_active_raw(), it does not check that any lock is held: the
* rcu_dereference_protected() condition is hard-coded to 1.
*/
static inline struct i915_request *
__i915_gem_active_peek(const struct i915_gem_active *active)
{
/*
* Inside the error capture (running with the driver in an unknown
* state), we want to bend the rules slightly (a lot).
*
* Work is in progress to make it safer, in the meantime this keeps
* the known issue from spamming the logs.
*/
return rcu_dereference_protected(active->request, 1);
}
/**
* i915_gem_active_raw - return the active request
* @active - the active tracker
* @mutex - the mutex protecting @active (struct_mutex), checked with lockdep
*
* i915_gem_active_raw() returns the current request being tracked, or NULL.
* It does not obtain a reference on the request for the caller, so the caller
* must hold struct_mutex. The request is returned even if it has already
* completed; see i915_gem_active_peek() to filter those out.
*/
static inline struct i915_request *
i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
{
return rcu_dereference_protected(active->request,
lockdep_is_held(mutex));
}
/**
* i915_gem_active_peek - report the active request being monitored
* @active - the active tracker
* @mutex - the mutex protecting @active (struct_mutex), checked with lockdep
*
* i915_gem_active_peek() returns the current request being tracked if
* still active, or NULL. It does not obtain a reference on the request
* for the caller, so the caller must hold struct_mutex.
*/
static inline struct i915_request *
i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
{
struct i915_request *request;
request = i915_gem_active_raw(active, mutex);
/* A completed but not yet retired request is reported as idle. */
if (!request || i915_request_completed(request))
return NULL;
return request;
}
/**
* i915_gem_active_get - return a reference to the active request
* @active - the active tracker
* @mutex - the mutex protecting @active (struct_mutex), checked with lockdep
*
* i915_gem_active_get() returns a reference to the active request, or NULL
* if the active tracker is idle. The caller must hold struct_mutex.
*/
static inline struct i915_request *
i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
{
return i915_request_get(i915_gem_active_peek(active, mutex));
}
/**
* __i915_gem_active_get_rcu - return a reference to the active request
* @active - the active tracker
*
* __i915_gem_active_get_rcu() returns a reference to the active request, or
* NULL if the active tracker is idle. The caller must hold the RCU read lock,
* but the returned pointer is safe to use outside of RCU.
*/
static inline struct i915_request *
__i915_gem_active_get_rcu(const struct i915_gem_active *active)
{
/*
* Performing a lockless retrieval of the active request is super
* tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
* slab of request objects will not be freed whilst we hold the
* RCU read lock. It does not guarantee that the request itself
* will not be freed and then *reused*. Viz,
*
* Thread A Thread B
*
* rq = active.request
* retire(rq) -> free(rq);
* (rq is now first on the slab freelist)
* active.request = NULL
*
* rq = new submission on a new object
* ref(rq)
*
* To prevent the request from being reused whilst the caller
* uses it, we take a reference like normal. Whilst acquiring
* the reference we check that it is not in a destroyed state
* (refcnt == 0). That prevents the request being reallocated
* whilst the caller holds on to it. To check that the request
* was not reallocated as we acquired the reference we have to
* check that our request remains the active request across
* the lookup, in the same manner as a seqlock. The visibility
* of the pointer versus the reference counting is controlled
* by using RCU barriers (rcu_dereference and rcu_assign_pointer).
*
* In the middle of all that, we inspect whether the request is
* complete. Retiring is lazy so the request may be completed long
* before the active tracker is updated. Querying whether the
* request is complete is far cheaper (as it involves no locked
* instructions setting cachelines to exclusive) than acquiring
* the reference, so we do it first. The RCU read lock ensures the
* pointer dereference is valid, but does not ensure that the
* seqno nor HWS is the right one! However, if the request was
* reallocated, that means the active tracker's request was complete.
* If the new request is also complete, then both are and we can
* just report the active tracker is idle. If the new request is
* incomplete, then we acquire a reference on it and check that
* it remained the active request.
*
* It is then imperative that we do not zero the request on
* reallocation, so that we can chase the dangling pointers!
* See i915_request_alloc().
*/
do {
struct i915_request *request;
request = rcu_dereference(active->request);
if (!request || i915_request_completed(request))
return NULL;
/*
* An especially silly compiler could decide to recompute the
* result of i915_request_completed, more specifically
* re-emit the load for request->fence.seqno. A race would catch
* a later seqno value, which could flip the result from true to
* false. Which means part of the instructions below might not
* be executed, while later on instructions are executed. Due to
* barriers within the refcounting the inconsistency can't reach
* past the call to i915_request_get_rcu, but not executing
* that while still executing i915_request_put() creates
* havoc enough. Prevent this with a compiler barrier.
*/
barrier();
request = i915_request_get_rcu(request);
/*
* What stops the following rcu_access_pointer() from occurring
* before the above i915_request_get_rcu()? If we were
* to read the value before pausing to get the reference to
* the request, we may not notice a change in the active
* tracker.
*
* The rcu_access_pointer() is a mere compiler barrier, which
* means both the CPU and compiler are free to perform the
* memory read without constraint. The compiler only has to
* ensure that any operations after the rcu_access_pointer()
* occur afterwards in program order. This means the read may
* be performed earlier by an out-of-order CPU, or adventurous
* compiler.
*
* The atomic operation at the heart of
* i915_request_get_rcu(), see dma_fence_get_rcu(), is
* atomic_inc_not_zero() which is only a full memory barrier
* when successful. That is, if i915_request_get_rcu()
* returns the request (and so with the reference counted
* incremented) then the following read for rcu_access_pointer()
* must occur after the atomic operation and so confirm
* that this request is the one currently being tracked.
*
* The corresponding write barrier is part of
* rcu_assign_pointer().
*/
if (!request || request == rcu_access_pointer(active->request))
return rcu_pointer_handoff(request);
/* The tracker changed under us: drop the stale reference and retry. */
i915_request_put(request);
} while (1);
}
/**
* i915_gem_active_get_unlocked - return a reference to the active request
* @active - the active tracker
*
* i915_gem_active_get_unlocked() returns a reference to the active request,
* or NULL if the active tracker is idle. The reference is obtained under RCU,
* so no locking is required by the caller.
*
* The reference should be freed with i915_request_put().
*/
static inline struct i915_request *
i915_gem_active_get_unlocked(const struct i915_gem_active *active)
{
struct i915_request *request;
/* The RCU read lock is only needed for the duration of the lookup. */
rcu_read_lock();
request = __i915_gem_active_get_rcu(active);
rcu_read_unlock();
return request;
}
/**
* i915_gem_active_isset - report whether the active tracker is assigned
* @active - the active tracker
*
* i915_gem_active_isset() returns true if the active tracker is currently
* assigned to a request. Due to the lazy retiring, that request may be idle
* and this may report stale information.
*/
static inline bool
i915_gem_active_isset(const struct i915_gem_active *active)
{
/* Only the pointer value is tested; the request is never dereferenced. */
return rcu_access_pointer(active->request);
}
/**
* i915_gem_active_wait - waits until the request is completed
* @active - the active request on which to wait
* @flags - how to wait
*
* i915_gem_active_wait() waits until the request is completed before
* returning, without requiring any locks to be held. Note that it does not
* retire any requests before returning.
*
* This function relies on RCU in order to acquire the reference to the active
* request without holding any locks. See __i915_gem_active_get_rcu() for the
* gory details on how that is managed. Once the reference is acquired, we
* can then wait upon the request, and afterwards release our reference,
* free of any locking.
*
* This function wraps i915_request_wait(), see it for the full details on
* @flags. The wait is always issued with MAX_SCHEDULE_TIMEOUT.
*
* Returns 0 if successful, or a negative error code.
*/
static inline int
i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags)
{
struct i915_request *request;
long ret = 0;
request = i915_gem_active_get_unlocked(active);
if (request) {
ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT);
i915_request_put(request);
}
/* Any non-negative result from i915_request_wait() is reported as 0. */
return ret < 0 ? ret : 0;
}
/**
* i915_gem_active_retire - waits until the request is retired
* @active - the active request on which to wait
* @mutex - the mutex protecting @active (struct_mutex), checked with lockdep
*
* i915_gem_active_retire() waits until the request is completed,
* and then ensures that at least the retirement handler for this
* @active tracker is called before returning. If the @active
* tracker is idle, the function returns immediately.
*
* Returns 0 if the tracker was idle or has been retired, or the negative
* error code from i915_request_wait(), in which case @active is left
* untouched.
*/
static inline int __must_check
i915_gem_active_retire(struct i915_gem_active *active,
struct mutex *mutex)
{
struct i915_request *request;
long ret;
request = i915_gem_active_raw(active, mutex);
if (!request)
return 0;
ret = i915_request_wait(request,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);
if (ret < 0)
return ret;
/* Mark the tracker idle before the callback runs. */
list_del_init(&active->link);
RCU_INIT_POINTER(active->request, NULL);
active->retire(active, request);
return 0;
}
/*
* for_each_active - iterate over the set bits of @mask, lowest bit first
*
* On each iteration @idx is assigned the index of the lowest set bit
* (ffs() - 1); that bit is then cleared from @mask before the next pass.
* Both arguments must be modifiable lvalues, and @mask is consumed: it is
* zero once the loop has run to completion.
*/
#define for_each_active(mask, idx) \
for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx))
#endif /* I915_REQUEST_H */ #endif /* I915_REQUEST_H */
...@@ -862,7 +862,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) ...@@ -862,7 +862,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
struct i915_request *rq; struct i915_request *rq;
long timeout; long timeout;
rq = i915_gem_active_get_unlocked(&tl->last_request); rq = i915_active_request_get_unlocked(&tl->last_request);
if (!rq) if (!rq)
continue; continue;
......
...@@ -163,8 +163,8 @@ int i915_timeline_init(struct drm_i915_private *i915, ...@@ -163,8 +163,8 @@ int i915_timeline_init(struct drm_i915_private *i915,
spin_lock_init(&timeline->lock); spin_lock_init(&timeline->lock);
init_request_active(&timeline->barrier, NULL); INIT_ACTIVE_REQUEST(&timeline->barrier);
init_request_active(&timeline->last_request, NULL); INIT_ACTIVE_REQUEST(&timeline->last_request);
INIT_LIST_HEAD(&timeline->requests); INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync); i915_syncmap_init(&timeline->sync);
...@@ -236,7 +236,7 @@ void i915_timeline_fini(struct i915_timeline *timeline) ...@@ -236,7 +236,7 @@ void i915_timeline_fini(struct i915_timeline *timeline)
{ {
GEM_BUG_ON(timeline->pin_count); GEM_BUG_ON(timeline->pin_count);
GEM_BUG_ON(!list_empty(&timeline->requests)); GEM_BUG_ON(!list_empty(&timeline->requests));
GEM_BUG_ON(i915_gem_active_isset(&timeline->barrier)); GEM_BUG_ON(i915_active_request_isset(&timeline->barrier));
i915_syncmap_free(&timeline->sync); i915_syncmap_free(&timeline->sync);
hwsp_free(timeline); hwsp_free(timeline);
...@@ -311,25 +311,6 @@ void i915_timeline_unpin(struct i915_timeline *tl) ...@@ -311,25 +311,6 @@ void i915_timeline_unpin(struct i915_timeline *tl)
__i915_vma_unpin(tl->hwsp_ggtt); __i915_vma_unpin(tl->hwsp_ggtt);
} }
/*
* i915_timeline_set_barrier - install @rq as the barrier tracked by @tl
*
* The caller must hold rq->i915->drm.struct_mutex. If @tl already tracks a
* barrier, @rq is first made to wait on that request's fence. Returns 0 on
* success, or the error from i915_request_await_dma_fence(), in which case
* the existing barrier is left in place.
*/
int i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq)
{
struct i915_request *old;
int err;
lockdep_assert_held(&rq->i915->drm.struct_mutex);
/* Must maintain ordering wrt existing barriers */
old = i915_gem_active_raw(&tl->barrier, &rq->i915->drm.struct_mutex);
if (old) {
err = i915_request_await_dma_fence(rq, &old->fence);
if (err)
return err;
}
i915_gem_active_set(&tl->barrier, rq);
return 0;
}
void __i915_timeline_free(struct kref *kref) void __i915_timeline_free(struct kref *kref)
{ {
struct i915_timeline *timeline = struct i915_timeline *timeline =
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/kref.h> #include <linux/kref.h>
#include "i915_active.h"
#include "i915_request.h" #include "i915_request.h"
#include "i915_syncmap.h" #include "i915_syncmap.h"
#include "i915_utils.h" #include "i915_utils.h"
...@@ -58,10 +59,10 @@ struct i915_timeline { ...@@ -58,10 +59,10 @@ struct i915_timeline {
/* Contains an RCU guarded pointer to the last request. No reference is /* Contains an RCU guarded pointer to the last request. No reference is
* held to the request, users must carefully acquire a reference to * held to the request, users must carefully acquire a reference to
* the request using i915_gem_active_get_request_rcu(), or hold the * the request using i915_active_request_get_request_rcu(), or hold the
* struct_mutex. * struct_mutex.
*/ */
struct i915_gem_active last_request; struct i915_active_request last_request;
/** /**
* We track the most recent seqno that we wait on in every context so * We track the most recent seqno that we wait on in every context so
...@@ -82,7 +83,7 @@ struct i915_timeline { ...@@ -82,7 +83,7 @@ struct i915_timeline {
* subsequent submissions to this timeline be executed only after the * subsequent submissions to this timeline be executed only after the
* barrier has been completed. * barrier has been completed.
*/ */
struct i915_gem_active barrier; struct i915_active_request barrier;
struct list_head link; struct list_head link;
const char *name; const char *name;
...@@ -174,7 +175,10 @@ void i915_timelines_fini(struct drm_i915_private *i915); ...@@ -174,7 +175,10 @@ void i915_timelines_fini(struct drm_i915_private *i915);
* submissions on @timeline. Subsequent requests will not be submitted to GPU * submissions on @timeline. Subsequent requests will not be submitted to GPU
* until the barrier has been completed. * until the barrier has been completed.
*/ */
int i915_timeline_set_barrier(struct i915_timeline *timeline, static inline int
struct i915_request *rq); i915_timeline_set_barrier(struct i915_timeline *tl, struct i915_request *rq)
{
return i915_active_request_set(&tl->barrier, rq);
}
#endif #endif
...@@ -120,7 +120,7 @@ vma_create(struct drm_i915_gem_object *obj, ...@@ -120,7 +120,7 @@ vma_create(struct drm_i915_gem_object *obj,
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
i915_active_init(vm->i915, &vma->active, __i915_vma_retire); i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
init_request_active(&vma->last_fence, NULL); INIT_ACTIVE_REQUEST(&vma->last_fence);
vma->vm = vm; vma->vm = vm;
vma->ops = &vm->vma_ops; vma->ops = &vm->vma_ops;
...@@ -808,7 +808,7 @@ static void __i915_vma_destroy(struct i915_vma *vma) ...@@ -808,7 +808,7 @@ static void __i915_vma_destroy(struct i915_vma *vma)
GEM_BUG_ON(vma->node.allocated); GEM_BUG_ON(vma->node.allocated);
GEM_BUG_ON(vma->fence); GEM_BUG_ON(vma->fence);
GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); GEM_BUG_ON(i915_active_request_isset(&vma->last_fence));
mutex_lock(&vma->vm->mutex); mutex_lock(&vma->vm->mutex);
list_del(&vma->vm_link); list_del(&vma->vm_link);
...@@ -942,14 +942,14 @@ int i915_vma_move_to_active(struct i915_vma *vma, ...@@ -942,14 +942,14 @@ int i915_vma_move_to_active(struct i915_vma *vma,
obj->write_domain = I915_GEM_DOMAIN_RENDER; obj->write_domain = I915_GEM_DOMAIN_RENDER;
if (intel_fb_obj_invalidate(obj, ORIGIN_CS)) if (intel_fb_obj_invalidate(obj, ORIGIN_CS))
i915_gem_active_set(&obj->frontbuffer_write, rq); __i915_active_request_set(&obj->frontbuffer_write, rq);
obj->read_domains = 0; obj->read_domains = 0;
} }
obj->read_domains |= I915_GEM_GPU_DOMAINS; obj->read_domains |= I915_GEM_GPU_DOMAINS;
if (flags & EXEC_OBJECT_NEEDS_FENCE) if (flags & EXEC_OBJECT_NEEDS_FENCE)
i915_gem_active_set(&vma->last_fence, rq); __i915_active_request_set(&vma->last_fence, rq);
export_fence(vma, rq, flags); export_fence(vma, rq, flags);
return 0; return 0;
...@@ -986,7 +986,7 @@ int i915_vma_unbind(struct i915_vma *vma) ...@@ -986,7 +986,7 @@ int i915_vma_unbind(struct i915_vma *vma)
if (ret) if (ret)
goto unpin; goto unpin;
ret = i915_gem_active_retire(&vma->last_fence, ret = i915_active_request_retire(&vma->last_fence,
&vma->vm->i915->drm.struct_mutex); &vma->vm->i915->drm.struct_mutex);
unpin: unpin:
__i915_vma_unpin(vma); __i915_vma_unpin(vma);
......
...@@ -110,7 +110,7 @@ struct i915_vma { ...@@ -110,7 +110,7 @@ struct i915_vma {
#define I915_VMA_GGTT_WRITE BIT(15) #define I915_VMA_GGTT_WRITE BIT(15)
struct i915_active active; struct i915_active active;
struct i915_gem_active last_fence; struct i915_active_request last_fence;
/** /**
* Support different GGTT views into the same object. * Support different GGTT views into the same object.
......
...@@ -1086,7 +1086,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) ...@@ -1086,7 +1086,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine)
* the last request that remains in the timeline. When idle, it is * the last request that remains in the timeline. When idle, it is
* the last executed context as tracked by retirement. * the last executed context as tracked by retirement.
*/ */
rq = __i915_gem_active_peek(&engine->timeline.last_request); rq = __i915_active_request_peek(&engine->timeline.last_request);
if (rq) if (rq)
return rq->hw_context == kernel_context; return rq->hw_context == kernel_context;
else else
......
...@@ -184,7 +184,7 @@ struct intel_overlay { ...@@ -184,7 +184,7 @@ struct intel_overlay {
struct overlay_registers __iomem *regs; struct overlay_registers __iomem *regs;
u32 flip_addr; u32 flip_addr;
/* flip handling */ /* flip handling */
struct i915_gem_active last_flip; struct i915_active_request last_flip;
}; };
static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv, static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv,
...@@ -212,22 +212,22 @@ static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv, ...@@ -212,22 +212,22 @@ static void i830_overlay_clock_gating(struct drm_i915_private *dev_priv,
static void intel_overlay_submit_request(struct intel_overlay *overlay, static void intel_overlay_submit_request(struct intel_overlay *overlay,
struct i915_request *rq, struct i915_request *rq,
i915_gem_retire_fn retire) i915_active_retire_fn retire)
{ {
GEM_BUG_ON(i915_gem_active_peek(&overlay->last_flip, GEM_BUG_ON(i915_active_request_peek(&overlay->last_flip,
&overlay->i915->drm.struct_mutex)); &overlay->i915->drm.struct_mutex));
i915_gem_active_set_retire_fn(&overlay->last_flip, retire, i915_active_request_set_retire_fn(&overlay->last_flip, retire,
&overlay->i915->drm.struct_mutex); &overlay->i915->drm.struct_mutex);
i915_gem_active_set(&overlay->last_flip, rq); __i915_active_request_set(&overlay->last_flip, rq);
i915_request_add(rq); i915_request_add(rq);
} }
static int intel_overlay_do_wait_request(struct intel_overlay *overlay, static int intel_overlay_do_wait_request(struct intel_overlay *overlay,
struct i915_request *rq, struct i915_request *rq,
i915_gem_retire_fn retire) i915_active_retire_fn retire)
{ {
intel_overlay_submit_request(overlay, rq, retire); intel_overlay_submit_request(overlay, rq, retire);
return i915_gem_active_retire(&overlay->last_flip, return i915_active_request_retire(&overlay->last_flip,
&overlay->i915->drm.struct_mutex); &overlay->i915->drm.struct_mutex);
} }
...@@ -349,7 +349,8 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay) ...@@ -349,7 +349,8 @@ static void intel_overlay_release_old_vma(struct intel_overlay *overlay)
i915_vma_put(vma); i915_vma_put(vma);
} }
static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, static void
intel_overlay_release_old_vid_tail(struct i915_active_request *active,
struct i915_request *rq) struct i915_request *rq)
{ {
struct intel_overlay *overlay = struct intel_overlay *overlay =
...@@ -358,7 +359,7 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, ...@@ -358,7 +359,7 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active,
intel_overlay_release_old_vma(overlay); intel_overlay_release_old_vma(overlay);
} }
static void intel_overlay_off_tail(struct i915_gem_active *active, static void intel_overlay_off_tail(struct i915_active_request *active,
struct i915_request *rq) struct i915_request *rq)
{ {
struct intel_overlay *overlay = struct intel_overlay *overlay =
...@@ -421,7 +422,7 @@ static int intel_overlay_off(struct intel_overlay *overlay) ...@@ -421,7 +422,7 @@ static int intel_overlay_off(struct intel_overlay *overlay)
* We have to be careful not to repeat work forever and make forward progress. */ * We have to be careful not to repeat work forever and make forward progress. */
static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay)
{ {
return i915_gem_active_retire(&overlay->last_flip, return i915_active_request_retire(&overlay->last_flip,
&overlay->i915->drm.struct_mutex); &overlay->i915->drm.struct_mutex);
} }
...@@ -1355,7 +1356,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv) ...@@ -1355,7 +1356,7 @@ void intel_overlay_setup(struct drm_i915_private *dev_priv)
overlay->contrast = 75; overlay->contrast = 75;
overlay->saturation = 146; overlay->saturation = 146;
init_request_active(&overlay->last_flip, NULL); INIT_ACTIVE_REQUEST(&overlay->last_flip);
mutex_lock(&dev_priv->drm.struct_mutex); mutex_lock(&dev_priv->drm.struct_mutex);
......
...@@ -15,8 +15,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context) ...@@ -15,8 +15,8 @@ void mock_timeline_init(struct i915_timeline *timeline, u64 context)
spin_lock_init(&timeline->lock); spin_lock_init(&timeline->lock);
init_request_active(&timeline->barrier, NULL); INIT_ACTIVE_REQUEST(&timeline->barrier);
init_request_active(&timeline->last_request, NULL); INIT_ACTIVE_REQUEST(&timeline->last_request);
INIT_LIST_HEAD(&timeline->requests); INIT_LIST_HEAD(&timeline->requests);
i915_syncmap_init(&timeline->sync); i915_syncmap_init(&timeline->sync);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment