Commit 85bedbf1 authored by Chris Wilson

drm/i915/gt: Eliminate the trylock for reading a timeline's hwsp

As we stash a pointer to the HWSP cacheline on the request, when reading
it we need only confirm that the cacheline is still valid by checking
that the request and timeline are still intact.

v2: Protect hwsp_cacheline with RCU
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191217011659.3092130-1-chris@chris-wilson.co.uk
parent e14177f1
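The change replaces the timeline-mutex trylock with an RCU-validated read of the cacheline pointer stashed on the request: the cacheline is freed with kfree_rcu(), so a reader holding rcu_read_lock() can safely dereference the stashed pointer and then take a reference only if the cacheline is still busy. A minimal, self-contained sketch of that pattern follows; the example_* names and the kref used in place of i915_active are illustrative, not the driver's code.

/* Illustrative sketch only; not the i915 implementation. */
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/types.h>

struct example_cacheline {
	void *vaddr;		/* CPU mapping of the status-page cacheline */
	struct kref busy;	/* stand-in for i915_active */
	struct rcu_head rcu;
};

static void example_cacheline_release(struct kref *kref)
{
	struct example_cacheline *cl =
		container_of(kref, struct example_cacheline, busy);

	kfree_rcu(cl, rcu);	/* defer the free past an RCU grace period */
}

/* Reader: validate the stashed pointer without taking any mutex. */
static int example_read_hwsp(struct example_cacheline __rcu **slot, u32 *out)
{
	struct example_cacheline *cl;

	rcu_read_lock();
	cl = rcu_dereference(*slot);
	if (!cl || !kref_get_unless_zero(&cl->busy)) {
		rcu_read_unlock();
		return 1;	/* already retired, i.e. completed */
	}
	rcu_read_unlock();

	*out = READ_ONCE(*(u32 *)cl->vaddr); /* valid while a ref is held */
	kref_put(&cl->busy, example_cacheline_release);
	return 0;
}

In the driver the lifetime is managed by i915_active_acquire_if_busy()/i915_active_release() rather than a kref, but the shape is the same: the RCU grace period keeps the cacheline struct from disappearing under a reader, and the busy-acquire confirms it has not yet been retired.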
--- a/drivers/gpu/drm/i915/gt/intel_timeline.c
+++ b/drivers/gpu/drm/i915/gt/intel_timeline.c
@@ -15,6 +15,9 @@
 #define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit)))
 #define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit))
 
+#define CACHELINE_BITS 6
+#define CACHELINE_FREE CACHELINE_BITS
+
 struct intel_timeline_hwsp {
 	struct intel_gt *gt;
 	struct intel_gt_timelines *gt_timelines;
@@ -23,14 +26,6 @@ struct intel_timeline_hwsp {
 	u64 free_bitmap;
 };
 
-struct intel_timeline_cacheline {
-	struct i915_active active;
-	struct intel_timeline_hwsp *hwsp;
-	void *vaddr;
-#define CACHELINE_BITS 6
-#define CACHELINE_FREE CACHELINE_BITS
-};
-
 static struct i915_vma *__hwsp_alloc(struct intel_gt *gt)
 {
 	struct drm_i915_private *i915 = gt->i915;
@@ -133,7 +128,7 @@ static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
 	__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
 
 	i915_active_fini(&cl->active);
-	kfree(cl);
+	kfree_rcu(cl, rcu);
 }
 
 __i915_active_call
@@ -514,46 +509,35 @@ int intel_timeline_read_hwsp(struct i915_request *from,
 			     struct i915_request *to,
 			     u32 *hwsp)
 {
-	struct intel_timeline *tl;
+	struct intel_timeline_cacheline *cl;
 	int err;
 
-	rcu_read_lock();
-	tl = rcu_dereference(from->timeline);
-	if (i915_request_completed(from) || !kref_get_unless_zero(&tl->kref))
-		tl = NULL;
-	rcu_read_unlock();
-	if (!tl) /* already completed */
-		return 1;
-
-	GEM_BUG_ON(rcu_access_pointer(to->timeline) == tl);
-
-	err = -EAGAIN;
-	if (mutex_trylock(&tl->mutex)) {
-		struct intel_timeline_cacheline *cl = from->hwsp_cacheline;
-
-		if (i915_request_completed(from)) {
-			err = 1;
-			goto unlock;
-		}
-
-		err = cacheline_ref(cl, to);
-		if (err)
-			goto unlock;
-
-		if (likely(cl == tl->hwsp_cacheline)) {
-			*hwsp = tl->hwsp_offset;
-		} else { /* across a seqno wrap, recover the original offset */
-			*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
-				ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) *
-				CACHELINE_BYTES;
-		}
-
-unlock:
-		mutex_unlock(&tl->mutex);
-	}
-	intel_timeline_put(tl);
-
-	return err;
+	GEM_BUG_ON(!rcu_access_pointer(from->hwsp_cacheline));
+
+	rcu_read_lock();
+	cl = rcu_dereference(from->hwsp_cacheline);
+	if (unlikely(!i915_active_acquire_if_busy(&cl->active)))
+		goto unlock; /* seqno wrapped and completed! */
+	if (unlikely(i915_request_completed(from)))
+		goto release;
+	rcu_read_unlock();
+
+	err = cacheline_ref(cl, to);
+	if (err)
+		goto out;
+
+	*hwsp = i915_ggtt_offset(cl->hwsp->vma) +
+		ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * CACHELINE_BYTES;
+
+out:
+	i915_active_release(&cl->active);
+	return err;
+
+release:
+	i915_active_release(&cl->active);
+unlock:
+	rcu_read_unlock();
+	return 1;
 }
 
 void intel_timeline_unpin(struct intel_timeline *tl)
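The rewritten intel_timeline_read_hwsp() above keeps the old return convention: 1 when @from has already completed (including the seqno-wrap case), 0 on success with *hwsp set to the GGTT offset of @from's seqno cacheline, or a negative error from cacheline_ref(). A hypothetical caller (the function name and the emit step are illustrative, not taken from the driver) would handle it like this:

/* Hypothetical caller; only the return convention comes from the patch above. */
static int example_await_hwsp(struct i915_request *to, struct i915_request *from)
{
	u32 hwsp_offset;
	int err;

	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
	if (err < 0)
		return err;	/* could not track the cacheline for @to */
	if (err)
		return 0;	/* @from already completed, nothing to wait on */

	/* ... emit a semaphore wait polling hwsp_offset for @from's seqno ... */
	return 0;
}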
--- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h
@@ -10,14 +10,15 @@
 #include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/mutex.h>
+#include <linux/rcupdate.h>
 #include <linux/types.h>
 
 #include "i915_active_types.h"
 
 struct drm_i915_private;
 struct i915_vma;
+struct intel_timeline_cacheline;
 struct i915_syncmap;
-struct intel_timeline_hwsp;
 
 struct intel_timeline {
 	u64 fence_context;
@@ -87,4 +88,13 @@ struct intel_timeline {
 	struct rcu_head rcu;
 };
 
+struct intel_timeline_cacheline {
+	struct i915_active active;
+
+	struct intel_timeline_hwsp *hwsp;
+	void *vaddr;
+
+	struct rcu_head rcu;
+};
+
 #endif /* __I915_TIMELINE_TYPES_H__ */
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -655,9 +655,9 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->execution_mask = ce->engine->mask;
 	rq->flags = 0;
 
-	rcu_assign_pointer(rq->timeline, tl);
+	RCU_INIT_POINTER(rq->timeline, tl);
+	RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline);
 	rq->hwsp_seqno = tl->hwsp_seqno;
-	rq->hwsp_cacheline = tl->hwsp_cacheline;
 
 	rq->rcustate = get_state_synchronize_rcu();	/* acts as smp_mb() */
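Note that the creation path above switches rq->timeline from rcu_assign_pointer() to RCU_INIT_POINTER() and publishes rq->hwsp_cacheline the same way. RCU_INIT_POINTER() omits the release ordering that rcu_assign_pointer() provides, which is legitimate when, as here, the freshly allocated request is not yet reachable by any RCU reader. A reduced sketch of the distinction (the example_* names are illustrative):

/* Illustrative only: when each initialiser is appropriate. */
#include <linux/rcupdate.h>

struct example_cacheline;

struct example_req {
	struct example_cacheline __rcu *hwsp_cacheline;
};

static void example_init(struct example_req *rq, struct example_cacheline *cl)
{
	/* rq is not yet visible to readers: a plain store is enough. */
	RCU_INIT_POINTER(rq->hwsp_cacheline, cl);
}

static void example_update(struct example_req *rq, struct example_cacheline *cl)
{
	/* rq may already be visible: order prior writes before the publish. */
	rcu_assign_pointer(rq->hwsp_cacheline, cl);
}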
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -30,6 +30,7 @@
 #include "gt/intel_context_types.h"
 #include "gt/intel_engine_types.h"
+#include "gt/intel_timeline_types.h"
 
 #include "i915_gem.h"
 #include "i915_scheduler.h"
@@ -41,8 +42,6 @@
 struct drm_file;
 struct drm_i915_gem_object;
 struct i915_request;
-struct intel_timeline;
-struct intel_timeline_cacheline;
 
 struct i915_capture_list {
 	struct i915_capture_list *next;
@@ -183,7 +182,7 @@ struct i915_request {
 	 * inside the timeline's HWSP vma, but it is only valid while this
 	 * request has not completed and guarded by the timeline mutex.
 	 */
-	struct intel_timeline_cacheline *hwsp_cacheline;
+	struct intel_timeline_cacheline __rcu *hwsp_cacheline;
 
 	/** Position in the ring of the start of the request */
 	u32 head;