Commit 1216e3c3 authored by Chris Wilson

drm/i915: Drop unused engine->irq_seqno_barrier w/a

Now that we have eliminated the CPU-side irq_seqno_barrier by moving the
delays onto the GPU before emitting the MI_USER_INTERRUPT, we can remove
the engine->irq_seqno_barrier infrastructure. Though intentionally
slowing down the GPU is nasty, so is the code we can now remove!
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181228171641.16531-6-chris@chris-wilson.co.uk
parent 835051d3
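For context, here is a minimal, self-contained C sketch of what this change means on the waiter side. It is not the kernel implementation: the types and helpers (seqno_store, hw_read_seqno, apply_seqno_barrier, seqno_passed) are hypothetical stand-ins. old_style_complete() mirrors the shape of the __i915_request_irq_complete() path removed below (cheap seqno check, then the expensive irq_seqno_barrier kick and a recheck when an interrupt was posted), while new_style_complete() mirrors the plain i915_request_completed() check that suffices once the GPU itself delays before emitting MI_USER_INTERRUPT.

/*
 * Illustrative sketch only -- not the i915 code. All names below are
 * hypothetical stand-ins used to model the before/after control flow.
 */
#include <stdbool.h>
#include <stdint.h>

struct seqno_store {
	uint32_t hws_seqno;      /* last seqno value written to the status page */
	bool irq_posted;         /* models the ENGINE_IRQ_BREADCRUMB bit */
	bool has_seqno_barrier;  /* engine needed the coherency workaround */
};

/* Stands in for intel_engine_get_seqno(): a cheap read of the HWS. */
static uint32_t hw_read_seqno(const struct seqno_store *s)
{
	return s->hws_seqno;
}

/* Stands in for the expensive engine->irq_seqno_barrier() kick. */
static void apply_seqno_barrier(struct seqno_store *s)
{
	(void)s;
}

/* Seqno comparison that tolerates wraparound, like i915_seqno_passed(). */
static bool seqno_passed(uint32_t seq, uint32_t target)
{
	return (int32_t)(seq - target) >= 0;
}

/*
 * Old shape (cf. the removed __i915_request_irq_complete()): try the cheap
 * read first; if an interrupt was posted, clear the flag, apply the barrier
 * and re-read, so a stale cacheline cannot make us miss the final seqno.
 */
static bool old_style_complete(struct seqno_store *s, uint32_t wait_seqno)
{
	if (seqno_passed(hw_read_seqno(s), wait_seqno))
		return true;

	if (s->has_seqno_barrier && s->irq_posted) {
		s->irq_posted = false;   /* clear before the barrier */
		apply_seqno_barrier(s);
		return seqno_passed(hw_read_seqno(s), wait_seqno);
	}

	return false;
}

/*
 * New shape: the GPU now performs the required delay before emitting
 * MI_USER_INTERRUPT, so a single read of the status page is sufficient.
 */
static bool new_style_complete(const struct seqno_store *s, uint32_t wait_seqno)
{
	return seqno_passed(hw_read_seqno(s), wait_seqno);
}

int main(void)
{
	struct seqno_store s = { .hws_seqno = 42, .irq_posted = true,
				 .has_seqno_barrier = true };

	/* Both paths agree once the seqno has advanced past the target. */
	return !(old_style_complete(&s, 40) && new_style_complete(&s, 40));
}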
@@ -3586,90 +3586,6 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
 	}
 }
-static inline bool
-__i915_request_irq_complete(const struct i915_request *rq)
-{
-	struct intel_engine_cs *engine = rq->engine;
-	u32 seqno;
-
-	/* Note that the engine may have wrapped around the seqno, and
-	 * so our request->global_seqno will be ahead of the hardware,
-	 * even though it completed the request before wrapping. We catch
-	 * this by kicking all the waiters before resetting the seqno
-	 * in hardware, and also signal the fence.
-	 */
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
-		return true;
-
-	/* The request was dequeued before we were awoken. We check after
-	 * inspecting the hw to confirm that this was the same request
-	 * that generated the HWS update. The memory barriers within
-	 * the request execution are sufficient to ensure that a check
-	 * after reading the value from hw matches this request.
-	 */
-	seqno = i915_request_global_seqno(rq);
-	if (!seqno)
-		return false;
-
-	/* Before we do the heavier coherent read of the seqno,
-	 * check the value (hopefully) in the CPU cacheline.
-	 */
-	if (__i915_request_completed(rq, seqno))
-		return true;
-
-	/* Ensure our read of the seqno is coherent so that we
-	 * do not "miss an interrupt" (i.e. if this is the last
-	 * request and the seqno write from the GPU is not visible
-	 * by the time the interrupt fires, we will see that the
-	 * request is incomplete and go back to sleep awaiting
-	 * another interrupt that will never come.)
-	 *
-	 * Strictly, we only need to do this once after an interrupt,
-	 * but it is easier and safer to do it every time the waiter
-	 * is woken.
-	 */
-	if (engine->irq_seqno_barrier &&
-	    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) {
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-		/* The ordering of irq_posted versus applying the barrier
-		 * is crucial. The clearing of the current irq_posted must
-		 * be visible before we perform the barrier operation,
-		 * such that if a subsequent interrupt arrives, irq_posted
-		 * is reasserted and our task rewoken (which causes us to
-		 * do another __i915_request_irq_complete() immediately
-		 * and reapply the barrier). Conversely, if the clear
-		 * occurs after the barrier, then an interrupt that arrived
-		 * whilst we waited on the barrier would not trigger a
-		 * barrier on the next pass, and the read may not see the
-		 * seqno update.
-		 */
-		engine->irq_seqno_barrier(engine);
-
-		/* If we consume the irq, but we are no longer the bottom-half,
-		 * the real bottom-half may not have serialised their own
-		 * seqno check with the irq-barrier (i.e. may have inspected
-		 * the seqno before we believe it coherent since they see
-		 * irq_posted == false but we are still running).
-		 */
-		spin_lock_irq(&b->irq_lock);
-		if (b->irq_wait && b->irq_wait->tsk != current)
-			/* Note that if the bottom-half is changed as we
-			 * are sending the wake-up, the new bottom-half will
-			 * be woken by whomever made the change. We only have
-			 * to worry about when we steal the irq-posted for
-			 * ourself.
-			 */
-			wake_up_process(b->irq_wait->tsk);
-		spin_unlock_irq(&b->irq_lock);
-
-		if (__i915_request_completed(rq, seqno))
-			return true;
-	}
-
-	return false;
-}
 void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
 bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
@@ -3230,13 +3230,6 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine,
 			  struct i915_request *request,
 			  bool stalled)
 {
-	/*
-	 * Make sure this write is visible before we re-enable the interrupt
-	 * handlers on another CPU, as tasklet_enable() resolves to just
-	 * a compiler barrier which is insufficient for our purpose here.
-	 */
-	smp_store_mb(engine->irq_posted, 0);
-
 	if (request)
 		request = i915_gem_reset_request(engine, request, stalled);
@@ -1189,13 +1189,6 @@ static void notify_ring(struct intel_engine_cs *engine)
 			rq = i915_request_get(waiter);
 			tsk = wait->tsk;
-		} else {
-			if (engine->irq_seqno_barrier &&
-			    i915_seqno_passed(seqno, wait->seqno - 1)) {
-				set_bit(ENGINE_IRQ_BREADCRUMB,
-					&engine->irq_posted);
-				tsk = wait->tsk;
-			}
 		}
 		engine->breadcrumbs.irq_count++;
@@ -1179,13 +1179,7 @@ long i915_request_wait(struct i915_request *rq,
 		set_current_state(state);
 wakeup:
-		/*
-		 * Carefully check if the request is complete, giving time
-		 * for the seqno to be visible following the interrupt.
-		 * We also have to check in case we are kicked by the GPU
-		 * reset in order to drop the struct_mutex.
-		 */
-		if (__i915_request_irq_complete(rq))
+		if (i915_request_completed(rq))
 			break;
 		/*
@@ -166,12 +166,6 @@ static void irq_enable(struct intel_engine_cs *engine)
 	 */
 	GEM_BUG_ON(!intel_irqs_enabled(engine->i915));
-	/* Enabling the IRQ may miss the generation of the interrupt, but
-	 * we still need to force the barrier before reading the seqno,
-	 * just in case.
-	 */
-	set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
-
 	/* Caller disables interrupts */
 	if (engine->irq_enable) {
 		spin_lock(&engine->i915->irq_lock);
@@ -683,16 +677,6 @@ static int intel_breadcrumbs_signaler(void *arg)
 		}
 		if (unlikely(do_schedule)) {
-			/* Before we sleep, check for a missed seqno */
-			if (current->state & TASK_NORMAL &&
-			    !list_empty(&b->signals) &&
-			    engine->irq_seqno_barrier &&
-			    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
-					       &engine->irq_posted)) {
-				engine->irq_seqno_barrier(engine);
-				intel_engine_wakeup(engine);
-			}
-
 sleep:
 			if (kthread_should_park())
 				kthread_parkme();
@@ -859,16 +843,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 	else
 		irq_disable(engine);
-	/*
-	 * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the
-	 * GPU is active and may have already executed the MI_USER_INTERRUPT
-	 * before the CPU is ready to receive. However, the engine is currently
-	 * idle (we haven't started it yet), there is no possibility for a
-	 * missed interrupt as we enabled the irq and so we can clear the
-	 * immediate wakeup (until a real interrupt arrives for the waiter).
-	 */
-	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
-
 	spin_unlock_irqrestore(&b->irq_lock, flags);
 }
@@ -457,7 +457,6 @@ int intel_engines_init(struct drm_i915_private *dev_priv)
 void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
-	clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted);
 	/* After manually advancing the seqno, fake the interrupt in case
 	 * there are any waiters for that seqno.
@@ -1536,11 +1535,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_unlock(&b->rb_lock);
 	local_irq_restore(flags);
-	drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s)\n",
-		   engine->irq_posted,
-		   yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-				  &engine->irq_posted)));
-
 	drm_printf(m, "HWSP:\n");
 	hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
@@ -120,16 +120,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd)
 static void hangcheck_load_sample(struct intel_engine_cs *engine,
 				  struct intel_engine_hangcheck *hc)
 {
-	/* We don't strictly need an irq-barrier here, as we are not
-	 * serving an interrupt request, be paranoid in case the
-	 * barrier has side-effects (such as preventing a broken
-	 * cacheline snoop) and so be sure that we can see the seqno
-	 * advance. If the seqno should stick, due to a stale
-	 * cacheline, we would erroneously declare the GPU hung.
-	 */
-	if (engine->irq_seqno_barrier)
-		engine->irq_seqno_barrier(engine);
-
 	hc->acthd = intel_engine_get_active_head(engine);
 	hc->seqno = intel_engine_get_seqno(engine);
 }
@@ -711,10 +711,6 @@ static int init_ring_common(struct intel_engine_cs *engine)
 static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
 {
 	intel_engine_stop_cs(engine);
-	if (engine->irq_seqno_barrier)
-		engine->irq_seqno_barrier(engine);
 	return i915_gem_find_active_request(engine);
 }
@@ -365,9 +365,6 @@ struct intel_engine_cs {
 	struct drm_i915_gem_object *default_state;
 	void *pinned_default_state;
-	unsigned long irq_posted;
-#define ENGINE_IRQ_BREADCRUMB 0
-
 	/* Rather than have every client wait upon all user interrupts,
 	 * with the herd waking after every interrupt and each doing the
 	 * heavyweight seqno dance, we delegate the task (of being the
@@ -501,13 +498,6 @@ struct intel_engine_cs {
 	 */
 	void (*cancel_requests)(struct intel_engine_cs *engine);
-	/* Some chipsets are not quite as coherent as advertised and need
-	 * an expensive kick to force a true read of the up-to-date seqno.
-	 * However, the up-to-date seqno is not always required and the last
-	 * seen value is good enough. Note that the seqno will always be
-	 * monotonic, even if not coherent.
-	 */
-	void (*irq_seqno_barrier)(struct intel_engine_cs *engine);
 	void (*cleanup)(struct intel_engine_cs *engine);
 	struct intel_engine_execlists execlists;