Commit 963ddd63 authored by Chris Wilson's avatar Chris Wilson

drm/i915: Suspend submission tasklets around wedging

After staring hard at sequences like

[   28.199013]  systemd-1       2..s. 26062228us : execlists_submission_tasklet: rcs0 cs-irq head=0 [0?], tail=1 [1?]
[   28.199095]  systemd-1       2..s. 26062229us : execlists_submission_tasklet: rcs0 csb[1]: status=0x00000018:0x00000000, active=0x1
[   28.199177]  systemd-1       2..s. 26062230us : execlists_submission_tasklet: rcs0 out[0]: ctx=0.1, seqno=3, prio=-1024
[   28.199258]  systemd-1       2..s. 26062231us : execlists_submission_tasklet: rcs0 completed ctx=0
[   28.199340]  gem_eio-829     1..s1 26066853us : execlists_submission_tasklet: rcs0 in[0]:  ctx=1.1, seqno=1, prio=0
[   28.199421]   <idle>-0       2..s. 26066863us : execlists_submission_tasklet: rcs0 cs-irq head=1 [1?], tail=2 [2?]
[   28.199503]   <idle>-0       2..s. 26066865us : execlists_submission_tasklet: rcs0 csb[2]: status=0x00000001:0x00000000, active=0x1
[   28.199585]  gem_eio-829     1..s1 26067077us : execlists_submission_tasklet: rcs0 in[1]:  ctx=3.1, seqno=2, prio=0
[   28.199667]  gem_eio-829     1..s1 26067078us : execlists_submission_tasklet: rcs0 in[0]:  ctx=1.2, seqno=1, prio=0
[   28.199749]   <idle>-0       2..s. 26067084us : execlists_submission_tasklet: rcs0 cs-irq head=2 [2?], tail=3 [3?]
[   28.199830]   <idle>-0       2..s. 26067085us : execlists_submission_tasklet: rcs0 csb[3]: status=0x00008002:0x00000001, active=0x1
[   28.199912]   <idle>-0       2..s. 26067086us : execlists_submission_tasklet: rcs0 out[0]: ctx=1.2, seqno=1, prio=0
[   28.199994]  gem_eio-829     2..s. 28246084us : execlists_submission_tasklet: rcs0 cs-irq head=3 [3?], tail=4 [4?]
[   28.200096]  gem_eio-829     2..s. 28246088us : execlists_submission_tasklet: rcs0 csb[4]: status=0x00000014:0x00000001, active=0x5
[   28.200178]  gem_eio-829     2..s. 28246089us : execlists_submission_tasklet: rcs0 out[0]: ctx=0.0, seqno=0, prio=0
[   28.200260]  gem_eio-829     2..s. 28246127us : execlists_submission_tasklet: execlists_submission_tasklet:886 GEM_BUG_ON(buf[2 * head + 1] != port->context_id)

the conclusion is that the only place where the ports are reset to zero,
is from engine->cancel_requests called during i915_gem_set_wedged().

The race is horrible as it results from calling set-wedged on active HW
(the GPU reset failed) and as such we need to be careful as the HW state
changes beneath us. Fortunately, it's the same scary conditions as
affect normal reset, so we can reuse the same machinery to disable state
tracking as we clobber it.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104945Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Michel Thierry <michel.thierry@intel.com>
Fixes: af7a8ffa ("drm/i915: Use rcu instead of stop_machine in set_wedged")
Reviewed-by: default avatarMika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180302113324.23189-2-chris@chris-wilson.co.uk
parent 32078b72
...@@ -3212,8 +3212,10 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) ...@@ -3212,8 +3212,10 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
* rolling the global seqno forward (since this would complete requests * rolling the global seqno forward (since this would complete requests
* for which we haven't set the fence error to EIO yet). * for which we haven't set the fence error to EIO yet).
*/ */
for_each_engine(engine, i915, id) for_each_engine(engine, i915, id) {
i915_gem_reset_prepare_engine(engine);
engine->submit_request = nop_submit_request; engine->submit_request = nop_submit_request;
}
/* /*
* Make sure no one is running the old callback before we proceed with * Make sure no one is running the old callback before we proceed with
...@@ -3255,6 +3257,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) ...@@ -3255,6 +3257,8 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
intel_engine_init_global_seqno(engine, intel_engine_init_global_seqno(engine,
intel_engine_last_submit(engine)); intel_engine_last_submit(engine));
spin_unlock_irqrestore(&engine->timeline->lock, flags); spin_unlock_irqrestore(&engine->timeline->lock, flags);
i915_gem_reset_finish_engine(engine);
} }
wake_up_all(&i915->gpu_error.reset_queue); wake_up_all(&i915->gpu_error.reset_queue);
......
...@@ -687,6 +687,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) ...@@ -687,6 +687,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
struct rb_node *rb; struct rb_node *rb;
unsigned long flags; unsigned long flags;
GEM_TRACE("%s\n", engine->name);
spin_lock_irqsave(&engine->timeline->lock, flags); spin_lock_irqsave(&engine->timeline->lock, flags);
/* Cancel the requests on the HW and clear the ELSP tracker. */ /* Cancel the requests on the HW and clear the ELSP tracker. */
...@@ -733,6 +735,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) ...@@ -733,6 +735,9 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine)
*/ */
clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
/* Mark all CS interrupts as complete */
execlists->active = 0;
spin_unlock_irqrestore(&engine->timeline->lock, flags); spin_unlock_irqrestore(&engine->timeline->lock, flags);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment