Commit 33df8a76 authored by Chris Wilson

drm/i915: Prevent lock-cycles between GPU waits and GPU resets

We cannot allow ourselves to wait on the GPU while holding any lock, as we
may need to reset the GPU. Because there is no explicit lock shared between
the two operations, lockdep cannot detect the dependency on its own. So let's
tell lockdep about the wait/reset dependency with an explicit lockmap.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190612085246.16374-1-chris@chris-wilson.co.uk
parent ea60f4bd
...@@ -978,10 +978,11 @@ void i915_reset(struct drm_i915_private *i915, ...@@ -978,10 +978,11 @@ void i915_reset(struct drm_i915_private *i915,
might_sleep(); might_sleep();
GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
lock_map_acquire(&i915->gt.reset_lockmap);
/* Clear any previous failed attempts at recovery. Time to try again. */ /* Clear any previous failed attempts at recovery. Time to try again. */
if (!__i915_gem_unset_wedged(i915)) if (!__i915_gem_unset_wedged(i915))
return; goto unlock;
if (reason) if (reason)
dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
...@@ -1029,6 +1030,8 @@ void i915_reset(struct drm_i915_private *i915, ...@@ -1029,6 +1030,8 @@ void i915_reset(struct drm_i915_private *i915,
finish: finish:
reset_finish(i915); reset_finish(i915);
unlock:
lock_map_release(&i915->gt.reset_lockmap);
return; return;
taint: taint:
......
...@@ -1919,6 +1919,14 @@ struct drm_i915_private { ...@@ -1919,6 +1919,14 @@ struct drm_i915_private {
ktime_t last_init_time; ktime_t last_init_time;
struct i915_vma *scratch; struct i915_vma *scratch;
/*
* We must never wait on the GPU while holding a lock as we
* may need to perform a GPU reset. So while we don't need to
* serialise wait/reset with an explicit lock, we do want
* lockdep to detect potential dependency cycles.
*/
struct lockdep_map reset_lockmap;
} gt; } gt;
struct { struct {
......
...@@ -1782,6 +1782,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) ...@@ -1782,6 +1782,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915)
int i915_gem_init_early(struct drm_i915_private *dev_priv) int i915_gem_init_early(struct drm_i915_private *dev_priv)
{ {
static struct lock_class_key reset_key;
int err; int err;
intel_gt_pm_init(dev_priv); intel_gt_pm_init(dev_priv);
...@@ -1789,6 +1790,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) ...@@ -1789,6 +1790,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
INIT_LIST_HEAD(&dev_priv->gt.active_rings); INIT_LIST_HEAD(&dev_priv->gt.active_rings);
INIT_LIST_HEAD(&dev_priv->gt.closed_vma); INIT_LIST_HEAD(&dev_priv->gt.closed_vma);
spin_lock_init(&dev_priv->gt.closed_lock); spin_lock_init(&dev_priv->gt.closed_lock);
lockdep_init_map(&dev_priv->gt.reset_lockmap,
"i915.reset", &reset_key, 0);
i915_gem_init__mm(dev_priv); i915_gem_init__mm(dev_priv);
i915_gem_init__pm(dev_priv); i915_gem_init__pm(dev_priv);
......
...@@ -1444,6 +1444,7 @@ long i915_request_wait(struct i915_request *rq, ...@@ -1444,6 +1444,7 @@ long i915_request_wait(struct i915_request *rq,
return -ETIME; return -ETIME;
trace_i915_request_wait_begin(rq, flags); trace_i915_request_wait_begin(rq, flags);
lock_map_acquire(&rq->i915->gt.reset_lockmap);
/* /*
* Optimistic spin before touching IRQs. * Optimistic spin before touching IRQs.
...@@ -1517,6 +1518,7 @@ long i915_request_wait(struct i915_request *rq, ...@@ -1517,6 +1518,7 @@ long i915_request_wait(struct i915_request *rq,
dma_fence_remove_callback(&rq->fence, &wait.cb); dma_fence_remove_callback(&rq->fence, &wait.cb);
out: out:
lock_map_release(&rq->i915->gt.reset_lockmap);
trace_i915_request_wait_end(rq); trace_i915_request_wait_end(rq);
return timeout; return timeout;
} }
......
...@@ -130,6 +130,7 @@ static struct dev_pm_domain pm_domain = { ...@@ -130,6 +130,7 @@ static struct dev_pm_domain pm_domain = {
struct drm_i915_private *mock_gem_device(void) struct drm_i915_private *mock_gem_device(void)
{ {
static struct lock_class_key reset_key;
struct drm_i915_private *i915; struct drm_i915_private *i915;
struct pci_dev *pdev; struct pci_dev *pdev;
int err; int err;
...@@ -204,6 +205,7 @@ struct drm_i915_private *mock_gem_device(void) ...@@ -204,6 +205,7 @@ struct drm_i915_private *mock_gem_device(void)
INIT_LIST_HEAD(&i915->gt.active_rings); INIT_LIST_HEAD(&i915->gt.active_rings);
INIT_LIST_HEAD(&i915->gt.closed_vma); INIT_LIST_HEAD(&i915->gt.closed_vma);
spin_lock_init(&i915->gt.closed_lock); spin_lock_init(&i915->gt.closed_lock);
lockdep_init_map(&i915->gt.reset_lockmap, "i915.reset", &reset_key, 0);
mutex_lock(&i915->drm.struct_mutex); mutex_lock(&i915->drm.struct_mutex);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment