Commit 0e178aef authored by Chris Wilson

drm/i915: Detect a failed GPU reset+recovery

If we can't recover the GPU after the reset, mark it as wedged to cancel
the outstanding tasks and to prevent new users from trying to use the
broken GPU.

v2: Check the same ring is hung again before declaring the reset broken.
v3: use engine_stalled (Mika)
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1484668747-9120-6-git-send-email-mika.kuoppala@intel.com
parent 61da5362
...@@ -1761,7 +1761,12 @@ void i915_reset(struct drm_i915_private *dev_priv) ...@@ -1761,7 +1761,12 @@ void i915_reset(struct drm_i915_private *dev_priv)
pr_notice("drm/i915: Resetting chip after gpu hang\n"); pr_notice("drm/i915: Resetting chip after gpu hang\n");
disable_irq(dev_priv->drm.irq); disable_irq(dev_priv->drm.irq);
i915_gem_reset_prepare(dev_priv); ret = i915_gem_reset_prepare(dev_priv);
if (ret) {
DRM_ERROR("GPU recovery failed\n");
intel_gpu_reset(dev_priv, ALL_ENGINES);
goto error;
}
ret = intel_gpu_reset(dev_priv, ALL_ENGINES); ret = intel_gpu_reset(dev_priv, ALL_ENGINES);
if (ret) { if (ret) {
......
...@@ -3327,7 +3327,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) ...@@ -3327,7 +3327,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error)
return READ_ONCE(error->reset_count); return READ_ONCE(error->reset_count);
} }
void i915_gem_reset_prepare(struct drm_i915_private *dev_priv); int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv);
void i915_gem_set_wedged(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv);
void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
......
...@@ -2625,16 +2625,28 @@ static bool engine_stalled(struct intel_engine_cs *engine) ...@@ -2625,16 +2625,28 @@ static bool engine_stalled(struct intel_engine_cs *engine)
return true; return true;
} }
void i915_gem_reset_prepare(struct drm_i915_private *dev_priv) int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
{ {
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
enum intel_engine_id id; enum intel_engine_id id;
int err = 0;
/* Ensure irq handler finishes, and not run again. */ /* Ensure irq handler finishes, and not run again. */
for_each_engine(engine, dev_priv, id) for_each_engine(engine, dev_priv, id) {
struct drm_i915_gem_request *request;
tasklet_kill(&engine->irq_tasklet); tasklet_kill(&engine->irq_tasklet);
if (engine_stalled(engine)) {
request = i915_gem_find_active_request(engine);
if (request && request->fence.error == -EIO)
err = -EIO; /* Previous reset failed! */
}
}
i915_gem_revoke_fences(dev_priv); i915_gem_revoke_fences(dev_priv);
return err;
} }
static void skip_request(struct drm_i915_gem_request *request) static void skip_request(struct drm_i915_gem_request *request)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment