Commit 4fdd5b4e authored by Chris Wilson's avatar Chris Wilson

drm/i915: Fix fallout of fake reset along resume

commit b2209e62 ("drm/i915/execlists: Reset the CSB head tracking on
reset/sanitization") and commit 1288786b ("drm/i915: Move GEM sanitize
from resume_early to resume") show the conflicting requirements on the
code. We must reset the GPU before trashing live state on a fast resume
(hibernation debug, or error paths), but we must only reset our state
tracking iff the GPU is reset (or power cycled). This is tricky if we
are disabling GPU reset to simulate broken hardware; we reset our state
tracking but the GPU is left intact and recovers from its stale state.

v2: Again without the assertion for forcewake, no longer required since
commit b3ee09a4 ("drm/i915/ringbuffer: Fix context restore upon reset")
as the contexts are reset from the CS ensuring everything is powered up.

Fixes: b2209e62 ("drm/i915/execlists: Reset the CSB head tracking on reset/sanitization")
Fixes: 1288786b ("drm/i915: Move GEM sanitize from resume_early to resume")
Signed-off-by: default avatarChris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: default avatarJoonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180616202534.18767-1-chris@chris-wilson.co.uk
parent b77422f8
...@@ -1841,6 +1841,8 @@ static int i915_drm_resume_early(struct drm_device *dev) ...@@ -1841,6 +1841,8 @@ static int i915_drm_resume_early(struct drm_device *dev)
else else
intel_display_set_init_power(dev_priv, true); intel_display_set_init_power(dev_priv, true);
intel_engines_sanitize(dev_priv);
enable_rpm_wakeref_asserts(dev_priv); enable_rpm_wakeref_asserts(dev_priv);
out: out:
......
...@@ -4990,8 +4990,7 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) ...@@ -4990,8 +4990,7 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
void i915_gem_sanitize(struct drm_i915_private *i915) void i915_gem_sanitize(struct drm_i915_private *i915)
{ {
struct intel_engine_cs *engine; int err;
enum intel_engine_id id;
GEM_TRACE("\n"); GEM_TRACE("\n");
...@@ -5017,14 +5016,11 @@ void i915_gem_sanitize(struct drm_i915_private *i915) ...@@ -5017,14 +5016,11 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
* it may impact the display and we are uncertain about the stability * it may impact the display and we are uncertain about the stability
* of the reset, so this could be applied to even earlier gen. * of the reset, so this could be applied to even earlier gen.
*/ */
err = -ENODEV;
if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915)) if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
WARN_ON(intel_gpu_reset(i915, ALL_ENGINES)); err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
if (!err)
/* Reset the submission backend after resume as well as the GPU reset */ intel_engines_sanitize(i915);
for_each_engine(engine, i915, id) {
if (engine->reset.reset)
engine->reset.reset(engine, NULL);
}
intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
intel_runtime_pm_put(i915); intel_runtime_pm_put(i915);
......
...@@ -1077,6 +1077,28 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915) ...@@ -1077,6 +1077,28 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
engine->set_default_submission(engine); engine->set_default_submission(engine);
} }
/**
* intel_engines_sanitize: called after the GPU has lost power
* @i915: the i915 device
*
* Anytime we reset the GPU, either with an explicit GPU reset or through a
* PCI power cycle, the GPU loses state and we must reset our state tracking
* to match. Note that calling intel_engines_sanitize() if the GPU has not
* been reset results in much confusion!
*/
void intel_engines_sanitize(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
GEM_TRACE("\n");
for_each_engine(engine, i915, id) {
if (engine->reset.reset)
engine->reset.reset(engine, NULL);
}
}
/** /**
* intel_engines_park: called when the GT is transitioning from busy->idle * intel_engines_park: called when the GT is transitioning from busy->idle
* @i915: the i915 device * @i915: the i915 device
......
...@@ -563,14 +563,6 @@ static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq) ...@@ -563,14 +563,6 @@ static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq)
{ {
GEM_TRACE("%s seqno=%x\n", engine->name, rq ? rq->global_seqno : 0); GEM_TRACE("%s seqno=%x\n", engine->name, rq ? rq->global_seqno : 0);
/*
* RC6 must be prevented until the reset is complete and the engine
* reinitialised. If it occurs in the middle of this sequence, the
* state written to/loaded from the power context is ill-defined (e.g.
* the PP_BASE_DIR may be lost).
*/
assert_forcewakes_active(engine->i915, FORCEWAKE_ALL);
/* /*
* Try to restore the logical GPU state to match the continuation * Try to restore the logical GPU state to match the continuation
* of the request queue. If we skip the context/PD restore, then * of the request queue. If we skip the context/PD restore, then
......
...@@ -1052,6 +1052,8 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset) ...@@ -1052,6 +1052,8 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
return cs; return cs;
} }
void intel_engines_sanitize(struct drm_i915_private *i915);
bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engine_is_idle(struct intel_engine_cs *engine);
bool intel_engines_are_idle(struct drm_i915_private *dev_priv); bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment