Commit bd3ec9e7 authored by Chris Wilson

drm/i915/gt: Poison residual state [HWSP] across resume.

Since we may lose the content of any buffer when we relinquish control
of the system (e.g. suspend/resume), we have to be careful not to rely
on regaining control. A good method to detect when we might be using
garbage is by always injecting that garbage prior to first use on
load/resume/etc.

v2: Drop sanitize callback on cleanup
v3: Move seqno reset to timeline enter, so we reset all timelines.
However, this is done on every activation during runtime and not reset.
The similar level of paranoia we apply to correcting context state after
a period of inactivity.
Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Venkata Ramana Nayana <venkata.ramana.nayana@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200421092504.7416-1-chris@chris-wilson.co.uk
parent cf9ba278
...@@ -3649,7 +3649,26 @@ static void reset_csb_pointers(struct intel_engine_cs *engine) ...@@ -3649,7 +3649,26 @@ static void reset_csb_pointers(struct intel_engine_cs *engine)
static void execlists_sanitize(struct intel_engine_cs *engine) static void execlists_sanitize(struct intel_engine_cs *engine)
{ {
/*
* Poison residual state on resume, in case the suspend didn't!
*
* We have to assume that across suspend/resume (or other loss
* of control) that the contents of our pinned buffers has been
* lost, replaced by garbage. Since this doesn't always happen,
* let's poison such state so that we more quickly spot when
* we falsely assume it has been preserved.
*/
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
reset_csb_pointers(engine); reset_csb_pointers(engine);
/*
* The kernel_context HWSP is stored in the status_page. As above,
* that may be lost on resume/initialisation, and so we need to
* reset the value in the HWSP.
*/
intel_timeline_reset_seqno(engine->kernel_context->timeline);
} }
static void enable_error_interrupt(struct intel_engine_cs *engine) static void enable_error_interrupt(struct intel_engine_cs *engine)
...@@ -4539,6 +4558,8 @@ static void execlists_shutdown(struct intel_engine_cs *engine) ...@@ -4539,6 +4558,8 @@ static void execlists_shutdown(struct intel_engine_cs *engine)
static void execlists_release(struct intel_engine_cs *engine) static void execlists_release(struct intel_engine_cs *engine)
{ {
engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
execlists_shutdown(engine); execlists_shutdown(engine);
intel_engine_cleanup_common(engine); intel_engine_cleanup_common(engine);
...@@ -4550,7 +4571,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) ...@@ -4550,7 +4571,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
{ {
/* Default vfuncs which can be overriden by each engine. */ /* Default vfuncs which can be overriden by each engine. */
engine->sanitize = execlists_sanitize;
engine->resume = execlists_resume; engine->resume = execlists_resume;
engine->cops = &execlists_context_ops; engine->cops = &execlists_context_ops;
...@@ -4666,6 +4686,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) ...@@ -4666,6 +4686,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine)
execlists->csb_size = GEN11_CSB_ENTRIES; execlists->csb_size = GEN11_CSB_ENTRIES;
/* Finally, take ownership and responsibility for cleanup! */ /* Finally, take ownership and responsibility for cleanup! */
engine->sanitize = execlists_sanitize;
engine->release = execlists_release; engine->release = execlists_release;
return 0; return 0;
......
...@@ -337,6 +337,13 @@ int intel_timeline_pin(struct intel_timeline *tl) ...@@ -337,6 +337,13 @@ int intel_timeline_pin(struct intel_timeline *tl)
return 0; return 0;
} }
void intel_timeline_reset_seqno(const struct intel_timeline *tl)
{
/* Must be pinned to be writable, and no requests in flight. */
GEM_BUG_ON(!atomic_read(&tl->pin_count));
WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);
}
void intel_timeline_enter(struct intel_timeline *tl) void intel_timeline_enter(struct intel_timeline *tl)
{ {
struct intel_gt_timelines *timelines = &tl->gt->timelines; struct intel_gt_timelines *timelines = &tl->gt->timelines;
...@@ -365,8 +372,16 @@ void intel_timeline_enter(struct intel_timeline *tl) ...@@ -365,8 +372,16 @@ void intel_timeline_enter(struct intel_timeline *tl)
return; return;
spin_lock(&timelines->lock); spin_lock(&timelines->lock);
if (!atomic_fetch_inc(&tl->active_count)) if (!atomic_fetch_inc(&tl->active_count)) {
/*
* The HWSP is volatile, and may have been lost while inactive,
* e.g. across suspend/resume. Be paranoid, and ensure that
* the HWSP value matches our seqno so we don't proclaim
* the next request as already complete.
*/
intel_timeline_reset_seqno(tl);
list_add_tail(&tl->link, &timelines->active_list); list_add_tail(&tl->link, &timelines->active_list);
}
spin_unlock(&timelines->lock); spin_unlock(&timelines->lock);
} }
......
...@@ -84,6 +84,8 @@ int intel_timeline_get_seqno(struct intel_timeline *tl, ...@@ -84,6 +84,8 @@ int intel_timeline_get_seqno(struct intel_timeline *tl,
void intel_timeline_exit(struct intel_timeline *tl); void intel_timeline_exit(struct intel_timeline *tl);
void intel_timeline_unpin(struct intel_timeline *tl); void intel_timeline_unpin(struct intel_timeline *tl);
void intel_timeline_reset_seqno(const struct intel_timeline *tl);
int intel_timeline_read_hwsp(struct i915_request *from, int intel_timeline_read_hwsp(struct i915_request *from,
struct i915_request *until, struct i915_request *until,
u32 *hwsp_offset); u32 *hwsp_offset);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment