Commit 18bb2bcc authored by Chris Wilson

drm/i915: Serialise concurrent calls to i915_gem_set_wedged()

Make i915_gem_set_wedged() and i915_gem_unset_wedged() behaviour more
consistent if called concurrently, and only do the wedging and reporting
once, curtailing any possible race where we start unwedging in the middle
of a wedge.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190114210408.4561-2-chris@chris-wilson.co.uk
parent 204474a6
...@@ -3187,10 +3187,15 @@ static void nop_submit_request(struct i915_request *request) ...@@ -3187,10 +3187,15 @@ static void nop_submit_request(struct i915_request *request)
void i915_gem_set_wedged(struct drm_i915_private *i915) void i915_gem_set_wedged(struct drm_i915_private *i915)
{ {
struct i915_gpu_error *error = &i915->gpu_error;
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
enum intel_engine_id id; enum intel_engine_id id;
GEM_TRACE("start\n"); mutex_lock(&error->wedge_mutex);
if (test_bit(I915_WEDGED, &error->flags)) {
mutex_unlock(&error->wedge_mutex);
return;
}
if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) { if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(i915)) {
struct drm_printer p = drm_debug_printer(__func__); struct drm_printer p = drm_debug_printer(__func__);
...@@ -3199,8 +3204,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) ...@@ -3199,8 +3204,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
intel_engine_dump(engine, &p, "%s\n", engine->name); intel_engine_dump(engine, &p, "%s\n", engine->name);
} }
if (test_and_set_bit(I915_WEDGED, &i915->gpu_error.flags)) GEM_TRACE("start\n");
goto out;
/* /*
* First, stop submission to hw, but do not yet complete requests by * First, stop submission to hw, but do not yet complete requests by
...@@ -3236,23 +3240,31 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) ...@@ -3236,23 +3240,31 @@ void i915_gem_set_wedged(struct drm_i915_private *i915)
intel_engine_wakeup(engine); intel_engine_wakeup(engine);
} }
out: smp_mb__before_atomic();
set_bit(I915_WEDGED, &error->flags);
GEM_TRACE("end\n"); GEM_TRACE("end\n");
mutex_unlock(&error->wedge_mutex);
wake_up_all(&i915->gpu_error.reset_queue); wake_up_all(&error->reset_queue);
} }
bool i915_gem_unset_wedged(struct drm_i915_private *i915) bool i915_gem_unset_wedged(struct drm_i915_private *i915)
{ {
struct i915_gpu_error *error = &i915->gpu_error;
struct i915_timeline *tl; struct i915_timeline *tl;
bool ret = false;
lockdep_assert_held(&i915->drm.struct_mutex); lockdep_assert_held(&i915->drm.struct_mutex);
if (!test_bit(I915_WEDGED, &i915->gpu_error.flags))
if (!test_bit(I915_WEDGED, &error->flags))
return true; return true;
if (!i915->gt.scratch) /* Never full initialised, recovery impossible */ if (!i915->gt.scratch) /* Never full initialised, recovery impossible */
return false; return false;
mutex_lock(&error->wedge_mutex);
GEM_TRACE("start\n"); GEM_TRACE("start\n");
/* /*
...@@ -3286,7 +3298,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) ...@@ -3286,7 +3298,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
*/ */
if (dma_fence_default_wait(&rq->fence, true, if (dma_fence_default_wait(&rq->fence, true,
MAX_SCHEDULE_TIMEOUT) < 0) MAX_SCHEDULE_TIMEOUT) < 0)
return false; goto unlock;
} }
i915_retire_requests(i915); i915_retire_requests(i915);
GEM_BUG_ON(i915->gt.active_requests); GEM_BUG_ON(i915->gt.active_requests);
...@@ -3309,8 +3321,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) ...@@ -3309,8 +3321,11 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
clear_bit(I915_WEDGED, &i915->gpu_error.flags); clear_bit(I915_WEDGED, &i915->gpu_error.flags);
ret = true;
unlock:
mutex_unlock(&i915->gpu_error.wedge_mutex);
return true; return ret;
} }
static void static void
...@@ -5706,6 +5721,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) ...@@ -5706,6 +5721,7 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
i915_gem_idle_work_handler); i915_gem_idle_work_handler);
init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
init_waitqueue_head(&dev_priv->gpu_error.reset_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
mutex_init(&dev_priv->gpu_error.wedge_mutex);
atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);
......
...@@ -271,8 +271,8 @@ struct i915_gpu_error { ...@@ -271,8 +271,8 @@ struct i915_gpu_error {
#define I915_RESET_BACKOFF 0 #define I915_RESET_BACKOFF 0
#define I915_RESET_HANDOFF 1 #define I915_RESET_HANDOFF 1
#define I915_RESET_MODESET 2 #define I915_RESET_MODESET 2
#define I915_RESET_ENGINE 3
#define I915_WEDGED (BITS_PER_LONG - 1) #define I915_WEDGED (BITS_PER_LONG - 1)
#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES)
/** Number of times an engine has been reset */ /** Number of times an engine has been reset */
u32 reset_engine_count[I915_NUM_ENGINES]; u32 reset_engine_count[I915_NUM_ENGINES];
...@@ -283,6 +283,8 @@ struct i915_gpu_error { ...@@ -283,6 +283,8 @@ struct i915_gpu_error {
/** Reason for the current *global* reset */ /** Reason for the current *global* reset */
const char *reason; const char *reason;
struct mutex wedge_mutex; /* serialises wedging/unwedging */
/** /**
* Waitqueue to signal when a hang is detected. Used to for waiters * Waitqueue to signal when a hang is detected. Used to for waiters
* to release the struct_mutex for the reset to procede. * to release the struct_mutex for the reset to procede.
......
...@@ -188,6 +188,7 @@ struct drm_i915_private *mock_gem_device(void) ...@@ -188,6 +188,7 @@ struct drm_i915_private *mock_gem_device(void)
init_waitqueue_head(&i915->gpu_error.wait_queue); init_waitqueue_head(&i915->gpu_error.wait_queue);
init_waitqueue_head(&i915->gpu_error.reset_queue); init_waitqueue_head(&i915->gpu_error.reset_queue);
mutex_init(&i915->gpu_error.wedge_mutex);
i915->wq = alloc_ordered_workqueue("mock", 0); i915->wq = alloc_ordered_workqueue("mock", 0);
if (!i915->wq) if (!i915->wq)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment