Commit 90d27a1b authored by Pei Zhang, committed by Zhenyu Wang

drm/i915/gvt: fix deadlock in workload_thread

This is a classic ABBA deadlock between two mutexes, gvt.lock (a) and
drm.struct_mutex (b). The deadlock arises between two threads that take
the locks in opposite orders (sketched below):
1. intel_gvt_create/destroy_vgpu: P(a)->P(b)
2. workload_thread: P(b)->P(a)
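
A minimal sketch of the colliding orders (illustrative only, not code
from this patch; lock_a stands in for gvt.lock, lock_b for
drm.struct_mutex, and the function names are made up):

  #include <linux/mutex.h>

  static DEFINE_MUTEX(lock_a);	/* stands in for gvt.lock */
  static DEFINE_MUTEX(lock_b);	/* stands in for drm.struct_mutex */

  /* Thread 1, i.e. the create/destroy_vgpu path: P(a)->P(b) */
  static void thread1_path(void)
  {
  	mutex_lock(&lock_a);
  	mutex_lock(&lock_b);	/* sleeps if thread 2 already holds b */
  	/* ... */
  	mutex_unlock(&lock_b);
  	mutex_unlock(&lock_a);
  }

  /* Thread 2, i.e. workload_thread before this patch: P(b)->P(a) */
  static void thread2_path(void)
  {
  	mutex_lock(&lock_b);
  	mutex_lock(&lock_a);	/* sleeps if thread 1 already holds a */
  	/* ... */
  	mutex_unlock(&lock_a);
  	mutex_unlock(&lock_b);
  }

If thread 1 holds a while thread 2 holds b, each sleeps forever waiting
for the lock the other holds.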

The fix is to make both threads acquire the locks in the same order;
this patch adjusts the acquisition order in the workload_thread path,
as sketched below.

This fixes the lockup seen in the guest-reboot stress test.
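
After the patch (again illustrative only, reusing lock_a/lock_b from
the sketch above), both paths take the locks in the same a-then-b
order:

  /*
   * workload_thread now takes gvt.lock first, and dispatch_workload()
   * takes drm.struct_mutex internally, so the order matches the
   * create/destroy_vgpu path: P(a)->P(b).
   */
  static void thread2_path_fixed(void)
  {
  	mutex_lock(&lock_a);	/* gvt.lock */
  	mutex_lock(&lock_b);	/* drm.struct_mutex */
  	/* ... dispatch the workload ... */
  	mutex_unlock(&lock_b);
  	mutex_unlock(&lock_a);
  }

With a consistent order there is no cycle, so neither thread can block
the other mid-acquisition.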

v2: adjust the sequence in workload_thread based on Zhenyu's suggestion;
    adjust the sequence in the create/destroy_vgpu functions.
v3: fix to still take struct_mutex for dispatch_workload()
Signed-off-by: Pei Zhang <pei.zhang@intel.com>
[zhenyuw: fix unused variable warnings.]
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
parent 8be8f4a9
@@ -160,8 +160,6 @@ static int shadow_context_status_change(struct notifier_block *nb,
 static int dispatch_workload(struct intel_vgpu_workload *workload)
 {
-	struct intel_vgpu *vgpu = workload->vgpu;
-	struct intel_gvt *gvt = vgpu->gvt;
 	int ring_id = workload->ring_id;
 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
@@ -174,6 +172,8 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	shadow_ctx->desc_template = workload->ctx_desc.addressing_mode <<
 				    GEN8_CTX_ADDRESSING_MODE_SHIFT;
 
+	mutex_lock(&dev_priv->drm.struct_mutex);
+
 	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
 	if (IS_ERR(rq)) {
 		gvt_err("fail to allocate gem request\n");
@@ -185,40 +185,35 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	workload->req = i915_gem_request_get(rq);
 
-	mutex_lock(&gvt->lock);
-
 	ret = intel_gvt_scan_and_shadow_workload(workload);
 	if (ret)
-		goto err;
+		goto out;
 
 	ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
 	if (ret)
-		goto err;
+		goto out;
 
 	ret = populate_shadow_context(workload);
 	if (ret)
-		goto err;
+		goto out;
 
 	if (workload->prepare) {
 		ret = workload->prepare(workload);
 		if (ret)
-			goto err;
+			goto out;
 	}
 
-	mutex_unlock(&gvt->lock);
-
 	gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
 			ring_id, workload->req);
 
-	i915_add_request_no_flush(rq);
+	ret = 0;
 	workload->dispatched = true;
-	return 0;
-
-err:
-	workload->status = ret;
-
-	mutex_unlock(&gvt->lock);
+out:
+	if (ret)
+		workload->status = ret;
 
 	i915_add_request_no_flush(rq);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return ret;
 }
@@ -438,9 +433,9 @@ static int workload_thread(void *priv)
 		intel_uncore_forcewake_get(gvt->dev_priv,
 				FORCEWAKE_ALL);
 
-	mutex_lock(&gvt->dev_priv->drm.struct_mutex);
+	mutex_lock(&gvt->lock);
 	ret = dispatch_workload(workload);
-	mutex_unlock(&gvt->dev_priv->drm.struct_mutex);
+	mutex_unlock(&gvt->lock);
 
 	if (ret) {
 		gvt_err("fail to dispatch workload, skip\n");
@@ -463,9 +458,7 @@ static int workload_thread(void *priv)
 		gvt_dbg_sched("will complete workload %p\n, status: %d\n",
 				workload, workload->status);
 
-		mutex_lock(&gvt->dev_priv->drm.struct_mutex);
 		complete_current_workload(gvt, ring_id);
-		mutex_unlock(&gvt->dev_priv->drm.struct_mutex);
 
 		i915_gem_request_put(fetch_and_zero(&workload->req));