Commit 4816b626 authored by Rob Clark

drm/msm: print offender task name on hangcheck recovery

Track the pid per submit, so we can print the name of the task which
submitted the batch that caused the gpu to hang.
Signed-off-by: Rob Clark <robdclark@gmail.com>
parent 40e6815b
...@@ -86,6 +86,7 @@ struct msm_gem_submit { ...@@ -86,6 +86,7 @@ struct msm_gem_submit {
struct list_head bo_list; struct list_head bo_list;
struct ww_acquire_ctx ticket; struct ww_acquire_ctx ticket;
struct fence *fence; struct fence *fence;
struct pid *pid; /* submitting process */
bool valid; /* true if no cmdstream patching needed */ bool valid; /* true if no cmdstream patching needed */
unsigned int nr_cmds; unsigned int nr_cmds;
unsigned int nr_bos; unsigned int nr_bos;
......
...@@ -45,6 +45,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, ...@@ -45,6 +45,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
submit->dev = dev; submit->dev = dev;
submit->gpu = gpu; submit->gpu = gpu;
submit->pid = get_pid(task_pid(current));
/* initially, until copy_from_user() and bo lookup succeeds: */ /* initially, until copy_from_user() and bo lookup succeeds: */
submit->nr_bos = 0; submit->nr_bos = 0;
...@@ -60,6 +61,7 @@ void msm_gem_submit_free(struct msm_gem_submit *submit) ...@@ -60,6 +61,7 @@ void msm_gem_submit_free(struct msm_gem_submit *submit)
{ {
fence_put(submit->fence); fence_put(submit->fence);
list_del(&submit->node); list_del(&submit->node);
put_pid(submit->pid);
kfree(submit); kfree(submit);
} }
......
...@@ -272,16 +272,30 @@ static void recover_worker(struct work_struct *work) ...@@ -272,16 +272,30 @@ static void recover_worker(struct work_struct *work)
{ {
struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
struct drm_device *dev = gpu->dev; struct drm_device *dev = gpu->dev;
struct msm_gem_submit *submit;
uint32_t fence = gpu->funcs->last_fence(gpu); uint32_t fence = gpu->funcs->last_fence(gpu);
dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
msm_update_fence(gpu->fctx, fence + 1); msm_update_fence(gpu->fctx, fence + 1);
mutex_lock(&dev->struct_mutex); mutex_lock(&dev->struct_mutex);
if (msm_gpu_active(gpu)) {
struct msm_gem_submit *submit;
dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
list_for_each_entry(submit, &gpu->submit_list, node) {
if (submit->fence->seqno == (fence + 1)) {
struct task_struct *task;
rcu_read_lock();
task = pid_task(submit->pid, PIDTYPE_PID);
if (task) {
dev_err(dev->dev, "%s: offending task: %s\n",
gpu->name, task->comm);
}
rcu_read_unlock();
break;
}
}
if (msm_gpu_active(gpu)) {
/* retire completed submits, plus the one that hung: */ /* retire completed submits, plus the one that hung: */
retire_submits(gpu); retire_submits(gpu);
...@@ -293,6 +307,7 @@ static void recover_worker(struct work_struct *work) ...@@ -293,6 +307,7 @@ static void recover_worker(struct work_struct *work)
gpu->funcs->submit(gpu, submit, NULL); gpu->funcs->submit(gpu, submit, NULL);
} }
} }
mutex_unlock(&dev->struct_mutex); mutex_unlock(&dev->struct_mutex);
msm_gpu_retire(gpu); msm_gpu_retire(gpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment