Commit efaa9646 authored by Andrey Grodzovsky's avatar Andrey Grodzovsky Committed by Alex Deucher

drm/amdgpu: Present amdgpu_task_info in VM_FAULTS.

Extract and present the reposnsible process and thread when
VM_FAULT happens.

v2: Use getter and setter functions.
Signed-off-by: default avatarAndrey Grodzovsky <andrey.grodzovsky@amd.com>
Acked-by: default avatarJim Qu <Jim.Qu@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 2aa37bf5
...@@ -187,6 +187,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) ...@@ -187,6 +187,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
if (p->uf_entry.robj) if (p->uf_entry.robj)
p->job->uf_addr = uf_offset; p->job->uf_addr = uf_offset;
kfree(chunk_array); kfree(chunk_array);
/* Use this opportunity to fill in task info for the vm */
amdgpu_vm_set_task_info(vm);
return 0; return 0;
free_all_kdata: free_all_kdata:
......
...@@ -44,7 +44,6 @@ ...@@ -44,7 +44,6 @@
#include "amdgpu_atombios.h" #include "amdgpu_atombios.h"
static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev); static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev); static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static int gmc_v8_0_wait_for_idle(void *handle); static int gmc_v8_0_wait_for_idle(void *handle);
...@@ -1447,8 +1446,13 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, ...@@ -1447,8 +1446,13 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
gmc_v8_0_set_fault_enable_default(adev, false); gmc_v8_0_set_fault_enable_default(adev, false);
if (printk_ratelimit()) { if (printk_ratelimit()) {
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", struct amdgpu_task_info task_info = { 0 };
entry->src_id, entry->src_data[0]);
amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n",
entry->src_id, entry->src_data[0], task_info.process_name,
task_info.tgid, task_info.task_name, task_info.pid);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
addr); addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
......
...@@ -257,11 +257,16 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, ...@@ -257,11 +257,16 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
} }
if (printk_ratelimit()) { if (printk_ratelimit()) {
struct amdgpu_task_info task_info = { 0 };
amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
dev_err(adev->dev, dev_err(adev->dev,
"[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d\n)\n",
entry->vmid_src ? "mmhub" : "gfxhub", entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid, entry->src_id, entry->ring_id, entry->vmid,
entry->pasid); entry->pasid, task_info.process_name, task_info.tgid,
task_info.task_name, task_info.pid);
dev_err(adev->dev, " at page 0x%016llx from %d\n", dev_err(adev->dev, " at page 0x%016llx from %d\n",
addr, entry->client_id); addr, entry->client_id);
if (!amdgpu_sriov_vf(adev)) if (!amdgpu_sriov_vf(adev))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment