Commit 3ec7d11b authored by Alex Deucher

drm/radeon: add fault decode function for CIK

Helpful for debugging GPUVM errors as we can see what
hw block and page generated the fault in the log.
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent fbf6dc7a
...@@ -4441,6 +4441,29 @@ void cik_vm_fini(struct radeon_device *rdev) ...@@ -4441,6 +4441,29 @@ void cik_vm_fini(struct radeon_device *rdev)
{ {
} }
/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
 *
 * Extracts the protection bits, VMID, read/write flag and memory client id
 * from @status and prints them together with @addr (reported as the page)
 * and the four bytes of @mc_client rendered as text (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	const char *raw = (const char *)&mc_client;
	char block[sizeof(mc_client) + 1];
	unsigned int i;

	/*
	 * mc_client is a bare 32-bit value with no terminating NUL, so it
	 * must not be handed to %s directly (the format would read past the
	 * end of the variable).  Copy its bytes into a buffer that is one
	 * byte larger and terminate it explicitly.
	 *
	 * NOTE(review): the bytes are kept in host memory order, exactly as
	 * the previous pointer cast produced them; confirm against the
	 * register spec whether the client name is packed MSB first.
	 */
	for (i = 0; i < sizeof(mc_client); i++)
		block[i] = raw[i];
	block[sizeof(mc_client)] = '\0';

	/* All decoded fields are unsigned, hence %u rather than %d. */
	printk("VM fault (0x%02x, vmid %u) at page %u, %s from %s (%u)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
/** /**
* cik_vm_flush - cik vm flush using the CP * cik_vm_flush - cik vm flush using the CP
* *
...@@ -5496,6 +5519,7 @@ int cik_irq_process(struct radeon_device *rdev) ...@@ -5496,6 +5519,7 @@ int cik_irq_process(struct radeon_device *rdev)
u32 ring_index; u32 ring_index;
bool queue_hotplug = false; bool queue_hotplug = false;
bool queue_reset = false; bool queue_reset = false;
u32 addr, status, mc_client;
if (!rdev->ih.enabled || rdev->shutdown) if (!rdev->ih.enabled || rdev->shutdown)
return IRQ_NONE; return IRQ_NONE;
...@@ -5731,11 +5755,15 @@ int cik_irq_process(struct radeon_device *rdev) ...@@ -5731,11 +5755,15 @@ int cik_irq_process(struct radeon_device *rdev)
break; break;
case 146: case 146:
case 147: case 147:
addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data); dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR)); addr);
dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS)); status);
cik_vm_decode_fault(rdev, status, addr, mc_client);
/* reset addr and status */ /* reset addr and status */
WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1); WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
break; break;
......
...@@ -136,6 +136,22 @@ ...@@ -136,6 +136,22 @@
#define VM_INVALIDATE_RESPONSE 0x147c #define VM_INVALIDATE_RESPONSE 0x147c
#define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x14DC #define VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x14DC
/*
 * Fields of VM_CONTEXT1_PROTECTION_FAULT_STATUS.  Every mask is expressed
 * through its companion shift so the two cannot drift apart.
 */
#define PROTECTIONS_SHIFT 0
#define PROTECTIONS_MASK (0xf << PROTECTIONS_SHIFT)
/*
 * Protection bits:
 *   bit 0: range
 *   bit 1: pde0
 *   bit 2: valid
 *   bit 3: read
 *   bit 4: write
 * NOTE(review): PROTECTIONS_MASK is four bits wide (bits 0-3), so bit 4
 * listed here is not covered by it - confirm the intended field width.
 */
#define MEMORY_CLIENT_ID_SHIFT 12
#define MEMORY_CLIENT_ID_MASK (0xff << MEMORY_CLIENT_ID_SHIFT)
#define MEMORY_CLIENT_RW_SHIFT 24
#define MEMORY_CLIENT_RW_MASK (1 << MEMORY_CLIENT_RW_SHIFT)
#define FAULT_VMID_SHIFT 25
#define FAULT_VMID_MASK (0xf << FAULT_VMID_SHIFT)
/* Register offset read by the fault handler to obtain the mc_client value. */
#define VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT 0x14E4
#define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x14FC #define VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x14FC
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment