Commit c2d2588c authored by Jonathan Kim's avatar Jonathan Kim Committed by Alex Deucher

drm/amdkfd: add send exception operation

Add a debug operation that allows the debugger to send an exception
directly to runtime through a payload address.

For memory violations, normal vm-fault signals will be used to notify
the runtime instead, after the debugger passes back the exception data
that was saved when the memory violation was originally raised to it.

For runtime exceptions, this will unblock the runtime enable
function which will be explained and implemented in a follow up
patch.
Signed-off-by: default avatarJonathan Kim <jonathan.kim@amd.com>
Reviewed-by: default avatarFelix Kuehling <felix.kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 44b87bb0
...@@ -118,9 +118,9 @@ static void cik_event_interrupt_wq(struct kfd_node *dev, ...@@ -118,9 +118,9 @@ static void cik_event_interrupt_wq(struct kfd_node *dev,
return; return;
if (info.vmid == vmid) if (info.vmid == vmid)
kfd_signal_vm_fault_event(dev, pasid, &info); kfd_signal_vm_fault_event(dev, pasid, &info, NULL);
else else
kfd_signal_vm_fault_event(dev, pasid, NULL); kfd_signal_vm_fault_event(dev, pasid, NULL, NULL);
} }
} }
......
...@@ -2833,6 +2833,11 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v ...@@ -2833,6 +2833,11 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v
r = kfd_dbg_trap_disable(target); r = kfd_dbg_trap_disable(target);
break; break;
case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT: case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT:
r = kfd_dbg_send_exception_to_runtime(target,
args->send_runtime_event.gpu_id,
args->send_runtime_event.queue_id,
args->send_runtime_event.exception_mask);
break;
case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED:
case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE:
case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:
......
...@@ -125,6 +125,49 @@ bool kfd_dbg_ev_raise(uint64_t event_mask, ...@@ -125,6 +125,49 @@ bool kfd_dbg_ev_raise(uint64_t event_mask,
return is_subscribed; return is_subscribed;
} }
/**
 * kfd_dbg_send_exception_to_runtime - forward a debugger-raised exception
 * to the HSA runtime.
 * @p:            target process the exception belongs to
 * @dev_id:       GPU id identifying the per-device data of the faulting device
 * @queue_id:     user queue whose context save area carries the error payload
 * @error_reason: KFD_EC_MASK() bitmask of exception causes
 *
 * Memory-violation bits are translated into a regular vm-fault signal using
 * the exception data previously saved on the device, after evicting the
 * process's queues. A pending EC_PROCESS_RUNTIME bit releases the runtime
 * enable semaphore. Any remaining bits are written to the queue's error
 * payload via kfd_send_exception_to_runtime().
 *
 * Return: 0 on success, -ENODEV if @dev_id matches no device of @p, or the
 * result of kfd_send_exception_to_runtime() for leftover exception bits.
 */
int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
		unsigned int dev_id,
		unsigned int queue_id,
		uint64_t error_reason)
{
	if (error_reason & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
		struct kfd_hsa_memory_exception_data *exc_data;
		struct kfd_process_device *pdd = NULL;
		int idx;

		/* Resolve the per-device data for the faulting GPU. */
		for (idx = 0; idx < p->n_pdds; idx++) {
			struct kfd_process_device *cur = p->pdds[idx];

			if (cur->dev->id == dev_id) {
				pdd = cur;
				break;
			}
		}

		if (!pdd)
			return -ENODEV;

		/* Replay the saved fault data as a normal vm-fault event. */
		exc_data = (struct kfd_hsa_memory_exception_data *)
						pdd->vm_fault_exc_data;

		kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid);
		kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, exc_data);
		error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
	}

	if (error_reason & (KFD_EC_MASK(EC_PROCESS_RUNTIME))) {
		/*
		 * block should only happen after the debugger receives runtime
		 * enable notice.
		 */
		up(&p->runtime_enable_sema);
		error_reason &= ~KFD_EC_MASK(EC_PROCESS_RUNTIME);
	}

	if (!error_reason)
		return 0;

	return kfd_send_exception_to_runtime(p, queue_id, error_reason);
}
static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable) static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
{ {
struct mqd_update_info minfo = {0}; struct mqd_update_info minfo = {0};
......
...@@ -34,6 +34,12 @@ int kfd_dbg_trap_disable(struct kfd_process *target); ...@@ -34,6 +34,12 @@ int kfd_dbg_trap_disable(struct kfd_process *target);
int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd, int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
void __user *runtime_info, void __user *runtime_info,
uint32_t *runtime_info_size); uint32_t *runtime_info_size);
int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
unsigned int dev_id,
unsigned int queue_id,
uint64_t error_reason);
static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev) static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev)
{ {
return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
......
...@@ -1222,7 +1222,8 @@ void kfd_signal_hw_exception_event(u32 pasid) ...@@ -1222,7 +1222,8 @@ void kfd_signal_hw_exception_event(u32 pasid)
} }
void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid, void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
struct kfd_vm_fault_info *info) struct kfd_vm_fault_info *info,
struct kfd_hsa_memory_exception_data *data)
{ {
struct kfd_event *ev; struct kfd_event *ev;
uint32_t id; uint32_t id;
......
...@@ -362,7 +362,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev, ...@@ -362,7 +362,7 @@ static void event_interrupt_wq_v9(struct kfd_node *dev,
kfd_smi_event_update_vmfault(dev, pasid); kfd_smi_event_update_vmfault(dev, pasid);
kfd_dqm_evict_pasid(dev->dqm, pasid); kfd_dqm_evict_pasid(dev->dqm, pasid);
kfd_signal_vm_fault_event(dev, pasid, &info); kfd_signal_vm_fault_event(dev, pasid, &info, NULL);
} }
} }
......
...@@ -979,6 +979,7 @@ struct kfd_process { ...@@ -979,6 +979,7 @@ struct kfd_process {
bool queues_paused; bool queues_paused;
/* Tracks runtime enable status */ /* Tracks runtime enable status */
struct semaphore runtime_enable_sema;
struct kfd_runtime_info runtime_info; struct kfd_runtime_info runtime_info;
}; };
...@@ -1447,7 +1448,8 @@ int kfd_get_num_events(struct kfd_process *p); ...@@ -1447,7 +1448,8 @@ int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid, void kfd_signal_vm_fault_event(struct kfd_node *dev, u32 pasid,
struct kfd_vm_fault_info *info); struct kfd_vm_fault_info *info,
struct kfd_hsa_memory_exception_data *data);
void kfd_signal_reset_event(struct kfd_node *dev); void kfd_signal_reset_event(struct kfd_node *dev);
...@@ -1463,6 +1465,9 @@ static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev) ...@@ -1463,6 +1465,9 @@ static inline bool kfd_flush_tlb_after_unmap(struct kfd_dev *dev)
KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0); KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 0);
} }
int kfd_send_exception_to_runtime(struct kfd_process *p,
unsigned int queue_id,
uint64_t error_reason);
bool kfd_is_locked(void); bool kfd_is_locked(void);
/* Compute profile */ /* Compute profile */
......
...@@ -1462,6 +1462,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) ...@@ -1462,6 +1462,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
process->debugger_process = NULL; process->debugger_process = NULL;
process->exception_enable_mask = 0; process->exception_enable_mask = 0;
atomic_set(&process->debugged_process_count, 0); atomic_set(&process->debugged_process_count, 0);
sema_init(&process->runtime_enable_sema, 0);
process->pasid = kfd_pasid_alloc(); process->pasid = kfd_pasid_alloc();
if (process->pasid == 0) { if (process->pasid == 0) {
...@@ -2120,6 +2121,75 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) ...@@ -2120,6 +2121,75 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)
} }
} }
/*
 * On-stack context passed from kfd_send_exception_to_runtime() to
 * send_exception_work_handler() via the workqueue.
 */
struct send_exception_work_handler_workarea {
	struct work_struct work;	/* embedded work item (on caller's stack) */
	struct kfd_process *p;		/* process whose queue receives the exception */
	unsigned int queue_id;		/* user queue id owning the error payload */
	uint64_t error_reason;		/* exception mask to OR into the payload */
};
/*
 * Workqueue handler that delivers an exception mask to the runtime through
 * the queue's context save area: it ORs @error_reason into the user-space
 * error payload and signals the queue's error event.
 *
 * Runs with the target process's mm adopted (kthread_use_mm) so that the
 * user-space copy helpers can reach the context save area.
 *
 * Fix vs. original: every get_user()/put_user() result is checked. The
 * original ignored them, so a faulting copy left err_payload_ptr, cur_err
 * or ev_id indeterminate and the stale values were still dereferenced/used.
 */
static void send_exception_work_handler(struct work_struct *work)
{
	struct send_exception_work_handler_workarea *workarea;
	struct kfd_process *p;
	struct queue *q;
	struct mm_struct *mm;
	struct kfd_context_save_area_header __user *csa_header;
	uint64_t __user *err_payload_ptr;
	uint64_t cur_err;
	uint32_t ev_id;

	workarea = container_of(work,
				struct send_exception_work_handler_workarea,
				work);
	p = workarea->p;

	/* Adopt the target process's address space for the user copies. */
	mm = get_task_mm(p->lead_thread);
	if (!mm)
		return;

	kthread_use_mm(mm);

	q = pqm_get_user_queue(&p->pqm, workarea->queue_id);
	if (!q)
		goto out;

	csa_header = (void __user *)q->properties.ctx_save_restore_area_address;

	/* Bail out on any user-copy fault; nothing sensible to signal then. */
	if (get_user(err_payload_ptr,
		     (uint64_t __user **)&csa_header->err_payload_addr))
		goto out;
	if (get_user(cur_err, err_payload_ptr))
		goto out;
	cur_err |= workarea->error_reason;
	if (put_user(cur_err, err_payload_ptr))
		goto out;
	if (get_user(ev_id, &csa_header->err_event_id))
		goto out;

	kfd_set_event(p, ev_id);

out:
	kthread_unuse_mm(mm);
	mmput(mm);
}
/**
 * kfd_send_exception_to_runtime - synchronously deliver an exception mask
 * to the runtime via the queue's error payload.
 * @p:            process owning the queue
 * @queue_id:     user queue id
 * @error_reason: exception bits to report
 *
 * Schedules send_exception_work_handler() on an on-stack work item and
 * waits for it to finish, so the user-space copies run from a worker that
 * may adopt the process's mm.
 *
 * Return: always 0.
 */
int kfd_send_exception_to_runtime(struct kfd_process *p,
			unsigned int queue_id,
			uint64_t error_reason)
{
	struct send_exception_work_handler_workarea worker = {
		.p = p,
		.queue_id = queue_id,
		.error_reason = error_reason,
	};

	INIT_WORK_ONSTACK(&worker.work, send_exception_work_handler);

	schedule_work(&worker.work);
	flush_work(&worker.work);
	destroy_work_on_stack(&worker.work);

	return 0;
}
struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id) struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id)
{ {
int i; int i;
...@@ -2179,4 +2249,3 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) ...@@ -2179,4 +2249,3 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data)
} }
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment