Commit 40e8a766, authored by David Yat Sin, committed by Alex Deucher

drm/amdkfd: CRIU checkpoint and restore events

Add support to the existing CRIU ioctls to save and restore events during
CRIU checkpoint and restore.
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: David Yat Sin <david.yatsin@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 3a9822d7
...@@ -1008,57 +1008,11 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, ...@@ -1008,57 +1008,11 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
* through the event_page_offset field. * through the event_page_offset field.
*/ */
if (args->event_page_offset) { if (args->event_page_offset) {
struct kfd_dev *kfd;
struct kfd_process_device *pdd;
void *mem, *kern_addr;
uint64_t size;
kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
if (!kfd) {
pr_err("Getting device by id failed in %s\n", __func__);
return -EINVAL;
}
mutex_lock(&p->mutex); mutex_lock(&p->mutex);
err = kfd_kmap_event_page(p, args->event_page_offset);
if (p->signal_page) {
pr_err("Event page is already set\n");
err = -EINVAL;
goto out_unlock;
}
pdd = kfd_bind_process_to_device(kfd, p);
if (IS_ERR(pdd)) {
err = PTR_ERR(pdd);
goto out_unlock;
}
mem = kfd_process_device_translate_handle(pdd,
GET_IDR_HANDLE(args->event_page_offset));
if (!mem) {
pr_err("Can't find BO, offset is 0x%llx\n",
args->event_page_offset);
err = -EINVAL;
goto out_unlock;
}
err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev,
mem, &kern_addr, &size);
if (err) {
pr_err("Failed to map event page to kernel\n");
goto out_unlock;
}
err = kfd_event_page_set(p, kern_addr, size);
if (err) {
pr_err("Failed to set event page\n");
amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem);
goto out_unlock;
}
p->signal_handle = args->event_page_offset;
mutex_unlock(&p->mutex); mutex_unlock(&p->mutex);
if (err)
return err;
} }
err = kfd_event_create(filp, p, args->event_type, err = kfd_event_create(filp, p, args->event_type,
...@@ -1067,10 +1021,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p, ...@@ -1067,10 +1021,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
&args->event_page_offset, &args->event_page_offset,
&args->event_slot_index); &args->event_slot_index);
return err; pr_debug("Created event (id:0x%08x) (%s)\n", args->event_id, __func__);
out_unlock:
mutex_unlock(&p->mutex);
return err; return err;
} }
...@@ -2031,7 +1982,7 @@ static int criu_get_process_object_info(struct kfd_process *p, ...@@ -2031,7 +1982,7 @@ static int criu_get_process_object_info(struct kfd_process *p,
if (ret) if (ret)
return ret; return ret;
num_events = 0; /* TODO: Implement Events */ num_events = kfd_get_num_events(p);
num_svm_ranges = 0; /* TODO: Implement SVM-Ranges */ num_svm_ranges = 0; /* TODO: Implement SVM-Ranges */
*num_objects = num_queues + num_events + num_svm_ranges; *num_objects = num_queues + num_events + num_svm_ranges;
...@@ -2040,7 +1991,7 @@ static int criu_get_process_object_info(struct kfd_process *p, ...@@ -2040,7 +1991,7 @@ static int criu_get_process_object_info(struct kfd_process *p,
priv_size = sizeof(struct kfd_criu_process_priv_data); priv_size = sizeof(struct kfd_criu_process_priv_data);
priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data); priv_size += *num_bos * sizeof(struct kfd_criu_bo_priv_data);
priv_size += queues_priv_data_size; priv_size += queues_priv_data_size;
/* TODO: Add Events priv size */ priv_size += num_events * sizeof(struct kfd_criu_event_priv_data);
/* TODO: Add SVM ranges priv size */ /* TODO: Add SVM ranges priv size */
*objs_priv_size = priv_size; *objs_priv_size = priv_size;
} }
...@@ -2102,7 +2053,10 @@ static int criu_checkpoint(struct file *filep, ...@@ -2102,7 +2053,10 @@ static int criu_checkpoint(struct file *filep,
if (ret) if (ret)
goto exit_unlock; goto exit_unlock;
/* TODO: Dump Events */ ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
&priv_offset);
if (ret)
goto exit_unlock;
/* TODO: Dump SVM-Ranges */ /* TODO: Dump SVM-Ranges */
} }
...@@ -2410,8 +2364,8 @@ static int criu_restore_objects(struct file *filep, ...@@ -2410,8 +2364,8 @@ static int criu_restore_objects(struct file *filep,
goto exit; goto exit;
break; break;
case KFD_CRIU_OBJECT_TYPE_EVENT: case KFD_CRIU_OBJECT_TYPE_EVENT:
/* TODO: Implement Events */ ret = kfd_criu_restore_event(filep, p, (uint8_t __user *)args->priv_data,
*priv_offset += sizeof(struct kfd_criu_event_priv_data); priv_offset, max_priv_data_size);
if (ret) if (ret)
goto exit; goto exit;
break; break;
......
...@@ -55,7 +55,6 @@ struct kfd_signal_page { ...@@ -55,7 +55,6 @@ struct kfd_signal_page {
bool need_to_free_pages; bool need_to_free_pages;
}; };
static uint64_t *page_slots(struct kfd_signal_page *page) static uint64_t *page_slots(struct kfd_signal_page *page)
{ {
return page->kernel_address; return page->kernel_address;
...@@ -92,7 +91,8 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p) ...@@ -92,7 +91,8 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
} }
static int allocate_event_notification_slot(struct kfd_process *p, static int allocate_event_notification_slot(struct kfd_process *p,
struct kfd_event *ev) struct kfd_event *ev,
const int *restore_id)
{ {
int id; int id;
...@@ -104,14 +104,19 @@ static int allocate_event_notification_slot(struct kfd_process *p, ...@@ -104,14 +104,19 @@ static int allocate_event_notification_slot(struct kfd_process *p,
p->signal_mapped_size = 256*8; p->signal_mapped_size = 256*8;
} }
/* if (restore_id) {
* Compatibility with old user mode: Only use signal slots id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
* user mode has mapped, may be less than GFP_KERNEL);
* KFD_SIGNAL_EVENT_LIMIT. This also allows future increase } else {
* of the event limit without breaking user mode. /*
*/ * Compatibility with old user mode: Only use signal slots
id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8, * user mode has mapped, may be less than
GFP_KERNEL); * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
* of the event limit without breaking user mode.
*/
id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
GFP_KERNEL);
}
if (id < 0) if (id < 0)
return id; return id;
...@@ -178,9 +183,8 @@ static struct kfd_event *lookup_signaled_event_by_partial_id( ...@@ -178,9 +183,8 @@ static struct kfd_event *lookup_signaled_event_by_partial_id(
return ev; return ev;
} }
static int create_signal_event(struct file *devkfd, static int create_signal_event(struct file *devkfd, struct kfd_process *p,
struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
struct kfd_event *ev)
{ {
int ret; int ret;
...@@ -193,7 +197,7 @@ static int create_signal_event(struct file *devkfd, ...@@ -193,7 +197,7 @@ static int create_signal_event(struct file *devkfd,
return -ENOSPC; return -ENOSPC;
} }
ret = allocate_event_notification_slot(p, ev); ret = allocate_event_notification_slot(p, ev, restore_id);
if (ret) { if (ret) {
pr_warn("Signal event wasn't created because out of kernel memory\n"); pr_warn("Signal event wasn't created because out of kernel memory\n");
return ret; return ret;
...@@ -209,16 +213,22 @@ static int create_signal_event(struct file *devkfd, ...@@ -209,16 +213,22 @@ static int create_signal_event(struct file *devkfd,
return 0; return 0;
} }
static int create_other_event(struct kfd_process *p, struct kfd_event *ev) static int create_other_event(struct kfd_process *p, struct kfd_event *ev, const int *restore_id)
{ {
/* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an int id;
* intentional integer overflow to -1 without a compiler
* warning. idr_alloc treats a negative value as "maximum if (restore_id)
* signed integer". id = idr_alloc(&p->event_idr, ev, *restore_id, *restore_id + 1,
*/ GFP_KERNEL);
int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID, else
(uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1, /* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
GFP_KERNEL); * intentional integer overflow to -1 without a compiler
* warning. idr_alloc treats a negative value as "maximum
* signed integer".
*/
id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
(uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
GFP_KERNEL);
if (id < 0) if (id < 0)
return id; return id;
...@@ -295,8 +305,8 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev) ...@@ -295,8 +305,8 @@ static bool event_can_be_cpu_signaled(const struct kfd_event *ev)
return ev->type == KFD_EVENT_TYPE_SIGNAL; return ev->type == KFD_EVENT_TYPE_SIGNAL;
} }
int kfd_event_page_set(struct kfd_process *p, void *kernel_address, static int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
uint64_t size) uint64_t size, uint64_t user_handle)
{ {
struct kfd_signal_page *page; struct kfd_signal_page *page;
...@@ -315,10 +325,56 @@ int kfd_event_page_set(struct kfd_process *p, void *kernel_address, ...@@ -315,10 +325,56 @@ int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
p->signal_page = page; p->signal_page = page;
p->signal_mapped_size = size; p->signal_mapped_size = size;
p->signal_handle = user_handle;
return 0; return 0;
} }
/*
 * kfd_kmap_event_page - map a user-provided event page BO into the kernel
 * and install it as the signal page of process @p.
 *
 * @p:                 process that will own the signal page
 * @event_page_offset: user handle; the GPU id and the IDR handle of the
 *                     backing BO are extracted from it with GET_GPU_ID()
 *                     and GET_IDR_HANDLE()
 *
 * Returns 0 on success. Returns -EINVAL when a signal page is already set,
 * when the GPU id does not resolve to a device, or when the BO cannot be
 * found; otherwise the error from binding the process to the device, from
 * mapping the BO, or from kfd_event_page_set() is propagated.
 *
 * NOTE(review): the create-event ioctl calls this with p->mutex held;
 * presumably every caller must do the same - confirm for the CRIU path.
 */
int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset)
{
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	void *bo, *kaddr;
	uint64_t bo_size;
	int ret;

	/* Only one signal page may exist per process. */
	if (p->signal_page) {
		pr_err("Event page is already set\n");
		return -EINVAL;
	}

	dev = kfd_device_by_id(GET_GPU_ID(event_page_offset));
	if (!dev) {
		pr_err("Getting device by id failed in %s\n", __func__);
		return -EINVAL;
	}

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd))
		return PTR_ERR(pdd);

	bo = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(event_page_offset));
	if (!bo) {
		pr_err("Can't find BO, offset is 0x%llx\n", event_page_offset);
		return -EINVAL;
	}

	ret = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(dev->adev,
					bo, &kaddr, &bo_size);
	if (ret) {
		pr_err("Failed to map event page to kernel\n");
		return ret;
	}

	ret = kfd_event_page_set(p, kaddr, bo_size, event_page_offset);
	if (!ret)
		return 0;

	/* Installing the page failed: undo the kernel mapping made above. */
	pr_err("Failed to set event page\n");
	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, bo);
	return ret;
}
int kfd_event_create(struct file *devkfd, struct kfd_process *p, int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id, uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data, uint32_t *event_id, uint32_t *event_trigger_data,
...@@ -343,14 +399,14 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ...@@ -343,14 +399,14 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
switch (event_type) { switch (event_type) {
case KFD_EVENT_TYPE_SIGNAL: case KFD_EVENT_TYPE_SIGNAL:
case KFD_EVENT_TYPE_DEBUG: case KFD_EVENT_TYPE_DEBUG:
ret = create_signal_event(devkfd, p, ev); ret = create_signal_event(devkfd, p, ev, NULL);
if (!ret) { if (!ret) {
*event_page_offset = KFD_MMAP_TYPE_EVENTS; *event_page_offset = KFD_MMAP_TYPE_EVENTS;
*event_slot_index = ev->event_id; *event_slot_index = ev->event_id;
} }
break; break;
default: default:
ret = create_other_event(p, ev); ret = create_other_event(p, ev, NULL);
break; break;
} }
...@@ -366,6 +422,166 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ...@@ -366,6 +422,166 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
return ret; return ret;
} }
/*
 * kfd_criu_restore_event - recreate one event from CRIU private data
 *
 * @devkfd:             file handle forwarded to create_signal_event()
 * @p:                  process the event is restored into
 * @user_priv_ptr:      start of the user-space private data blob
 * @priv_data_offset:   in/out offset into the blob; advanced by
 *                      sizeof(struct kfd_criu_event_priv_data) once the
 *                      record has been copied in
 * @max_priv_data_size: total size of the blob, used for bounds checking
 *
 * Reads one struct kfd_criu_event_priv_data from user space, maps the event
 * page if the record carries a user_handle, and recreates the event with the
 * event id stored in the record.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL when the
 * record would extend past @max_priv_data_size, -EFAULT when the copy from
 * user space fails, or the error returned while mapping the event page or
 * creating the event.
 */
int kfd_criu_restore_event(struct file *devkfd,
			   struct kfd_process *p,
			   uint8_t __user *user_priv_ptr,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size)
{
	struct kfd_criu_event_priv_data *ev_priv;
	struct kfd_event *ev = NULL;
	int ret = 0;

	ev_priv = kmalloc(sizeof(*ev_priv), GFP_KERNEL);
	if (!ev_priv)
		return -ENOMEM;

	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
	if (!ev) {
		ret = -ENOMEM;
		goto exit;
	}

	if (*priv_data_offset + sizeof(*ev_priv) > max_priv_data_size) {
		ret = -EINVAL;
		goto exit;
	}

	ret = copy_from_user(ev_priv, user_priv_ptr + *priv_data_offset, sizeof(*ev_priv));
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}
	*priv_data_offset += sizeof(*ev_priv);

	/* Only a record that carries a user_handle sets up the event page. */
	if (ev_priv->user_handle) {
		ret = kfd_kmap_event_page(p, ev_priv->user_handle);
		if (ret)
			goto exit;
	}

	ev->type = ev_priv->type;
	ev->auto_reset = ev_priv->auto_reset;
	ev->signaled = ev_priv->signaled;

	init_waitqueue_head(&ev->wq);

	mutex_lock(&p->event_mutex);
	switch (ev->type) {
	case KFD_EVENT_TYPE_SIGNAL:
	case KFD_EVENT_TYPE_DEBUG:
		ret = create_signal_event(devkfd, p, ev, &ev_priv->event_id);
		break;
	case KFD_EVENT_TYPE_MEMORY:
		memcpy(&ev->memory_exception_data,
		       &ev_priv->memory_exception_data,
		       sizeof(struct kfd_hsa_memory_exception_data));

		ret = create_other_event(p, ev, &ev_priv->event_id);
		break;
	case KFD_EVENT_TYPE_HW_EXCEPTION:
		memcpy(&ev->hw_exception_data,
		       &ev_priv->hw_exception_data,
		       sizeof(struct kfd_hsa_hw_exception_data));

		ret = create_other_event(p, ev, &ev_priv->event_id);
		break;
	/*
	 * NOTE(review): there is no default case. A record with any other
	 * type leaves ret == 0 without inserting ev into the IDR, so ev is
	 * neither freed nor tracked. Decide whether such types should be
	 * rejected or restored via create_other_event().
	 */
	}
	/*
	 * Drop the lock here rather than after the exit label: the error
	 * paths above jump to exit without ever having taken event_mutex,
	 * and must not unlock it.
	 */
	mutex_unlock(&p->event_mutex);

exit:
	/* On success ev is kept by the event IDR; free it only on failure. */
	if (ret)
		kfree(ev);

	kfree(ev_priv);

	return ret;
}
/*
 * kfd_criu_checkpoint_events - dump every event of @p into the CRIU blob
 *
 * @p:                process whose events are checkpointed
 * @user_priv_data:   start of the user-space private data blob
 * @priv_data_offset: in/out offset into the blob; advanced by the size of
 *                    all event records, even when the copy to user fails
 *
 * Builds one struct kfd_criu_event_priv_data per entry of p->event_idr in a
 * zeroed temporary array and copies the whole array to user space in one go.
 *
 * Returns 0 on success (including when there are no events), -ENOMEM when
 * the temporary array cannot be allocated, or -EFAULT when the copy to user
 * space fails.
 */
int kfd_criu_checkpoint_events(struct kfd_process *p,
			       uint8_t __user *user_priv_data,
			       uint64_t *priv_data_offset)
{
	struct kfd_criu_event_priv_data *records;
	struct kfd_event *event;
	uint32_t idr_id;
	uint32_t count = kfd_get_num_events(p);
	int idx = 0;
	int ret = 0;

	if (count == 0)
		return 0;

	records = kvzalloc(count * sizeof(*records), GFP_KERNEL);
	if (records == NULL)
		return -ENOMEM;

	idr_for_each_entry(&p->event_idr, event, idr_id) {
		/*
		 * All event records currently have the same size, although
		 * the ioctls and the CRIU plugin support private data of
		 * variable sizes.
		 */
		struct kfd_criu_event_priv_data *rec = &records[idx];

		rec->object_type = KFD_CRIU_OBJECT_TYPE_EVENT;

		/* The signal page handle travels with the first record only. */
		if (idx == 0 && p->signal_page)
			rec->user_handle = p->signal_handle;

		rec->event_id = event->event_id;
		rec->auto_reset = event->auto_reset;
		rec->type = event->type;
		rec->signaled = event->signaled;

		/* Only these two event types carry extra payload. */
		switch (rec->type) {
		case KFD_EVENT_TYPE_MEMORY:
			memcpy(&rec->memory_exception_data,
			       &event->memory_exception_data,
			       sizeof(struct kfd_hsa_memory_exception_data));
			break;
		case KFD_EVENT_TYPE_HW_EXCEPTION:
			memcpy(&rec->hw_exception_data,
			       &event->hw_exception_data,
			       sizeof(struct kfd_hsa_hw_exception_data));
			break;
		default:
			break;
		}

		pr_debug("Checkpointed event[%d] id = 0x%08x auto_reset = %x type = %x signaled = %x\n",
			 idx,
			 rec->event_id,
			 rec->auto_reset,
			 rec->type,
			 rec->signaled);
		idx++;
	}

	if (copy_to_user(user_priv_data + *priv_data_offset,
			 records, count * sizeof(*records))) {
		pr_err("Failed to copy events priv to user\n");
		ret = -EFAULT;
	}

	*priv_data_offset += count * sizeof(*records);

	kvfree(records);
	return ret;
}
int kfd_get_num_events(struct kfd_process *p)
{
struct kfd_event *ev;
uint32_t id;
u32 num_events = 0;
idr_for_each_entry(&p->event_idr, ev, id)
num_events++;
return num_events;
}
/* Assumes that p is current. */ /* Assumes that p is current. */
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id) int kfd_event_destroy(struct kfd_process *p, uint32_t event_id)
{ {
......
...@@ -1099,7 +1099,16 @@ struct kfd_criu_queue_priv_data { ...@@ -1099,7 +1099,16 @@ struct kfd_criu_queue_priv_data {
struct kfd_criu_event_priv_data { struct kfd_criu_event_priv_data {
uint32_t object_type; uint32_t object_type;
uint32_t reserved; uint64_t user_handle;
uint32_t event_id;
uint32_t auto_reset;
uint32_t type;
uint32_t signaled;
union {
struct kfd_hsa_memory_exception_data memory_exception_data;
struct kfd_hsa_hw_exception_data hw_exception_data;
};
}; };
int kfd_process_get_queue_info(struct kfd_process *p, int kfd_process_get_queue_info(struct kfd_process *p,
...@@ -1114,6 +1123,16 @@ int kfd_criu_restore_queue(struct kfd_process *p, ...@@ -1114,6 +1123,16 @@ int kfd_criu_restore_queue(struct kfd_process *p,
uint8_t __user *user_priv_data, uint8_t __user *user_priv_data,
uint64_t *priv_data_offset, uint64_t *priv_data_offset,
uint64_t max_priv_data_size); uint64_t max_priv_data_size);
int kfd_criu_checkpoint_events(struct kfd_process *p,
uint8_t __user *user_priv_data,
uint64_t *priv_data_offset);
int kfd_criu_restore_event(struct file *devkfd,
struct kfd_process *p,
uint8_t __user *user_priv_data,
uint64_t *priv_data_offset,
uint64_t max_priv_data_size);
/* CRIU - End */ /* CRIU - End */
/* Queue Context Management */ /* Queue Context Management */
...@@ -1277,12 +1296,14 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, ...@@ -1277,12 +1296,14 @@ void kfd_signal_iommu_event(struct kfd_dev *dev,
void kfd_signal_hw_exception_event(u32 pasid); void kfd_signal_hw_exception_event(u32 pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id); int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id); int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
int kfd_event_page_set(struct kfd_process *p, void *kernel_address, int kfd_kmap_event_page(struct kfd_process *p, uint64_t event_page_offset);
uint64_t size);
int kfd_event_create(struct file *devkfd, struct kfd_process *p, int kfd_event_create(struct file *devkfd, struct kfd_process *p,
uint32_t event_type, bool auto_reset, uint32_t node_id, uint32_t event_type, bool auto_reset, uint32_t node_id,
uint32_t *event_id, uint32_t *event_trigger_data, uint32_t *event_id, uint32_t *event_trigger_data,
uint64_t *event_page_offset, uint32_t *event_slot_index); uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_get_num_events(struct kfd_process *p);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid, void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment