Commit 71b9d192 authored by Mukul Joshi, committed by Alex Deucher

drm/amdgpu: Handle duplicate BOs during process restore

In certain situations, some apps can import a BO multiple times
(through IPC for example). To restore such processes successfully,
we need to tell drm to ignore duplicate BOs.
While at it, also add additional logging to prevent silent failures
when process restore fails.
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 2f14c0c8
...@@ -2869,15 +2869,17 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * ...@@ -2869,15 +2869,17 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
mutex_lock(&process_info->lock); mutex_lock(&process_info->lock);
drm_exec_init(&exec, 0, 0); drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
drm_exec_until_all_locked(&exec) { drm_exec_until_all_locked(&exec) {
list_for_each_entry(peer_vm, &process_info->vm_list_head, list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) { vm_list_node) {
ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2); ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2);
drm_exec_retry_on_contention(&exec); drm_exec_retry_on_contention(&exec);
if (unlikely(ret)) if (unlikely(ret)) {
pr_err("Locking VM PD failed, ret: %d\n", ret);
goto ttm_reserve_fail; goto ttm_reserve_fail;
} }
}
/* Reserve all BOs and page tables/directory. Add all BOs from /* Reserve all BOs and page tables/directory. Add all BOs from
* kfd_bo_list to ctx.list * kfd_bo_list to ctx.list
...@@ -2889,10 +2891,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * ...@@ -2889,10 +2891,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
gobj = &mem->bo->tbo.base; gobj = &mem->bo->tbo.base;
ret = drm_exec_prepare_obj(&exec, gobj, 1); ret = drm_exec_prepare_obj(&exec, gobj, 1);
drm_exec_retry_on_contention(&exec); drm_exec_retry_on_contention(&exec);
if (unlikely(ret)) if (unlikely(ret)) {
pr_err("drm_exec_prepare_obj failed, ret: %d\n", ret);
goto ttm_reserve_fail; goto ttm_reserve_fail;
} }
} }
}
amdgpu_sync_create(&sync_obj); amdgpu_sync_create(&sync_obj);
...@@ -2950,8 +2954,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * ...@@ -2950,8 +2954,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu *
* validations above would invalidate DMABuf imports again. * validations above would invalidate DMABuf imports again.
*/ */
ret = process_validate_vms(process_info, &exec.ticket); ret = process_validate_vms(process_info, &exec.ticket);
if (ret) if (ret) {
pr_debug("Validating VMs failed, ret: %d\n", ret);
goto validate_map_fail; goto validate_map_fail;
}
/* Update mappings not managed by KFD */ /* Update mappings not managed by KFD */
list_for_each_entry(peer_vm, &process_info->vm_list_head, list_for_each_entry(peer_vm, &process_info->vm_list_head,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment