Commit 15024daf authored by Philip Yang, committed by Alex Deucher

drm/amdkfd: keep BOs in system memory if restore failed

If VRAM is used up, a display VRAM allocation evicts the KFD BOs to
system memory. KFD then schedules restore work to move the BOs back to
VRAM. If display BOs are pinned in VRAM, the KFD restore work will keep
retrying and may never succeed.

If restoring a BO back to VRAM fails, keep the BO in system memory to
prevent endless restore retries; the GPU mapping will be updated to
point to system memory.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent c529b685
...@@ -2043,6 +2043,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) ...@@ -2043,6 +2043,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
int ret = 0, i; int ret = 0, i;
struct list_head duplicate_save; struct list_head duplicate_save;
struct amdgpu_sync sync_obj; struct amdgpu_sync sync_obj;
unsigned long failed_size = 0;
unsigned long total_size = 0;
INIT_LIST_HEAD(&duplicate_save); INIT_LIST_HEAD(&duplicate_save);
INIT_LIST_HEAD(&ctx.list); INIT_LIST_HEAD(&ctx.list);
...@@ -2099,11 +2101,19 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) ...@@ -2099,11 +2101,19 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
uint32_t domain = mem->domain; uint32_t domain = mem->domain;
struct kfd_bo_va_list *bo_va_entry; struct kfd_bo_va_list *bo_va_entry;
total_size += amdgpu_bo_size(bo);
ret = amdgpu_amdkfd_bo_validate(bo, domain, false); ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
if (ret) { if (ret) {
pr_debug("Memory eviction: Validate BOs failed. Try again\n"); pr_debug("Memory eviction: Validate BOs failed\n");
failed_size += amdgpu_bo_size(bo);
ret = amdgpu_amdkfd_bo_validate(bo,
AMDGPU_GEM_DOMAIN_GTT, false);
if (ret) {
pr_debug("Memory eviction: Try again\n");
goto validate_map_fail; goto validate_map_fail;
} }
}
ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
if (ret) { if (ret) {
pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
...@@ -2122,6 +2132,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) ...@@ -2122,6 +2132,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
} }
} }
if (failed_size)
pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size);
/* Update page directories */ /* Update page directories */
ret = process_update_pds(process_info, &sync_obj); ret = process_update_pds(process_info, &sync_obj);
if (ret) { if (ret) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment