Commit a269e449 authored by Alex Sierra, committed by Alex Deucher

drm/amdgpu: Avoid reclaim fs while eviction lock

[Why]
Avoid filesystem reclaim while the eviction lock is held, because this
lock can also be taken from an MMU notifier.

[How]
Set the PF_MEMALLOC_NOFS flag while the eviction mutex is locked,
using the memalloc_nofs_save / memalloc_nofs_restore API.
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent a9ffe2a9
...@@ -82,6 +82,32 @@ struct amdgpu_prt_cb { ...@@ -82,6 +82,32 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb; struct dma_fence_cb cb;
}; };
/**
 * amdgpu_vm_eviction_lock - take the VM eviction lock
 * @vm: VM whose eviction_lock is acquired
 *
 * The vm eviction_lock can be taken in MMU notifiers. Make sure no
 * reclaim-FS happens anywhere while this lock is held, to prevent
 * deadlocks when an MMU notifier runs in reclaim-FS context.
 *
 * Acquires vm->eviction_lock and then stores the value returned by
 * memalloc_nofs_save() in vm->saved_flags, where
 * amdgpu_vm_eviction_unlock() reads it back for memalloc_nofs_restore().
 */
static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
{
mutex_lock(&vm->eviction_lock);
/* Written only after the mutex is held, so the lock serializes it. */
vm->saved_flags = memalloc_nofs_save();
}
/**
 * amdgpu_vm_eviction_trylock - try to take the VM eviction lock
 * @vm: VM whose eviction_lock is requested
 *
 * Attempts to acquire vm->eviction_lock with mutex_trylock(). Only when
 * the mutex was obtained is the value returned by memalloc_nofs_save()
 * recorded in vm->saved_flags, mirroring amdgpu_vm_eviction_lock().
 *
 * Returns:
 * 1 if the lock was acquired (release it with
 * amdgpu_vm_eviction_unlock()), 0 if it was not.
 */
static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
{
	/* Lock not obtained: leave vm->saved_flags untouched. */
	if (!mutex_trylock(&vm->eviction_lock))
		return 0;

	vm->saved_flags = memalloc_nofs_save();
	return 1;
}
/**
 * amdgpu_vm_eviction_unlock - release the VM eviction lock
 * @vm: VM whose eviction_lock is released
 *
 * Passes vm->saved_flags (stored when the lock was taken) to
 * memalloc_nofs_restore() and then unlocks vm->eviction_lock.
 */
static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
{
/* Read saved_flags before dropping the mutex, while it is still held. */
memalloc_nofs_restore(vm->saved_flags);
mutex_unlock(&vm->eviction_lock);
}
/** /**
* amdgpu_vm_level_shift - return the addr shift for each level * amdgpu_vm_level_shift - return the addr shift for each level
* *
...@@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, ...@@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
} }
} }
mutex_lock(&vm->eviction_lock); amdgpu_vm_eviction_lock(vm);
vm->evicting = false; vm->evicting = false;
mutex_unlock(&vm->eviction_lock); amdgpu_vm_eviction_unlock(vm);
return 0; return 0;
} }
...@@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, ...@@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (!(flags & AMDGPU_PTE_VALID)) if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_KFD; owner = AMDGPU_FENCE_OWNER_KFD;
mutex_lock(&vm->eviction_lock); amdgpu_vm_eviction_lock(vm);
if (vm->evicting) { if (vm->evicting) {
r = -EBUSY; r = -EBUSY;
goto error_unlock; goto error_unlock;
...@@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, ...@@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
r = vm->update_funcs->commit(&params, fence); r = vm->update_funcs->commit(&params, fence);
error_unlock: error_unlock:
mutex_unlock(&vm->eviction_lock); amdgpu_vm_eviction_unlock(vm);
return r; return r;
} }
...@@ -2533,18 +2559,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) ...@@ -2533,18 +2559,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
return false; return false;
/* Try to block ongoing updates */ /* Try to block ongoing updates */
if (!mutex_trylock(&bo_base->vm->eviction_lock)) if (!amdgpu_vm_eviction_trylock(bo_base->vm))
return false; return false;
/* Don't evict VM page tables while they are updated */ /* Don't evict VM page tables while they are updated */
if (!dma_fence_is_signaled(bo_base->vm->last_direct) || if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
!dma_fence_is_signaled(bo_base->vm->last_delayed)) { !dma_fence_is_signaled(bo_base->vm->last_delayed)) {
mutex_unlock(&bo_base->vm->eviction_lock); amdgpu_vm_eviction_unlock(bo_base->vm);
return false; return false;
} }
bo_base->vm->evicting = true; bo_base->vm->evicting = true;
mutex_unlock(&bo_base->vm->eviction_lock); amdgpu_vm_eviction_unlock(bo_base->vm);
return true; return true;
} }
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include <drm/gpu_scheduler.h> #include <drm/gpu_scheduler.h>
#include <drm/drm_file.h> #include <drm/drm_file.h>
#include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_bo_driver.h>
#include <linux/sched/mm.h>
#include "amdgpu_sync.h" #include "amdgpu_sync.h"
#include "amdgpu_ring.h" #include "amdgpu_ring.h"
...@@ -239,9 +240,12 @@ struct amdgpu_vm { ...@@ -239,9 +240,12 @@ struct amdgpu_vm {
/* tree of virtual addresses mapped */ /* tree of virtual addresses mapped */
struct rb_root_cached va; struct rb_root_cached va;
/* Lock to prevent eviction while we are updating page tables */ /* Lock to prevent eviction while we are updating page tables
* use vm_eviction_lock/unlock(vm)
*/
struct mutex eviction_lock; struct mutex eviction_lock;
bool evicting; bool evicting;
unsigned int saved_flags;
/* BOs who needs a validation */ /* BOs who needs a validation */
struct list_head evicted; struct list_head evicted;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment