Commit 12fcf0a7, authored by Felix Kuehling, committed by Alex Deucher

drm/amdkfd: Avoid thrashing of stack and heap

Stack and heap pages tend to be shared by many small allocations.
Concurrent access by CPU and GPU is therefore likely, which can lead to
thrashing. Avoid this by setting the preferred location to system memory.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 297753a0
...@@ -2321,7 +2321,8 @@ svm_range_best_restore_location(struct svm_range *prange, ...@@ -2321,7 +2321,8 @@ svm_range_best_restore_location(struct svm_range *prange,
static int static int
svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
unsigned long *start, unsigned long *last) unsigned long *start, unsigned long *last,
bool *is_heap_stack)
{ {
struct vm_area_struct *vma; struct vm_area_struct *vma;
struct interval_tree_node *node; struct interval_tree_node *node;
...@@ -2332,6 +2333,12 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, ...@@ -2332,6 +2333,12 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
pr_debug("VMA does not exist in address [0x%llx]\n", addr); pr_debug("VMA does not exist in address [0x%llx]\n", addr);
return -EFAULT; return -EFAULT;
} }
*is_heap_stack = (vma->vm_start <= vma->vm_mm->brk &&
vma->vm_end >= vma->vm_mm->start_brk) ||
(vma->vm_start <= vma->vm_mm->start_stack &&
vma->vm_end >= vma->vm_mm->start_stack);
start_limit = max(vma->vm_start >> PAGE_SHIFT, start_limit = max(vma->vm_start >> PAGE_SHIFT,
(unsigned long)ALIGN_DOWN(addr, 2UL << 8)); (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
end_limit = min(vma->vm_end >> PAGE_SHIFT, end_limit = min(vma->vm_end >> PAGE_SHIFT,
...@@ -2361,9 +2368,9 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr, ...@@ -2361,9 +2368,9 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
*start = start_limit; *start = start_limit;
*last = end_limit - 1; *last = end_limit - 1;
pr_debug("vma start: 0x%lx start: 0x%lx vma end: 0x%lx last: 0x%lx\n", pr_debug("vma [0x%lx 0x%lx] range [0x%lx 0x%lx] is_heap_stack %d\n",
vma->vm_start >> PAGE_SHIFT, *start, vma->vm_start >> PAGE_SHIFT, vma->vm_end >> PAGE_SHIFT,
vma->vm_end >> PAGE_SHIFT, *last); *start, *last, *is_heap_stack);
return 0; return 0;
} }
...@@ -2428,11 +2435,13 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, ...@@ -2428,11 +2435,13 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
struct svm_range *prange = NULL; struct svm_range *prange = NULL;
unsigned long start, last; unsigned long start, last;
uint32_t gpuid, gpuidx; uint32_t gpuid, gpuidx;
bool is_heap_stack;
uint64_t bo_s = 0; uint64_t bo_s = 0;
uint64_t bo_l = 0; uint64_t bo_l = 0;
int r; int r;
if (svm_range_get_range_boundaries(p, addr, &start, &last)) if (svm_range_get_range_boundaries(p, addr, &start, &last,
&is_heap_stack))
return NULL; return NULL;
r = svm_range_check_vm(p, start, last, &bo_s, &bo_l); r = svm_range_check_vm(p, start, last, &bo_s, &bo_l);
...@@ -2459,6 +2468,9 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, ...@@ -2459,6 +2468,9 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
return NULL; return NULL;
} }
if (is_heap_stack)
prange->preferred_loc = KFD_IOCTL_SVM_LOCATION_SYSMEM;
svm_range_add_to_svms(prange); svm_range_add_to_svms(prange);
svm_range_add_notifier_locked(mm, prange); svm_range_add_notifier_locked(mm, prange);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment