Commit 0ce15d6f authored by Christian König, committed by Alex Deucher

drm/amdgpu: allocate VM PDs/PTs on demand

Let's start to allocate VM PDs/PTs on demand instead of pre-allocating
them during mapping.
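
In other words: instead of the map/ioctl paths walking the whole address range
up front just to create page tables, the page-table walker creates a missing
PD/PT at the moment an update actually reaches it. A minimal, self-contained
sketch of that pattern (plain C with made-up toy_* names, not the amdgpu code
itself):

    #include <stdio.h>
    #include <stdlib.h>

    /* Toy two-level "page table": a directory of pointers to leaf tables. */
    #define DIR_ENTRIES  4
    #define TBL_ENTRIES  8

    struct toy_table { unsigned long pte[TBL_ENTRIES]; };
    struct toy_dir   { struct toy_table *tbl[DIR_ENTRIES]; };

    /* Allocate the leaf table on demand, when the walk first reaches it. */
    static struct toy_table *toy_get_table(struct toy_dir *dir, unsigned idx)
    {
            if (!dir->tbl[idx]) {
                    dir->tbl[idx] = calloc(1, sizeof(*dir->tbl[idx]));
                    if (!dir->tbl[idx])
                            return NULL; /* -ENOMEM, in kernel terms */
            }
            return dir->tbl[idx];
    }

    /* Update PTEs for [start, end); tables appear as a side effect of the
     * walk, so no caller has to pre-allocate them for the mapped range.
     */
    static int toy_update_ptes(struct toy_dir *dir, unsigned start,
                               unsigned end, unsigned long value)
    {
            for (unsigned pfn = start; pfn < end; pfn++) {
                    struct toy_table *tbl = toy_get_table(dir, pfn / TBL_ENTRIES);

                    if (!tbl)
                            return -1;
                    tbl->pte[pfn % TBL_ENTRIES] = value;
            }
            return 0;
    }

    int main(void)
    {
            struct toy_dir dir = {0};

            if (toy_update_ptes(&dir, 3, 13, 0x42))
                    return 1;
            /* pfn 5 lives in table 0, pfn 12 in table 1 */
            printf("pte[5]=%lx pte[12]=%lx\n",
                   dir.tbl[0]->pte[5], dir.tbl[1]->pte[4]);
            return 0;
    }

In the patch below the same responsibility moves into amdgpu_vm_update_ptes(),
which calls the now-static amdgpu_vm_alloc_pts() for the current cursor
position, so the amdkfd, CSA and GEM VA ioctl callers can drop their
pre-allocation calls.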
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 780637cb
@@ -410,15 +410,7 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
         if (p_bo_va_entry)
                 *p_bo_va_entry = bo_va_entry;
 
-        /* Allocate new page tables if needed and validate
-         * them.
-         */
-        ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
-        if (ret) {
-                pr_err("Failed to allocate pts, err=%d\n", ret);
-                goto err_alloc_pts;
-        }
-
+        /* Allocate validate page tables if needed */
         ret = vm_validate_pt_pd_bos(vm);
         if (ret) {
                 pr_err("validate_pt_pd_bos() failed\n");
...
@@ -92,15 +92,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                 return -ENOMEM;
         }
 
-        r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
-                                size);
-        if (r) {
-                DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
-                amdgpu_vm_bo_rmv(adev, *bo_va);
-                ttm_eu_backoff_reservation(&ticket, &list);
-                return r;
-        }
-
         r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
                              AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
                              AMDGPU_PTE_EXECUTABLE);
...
@@ -625,11 +625,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 
         switch (args->operation) {
         case AMDGPU_VA_OP_MAP:
-                r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
-                                        args->map_size);
-                if (r)
-                        goto error_backoff;
-
                 va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
                 r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
                                      args->offset_in_bo, args->map_size,
@@ -645,11 +640,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
                                                 args->map_size);
                 break;
         case AMDGPU_VA_OP_REPLACE:
-                r = amdgpu_vm_alloc_pts(adev, bo_va->base.vm, args->va_address,
-                                        args->map_size);
-                if (r)
-                        goto error_backoff;
-
                 va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
                 r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
                                              args->offset_in_bo, args->map_size,
...
@@ -520,47 +520,6 @@ static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
         }
 }
 
-/**
- * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
- *
- * @adev: amdgpu_device pointer
- * @vm: amdgpu_vm structure
- * @start: start addr of the walk
- * @cursor: state to initialize
- *
- * Start a walk and go directly to the leaf node.
- */
-static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
-                                    struct amdgpu_vm *vm, uint64_t start,
-                                    struct amdgpu_vm_pt_cursor *cursor)
-{
-        amdgpu_vm_pt_start(adev, vm, start, cursor);
-        while (amdgpu_vm_pt_descendant(adev, cursor));
-}
-
-/**
- * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
- *
- * Walk the PD/PT tree to the next leaf node.
- */
-static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
-                                   struct amdgpu_vm_pt_cursor *cursor)
-{
-        amdgpu_vm_pt_next(adev, cursor);
-        if (cursor->pfn != ~0ll)
-                while (amdgpu_vm_pt_descendant(adev, cursor));
-}
-
-/**
- * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy
- */
-#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)              \
-        for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor));       \
-             (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))
-
 /**
  * amdgpu_vm_pt_first_dfs - start a deep first search
  *
@@ -932,51 +891,30 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  * Returns:
  * 0 on success, errno otherwise.
  */
-int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
-                        struct amdgpu_vm *vm,
-                        uint64_t saddr, uint64_t size)
+static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
+                               struct amdgpu_vm *vm,
+                               struct amdgpu_vm_pt_cursor *cursor)
 {
-        struct amdgpu_vm_pt_cursor cursor;
+        struct amdgpu_vm_pt *entry = cursor->entry;
+        struct amdgpu_bo_param bp;
         struct amdgpu_bo *pt;
-        uint64_t eaddr;
         int r;
 
-        /* validate the parameters */
-        if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
-                return -EINVAL;
-
-        eaddr = saddr + size - 1;
-
-        saddr /= AMDGPU_GPU_PAGE_SIZE;
-        eaddr /= AMDGPU_GPU_PAGE_SIZE;
-
-        if (eaddr >= adev->vm_manager.max_pfn) {
-                dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
-                        eaddr, adev->vm_manager.max_pfn);
-                return -EINVAL;
-        }
-
-        for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
-                struct amdgpu_vm_pt *entry = cursor.entry;
-                struct amdgpu_bo_param bp;
-
-                if (cursor.level < AMDGPU_VM_PTB) {
-                        unsigned num_entries;
-
-                        num_entries = amdgpu_vm_num_entries(adev, cursor.level);
-                        entry->entries = kvmalloc_array(num_entries,
-                                                        sizeof(*entry->entries),
-                                                        GFP_KERNEL |
-                                                        __GFP_ZERO);
-                        if (!entry->entries)
-                                return -ENOMEM;
-                }
-
-                if (entry->base.bo)
-                        continue;
-
-                amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
-
-                r = amdgpu_bo_create(adev, &bp, &pt);
-                if (r)
-                        return r;
+        if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
+                unsigned num_entries;
+
+                num_entries = amdgpu_vm_num_entries(adev, cursor->level);
+                entry->entries = kvmalloc_array(num_entries,
+                                                sizeof(*entry->entries),
+                                                GFP_KERNEL | __GFP_ZERO);
+                if (!entry->entries)
+                        return -ENOMEM;
+        }
+
+        if (entry->base.bo)
+                return 0;
+
+        amdgpu_vm_bo_param(adev, vm, cursor->level, &bp);
+
+        r = amdgpu_bo_create(adev, &bp, &pt);
+        if (r)
+                return r;
@@ -991,14 +929,12 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 
-                /* Keep a reference to the root directory to avoid
-                 * freeing them up in the wrong order.
-                 */
-                pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
-
-                amdgpu_vm_bo_base_init(&entry->base, vm, pt);
-
-                r = amdgpu_vm_clear_bo(adev, vm, pt);
-                if (r)
-                        goto error_free_pt;
-        }
+        /* Keep a reference to the root directory to avoid
+         * freeing them up in the wrong order.
+         */
+        pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
+        amdgpu_vm_bo_base_init(&entry->base, vm, pt);
+
+        r = amdgpu_vm_clear_bo(adev, vm, pt);
+        if (r)
+                goto error_free_pt;
 
         return 0;
@@ -1644,6 +1580,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
         struct amdgpu_vm_pt_cursor cursor;
         uint64_t frag_start = start, frag_end;
         unsigned int frag;
+        int r;
 
         /* figure out the initial fragment */
         amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
@@ -1651,12 +1588,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
         /* walk over the address space and update the PTs */
         amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
         while (cursor.pfn < end) {
-                struct amdgpu_bo *pt = cursor.entry->base.bo;
                 unsigned shift, parent_shift, mask;
                 uint64_t incr, entry_end, pe_start;
+                struct amdgpu_bo *pt;
 
-                if (!pt)
-                        return -ENOENT;
+                r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor);
+                if (r)
+                        return r;
+
+                pt = cursor.entry->base.bo;
 
                 /* The root level can't be a huge page */
                 if (cursor.level == adev->vm_manager.root_level) {
...
@@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                               int (*callback)(void *p, struct amdgpu_bo *bo),
                               void *param);
-int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
-                        struct amdgpu_vm *vm,
-                        uint64_t saddr, uint64_t size);
 int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
 int amdgpu_vm_update_directories(struct amdgpu_device *adev,
                                  struct amdgpu_vm *vm);
...