Commit f566ceb1 authored by Christian König, committed by Alex Deucher

drm/amdgpu: add alloc/free for multi level PDs V2

Allocate and free page directories on demand.

V2:
a. clear the newly allocated entries array
b. fix the entries index calculation (see the index sketch ahead of the diff)
c. allocate sub levels even when the parent BO was already allocated

Signed-off-by: Christian König <christian.koenig@amd.com> (v1)
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com> (v2)
Acked-by: Alex Deucher <alexander.deucher@amd.com> (v2)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 4e2cb640
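For reference, the per-level index math that item (b) fixes extracts one directory index from the GPU page address at each level, exactly as the new amdgpu_vm_alloc_levels() does with amdgpu_vm_block_size, adev->vm_manager.num_level and amdgpu_vm_num_entries(). The standalone sketch below is illustrative only: EXAMPLE_BLOCK_SIZE, EXAMPLE_NUM_LEVEL, example_num_entries() and the sample address range are made-up values, and it assumes a uniform entry count per level, which is a simplification of the driver.

/*
 * Standalone sketch of the per-level index math used by the patch below.
 * All constants and addresses are made-up example values, not the
 * driver's actual configuration.
 */
#include <stdio.h>
#include <stdint.h>

#define EXAMPLE_BLOCK_SIZE 9   /* 9 bits, i.e. 512 entries per level (made up) */
#define EXAMPLE_NUM_LEVEL  3   /* levels below the root directory (made up) */

static unsigned example_num_entries(void)
{
        return 1u << EXAMPLE_BLOCK_SIZE;
}

int main(void)
{
        /* start/end of a mapping, already converted to GPU page numbers */
        uint64_t saddr = 0x123456, eaddr = 0x1236ff;
        unsigned level;

        for (level = 0; level <= EXAMPLE_NUM_LEVEL; ++level) {
                /* same shift/from/to derivation as amdgpu_vm_alloc_levels() */
                unsigned shift = (EXAMPLE_NUM_LEVEL - level) * EXAMPLE_BLOCK_SIZE;
                unsigned from = (saddr >> shift) % example_num_entries();
                unsigned to = (eaddr >> shift) % example_num_entries();

                printf("level %u: entries %u..%u need to exist\n",
                       level, from, to);
        }
        return 0;
}

Only the entries in each printed range have to be allocated, which is what lets the directories grow on demand instead of being allocated up front.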
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -244,56 +244,54 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 	spin_unlock(&glob->lru_lock);
 }
 
 /**
- * amdgpu_vm_alloc_pts - Allocate page tables.
+ * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
  * @adev: amdgpu_device pointer
- * @vm: VM to allocate page tables for
- * @saddr: Start address which needs to be allocated
- * @size: Size from start address we need.
+ * @vm: requested vm
+ * @saddr: start of the address range
+ * @eaddr: end of the address range
  *
- * Make sure the page tables are allocated.
+ * Make sure the page directories and page tables are allocated
  */
-int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
-			struct amdgpu_vm *vm,
-			uint64_t saddr, uint64_t size)
+static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
+				  struct amdgpu_vm *vm,
+				  struct amdgpu_vm_pt *parent,
+				  uint64_t saddr, uint64_t eaddr,
+				  unsigned level)
 {
-	unsigned last_pfn, pt_idx;
-	uint64_t eaddr;
+	unsigned shift = (adev->vm_manager.num_level - level) *
+		amdgpu_vm_block_size;
+	unsigned pt_idx, from, to;
 	int r;
 
-	/* validate the parameters */
-	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
-		return -EINVAL;
+	if (!parent->entries) {
+		unsigned num_entries = amdgpu_vm_num_entries(adev, level);
 
-	eaddr = saddr + size - 1;
-	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
-	if (last_pfn >= adev->vm_manager.max_pfn) {
-		dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n",
-			last_pfn, adev->vm_manager.max_pfn);
-		return -EINVAL;
+		parent->entries = drm_calloc_large(num_entries,
+						   sizeof(struct amdgpu_vm_pt));
+		if (!parent->entries)
+			return -ENOMEM;
+		memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
 	}
 
-	saddr /= AMDGPU_GPU_PAGE_SIZE;
-	eaddr /= AMDGPU_GPU_PAGE_SIZE;
-
-	saddr >>= amdgpu_vm_block_size;
-	eaddr >>= amdgpu_vm_block_size;
+	from = (saddr >> shift) % amdgpu_vm_num_entries(adev, level);
+	to = (eaddr >> shift) % amdgpu_vm_num_entries(adev, level);
 
-	BUG_ON(eaddr >= amdgpu_vm_num_entries(adev, 0));
+	if (to > parent->last_entry_used)
+		parent->last_entry_used = to;
 
-	if (eaddr > vm->root.last_entry_used)
-		vm->root.last_entry_used = eaddr;
+	++level;
 
 	/* walk over the address space and allocate the page tables */
-	for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
+	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
 		struct reservation_object *resv = vm->root.bo->tbo.resv;
+		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 		struct amdgpu_bo *pt;
 
-		if (vm->root.entries[pt_idx].bo)
-			continue;
-
-		r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
+		if (!entry->bo) {
+			r = amdgpu_bo_create(adev,
+					     amdgpu_vm_bo_size(adev, level),
 				     AMDGPU_GPU_PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_VRAM,
 				     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
@@ -304,18 +302,61 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 		if (r)
 			return r;
 
-		/* Keep a reference to the page table to avoid freeing
-		 * them up in the wrong order.
-		 */
-		pt->parent = amdgpu_bo_ref(vm->root.bo);
-
-		vm->root.entries[pt_idx].bo = pt;
-		vm->root.entries[pt_idx].addr = 0;
+			/* Keep a reference to the root directory to avoid
+			 * freeing them up in the wrong order.
+			 */
+			pt->parent = amdgpu_bo_ref(vm->root.bo);
+
+			entry->bo = pt;
+			entry->addr = 0;
+		}
+
+		if (level < adev->vm_manager.num_level) {
+			r = amdgpu_vm_alloc_levels(adev, vm, entry, saddr,
+						   eaddr, level);
+			if (r)
+				return r;
+		}
 	}
 
 	return 0;
 }
 
+/**
+ * amdgpu_vm_alloc_pts - Allocate page tables.
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to allocate page tables for
+ * @saddr: Start address which needs to be allocated
+ * @size: Size from start address we need.
+ *
+ * Make sure the page tables are allocated.
+ */
+int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
+			struct amdgpu_vm *vm,
+			uint64_t saddr, uint64_t size)
+{
+	unsigned last_pfn;
+	uint64_t eaddr;
+
+	/* validate the parameters */
+	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
+		return -EINVAL;
+
+	eaddr = saddr + size - 1;
+	last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
+	if (last_pfn >= adev->vm_manager.max_pfn) {
+		dev_err(adev->dev, "va above limit (0x%08X >= 0x%08X)\n",
+			last_pfn, adev->vm_manager.max_pfn);
+		return -EINVAL;
+	}
+
+	saddr /= AMDGPU_GPU_PAGE_SIZE;
+	eaddr /= AMDGPU_GPU_PAGE_SIZE;
+
+	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0);
+}
+
 static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev,
 				   struct amdgpu_vm_id *id)
 {
@@ -1993,7 +2034,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT * 8);
-	unsigned pd_size, pd_entries;
 	unsigned ring_instance;
 	struct amdgpu_ring *ring;
 	struct amd_sched_rq *rq;
@@ -2008,16 +2048,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	INIT_LIST_HEAD(&vm->cleared);
 	INIT_LIST_HEAD(&vm->freed);
 
-	pd_size = amdgpu_vm_bo_size(adev, 0);
-	pd_entries = amdgpu_vm_num_entries(adev, 0);
-
-	/* allocate page table array */
-	vm->root.entries = drm_calloc_large(pd_entries, sizeof(struct amdgpu_vm_pt));
-	if (vm->root.entries == NULL) {
-		DRM_ERROR("Cannot allocate memory for page table array\n");
-		return -ENOMEM;
-	}
-
 	/* create scheduler entity for page table updates */
 	ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
@@ -2027,11 +2057,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	r = amd_sched_entity_init(&ring->sched, &vm->entity,
 				  rq, amdgpu_sched_jobs);
 	if (r)
-		goto err;
+		return r;
 
 	vm->last_dir_update = NULL;
 
-	r = amdgpu_bo_create(adev, pd_size, align, true,
+	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
 			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
 			     AMDGPU_GEM_CREATE_SHADOW |
@@ -2058,12 +2088,32 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 error_free_sched_entity:
 	amd_sched_entity_fini(&ring->sched, &vm->entity);
 
-err:
-	drm_free_large(vm->root.entries);
-
 	return r;
 }
 
+/**
+ * amdgpu_vm_free_levels - free PD/PT levels
+ *
+ * @level: PD/PT starting level to free
+ *
+ * Free the page directory or page table level and all sub levels.
+ */
+static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
+{
+	unsigned i;
+
+	if (level->bo) {
+		amdgpu_bo_unref(&level->bo->shadow);
+		amdgpu_bo_unref(&level->bo);
+	}
+
+	if (level->entries)
+		for (i = 0; i <= level->last_entry_used; i++)
+			amdgpu_vm_free_levels(&level->entries[i]);
+
+	drm_free_large(level->entries);
+}
+
 /**
  * amdgpu_vm_fini - tear down a vm instance
  *
@@ -2077,7 +2127,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
-	int i;
 
 	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
@@ -2099,19 +2148,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
 	}
 
-	for (i = 0; i < amdgpu_vm_num_entries(adev, 0); i++) {
-		struct amdgpu_bo *pt = vm->root.entries[i].bo;
-
-		if (!pt)
-			continue;
-
-		amdgpu_bo_unref(&pt->shadow);
-		amdgpu_bo_unref(&pt);
-	}
-	drm_free_large(vm->root.entries);
-
-	amdgpu_bo_unref(&vm->root.bo->shadow);
-	amdgpu_bo_unref(&vm->root.bo);
+	amdgpu_vm_free_levels(&vm->root);
 	dma_fence_put(vm->last_dir_update);
 }
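A toy model of the allocate/free pattern the patch introduces is sketched below. Everything here is invented for illustration: struct toy_pt, toy_alloc_levels(), toy_free_levels(), TOY_ENTRIES and TOY_NUM_LEVEL are stand-ins for struct amdgpu_vm_pt and the adev->vm_manager parameters, BO creation is reduced to malloc, and the per-level from/to recomputation from the diff is simplified to reusing one index range. It only demonstrates the on-demand descent of item (c) and the depth-first free, not the driver's actual behaviour.

/*
 * Toy model of on-demand multi-level allocation and depth-first free.
 * All names and sizes are illustrative; "bo" is just a malloc'd marker.
 */
#include <stdio.h>
#include <stdlib.h>

#define TOY_ENTRIES   4   /* entries per directory level (made up) */
#define TOY_NUM_LEVEL 2   /* levels below the root directory (made up) */

struct toy_pt {
        void *bo;                 /* stands in for the level's buffer object */
        struct toy_pt *entries;   /* sub level, allocated on demand */
        unsigned last_entry_used;
};

static int toy_alloc_levels(struct toy_pt *parent, unsigned from, unsigned to,
                            unsigned level)
{
        unsigned idx;

        /* allocate (and zero) this level's entries array on demand */
        if (!parent->entries) {
                parent->entries = calloc(TOY_ENTRIES, sizeof(*parent->entries));
                if (!parent->entries)
                        return -1;
        }
        if (to > parent->last_entry_used)
                parent->last_entry_used = to;

        ++level;
        for (idx = from; idx <= to; ++idx) {
                struct toy_pt *entry = &parent->entries[idx];

                /* only create the backing object if it does not exist yet */
                if (!entry->bo)
                        entry->bo = malloc(1);

                /* item (c): descend into the sub level even when the entry's
                 * object already existed, so deeper tables still get
                 * allocated for the new range. */
                if (level < TOY_NUM_LEVEL)
                        if (toy_alloc_levels(entry, from, to, level))
                                return -1;
        }
        return 0;
}

static void toy_free_levels(struct toy_pt *level)
{
        unsigned i;

        free(level->bo);
        if (level->entries)
                for (i = 0; i <= level->last_entry_used; i++)
                        toy_free_levels(&level->entries[i]);
        free(level->entries);
}

int main(void)
{
        struct toy_pt root = { 0 };

        if (toy_alloc_levels(&root, 1, 2, 0))
                return 1;
        printf("root covers entries 0..%u\n", root.last_entry_used);
        toy_free_levels(&root);
        return 0;
}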