Commit 2f568dbd authored by Christian König, committed by Alex Deucher

drm/amdgpu: move get_user_pages out of amdgpu_ttm_tt_pin_userptr v6

This avoids a lock inversion between the BO reservation lock
and the anon_vma lock.

v2:
* Changed amdgpu_bo_list_entry.user_pages to an array of pointers
* Lock mmap_sem only for get_user_pages
* Added invalidation of unbound userpointer BOs
* Fixed memory leak and page reference leak

v3 (chk):
* Revert locking mmap_sem only for get_user_pages
* Revert adding invalidation of unbound userpointer BOs
* Sanitize and fix error handling

v4 (chk):
* Init userpages pointer everywhere.
* Fix error handling when get_user_pages() fails.
* Add invalidation of unbound userpointer BOs again.

v5 (chk):
* Add maximum number of tries.

v6 (chk):
* Fix error handling when we run out of tries.

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> (v4)
Acked-by: Alex Deucher <alexander.deucher@amd.com>
parent d564a06e
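
The effect of the patch is easiest to see as a lock-ordering change: before it, the command submission path called get_user_pages() from amdgpu_ttm_tt_pin_userptr() while the BO was already reserved, so it nested the anon_vma lock inside the reservation lock, while the MMU notifier path nests the two the other way around. The sketch below is purely illustrative and not part of the patch; it uses userspace pthread mutexes as stand-ins for the reservation object and for the mm-side lock taken inside get_user_pages(), to show why the reworked amdgpu_cs_parser_bos() (back off the reservation, fill the page arrays, then re-reserve and memcpy the pages into ttm->pages) removes the inversion.

/*
 * Illustrative sketch only -- not part of the patch.  Plain pthread
 * mutexes stand in for the BO reservation object and for the mm-side
 * lock (anon_vma) taken inside get_user_pages().
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t reservation = PTHREAD_MUTEX_INITIALIZER; /* BO reservation lock */
static pthread_mutex_t anon_vma    = PTHREAD_MUTEX_INITIALIZER; /* lock taken by get_user_pages() */

/* Old CS path: get_user_pages() ran from amdgpu_ttm_tt_pin_userptr()
 * during validation, i.e. with the BO still reserved: reservation -> anon_vma. */
static void cs_path_old(void)
{
	pthread_mutex_lock(&reservation);
	pthread_mutex_lock(&anon_vma);		/* GUP while reserved: inversion risk */
	pthread_mutex_unlock(&anon_vma);
	pthread_mutex_unlock(&reservation);
}

/* MMU notifier path: entered with the anon_vma lock held, then reserves
 * the BO to invalidate it: anon_vma -> reservation. */
static void mmu_notifier_path(void)
{
	pthread_mutex_lock(&anon_vma);
	pthread_mutex_lock(&reservation);
	pthread_mutex_unlock(&reservation);
	pthread_mutex_unlock(&anon_vma);
}

/* New CS path: the page array is filled by amdgpu_ttm_tt_get_user_pages()
 * while no BO is reserved; only afterwards are the BOs re-reserved and the
 * pages copied into ttm->pages, so the two locks are never held together. */
static void cs_path_new(void)
{
	pthread_mutex_lock(&anon_vma);		/* get_user_pages(), BOs unreserved */
	pthread_mutex_unlock(&anon_vma);

	pthread_mutex_lock(&reservation);	/* ttm_eu_reserve_buffers() + memcpy */
	pthread_mutex_unlock(&reservation);
}

int main(void)
{
	/* Single-threaded these all succeed; run cs_path_old() and
	 * mmu_notifier_path() on two threads and they can deadlock,
	 * which is exactly what the patch rules out for cs_path_new(). */
	cs_path_old();
	mmu_notifier_path();
	cs_path_new();
	puts("no inversion with the new ordering");
	return 0;
}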
@@ -471,6 +471,8 @@ struct amdgpu_bo_list_entry {
 	struct ttm_validate_buffer	tv;
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
+	struct page			**user_pages;
+	int				user_invalidated;
 };
 
 struct amdgpu_bo_va_mapping {
@@ -2365,11 +2367,14 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 		       struct amdgpu_ring **out_ring);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
 int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 			      uint32_t flags);
 struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
 bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 				  unsigned long end);
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+				       int *last_invalidated);
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
 uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
 				 struct ttm_mem_reg *mem);
......
@@ -200,6 +200,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 		list_add_tail(&list->array[i].tv.head,
 			      &bucket[priority]);
+		list->array[i].user_pages = NULL;
 	}
 
 	/* Connect the sorted buckets in the output list. */
......
@@ -111,6 +111,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
 	p->uf_entry.tv.shared = true;
+	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_unreference_unlocked(gobj);
 	return 0;
@@ -297,6 +298,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 	list_for_each_entry(lobj, validated, tv.head) {
 		struct amdgpu_bo *bo = lobj->robj;
+		bool binding_userptr = false;
 		struct mm_struct *usermm;
 		uint32_t domain;
@@ -304,6 +306,15 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 		if (usermm && usermm != current->mm)
 			return -EPERM;
 
+		/* Check if we have user pages and nobody bound the BO already */
+		if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
+			size_t size = sizeof(struct page *);
+			size *= bo->tbo.ttm->num_pages;
+
+			memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
+			binding_userptr = true;
+		}
+
 		if (bo->pin_count)
 			continue;
@@ -334,6 +345,11 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
 			}
 			return r;
 		}
+
+		if (binding_userptr) {
+			drm_free_large(lobj->user_pages);
+			lobj->user_pages = NULL;
+		}
 	}
 	return 0;
 }
@@ -342,8 +358,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			  union drm_amdgpu_cs *cs)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_bo_list_entry *e;
 	struct list_head duplicates;
 	bool need_mmap_lock = false;
+	unsigned i, tries = 10;
 	int r;
 
 	INIT_LIST_HEAD(&p->validated);
@@ -364,9 +382,81 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (need_mmap_lock)
 		down_read(&current->mm->mmap_sem);
 
-	r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
-	if (unlikely(r != 0))
-		goto error_reserve;
+	while (1) {
+		struct list_head need_pages;
+		unsigned i;
+
+		r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+					   &duplicates);
+		if (unlikely(r != 0))
+			goto error_free_pages;
+
+		/* Without a BO list we don't have userptr BOs */
+		if (!p->bo_list)
+			break;
+
+		INIT_LIST_HEAD(&need_pages);
+		for (i = p->bo_list->first_userptr;
+		     i < p->bo_list->num_entries; ++i) {
+
+			e = &p->bo_list->array[i];
+
+			if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
+				 &e->user_invalidated) && e->user_pages) {
+
+				/* We acquired a page array, but somebody
+				 * invalidated it. Free it and try again
+				 */
+				release_pages(e->user_pages,
+					      e->robj->tbo.ttm->num_pages,
+					      false);
+				drm_free_large(e->user_pages);
+				e->user_pages = NULL;
+			}
+
+			if (e->robj->tbo.ttm->state != tt_bound &&
+			    !e->user_pages) {
+				list_del(&e->tv.head);
+				list_add(&e->tv.head, &need_pages);
+
+				amdgpu_bo_unreserve(e->robj);
+			}
+		}
+
+		if (list_empty(&need_pages))
+			break;
+
+		/* Unreserve everything again. */
+		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+
+		/* We tried too often, just abort */
+		if (!--tries) {
+			r = -EDEADLK;
+			goto error_free_pages;
+		}
+
+		/* Fill the page arrays for all userptrs. */
+		list_for_each_entry(e, &need_pages, tv.head) {
+			struct ttm_tt *ttm = e->robj->tbo.ttm;
+
+			e->user_pages = drm_calloc_large(ttm->num_pages,
+							 sizeof(struct page*));
+			if (!e->user_pages) {
+				r = -ENOMEM;
+				goto error_free_pages;
+			}
+
+			r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
+			if (r) {
+				drm_free_large(e->user_pages);
+				e->user_pages = NULL;
+				goto error_free_pages;
+			}
+		}
+
+		/* And try again. */
+		list_splice(&need_pages, &p->validated);
+	}
 
 	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
@@ -398,10 +488,26 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		ttm_eu_backoff_reservation(&p->ticket, &p->validated);
 	}
 
-error_reserve:
+error_free_pages:
+
 	if (need_mmap_lock)
 		up_read(&current->mm->mmap_sem);
 
+	if (p->bo_list) {
+		for (i = p->bo_list->first_userptr;
+		     i < p->bo_list->num_entries; ++i) {
+			e = &p->bo_list->array[i];
+
+			if (!e->user_pages)
+				continue;
+
+			release_pages(e->user_pages,
+				      e->robj->tbo.ttm->num_pages,
+				      false);
+			drm_free_large(e->user_pages);
+		}
+	}
+
 	return r;
 }
......
@@ -274,18 +274,23 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
 		down_read(&current->mm->mmap_sem);
+		r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+						 bo->tbo.ttm->pages);
+		if (r)
+			goto unlock_mmap_sem;
+
 		r = amdgpu_bo_reserve(bo, true);
-		if (r) {
-			up_read(&current->mm->mmap_sem);
-			goto release_object;
-		}
+		if (r)
+			goto free_pages;
 
 		amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
 		amdgpu_bo_unreserve(bo);
-		up_read(&current->mm->mmap_sem);
 		if (r)
-			goto release_object;
+			goto free_pages;
+
+		up_read(&current->mm->mmap_sem);
 	}
 
 	r = drm_gem_handle_create(filp, gobj, &handle);
@@ -297,6 +302,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	args->handle = handle;
 	return 0;
 
+free_pages:
+	release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false);
+
+unlock_mmap_sem:
+	up_read(&current->mm->mmap_sem);
+
 release_object:
 	drm_gem_object_unreference_unlocked(gobj);
......
@@ -508,22 +508,18 @@ struct amdgpu_ttm_tt {
 	uint32_t		userflags;
 	spinlock_t		guptasklock;
 	struct list_head	guptasks;
+	atomic_t		mmu_invalidations;
 };
 
-/* prepare the sg table with the user pages */
-static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
 {
-	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
-	unsigned pinned = 0, nents;
-	int r;
-
 	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
-	enum dma_data_direction direction = write ?
-		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+	unsigned pinned = 0;
+	int r;
 
 	if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
-		/* check that we only pin down anonymous memory
+		/* check that we only use anonymous memory
 		   to prevent problems with writeback */
 		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
 		struct vm_area_struct *vma;
@@ -536,7 +532,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 	do {
 		unsigned num_pages = ttm->num_pages - pinned;
 		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
-		struct page **pages = ttm->pages + pinned;
+		struct page **p = pages + pinned;
 		struct amdgpu_ttm_gup_task_list guptask;
 
 		guptask.task = current;
@@ -545,7 +541,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 		spin_unlock(&gtt->guptasklock);
 
 		r = get_user_pages(current, current->mm, userptr, num_pages,
-				   write, 0, pages, NULL);
+				   write, 0, p, NULL);
 
 		spin_lock(&gtt->guptasklock);
 		list_del(&guptask.list);
@@ -558,6 +554,25 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 
 	} while (pinned < ttm->num_pages);
 
+	return 0;
+
+release_pages:
+	release_pages(pages, pinned, 0);
+	return r;
+}
+
+/* prepare the sg table with the user pages */
+static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+{
+	struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	unsigned nents;
+	int r;
+
+	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+	enum dma_data_direction direction = write ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
 	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
 				      ttm->num_pages << PAGE_SHIFT,
 				      GFP_KERNEL);
@@ -576,9 +591,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 
 release_sg:
 	kfree(ttm->sg);
-
-release_pages:
-	release_pages(ttm->pages, pinned, 0);
 	return r;
 }
@@ -803,6 +815,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
 	gtt->userflags = flags;
 	spin_lock_init(&gtt->guptasklock);
 	INIT_LIST_HEAD(&gtt->guptasks);
+	atomic_set(&gtt->mmu_invalidations, 0);
 
 	return 0;
 }
@@ -840,9 +853,21 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
 	}
 	spin_unlock(&gtt->guptasklock);
 
+	atomic_inc(&gtt->mmu_invalidations);
+
 	return true;
 }
 
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+				       int *last_invalidated)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+	int prev_invalidated = *last_invalidated;
+
+	*last_invalidated = atomic_read(&gtt->mmu_invalidations);
+	return prev_invalidated != *last_invalidated;
+}
+
 bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
 {
 	struct amdgpu_ttm_tt *gtt = (void *)ttm;
......
@@ -95,6 +95,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	entry->priority = 0;
 	entry->tv.bo = &vm->page_directory->tbo;
 	entry->tv.shared = true;
+	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
 }
@@ -1186,6 +1187,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		entry->priority = 0;
 		entry->tv.bo = &entry->robj->tbo;
 		entry->tv.shared = true;
+		entry->user_pages = NULL;
 		vm->page_tables[pt_idx].addr = 0;
 	}
......