Commit dc427a47 authored by Xiaogang Chen, committed by Alex Deucher

drm/amdkfd: Use partial migrations in GPU page faults

This patch implements partial migration in GPU page faults according to the migration
granularity (default 2MB) and no longer splits the svm range in CPU page fault handling.
An svm range may now include pages from both system RAM and the VRAM of one GPU.
These changes are expected to improve migration performance and reduce MMU notifier
callback and TLB flush workloads.
Signed-off-by: Xiaogang Chen <xiaogang.chen@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent de5e73dc
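Editor's note: the core of the change is that a GPU page fault no longer migrates the whole svm range; the faulting address is expanded to one migration granule (default 2MB) and clamped to the range boundaries, and only that window is migrated. The standalone C sketch below illustrates just that address arithmetic. The variable names and the userspace harness are illustrative assumptions, not driver code; the driver does the equivalent with ALIGN_DOWN()/ALIGN() and max_t()/min_t() in svm_range_restore_pages, as shown in the diff further down.

/*
 * Minimal sketch (plain userspace C, invented names) of turning a fault
 * address into a granularity-aligned migration window within a range.
 */
#include <stdio.h>

static unsigned long align_down(unsigned long x, unsigned long a)
{
	return x & ~(a - 1);		/* a must be a power of two */
}

int main(void)
{
	unsigned long range_start = 0x1000;	/* first page index of the range */
	unsigned long range_last  = 0x2fff;	/* last page index of the range */
	unsigned long fault_addr  = 0x1234;	/* faulting page index */
	unsigned int  granularity = 9;		/* log2 pages: 512 * 4K = 2MB */

	unsigned long size  = 1UL << granularity;
	unsigned long start = align_down(fault_addr, size);
	unsigned long last  = start + size - 1;	/* same as ALIGN(addr + 1, size) - 1 */

	/* clamp the window to the range boundaries */
	if (start < range_start)
		start = range_start;
	if (last > range_last)
		last = range_last;

	/* only pages [start, last] are migrated, not [range_start, range_last] */
	printf("migrate [0x%lx 0x%lx] within [0x%lx 0x%lx]\n",
	       start, last, range_start, range_last);
	return 0;
}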
......@@ -41,9 +41,13 @@ enum MIGRATION_COPY_DIR {
};
int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
unsigned long start, unsigned long last,
struct mm_struct *mm, uint32_t trigger);
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
uint32_t trigger, struct page *fault_page);
unsigned long start, unsigned long last,
uint32_t trigger, struct page *fault_page);
unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
......
......@@ -158,12 +158,13 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
static int
svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
unsigned long offset, unsigned long npages,
unsigned long *hmm_pfns, uint32_t gpuidx)
unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages)
{
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
dma_addr_t *addr = prange->dma_addr[gpuidx];
struct device *dev = adev->dev;
struct page *page;
uint64_t vram_pages_dev;
int i, r;
if (!addr) {
......@@ -173,6 +174,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
prange->dma_addr[gpuidx] = addr;
}
vram_pages_dev = 0;
addr += offset;
for (i = 0; i < npages; i++) {
if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
......@@ -182,6 +184,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
if (is_zone_device_page(page)) {
struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;
vram_pages_dev++;
addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
bo_adev->vm_manager.vram_base_offset -
bo_adev->kfd.pgmap.range.start;
......@@ -198,13 +201,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
addr[i] >> PAGE_SHIFT, page_to_pfn(page));
}
*vram_pages = vram_pages_dev;
return 0;
}
static int
svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
unsigned long offset, unsigned long npages,
unsigned long *hmm_pfns)
unsigned long *hmm_pfns, uint64_t *vram_pages)
{
struct kfd_process *p;
uint32_t gpuidx;
......@@ -223,7 +227,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
}
r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
hmm_pfns, gpuidx);
hmm_pfns, gpuidx, vram_pages);
if (r)
break;
}
......@@ -349,6 +353,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
INIT_LIST_HEAD(&prange->child_list);
atomic_set(&prange->invalid, 0);
prange->validate_timestamp = 0;
prange->vram_pages = 0;
mutex_init(&prange->migrate_mutex);
mutex_init(&prange->lock);
......@@ -395,6 +400,8 @@ static void svm_range_bo_release(struct kref *kref)
prange->start, prange->last);
mutex_lock(&prange->lock);
prange->svm_bo = NULL;
/* prange should not hold vram pages now */
WARN_ON(prange->actual_loc);
mutex_unlock(&prange->lock);
spin_lock(&svm_bo->list_lock);
......@@ -975,6 +982,11 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
new->svm_bo = svm_range_bo_ref(old->svm_bo);
new->ttm_res = old->ttm_res;
/* set new's vram_pages as old range's now, the accurate vram_pages
 * will be updated during mapping
 */
new->vram_pages = min(old->vram_pages, new->npages);
spin_lock(&new->svm_bo->list_lock);
list_add(&new->svm_bo_list, &new->svm_bo->range_list);
spin_unlock(&new->svm_bo->list_lock);
......@@ -1612,6 +1624,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
struct svm_validate_context *ctx;
unsigned long start, end, addr;
struct kfd_process *p;
uint64_t vram_pages;
void *owner;
int32_t idx;
int r = 0;
......@@ -1680,11 +1693,13 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
}
}
vram_pages = 0;
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
for (addr = start; !r && addr < end; ) {
struct hmm_range *hmm_range;
struct vm_area_struct *vma;
uint64_t vram_pages_vma;
unsigned long next = 0;
unsigned long offset;
unsigned long npages;
......@@ -1713,9 +1728,11 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
if (!r) {
offset = (addr - start) >> PAGE_SHIFT;
r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
hmm_range->hmm_pfns);
hmm_range->hmm_pfns, &vram_pages_vma);
if (r)
pr_debug("failed %d to dma map range\n", r);
else
vram_pages += vram_pages_vma;
}
svm_range_lock(prange);
......@@ -1741,6 +1758,19 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
addr = next;
}
if (addr == end) {
prange->vram_pages = vram_pages;
/* if prange does not include any vram page and it
 * has not released svm_bo, drop its svm_bo reference
 * and set its actual_loc to sys ram
 */
if (!vram_pages && prange->ttm_res) {
prange->actual_loc = 0;
svm_range_vram_node_free(prange);
}
}
svm_range_unreserve_bos(ctx);
if (!r)
prange->validate_timestamp = ktime_get_boottime();
......@@ -1993,6 +2023,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
new->actual_loc = old->actual_loc;
new->granularity = old->granularity;
new->mapped_to_gpu = old->mapped_to_gpu;
new->vram_pages = old->vram_pages;
bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
......@@ -2901,6 +2932,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
uint32_t vmid, uint32_t node_id,
uint64_t addr, bool write_fault)
{
unsigned long start, last, size;
struct mm_struct *mm = NULL;
struct svm_range_list *svms;
struct svm_range *prange;
......@@ -3036,32 +3068,35 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
write_fault, timestamp);
if (prange->actual_loc != best_loc) {
if (prange->actual_loc != 0 || best_loc != 0) {
migration = true;
/* Align migration range start and size to granularity size */
size = 1UL << prange->granularity;
start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
if (best_loc) {
r = svm_migrate_to_vram(prange, best_loc, mm,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
r = svm_migrate_to_vram(prange, best_loc, start, last,
mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
if (r) {
pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
r, addr);
/* Fallback to system memory if migration to
* VRAM failed
*/
if (prange->actual_loc)
r = svm_migrate_vram_to_ram(prange, mm,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
NULL);
if (prange->actual_loc && prange->actual_loc != best_loc)
r = svm_migrate_vram_to_ram(prange, mm, start, last,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
else
r = 0;
}
} else {
r = svm_migrate_vram_to_ram(prange, mm,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
NULL);
r = svm_migrate_vram_to_ram(prange, mm, start, last,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
}
if (r) {
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
r, svms, prange->start, prange->last);
r, svms, start, last);
goto out_unlock_range;
}
}
......@@ -3415,18 +3450,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
*migrated = false;
best_loc = svm_range_best_prefetch_location(prange);
if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
best_loc == prange->actual_loc)
/* when best_loc is a gpu node and the same as prange->actual_loc
 * we still need to do migration as prange->actual_loc != 0 does
 * not mean all pages in prange are vram. hmm migrate will pick
 * up the right pages during migration.
 */
if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) ||
(best_loc == 0 && prange->actual_loc == 0))
return 0;
if (!best_loc) {
r = svm_migrate_vram_to_ram(prange, mm,
r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
*migrated = !r;
return r;
}
r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last,
mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r;
return r;
......@@ -3481,7 +3522,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mutex_lock(&prange->migrate_mutex);
do {
/* migrate all vram pages in this prange to sys ram
* after that prange->actual_loc should be zero
*/
r = svm_migrate_vram_to_ram(prange, mm,
prange->start, prange->last,
KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
} while (!r && prange->actual_loc && --retries);
......
......@@ -78,6 +78,7 @@ struct svm_work_list_item {
* @update_list:link list node used to add to update_list
* @mapping: bo_va mapping structure to create and update GPU page table
* @npages: number of pages
* @vram_pages: number of vram pages in this svm_range
* @dma_addr: dma mapping address on each GPU for system memory physical page
* @ttm_res: vram ttm resource map
* @offset: range start offset within mm_nodes
......@@ -88,7 +89,9 @@ struct svm_work_list_item {
* @flags: flags defined as KFD_IOCTL_SVM_FLAG_*
* @preferred_loc: preferred location, 0 for CPU, or GPU id
* @prefetch_loc: last prefetch location, 0 for CPU, or GPU id
* @actual_loc: the actual location, 0 for CPU, or GPU id
* @actual_loc: this svm_range's location. 0: all pages are in sys ram;
* GPU id: this svm_range may include vram pages from the GPU with
* id actual_loc.
* @granularity: migration granularity, log2 num pages
* @invalid: not 0 means cpu page table is invalidated
* @validate_timestamp: system timestamp when range is validated
......@@ -112,6 +115,7 @@ struct svm_range {
struct list_head list;
struct list_head update_list;
uint64_t npages;
uint64_t vram_pages;
dma_addr_t *dma_addr[MAX_GPU_INSTANCE];
struct ttm_resource *ttm_res;
uint64_t offset;
......
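Editor's note: because a range can now be partly in VRAM and partly in system RAM, actual_loc alone no longer says where the pages live; the new vram_pages counter does. The toy C model below, with invented names and structures, sketches the invariant the patch enforces in svm_range_validate_and_map: once the mapped VRAM page count drops to zero, the range is treated as fully resident in system RAM and its VRAM allocation can be released.

/* Toy model (invented names, not driver structures) of the new bookkeeping:
 * vram_pages counts resident VRAM pages, actual_loc only names the GPU that
 * may still hold some of them.
 */
#include <stddef.h>

struct toy_range {
	unsigned long npages;
	unsigned long vram_pages;	/* pages currently resident in VRAM */
	unsigned int  actual_loc;	/* 0 = system RAM, else GPU id */
	void         *vram_bo;		/* stand-in for the range's VRAM buffer */
};

/* called after mapping: mapped_vram is the VRAM page count just observed */
static void toy_range_update(struct toy_range *r, unsigned long mapped_vram)
{
	r->vram_pages = mapped_vram;
	if (!mapped_vram && r->vram_bo) {
		/* no VRAM pages left: range is pure system RAM again */
		r->actual_loc = 0;
		r->vram_bo = NULL;	/* the driver drops its svm_bo reference here */
	}
}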