Commit acac270d authored by Philip Yang, committed by Alex Deucher

drm/amdkfd: Add migration SMI event

For migration start and end events, output the timestamp when the
migration starts or ends, the svm range address and size, the GPU ids of
the migration source and destination, and the svm range attributes.

The migration trigger can be a prefetch, a CPU or GPU page fault, or a
TTM eviction.
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent e0f1e65b
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include "kfd_priv.h" #include "kfd_priv.h"
#include "kfd_svm.h" #include "kfd_svm.h"
#include "kfd_migrate.h" #include "kfd_migrate.h"
#include "kfd_smi_events.h"
#ifdef dev_fmt #ifdef dev_fmt
#undef dev_fmt #undef dev_fmt
...@@ -402,8 +403,9 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -402,8 +403,9 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
static long static long
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start, struct vm_area_struct *vma, uint64_t start,
uint64_t end) uint64_t end, uint32_t trigger)
{ {
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
uint64_t npages = (end - start) >> PAGE_SHIFT; uint64_t npages = (end - start) >> PAGE_SHIFT;
struct kfd_process_device *pdd; struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL; struct dma_fence *mfence = NULL;
...@@ -430,6 +432,11 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -430,6 +432,11 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.dst = migrate.src + npages; migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages); scratch = (dma_addr_t *)(migrate.dst + npages);
kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0, adev->kfd.dev->id, prange->prefetch_loc,
prange->preferred_loc, trigger);
r = migrate_vma_setup(&migrate); r = migrate_vma_setup(&migrate);
if (r) { if (r) {
dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n", dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
...@@ -458,6 +465,10 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -458,6 +465,10 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence); svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate); migrate_vma_finalize(&migrate);
kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
0, adev->kfd.dev->id, trigger);
svm_range_dma_unmap(adev->dev, scratch, 0, npages); svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange); svm_range_free_dma_mappings(prange);
...@@ -479,6 +490,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -479,6 +490,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
* @prange: range structure * @prange: range structure
* @best_loc: the device to migrate to * @best_loc: the device to migrate to
* @mm: the process mm structure * @mm: the process mm structure
* @trigger: reason of migration
* *
* Context: Process context, caller hold mmap read lock, svms lock, prange lock * Context: Process context, caller hold mmap read lock, svms lock, prange lock
* *
...@@ -487,7 +499,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -487,7 +499,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
*/ */
static int static int
svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm) struct mm_struct *mm, uint32_t trigger)
{ {
unsigned long addr, start, end; unsigned long addr, start, end;
struct vm_area_struct *vma; struct vm_area_struct *vma;
...@@ -524,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, ...@@ -524,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
break; break;
next = min(vma->vm_end, end); next = min(vma->vm_end, end);
r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next); r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
if (r < 0) { if (r < 0) {
pr_debug("failed %ld to migrate\n", r); pr_debug("failed %ld to migrate\n", r);
break; break;
...@@ -655,8 +667,10 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -655,8 +667,10 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
*/ */
static long static long
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start, uint64_t end) struct vm_area_struct *vma, uint64_t start, uint64_t end,
uint32_t trigger)
{ {
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
uint64_t npages = (end - start) >> PAGE_SHIFT; uint64_t npages = (end - start) >> PAGE_SHIFT;
unsigned long upages = npages; unsigned long upages = npages;
unsigned long cpages = 0; unsigned long cpages = 0;
...@@ -685,6 +699,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -685,6 +699,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
migrate.dst = migrate.src + npages; migrate.dst = migrate.src + npages;
scratch = (dma_addr_t *)(migrate.dst + npages); scratch = (dma_addr_t *)(migrate.dst + npages);
kfd_smi_event_migration_start(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
adev->kfd.dev->id, 0, prange->prefetch_loc,
prange->preferred_loc, trigger);
r = migrate_vma_setup(&migrate); r = migrate_vma_setup(&migrate);
if (r) { if (r) {
dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n", dev_err(adev->dev, "%s: vma setup fail %d range [0x%lx 0x%lx]\n",
...@@ -715,6 +734,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -715,6 +734,11 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
svm_migrate_copy_done(adev, mfence); svm_migrate_copy_done(adev, mfence);
migrate_vma_finalize(&migrate); migrate_vma_finalize(&migrate);
kfd_smi_event_migration_end(adev->kfd.dev, p->lead_thread->pid,
start >> PAGE_SHIFT, end >> PAGE_SHIFT,
adev->kfd.dev->id, 0, trigger);
svm_range_dma_unmap(adev->dev, scratch, 0, npages); svm_range_dma_unmap(adev->dev, scratch, 0, npages);
out_free: out_free:
...@@ -732,13 +756,15 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, ...@@ -732,13 +756,15 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
* svm_migrate_vram_to_ram - migrate svm range from device to system * svm_migrate_vram_to_ram - migrate svm range from device to system
* @prange: range structure * @prange: range structure
* @mm: process mm, use current->mm if NULL * @mm: process mm, use current->mm if NULL
* @trigger: reason of migration
* *
* Context: Process context, caller hold mmap read lock, prange->migrate_mutex * Context: Process context, caller hold mmap read lock, prange->migrate_mutex
* *
* Return: * Return:
* 0 - OK, otherwise error code * 0 - OK, otherwise error code
*/ */
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
uint32_t trigger)
{ {
struct amdgpu_device *adev; struct amdgpu_device *adev;
struct vm_area_struct *vma; struct vm_area_struct *vma;
...@@ -779,7 +805,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) ...@@ -779,7 +805,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
} }
next = min(vma->vm_end, end); next = min(vma->vm_end, end);
r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next); r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger);
if (r < 0) { if (r < 0) {
pr_debug("failed %ld to migrate prange %p\n", r, prange); pr_debug("failed %ld to migrate prange %p\n", r, prange);
break; break;
...@@ -802,6 +828,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) ...@@ -802,6 +828,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
* @prange: range structure * @prange: range structure
* @best_loc: the device to migrate to * @best_loc: the device to migrate to
* @mm: process mm, use current->mm if NULL * @mm: process mm, use current->mm if NULL
* @trigger: reason of migration
* *
* Context: Process context, caller hold mmap read lock, svms lock, prange lock * Context: Process context, caller hold mmap read lock, svms lock, prange lock
* *
...@@ -810,7 +837,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) ...@@ -810,7 +837,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
*/ */
static int static int
svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm) struct mm_struct *mm, uint32_t trigger)
{ {
int r, retries = 3; int r, retries = 3;
...@@ -822,7 +849,7 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, ...@@ -822,7 +849,7 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc); pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
do { do {
r = svm_migrate_vram_to_ram(prange, mm); r = svm_migrate_vram_to_ram(prange, mm, trigger);
if (r) if (r)
return r; return r;
} while (prange->actual_loc && --retries); } while (prange->actual_loc && --retries);
...@@ -830,17 +857,17 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, ...@@ -830,17 +857,17 @@ svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
if (prange->actual_loc) if (prange->actual_loc)
return -EDEADLK; return -EDEADLK;
return svm_migrate_ram_to_vram(prange, best_loc, mm); return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
} }
int int
svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm) struct mm_struct *mm, uint32_t trigger)
{ {
if (!prange->actual_loc) if (!prange->actual_loc)
return svm_migrate_ram_to_vram(prange, best_loc, mm); return svm_migrate_ram_to_vram(prange, best_loc, mm, trigger);
else else
return svm_migrate_vram_to_vram(prange, best_loc, mm); return svm_migrate_vram_to_vram(prange, best_loc, mm, trigger);
} }
...@@ -909,7 +936,7 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) ...@@ -909,7 +936,7 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
goto out_unlock_prange; goto out_unlock_prange;
} }
r = svm_migrate_vram_to_ram(prange, mm); r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU);
if (r) if (r)
pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
prange, prange->start, prange->last); prange, prange->start, prange->last);
......
...@@ -41,8 +41,9 @@ enum MIGRATION_COPY_DIR { ...@@ -41,8 +41,9 @@ enum MIGRATION_COPY_DIR {
}; };
int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
struct mm_struct *mm); struct mm_struct *mm, uint32_t trigger);
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
uint32_t trigger);
unsigned long unsigned long
svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
......
...@@ -261,6 +261,28 @@ void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid, ...@@ -261,6 +261,28 @@ void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
pid, address, dev->id, migration ? 'M' : 'U'); pid, address, dev->id, migration ? 'M' : 'U');
} }
/**
 * kfd_smi_event_migration_start - report the start of an svm range migration
 * @dev: KFD device the event is reported on
 * @pid: process id recorded in the event
 * @start: start of the migrated span (callers in this patch pass page numbers)
 * @end: end of the migrated span; only the size, @end - @start, is reported
 * @from: id of the migration source (callers pass 0 for system memory)
 * @to: id of the migration destination (callers pass 0 for system memory)
 * @prefetch_loc: prefetch location attribute of the svm range
 * @preferred_loc: preferred location attribute of the svm range
 * @trigger: reason of migration
 *
 * Event text layout:
 *   <ktime_get_boottime_ns()> -<pid> @<start>(<size>) <from>-><to>
 *   <prefetch_loc>:<preferred_loc> <trigger>
 * Addresses, ids and locations are printed in hex; pid and trigger in decimal.
 */
void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
				   unsigned long start, unsigned long end,
				   uint32_t from, uint32_t to,
				   uint32_t prefetch_loc, uint32_t preferred_loc,
				   uint32_t trigger)
{
	/* The event carries the span as start + size rather than start + end. */
	unsigned long size = end - start;

	kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_START,
			  "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
			  ktime_get_boottime_ns(), pid, start, size,
			  from, to, prefetch_loc, preferred_loc, trigger);
}
/**
 * kfd_smi_event_migration_end - report the end of an svm range migration
 * @dev: KFD device the event is reported on
 * @pid: process id recorded in the event
 * @start: start of the migrated span (callers in this patch pass page numbers)
 * @end: end of the migrated span; only the size, @end - @start, is reported
 * @from: id of the migration source (callers pass 0 for system memory)
 * @to: id of the migration destination (callers pass 0 for system memory)
 * @trigger: reason of migration
 *
 * Event text layout:
 *   <ktime_get_boottime_ns()> -<pid> @<start>(<size>) <from>-><to> <trigger>
 * Addresses and ids are printed in hex; pid and trigger in decimal.
 */
void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
				 unsigned long start, unsigned long end,
				 uint32_t from, uint32_t to, uint32_t trigger)
{
	/* The event carries the span as start + size rather than start + end. */
	unsigned long size = end - start;

	kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_MIGRATE_END,
			  "%lld -%d @%lx(%lx) %x->%x %d\n",
			  ktime_get_boottime_ns(), pid, start, size,
			  from, to, trigger);
}
int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
{ {
struct kfd_smi_client *client; struct kfd_smi_client *client;
......
...@@ -34,4 +34,12 @@ void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid, ...@@ -34,4 +34,12 @@ void kfd_smi_event_page_fault_start(struct kfd_dev *dev, pid_t pid,
ktime_t ts); ktime_t ts);
void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid, void kfd_smi_event_page_fault_end(struct kfd_dev *dev, pid_t pid,
unsigned long address, bool migration); unsigned long address, bool migration);
void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid,
unsigned long start, unsigned long end,
uint32_t from, uint32_t to,
uint32_t prefetch_loc, uint32_t preferred_loc,
uint32_t trigger);
void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid,
unsigned long start, unsigned long end,
uint32_t from, uint32_t to, uint32_t trigger);
#endif #endif
...@@ -2821,7 +2821,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, ...@@ -2821,7 +2821,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
if (prange->actual_loc != best_loc) { if (prange->actual_loc != best_loc) {
migration = true; migration = true;
if (best_loc) { if (best_loc) {
r = svm_migrate_to_vram(prange, best_loc, mm); r = svm_migrate_to_vram(prange, best_loc, mm,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
if (r) { if (r) {
pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
r, addr); r, addr);
...@@ -2829,12 +2830,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, ...@@ -2829,12 +2830,14 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
* VRAM failed * VRAM failed
*/ */
if (prange->actual_loc) if (prange->actual_loc)
r = svm_migrate_vram_to_ram(prange, mm); r = svm_migrate_vram_to_ram(prange, mm,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
else else
r = 0; r = 0;
} }
} else { } else {
r = svm_migrate_vram_to_ram(prange, mm); r = svm_migrate_vram_to_ram(prange, mm,
KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
} }
if (r) { if (r) {
pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n", pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
...@@ -3157,12 +3160,12 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, ...@@ -3157,12 +3160,12 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
return 0; return 0;
if (!best_loc) { if (!best_loc) {
r = svm_migrate_vram_to_ram(prange, mm); r = svm_migrate_vram_to_ram(prange, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r; *migrated = !r;
return r; return r;
} }
r = svm_migrate_to_vram(prange, best_loc, mm); r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
*migrated = !r; *migrated = !r;
return r; return r;
...@@ -3220,7 +3223,8 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) ...@@ -3220,7 +3223,8 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
mutex_lock(&prange->migrate_mutex); mutex_lock(&prange->migrate_mutex);
do { do {
r = svm_migrate_vram_to_ram(prange, r = svm_migrate_vram_to_ram(prange,
svm_bo->eviction_fence->mm); svm_bo->eviction_fence->mm,
KFD_MIGRATE_TRIGGER_TTM_EVICTION);
} while (!r && prange->actual_loc && --retries); } while (!r && prange->actual_loc && --retries);
if (!r && prange->actual_loc) if (!r && prange->actual_loc)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment