Commit d4566dee authored by Mukul Joshi, committed by Alex Deucher

drm/amdkfd: Track GPU memory utilization per process

Track GPU VRAM usage on a per process basis and report it through
sysfs.
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent b8020b03
...@@ -65,6 +65,7 @@ struct kgd_mem { ...@@ -65,6 +65,7 @@ struct kgd_mem {
struct amdgpu_sync sync; struct amdgpu_sync sync;
bool aql_queue; bool aql_queue;
bool is_imported;
}; };
/* KFD Memory Eviction */ /* KFD Memory Eviction */
...@@ -219,7 +220,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ...@@ -219,7 +220,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
void *vm, struct kgd_mem **mem, void *vm, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags); uint64_t *offset, uint32_t flags);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem); struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
......
...@@ -1277,7 +1277,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ...@@ -1277,7 +1277,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
} }
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct kgd_dev *kgd, struct kgd_mem *mem) struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
{ {
struct amdkfd_process_info *process_info = mem->process_info; struct amdkfd_process_info *process_info = mem->process_info;
unsigned long bo_size = mem->bo->tbo.mem.size; unsigned long bo_size = mem->bo->tbo.mem.size;
...@@ -1286,9 +1286,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( ...@@ -1286,9 +1286,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct ttm_validate_buffer *bo_list_entry; struct ttm_validate_buffer *bo_list_entry;
unsigned int mapped_to_gpu_memory; unsigned int mapped_to_gpu_memory;
int ret; int ret;
bool is_imported = 0;
mutex_lock(&mem->lock); mutex_lock(&mem->lock);
mapped_to_gpu_memory = mem->mapped_to_gpu_memory; mapped_to_gpu_memory = mem->mapped_to_gpu_memory;
is_imported = mem->is_imported;
mutex_unlock(&mem->lock); mutex_unlock(&mem->lock);
/* lock is not needed after this, since mem is unused and will /* lock is not needed after this, since mem is unused and will
* be freed anyway * be freed anyway
...@@ -1340,6 +1342,17 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( ...@@ -1340,6 +1342,17 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
kfree(mem->bo->tbo.sg); kfree(mem->bo->tbo.sg);
} }
/* Update the size of the BO being freed if it was allocated from
* VRAM and is not imported.
*/
if (size) {
if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) &&
(!is_imported))
*size = bo_size;
else
*size = 0;
}
/* Free the BO*/ /* Free the BO*/
amdgpu_bo_unref(&mem->bo); amdgpu_bo_unref(&mem->bo);
mutex_destroy(&mem->lock); mutex_destroy(&mem->lock);
...@@ -1694,6 +1707,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, ...@@ -1694,6 +1707,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
(*mem)->process_info = avm->process_info; (*mem)->process_info = avm->process_info;
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
amdgpu_sync_create(&(*mem)->sync); amdgpu_sync_create(&(*mem)->sync);
(*mem)->is_imported = true;
return 0; return 0;
} }
......
...@@ -1323,6 +1323,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, ...@@ -1323,6 +1323,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
goto err_free; goto err_free;
} }
/* Update the VRAM usage count */
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
mutex_unlock(&p->mutex); mutex_unlock(&p->mutex);
args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
...@@ -1338,7 +1342,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, ...@@ -1338,7 +1342,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return 0; return 0;
err_free: err_free:
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
err_unlock: err_unlock:
mutex_unlock(&p->mutex); mutex_unlock(&p->mutex);
return err; return err;
...@@ -1352,6 +1356,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, ...@@ -1352,6 +1356,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
void *mem; void *mem;
struct kfd_dev *dev; struct kfd_dev *dev;
int ret; int ret;
uint64_t size = 0;
dev = kfd_device_by_id(GET_GPU_ID(args->handle)); dev = kfd_device_by_id(GET_GPU_ID(args->handle));
if (!dev) if (!dev)
...@@ -1374,7 +1379,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, ...@@ -1374,7 +1379,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
} }
ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
(struct kgd_mem *)mem); (struct kgd_mem *)mem, &size);
/* If freeing the buffer failed, leave the handle in place for /* If freeing the buffer failed, leave the handle in place for
* clean-up during process tear-down. * clean-up during process tear-down.
...@@ -1383,6 +1388,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep, ...@@ -1383,6 +1388,8 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
kfd_process_device_remove_obj_handle( kfd_process_device_remove_obj_handle(
pdd, GET_IDR_HANDLE(args->handle)); pdd, GET_IDR_HANDLE(args->handle));
WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
err_unlock: err_unlock:
mutex_unlock(&p->mutex); mutex_unlock(&p->mutex);
return ret; return ret;
...@@ -1727,7 +1734,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep, ...@@ -1727,7 +1734,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,
return 0; return 0;
err_free: err_free:
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem); amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, NULL);
err_unlock: err_unlock:
mutex_unlock(&p->mutex); mutex_unlock(&p->mutex);
return r; return r;
......
...@@ -629,6 +629,8 @@ enum kfd_pdd_bound { ...@@ -629,6 +629,8 @@ enum kfd_pdd_bound {
PDD_BOUND_SUSPENDED, PDD_BOUND_SUSPENDED,
}; };
#define MAX_VRAM_FILENAME_LEN 11
/* Data that is per-process-per device. */ /* Data that is per-process-per device. */
struct kfd_process_device { struct kfd_process_device {
/* /*
...@@ -671,6 +673,11 @@ struct kfd_process_device { ...@@ -671,6 +673,11 @@ struct kfd_process_device {
/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
enum kfd_pdd_bound bound; enum kfd_pdd_bound bound;
/* VRAM usage */
uint64_t vram_usage;
struct attribute attr_vram;
char vram_filename[MAX_VRAM_FILENAME_LEN];
}; };
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
......
...@@ -79,18 +79,22 @@ static struct kfd_procfs_tree procfs; ...@@ -79,18 +79,22 @@ static struct kfd_procfs_tree procfs;
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
char *buffer) char *buffer)
{ {
int val = 0;
if (strcmp(attr->name, "pasid") == 0) { if (strcmp(attr->name, "pasid") == 0) {
struct kfd_process *p = container_of(attr, struct kfd_process, struct kfd_process *p = container_of(attr, struct kfd_process,
attr_pasid); attr_pasid);
val = p->pasid;
return snprintf(buffer, PAGE_SIZE, "%d\n", p->pasid);
} else if (strncmp(attr->name, "vram_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_vram);
if (pdd)
return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
} else { } else {
pr_err("Invalid attribute"); pr_err("Invalid attribute");
return -EINVAL; return -EINVAL;
} }
return snprintf(buffer, PAGE_SIZE, "%d\n", val); return 0;
} }
static void kfd_procfs_kobj_release(struct kobject *kobj) static void kfd_procfs_kobj_release(struct kobject *kobj)
...@@ -206,6 +210,34 @@ int kfd_procfs_add_queue(struct queue *q) ...@@ -206,6 +210,34 @@ int kfd_procfs_add_queue(struct queue *q)
return 0; return 0;
} }
/*
 * kfd_procfs_add_vram_usage - create the per-GPU VRAM usage files of a process
 * @p: KFD process whose kobject receives the files
 *
 * For every entry on p->per_device_data an attribute named "vram_<gpu id>"
 * is created under p->kobj with mode KFD_SYSFS_FILE_MODE. A failure for one
 * device is logged and the remaining devices are still processed.
 *
 * Return: 0 if every file was created, -EINVAL if @p is NULL, -EFAULT if
 * the process has no kobject, otherwise the first error returned by
 * sysfs_create_file().
 */
int kfd_procfs_add_vram_usage(struct kfd_process *p)
{
	struct kfd_process_device *pdd;
	int first_err = 0;

	if (!p)
		return -EINVAL;

	if (!p->kobj)
		return -EFAULT;

	/* Create proc/<pid>/vram_<gpuid> file for each GPU */
	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
		int ret;

		/* Bound the write by the destination array itself. */
		snprintf(pdd->vram_filename, sizeof(pdd->vram_filename),
			 "vram_%u", pdd->dev->id);
		pdd->attr_vram.name = pdd->vram_filename;
		pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
		sysfs_attr_init(&pdd->attr_vram);

		ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
		if (ret) {
			pr_warn("Creating vram usage for gpu id %d failed",
				(int)pdd->dev->id);
			/*
			 * Keep the first failure: a later successful device
			 * must not reset the result reported to the caller.
			 */
			if (!first_err)
				first_err = ret;
		}
	}

	return first_err;
}
void kfd_procfs_del_queue(struct queue *q) void kfd_procfs_del_queue(struct queue *q)
{ {
if (!q) if (!q)
...@@ -248,7 +280,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem, ...@@ -248,7 +280,7 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
struct kfd_dev *dev = pdd->dev; struct kfd_dev *dev = pdd->dev;
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm); amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->vm);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem); amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, NULL);
} }
/* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process /* kfd_process_alloc_gpuvm - Allocate GPU VM for the KFD process
...@@ -312,7 +344,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, ...@@ -312,7 +344,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
return err; return err;
err_map_mem: err_map_mem:
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem); amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, NULL);
err_alloc_mem: err_alloc_mem:
*kptr = NULL; *kptr = NULL;
return err; return err;
...@@ -411,6 +443,11 @@ struct kfd_process *kfd_create_process(struct file *filep) ...@@ -411,6 +443,11 @@ struct kfd_process *kfd_create_process(struct file *filep)
process->kobj); process->kobj);
if (!process->kobj_queues) if (!process->kobj_queues)
pr_warn("Creating KFD proc/queues folder failed"); pr_warn("Creating KFD proc/queues folder failed");
ret = kfd_procfs_add_vram_usage(process);
if (ret)
pr_warn("Creating vram usage file for pid %d failed",
(int)process->lead_thread->pid);
} }
out: out:
if (!IS_ERR(process)) if (!IS_ERR(process))
...@@ -488,7 +525,7 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) ...@@ -488,7 +525,7 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
peer_pdd->dev->kgd, mem, peer_pdd->vm); peer_pdd->dev->kgd, mem, peer_pdd->vm);
} }
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem); amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, NULL);
kfd_process_device_remove_obj_handle(pdd, id); kfd_process_device_remove_obj_handle(pdd, id);
} }
} }
...@@ -551,6 +588,7 @@ static void kfd_process_wq_release(struct work_struct *work) ...@@ -551,6 +588,7 @@ static void kfd_process_wq_release(struct work_struct *work)
{ {
struct kfd_process *p = container_of(work, struct kfd_process, struct kfd_process *p = container_of(work, struct kfd_process,
release_work); release_work);
struct kfd_process_device *pdd;
/* Remove the procfs files */ /* Remove the procfs files */
if (p->kobj) { if (p->kobj) {
...@@ -558,6 +596,10 @@ static void kfd_process_wq_release(struct work_struct *work) ...@@ -558,6 +596,10 @@ static void kfd_process_wq_release(struct work_struct *work)
kobject_del(p->kobj_queues); kobject_del(p->kobj_queues);
kobject_put(p->kobj_queues); kobject_put(p->kobj_queues);
p->kobj_queues = NULL; p->kobj_queues = NULL;
list_for_each_entry(pdd, &p->per_device_data, per_device_list)
sysfs_remove_file(p->kobj, &pdd->attr_vram);
kobject_del(p->kobj); kobject_del(p->kobj);
kobject_put(p->kobj); kobject_put(p->kobj);
p->kobj = NULL; p->kobj = NULL;
...@@ -863,6 +905,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, ...@@ -863,6 +905,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->bound = PDD_UNBOUND; pdd->bound = PDD_UNBOUND;
pdd->already_dequeued = false; pdd->already_dequeued = false;
pdd->runtime_inuse = false; pdd->runtime_inuse = false;
pdd->vram_usage = 0;
list_add(&pdd->per_device_list, &p->per_device_data); list_add(&pdd->per_device_list, &p->per_device_data);
/* Init idr used for memory handle translation */ /* Init idr used for memory handle translation */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment