Commit 32cb59f3 authored by Mukul Joshi's avatar Mukul Joshi Committed by Alex Deucher

drm/amdkfd: Track SDMA utilization per process

Track SDMA usage on a per process basis and report it through sysfs.
The value in the sysfs file indicates the amount of time SDMA has
been in-use by this process since the creation of the process.
This value is in microsecond granularity.
Signed-off-by: default avatarMukul Joshi <mukul.joshi@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 2cdc9c20
...@@ -153,6 +153,52 @@ void decrement_queue_count(struct device_queue_manager *dqm, ...@@ -153,6 +153,52 @@ void decrement_queue_count(struct device_queue_manager *dqm,
dqm->active_cp_queue_count--; dqm->active_cp_queue_count--;
} }
int read_sdma_queue_counter(struct queue *q, uint64_t *val)
{
int ret;
uint64_t tmp = 0;
if (!q || !val)
return -EINVAL;
/*
* SDMA activity counter is stored at queue's RPTR + 0x8 location.
*/
if (!access_ok((const void __user *)((uint64_t)q->properties.read_ptr +
sizeof(uint64_t)), sizeof(uint64_t))) {
pr_err("Can't access sdma queue activity counter\n");
return -EFAULT;
}
ret = get_user(tmp, (uint64_t *)((uint64_t)(q->properties.read_ptr) +
sizeof(uint64_t)));
if (!ret) {
*val = tmp;
}
return ret;
}
static int update_sdma_queue_past_activity_stats(struct kfd_process_device *pdd,
struct queue *q)
{
int ret;
uint64_t val = 0;
if (!pdd)
return -ENODEV;
ret = read_sdma_queue_counter(q, &val);
if (ret) {
pr_err("Failed to read SDMA queue counter for queue: %d\n",
q->properties.queue_id);
return ret;
}
pdd->sdma_past_activity_counter += val;
return ret;
}
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{ {
struct kfd_dev *dev = qpd->dqm->dev; struct kfd_dev *dev = qpd->dqm->dev;
...@@ -487,6 +533,12 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, ...@@ -487,6 +533,12 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
if (retval == -ETIME) if (retval == -ETIME)
qpd->reset_wavefronts = true; qpd->reset_wavefronts = true;
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
}
mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
list_del(&q->list); list_del(&q->list);
...@@ -1468,6 +1520,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, ...@@ -1468,6 +1520,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
} }
} }
/* Get the SDMA queue stats */
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
update_sdma_queue_past_activity_stats(qpd_to_pdd(qpd), q);
}
/* /*
* Unconditionally decrement this counter, regardless of the queue's * Unconditionally decrement this counter, regardless of the queue's
* type * type
......
...@@ -251,4 +251,6 @@ static inline void dqm_unlock(struct device_queue_manager *dqm) ...@@ -251,4 +251,6 @@ static inline void dqm_unlock(struct device_queue_manager *dqm)
mutex_unlock(&dqm->lock_hidden); mutex_unlock(&dqm->lock_hidden);
} }
int read_sdma_queue_counter(struct queue *q, uint64_t *val);
#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
...@@ -629,7 +629,14 @@ enum kfd_pdd_bound { ...@@ -629,7 +629,14 @@ enum kfd_pdd_bound {
PDD_BOUND_SUSPENDED, PDD_BOUND_SUSPENDED,
}; };
#define MAX_VRAM_FILENAME_LEN 11 #define MAX_SYSFS_FILENAME_LEN 11
/*
* SDMA counter runs at 100MHz frequency.
* We display SDMA activity in microsecond granularity in sysfs.
* As a result, the divisor is 100.
*/
#define SDMA_ACTIVITY_DIVISOR 100
/* Data that is per-process-per device. */ /* Data that is per-process-per device. */
struct kfd_process_device { struct kfd_process_device {
...@@ -677,7 +684,12 @@ struct kfd_process_device { ...@@ -677,7 +684,12 @@ struct kfd_process_device {
/* VRAM usage */ /* VRAM usage */
uint64_t vram_usage; uint64_t vram_usage;
struct attribute attr_vram; struct attribute attr_vram;
char vram_filename[MAX_VRAM_FILENAME_LEN]; char vram_filename[MAX_SYSFS_FILENAME_LEN];
/* SDMA activity tracking */
uint64_t sdma_past_activity_counter;
struct attribute attr_sdma;
char sdma_filename[MAX_SYSFS_FILENAME_LEN];
}; };
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/sched/task.h> #include <linux/sched/task.h>
#include <linux/mmu_context.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/amd-iommu.h> #include <linux/amd-iommu.h>
#include <linux/notifier.h> #include <linux/notifier.h>
...@@ -76,6 +77,74 @@ struct kfd_procfs_tree { ...@@ -76,6 +77,74 @@ struct kfd_procfs_tree {
static struct kfd_procfs_tree procfs; static struct kfd_procfs_tree procfs;
/*
* Structure for SDMA activity tracking
*/
struct kfd_sdma_activity_handler_workarea {
struct work_struct sdma_activity_work;
struct kfd_process_device *pdd;
uint64_t sdma_activity_counter;
};
static void kfd_sdma_activity_worker(struct work_struct *work)
{
struct kfd_sdma_activity_handler_workarea *workarea;
struct kfd_process_device *pdd;
uint64_t val;
struct mm_struct *mm;
struct queue *q;
struct qcm_process_device *qpd;
struct device_queue_manager *dqm;
int ret = 0;
workarea = container_of(work, struct kfd_sdma_activity_handler_workarea,
sdma_activity_work);
if (!workarea)
return;
pdd = workarea->pdd;
dqm = pdd->dev->dqm;
qpd = &pdd->qpd;
if (!pdd || !dqm || !qpd)
return;
mm = get_task_mm(pdd->process->lead_thread);
if (!mm) {
return;
}
use_mm(mm);
dqm_lock(dqm);
/*
* Total SDMA activity is current SDMA activity + past SDMA activity
*/
workarea->sdma_activity_counter = pdd->sdma_past_activity_counter;
/*
* Get the current activity counters for all active SDMA queues
*/
list_for_each_entry(q, &qpd->queues_list, list) {
if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
(q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
val = 0;
ret = read_sdma_queue_counter(q, &val);
if (ret)
pr_debug("Failed to read SDMA queue active "
"counter for queue id: %d",
q->properties.queue_id);
else
workarea->sdma_activity_counter += val;
}
}
dqm_unlock(dqm);
unuse_mm(mm);
mmput(mm);
}
static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
char *buffer) char *buffer)
{ {
...@@ -87,8 +156,24 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, ...@@ -87,8 +156,24 @@ static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr,
} else if (strncmp(attr->name, "vram_", 5) == 0) { } else if (strncmp(attr->name, "vram_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device, struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_vram); attr_vram);
if (pdd)
return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage)); return snprintf(buffer, PAGE_SIZE, "%llu\n", READ_ONCE(pdd->vram_usage));
} else if (strncmp(attr->name, "sdma_", 5) == 0) {
struct kfd_process_device *pdd = container_of(attr, struct kfd_process_device,
attr_sdma);
struct kfd_sdma_activity_handler_workarea sdma_activity_work_handler;
INIT_WORK(&sdma_activity_work_handler.sdma_activity_work,
kfd_sdma_activity_worker);
sdma_activity_work_handler.pdd = pdd;
schedule_work(&sdma_activity_work_handler.sdma_activity_work);
flush_work(&sdma_activity_work_handler.sdma_activity_work);
return snprintf(buffer, PAGE_SIZE, "%llu\n",
(sdma_activity_work_handler.sdma_activity_counter)/
SDMA_ACTIVITY_DIVISOR);
} else { } else {
pr_err("Invalid attribute"); pr_err("Invalid attribute");
return -EINVAL; return -EINVAL;
...@@ -210,7 +295,24 @@ int kfd_procfs_add_queue(struct queue *q) ...@@ -210,7 +295,24 @@ int kfd_procfs_add_queue(struct queue *q)
return 0; return 0;
} }
int kfd_procfs_add_vram_usage(struct kfd_process *p) static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr,
char *name)
{
int ret = 0;
if (!p || !attr || !name)
return -EINVAL;
attr->name = name;
attr->mode = KFD_SYSFS_FILE_MODE;
sysfs_attr_init(attr);
ret = sysfs_create_file(p->kobj, attr);
return ret;
}
int kfd_procfs_add_sysfs_files(struct kfd_process *p)
{ {
int ret = 0; int ret = 0;
struct kfd_process_device *pdd; struct kfd_process_device *pdd;
...@@ -221,17 +323,25 @@ int kfd_procfs_add_vram_usage(struct kfd_process *p) ...@@ -221,17 +323,25 @@ int kfd_procfs_add_vram_usage(struct kfd_process *p)
if (!p->kobj) if (!p->kobj)
return -EFAULT; return -EFAULT;
/* Create proc/<pid>/vram_<gpuid> file for each GPU */ /*
* Create sysfs files for each GPU:
* - proc/<pid>/vram_<gpuid>
* - proc/<pid>/sdma_<gpuid>
*/
list_for_each_entry(pdd, &p->per_device_data, per_device_list) { list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
snprintf(pdd->vram_filename, MAX_VRAM_FILENAME_LEN, "vram_%u", snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u",
pdd->dev->id); pdd->dev->id);
pdd->attr_vram.name = pdd->vram_filename; ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename);
pdd->attr_vram.mode = KFD_SYSFS_FILE_MODE;
sysfs_attr_init(&pdd->attr_vram);
ret = sysfs_create_file(p->kobj, &pdd->attr_vram);
if (ret) if (ret)
pr_warn("Creating vram usage for gpu id %d failed", pr_warn("Creating vram usage for gpu id %d failed",
(int)pdd->dev->id); (int)pdd->dev->id);
snprintf(pdd->sdma_filename, MAX_SYSFS_FILENAME_LEN, "sdma_%u",
pdd->dev->id);
ret = kfd_sysfs_create_file(p, &pdd->attr_sdma, pdd->sdma_filename);
if (ret)
pr_warn("Creating sdma usage for gpu id %d failed",
(int)pdd->dev->id);
} }
return ret; return ret;
...@@ -444,9 +554,9 @@ struct kfd_process *kfd_create_process(struct file *filep) ...@@ -444,9 +554,9 @@ struct kfd_process *kfd_create_process(struct file *filep)
if (!process->kobj_queues) if (!process->kobj_queues)
pr_warn("Creating KFD proc/queues folder failed"); pr_warn("Creating KFD proc/queues folder failed");
ret = kfd_procfs_add_vram_usage(process); ret = kfd_procfs_add_sysfs_files(process);
if (ret) if (ret)
pr_warn("Creating vram usage file for pid %d failed", pr_warn("Creating sysfs usage file for pid %d failed",
(int)process->lead_thread->pid); (int)process->lead_thread->pid);
} }
out: out:
...@@ -597,8 +707,10 @@ static void kfd_process_wq_release(struct work_struct *work) ...@@ -597,8 +707,10 @@ static void kfd_process_wq_release(struct work_struct *work)
kobject_put(p->kobj_queues); kobject_put(p->kobj_queues);
p->kobj_queues = NULL; p->kobj_queues = NULL;
list_for_each_entry(pdd, &p->per_device_data, per_device_list) list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
sysfs_remove_file(p->kobj, &pdd->attr_vram); sysfs_remove_file(p->kobj, &pdd->attr_vram);
sysfs_remove_file(p->kobj, &pdd->attr_sdma);
}
kobject_del(p->kobj); kobject_del(p->kobj);
kobject_put(p->kobj); kobject_put(p->kobj);
...@@ -906,6 +1018,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, ...@@ -906,6 +1018,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
pdd->already_dequeued = false; pdd->already_dequeued = false;
pdd->runtime_inuse = false; pdd->runtime_inuse = false;
pdd->vram_usage = 0; pdd->vram_usage = 0;
pdd->sdma_past_activity_counter = 0;
list_add(&pdd->per_device_list, &p->per_device_data); list_add(&pdd->per_device_list, &p->per_device_data);
/* Init idr used for memory handle translation */ /* Init idr used for memory handle translation */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment