Commit a99c6d4f authored by Felix Kuehling, committed by Oded Gabbay

drm/amdkfd: map multiple processes to HW scheduler

Allow HWS to execute multiple processes on the hardware
concurrently. The number of concurrent processes is limited by
the number of VMIDs allocated to the HWS.

A module parameter can be used to limit this further or to turn
it off altogether (mainly for debugging purposes).
Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 8f8fb9b9
...@@ -238,6 +238,17 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, ...@@ -238,6 +238,17 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
- kfd->vm_info.first_vmid_kfd + 1; - kfd->vm_info.first_vmid_kfd + 1;
/* Verify module parameters regarding mapped process number*/
if ((hws_max_conc_proc < 0)
|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
dev_err(kfd_device,
"hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
kfd->vm_info.vmid_num_kfd);
kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
} else
kfd->max_proc_per_quantum = hws_max_conc_proc;
/* calculate max size of mqds needed for queues */ /* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device * size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned; kfd->device_info->mqd_size_aligned;
......
...@@ -50,6 +50,11 @@ module_param(sched_policy, int, 0444); ...@@ -50,6 +50,11 @@ module_param(sched_policy, int, 0444);
MODULE_PARM_DESC(sched_policy, MODULE_PARM_DESC(sched_policy,
"Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)");
int hws_max_conc_proc = 8;
module_param(hws_max_conc_proc, int, 0444);
MODULE_PARM_DESC(hws_max_conc_proc,
"Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))");
int cwsr_enable = 1; int cwsr_enable = 1;
module_param(cwsr_enable, int, 0444); module_param(cwsr_enable, int, 0444);
MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))");
......
...@@ -57,13 +57,24 @@ static void pm_calc_rlib_size(struct packet_manager *pm, ...@@ -57,13 +57,24 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
{ {
unsigned int process_count, queue_count; unsigned int process_count, queue_count;
unsigned int map_queue_size; unsigned int map_queue_size;
unsigned int max_proc_per_quantum = 1;
struct kfd_dev *dev = pm->dqm->dev;
process_count = pm->dqm->processes_count; process_count = pm->dqm->processes_count;
queue_count = pm->dqm->queue_count; queue_count = pm->dqm->queue_count;
/* check if there is over subscription*/ /* check if there is over subscription
* Note: the arbitration between the number of VMIDs and
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
*over_subscription = false; *over_subscription = false;
if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
if (dev->max_proc_per_quantum > 1)
max_proc_per_quantum = dev->max_proc_per_quantum;
if ((process_count > max_proc_per_quantum) ||
queue_count > get_queues_num(pm->dqm)) {
*over_subscription = true; *over_subscription = true;
pr_debug("Over subscribed runlist\n"); pr_debug("Over subscribed runlist\n");
} }
...@@ -116,10 +127,24 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, ...@@ -116,10 +127,24 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
uint64_t ib, size_t ib_size_in_dwords, bool chain) uint64_t ib, size_t ib_size_in_dwords, bool chain)
{ {
struct pm4_mes_runlist *packet; struct pm4_mes_runlist *packet;
int concurrent_proc_cnt = 0;
struct kfd_dev *kfd = pm->dqm->dev;
if (WARN_ON(!ib)) if (WARN_ON(!ib))
return -EFAULT; return -EFAULT;
/* Determine the number of processes to map together to HW:
* it can not exceed the number of VMIDs available to the
* scheduler, and it is determined by the smaller of the number
* of processes in the runlist and kfd module parameter
* hws_max_conc_proc.
* Note: the arbitration between the number of VMIDs and
* hws_max_conc_proc has been done in
* kgd2kfd_device_init().
*/
concurrent_proc_cnt = min(pm->dqm->processes_count,
kfd->max_proc_per_quantum);
packet = (struct pm4_mes_runlist *)buffer; packet = (struct pm4_mes_runlist *)buffer;
memset(buffer, 0, sizeof(struct pm4_mes_runlist)); memset(buffer, 0, sizeof(struct pm4_mes_runlist));
...@@ -130,6 +155,7 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, ...@@ -130,6 +155,7 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
packet->bitfields4.chain = chain ? 1 : 0; packet->bitfields4.chain = chain ? 1 : 0;
packet->bitfields4.offload_polling = 0; packet->bitfields4.offload_polling = 0;
packet->bitfields4.valid = 1; packet->bitfields4.valid = 1;
packet->bitfields4.process_cnt = concurrent_proc_cnt;
packet->ordinal2 = lower_32_bits(ib); packet->ordinal2 = lower_32_bits(ib);
packet->bitfields3.ib_base_hi = upper_32_bits(ib); packet->bitfields3.ib_base_hi = upper_32_bits(ib);
......
...@@ -88,6 +88,12 @@ extern int max_num_of_queues_per_device; ...@@ -88,6 +88,12 @@ extern int max_num_of_queues_per_device;
/* Kernel module parameter to specify the scheduling policy */ /* Kernel module parameter to specify the scheduling policy */
extern int sched_policy; extern int sched_policy;
/*
* Kernel module parameter to specify the maximum process
* number per HW scheduler
*/
extern int hws_max_conc_proc;
extern int cwsr_enable; extern int cwsr_enable;
/* /*
...@@ -214,6 +220,9 @@ struct kfd_dev { ...@@ -214,6 +220,9 @@ struct kfd_dev {
/* Debug manager */ /* Debug manager */
struct kfd_dbgmgr *dbgmgr; struct kfd_dbgmgr *dbgmgr;
/* Maximum process number mapped to HW scheduler */
unsigned int max_proc_per_quantum;
/* CWSR */ /* CWSR */
bool cwsr_enabled; bool cwsr_enabled;
const void *cwsr_isa; const void *cwsr_isa;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment