Commit bcea3081 authored by Ben Goz, committed by Oded Gabbay

drm/amdkfd: Add SDMA user-mode queues support to QCM

This patch adds support for SDMA user-mode queues to the QCM - the queue
management system that handles queues-per-device and queues-per-process.

v2: Remove calls to the interface function that initializes the SDMA engines.

v3: Use the new names of some of the defines.
Signed-off-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
parent 77669eb8
@@ -46,9 +46,24 @@ static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
                                         struct queue *q,
                                         struct qcm_process_device *qpd);
 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
 static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);
+static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+                                        struct queue *q,
+                                        struct qcm_process_device *qpd);
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+                                unsigned int sdma_queue_id);
+
+static inline
+enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
+{
+        if (type == KFD_QUEUE_TYPE_SDMA)
+                return KFD_MQD_TYPE_CIK_SDMA;
+        return KFD_MQD_TYPE_CIK_CP;
+}
+
 static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
 {
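The new get_mqd_type_from_queue_type() helper is the single point where a queue type is translated into an MQD (memory queue descriptor) type; several hunks below use it to replace lookups that hard-coded KFD_MQD_TYPE_CIK_CP. A minimal stand-alone sketch of the dispatch, with toy enum values standing in for the real kernel definitions:

#include <stdio.h>

/* Toy stand-ins for the kernel enums; only the two values this patch
 * distinguishes are modeled here. */
enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_SDMA };
enum KFD_MQD_TYPE { KFD_MQD_TYPE_CIK_CP, KFD_MQD_TYPE_CIK_SDMA };

static enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
        if (type == KFD_QUEUE_TYPE_SDMA)
                return KFD_MQD_TYPE_CIK_SDMA;
        return KFD_MQD_TYPE_CIK_CP;     /* compute queues keep the CP MQD */
}

int main(void)
{
        printf("compute -> mqd type %d\n",
               get_mqd_type_from_queue_type(KFD_QUEUE_TYPE_COMPUTE)); /* 0 */
        printf("sdma    -> mqd type %d\n",
               get_mqd_type_from_queue_type(KFD_QUEUE_TYPE_SDMA));    /* 1 */
        return 0;
}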
@@ -189,7 +204,10 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
                 *allocated_vmid = qpd->vmid;
                 q->properties.vmid = qpd->vmid;
-        retval = create_compute_queue_nocpsch(dqm, q, qpd);
+        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+                retval = create_compute_queue_nocpsch(dqm, q, qpd);
+        if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+                retval = create_sdma_queue_nocpsch(dqm, q, qpd);
         if (retval != 0) {
                 if (list_empty(&qpd->queues_list)) {
@@ -202,7 +220,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
         list_add(&q->list, &qpd->queues_list);
         dqm->queue_count++;
+        if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+                dqm->sdma_queue_count++;
         mutex_unlock(&dqm->lock);
         return 0;
 }
@@ -279,8 +298,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
                         struct queue *q)
 {
         int retval;
-        struct mqd_manager *mqd;
+        struct mqd_manager *mqd, *mqd_sdma;
         BUG_ON(!dqm || !q || !q->mqd || !qpd);
         retval = 0;
@@ -294,6 +312,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
                 goto out;
         }
+        mqd_sdma = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA);
+        if (mqd_sdma == NULL) {
+                mutex_unlock(&dqm->lock);
+                return -ENOMEM;
+        }
         retval = mqd->destroy_mqd(mqd, q->mqd,
                         KFD_PREEMPT_TYPE_WAVEFRONT,
                         QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
@@ -302,7 +326,12 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
         if (retval != 0)
                 goto out;
-        deallocate_hqd(dqm, q);
+        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
+                deallocate_hqd(dqm, q);
+        else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+                dqm->sdma_queue_count--;
+                deallocate_sdma_queue(dqm, q->sdma_id);
+        }
         mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
@@ -323,7 +352,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
         BUG_ON(!dqm || !q || !q->mqd);
         mutex_lock(&dqm->lock);
-        mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
+        mqd = dqm->get_mqd_manager(dqm, q->properties.type);
         if (mqd == NULL) {
                 mutex_unlock(&dqm->lock);
                 return -ENOMEM;
@@ -526,7 +555,6 @@ static int init_pipelines(struct device_queue_manager *dqm,
         return 0;
 }
-
 static int init_scheduler(struct device_queue_manager *dqm)
 {
         int retval;
@@ -556,6 +584,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
         mutex_init(&dqm->lock);
         INIT_LIST_HEAD(&dqm->queues);
         dqm->queue_count = dqm->next_pipe_to_allocate = 0;
+        dqm->sdma_queue_count = 0;
         dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
                                         sizeof(unsigned int), GFP_KERNEL);
         if (!dqm->allocated_queues) {
@@ -567,6 +596,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
                 dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
         dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
+        dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
         init_scheduler(dqm);
         return 0;
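The sdma_bitmap initializer above follows the same free-bitmap convention as vmid_bitmap: a set bit marks a free slot, so (1 << CIK_SDMA_QUEUES) - 1 = 0xf starts with all four SDMA queues available. A user-space model of the allocate/free cycle implemented by allocate_sdma_queue()/deallocate_sdma_queue() in the next hunk (plain bit operations stand in for the kernel's find_first_bit/clear_bit/set_bit):

#include <stdio.h>

#define CIK_SDMA_QUEUES 4

/* All-ones initializer: every one of the four SDMA queue slots starts free. */
static unsigned int sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

/* Model of allocate_sdma_queue(): take the lowest free slot. */
static int alloc_sdma(unsigned int *id)
{
        if (sdma_bitmap == 0)
                return -1;                      /* kernel version returns -ENOMEM */
        *id = __builtin_ctz(sdma_bitmap);       /* lowest set bit = first free slot */
        sdma_bitmap &= ~(1u << *id);            /* mark it busy */
        return 0;
}

/* Model of deallocate_sdma_queue(): mark the slot free again. */
static void free_sdma(unsigned int id)
{
        if (id < CIK_SDMA_QUEUES)
                sdma_bitmap |= 1u << id;
}

int main(void)
{
        unsigned int id;

        while (alloc_sdma(&id) == 0)
                printf("allocated sdma_id %u\n", id);   /* prints 0, 1, 2, 3 */
        free_sdma(2);
        if (alloc_sdma(&id) == 0)
                printf("reallocated sdma_id %u\n", id); /* prints 2 */
        return 0;
}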
@@ -598,6 +628,77 @@ static int stop_nocpsch(struct device_queue_manager *dqm)
         return 0;
 }
+
+static int allocate_sdma_queue(struct device_queue_manager *dqm,
+                                unsigned int *sdma_queue_id)
+{
+        int bit;
+
+        if (dqm->sdma_bitmap == 0)
+                return -ENOMEM;
+
+        bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
+                                CIK_SDMA_QUEUES);
+
+        clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
+        *sdma_queue_id = bit;
+
+        return 0;
+}
+
+static void deallocate_sdma_queue(struct device_queue_manager *dqm,
+                                unsigned int sdma_queue_id)
+{
+        if (sdma_queue_id < 0 || sdma_queue_id >= CIK_SDMA_QUEUES)
+                return;
+        set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
+}
+
+static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
+                                struct qcm_process_device *qpd)
+{
+        uint32_t value = SDMA_ATC;
+
+        if (q->process->is_32bit_user_mode)
+                value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
+        else
+                value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
+                                                        qpd_to_pdd(qpd)));
+        q->properties.sdma_vm_addr = value;
+}
+
+static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
+                                        struct queue *q,
+                                        struct qcm_process_device *qpd)
+{
+        struct mqd_manager *mqd;
+        int retval;
+
+        mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_SDMA);
+        if (!mqd)
+                return -ENOMEM;
+
+        retval = allocate_sdma_queue(dqm, &q->sdma_id);
+        if (retval != 0)
+                return retval;
+
+        q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
+        q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;
+
+        pr_debug("kfd: sdma id is:    %d\n", q->sdma_id);
+        pr_debug("    sdma queue id: %d\n", q->properties.sdma_queue_id);
+        pr_debug("    sdma engine id: %d\n", q->properties.sdma_engine_id);
+
+        retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
+                                &q->gart_mqd_addr, &q->properties);
+        if (retval != 0) {
+                deallocate_sdma_queue(dqm, q->sdma_id);
+                return retval;
+        }
+
+        init_sdma_vm(dqm, q, qpd);
+        return 0;
+}
+
 /*
  * Device Queue Manager implementation for cp scheduler
  */
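With CIK_SDMA_QUEUES = 4 split across two engines, create_sdma_queue_nocpsch() derives the per-engine queue id and the engine id from the allocated sdma_id. Note that the engine id is computed by dividing by CIK_SDMA_ENGINE_NUM; that gives the same result as dividing by CIK_SDMA_QUEUES_PER_ENGINE only because both defines happen to be 2. A sketch enumerating the resulting mapping:

#include <stdio.h>

#define CIK_SDMA_QUEUES            4
#define CIK_SDMA_QUEUES_PER_ENGINE 2
#define CIK_SDMA_ENGINE_NUM        2

int main(void)
{
        /* Reproduces the arithmetic in create_sdma_queue_nocpsch():
         * sdma_id 0 -> engine 0, queue 0
         * sdma_id 1 -> engine 0, queue 1
         * sdma_id 2 -> engine 1, queue 0
         * sdma_id 3 -> engine 1, queue 1
         */
        unsigned int sdma_id;

        for (sdma_id = 0; sdma_id < CIK_SDMA_QUEUES; sdma_id++)
                printf("sdma_id %u -> engine %u, queue %u\n", sdma_id,
                       sdma_id / CIK_SDMA_ENGINE_NUM,
                       sdma_id % CIK_SDMA_QUEUES_PER_ENGINE);
        return 0;
}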
@@ -639,6 +740,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
         mutex_init(&dqm->lock);
         INIT_LIST_HEAD(&dqm->queues);
         dqm->queue_count = dqm->processes_count = 0;
+        dqm->sdma_queue_count = 0;
         dqm->active_runlist = false;
         retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
         if (retval != 0)
@@ -682,7 +784,6 @@ static int start_cpsch(struct device_queue_manager *dqm)
         dqm->fence_addr = dqm->fence_mem->cpu_ptr;
         dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
-
         list_for_each_entry(node, &dqm->queues, list)
                 if (node->qpd->pqm->process && dqm->dev)
                         kfd_bind_process_to_device(dqm->dev,
@@ -753,6 +854,14 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
         mutex_unlock(&dqm->lock);
 }
+
+static void select_sdma_engine_id(struct queue *q)
+{
+        static int sdma_id;
+
+        q->sdma_id = sdma_id;
+        sdma_id = (sdma_id + 1) % 2;
+}
+
 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
                         struct qcm_process_device *qpd, int *allocate_vmid)
 {
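In the HWS (cpsch) path there is no bitmap: select_sdma_engine_id() simply alternates new SDMA queues between the two engines with a static counter. The counter is unsynchronized, but in this patch it is only touched from create_queue_cpsch() after dqm->lock is taken (see the next hunk), so that is safe. A user-space model of the round-robin:

#include <stdio.h>

/* Model of select_sdma_engine_id(): a static counter alternates queue
 * placement between the two SDMA engines. */
static int sdma_id;

static int select_engine(void)
{
        int id = sdma_id;

        sdma_id = (sdma_id + 1) % 2;
        return id;
}

int main(void)
{
        int i;

        for (i = 0; i < 5; i++)
                printf("queue %d -> engine %d\n", i, select_engine()); /* 0,1,0,1,0 */
        return 0;
}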
@@ -768,7 +877,12 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
         mutex_lock(&dqm->lock);
-        mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
+        if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+                select_sdma_engine_id(q);
+
+        mqd = dqm->get_mqd_manager(dqm,
+                        get_mqd_type_from_queue_type(q->properties.type));
+
         if (mqd == NULL) {
                 mutex_unlock(&dqm->lock);
                 return -ENOMEM;
@@ -785,6 +899,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
                 retval = execute_queues_cpsch(dqm, false);
         }
+        if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+                dqm->sdma_queue_count++;
+
 out:
         mutex_unlock(&dqm->lock);
         return retval;
@@ -808,6 +925,14 @@ static int fence_wait_timeout(unsigned int *fence_addr,
         return 0;
 }
+
+static int destroy_sdma_queues(struct device_queue_manager *dqm,
+                                unsigned int sdma_engine)
+{
+        return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
+                        KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false,
+                        sdma_engine);
+}
+
 static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
 {
         int retval;
@@ -820,6 +945,15 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
         mutex_lock(&dqm->lock);
         if (dqm->active_runlist == false)
                 goto out;
+
+        pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
+                dqm->sdma_queue_count);
+
+        if (dqm->sdma_queue_count > 0) {
+                destroy_sdma_queues(dqm, 0);
+                destroy_sdma_queues(dqm, 1);
+        }
+
         retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
                         KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
         if (retval != 0)
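Before preempting compute queues, destroy_queues_cpsch() now flushes both SDMA engines through destroy_sdma_queues(). The argument roles below are read off the two call sites in this diff; the parameter descriptions are guesses from usage, not the names in the pm_send_unmap_queue() prototype:

/*
 * pm_send_unmap_queue(&dqm->packets,
 *        KFD_QUEUE_TYPE_SDMA,                 -- which queue type to unmap
 *        KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES,  -- preempt every queue...
 *        0,                                   -- ...so no filter parameter
 *        false,                               -- do not reset the queues
 *        sdma_engine);                        -- SDMA engine 0 or 1
 */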
@@ -891,13 +1025,16 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
         /* remove queue from list to prevent rescheduling after preemption */
         mutex_lock(&dqm->lock);
-        mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
+        mqd = dqm->get_mqd_manager(dqm,
+                        get_mqd_type_from_queue_type(q->properties.type));
         if (!mqd) {
                 retval = -ENOMEM;
                 goto failed;
         }
+
+        if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+                dqm->sdma_queue_count--;
+
         list_del(&q->list);
         dqm->queue_count--;
...
@@ -36,6 +36,9 @@
 #define KFD_VMID_START_OFFSET (8)
 #define VMID_PER_DEVICE CIK_VMID_NUM
 #define KFD_DQM_FIRST_PIPE (0)
+#define CIK_SDMA_QUEUES (4)
+#define CIK_SDMA_QUEUES_PER_ENGINE (2)
+#define CIK_SDMA_ENGINE_NUM (2)
 
 struct device_process_node {
         struct qcm_process_device *qpd;
@@ -130,8 +133,10 @@ struct device_queue_manager {
         struct list_head queues;
         unsigned int processes_count;
         unsigned int queue_count;
+        unsigned int sdma_queue_count;
         unsigned int next_pipe_to_allocate;
         unsigned int *allocated_queues;
+        unsigned int sdma_bitmap;
         unsigned int vmid_bitmap;
         uint64_t pipelines_addr;
         struct kfd_mem_obj *pipeline_mem;
...
@@ -128,7 +128,6 @@ static int create_cp_queue(struct process_queue_manager *pqm,
         /* let DQM handle it*/
         q_properties->vmid = 0;
         q_properties->queue_id = qid;
-        q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
         retval = init_queue(q, *q_properties);
         if (retval != 0)
@@ -189,6 +188,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
         }
         switch (type) {
+        case KFD_QUEUE_TYPE_SDMA:
         case KFD_QUEUE_TYPE_COMPUTE:
                 /* check if there is over subscription */
                 if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
...
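Taken together, the last two hunks route SDMA queues through the existing process-queue path: create_cp_queue() no longer forces KFD_QUEUE_TYPE_COMPUTE, and the new case label makes KFD_QUEUE_TYPE_SDMA fall through to the compute branch of pqm_create_queue(), sharing its oversubscription check. A toy model of that fall-through (the helper names here are hypothetical stand-ins for the surrounding logic):

#include <stdbool.h>
#include <stdio.h>

enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_SDMA, KFD_QUEUE_TYPE_DIQ };

/* Hypothetical stand-in for the sched_policy/queue-count check. */
static bool over_subscribed(void)
{
        return false;
}

static int pqm_create_queue_model(enum kfd_queue_type type)
{
        switch (type) {
        case KFD_QUEUE_TYPE_SDMA:       /* falls through to the compute path */
        case KFD_QUEUE_TYPE_COMPUTE:
                if (over_subscribed())
                        return -1;
                printf("type %d admitted via the shared branch\n", type);
                return 0;
        default:
                return -1;      /* other types are handled elsewhere */
        }
}

int main(void)
{
        pqm_create_queue_model(KFD_QUEUE_TYPE_COMPUTE);
        pqm_create_queue_model(KFD_QUEUE_TYPE_SDMA);
        return 0;
}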