Commit 8dd69e69 authored by Philip Yang, committed by Alex Deucher

drm/amdkfd: avoid HMM change cause circular lock

With the HMM change there is a circular lock dependency between the gfx and kfd paths:
lock(dqm) -> bo::reserve -> amdgpu_mn_lock

To avoid this, move init_mqd()/uninit_mqd() out of lock(dqm), removing the nested
locking between mmap_sem and bo::reserve. The resulting locking order is:
bo::reserve -> amdgpu_mn_lock(p->mn)
Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 57731a07
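The essence of the change, shown in the diff below, is a lock-ordering restructure: init_mqd(), which needs bo::reserve, now runs before dqm_lock() in the create path, and uninit_mqd() runs after dqm_unlock() in the destroy and process-termination paths, so bo::reserve is never acquired while the dqm lock is held. The following is a minimal, self-contained user-space sketch of that pattern using plain pthread mutexes; the names (init_mqd_sketch, create_queue_sketch, and so on) are illustrative stand-ins, not the actual amdkfd code.

/* Sketch only: two pthread mutexes stand in for lock(dqm) and bo::reserve. */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dqm_lock   = PTHREAD_MUTEX_INITIALIZER; /* lock(dqm) */
static pthread_mutex_t bo_reserve = PTHREAD_MUTEX_INITIALIZER; /* bo::reserve */

/* Stand-ins for init_mqd()/uninit_mqd(): both need the BO reservation. */
static void init_mqd_sketch(void)
{
	pthread_mutex_lock(&bo_reserve);
	/* ... allocate and map the MQD buffer object ... */
	pthread_mutex_unlock(&bo_reserve);
}

static void uninit_mqd_sketch(void)
{
	pthread_mutex_lock(&bo_reserve);
	/* ... free the MQD buffer object ... */
	pthread_mutex_unlock(&bo_reserve);
}

/* Create path after the patch: init_mqd() runs before lock(dqm), so
 * bo::reserve is never taken while the dqm lock is held. */
static void create_queue_sketch(void)
{
	init_mqd_sketch();

	pthread_mutex_lock(&dqm_lock);
	/* ... add the queue to the process queue list and runlist ... */
	pthread_mutex_unlock(&dqm_lock);
}

/* Destroy path after the patch: drop the dqm lock first, and only then
 * let uninit_mqd() reserve the BO. */
static void destroy_queue_sketch(void)
{
	pthread_mutex_lock(&dqm_lock);
	/* ... remove the queue from the runlist ... */
	pthread_mutex_unlock(&dqm_lock);

	uninit_mqd_sketch();
}

int main(void)
{
	create_queue_sketch();
	destroy_queue_sketch();
	puts("lock(dqm) and bo::reserve are never nested as lock(dqm) -> bo::reserve");
	return 0;
}

With this ordering, bo::reserve can in turn take amdgpu_mn_lock without ever sitting underneath lock(dqm), which is the ordering the commit message describes.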
@@ -1162,21 +1162,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	int retval;
 	struct mqd_manager *mqd_mgr;
 
-	retval = 0;
-
-	dqm_lock(dqm);
-
 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
 		pr_warn("Can't create new usermode queue because %d queues were already created\n",
 				dqm->total_queue_count);
 		retval = -EPERM;
-		goto out_unlock;
+		goto out;
 	}
 
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		retval = allocate_sdma_queue(dqm, &q->sdma_id);
 		if (retval)
-			goto out_unlock;
+			goto out;
 		q->properties.sdma_queue_id =
 			q->sdma_id / get_num_sdma_engines(dqm);
 		q->properties.sdma_engine_id =
@@ -1187,6 +1183,9 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	if (retval)
 		goto out_deallocate_sdma_queue;
 
+	/* Do init_mqd before dqm_lock(dqm) to avoid circular locking order:
+	 * lock(dqm) -> bo::reserve
+	 */
 	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
 			get_mqd_type_from_queue_type(q->properties.type));
@@ -1194,6 +1193,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 		retval = -ENOMEM;
 		goto out_deallocate_doorbell;
 	}
+
 	/*
 	 * Eviction state logic: we only mark active queues as evicted
 	 * to avoid the overhead of restoring inactive queues later
@@ -1202,9 +1202,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	q->properties.is_evicted = (q->properties.queue_size > 0 &&
 			q->properties.queue_percent > 0 &&
 			q->properties.queue_address != 0);
-
 	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
-
 	q->properties.tba_addr = qpd->tba_addr;
 	q->properties.tma_addr = qpd->tma_addr;
 	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
@@ -1212,6 +1210,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	if (retval)
 		goto out_deallocate_doorbell;
 
+	dqm_lock(dqm);
+
 	list_add(&q->list, &qpd->queues_list);
 	qpd->queue_count++;
 	if (q->properties.is_active) {
@@ -1239,9 +1239,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 out_deallocate_sdma_queue:
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 		deallocate_sdma_queue(dqm, q->sdma_id);
-out_unlock:
-	dqm_unlock(dqm);
+out:
 	return retval;
 }
@@ -1404,8 +1402,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 		qpd->reset_wavefronts = true;
 	}
 
-	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
-
 	/*
 	 * Unconditionally decrement this counter, regardless of the queue's
 	 * type
@@ -1416,6 +1412,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	dqm_unlock(dqm);
 
+	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
+	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
+
 	return retval;
 
 failed:
@@ -1637,7 +1636,11 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		qpd->reset_wavefronts = false;
 	}
 
-	/* lastly, free mqd resources */
+	dqm_unlock(dqm);
+
+	/* Lastly, free mqd resources.
+	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
+	 */
 	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
 		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
 			get_mqd_type_from_queue_type(q->properties.type));
@@ -1651,7 +1654,6 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 	}
 
 out:
-	dqm_unlock(dqm);
 	return retval;
 }