Commit 8445dde1 authored by Ofir Bitton, committed by Oded Gabbay

habanalabs: move relevant datapath work outside cs lock

In order to shorten the time the cs lock is held, we move any work
that can be done without the lock outside of it.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
parent 2f6274e4
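
At its core the patch applies a standard deferral pattern: the fence release path, which can run while the cs lock is contended, no longer resets SOBs and frees the completion object inline; it only queues a work item, and a dedicated workqueue does the heavy part later in process context. A minimal standalone sketch of that pattern follows; all demo_* names are illustrative and not part of the driver:

#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_compl {
    struct work_struct reset_work;  /* deferred cleanup hook */
    /* ... fence and SOB bookkeeping would live here ... */
};

/* Runs later, in process context, outside any submission lock. */
static void demo_reset_work(struct work_struct *work)
{
    struct demo_compl *cmpl =
        container_of(work, struct demo_compl, reset_work);

    /* the expensive part: drop hw_sob refs / reset the SOB group */
    kfree(cmpl);
}

static struct demo_compl *demo_compl_alloc(void)
{
    struct demo_compl *cmpl = kzalloc(sizeof(*cmpl), GFP_KERNEL);

    if (cmpl)
        INIT_WORK(&cmpl->reset_work, demo_reset_work);
    return cmpl;
}

/* The release path stays cheap: hand the object to the workqueue. */
static void demo_release(struct workqueue_struct *wq,
             struct demo_compl *cmpl)
{
    queue_work(wq, &cmpl->reset_work);
}

Teardown then has to order against the queue, which is why the patch below flushes sob_reset_wq in hl_cs_rollback_all and destroys it in device_early_fini.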
@@ -84,31 +84,12 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
     return 0;
 }
 
-static void hl_fence_release(struct kref *kref)
+static void sob_reset_work(struct work_struct *work)
 {
-    struct hl_fence *fence =
-        container_of(kref, struct hl_fence, refcount);
     struct hl_cs_compl *hl_cs_cmpl =
-        container_of(fence, struct hl_cs_compl, base_fence);
+        container_of(work, struct hl_cs_compl, sob_reset_work);
     struct hl_device *hdev = hl_cs_cmpl->hdev;
 
-    /* EBUSY means the CS was never submitted and hence we don't have
-     * an attached hw_sob object that we should handle here
-     */
-    if (fence->error == -EBUSY)
-        goto free;
-
-    if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
-            (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
-            (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
-
-        dev_dbg(hdev->dev,
-            "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
-            hl_cs_cmpl->cs_seq,
-            hl_cs_cmpl->type,
-            hl_cs_cmpl->hw_sob->sob_id,
-            hl_cs_cmpl->sob_val);
-
     /*
      * A signal CS can get completion while the corresponding wait
      * for signal CS is on its way to the PQ. The wait for signal CS
@@ -131,6 +112,38 @@ static void hl_fence_release(struct kref *kref)
     if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
         hdev->asic_funcs->reset_sob_group(hdev,
                 hl_cs_cmpl->sob_group);
+
+    kfree(hl_cs_cmpl);
+}
+
+static void hl_fence_release(struct kref *kref)
+{
+    struct hl_fence *fence =
+        container_of(kref, struct hl_fence, refcount);
+    struct hl_cs_compl *hl_cs_cmpl =
+        container_of(fence, struct hl_cs_compl, base_fence);
+    struct hl_device *hdev = hl_cs_cmpl->hdev;
+
+    /* EBUSY means the CS was never submitted and hence we don't have
+     * an attached hw_sob object that we should handle here
+     */
+    if (fence->error == -EBUSY)
+        goto free;
+
+    if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
+            (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
+            (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
+
+        dev_dbg(hdev->dev,
+            "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
+            hl_cs_cmpl->cs_seq,
+            hl_cs_cmpl->type,
+            hl_cs_cmpl->hw_sob->sob_id,
+            hl_cs_cmpl->sob_val);
+
+        queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);
+
+        return;
     }
 
 free:
@@ -670,9 +683,23 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
         goto free_cs;
     }
 
+    cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+            sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
+    if (!cs->jobs_in_queue_cnt)
+        cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
+                sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
+
+    if (!cs->jobs_in_queue_cnt) {
+        atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+        atomic64_inc(&cntr->out_of_mem_drop_cnt);
+        rc = -ENOMEM;
+        goto free_cs_cmpl;
+    }
+
     cs_cmpl->hdev = hdev;
     cs_cmpl->type = cs->type;
     spin_lock_init(&cs_cmpl->lock);
+    INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
     cs->fence = &cs_cmpl->base_fence;
 
     spin_lock(&ctx->cs_lock);
@@ -702,19 +729,6 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
         goto free_fence;
     }
 
-    cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
-            sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
-    if (!cs->jobs_in_queue_cnt)
-        cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
-                sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL);
-
-    if (!cs->jobs_in_queue_cnt) {
-        atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
-        atomic64_inc(&cntr->out_of_mem_drop_cnt);
-        rc = -ENOMEM;
-        goto free_fence;
-    }
-
     /* init hl_fence */
     hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
@@ -737,6 +751,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
 
 free_fence:
     spin_unlock(&ctx->cs_lock);
+    kfree(cs->jobs_in_queue_cnt);
+free_cs_cmpl:
     kfree(cs_cmpl);
 free_cs:
     kfree(cs);
@@ -759,6 +775,8 @@ void hl_cs_rollback_all(struct hl_device *hdev)
     int i;
     struct hl_cs *cs, *tmp;
 
+    flush_workqueue(hdev->sob_reset_wq);
+
     /* flush all completions before iterating over the CS mirror list in
      * order to avoid a race with the release functions
      */
...
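One detail of the allocate_cs hunk above deserves a note: jobs_in_queue_cnt is now allocated before ctx->cs_lock is taken, yet it still tries GFP_ATOMIC first and falls back to GFP_KERNEL, presumably so the common path never sleeps while the fallback remains legal outside the spinlock. A hedged sketch of the idiom, with an illustrative helper name:

#include <linux/slab.h>

/* demo_alloc_queue_cnt() is illustrative, not a driver function. */
static atomic_t *demo_alloc_queue_cnt(u32 nr_queues)
{
    /* fast, non-sleeping attempt first ... */
    atomic_t *cnt = kcalloc(nr_queues, sizeof(*cnt), GFP_ATOMIC);

    /* ... then a sleeping retry, safe only when no spinlock is held */
    if (!cnt)
        cnt = kcalloc(nr_queues, sizeof(*cnt), GFP_KERNEL);

    return cnt;
}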
@@ -368,11 +368,19 @@ static int device_early_init(struct hl_device *hdev)
         goto free_cq_wq;
     }
 
+    hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
+    if (!hdev->sob_reset_wq) {
+        dev_err(hdev->dev,
+            "Failed to allocate SOB reset workqueue\n");
+        rc = -ENOMEM;
+        goto free_eq_wq;
+    }
+
     hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
                     GFP_KERNEL);
     if (!hdev->hl_chip_info) {
         rc = -ENOMEM;
-        goto free_eq_wq;
+        goto free_sob_reset_wq;
     }
 
     hdev->idle_busy_ts_arr = kmalloc_array(HL_IDLE_BUSY_TS_ARR_SIZE,
@@ -418,6 +426,8 @@ static int device_early_init(struct hl_device *hdev)
     kfree(hdev->idle_busy_ts_arr);
 free_chip_info:
     kfree(hdev->hl_chip_info);
+free_sob_reset_wq:
+    destroy_workqueue(hdev->sob_reset_wq);
 free_eq_wq:
     destroy_workqueue(hdev->eq_wq);
 free_cq_wq:
@@ -454,6 +464,7 @@ static void device_early_fini(struct hl_device *hdev)
     kfree(hdev->idle_busy_ts_arr);
     kfree(hdev->hl_chip_info);
 
+    destroy_workqueue(hdev->sob_reset_wq);
     destroy_workqueue(hdev->eq_wq);
     destroy_workqueue(hdev->device_reset_work.wq);
...
@@ -528,6 +528,7 @@ struct hl_fence {
 
 /**
  * struct hl_cs_compl - command submission completion object.
+ * @sob_reset_work: workqueue object to run SOB reset flow.
  * @base_fence: hl fence object.
  * @lock: spinlock to protect fence.
  * @hdev: habanalabs device structure.
@@ -538,6 +539,7 @@ struct hl_fence {
  * @sob_group: the SOB group that is used in this collective wait CS.
  */
 struct hl_cs_compl {
+    struct work_struct sob_reset_work;
     struct hl_fence base_fence;
     spinlock_t lock;
     struct hl_device *hdev;
@@ -1905,6 +1907,7 @@ struct hl_mmu_funcs {
  * @cq_wq: work queues of completion queues for executing work in process
  *         context.
  * @eq_wq: work queue of event queue for executing work in process context.
+ * @sob_reset_wq: work queue for sob reset executions.
  * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
  * @cs_mirror_list: CS mirror list for TDR.
@@ -2022,6 +2025,7 @@ struct hl_device {
     struct hl_user_interrupt common_user_interrupt;
     struct workqueue_struct **cq_wq;
     struct workqueue_struct *eq_wq;
+    struct workqueue_struct *sob_reset_wq;
     struct hl_ctx *kernel_ctx;
     struct hl_hw_queue *kernel_queues;
     struct list_head cs_mirror_list;
...