Commit eaa43a06 authored by farah kassabri's avatar farah kassabri Committed by Oded Gabbay

accel/habanalabs: Allow single timestamp registration request at a time

Protect against concurrency of user requesting to register a timestamp
offset (where the driver fills the timestamp when the command submission
has finished executing) to a specific user interrupt ID. The
protection is basically to allow only one timestamp registration
request to be handled at a time.

This is needed because the user can decide to re-use a timestamp
offset (register an already registered offset, to a different
interrupt ID). This means the request will cause the timestamp node to
move from one interrupt list to another interrupt list. In such
scenario, without proper protection, we could end up adding the same
node twice to the interrupts wait lists.
Signed-off-by: default avatarfarah kassabri <fkassabri@habana.ai>
Reviewed-by: default avatarOded Gabbay <ogabbay@kernel.org>
Signed-off-by: default avatarOded Gabbay <ogabbay@kernel.org>
parent 964b1f67
......@@ -119,6 +119,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
mutex_destroy(&ctx->ts_reg_lock);
} else {
dev_dbg(hdev->dev, "closing kernel context\n");
hdev->asic_funcs->ctx_fini(ctx);
......@@ -268,6 +269,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
hl_encaps_sig_mgr_init(&ctx->sig_mgr);
mutex_init(&ctx->ts_reg_lock);
dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
get_task_comm(task_comm, current), ctx->asid);
}
......
......@@ -1144,6 +1144,7 @@ struct timestamp_reg_work_obj {
* @buf: pointer to the timestamp buffer which include both user/kernel buffers.
* relevant only when doing timestamps records registration.
* @cq_cb: pointer to CQ counter CB.
* @interrupt: interrupt that the node hanged on it's wait list.
* @timestamp_kernel_addr: timestamp handle address, where to set timestamp
* relevant only when doing timestamps records
* registration.
......@@ -1153,10 +1154,11 @@ struct timestamp_reg_work_obj {
* allocating records dynamically.
*/
struct timestamp_reg_info {
struct hl_mmap_mem_buf *buf;
struct hl_cb *cq_cb;
u64 *timestamp_kernel_addr;
u8 in_use;
struct hl_mmap_mem_buf *buf;
struct hl_cb *cq_cb;
struct hl_user_interrupt *interrupt;
u64 *timestamp_kernel_addr;
bool in_use;
};
/**
......@@ -1835,6 +1837,7 @@ struct hl_cs_outcome_store {
* @va_range: holds available virtual addresses for host and dram mappings.
* @mem_hash_lock: protects the mem_hash.
* @hw_block_list_lock: protects the HW block memory list.
* @ts_reg_lock: timestamp registration ioctls lock.
* @debugfs_list: node in debugfs list of contexts.
* @hw_block_mem_list: list of HW block virtual mapped addresses.
* @cs_counters: context command submission counters.
......@@ -1871,6 +1874,7 @@ struct hl_ctx {
struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX];
struct mutex mem_hash_lock;
struct mutex hw_block_list_lock;
struct mutex ts_reg_lock;
struct list_head debugfs_list;
struct list_head hw_block_mem_list;
struct hl_cs_counters_atomic cs_counters;
......
......@@ -233,7 +233,8 @@ static void hl_ts_free_objects(struct work_struct *work)
* list to a dedicated workqueue to do the actual put.
*/
static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
struct list_head **free_list, ktime_t now)
struct list_head **free_list, ktime_t now,
u32 interrupt_id)
{
struct timestamp_reg_free_node *free_node;
u64 timestamp;
......@@ -255,14 +256,12 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
pend->ts_reg_info.timestamp_kernel_addr,
*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
list_del(&pend->wait_list_node);
dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
pend, pend->ts_reg_info.timestamp_kernel_addr, interrupt_id);
/* Mark kernel CB node as free */
pend->ts_reg_info.in_use = 0;
pend->ts_reg_info.in_use = false;
list_del(&pend->wait_list_node);
/* Putting the refcount for ts_buff and cq_cb objects will be handled
* in workqueue context, just add job to free_list.
......@@ -296,13 +295,15 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
return;
spin_lock(&intr->wait_list_lock);
list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) {
if (pend->ts_reg_info.buf) {
if (!reg_node_handle_fail) {
rc = handle_registration_node(hdev, pend,
&ts_reg_free_list_head, intr->timestamp);
&ts_reg_free_list_head, intr->timestamp,
intr->interrupt_id);
if (rc)
reg_node_handle_fail = true;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment