Commit 1251f23a authored by Oded Gabbay, committed by Greg Kroah-Hartman

habanalabs: add event queue and interrupts

This patch adds support for receiving events from Goya's control CPU and
for receiving MSI-X interrupts from Goya's DMA engines and CPU.

Goya's PCI controller supports up to 8 MSI-X interrupts, of which only 6
are currently used. The first 5 interrupts are dedicated to Goya's
DMA engine queues. The 6th interrupt is dedicated to Goya's control CPU.

The DMA queue will signal its MSI-X entry upon each completion of a command
buffer that was placed on its primary queue. The driver will then mark that
CB as completed and free the related resources. It will also update the
command submission object which that CB belongs to.

There is a dedicated event queue (EQ) between the driver and Goya's control
CPU. The EQ is located in host memory. The control CPU writes a new
entry to the EQ for various reasons, such as an ECC error, an MMU page fault
or high temperature. After writing the new entry to the EQ, the control CPU will
trigger its dedicated MSI-X entry to signal the driver that there is a new
entry in the EQ. The driver will then read the entry and act accordingly.
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 9494a8dd
...@@ -181,6 +181,13 @@ static int device_early_init(struct hl_device *hdev) ...@@ -181,6 +181,13 @@ static int device_early_init(struct hl_device *hdev)
goto asid_fini; goto asid_fini;
} }
hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
if (hdev->eq_wq == NULL) {
dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
rc = -ENOMEM;
goto free_cq_wq;
}
hl_cb_mgr_init(&hdev->kernel_cb_mgr); hl_cb_mgr_init(&hdev->kernel_cb_mgr);
mutex_init(&hdev->fd_open_cnt_lock); mutex_init(&hdev->fd_open_cnt_lock);
...@@ -189,6 +196,8 @@ static int device_early_init(struct hl_device *hdev) ...@@ -189,6 +196,8 @@ static int device_early_init(struct hl_device *hdev)
return 0; return 0;
free_cq_wq:
destroy_workqueue(hdev->cq_wq);
asid_fini: asid_fini:
hl_asid_fini(hdev); hl_asid_fini(hdev);
early_fini: early_fini:
...@@ -210,6 +219,7 @@ static void device_early_fini(struct hl_device *hdev) ...@@ -210,6 +219,7 @@ static void device_early_fini(struct hl_device *hdev)
hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr); hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
destroy_workqueue(hdev->eq_wq);
destroy_workqueue(hdev->cq_wq); destroy_workqueue(hdev->cq_wq);
hl_asid_fini(hdev); hl_asid_fini(hdev);
...@@ -348,11 +358,22 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) ...@@ -348,11 +358,22 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
} }
} }
/*
* Initialize the event queue. Must be done before hw_init,
* because there the address of the event queue is being
* passed as argument to request_irq
*/
rc = hl_eq_init(hdev, &hdev->event_queue);
if (rc) {
dev_err(hdev->dev, "failed to initialize event queue\n");
goto cq_fini;
}
/* Allocate the kernel context */ /* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL); hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
if (!hdev->kernel_ctx) { if (!hdev->kernel_ctx) {
rc = -ENOMEM; rc = -ENOMEM;
goto cq_fini; goto eq_fini;
} }
hdev->user_ctx = NULL; hdev->user_ctx = NULL;
...@@ -397,6 +418,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) ...@@ -397,6 +418,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
"kernel ctx is still alive on initialization failure\n"); "kernel ctx is still alive on initialization failure\n");
free_ctx: free_ctx:
kfree(hdev->kernel_ctx); kfree(hdev->kernel_ctx);
eq_fini:
hl_eq_fini(hdev, &hdev->event_queue);
cq_fini: cq_fini:
for (i = 0 ; i < cq_ready_cnt ; i++) for (i = 0 ; i < cq_ready_cnt ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]); hl_cq_fini(hdev, &hdev->completion_queue[i]);
...@@ -438,6 +461,13 @@ void hl_device_fini(struct hl_device *hdev) ...@@ -438,6 +461,13 @@ void hl_device_fini(struct hl_device *hdev)
/* Mark device as disabled */ /* Mark device as disabled */
hdev->disabled = true; hdev->disabled = true;
/*
* Halt the engines and disable interrupts so we won't get any more
* completions from H/W and we won't have any accesses from the
* H/W to the host machine
*/
hdev->asic_funcs->halt_engines(hdev, true);
hl_cb_pool_fini(hdev); hl_cb_pool_fini(hdev);
/* Release kernel context */ /* Release kernel context */
...@@ -447,6 +477,8 @@ void hl_device_fini(struct hl_device *hdev) ...@@ -447,6 +477,8 @@ void hl_device_fini(struct hl_device *hdev)
/* Reset the H/W. It will be in idle state after this returns */ /* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, true); hdev->asic_funcs->hw_fini(hdev, true);
hl_eq_fini(hdev, &hdev->event_queue);
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
hl_cq_fini(hdev, &hdev->completion_queue[i]); hl_cq_fini(hdev, &hdev->completion_queue[i]);
kfree(hdev->completion_queue); kfree(hdev->completion_queue);
......
This diff is collapsed.
...@@ -152,6 +152,7 @@ struct goya_device { ...@@ -152,6 +152,7 @@ struct goya_device {
/* TODO: remove hw_queues_lock after moving to scheduler code */ /* TODO: remove hw_queues_lock after moving to scheduler code */
spinlock_t hw_queues_lock; spinlock_t hw_queues_lock;
u64 ddr_bar_cur_addr; u64 ddr_bar_cur_addr;
u32 events_stat[GOYA_ASYNC_EVENT_ID_SIZE];
u32 hw_cap_initialized; u32 hw_cap_initialized;
}; };
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/cdev.h> #include <linux/cdev.h>
#include <linux/iopoll.h> #include <linux/iopoll.h>
#include <linux/irqreturn.h>
#define HL_NAME "habanalabs" #define HL_NAME "habanalabs"
...@@ -80,6 +81,7 @@ struct hw_queue_properties { ...@@ -80,6 +81,7 @@ struct hw_queue_properties {
* @cfg_size: configuration space size on SRAM. * @cfg_size: configuration space size on SRAM.
* @sram_size: total size of SRAM. * @sram_size: total size of SRAM.
* @max_asid: maximum number of open contexts (ASIDs). * @max_asid: maximum number of open contexts (ASIDs).
* @num_of_events: number of possible internal H/W IRQs.
* @completion_queues_count: number of completion queues. * @completion_queues_count: number of completion queues.
* @high_pll: high PLL frequency used by the device. * @high_pll: high PLL frequency used by the device.
* @cb_pool_cb_cnt: number of CBs in the CB pool. * @cb_pool_cb_cnt: number of CBs in the CB pool.
...@@ -106,6 +108,7 @@ struct asic_fixed_properties { ...@@ -106,6 +108,7 @@ struct asic_fixed_properties {
u32 cfg_size; u32 cfg_size;
u32 sram_size; u32 sram_size;
u32 max_asid; u32 max_asid;
u32 num_of_events;
u32 high_pll; u32 high_pll;
u32 cb_pool_cb_cnt; u32 cb_pool_cb_cnt;
u32 cb_pool_cb_size; u32 cb_pool_cb_size;
...@@ -198,6 +201,9 @@ struct hl_cs_job; ...@@ -198,6 +201,9 @@ struct hl_cs_job;
#define HL_CQ_LENGTH HL_QUEUE_LENGTH #define HL_CQ_LENGTH HL_QUEUE_LENGTH
#define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE) #define HL_CQ_SIZE_IN_BYTES (HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)
/* Must be power of 2 (HL_PAGE_SIZE / HL_EQ_ENTRY_SIZE) */
#define HL_EQ_LENGTH 64
#define HL_EQ_SIZE_IN_BYTES (HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
/** /**
...@@ -245,6 +251,20 @@ struct hl_cq { ...@@ -245,6 +251,20 @@ struct hl_cq {
atomic_t free_slots_cnt; atomic_t free_slots_cnt;
}; };
/**
 * struct hl_eq - describes the event queue (single one per device)
 * @hdev: pointer to the device structure
 * @kernel_address: holds the queue's kernel virtual address, stored as an
 *                  integer and cast back to a pointer where it is used
 * @bus_address: holds the queue's DMA address
 * @ci: consumer index - index of the next queue entry the driver will
 *      examine
 */
struct hl_eq {
struct hl_device *hdev;
u64 kernel_address;
dma_addr_t bus_address;
u32 ci;
};
/* /*
* ASICs * ASICs
...@@ -271,6 +291,9 @@ enum hl_asic_type { ...@@ -271,6 +291,9 @@ enum hl_asic_type {
* @sw_fini: tears down driver state, does not configure H/W. * @sw_fini: tears down driver state, does not configure H/W.
* @hw_init: sets up the H/W state. * @hw_init: sets up the H/W state.
* @hw_fini: tears down the H/W state. * @hw_fini: tears down the H/W state.
* @halt_engines: halt engines, needed for reset sequence. This also disables
* interrupts from the device. Should be called before
* hw_fini and before CS rollback.
* @suspend: handles IP specific H/W or SW changes for suspend. * @suspend: handles IP specific H/W or SW changes for suspend.
* @resume: handles IP specific H/W or SW changes for resume. * @resume: handles IP specific H/W or SW changes for resume.
* @mmap: mmap function, does nothing. * @mmap: mmap function, does nothing.
...@@ -292,6 +315,9 @@ enum hl_asic_type { ...@@ -292,6 +315,9 @@ enum hl_asic_type {
* @dma_pool_free: free small DMA allocation from pool. * @dma_pool_free: free small DMA allocation from pool.
* @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool. * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool.
* @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool. * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool.
* @update_eq_ci: update event queue CI.
* @handle_eqe: handle event queue entry (IRQ) from ArmCP.
* @get_events_stat: retrieve event queue entries histogram.
* @hw_queues_lock: acquire H/W queues lock. * @hw_queues_lock: acquire H/W queues lock.
* @hw_queues_unlock: release H/W queues lock. * @hw_queues_unlock: release H/W queues lock.
* @send_cpu_message: send buffer to ArmCP. * @send_cpu_message: send buffer to ArmCP.
...@@ -303,6 +329,7 @@ struct hl_asic_funcs { ...@@ -303,6 +329,7 @@ struct hl_asic_funcs {
int (*sw_fini)(struct hl_device *hdev); int (*sw_fini)(struct hl_device *hdev);
int (*hw_init)(struct hl_device *hdev); int (*hw_init)(struct hl_device *hdev);
void (*hw_fini)(struct hl_device *hdev, bool hard_reset); void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
int (*suspend)(struct hl_device *hdev); int (*suspend)(struct hl_device *hdev);
int (*resume)(struct hl_device *hdev); int (*resume)(struct hl_device *hdev);
int (*mmap)(struct hl_fpriv *hpriv, struct vm_area_struct *vma); int (*mmap)(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
...@@ -325,6 +352,10 @@ struct hl_asic_funcs { ...@@ -325,6 +352,10 @@ struct hl_asic_funcs {
size_t size, dma_addr_t *dma_handle); size_t size, dma_addr_t *dma_handle);
void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev, void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
size_t size, void *vaddr); size_t size, void *vaddr);
void (*update_eq_ci)(struct hl_device *hdev, u32 val);
void (*handle_eqe)(struct hl_device *hdev,
struct hl_eq_entry *eq_entry);
void* (*get_events_stat)(struct hl_device *hdev, u32 *size);
void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev);
int (*send_cpu_message)(struct hl_device *hdev, u32 *msg, int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
...@@ -364,8 +395,6 @@ struct hl_ctx_mgr { ...@@ -364,8 +395,6 @@ struct hl_ctx_mgr {
}; };
/** /**
* struct hl_cs_job - command submission job. * struct hl_cs_job - command submission job.
* @finish_work: workqueue object to run when job is completed. * @finish_work: workqueue object to run when job is completed.
...@@ -455,6 +484,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val); ...@@ -455,6 +484,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
* @kernel_ctx: KMD context structure. * @kernel_ctx: KMD context structure.
* @kernel_queues: array of hl_hw_queue. * @kernel_queues: array of hl_hw_queue.
* @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs. * @kernel_cb_mgr: command buffer manager for creating/destroying/handling CGs.
* @event_queue: event queue for IRQ from ArmCP.
* @dma_pool: DMA pool for small allocations. * @dma_pool: DMA pool for small allocations.
* @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address. * @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address.
* @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address. * @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address.
...@@ -489,9 +519,11 @@ struct hl_device { ...@@ -489,9 +519,11 @@ struct hl_device {
enum hl_asic_type asic_type; enum hl_asic_type asic_type;
struct hl_cq *completion_queue; struct hl_cq *completion_queue;
struct workqueue_struct *cq_wq; struct workqueue_struct *cq_wq;
struct workqueue_struct *eq_wq;
struct hl_ctx *kernel_ctx; struct hl_ctx *kernel_ctx;
struct hl_hw_queue *kernel_queues; struct hl_hw_queue *kernel_queues;
struct hl_cb_mgr kernel_cb_mgr; struct hl_cb_mgr kernel_cb_mgr;
struct hl_eq event_queue;
struct dma_pool *dma_pool; struct dma_pool *dma_pool;
void *cpu_accessible_dma_mem; void *cpu_accessible_dma_mem;
dma_addr_t cpu_accessible_dma_address; dma_addr_t cpu_accessible_dma_address;
...@@ -573,6 +605,10 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id); ...@@ -573,6 +605,10 @@ void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id); int hl_cq_init(struct hl_device *hdev, struct hl_cq *q, u32 hw_queue_id);
void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q); void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q);
int hl_eq_init(struct hl_device *hdev, struct hl_eq *q);
void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q);
irqreturn_t hl_irq_handler_cq(int irq, void *arg);
irqreturn_t hl_irq_handler_eq(int irq, void *arg);
int hl_asid_init(struct hl_device *hdev); int hl_asid_init(struct hl_device *hdev);
void hl_asid_fini(struct hl_device *hdev); void hl_asid_fini(struct hl_device *hdev);
unsigned long hl_asid_alloc(struct hl_device *hdev); unsigned long hl_asid_alloc(struct hl_device *hdev);
......
...@@ -10,6 +10,30 @@ ...@@ -10,6 +10,30 @@
#include <linux/types.h> #include <linux/types.h>
/*
* EVENT QUEUE
*/
/*
 * struct hl_eq_header - header at the start of every event queue entry
 * @reserved: not referenced by the code visible here
 * @ctl: little-endian control word. EQ_CTL_READY_MASK selects the ready
 *       bit; EQ_CTL_EVENT_TYPE_MASK presumably selects the event type
 *       (TODO confirm against the ASIC-specific handler)
 */
struct hl_eq_header {
__le32 reserved;
__le32 ctl;
};
/*
 * struct hl_eq_entry - a single event queue entry
 * @hdr: entry header
 * @data: seven little-endian 64-bit words that follow the header; their
 *        meaning is not visible here
 */
struct hl_eq_entry {
struct hl_eq_header hdr;
__le64 data[7];
};
/* Size in bytes of one event queue entry */
#define HL_EQ_ENTRY_SIZE sizeof(struct hl_eq_entry)
/* Ready flag: bit 31 of hl_eq_header.ctl */
#define EQ_CTL_READY_SHIFT 31
#define EQ_CTL_READY_MASK 0x80000000
/* Event type field: bits 16-25 of hl_eq_header.ctl */
#define EQ_CTL_EVENT_TYPE_SHIFT 16
#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000
/* NOTE(review): presumably the MSI-X vector index used for the EQ - confirm */
#define EVENT_QUEUE_MSIX_IDX 5
enum pq_init_status { enum pq_init_status {
PQ_INIT_STATUS_NA = 0, PQ_INIT_STATUS_NA = 0,
PQ_INIT_STATUS_READY_FOR_CP, PQ_INIT_STATUS_READY_FOR_CP,
......
...@@ -7,7 +7,20 @@ ...@@ -7,7 +7,20 @@
#include "habanalabs.h" #include "habanalabs.h"
#include <linux/irqreturn.h> #include <linux/slab.h>
/**
 * struct hl_eqe_work - work item used to schedule handling of an EQ entry
 *                      and armcp_reset event
 * @eq_work: workqueue object to run when EQ entry is received
 * @hdev: pointer to device structure
 * @eq_entry: copy of the EQ entry
 *
 * An instance is allocated by hl_irq_handler_eq() for each ready entry and
 * freed by irq_handle_eqe() once the entry has been handled.
 * NOTE(review): the armcp_reset use mentioned above is not visible in this
 * file - confirm it still applies.
 */
struct hl_eqe_work {
struct work_struct eq_work;
struct hl_device *hdev;
struct hl_eq_entry eq_entry;
};
/* /*
* hl_cq_inc_ptr - increment ci or pi of cq * hl_cq_inc_ptr - increment ci or pi of cq
...@@ -25,6 +38,33 @@ inline u32 hl_cq_inc_ptr(u32 ptr) ...@@ -25,6 +38,33 @@ inline u32 hl_cq_inc_ptr(u32 ptr)
return ptr; return ptr;
} }
/*
 * hl_eq_inc_ptr - advance the event queue consumer index by one entry
 *
 * @ptr: the current ci value of the event queue
 *
 * Returns ptr + 1, wrapping around to 0 once the result equals
 * HL_EQ_LENGTH (the number of event queue entries)
 */
inline u32 hl_eq_inc_ptr(u32 ptr)
{
	u32 next = ptr + 1;

	return unlikely(next == HL_EQ_LENGTH) ? 0 : next;
}
/*
 * irq_handle_eqe - work function that handles one queued EQ entry
 *
 * @work: the work_struct embedded in a struct hl_eqe_work
 *
 * Hands the copied entry to the ASIC-specific handle_eqe callback and then
 * frees the work item that carried it
 */
static void irq_handle_eqe(struct work_struct *work)
{
	struct hl_eqe_work *item;

	item = container_of(work, struct hl_eqe_work, eq_work);

	item->hdev->asic_funcs->handle_eqe(item->hdev, &item->eq_entry);

	/* The work item was allocated by the IRQ handler; release it here */
	kfree(item);
}
/* /*
* hl_irq_handler_cq - irq handler for completion queue * hl_irq_handler_cq - irq handler for completion queue
* *
...@@ -102,6 +142,68 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) ...@@ -102,6 +142,68 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
/*
 * hl_irq_handler_eq - irq handler for event queue
 *
 * @irq: irq number
 * @arg: pointer to event queue structure
 *
 * Consumes every entry whose ready bit is set, starting at eq->ci. While the
 * device is enabled, each entry is copied into a newly allocated work item
 * and queued on hdev->eq_wq so it is handled outside of interrupt context.
 * If the device is disabled, or the atomic allocation fails, the entry is
 * consumed without being handled. In all cases the ready bit is cleared, ci
 * is advanced and the new ci is reported through update_eq_ci.
 *
 * Always returns IRQ_HANDLED
 */
irqreturn_t hl_irq_handler_eq(int irq, void *arg)
{
	struct hl_eq *eq = arg;
	struct hl_device *hdev = eq->hdev;
	struct hl_eq_entry *eq_entry;
	struct hl_eq_entry *eq_base;
	struct hl_eqe_work *handle_eqe_work;

	eq_base = (struct hl_eq_entry *) (uintptr_t) eq->kernel_address;

	while (1) {
		/*
		 * hdr.ctl is a little-endian field, so convert it to CPU
		 * byte order before testing the ready bit.
		 */
		bool entry_ready =
			((le32_to_cpu(eq_base[eq->ci].hdr.ctl) &
				EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT);

		if (!entry_ready)
			break;

		eq_entry = &eq_base[eq->ci];

		/*
		 * Make sure we read EQ entry contents after we've
		 * checked the ownership bit.
		 */
		dma_rmb();

		if (hdev->disabled) {
			dev_warn(hdev->dev,
				"Device disabled but received IRQ %d for EQ\n",
					irq);
			goto skip_irq;
		}

		/* Interrupt context: the allocation must not sleep */
		handle_eqe_work = kmalloc(sizeof(*handle_eqe_work), GFP_ATOMIC);
		if (handle_eqe_work) {
			INIT_WORK(&handle_eqe_work->eq_work, irq_handle_eqe);
			handle_eqe_work->hdev = hdev;

			/*
			 * Copy the entry, because the queue slot is released
			 * below before the work item runs.
			 */
			memcpy(&handle_eqe_work->eq_entry, eq_entry,
					sizeof(*eq_entry));

			queue_work(hdev->eq_wq, &handle_eqe_work->eq_work);
		}
skip_irq:
		/* Clear EQ entry ready bit, keeping the field little-endian */
		eq_entry->hdr.ctl =
			cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) &
							~EQ_CTL_READY_MASK);

		eq->ci = hl_eq_inc_ptr(eq->ci);

		hdev->asic_funcs->update_eq_ci(hdev, eq->ci);
	}

	return IRQ_HANDLED;
}
/* /*
* hl_cq_init - main initialization function for an cq object * hl_cq_init - main initialization function for an cq object
* *
...@@ -147,3 +249,46 @@ void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q) ...@@ -147,3 +249,46 @@ void hl_cq_fini(struct hl_device *hdev, struct hl_cq *q)
hdev->asic_funcs->dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES, hdev->asic_funcs->dma_free_coherent(hdev, HL_CQ_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address, q->bus_address); (void *) (uintptr_t) q->kernel_address, q->bus_address);
} }
/*
* hl_eq_init - main initialization function for an event queue object
*
* @hdev: pointer to device structure
* @q: pointer to eq structure
*
* Allocate dma-able memory for the event queue and initialize fields
* Returns 0 on success
*/
int hl_eq_init(struct hl_device *hdev, struct hl_eq *q)
{
void *p;
BUILD_BUG_ON(HL_EQ_SIZE_IN_BYTES > HL_PAGE_SIZE);
p = hdev->asic_funcs->dma_alloc_coherent(hdev, HL_EQ_SIZE_IN_BYTES,
&q->bus_address, GFP_KERNEL | __GFP_ZERO);
if (!p)
return -ENOMEM;
q->hdev = hdev;
q->kernel_address = (u64) (uintptr_t) p;
q->ci = 0;
return 0;
}
/*
* hl_eq_fini - destroy event queue
*
* @hdev: pointer to device structure
* @q: pointer to eq structure
*
* Free the event queue memory
*/
void hl_eq_fini(struct hl_device *hdev, struct hl_eq *q)
{
flush_workqueue(hdev->eq_wq);
hdev->asic_funcs->dma_free_coherent(hdev, HL_EQ_SIZE_IN_BYTES,
(void *) (uintptr_t) q->kernel_address, q->bus_address);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment