Commit be24dd48 authored by Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains habanalabs driver changes for v5.16:

- Add a new uAPI (under the memory ioctl) to ask the driver to export
  a DMA-BUF object that represents a memory region on the device's
  DRAM. This is needed to enable peer-to-peer over PCIe between a
  Habana device and an RDMA adapter (e.g. an mlx5 or EFA RDMA
  adapter). An illustrative userspace sketch of this flow appears
  right after this list.

- Add a debugfs node to dynamically configure the CS timeout. Until
  now it was configurable only through a kernel module parameter (a
  usage sketch follows this list).

- Fetch more comprehensive power information from the firmware.

- Always take a timestamp when waiting for a user interrupt, as the
  user needs that information to optimize the graph's runtime
  compilation.

- Modify the user interrupt to treat the user value that serves as the
  fence as a 64-bit value instead of a 32-bit one (both changes are
  illustrated in the wait sketch after this list).

- Bypass reset in case of a repeated h/w error event right after a
  device reset, to prevent an endless loop of device resets.

- Fix several bugs in the multi-CS completion code.

- Fix a race condition in fd close/open.

- Update to the latest firmware headers.

- Add 'select CRC32' in Kconfig.

- Small fixes and cosmetics.
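
For illustration only, the new export flow from userspace might look
like the sketch below. The op and field names (HL_MEM_OP_EXPORT_DMABUF_FD,
export_dmabuf_fd, out.fd) follow this series' uAPI patch but are quoted
from memory, so treat them as assumptions rather than a verified
interface:

#include <fcntl.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* uAPI header added/updated by this series */

/* Sketch: export a device-DRAM allocation as a DMA-BUF fd. 'handle' is
 * a device memory handle previously returned by an HL_MEM_OP_ALLOC
 * call; op/field names are assumed from this series' uAPI patch.
 */
static int hl_export_dmabuf(int hl_fd, uint64_t handle, uint64_t size)
{
	union hl_mem_args args;

	memset(&args, 0, sizeof(args));
	args.in.op = HL_MEM_OP_EXPORT_DMABUF_FD;
	args.in.export_dmabuf_fd.handle = handle;
	args.in.export_dmabuf_fd.mem_size = size;
	args.in.flags = O_RDWR | O_CLOEXEC;	/* assumed: becomes the dma-buf fd flags */

	if (ioctl(hl_fd, HL_IOCTL_MEMORY, &args))
		return -1;

	/* The returned fd can be handed to an RDMA driver (e.g. mlx5 or
	 * EFA) for peer-to-peer access over PCIe.
	 */
	return (int) args.out.fd;
}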
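
The new debugfs node can be driven with a plain write; a minimal
sketch, assuming device hl0 and debugfs mounted at /sys/kernel/debug:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Sketch: set the CS timeout to 30 seconds through the new debugfs
 * node. Writing 0 disables the timeout (the driver maps it to
 * MAX_SCHEDULE_TIMEOUT).
 */
int main(void)
{
	int fd = open("/sys/kernel/debug/habanalabs/hl0/timeout_locked",
			O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (write(fd, "30", 2) != 2)
		perror("write");

	close(fd);
	return 0;
}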
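
The 64-bit fence value and the returned timestamp show up in the wait
ioctl roughly as follows. The names hl_wait_cs_args,
in.interrupt_timeout_us, in.addr, in.target, out.timestamp_nsec and
HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD all appear in the diff below;
HL_IOCTL_WAIT_CS and HL_WAIT_CS_FLAGS_INTERRUPT are quoted from the
driver's uAPI as an assumption:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

/* Sketch: wait until the 64-bit value at 'user_addr' reaches 'target',
 * then read back the completion timestamp the driver now records.
 */
static int wait_on_interrupt(int hl_fd, uint64_t user_addr, uint64_t target,
				uint32_t timeout_us, uint64_t *ts_nsec)
{
	union hl_wait_cs_args args;

	memset(&args, 0, sizeof(args));
	args.in.interrupt_timeout_us = timeout_us;
	args.in.addr = user_addr;	/* address of the 64-bit fence value */
	args.in.target = target;	/* now a u64, was a u32 */
	args.in.flags = HL_WAIT_CS_FLAGS_INTERRUPT;	/* assumed flag name */

	if (ioctl(hl_fd, HL_IOCTL_WAIT_CS, &args))
		return -1;

	/* The timestamp is valid only when the driver says so */
	if (args.out.flags & HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD)
		*ts_nsec = args.out.timestamp_nsec;

	return args.out.status;	/* e.g. HL_WAIT_CS_STATUS_COMPLETED */
}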

* tag 'misc-habanalabs-next-2021-10-18' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (25 commits)
  habanalabs: refactor fence handling in hl_cs_poll_fences
  habanalabs: context cleanup cosmetics
  habanalabs: simplify wait for interrupt with timestamp flow
  habanalabs: initialize hpriv fields before adding new node
  habanalabs: Unify frequency set/get functionality
  habanalabs: select CRC32
  habanalabs: add support for dma-buf exporter
  habanalabs: define uAPI to export FD for DMA-BUF
  habanalabs: fix NULL pointer dereference
  habanalabs: fix race condition in multi CS completion
  habanalabs: use only u32
  habanalabs: update firmware files
  habanalabs: bypass reset for continuous h/w error event
  habanalabs: take timestamp on wait for interrupt
  habanalabs: prevent race between fd close/open
  habanalabs: refactor reset log message
  habanalabs: define soft-reset as inference op
  habanalabs: fix debugfs device memory MMU VA translation
  habanalabs: add support for a long interrupt target value
  habanalabs: remove redundant cs validity checks
  ...
parents 2b74240b b2faac38
@@ -226,6 +226,12 @@ Description:    Gets the state dump occurring on a CS timeout or failure.
                 Writing an integer X discards X state dumps, so that the
                 next read would return X+1-st newest state dump.

+What:           /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
+Date:           Sep 2021
+KernelVersion:  5.16
+Contact:        obitton@habana.ai
+Description:    Sets the command submission timeout value in seconds.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
......
@@ -8,6 +8,8 @@ config HABANA_AI
 	depends on PCI && HAS_IOMEM
 	select GENERIC_ALLOCATOR
 	select HWMON
+	select DMA_SHARED_BUFFER
+	select CRC32
 	help
 	  Enables PCIe card driver for Habana's AI Processors (AIP) that are
 	  designed to accelerate Deep Learning inference and training workloads.
......
@@ -11,4 +11,4 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
 		common/command_buffer.o common/hw_queue.o common/irq.o \
 		common/sysfs.o common/hwmon.o common/memory.o \
 		common/command_submission.o common/firmware_if.o \
-		common/state_dump.o
+		common/state_dump.o common/hwmgr.o
@@ -143,6 +143,7 @@ static void hl_fence_init(struct hl_fence *fence, u64 sequence)
 	fence->cs_sequence = sequence;
 	fence->error = 0;
 	fence->timestamp = ktime_set(0, 0);
+	fence->mcs_handling_done = false;
 	init_completion(&fence->completion);
 }
@@ -431,11 +432,10 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
 	/* Don't cancel TDR in case this CS was timedout because we might be
 	 * running from the TDR context
 	 */
-	if (cs && (cs->timedout ||
-			hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
+	if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)
 		return;

-	if (cs && cs->tdr_active)
+	if (cs->tdr_active)
 		cancel_delayed_work_sync(&cs->work_tdr);

 	spin_lock(&hdev->cs_mirror_lock);
@@ -536,10 +536,21 @@ static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
 			mcs_compl->timestamp =
 				ktime_to_ns(fence->timestamp);
 			complete_all(&mcs_compl->completion);
+
+			/*
+			 * Setting mcs_handling_done inside the lock ensures
+			 * at least one fence has mcs_handling_done set to
+			 * true before wait for mcs finish. This ensures at
+			 * least one CS will be set as completed when polling
+			 * mcs fences.
+			 */
+			fence->mcs_handling_done = true;
 		}

 		spin_unlock(&mcs_compl->lock);
 	}
+
+	/* In case CS completed without mcs completion initialized */
+	fence->mcs_handling_done = true;
 }

 static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
@@ -2371,32 +2382,48 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
 			break;
 		}

-		mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;
-
-		if (status == CS_WAIT_STATUS_BUSY)
-			continue;
-
-		mcs_data->completion_bitmap |= BIT(i);
-
-		/*
-		 * best effort to extract timestamp. few notes:
-		 * - if even single fence is gone we cannot extract timestamp
-		 *   (as fence not exist anymore)
-		 * - for all completed CSs we take the earliest timestamp.
-		 *   for this we have to validate that:
-		 *   1. given timestamp was indeed set
-		 *   2. the timestamp is earliest of all timestamps so far
-		 */
-		if (status == CS_WAIT_STATUS_GONE) {
+		switch (status) {
+		case CS_WAIT_STATUS_BUSY:
+			/* CS has not finished, keep waiting on its QID */
+			mcs_data->stream_master_qid_map |=
+					fence->stream_master_qid_map;
+			break;
+		case CS_WAIT_STATUS_COMPLETED:
+			/*
+			 * Using mcs_handling_done to avoid the possibility
+			 * that mcs_data returns to the user indicating CS
+			 * completed before it finished all of its mcs
+			 * handling, to avoid a race the next time the user
+			 * waits for mcs.
+			 */
+			if (!fence->mcs_handling_done)
+				break;
+
+			mcs_data->completion_bitmap |= BIT(i);
+			/*
+			 * For all completed CSs we take the earliest timestamp.
+			 * For this we have to validate that the timestamp is
+			 * earliest of all timestamps so far.
+			 */
+			if (mcs_data->update_ts &&
+					(ktime_compare(fence->timestamp, first_cs_time) < 0))
+				first_cs_time = fence->timestamp;
+			break;
+		case CS_WAIT_STATUS_GONE:
 			mcs_data->update_ts = false;
 			mcs_data->gone_cs = true;
-		} else if (mcs_data->update_ts &&
-			(ktime_compare(fence->timestamp,
-				ktime_set(0, 0)) > 0) &&
-			(ktime_compare(fence->timestamp, first_cs_time) < 0)) {
-			first_cs_time = fence->timestamp;
+			/*
+			 * It is possible to get old sequence numbers from a
+			 * user which relate to already completed CSs whose
+			 * fences are already gone. In this case, the CS is
+			 * set as completed but there is no need to consider
+			 * its QID for mcs completion.
+			 */
+			mcs_data->completion_bitmap |= BIT(i);
+			break;
+		default:
+			dev_err(hdev->dev, "Invalid fence status\n");
+			return -EINVAL;
 		}
 	}

 	hl_fences_put(mcs_data->fence_arr, arr_len);
@@ -2740,13 +2767,14 @@ static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)

 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 				u32 timeout_us, u64 user_address,
-				u32 target_value, u16 interrupt_offset,
-				enum hl_cs_wait_status *status)
+				u64 target_value, u16 interrupt_offset,
+				enum hl_cs_wait_status *status,
+				u64 *timestamp)
 {
 	struct hl_user_pending_interrupt *pend;
 	struct hl_user_interrupt *interrupt;
 	unsigned long timeout, flags;
-	u32 completion_value;
+	u64 completion_value;
 	long completion_rc;
 	int rc = 0;

@@ -2780,15 +2808,17 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	/* We check for completion value as interrupt could have been received
 	 * before we added the node to the wait list
 	 */
-	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
+	if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
 		dev_err(hdev->dev, "Failed to copy completion value from user\n");
 		rc = -EFAULT;
 		goto remove_pending_user_interrupt;
 	}

-	if (completion_value >= target_value)
+	if (completion_value >= target_value) {
 		*status = CS_WAIT_STATUS_COMPLETED;
-	else
+		/* There was no interrupt, we assume the completion is now. */
+		pend->fence.timestamp = ktime_get();
+	} else
 		*status = CS_WAIT_STATUS_BUSY;

 	if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))

@@ -2812,7 +2842,7 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 		reinit_completion(&pend->fence.completion);
 		spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

-		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
+		if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) {
 			dev_err(hdev->dev, "Failed to copy completion value from user\n");

 			rc = -EFAULT;

@@ -2839,6 +2869,8 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 	list_del(&pend->wait_list_node);
 	spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);

+	*timestamp = ktime_to_ns(pend->fence.timestamp);
+
 	kfree(pend);
 	hl_ctx_put(ctx);

@@ -2852,6 +2884,7 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
 	struct asic_fixed_properties *prop;
 	union hl_wait_cs_args *args = data;
 	enum hl_cs_wait_status status;
+	u64 timestamp;
 	int rc;

 	prop = &hdev->asic_prop;

@@ -2881,7 +2914,8 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)

 	rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx,
 			args->in.interrupt_timeout_us, args->in.addr,
-			args->in.target, interrupt_offset, &status);
+			args->in.target, interrupt_offset, &status,
+			&timestamp);

 	if (rc) {
 		if (rc != -EINTR)

@@ -2893,6 +2927,11 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)

 	memset(args, 0, sizeof(*args));

+	if (timestamp) {
+		args->out.timestamp_nsec = timestamp;
+		args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
+	}
+
 	switch (status) {
 	case CS_WAIT_STATUS_COMPLETED:
 		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
......
@@ -181,12 +181,6 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
 	return rc;
 }

-void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
-{
-	if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
-		return;
-}
-
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 {
 	int rc = 0;

@@ -392,7 +386,7 @@ void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr)
 	idp = &mgr->ctx_handles;

 	idr_for_each_entry(idp, ctx, id)
-		hl_ctx_free(hdev, ctx);
+		kref_put(&ctx->refcount, hl_ctx_do_release);

 	idr_destroy(&mgr->ctx_handles);
 	mutex_destroy(&mgr->ctx_lock);
......
@@ -1167,6 +1167,45 @@ static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
 	return count;
 }

+static ssize_t hl_timeout_locked_read(struct file *f, char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+	char tmp_buf[200];
+	ssize_t rc;
+
+	if (*ppos)
+		return 0;
+
+	sprintf(tmp_buf, "%d\n",
+		jiffies_to_msecs(hdev->timeout_jiffies) / 1000);
+	rc = simple_read_from_buffer(buf, strlen(tmp_buf) + 1, ppos, tmp_buf,
+			strlen(tmp_buf) + 1);
+
+	return rc;
+}
+
+static ssize_t hl_timeout_locked_write(struct file *f, const char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+	struct hl_device *hdev = entry->hdev;
+	u32 value;
+	ssize_t rc;
+
+	rc = kstrtouint_from_user(buf, count, 10, &value);
+	if (rc)
+		return rc;
+
+	if (value)
+		hdev->timeout_jiffies = msecs_to_jiffies(value * 1000);
+	else
+		hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
+
+	return count;
+}
+
 static const struct file_operations hl_data32b_fops = {
 	.owner = THIS_MODULE,
 	.read = hl_data_read32,

@@ -1240,6 +1279,12 @@ static const struct file_operations hl_state_dump_fops = {
 	.write = hl_state_dump_write
 };

+static const struct file_operations hl_timeout_locked_fops = {
+	.owner = THIS_MODULE,
+	.read = hl_timeout_locked_read,
+	.write = hl_timeout_locked_write
+};
+
 static const struct hl_info_list hl_debugfs_list[] = {
 	{"command_buffers", command_buffers_show, NULL},
 	{"command_submission", command_submission_show, NULL},

@@ -1421,6 +1466,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 				dev_entry,
 				&hl_state_dump_fops);

+	debugfs_create_file("timeout_locked",
+				0644,
+				dev_entry->root,
+				dev_entry,
+				&hl_timeout_locked_fops);
+
 	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
 		debugfs_create_file(hl_debugfs_list[i].name,
 					0444,
......
@@ -69,13 +69,6 @@ static void hpriv_release(struct kref *ref)

 	mutex_destroy(&hpriv->restore_phase_mutex);

-	mutex_lock(&hdev->fpriv_list_lock);
-	list_del(&hpriv->dev_node);
-	hdev->compute_ctx = NULL;
-	mutex_unlock(&hdev->fpriv_list_lock);
-
-	kfree(hpriv);
-
 	if ((!hdev->pldm) && (hdev->pdev) &&
 			(!hdev->asic_funcs->is_device_idle(hdev,
 				idle_mask,

@@ -87,9 +80,32 @@ static void hpriv_release(struct kref *ref)
 		device_is_idle = false;
 	}

+	/* We need to remove the user from the list to make sure the reset process won't
+	 * try to kill the user process. Because, if we got here, it means there are no
+	 * more driver/device resources that the user process is occupying, so there is
+	 * no need to kill it
+	 *
+	 * However, we can't set the compute_ctx to NULL at this stage. This is to prevent
+	 * a race between the release and opening the device again. We don't want to let
+	 * a user open the device while a reset is about to happen.
+	 */
+	mutex_lock(&hdev->fpriv_list_lock);
+	list_del(&hpriv->dev_node);
+	mutex_unlock(&hdev->fpriv_list_lock);
+
 	if ((hdev->reset_if_device_not_idle && !device_is_idle)
 			|| hdev->reset_upon_device_release)
 		hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);

+	/* Now we can mark the compute_ctx as empty. Even if a reset is running in a
+	 * different thread, we don't care because in_reset is marked, so if a user
+	 * tries to open the device it will fail on that, even if compute_ctx is NULL.
+	 */
+	mutex_lock(&hdev->fpriv_list_lock);
+	hdev->compute_ctx = NULL;
+	mutex_unlock(&hdev->fpriv_list_lock);
+
+	kfree(hpriv);
 }

 void hl_hpriv_get(struct hl_fpriv *hpriv)
@@ -530,6 +546,19 @@ static void hl_device_heartbeat(struct work_struct *work)
 	return;

 reschedule:
+	/*
+	 * prev_reset_trigger tracks consecutive fatal h/w errors until first
+	 * heartbeat immediately post reset.
+	 * If control reached here, then at least one heartbeat work has been
+	 * scheduled since last reset/init cycle.
+	 * So if the device is not already in reset cycle, reset the flag
+	 * prev_reset_trigger as no reset occurred with HL_RESET_FW_FATAL_ERR
+	 * status for at least one heartbeat. From this point driver restarts
+	 * tracking future consecutive fatal errors.
+	 */
+	if (!(atomic_read(&hdev->in_reset)))
+		hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
+
 	schedule_delayed_work(&hdev->work_heartbeat,
 			usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
 }
@@ -909,6 +938,65 @@ static void device_disable_open_processes(struct hl_device *hdev)
 	mutex_unlock(&hdev->fpriv_list_lock);
 }

+static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
+{
+	u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
+
+	/*
+	 * 'reset cause' is being updated here, because getting here
+	 * means that it's the 1st time and the last time we're here
+	 * ('in_reset' makes sure of it). This makes sure that
+	 * 'reset_cause' will continue holding its 1st recorded reason!
+	 */
+	if (flags & HL_RESET_HEARTBEAT) {
+		hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
+		cur_reset_trigger = HL_RESET_HEARTBEAT;
+	} else if (flags & HL_RESET_TDR) {
+		hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
+		cur_reset_trigger = HL_RESET_TDR;
+	} else if (flags & HL_RESET_FW_FATAL_ERR) {
+		hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+		cur_reset_trigger = HL_RESET_FW_FATAL_ERR;
+	} else {
+		hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+	}
+
+	/*
+	 * If the reset cause is the same twice, then reset_trigger_repeated
+	 * is set; if this reset is due to a fatal FW error, the device is
+	 * set to an unstable state.
+	 */
+	if (hdev->prev_reset_trigger != cur_reset_trigger) {
+		hdev->prev_reset_trigger = cur_reset_trigger;
+		hdev->reset_trigger_repeated = 0;
+	} else {
+		hdev->reset_trigger_repeated = 1;
+	}
+
+	/* If reset is due to heartbeat, device CPU is not responsive, in
+	 * which case there is no point sending a PCI disable message to it.
+	 *
+	 * If F/W is performing the reset, no need to send it a message to disable
+	 * PCI access
+	 */
+	if ((flags & HL_RESET_HARD) &&
+			!(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
+		/* Disable PCI access from device F/W so it won't send
+		 * us additional interrupts. We disable MSI/MSI-X at
+		 * the halt_engines function and we can't have the F/W
+		 * sending us interrupts after that. We need to disable
+		 * the access here because if the device is marked
+		 * disabled, the message won't be sent. Also, in case
+		 * of heartbeat, the device CPU is marked as disabled
+		 * so this message won't be sent
+		 */
+		if (hl_fw_send_pci_access_msg(hdev,
+				CPUCP_PACKET_DISABLE_PCI_ACCESS))
+			dev_warn(hdev->dev,
+				"Failed to disable PCI access by F/W\n");
+	}
+}
+
 /*
  * hl_device_reset - reset the device
  *
@@ -954,7 +1042,7 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 		goto do_reset;
 	}

-	if (!hard_reset && !hdev->allow_external_soft_reset) {
+	if (!hard_reset && !hdev->allow_inference_soft_reset) {
 		hard_instead_soft = true;
 		hard_reset = true;
 	}

@@ -978,47 +1066,21 @@
 		if (rc)
 			return 0;

-		/*
-		 * 'reset cause' is being updated here, because getting here
-		 * means that it's the 1st time and the last time we're here
-		 * ('in_reset' makes sure of it). This makes sure that
-		 * 'reset_cause' will continue holding its 1st recorded reason!
-		 */
-		if (flags & HL_RESET_HEARTBEAT)
-			hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
-		else if (flags & HL_RESET_TDR)
-			hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
-		else
-			hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
-
-		/* If reset is due to heartbeat, device CPU is not responsive, in
-		 * which case there is no point sending a PCI disable message to it.
-		 *
-		 * If F/W is performing the reset, no need to send it a message to disable
-		 * PCI access
-		 */
-		if (hard_reset && !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
-			/* Disable PCI access from device F/W so it won't send
-			 * us additional interrupts. We disable MSI/MSI-X at
-			 * the halt_engines function and we can't have the F/W
-			 * sending us interrupts after that. We need to disable
-			 * the access here because if the device is marked
-			 * disabled, the message won't be sent. Also, in case
-			 * of heartbeat, the device CPU is marked as disabled
-			 * so this message won't be sent
-			 */
-			if (hl_fw_send_pci_access_msg(hdev,
-					CPUCP_PACKET_DISABLE_PCI_ACCESS))
-				dev_warn(hdev->dev,
-					"Failed to disable PCI access by F/W\n");
-		}
+		handle_reset_trigger(hdev, flags);

 		/* This also blocks future CS/VM/JOB completion operations */
 		hdev->disabled = true;

 		take_release_locks(hdev);

-		dev_err(hdev->dev, "Going to RESET device!\n");
+		if (hard_reset)
+			dev_info(hdev->dev, "Going to reset device\n");
+		else if (flags & HL_RESET_DEVICE_RELEASE)
+			dev_info(hdev->dev,
+				"Going to reset device after it was released by user\n");
+		else
+			dev_info(hdev->dev,
+				"Going to reset compute engines of inference device\n");
 	}

again:
@@ -1108,6 +1170,17 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	hdev->device_cpu_disabled = false;
 	hdev->hard_reset_pending = false;

+	if (hdev->reset_trigger_repeated &&
+			(hdev->prev_reset_trigger == HL_RESET_FW_FATAL_ERR)) {
+		/* If there are two back-to-back resets from FW,
+		 * ensure the driver puts the device in an unusable state
+		 */
+		dev_crit(hdev->dev,
+			"Consecutive FW fatal errors received, stopping hard reset\n");
+		rc = -EIO;
+		goto out_err;
+	}
+
 	if (hdev->kernel_ctx) {
 		dev_crit(hdev->dev,
 			"kernel ctx was alive during hard reset, something is terribly wrong\n");
......
@@ -2162,18 +2162,17 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
 }

 /**
- * hl_fw_dynamic_report_reset_cause - send a COMMS message with the cause
- * of the newly triggered hard reset
+ * hl_fw_dynamic_send_msg - send a COMMS message with attached data
  *
  * @hdev: pointer to the habanalabs device structure
  * @fw_loader: managing structure for loading device's FW
- * @reset_cause: enumerated cause for the recent hard reset
+ * @msg_type: message type
+ * @data: data to be sent
  *
  * @return 0 on success, otherwise non-zero error code
  */
-static int hl_fw_dynamic_report_reset_cause(struct hl_device *hdev,
-		struct fw_load_mgr *fw_loader,
-		enum comms_reset_cause reset_cause)
+static int hl_fw_dynamic_send_msg(struct hl_device *hdev,
+		struct fw_load_mgr *fw_loader, u8 msg_type, void *data)
 {
 	struct lkd_msg_comms msg;
 	int rc;

@@ -2181,11 +2180,20 @@ static int hl_fw_dynamic_report_reset_cause(struct hl_device *hdev,
 	memset(&msg, 0, sizeof(msg));

 	/* create message to be sent */
-	msg.header.type = HL_COMMS_RESET_CAUSE_TYPE;
+	msg.header.type = msg_type;
 	msg.header.size = cpu_to_le16(sizeof(struct comms_msg_header));
 	msg.header.magic = cpu_to_le32(HL_COMMS_MSG_MAGIC);

-	msg.reset_cause = reset_cause;
+	switch (msg_type) {
+	case HL_COMMS_RESET_CAUSE_TYPE:
+		msg.reset_cause = *(__u8 *) data;
+		break;
+	default:
+		dev_err(hdev->dev,
+			"Send COMMS message - invalid message type %u\n",
+			msg_type);
+		return -EINVAL;
+	}

 	rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
 			sizeof(struct lkd_msg_comms));

@@ -2252,8 +2260,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 		goto protocol_err;

 	if (hdev->curr_reset_cause) {
-		rc = hl_fw_dynamic_report_reset_cause(hdev, fw_loader,
-				hdev->curr_reset_cause);
+		rc = hl_fw_dynamic_send_msg(hdev, fw_loader,
+				HL_COMMS_RESET_CAUSE_TYPE, &hdev->curr_reset_cause);
 		if (rc)
 			goto protocol_err;
......
@@ -26,6 +26,7 @@
 #include <linux/sched/signal.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/coresight.h>
+#include <linux/dma-buf.h>

 #define HL_NAME				"habanalabs"

@@ -68,6 +69,9 @@

 #define HL_STATE_DUMP_HIST_LEN		5

+/* Default value for device reset trigger, an invalid value */
+#define HL_RESET_TRIGGER_DEFAULT	0xFF
+
 #define OBJ_NAMES_HASH_TABLE_BITS	7 /* 1 << 7 buckets */
 #define SYNC_TO_ENGINE_HASH_TABLE_BITS	7 /* 1 << 7 buckets */

@@ -132,13 +136,18 @@ enum hl_mmu_page_table_location {
  * - HL_RESET_FW
  *       F/W will perform the reset. No need to ask it to reset the device. This is relevant
  *       only when running with secured f/w
+ *
+ * - HL_RESET_FW_FATAL_ERR
+ *       Set if reset is due to a fatal error from FW
  */
 #define HL_RESET_HARD			(1 << 0)
 #define HL_RESET_FROM_RESET_THREAD	(1 << 1)
 #define HL_RESET_HEARTBEAT		(1 << 2)
 #define HL_RESET_TDR			(1 << 3)
 #define HL_RESET_DEVICE_RELEASE		(1 << 4)
 #define HL_RESET_FW			(1 << 5)
+#define HL_RESET_FW_FATAL_ERR		(1 << 6)

 #define HL_MAX_SOBS_PER_MONITOR	8

@@ -447,6 +456,9 @@ struct hl_hints_range {
  *                          for hints validity check.
  * @device_dma_offset_for_host_access: the offset to add to host DMA addresses
  *                                     to enable the device to access them.
+ * @max_freq_value: current max clk frequency.
+ * @clk_pll_index: clock PLL index that specifies which PLL determines the
+ *                 clock we display to the user
  * @mmu_pgt_size: MMU page tables total size.
  * @mmu_pte_size: PTE size in MMU page tables.
  * @mmu_hop_table_size: MMU hop table size.

@@ -543,6 +555,8 @@ struct asic_fixed_properties {
 	u64				cb_va_end_addr;
 	u64				dram_hints_align_mask;
 	u64				device_dma_offset_for_host_access;
+	u64				max_freq_value;
+	u32				clk_pll_index;
 	u32				mmu_pgt_size;
 	u32				mmu_pte_size;
 	u32				mmu_hop_table_size;

@@ -601,6 +615,9 @@
  *                          masters QIDs that multi cs is waiting on
  * @error: mark this fence with error
  * @timestamp: timestamp upon completion
+ * @mcs_handling_done: indicates that corresponding command submission has
+ *                     finished mcs handling, this does not mean it was part
+ *                     of the mcs
  */
 struct hl_fence {
 	struct completion	completion;

@@ -609,6 +626,7 @@ struct hl_fence {
 	u32			stream_master_qid_map;
 	int			error;
 	ktime_t			timestamp;
+	u8			mcs_handling_done;
 };

 /**

@@ -1352,6 +1370,23 @@ struct hl_cs_counters_atomic {
 	atomic64_t validation_drop_cnt;
 };

+/**
+ * struct hl_dmabuf_priv - a dma-buf private object.
+ * @dmabuf: pointer to dma-buf object.
+ * @ctx: pointer to the dma-buf owner's context.
+ * @phys_pg_pack: pointer to physical page pack if the dma-buf was exported for
+ *                memory allocation handle.
+ * @device_address: physical address of the device's memory. Relevant only
+ *                  if phys_pg_pack is NULL (dma-buf was exported from address).
+ *                  The total size can be taken from the dmabuf object.
+ */
+struct hl_dmabuf_priv {
+	struct dma_buf			*dmabuf;
+	struct hl_ctx			*ctx;
+	struct hl_vm_phys_pg_pack	*phys_pg_pack;
+	uint64_t			device_address;
+};
+
 /**
  * struct hl_ctx - user/kernel context.
  * @mem_hash: holds mapping from virtual address to virtual memory area

@@ -1662,6 +1697,7 @@ struct hl_vm_hw_block_list_node {
  * @npages: num physical pages in the pack.
  * @total_size: total size of all the pages in this list.
  * @mapping_cnt: number of shared mappings.
+ * @exporting_cnt: number of dma-buf exports.
  * @asid: the context related to this list.
  * @page_size: size of each page in the pack.
  * @flags: HL_MEM_* flags related to this list.

@@ -1676,6 +1712,7 @@ struct hl_vm_phys_pg_pack {
 	u64			npages;
 	u64			total_size;
 	atomic_t		mapping_cnt;
+	u32			exporting_cnt;
 	u32			asid;
 	u32			page_size;
 	u32			flags;

@@ -2396,6 +2433,7 @@ struct multi_cs_data {
  *                          the error will be ignored by the driver during
  *                          device initialization. Mainly used to debug and
  *                          workaround firmware bugs
+ * @dram_pci_bar_start: start bus address of PCIe bar towards DRAM.
  * @last_successful_open_jif: timestamp (jiffies) of the last successful
  *                            device open.
  * @last_open_session_duration_jif: duration (jiffies) of the last device open

@@ -2440,8 +2478,12 @@ struct multi_cs_data {
  * @collective_mon_idx: helper index for collective initialization
  * @supports_coresight: is CoreSight supported.
  * @supports_soft_reset: is soft reset supported.
- * @allow_external_soft_reset: true if soft reset initiated by user or TDR is
- *                             allowed.
+ * @allow_inference_soft_reset: true if the ASIC supports soft reset that is
+ *                              initiated by user or TDR. This is only true
+ *                              in inference ASICs, as there is no real-world
+ *                              use-case of doing soft-reset in training (due
+ *                              to the fact that training runs on multiple
+ *                              devices)
  * @supports_cb_mapping: is mapping a CB to the device's MMU supported.
 * @needs_reset: true if reset_on_lockup is false and device should be reset
 *               due to lockup.

@@ -2452,6 +2494,10 @@ struct multi_cs_data {
 * @supports_staged_submission: true if staged submissions are supported
 * @curr_reset_cause: saves an enumerated reset cause when a hard reset is
 *                    triggered, and cleared after it is shared with preboot.
+ * @prev_reset_trigger: saves the previous trigger which caused a reset, overridden
+ *                      with a new value on next reset
+ * @reset_trigger_repeated: set if device reset is triggered more than once with
+ *                          same cause.
 * @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to
 *                         complete instead.
 * @device_cpu_is_halted: Flag to indicate whether the device CPU was already

@@ -2537,6 +2583,7 @@ struct hl_device {
 	u64				max_power;
 	u64				clock_gating_mask;
 	u64				boot_error_status_mask;
+	u64				dram_pci_bar_start;
 	u64				last_successful_open_jif;
 	u64				last_open_session_duration_jif;
 	u64				open_counter;

@@ -2572,13 +2619,15 @@ struct hl_device {
 	u8				collective_mon_idx;
 	u8				supports_coresight;
 	u8				supports_soft_reset;
-	u8				allow_external_soft_reset;
+	u8				allow_inference_soft_reset;
 	u8				supports_cb_mapping;
 	u8				needs_reset;
 	u8				process_kill_trial_cnt;
 	u8				device_fini_pending;
 	u8				supports_staged_submission;
 	u8				curr_reset_cause;
+	u8				prev_reset_trigger;
+	u8				reset_trigger_repeated;
 	u8				skip_reset_on_timeout;
 	u8				device_cpu_is_halted;
 	u8				supports_wait_for_multi_cs;

@@ -2956,6 +3005,15 @@ int hl_set_voltage(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value);
 int hl_set_current(struct hl_device *hdev,
 			int sensor_index, u32 attr, long value);
+int hl_set_power(struct hl_device *hdev,
+			int sensor_index, u32 attr, long value);
+int hl_get_power(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value);
+int hl_get_clk_rate(struct hl_device *hdev,
+			u32 *cur_clk, u32 *max_clk);
+void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
+void hl_add_device_attr(struct hl_device *hdev,
+			struct attribute_group *dev_attr_grp);
 void hw_sob_get(struct hl_hw_sob *hw_sob);
 void hw_sob_put(struct hl_hw_sob *hw_sob);
 void hl_encaps_handle_do_release(struct kref *ref);
......
@@ -225,6 +225,17 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 	if (!hpriv)
 		return -ENOMEM;

+	/* Prevent other routines from reading partial hpriv data by
+	 * initializing hpriv fields before inserting it to the list
+	 */
+	hpriv->hdev = hdev;
+	filp->private_data = hpriv;
+	hpriv->filp = filp;
+	hpriv->is_control = true;
+	nonseekable_open(inode, filp);
+
+	hpriv->taskpid = find_get_pid(current->pid);
+
 	mutex_lock(&hdev->fpriv_list_lock);

 	if (!hl_device_operational(hdev, NULL)) {

@@ -238,19 +249,15 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 	list_add(&hpriv->dev_node, &hdev->fpriv_list);
 	mutex_unlock(&hdev->fpriv_list_lock);

-	hpriv->hdev = hdev;
-	filp->private_data = hpriv;
-	hpriv->filp = filp;
-	hpriv->is_control = true;
-	nonseekable_open(inode, filp);
-
-	hpriv->taskpid = find_get_pid(current->pid);
-
 	return 0;

 out_err:
 	mutex_unlock(&hdev->fpriv_list_lock);
+	filp->private_data = NULL;
+	put_pid(hpriv->taskpid);
+
 	kfree(hpriv);

 	return rc;
 }

@@ -339,6 +346,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 	set_driver_behavior_per_device(hdev);

 	hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
+	hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;

 	if (timeout_locked)
 		hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
......
 // SPDX-License-Identifier: GPL-2.0

 /*
- * Copyright 2016-2018 HabanaLabs, Ltd.
+ * Copyright 2019-2021 HabanaLabs, Ltd.
  * All Rights Reserved.
  */

-#include "gaudiP.h"
-#include "../include/gaudi/gaudi_fw_if.h"
+#include "habanalabs.h"

-void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
+void hl_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
 {
-	struct gaudi_device *gaudi = hdev->asic_specific;
-
-	if (freq == PLL_LAST)
-		hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
+	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
+			hdev->asic_prop.max_freq_value);
 }

-int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
+int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
 {
 	long value;

 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;

-	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);

 	if (value < 0) {
 		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",

@@ -33,7 +30,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)

 	*max_clk = (value / 1000 / 1000);

-	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);

 	if (value < 0) {
 		dev_err(hdev->dev,

@@ -51,15 +48,14 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev,
 				struct device_attribute *attr, char *buf)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	long value;

 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;

-	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false);
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);

-	gaudi->max_freq_value = value;
+	hdev->asic_prop.max_freq_value = value;

 	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
 }

@@ -68,7 +64,6 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
-	struct gaudi_device *gaudi = hdev->asic_specific;
 	int rc;
 	u64 value;

@@ -83,9 +78,10 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev,
 		goto fail;
 	}

-	gaudi->max_freq_value = value * 1000 * 1000;
+	hdev->asic_prop.max_freq_value = value * 1000 * 1000;

-	hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value);
+	hl_set_frequency(hdev, hdev->asic_prop.clk_pll_index,
+			hdev->asic_prop.max_freq_value);

 fail:
 	return count;

@@ -100,7 +96,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,

 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;

-	value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true);
+	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);

 	return sprintf(buf, "%lu\n", (value / 1000 / 1000));
 }

@@ -108,14 +104,14 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev,
 static DEVICE_ATTR_RW(clk_max_freq_mhz);
 static DEVICE_ATTR_RO(clk_cur_freq_mhz);

-static struct attribute *gaudi_dev_attrs[] = {
+static struct attribute *hl_dev_attrs[] = {
 	&dev_attr_clk_max_freq_mhz.attr,
 	&dev_attr_clk_cur_freq_mhz.attr,
 	NULL,
 };

-void gaudi_add_device_attr(struct hl_device *hdev,
+void hl_add_device_attr(struct hl_device *hdev,
 			struct attribute_group *dev_attr_grp)
 {
-	dev_attr_grp->attrs = gaudi_dev_attrs;
+	dev_attr_grp->attrs = hl_dev_attrs;
 }
@@ -113,6 +113,9 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
 	int rc;
+	u32 cpucp_attr;
+	bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+				CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;

 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;

@@ -121,65 +124,134 @@ static int hl_read(struct device *dev, enum hwmon_sensor_types type,
 	case hwmon_temp:
 		switch (attr) {
 		case hwmon_temp_input:
+			cpucp_attr = cpucp_temp_input;
+			break;
 		case hwmon_temp_max:
+			cpucp_attr = cpucp_temp_max;
+			break;
 		case hwmon_temp_crit:
+			cpucp_attr = cpucp_temp_crit;
+			break;
 		case hwmon_temp_max_hyst:
+			cpucp_attr = cpucp_temp_max_hyst;
+			break;
 		case hwmon_temp_crit_hyst:
+			cpucp_attr = cpucp_temp_crit_hyst;
+			break;
 		case hwmon_temp_offset:
+			cpucp_attr = cpucp_temp_offset;
+			break;
 		case hwmon_temp_highest:
+			cpucp_attr = cpucp_temp_highest;
 			break;
 		default:
 			return -EINVAL;
 		}

-		rc = hl_get_temperature(hdev, channel, attr, val);
+		if (use_cpucp_enum)
+			rc = hl_get_temperature(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_temperature(hdev, channel, attr, val);
+
 		break;
 	case hwmon_in:
 		switch (attr) {
 		case hwmon_in_input:
+			cpucp_attr = cpucp_in_input;
+			break;
 		case hwmon_in_min:
+			cpucp_attr = cpucp_in_min;
+			break;
 		case hwmon_in_max:
+			cpucp_attr = cpucp_in_max;
+			break;
 		case hwmon_in_highest:
+			cpucp_attr = cpucp_in_highest;
 			break;
 		default:
 			return -EINVAL;
 		}

-		rc = hl_get_voltage(hdev, channel, attr, val);
+		if (use_cpucp_enum)
+			rc = hl_get_voltage(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_voltage(hdev, channel, attr, val);
+
 		break;
 	case hwmon_curr:
 		switch (attr) {
 		case hwmon_curr_input:
+			cpucp_attr = cpucp_curr_input;
+			break;
 		case hwmon_curr_min:
+			cpucp_attr = cpucp_curr_min;
+			break;
 		case hwmon_curr_max:
+			cpucp_attr = cpucp_curr_max;
+			break;
 		case hwmon_curr_highest:
+			cpucp_attr = cpucp_curr_highest;
 			break;
 		default:
 			return -EINVAL;
 		}

-		rc = hl_get_current(hdev, channel, attr, val);
+		if (use_cpucp_enum)
+			rc = hl_get_current(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_current(hdev, channel, attr, val);
+
 		break;
 	case hwmon_fan:
 		switch (attr) {
 		case hwmon_fan_input:
+			cpucp_attr = cpucp_fan_input;
+			break;
 		case hwmon_fan_min:
+			cpucp_attr = cpucp_fan_min;
+			break;
 		case hwmon_fan_max:
+			cpucp_attr = cpucp_fan_max;
 			break;
 		default:
 			return -EINVAL;
 		}
-		rc = hl_get_fan_speed(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			rc = hl_get_fan_speed(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_fan_speed(hdev, channel, attr, val);
+
 		break;
 	case hwmon_pwm:
 		switch (attr) {
 		case hwmon_pwm_input:
+			cpucp_attr = cpucp_pwm_input;
+			break;
 		case hwmon_pwm_enable:
+			cpucp_attr = cpucp_pwm_enable;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (use_cpucp_enum)
+			rc = hl_get_pwm_info(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_pwm_info(hdev, channel, attr, val);
+
+		break;
+	case hwmon_power:
+		switch (attr) {
+		case hwmon_power_input:
+			cpucp_attr = CPUCP_POWER_INPUT;
+			break;
+		case hwmon_power_input_highest:
+			cpucp_attr = CPUCP_POWER_INPUT_HIGHEST;
 			break;
 		default:
 			return -EINVAL;
 		}
-		rc = hl_get_pwm_info(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			rc = hl_get_power(hdev, channel, cpucp_attr, val);
+		else
+			rc = hl_get_power(hdev, channel, attr, val);
+
 		break;
 	default:
 		return -EINVAL;
@@ -191,6 +263,9 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
 			u32 attr, int channel, long val)
 {
 	struct hl_device *hdev = dev_get_drvdata(dev);
+	u32 cpucp_attr;
+	bool use_cpucp_enum = (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+				CPU_BOOT_DEV_STS0_MAP_HWMON_EN) ? true : false;

 	if (!hl_device_operational(hdev, NULL))
 		return -ENODEV;

@@ -199,40 +274,78 @@ static int hl_write(struct device *dev, enum hwmon_sensor_types type,
 	case hwmon_temp:
 		switch (attr) {
 		case hwmon_temp_offset:
+			cpucp_attr = cpucp_temp_offset;
+			break;
 		case hwmon_temp_reset_history:
+			cpucp_attr = cpucp_temp_reset_history;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_temperature(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_temperature(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_temperature(hdev, channel, attr, val);
+
 		break;
 	case hwmon_pwm:
 		switch (attr) {
 		case hwmon_pwm_input:
+			cpucp_attr = cpucp_pwm_input;
+			break;
 		case hwmon_pwm_enable:
+			cpucp_attr = cpucp_pwm_enable;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_pwm_info(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_pwm_info(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_pwm_info(hdev, channel, attr, val);
+
 		break;
 	case hwmon_in:
 		switch (attr) {
 		case hwmon_in_reset_history:
+			cpucp_attr = cpucp_in_reset_history;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_voltage(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_voltage(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_voltage(hdev, channel, attr, val);
+
 		break;
 	case hwmon_curr:
 		switch (attr) {
 		case hwmon_curr_reset_history:
+			cpucp_attr = cpucp_curr_reset_history;
 			break;
 		default:
 			return -EINVAL;
 		}
-		hl_set_current(hdev, channel, attr, val);
+
+		if (use_cpucp_enum)
+			hl_set_current(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_current(hdev, channel, attr, val);
+
+		break;
+	case hwmon_power:
+		switch (attr) {
+		case hwmon_power_reset_history:
+			cpucp_attr = CPUCP_POWER_RESET_INPUT_HISTORY;
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (use_cpucp_enum)
+			hl_set_power(hdev, channel, cpucp_attr, val);
+		else
+			hl_set_power(hdev, channel, attr, val);
+
 		break;
 	default:
 		return -EINVAL;
@@ -296,6 +409,15 @@ static umode_t hl_is_visible(const void *data, enum hwmon_sensor_types type,
 			return 0644;
 		}
 		break;
+	case hwmon_power:
+		switch (attr) {
+		case hwmon_power_input:
+		case hwmon_power_input_highest:
+			return 0444;
+		case hwmon_power_reset_history:
+			return 0200;
+		}
+		break;
 	default:
 		break;
 	}
@@ -551,6 +673,60 @@ int hl_set_current(struct hl_device *hdev,
 	return rc;
 }

+int hl_set_power(struct hl_device *hdev,
+			int sensor_index, u32 attr, long value)
+{
+	struct cpucp_packet pkt;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.sensor_index = __cpu_to_le16(sensor_index);
+	pkt.type = __cpu_to_le16(attr);
+	pkt.value = __cpu_to_le64(value);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+						0, NULL);
+
+	if (rc)
+		dev_err(hdev->dev,
+			"Failed to set power of sensor %d, error %d\n",
+			sensor_index, rc);
+
+	return rc;
+}
+
+int hl_get_power(struct hl_device *hdev,
+			int sensor_index, u32 attr, long *value)
+{
+	struct cpucp_packet pkt;
+	u64 result;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_POWER_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.sensor_index = __cpu_to_le16(sensor_index);
+	pkt.type = __cpu_to_le16(attr);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+						0, &result);
+
+	*value = (long) result;
+
+	if (rc) {
+		dev_err(hdev->dev,
+			"Failed to get power of sensor %d, error %d\n",
+			sensor_index, rc);
+		*value = 0;
+	}
+
+	return rc;
+}
+
 int hl_hwmon_init(struct hl_device *hdev)
 {
 	struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev;
......
@@ -141,10 +141,13 @@ static void handle_user_cq(struct hl_device *hdev,
 				struct hl_user_interrupt *user_cq)
 {
 	struct hl_user_pending_interrupt *pend;
+	ktime_t now = ktime_get();

 	spin_lock(&user_cq->wait_list_lock);
-	list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node)
+	list_for_each_entry(pend, &user_cq->wait_list_head, wait_list_node) {
+		pend->fence.timestamp = now;
 		complete_all(&pend->fence.completion);
+	}
 	spin_unlock(&user_cq->wait_list_lock);
 }
......
@@ -501,23 +501,25 @@ static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,

 	if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) &&
 			!is_power_of_2(prop->dram_page_size)) {
-		unsigned long dram_page_size = prop->dram_page_size;
-		u64 page_offset_mask;
-		u64 phys_addr_mask;
-		u32 bit;
+		u64 dram_page_size, dram_base, abs_phys_addr, abs_virt_addr,
+			page_id, page_start;
+		u32 page_off;

 		/*
-		 * find last set bit in page_size to cover all bits of page
-		 * offset. note that 1 has to be added to bit index.
-		 * note that the internal ulong variable is used to avoid
-		 * alignment issue.
+		 * Bit arithmetic cannot be used for non power of two page
+		 * sizes. In addition, since bit arithmetic is not used, we
+		 * cannot ignore the dram base. All of that must be considered.
		 */
-		bit = find_last_bit(&dram_page_size,
-					sizeof(dram_page_size) * BITS_PER_BYTE) + 1;
-		page_offset_mask = (BIT_ULL(bit) - 1);
-		phys_addr_mask = ~page_offset_mask;
-		*phys_addr = (tmp_phys_addr & phys_addr_mask) |
-				(virt_addr & page_offset_mask);
+
+		dram_page_size = prop->dram_page_size;
+		dram_base = prop->dram_base_address;
+		abs_phys_addr = tmp_phys_addr - dram_base;
+		abs_virt_addr = virt_addr - dram_base;
+		page_id = DIV_ROUND_DOWN_ULL(abs_phys_addr, dram_page_size);
+		page_start = page_id * dram_page_size;
+		div_u64_rem(abs_virt_addr, dram_page_size, &page_off);
+
+		*phys_addr = page_start + page_off + dram_base;
 	} else {
 		/*
 		 * find the correct hop shift field in hl_mmu_properties
......
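To see why the new arithmetic is needed, here is a standalone sketch of the translation for a hypothetical non-power-of-2 page size. All constants are made up for illustration; only the page_id/page_off arithmetic mirrors the hunk above:

#include <stdint.h>
#include <stdio.h>

static uint64_t fixup_phys_addr(uint64_t tmp_phys_addr, uint64_t virt_addr,
				uint64_t dram_base, uint64_t page_size)
{
	uint64_t abs_phys = tmp_phys_addr - dram_base;
	uint64_t abs_virt = virt_addr - dram_base;
	/* page base comes from the PA, page offset from the VA */
	uint64_t page_start = (abs_phys / page_size) * page_size;

	return dram_base + page_start + abs_virt % page_size;
}

int main(void)
{
	/* hypothetical: 48 MB pages, DRAM base at 0x20000000 */
	uint64_t page = 48ULL << 20, base = 0x20000000ULL;
	uint64_t pa = fixup_phys_addr(base + 2 * page + 7,
				      base + 5 * page + 100, base, page);

	/* prints 2*page + 100: page base kept, VA offset applied */
	printf("0x%llx\n", (unsigned long long)(pa - base));
	return 0;
}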
...@@ -206,12 +206,12 @@ static ssize_t soft_reset_store(struct device *dev,
goto out;
}
- if (!hdev->allow_external_soft_reset) {
- dev_err(hdev->dev, "Device does not support soft-reset\n");
+ if (!hdev->allow_inference_soft_reset) {
+ dev_err(hdev->dev, "Device does not support inference soft-reset\n");
goto out;
}
- dev_warn(hdev->dev, "Soft-Reset requested through sysfs\n");
+ dev_warn(hdev->dev, "Inference Soft-Reset requested through sysfs\n");
hl_device_reset(hdev, 0);
...
# SPDX-License-Identifier: GPL-2.0-only
- HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_hwmgr.o gaudi/gaudi_security.o \
+ HL_GAUDI_FILES := gaudi/gaudi.o gaudi/gaudi_security.o \
gaudi/gaudi_coresight.o
...@@ -661,6 +661,9 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->server_type = HL_SERVER_TYPE_UNKNOWN;
prop->clk_pll_index = HL_GAUDI_MME_PLL;
prop->max_freq_value = GAUDI_MAX_CLK_FREQ;
return 0;
}
...@@ -795,6 +798,7 @@ static int gaudi_early_init(struct hl_device *hdev)
}
prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
hdev->dram_pci_bar_start = pci_resource_start(pdev, HBM_BAR_ID);
/* If FW security is enabled at this point it means no access to ELBI */
if (hdev->asic_prop.fw_security_enabled) {
...@@ -1837,8 +1841,6 @@ static int gaudi_sw_init(struct hl_device *hdev)
gaudi->cpucp_info_get = gaudi_cpucp_info_get;
- gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
hdev->asic_specific = gaudi;
/* Create DMA pool for small allocations */
...@@ -2616,7 +2618,7 @@ static void gaudi_init_e2e(struct hl_device *hdev)
static void gaudi_init_hbm_cred(struct hl_device *hdev)
{
- uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
+ u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
if (hdev->asic_prop.fw_security_enabled)
return;
...@@ -7932,6 +7934,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
{
struct gaudi_device *gaudi = hdev->asic_specific;
u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
u32 fw_fatal_err_flag = 0;
u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
>> EQ_CTL_EVENT_TYPE_SHIFT);
bool reset_required;
...@@ -7972,6 +7975,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
gaudi_print_irq_info(hdev, event_type, true);
gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_GIC500:
...@@ -7979,6 +7983,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
case GAUDI_EVENT_L2_RAM_ECC:
case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
gaudi_print_irq_info(hdev, event_type, false);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_HBM0_SPI_0:
...@@ -7989,6 +7994,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
gaudi_hbm_read_interrupts(hdev,
gaudi_hbm_event_to_dev(event_type),
&eq_entry->hbm_ecc_data);
fw_fatal_err_flag = HL_RESET_FW_FATAL_ERR;
goto reset_device;
case GAUDI_EVENT_HBM0_SPI_1:
...@@ -8171,9 +8177,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
reset_device:
if (hdev->asic_prop.fw_security_enabled)
- hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
+ hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW | fw_fatal_err_flag);
else if (hdev->hard_reset_on_fw_events)
- hl_device_reset(hdev, HL_RESET_HARD);
+ hl_device_reset(hdev, HL_RESET_HARD | fw_fatal_err_flag);
else
hl_fw_unmask_irq(hdev, event_type);
}
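Per the merge description, HL_RESET_FW_FATAL_ERR lets the common reset code bypass reset when the same fatal h/w event repeats right after a device reset, preventing an endless reset loop. That bookkeeping is not part of this hunk, so the following is only a hedged sketch; the flag value, struct, and names (should_bypass_reset, prev_reset_fatal_err) are illustrative, not the driver's actual ones:

#include <stdbool.h>

#define HL_RESET_FW_FATAL_ERR 0x10	/* illustrative bit value */

struct hl_device_sketch {
	bool prev_reset_fatal_err;
};

static bool should_bypass_reset(struct hl_device_sketch *hdev,
				unsigned int flags)
{
	bool fatal = flags & HL_RESET_FW_FATAL_ERR;

	/* same fatal h/w error fired again right after a reset:
	 * bail out instead of resetting the device forever
	 */
	if (fatal && hdev->prev_reset_fatal_err)
		return true;

	hdev->prev_reset_fatal_err = fatal;
	return false;
}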
...@@ -9439,9 +9445,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
.debugfs_read64 = gaudi_debugfs_read64,
.debugfs_write64 = gaudi_debugfs_write64,
.debugfs_read_dma = gaudi_debugfs_read_dma,
- .add_device_attr = gaudi_add_device_attr,
+ .add_device_attr = hl_add_device_attr,
.handle_eqe = gaudi_handle_eqe,
- .set_pll_profile = gaudi_set_pll_profile,
+ .set_pll_profile = hl_set_pll_profile,
.get_events_stat = gaudi_get_events_stat,
.read_pte = gaudi_read_pte,
.write_pte = gaudi_write_pte,
...@@ -9465,7 +9471,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
.halt_coresight = gaudi_halt_coresight,
.ctx_init = gaudi_ctx_init,
.ctx_fini = gaudi_ctx_fini,
- .get_clk_rate = gaudi_get_clk_rate,
+ .get_clk_rate = hl_get_clk_rate,
.get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
.load_firmware_to_device = gaudi_load_firmware_to_device,
.load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
...
...@@ -319,7 +319,6 @@ struct gaudi_internal_qman_info {
* the actual number of internal queues because they are not in
* consecutive order.
* @hbm_bar_cur_addr: current address of HBM PCI bar.
- * @max_freq_value: current max clk frequency.
* @events: array that holds all event id's
* @events_stat: array that holds histogram of all received events.
* @events_stat_aggregate: same as events_stat but doesn't get cleared on reset
...@@ -345,7 +344,6 @@ struct gaudi_device {
struct gaudi_collective_properties collective_props;
u64 hbm_bar_cur_addr;
- u64 max_freq_value;
u32 events[GAUDI_EVENT_SIZE];
u32 events_stat[GAUDI_EVENT_SIZE];
...@@ -359,10 +357,8 @@ void gaudi_init_security(struct hl_device *hdev);
void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
void gaudi_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
- void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
int gaudi_debug_coresight(struct hl_device *hdev, void *data);
void gaudi_halt_coresight(struct hl_device *hdev);
- int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid);
#endif /* GAUDIP_H_ */
...@@ -471,6 +471,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->server_type = HL_SERVER_TYPE_UNKNOWN;
prop->clk_pll_index = HL_GOYA_MME_PLL;
return 0;
}
...@@ -622,6 +624,7 @@ static int goya_early_init(struct hl_device *hdev)
}
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
hdev->dram_pci_bar_start = pci_resource_start(pdev, DDR_BAR_ID);
/* If FW security is enabled at this point it means no access to ELBI */
if (hdev->asic_prop.fw_security_enabled) {
...@@ -959,7 +962,7 @@ static int goya_sw_init(struct hl_device *hdev)
spin_lock_init(&goya->hw_queues_lock);
hdev->supports_coresight = true;
hdev->supports_soft_reset = true;
- hdev->allow_external_soft_reset = true;
+ hdev->allow_inference_soft_reset = true;
hdev->supports_wait_for_multi_cs = false;
hdev->asic_funcs->set_pci_memory_regions(hdev);
...@@ -4829,6 +4832,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
case GOYA_ASYNC_EVENT_ID_PLL0 ... GOYA_ASYNC_EVENT_ID_PLL6:
case GOYA_ASYNC_EVENT_ID_AXI_ECC:
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
hl_device_reset(hdev, (HL_RESET_HARD |
HL_RESET_FW_FATAL_ERR));
break;
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
goya_print_irq_info(hdev, event_type, false);
if (hdev->hard_reset_on_fw_events)
...@@ -5649,7 +5658,7 @@ static const struct hl_asic_funcs goya_funcs = {
.halt_coresight = goya_halt_coresight,
.ctx_init = goya_ctx_init,
.ctx_fini = goya_ctx_fini,
- .get_clk_rate = goya_get_clk_rate,
+ .get_clk_rate = hl_get_clk_rate,
.get_queue_id_for_cq = goya_get_queue_id_for_cq,
.load_firmware_to_device = goya_load_firmware_to_device,
.load_boot_fit_to_device = goya_load_boot_fit_to_device,
...
...@@ -235,7 +235,6 @@ void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
void *vaddr);
void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev);
- int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk);
u32 goya_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx);
u64 goya_get_device_time(struct hl_device *hdev);
...
...@@ -32,37 +32,6 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq)
}
}
- int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
- {
- long value;
- if (!hl_device_operational(hdev, NULL))
- return -ENODEV;
- value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false);
- if (value < 0) {
- dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
- value);
- return value;
- }
- *max_clk = (value / 1000 / 1000);
- value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true);
- if (value < 0) {
- dev_err(hdev->dev,
- "Failed to retrieve device current clock %ld\n",
- value);
- return value;
- }
- *cur_clk = (value / 1000 / 1000);
- return 0;
- }
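The per-ASIC helper removed above is replaced by a common hl_get_clk_rate as part of the frequency set/get unification. Presumably it matches the removed code except that the PLL index comes from the new asic_prop.clk_pll_index property each ASIC now sets; a sketch:

/* Sketch of the unified helper; it mirrors the removed code above but
 * reads the PLL index from asic_prop.clk_pll_index instead of
 * hard-coding HL_GOYA_MME_PLL.
 */
int hl_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk)
{
	long value;

	if (!hl_device_operational(hdev, NULL))
		return -ENODEV;

	/* false = max frequency, true = current frequency */
	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, false);
	if (value < 0) {
		dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n",
			value);
		return value;
	}
	*max_clk = (value / 1000 / 1000);	/* Hz -> MHz */

	value = hl_get_frequency(hdev, hdev->asic_prop.clk_pll_index, true);
	if (value < 0) {
		dev_err(hdev->dev,
			"Failed to retrieve device current clock %ld\n",
			value);
		return value;
	}
	*cur_clk = (value / 1000 / 1000);

	return 0;
}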
static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
...
...@@ -542,11 +542,14 @@ enum cpucp_packet_rc {
*/
enum cpucp_temp_type {
cpucp_temp_input,
cpucp_temp_min = 4,
cpucp_temp_min_hyst,
cpucp_temp_max = 6,
cpucp_temp_max_hyst,
cpucp_temp_crit,
cpucp_temp_crit_hyst,
cpucp_temp_offset = 19,
cpucp_temp_lowest = 21,
cpucp_temp_highest = 22,
cpucp_temp_reset_history = 23
};
...@@ -555,6 +558,7 @@ enum cpucp_in_attributes {
cpucp_in_input,
cpucp_in_min,
cpucp_in_max,
cpucp_in_lowest = 6,
cpucp_in_highest = 7,
cpucp_in_reset_history
};
...@@ -563,6 +567,7 @@ enum cpucp_curr_attributes {
cpucp_curr_input,
cpucp_curr_min,
cpucp_curr_max,
cpucp_curr_lowest = 6,
cpucp_curr_highest = 7,
cpucp_curr_reset_history
};
...@@ -598,6 +603,16 @@ enum cpucp_pll_type_attributes {
cpucp_pll_pci,
};
/*
* cpucp_power_type aligns with hwmon_power_attributes
* defined in Linux kernel hwmon.h file
*/
enum cpucp_power_type {
CPUCP_POWER_INPUT = 8,
CPUCP_POWER_INPUT_HIGHEST = 9,
CPUCP_POWER_RESET_INPUT_HISTORY = 11
};
/*
* MSI type enumeration table for all ASICs and future SW versions.
* For future ASIC-LKD compatibility, we can only add new enumerations.
...@@ -731,6 +746,9 @@ struct cpucp_security_info {
* @pll_map: Bit map of supported PLLs for current ASIC version.
* @mme_binning_mask: MME binning mask,
* (0 = functional, 1 = binned)
* @dram_binning_mask: DRAM binning mask, 1 bit per dram instance
* (0 = functional, 1 = binned)
* @memory_repair_flag: eFuse flag indicating memory repair
*/
struct cpucp_info {
struct cpucp_sensor sensors[CPUCP_MAX_SENSORS];
...@@ -749,7 +767,9 @@ struct cpucp_info {
__le64 reserved3;
__le64 reserved4;
__u8 reserved5;
- __u8 pad[7];
+ __u8 dram_binning_mask;
+ __u8 memory_repair_flag;
+ __u8 pad[5];
struct cpucp_security_info sec_info;
__le32 reserved6;
__u8 pll_map[PLL_MAP_LEN];
...
...@@ -8,8 +8,6 @@
#ifndef GAUDI_FW_IF_H
#define GAUDI_FW_IF_H
- #include <linux/types.h>
#define GAUDI_EVENT_QUEUE_MSI_IDX 8
#define GAUDI_NIC_PORT1_MSI_IDX 10
#define GAUDI_NIC_PORT3_MSI_IDX 12
...@@ -78,13 +76,13 @@ struct gaudi_nic_status {
__u32 high_ber_cnt;
};
- struct gaudi_flops_2_data {
+ struct gaudi_cold_rst_data {
union {
struct {
- __u32 spsram_init_done : 1;
- __u32 reserved : 31;
+ u32 spsram_init_done : 1;
+ u32 reserved : 31;
};
- __u32 data;
+ __le32 data;
};
};
...
...@@ -33,6 +33,7 @@
#define mmRDWR_TEST mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
#define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31
#define mmPREBOOT_PCIE_EN mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_1
#define mmCOLD_RST_DATA mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_2
#define mmUPD_PENDING_STS mmPSOC_GLOBAL_CONF_COLD_RST_FLOPS_3
#endif /* GAUDI_REG_MAP_H_ */
...@@ -272,6 +272,16 @@ enum hl_gaudi_pll_index {
HL_GAUDI_PLL_MAX
};
/**
* enum hl_device_status - Device status information.
* @HL_DEVICE_STATUS_OPERATIONAL: Device is operational.
* @HL_DEVICE_STATUS_IN_RESET: Device is currently during reset.
* @HL_DEVICE_STATUS_MALFUNCTION: Device is unusable.
* @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled.
* @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in
* progress.
* @HL_DEVICE_STATUS_LAST: Last status.
*/
enum hl_device_status {
HL_DEVICE_STATUS_OPERATIONAL,
HL_DEVICE_STATUS_IN_RESET,
...@@ -556,33 +566,30 @@ enum gaudi_dcores {
HL_GAUDI_ES_DCORE
};
/**
* struct hl_info_args - Main structure to retrieve device related information.
* @return_pointer: User space address of the relevant structure related to HL_INFO_* operation
* mentioned in @op.
* @return_size: Size of the structure used in @return_pointer, just like "size" in "snprintf", it
* limits how many bytes the kernel can write. For hw_events array, the size should be
* hl_info_hw_ip_info.num_of_events * sizeof(__u32).
* @op: Defines which type of information to be retrieved. Refer HL_INFO_* for details.
* @dcore_id: DCORE id for which the information is relevant (for Gaudi refer to enum gaudi_dcores).
* @ctx_id: Context ID of the user. Currently not in use.
* @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms in 100 ms
* resolution. Currently not in use.
* @pll_index: Index as defined in hl_<asic type>_pll_index enumeration.
* @pad: Padding to 64 bit.
*/
struct hl_info_args {
- /* Location of relevant struct in userspace */
__u64 return_pointer;
- /*
- * The size of the return value. Just like "size" in "snprintf",
- * it limits how many bytes the kernel can write
- *
- * For hw_events array, the size should be
- * hl_info_hw_ip_info.num_of_events * sizeof(__u32)
- */
__u32 return_size;
- /* HL_INFO_* */
__u32 op;
union {
- /* Dcore id for which the information is relevant.
- * For Gaudi refer to 'enum gaudi_dcores'
- */
__u32 dcore_id;
- /* Context ID - Currently not in use */
__u32 ctx_id;
- /* Period value for utilization rate (100ms - 1000ms, in 100ms
- * resolution.
- */
__u32 period_ms;
- /* PLL frequency retrieval */
__u32 pll_index;
};
...@@ -890,11 +897,7 @@ struct hl_wait_cs_in {
*/
__u64 addr;
/* Target value for completion comparison */
- __u32 target;
+ __u64 target;
- /* Absolute timeout to wait for interrupt
- * in microseconds
- */
- __u32 interrupt_timeout_us;
};
};
...@@ -910,7 +913,12 @@ struct hl_wait_cs_in {
/* Multi CS API info - valid entries in multi-CS array */
__u8 seq_arr_len;
- __u8 pad[7];
+ __u8 pad[3];
/* Absolute timeout to wait for an interrupt in microseconds.
* Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set
*/
__u32 interrupt_timeout_us;
};
#define HL_WAIT_CS_STATUS_COMPLETED 0
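With target widened to 64 bits and interrupt_timeout_us moved out of the union, a user-space wait on an interrupt-backed fence looks roughly as follows. Field and macro names are per the installed uAPI header; the interrupt-selection bits that also live in the flags field are omitted here for brevity:

/* Wait until the device writes a 64-bit fence value >= target to a
 * user-mapped address, or the timeout expires.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* installed uAPI header */

int wait_on_fence(int fd, __u64 cq_addr, __u64 target, __u32 timeout_us)
{
	union hl_wait_cs_args args;

	memset(&args, 0, sizeof(args));
	args.in.flags = HL_WAIT_CS_FLAGS_INTERRUPT;
	args.in.addr = cq_addr;
	args.in.target = target;	/* now a full 64-bit fence value */
	args.in.interrupt_timeout_us = timeout_us;

	if (ioctl(fd, HL_IOCTL_WAIT_CS, &args))
		return -1;

	return args.out.status == HL_WAIT_CS_STATUS_COMPLETED ? 0 : -1;
}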
...@@ -952,6 +960,10 @@ union hl_wait_cs_args {
#define HL_MEM_OP_UNMAP 3
/* Opcode to map a hw block */
#define HL_MEM_OP_MAP_BLOCK 4
/* Opcode to create DMA-BUF object for an existing device memory allocation
* and to export an FD of that DMA-BUF back to the caller
*/
#define HL_MEM_OP_EXPORT_DMABUF_FD 5
/* Memory flags */
#define HL_MEM_CONTIGUOUS 0x1
...@@ -1023,11 +1035,26 @@ struct hl_mem_in {
/* Virtual address returned from HL_MEM_OP_MAP */
__u64 device_virt_addr;
} unmap;
/* HL_MEM_OP_EXPORT_DMABUF_FD */
struct {
/* Handle returned from HL_MEM_OP_ALLOC. In Gaudi,
* where we don't have MMU for the device memory, the
* driver expects a physical address (instead of
* a handle) in the device memory space.
*/
__u64 handle;
/* Size of memory allocation. Relevant only for GAUDI */
__u64 mem_size;
} export_dmabuf_fd;
};
/* HL_MEM_OP_* */
__u32 op;
- /* HL_MEM_* flags */
+ /* HL_MEM_* flags.
+ * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the
+ * DMA-BUF file/FD flags.
+ */
__u32 flags;
/* Context ID - Currently not in use */
__u32 ctx_id;
...@@ -1064,6 +1091,13 @@ struct hl_mem_out {
__u32 pad;
};
/* Returned in HL_MEM_OP_EXPORT_DMABUF_FD. Represents the
* DMA-BUF object that was created to describe a memory
* allocation on the device's memory space. The FD should be
* passed to the importer driver
*/
__s32 fd;
};
};
...
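Putting the new opcode together, a user-space sketch of exporting device memory as a DMA-BUF FD, assuming the union hl_mem_args wrapper from the uAPI header; on Gaudi, handle is a physical DRAM address and mem_size must be set, per the comments above:

/* Turn a device memory allocation into a DMA-BUF FD that can be handed
 * to an importer such as an RDMA driver (the peer-to-peer use case this
 * series enables).
 */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>

int export_dmabuf(int fd, __u64 handle, __u64 size)
{
	union hl_mem_args args;

	memset(&args, 0, sizeof(args));
	args.in.op = HL_MEM_OP_EXPORT_DMABUF_FD;
	args.in.flags = O_RDWR | O_CLOEXEC;	/* DMA-BUF file/FD flags */
	args.in.export_dmabuf_fd.handle = handle;
	args.in.export_dmabuf_fd.mem_size = size;	/* Gaudi only */

	if (ioctl(fd, HL_IOCTL_MEMORY, &args))
		return -1;

	return args.out.fd;	/* pass this FD to the importer driver */
}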