Commit 08057253 authored by Dave Airlie

Merge tag 'drm-habanalabs-next-2023-10-10' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into drm-next

This tag contains habanalabs driver changes for v6.7.

The notable changes are:

- uAPI changes:
  - Expose TSC clock sampling to better synchronize clock information in the profiler.
  - Enhance engine error reporting in the info ioctl.
  - Block access to the eventfd operations through the control device.
  - Disable the option for a user to register multiple times with the same
    offset for timestamp dump by the driver. If a user wants to use the same
    offset in the timestamp buffer for a different interrupt, it must first
    de-register the offset.
  - When exporting a dma-buf (for p2p), require the user to specify the size and
    offset in multiples of PAGE_SIZE, instead of the driver rounding them up to
    PAGE_SIZE, which caused it to map more memory than the user intended (see
    the sketch right after this list).
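
    A minimal sketch of the kind of check implied by the last item above
    (illustrative only; the helper name is made up and this is not the driver's
    actual code):

    #include <linux/types.h>
    #include <linux/mm.h>
    #include <linux/align.h>
    #include <linux/errno.h>

    /* Reject dma-buf export requests whose offset or size is not page-aligned,
     * instead of silently rounding up and mapping more memory than requested.
     */
    static int validate_export_params(u64 offset, u64 size)
    {
    	if (!size || !IS_ALIGNED(offset, PAGE_SIZE) || !IS_ALIGNED(size, PAGE_SIZE))
    		return -EINVAL;

    	return 0;
    }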

- New features and improvements:
  - Complete the move of the driver to the accel subsystem by removing the
    custom habanalabs class and char-device major and registering with the
    accel subsystem.
  - Move the firmware interface files to include/linux/habanalabs. This is
    a prerequisite for upstreaming the Gaudi NIC drivers (as they need to
    include those files).
  - Perform device hard-reset upon PCIe AXI drain event to prevent the failure
    from cascading to different IP blocks in the SoC. In secured environments,
    this is done automatically by the firmware.
  - Print device name when it is removed for better debuggability.
  - Add support for tracing dma map sgtable operations.
  - Optimize handling of user interrupts by splitting the interrupts into two
    lists: one list for fast handling and a second list for handling with
    timestamp recording, which is slower.
  - Prevent double device hard-reset due to 2 adjacent H/W events.
  - Set the device status to 'malfunction' during rmmod.

- Firmware related fixes:
  - Extend preboot timeout because preboot loading might take longer than
    expected in certain cases.
  - Add a protection mechanism for the Event Queue: if it becomes full, the
    firmware can notify the driver about it through a dedicated interrupt.
  - Perform device hard-reset in case scrubbing of memory has failed.

- Bug fixes and code cleanups:
  - Small fixes to dma-buf handling in Gaudi2, such as handling a non-zero offset,
    using the correct exported size, and sg-table creation.
  - Fix SPMU event mask creation.
  - Fix a bug in waiting for CS completion for decoder workloads.
  - Remove the Greco name from the documentation.
  - Fix a bug in timestamp recording during CS-completion interrupt handling.
  - Fix CoreSight ETF configuration and flush logic.
  - Fix a small bug in hpriv_list handling (the list that holds the private
    data of each process that opens the device).
Signed-off-by: Dave Airlie <airlied@redhat.com>

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCgAdFiEE7TEboABC71LctBLFZR1NuKta54AFAmUlHoQACgkQZR1NuKta
# 54DsXQf8CW+W4iWJf5UDTj/E/giu9rVRrsUsU0hhCcXbecIxRsLObYXtulENu5/u
# VuEAo/tAvo0LUKi8pdIv6ernDKaxZ1+fimlfXMCzllAA/ts3yp1NgunprsIsx3tv
# YgcJ2GNR8UlVZ1qYuZl+4dOTyD0yfRMROUXBe7wqKnUXOEepOiLBxq6W15tZiJnx
# L+V0yGkNk6pAoADIXLW9EgEXiN/bJZCXGPWp06i/Nz7cHIHJGoV59wAqftqllCtk
# 8ZMkLByjlQKPhc5AgWBtKE8EGVip3sm7b/Q2Gq0ZXdZiebyVJ+AjuuDOdtq1UCIw
# Rcp2576E7rByIBu3RAFlrioWhuR5Zw==
# =2ien
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 10 Oct 2023 19:51:00 AEST
# gpg:                using RSA key ED311BA00042EF52DCB412C5651D4DB8AB5AE780
# gpg: Can't check signature: No public key
From: Oded Gabbay <ogabbay@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/ZSUfiX4J7v4Wn0cU@ogabbay-vm-u22.habana-labs.com
parents 614351f4 4db74c0f
......@@ -9086,6 +9086,7 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git
F: Documentation/ABI/testing/debugfs-driver-habanalabs
F: Documentation/ABI/testing/sysfs-driver-habanalabs
F: drivers/accel/habanalabs/
F: include/linux/habanalabs/
F: include/trace/events/habanalabs.h
F: include/uapi/drm/habanalabs_accel.h
......
......@@ -21,7 +21,6 @@ static DEFINE_SPINLOCK(accel_minor_lock);
static struct idr accel_minors_idr;
static struct dentry *accel_debugfs_root;
static struct class *accel_class;
static struct device_type accel_sysfs_device_minor = {
.name = "accel_minor"
......@@ -32,23 +31,19 @@ static char *accel_devnode(const struct device *dev, umode_t *mode)
return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev));
}
static const struct class accel_class = {
.name = "accel",
.devnode = accel_devnode,
};
static int accel_sysfs_init(void)
{
accel_class = class_create("accel");
if (IS_ERR(accel_class))
return PTR_ERR(accel_class);
accel_class->devnode = accel_devnode;
return 0;
return class_register(&accel_class);
}
static void accel_sysfs_destroy(void)
{
if (IS_ERR_OR_NULL(accel_class))
return;
class_destroy(accel_class);
accel_class = NULL;
class_unregister(&accel_class);
}
static int accel_name_info(struct seq_file *m, void *data)
......@@ -117,7 +112,7 @@ void accel_debugfs_register(struct drm_device *dev)
void accel_set_device_instance_params(struct device *kdev, int index)
{
kdev->devt = MKDEV(ACCEL_MAJOR, index);
kdev->class = accel_class;
kdev->class = &accel_class;
kdev->type = &accel_sysfs_device_minor;
}
......
......@@ -361,10 +361,11 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
return rc;
}
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
union hl_cb_args *args = data;
struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = hpriv->hdev;
union hl_cb_args *args = data;
u64 handle = 0, device_va = 0;
enum hl_device_status status;
u32 usage_cnt = 0;
......
......@@ -102,7 +102,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
kfree(ctx->cs_pending);
if (ctx->asid != HL_KERNEL_ASID_ID) {
dev_dbg(hdev->dev, "closing user context %d\n", ctx->asid);
dev_dbg(hdev->dev, "closing user context, asid=%u\n", ctx->asid);
/* The engines are stopped as there is no executing CS, but the
* Coresight might be still working by accessing addresses
......@@ -119,6 +119,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
mutex_destroy(&ctx->ts_reg_lock);
} else {
dev_dbg(hdev->dev, "closing kernel context\n");
hdev->asic_funcs->ctx_fini(ctx);
......@@ -198,6 +199,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
char task_comm[TASK_COMM_LEN];
int rc = 0, i;
ctx->hdev = hdev;
......@@ -267,7 +269,10 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
hl_encaps_sig_mgr_init(&ctx->sig_mgr);
dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
mutex_init(&ctx->ts_reg_lock);
dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
get_task_comm(task_comm, current), ctx->asid);
}
return 0;
......
......@@ -18,8 +18,6 @@
#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
#define I2C_MAX_TRANSACTION_LEN 8
static struct dentry *hl_debug_root;
static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
u8 i2c_reg, u8 i2c_len, u64 *val)
{
......@@ -1788,20 +1786,14 @@ void hl_debugfs_add_device(struct hl_device *hdev)
{
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root);
dev_entry->root = hdev->drm.accel->debugfs_root;
add_files_to_device(hdev, dev_entry, dev_entry->root);
if (!hdev->asic_prop.fw_security_enabled)
add_secured_nodes(dev_entry, dev_entry->root);
}
void hl_debugfs_remove_device(struct hl_device *hdev)
{
struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
debugfs_remove_recursive(entry->root);
}
void hl_debugfs_add_file(struct hl_fpriv *hpriv)
{
struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
......@@ -1932,13 +1924,3 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
up_write(&dev_entry->state_dump_sem);
}
void __init hl_debugfs_init(void)
{
hl_debug_root = debugfs_create_dir("habanalabs", NULL);
}
void hl_debugfs_fini(void)
{
debugfs_remove_recursive(hl_debug_root);
}
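
With the custom habanalabs debugfs root gone, per-device entries hang off the
directory the accel core creates for each device. A minimal sketch of that
pattern (the entry name and value below are hypothetical, not part of the
driver):

#include <linux/debugfs.h>

static u32 example_value;

static void example_add_debugfs(struct hl_device *hdev)
{
	/* The accel core owns the per-device debugfs directory; the driver only
	 * creates its entries underneath it.
	 */
	struct dentry *root = hdev->drm.accel->debugfs_root;

	debugfs_create_u32("example_value", 0444, root, &example_value);
}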
......@@ -6,7 +6,7 @@
*/
#include "habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include <linux/habanalabs/hl_boot_if.h>
#include <linux/firmware.h>
#include <linux/crc32.h>
......@@ -724,6 +724,11 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
/* Ignore this bit, don't prevent driver loading */
dev_dbg(hdev->dev, "device unusable status is set\n");
......@@ -1459,6 +1464,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
dev_err(hdev->dev,
"Device boot progress - Stuck in preboot after security initialization\n");
break;
case CPU_BOOT_STATUS_FW_SHUTDOWN_PREP:
dev_err(hdev->dev,
"Device boot progress - Stuck in preparation for shutdown\n");
break;
default:
dev_err(hdev->dev,
"Device boot progress - Invalid or unexpected status code %d\n", status);
......@@ -1469,8 +1478,9 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
int hl_fw_wait_preboot_ready(struct hl_device *hdev)
{
struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
u32 status;
int rc;
u32 status = 0, timeout;
int rc, tries = 1;
bool preboot_still_runs;
/* Need to check two possible scenarios:
*
......@@ -1480,6 +1490,8 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
* All other status values - for older firmwares where the uboot was
* loaded from the FLASH
*/
timeout = pre_fw_load->wait_for_preboot_timeout;
retry:
rc = hl_poll_timeout(
hdev,
pre_fw_load->cpu_boot_status_reg,
......@@ -1488,7 +1500,24 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
(status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
hdev->fw_poll_interval_usec,
pre_fw_load->wait_for_preboot_timeout);
timeout);
/*
* if F/W reports "security-ready" it means preboot might take longer.
* If the field 'wait_for_preboot_extended_timeout' is non 0 we wait again
* with that timeout
*/
preboot_still_runs = (status == CPU_BOOT_STATUS_SECURITY_READY ||
status == CPU_BOOT_STATUS_IN_PREBOOT ||
status == CPU_BOOT_STATUS_FW_SHUTDOWN_PREP ||
status == CPU_BOOT_STATUS_DRAM_RDY);
if (rc && tries && preboot_still_runs) {
tries--;
if (pre_fw_load->wait_for_preboot_extended_timeout) {
timeout = pre_fw_load->wait_for_preboot_extended_timeout;
goto retry;
}
}
if (rc) {
detect_cpu_boot_status(hdev, status);
......@@ -2743,7 +2772,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
struct lkd_fw_binning_info *binning_info;
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, 0);
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
sizeof(struct lkd_msg_comms));
if (rc)
goto protocol_err;
......@@ -2777,6 +2807,11 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->decoder_binning, hdev->rotator_binning);
}
if (hdev->asic_prop.support_dynamic_resereved_fw_size) {
hdev->asic_prop.reserved_fw_mem_size =
le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb);
}
return 0;
}
......
......@@ -14,6 +14,11 @@
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/version.h>
#include <drm/drm_accel.h>
#include <drm/drm_drv.h>
#include <drm/drm_ioctl.h>
#define CREATE_TRACE_POINTS
#include <trace/events/habanalabs.h>
......@@ -27,7 +32,6 @@ MODULE_DESCRIPTION(HL_DRIVER_DESC);
MODULE_LICENSE("GPL v2");
static int hl_major;
static struct class *hl_class;
static DEFINE_IDR(hl_devs_idr);
static DEFINE_MUTEX(hl_devs_idr_lock);
......@@ -70,6 +74,42 @@ static const struct pci_device_id ids[] = {
};
MODULE_DEVICE_TABLE(pci, ids);
static const struct drm_ioctl_desc hl_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(HL_INFO, hl_info_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_MEMORY, hl_mem_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_DEBUG, hl_debug_ioctl, 0),
};
static const struct file_operations hl_fops = {
.owner = THIS_MODULE,
.open = accel_open,
.release = drm_release,
.unlocked_ioctl = drm_ioctl,
.compat_ioctl = drm_compat_ioctl,
.llseek = noop_llseek,
.mmap = hl_mmap
};
static const struct drm_driver hl_driver = {
.driver_features = DRIVER_COMPUTE_ACCEL,
.name = HL_NAME,
.desc = HL_DRIVER_DESC,
.major = LINUX_VERSION_MAJOR,
.minor = LINUX_VERSION_PATCHLEVEL,
.patchlevel = LINUX_VERSION_SUBLEVEL,
.date = "20190505",
.fops = &hl_fops,
.open = hl_device_open,
.postclose = hl_device_release,
.ioctls = hl_drm_ioctls,
.num_ioctls = ARRAY_SIZE(hl_drm_ioctls)
};
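
For context, a compute-accel DRM driver built around structures like the ones
above is typically brought up as sketched below (an assumed pattern, not the
driver's actual probe code, which is not shown in the visible hunks):

#include <linux/err.h>
#include <linux/pci.h>
#include <drm/drm_drv.h>

static int example_register(struct pci_dev *pdev)
{
	struct hl_device *hdev;

	/* Allocate the DRM device embedded in the driver's private structure
	 * (the same call this series uses in create_hdev() further down).
	 */
	hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm);
	if (IS_ERR(hdev))
		return PTR_ERR(hdev);

	/* ... PCI/ASIC initialization would go here ... */

	/* Registration creates the /dev/accel/accelN node through the accel core. */
	return drm_dev_register(&hdev->drm, 0);
}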
/*
* get_asic_type - translate device id to asic type
*
......@@ -123,43 +163,28 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
}
/*
* hl_device_open - open function for habanalabs device
*
* @inode: pointer to inode structure
* @filp: pointer to file structure
* hl_device_open() - open function for habanalabs device.
* @ddev: pointer to DRM device structure.
* @file: pointer to DRM file private data structure.
*
* Called when process opens an habanalabs device.
*/
int hl_device_open(struct inode *inode, struct file *filp)
int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv)
{
struct hl_device *hdev = to_hl_device(ddev);
enum hl_device_status status;
struct hl_device *hdev;
struct hl_fpriv *hpriv;
int rc;
mutex_lock(&hl_devs_idr_lock);
hdev = idr_find(&hl_devs_idr, iminor(inode));
mutex_unlock(&hl_devs_idr_lock);
if (!hdev) {
pr_err("Couldn't find device %d:%d\n",
imajor(inode), iminor(inode));
return -ENXIO;
}
hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
if (!hpriv)
return -ENOMEM;
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
mutex_init(&hpriv->notifier_event.lock);
mutex_init(&hpriv->restore_phase_mutex);
mutex_init(&hpriv->ctx_lock);
kref_init(&hpriv->refcount);
nonseekable_open(inode, filp);
hl_ctx_mgr_init(&hpriv->ctx_mgr);
hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
......@@ -225,6 +250,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
hdev->last_successful_open_jif = jiffies;
hdev->last_successful_open_ktime = ktime_get();
file_priv->driver_priv = hpriv;
hpriv->file_priv = file_priv;
return 0;
out_err:
......@@ -232,7 +260,6 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_mem_mgr_fini(&hpriv->mem_mgr);
hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
mutex_destroy(&hpriv->ctx_lock);
mutex_destroy(&hpriv->restore_phase_mutex);
mutex_destroy(&hpriv->notifier_event.lock);
......@@ -268,9 +295,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
*/
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
mutex_init(&hpriv->notifier_event.lock);
nonseekable_open(inode, filp);
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
......@@ -317,7 +342,6 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
hdev->major = hl_major;
hdev->hclass = hl_class;
hdev->memory_scrub = memory_scrub;
hdev->reset_on_lockup = reset_on_lockup;
hdev->boot_error_status_mask = boot_error_status_mask;
......@@ -383,6 +407,31 @@ static int fixup_device_params(struct hl_device *hdev)
return 0;
}
static int allocate_device_id(struct hl_device *hdev)
{
int id;
mutex_lock(&hl_devs_idr_lock);
id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
mutex_unlock(&hl_devs_idr_lock);
if (id < 0) {
if (id == -ENOSPC)
pr_err("too many devices in the system\n");
return -EBUSY;
}
hdev->id = id;
/*
* Firstly initialized with the internal device ID.
* Will be updated later after the DRM device registration to hold the minor ID.
*/
hdev->cdev_idx = hdev->id;
return 0;
}
/**
* create_hdev - create habanalabs device instance
*
......@@ -395,26 +444,28 @@ static int fixup_device_params(struct hl_device *hdev)
*/
static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
{
int main_id, ctrl_id = 0, rc = 0;
struct hl_device *hdev;
int rc;
*dev = NULL;
hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
if (!hdev)
return -ENOMEM;
hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm);
if (IS_ERR(hdev))
return PTR_ERR(hdev);
hdev->dev = hdev->drm.dev;
/* Will be NULL in case of simulator device */
hdev->pdev = pdev;
/* Assign status description string */
strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
"in device creation", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
"in reset after device release", HL_STR_MAX);
......@@ -425,7 +476,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
if (hdev->asic_type == ASIC_INVALID) {
dev_err(&pdev->dev, "Unsupported ASIC\n");
rc = -ENODEV;
goto free_hdev;
goto out_err;
}
copy_kernel_module_params_to_device(hdev);
......@@ -434,42 +485,15 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
fixup_device_params(hdev);
mutex_lock(&hl_devs_idr_lock);
/* Always save 2 numbers, 1 for main device and 1 for control.
* They must be consecutive
*/
main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
if (main_id >= 0)
ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
main_id + 2, GFP_KERNEL);
mutex_unlock(&hl_devs_idr_lock);
if ((main_id < 0) || (ctrl_id < 0)) {
if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
pr_err("too many devices in the system\n");
if (main_id >= 0) {
mutex_lock(&hl_devs_idr_lock);
idr_remove(&hl_devs_idr, main_id);
mutex_unlock(&hl_devs_idr_lock);
}
rc = -EBUSY;
goto free_hdev;
}
hdev->id = main_id;
hdev->id_control = ctrl_id;
rc = allocate_device_id(hdev);
if (rc)
goto out_err;
*dev = hdev;
return 0;
free_hdev:
kfree(hdev);
out_err:
return rc;
}
......@@ -484,10 +508,8 @@ static void destroy_hdev(struct hl_device *hdev)
/* Remove device from the device list */
mutex_lock(&hl_devs_idr_lock);
idr_remove(&hl_devs_idr, hdev->id);
idr_remove(&hl_devs_idr, hdev->id_control);
mutex_unlock(&hl_devs_idr_lock);
kfree(hdev);
}
static int hl_pmops_suspend(struct device *dev)
......@@ -691,28 +713,16 @@ static int __init hl_init(void)
hl_major = MAJOR(dev);
hl_class = class_create(HL_NAME);
if (IS_ERR(hl_class)) {
pr_err("failed to allocate class\n");
rc = PTR_ERR(hl_class);
goto remove_major;
}
hl_debugfs_init();
rc = pci_register_driver(&hl_pci_driver);
if (rc) {
pr_err("failed to register pci device\n");
goto remove_debugfs;
goto remove_major;
}
pr_debug("driver loaded\n");
return 0;
remove_debugfs:
hl_debugfs_fini();
class_destroy(hl_class);
remove_major:
unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
return rc;
......@@ -725,14 +735,6 @@ static void __exit hl_exit(void)
{
pci_unregister_driver(&hl_pci_driver);
/*
* Removing debugfs must be after all devices or simulator devices
* have been removed because otherwise we get a bug in the
* debugfs module for referencing NULL objects
*/
hl_debugfs_fini();
class_destroy(hl_class);
unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
idr_destroy(&hl_devs_idr);
......
......@@ -17,6 +17,8 @@
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <asm/msr.h>
static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
[HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
......@@ -320,6 +322,7 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
time_sync.device_time = hdev->asic_funcs->get_device_time(hdev);
time_sync.host_time = ktime_get_raw_ns();
time_sync.tsc_time = rdtsc();
return copy_to_user(out, &time_sync,
min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
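
The new tsc_time field lets a profiler correlate device timestamps with a host
TSC value sampled at almost the same instant. A hypothetical user-space sketch
of that correlation (the clock-rate parameters and helper name are illustrative
assumptions, not part of the uAPI):

#include <stdint.h>
#include <drm/habanalabs_accel.h>

/* Map a raw device timestamp into the host TSC domain using one
 * HL_INFO_TIME_SYNC sample, assuming known clock rates for both domains.
 */
static uint64_t device_ts_to_tsc(uint64_t device_ts,
				 const struct hl_info_time_sync *sync,
				 double device_hz, double tsc_hz)
{
	double delta_sec = (double)(device_ts - sync->device_time) / device_hz;

	return sync->tsc_time + (uint64_t)(delta_sec * tsc_hz);
}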
......@@ -875,6 +878,28 @@ static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
return rc ? -EFAULT : 0;
}
static int engine_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer;
struct hl_device *hdev = hpriv->hdev;
u32 user_buf_size = args->return_size;
struct engine_err_info *info;
int rc;
if (!user_buf)
return -EINVAL;
info = &hdev->captured_err_info.engine_err;
if (!info->event_info_available)
return 0;
if (user_buf_size < sizeof(struct hl_info_engine_err_event))
return -ENOMEM;
rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_engine_err_event));
return rc ? -EFAULT : 0;
}
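
A hypothetical user-space sketch of querying the new engine error event through
the info ioctl (assumes the uAPI in include/uapi/drm/habanalabs_accel.h after
this series; error handling trimmed):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/habanalabs_accel.h>

static int read_engine_error(int fd, struct hl_info_engine_err_event *event)
{
	struct hl_info_args args;

	memset(&args, 0, sizeof(args));
	memset(event, 0, sizeof(*event));

	args.op = HL_INFO_USER_ENGINE_ERR_EVENT;
	args.return_pointer = (uint64_t)(uintptr_t)event;
	args.return_size = sizeof(*event);

	/* Returns 0 and leaves *event all-zero if no engine error was captured. */
	return ioctl(fd, DRM_IOCTL_HL_INFO, &args);
}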
static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args)
{
void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer;
......@@ -1001,6 +1026,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_FW_ERR_EVENT:
return fw_err_info(hpriv, args);
case HL_INFO_USER_ENGINE_ERR_EVENT:
return engine_err_info(hpriv, args);
case HL_INFO_DRAM_USAGE:
return dram_usage_info(hpriv, args);
default:
......@@ -1070,20 +1098,34 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
return rc;
}
static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_fpriv *hpriv = file_priv->driver_priv;
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
}
static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
{
struct hl_info_args *args = data;
switch (args->op) {
case HL_INFO_GET_EVENTS:
case HL_INFO_UNREGISTER_EVENTFD:
case HL_INFO_REGISTER_EVENTFD:
return -EOPNOTSUPP;
default:
break;
}
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
}
static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{
struct hl_debug_args *args = data;
struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = hpriv->hdev;
struct hl_debug_args *args = data;
enum hl_device_status status;
int rc = 0;
......@@ -1126,25 +1168,15 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
}
#define HL_IOCTL_DEF(ioctl, _func) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
static const struct hl_ioctl_desc hl_ioctls[] = {
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl),
HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
};
[_IOC_NR(ioctl) - HL_COMMAND_START] = {.cmd = ioctl, .func = _func}
static const struct hl_ioctl_desc hl_ioctls_control[] = {
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
HL_IOCTL_DEF(DRM_IOCTL_HL_INFO, hl_info_ioctl_control)
};
static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long arg,
const struct hl_ioctl_desc *ioctl, struct device *dev)
{
struct hl_fpriv *hpriv = filep->private_data;
unsigned int nr = _IOC_NR(cmd);
char stack_kdata[128] = {0};
char *kdata = NULL;
......@@ -1194,9 +1226,13 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
retcode = -EFAULT;
out_err:
if (retcode)
dev_dbg_ratelimited(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
task_pid_nr(current), cmd, nr);
if (retcode) {
char task_comm[TASK_COMM_LEN];
dev_dbg_ratelimited(dev,
"error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
}
if (kdata != stack_kdata)
kfree(kdata);
......@@ -1204,29 +1240,6 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
return retcode;
}
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct hl_fpriv *hpriv = filep->private_data;
struct hl_device *hdev = hpriv->hdev;
const struct hl_ioctl_desc *ioctl = NULL;
unsigned int nr = _IOC_NR(cmd);
if (!hdev) {
pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n");
return -ENODEV;
}
if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
ioctl = &hl_ioctls[nr];
} else {
dev_dbg_ratelimited(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
task_pid_nr(current), nr);
return -ENOTTY;
}
return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev);
}
long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct hl_fpriv *hpriv = filep->private_data;
......@@ -1239,13 +1252,16 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
return -ENODEV;
}
if (nr == _IOC_NR(HL_IOCTL_INFO)) {
ioctl = &hl_ioctls_control[nr];
if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) {
ioctl = &hl_ioctls_control[nr - HL_COMMAND_START];
} else {
dev_dbg_ratelimited(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n",
task_pid_nr(current), nr);
char task_comm[TASK_COMM_LEN];
dev_dbg_ratelimited(hdev->dev_ctrl,
"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
return -ENOTTY;
}
return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl);
return _hl_ioctl(hpriv, cmd, arg, ioctl, hdev->dev_ctrl);
}
......@@ -204,8 +204,10 @@ static void hl_ts_free_objects(struct work_struct *work)
{
struct timestamp_reg_work_obj *job =
container_of(work, struct timestamp_reg_work_obj, free_obj);
struct list_head *dynamic_alloc_free_list_head = job->dynamic_alloc_free_obj_head;
struct timestamp_reg_free_node *free_obj, *temp_free_obj;
struct list_head *free_list_head = job->free_obj_head;
struct hl_device *hdev = job->hdev;
list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
......@@ -215,10 +217,28 @@ static void hl_ts_free_objects(struct work_struct *work)
hl_mmap_mem_buf_put(free_obj->buf);
hl_cb_put(free_obj->cq_cb);
kfree(free_obj);
atomic_set(&free_obj->in_use, 0);
}
kfree(free_list_head);
if (dynamic_alloc_free_list_head) {
list_for_each_entry_safe(free_obj, temp_free_obj, dynamic_alloc_free_list_head,
free_objects_node) {
dev_dbg(hdev->dev,
"Dynamic_Alloc list: About to put refcount to buf (%p) cq_cb(%p)\n",
free_obj->buf,
free_obj->cq_cb);
hl_mmap_mem_buf_put(free_obj->buf);
hl_cb_put(free_obj->cq_cb);
list_del(&free_obj->free_objects_node);
kfree(free_obj);
}
kfree(dynamic_alloc_free_list_head);
}
kfree(job);
}
......@@ -233,11 +253,18 @@ static void hl_ts_free_objects(struct work_struct *work)
* list to a dedicated workqueue to do the actual put.
*/
static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
struct list_head **free_list, ktime_t now)
struct list_head **free_list,
struct list_head **dynamic_alloc_list,
struct hl_user_interrupt *intr)
{
struct hl_ts_free_jobs *ts_free_jobs_data;
struct timestamp_reg_free_node *free_node;
u32 free_node_index;
u64 timestamp;
ts_free_jobs_data = &intr->ts_free_jobs_data;
free_node_index = ts_free_jobs_data->next_avail_free_node_idx;
if (!(*free_list)) {
/* Alloc/Init the timestamp registration free objects list */
*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
......@@ -247,39 +274,65 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
INIT_LIST_HEAD(*free_list);
}
free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC);
free_node = &ts_free_jobs_data->free_nodes_pool[free_node_index];
if (atomic_cmpxchg(&free_node->in_use, 0, 1)) {
dev_dbg(hdev->dev,
"Timestamp free node pool is full, buff: %p, record: %p, irq: %u\n",
pend->ts_reg_info.buf,
pend,
intr->interrupt_id);
if (!(*dynamic_alloc_list)) {
*dynamic_alloc_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
if (!(*dynamic_alloc_list))
return -ENOMEM;
INIT_LIST_HEAD(*dynamic_alloc_list);
}
free_node = kmalloc(sizeof(struct timestamp_reg_free_node), GFP_ATOMIC);
if (!free_node)
return -ENOMEM;
timestamp = ktime_to_ns(now);
free_node->dynamic_alloc = 1;
}
*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
timestamp = ktime_to_ns(intr->timestamp);
dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n",
pend->ts_reg_info.timestamp_kernel_addr,
*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
*pend->ts_reg_info.timestamp_kernel_addr = timestamp;
list_del(&pend->wait_list_node);
dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
pend, pend->ts_reg_info.timestamp_kernel_addr, intr->interrupt_id);
/* Mark kernel CB node as free */
pend->ts_reg_info.in_use = 0;
list_del(&pend->list_node);
/* Putting the refcount for ts_buff and cq_cb objects will be handled
* in workqueue context, just add job to free_list.
*/
free_node->buf = pend->ts_reg_info.buf;
free_node->cq_cb = pend->ts_reg_info.cq_cb;
if (free_node->dynamic_alloc) {
list_add(&free_node->free_objects_node, *dynamic_alloc_list);
} else {
ts_free_jobs_data->next_avail_free_node_idx =
(++free_node_index) % ts_free_jobs_data->free_nodes_length;
list_add(&free_node->free_objects_node, *free_list);
}
/* Mark TS record as free */
pend->ts_reg_info.in_use = false;
return 0;
}
static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interrupt *intr)
static void handle_user_interrupt_ts_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
{
struct list_head *ts_reg_free_list_head = NULL, *dynamic_alloc_list_head = NULL;
struct hl_user_pending_interrupt *pend, *temp_pend;
struct list_head *ts_reg_free_list_head = NULL;
struct timestamp_reg_work_obj *job;
bool reg_node_handle_fail = false;
unsigned long flags;
int rc;
/* For registration nodes:
......@@ -288,36 +341,32 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
* or in irq handler context at all (since release functions are long and
* might sleep), so we will need to handle that part in workqueue context.
* To avoid handling kmalloc failure which compels us rolling back actions
* and move nodes hanged on the free list back to the interrupt wait list
* and move nodes hanged on the free list back to the interrupt ts list
* we always alloc the job of the WQ at the beginning.
*/
job = kmalloc(sizeof(*job), GFP_ATOMIC);
if (!job)
return;
spin_lock(&intr->wait_list_lock);
list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) {
spin_lock_irqsave(&intr->ts_list_lock, flags);
list_for_each_entry_safe(pend, temp_pend, &intr->ts_list_head, list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) {
if (pend->ts_reg_info.buf) {
if (!reg_node_handle_fail) {
rc = handle_registration_node(hdev, pend,
&ts_reg_free_list_head, intr->timestamp);
&ts_reg_free_list_head,
&dynamic_alloc_list_head, intr);
if (rc)
reg_node_handle_fail = true;
}
} else {
/* Handle wait target value node */
pend->fence.timestamp = intr->timestamp;
complete_all(&pend->fence.completion);
}
}
}
spin_unlock(&intr->wait_list_lock);
spin_unlock_irqrestore(&intr->ts_list_lock, flags);
if (ts_reg_free_list_head) {
INIT_WORK(&job->free_obj, hl_ts_free_objects);
job->free_obj_head = ts_reg_free_list_head;
job->dynamic_alloc_free_obj_head = dynamic_alloc_list_head;
job->hdev = hdev;
queue_work(hdev->ts_free_obj_wq, &job->free_obj);
} else {
......@@ -325,6 +374,23 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
}
}
static void handle_user_interrupt_wait_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
{
struct hl_user_pending_interrupt *pend, *temp_pend;
unsigned long flags;
spin_lock_irqsave(&intr->wait_list_lock, flags);
list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) {
/* Handle wait target value node */
pend->fence.timestamp = intr->timestamp;
complete_all(&pend->fence.completion);
}
}
spin_unlock_irqrestore(&intr->wait_list_lock, flags);
}
static void handle_tpc_interrupt(struct hl_device *hdev)
{
u64 event_mask;
......@@ -346,19 +412,38 @@ static void handle_unexpected_user_interrupt(struct hl_device *hdev)
}
/**
* hl_irq_handler_user_interrupt - irq handler for user interrupts
* hl_irq_user_interrupt_handler - irq handler for user interrupts.
*
* @irq: irq number
* @arg: pointer to user interrupt structure
*
*/
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg)
irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg)
{
struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev;
user_int->timestamp = ktime_get();
switch (user_int->type) {
case HL_USR_INTERRUPT_CQ:
/* First handle user waiters threads */
handle_user_interrupt_wait_list(hdev, &hdev->common_user_cq_interrupt);
handle_user_interrupt_wait_list(hdev, user_int);
/* Second handle user timestamp registrations */
handle_user_interrupt_ts_list(hdev, &hdev->common_user_cq_interrupt);
handle_user_interrupt_ts_list(hdev, user_int);
break;
case HL_USR_INTERRUPT_DECODER:
handle_user_interrupt_wait_list(hdev, &hdev->common_decoder_interrupt);
/* Handle decoder interrupt registered on this specific irq */
handle_user_interrupt_wait_list(hdev, user_int);
break;
default:
break;
}
return IRQ_WAKE_THREAD;
return IRQ_HANDLED;
}
/**
......@@ -374,19 +459,8 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev;
user_int->timestamp = ktime_get();
switch (user_int->type) {
case HL_USR_INTERRUPT_CQ:
handle_user_interrupt(hdev, &hdev->common_user_cq_interrupt);
/* Handle user cq interrupt registered on this specific irq */
handle_user_interrupt(hdev, user_int);
break;
case HL_USR_INTERRUPT_DECODER:
handle_user_interrupt(hdev, &hdev->common_decoder_interrupt);
/* Handle decoder interrupt registered on this specific irq */
handle_user_interrupt(hdev, user_int);
break;
case HL_USR_INTERRUPT_TPC:
handle_tpc_interrupt(hdev);
break;
......@@ -400,6 +474,18 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
return IRQ_HANDLED;
}
irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg)
{
u64 event_mask = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
struct hl_device *hdev = arg;
dev_err(hdev->dev, "EQ error interrupt received\n");
hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, event_mask);
return IRQ_HANDLED;
}
/**
* hl_irq_handler_eq - irq handler for event queue
*
......
......@@ -63,6 +63,10 @@
#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
#define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
......@@ -660,7 +664,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
......@@ -4619,8 +4623,7 @@ static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
static int gaudi_scrub_device_mem(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US :
min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
u64 addr, size, val = hdev->memory_scrub_val;
ktime_t timeout;
int rc = 0;
......@@ -4904,7 +4907,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list);
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory;
......@@ -8000,7 +8003,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
return rc;
if (!strlen(prop->cpucp_info.card_name))
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
......@@ -9140,9 +9143,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
.asic_dma_pool_free = gaudi_dma_pool_free,
.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
.cs_parser = gaudi_cs_parser,
.asic_dma_map_sgtable = hl_dma_map_sgtable,
.dma_map_sgtable = hl_asic_dma_map_sgtable,
.add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
.update_eq_ci = gaudi_update_eq_ci,
.context_switch = gaudi_context_switch,
......
......@@ -10,7 +10,7 @@
#include <uapi/drm/habanalabs_accel.h>
#include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include <linux/habanalabs/hl_boot_if.h>
#include "../include/gaudi/gaudi_packets.h"
#include "../include/gaudi/gaudi.h"
#include "../include/gaudi/gaudi_async_events.h"
......
......@@ -482,6 +482,11 @@ static int gaudi_config_etf(struct hl_device *hdev,
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
val = RREG32(base_reg + 0x20);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(base_reg + 0x304);
val |= 0x1000;
WREG32(base_reg + 0x304, val);
......@@ -580,6 +585,13 @@ static int gaudi_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
val = RREG32(mmPSOC_ETR_CTL);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000;
WREG32(mmPSOC_ETR_FFCR, val);
......
......@@ -10,7 +10,7 @@
#include <uapi/drm/habanalabs_accel.h>
#include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include <linux/habanalabs/hl_boot_if.h>
#include "../include/gaudi2/gaudi2.h"
#include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_fw_if.h"
......@@ -84,6 +84,7 @@
#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */
#define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC 25000000 /* 25s */
#define GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC 85000000 /* 85s */
#define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC 10000000 /* 10s */
......@@ -419,6 +420,7 @@ enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_NIC_PORT_FIRST,
GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
GAUDI2_IRQ_NUM_TPC_ASSERT,
GAUDI2_IRQ_NUM_EQ_ERROR,
GAUDI2_IRQ_NUM_RESERVED_FIRST,
GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT,
......
......@@ -151,8 +151,8 @@ static u64 debug_stm_regs[GAUDI2_STM_LAST + 1] = {
[GAUDI2_STM_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_STM_BASE,
[GAUDI2_STM_PCIE] = mmPCIE_STM_BASE,
[GAUDI2_STM_PSOC] = mmPSOC_STM_BASE,
[GAUDI2_STM_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_STM_BASE,
[GAUDI2_STM_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_STM_BASE,
[GAUDI2_STM_PSOC_ARC0_CS] = 0,
[GAUDI2_STM_PSOC_ARC1_CS] = 0,
[GAUDI2_STM_PDMA0_CS] = mmPDMA0_CS_STM_BASE,
[GAUDI2_STM_PDMA1_CS] = mmPDMA1_CS_STM_BASE,
[GAUDI2_STM_CPU] = mmCPU_STM_BASE,
......@@ -293,8 +293,8 @@ static u64 debug_etf_regs[GAUDI2_ETF_LAST + 1] = {
[GAUDI2_ETF_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_ETF_BASE,
[GAUDI2_ETF_PCIE] = mmPCIE_ETF_BASE,
[GAUDI2_ETF_PSOC] = mmPSOC_ETF_BASE,
[GAUDI2_ETF_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_ETF_BASE,
[GAUDI2_ETF_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_ETF_BASE,
[GAUDI2_ETF_PSOC_ARC0_CS] = 0,
[GAUDI2_ETF_PSOC_ARC1_CS] = 0,
[GAUDI2_ETF_PDMA0_CS] = mmPDMA0_CS_ETF_BASE,
[GAUDI2_ETF_PDMA1_CS] = mmPDMA1_CS_ETF_BASE,
[GAUDI2_ETF_CPU_0] = mmCPU_ETF_0_BASE,
......@@ -436,8 +436,8 @@ static u64 debug_funnel_regs[GAUDI2_FUNNEL_LAST + 1] = {
[GAUDI2_FUNNEL_DCORE3_RTR6] = mmDCORE3_RTR6_FUNNEL_BASE,
[GAUDI2_FUNNEL_DCORE3_RTR7] = mmDCORE3_RTR7_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC_ARC0] = mmPSOC_ARC0_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC_ARC1] = mmPSOC_ARC1_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC_ARC0] = 0,
[GAUDI2_FUNNEL_PSOC_ARC1] = 0,
[GAUDI2_FUNNEL_XDMA] = mmXDMA_FUNNEL_BASE,
[GAUDI2_FUNNEL_CPU] = mmCPU_FUNNEL_BASE,
[GAUDI2_FUNNEL_PMMU] = mmPMMU_FUNNEL_BASE,
......@@ -766,10 +766,10 @@ static u64 debug_bmon_regs[GAUDI2_BMON_LAST + 1] = {
[GAUDI2_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE,
[GAUDI2_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE,
[GAUDI2_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE,
[GAUDI2_BMON_PSOC_ARC0_0] = mmPSOC_ARC0_BMON_0_BASE,
[GAUDI2_BMON_PSOC_ARC0_1] = mmPSOC_ARC0_BMON_1_BASE,
[GAUDI2_BMON_PSOC_ARC1_0] = mmPSOC_ARC1_BMON_0_BASE,
[GAUDI2_BMON_PSOC_ARC1_1] = mmPSOC_ARC1_BMON_1_BASE,
[GAUDI2_BMON_PSOC_ARC0_0] = 0,
[GAUDI2_BMON_PSOC_ARC0_1] = 0,
[GAUDI2_BMON_PSOC_ARC1_0] = 0,
[GAUDI2_BMON_PSOC_ARC1_1] = 0,
[GAUDI2_BMON_PDMA0_0] = mmPDMA0_BMON_0_BASE,
[GAUDI2_BMON_PDMA0_1] = mmPDMA0_BMON_1_BASE,
[GAUDI2_BMON_PDMA1_0] = mmPDMA1_BMON_0_BASE,
......@@ -968,8 +968,8 @@ static u64 debug_spmu_regs[GAUDI2_SPMU_LAST + 1] = {
[GAUDI2_SPMU_DCORE3_VDEC0_CS] = mmDCORE3_VDEC0_CS_SPMU_BASE,
[GAUDI2_SPMU_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_SPMU_BASE,
[GAUDI2_SPMU_PCIE] = mmPCIE_SPMU_BASE,
[GAUDI2_SPMU_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_SPMU_BASE,
[GAUDI2_SPMU_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_SPMU_BASE,
[GAUDI2_SPMU_PSOC_ARC0_CS] = 0,
[GAUDI2_SPMU_PSOC_ARC1_CS] = 0,
[GAUDI2_SPMU_PDMA0_CS] = mmPDMA0_CS_SPMU_BASE,
[GAUDI2_SPMU_PDMA1_CS] = mmPDMA1_CS_SPMU_BASE,
[GAUDI2_SPMU_PMMU_CS] = mmPMMU_CS_SPMU_BASE,
......@@ -2092,6 +2092,11 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
if (rc)
return -EIO;
val = RREG32(base_reg + mmETF_CTL_OFFSET);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(base_reg + mmETF_FFCR_OFFSET);
val |= 0x1000;
WREG32(base_reg + mmETF_FFCR_OFFSET, val);
......@@ -2120,10 +2125,17 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
if (!input)
return -EINVAL;
val = RREG32(base_reg + mmETF_RSZ_OFFSET) << 2;
if (val) {
val = ffs(val);
WREG32(base_reg + mmETF_PSCR_OFFSET, val);
} else {
WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
}
WREG32(base_reg + mmETF_BUFWM_OFFSET, 0x3FFC);
WREG32(base_reg + mmETF_MODE_OFFSET, input->sink_mode);
WREG32(base_reg + mmETF_FFCR_OFFSET, 0x4001);
WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
WREG32(base_reg + mmETF_CTL_OFFSET, 1);
} else {
WREG32(base_reg + mmETF_BUFWM_OFFSET, 0);
......@@ -2189,6 +2201,11 @@ static int gaudi2_config_etr(struct hl_device *hdev, struct hl_ctx *ctx,
if (rc)
return -EIO;
val = RREG32(mmPSOC_ETR_CTL);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000;
WREG32(mmPSOC_ETR_FFCR, val);
......@@ -2483,7 +2500,8 @@ static int gaudi2_config_spmu(struct hl_device *hdev, struct hl_debug_params *pa
* set enabled events mask based on input->event_types_num
*/
event_mask = 0x80000000;
event_mask |= GENMASK(input->event_types_num, 0);
if (input->event_types_num)
event_mask |= GENMASK(input->event_types_num - 1, 0);
WREG32(base_reg + mmSPMU_PMCNTENSET_EL0_OFFSET, event_mask);
} else {
......
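
For context on the SPMU mask fix in the hunk above (an illustration, not driver
code): GENMASK(h, l) sets bits l through h inclusive, so enabling exactly N
event counters needs GENMASK(N - 1, 0), and N == 0 must be special-cased to
avoid GENMASK(-1, 0).

#include <linux/types.h>
#include <linux/bits.h>

/* With event_types_num == 3 the old code produced GENMASK(3, 0) == 0xF
 * (four counters enabled); the fixed code produces GENMASK(2, 0) == 0x7.
 */
static u32 example_spmu_event_mask(u32 event_types_num)
{
	u32 event_mask = 0x80000000;

	if (event_types_num)
		event_mask |= GENMASK(event_types_num - 1, 0);

	return event_mask;
}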
......@@ -1601,6 +1601,7 @@ static const u32 gaudi2_pb_dcr0_tpc0_unsecured_regs[] = {
mmDCORE0_TPC0_CFG_KERNEL_SRF_30,
mmDCORE0_TPC0_CFG_KERNEL_SRF_31,
mmDCORE0_TPC0_CFG_TPC_SB_L0CD,
mmDCORE0_TPC0_CFG_TPC_COUNT,
mmDCORE0_TPC0_CFG_TPC_ID,
mmDCORE0_TPC0_CFG_QM_KERNEL_ID_INC,
mmDCORE0_TPC0_CFG_QM_TID_BASE_SIZE_HIGH_DIM_0,
......@@ -2907,7 +2908,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
* - range 11: NIC11_CFG + *_DBG (not including TPC_DBG)
*
* If F/W security is not enabled:
* - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP)
* - ranges 12,13: PSOC_CFG (excluding PSOC_TIMESTAMP, PSOC_EFUSE and PSOC_GLOBAL_CONF)
*/
u64 lbw_range_min_short[] = {
mmNIC0_TX_AXUSER_BASE,
......@@ -2923,7 +2924,7 @@ static void gaudi2_init_lbw_range_registers_secure(struct hl_device *hdev)
mmNIC10_TX_AXUSER_BASE,
mmNIC11_TX_AXUSER_BASE,
mmPSOC_I2C_M0_BASE,
mmPSOC_EFUSE_BASE
mmPSOC_GPIO0_BASE
};
u64 lbw_range_max_short[] = {
mmNIC0_MAC_CH3_MAC_PCS_BASE + HL_BLOCK_SIZE,
......@@ -3219,6 +3220,7 @@ static void gaudi2_init_range_registers(struct hl_device *hdev)
*/
static int gaudi2_init_protection_bits(struct hl_device *hdev)
{
u32 *user_regs_array = NULL, user_regs_array_size = 0, engine_core_intr_reg;
struct asic_fixed_properties *prop = &hdev->asic_prop;
u32 instance_offset;
int rc = 0;
......@@ -3389,11 +3391,24 @@ static int gaudi2_init_protection_bits(struct hl_device *hdev)
/* PSOC.
* Except for PSOC_GLOBAL_CONF, skip when security is enabled in F/W, because the blocks are
* protected by privileged RR.
* For PSOC_GLOBAL_CONF, need to un-secure the scratchpad register which is used for engine
* cores to raise events towards F/W.
*/
engine_core_intr_reg = (u32) (hdev->asic_prop.engine_core_interrupt_reg_addr - CFG_BASE);
if (engine_core_intr_reg >= mmPSOC_GLOBAL_CONF_SCRATCHPAD_0 &&
engine_core_intr_reg <= mmPSOC_GLOBAL_CONF_SCRATCHPAD_31) {
user_regs_array = &engine_core_intr_reg;
user_regs_array_size = 1;
} else {
dev_err(hdev->dev,
"Engine cores register for interrupts (%#x) is not a PSOC scratchpad register\n",
engine_core_intr_reg);
}
rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
HL_PB_SINGLE_INSTANCE, HL_PB_NA,
gaudi2_pb_psoc_global_conf, ARRAY_SIZE(gaudi2_pb_psoc_global_conf),
NULL, HL_PB_NA);
user_regs_array, user_regs_array_size);
if (!hdev->asic_prop.fw_security_enabled)
rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA,
......
......@@ -466,7 +466,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
prop->max_pending_cs = GOYA_MAX_PENDING_CS;
......@@ -3358,7 +3358,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list);
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir);
rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory;
......@@ -5122,7 +5122,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
}
if (!strlen(prop->cpucp_info.card_name))
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
return 0;
......@@ -5465,9 +5465,9 @@ static const struct hl_asic_funcs goya_funcs = {
.asic_dma_pool_free = goya_dma_pool_free,
.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable,
.dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
.cs_parser = goya_cs_parser,
.asic_dma_map_sgtable = hl_dma_map_sgtable,
.dma_map_sgtable = hl_asic_dma_map_sgtable,
.add_end_of_cb_packets = goya_add_end_of_cb_packets,
.update_eq_ci = goya_update_eq_ci,
.context_switch = goya_context_switch,
......
......@@ -9,8 +9,8 @@
#define GOYAP_H_
#include <uapi/drm/habanalabs_accel.h>
#include <linux/habanalabs/hl_boot_if.h>
#include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include "../include/goya/goya_packets.h"
#include "../include/goya/goya.h"
#include "../include/goya/goya_async_events.h"
......
......@@ -315,6 +315,11 @@ static int goya_config_etf(struct hl_device *hdev,
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
val = RREG32(base_reg + 0x20);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(base_reg + 0x304);
val |= 0x1000;
WREG32(base_reg + 0x304, val);
......@@ -386,6 +391,11 @@ static int goya_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
val = RREG32(mmPSOC_ETR_CTL);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000;
WREG32(mmPSOC_ETR_FFCR, val);
......
......@@ -959,6 +959,13 @@ enum gaudi2_async_event_id {
GAUDI2_EVENT_ARC_DCCM_FULL = 1319,
GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320,
GAUDI2_EVENT_CPU_DEV_RESET_REQ = 1321,
GAUDI2_EVENT_ARC_PWR_BRK_ENTRY = 1322,
GAUDI2_EVENT_ARC_PWR_BRK_EXT = 1323,
GAUDI2_EVENT_ARC_PWR_RD_MODE0 = 1324,
GAUDI2_EVENT_ARC_PWR_RD_MODE1 = 1325,
GAUDI2_EVENT_ARC_PWR_RD_MODE2 = 1326,
GAUDI2_EVENT_ARC_PWR_RD_MODE3 = 1327,
GAUDI2_EVENT_ARC_EQ_HEARTBEAT = 1328,
GAUDI2_EVENT_SIZE,
};
......