Commit 08057253 authored by Dave Airlie

Merge tag 'drm-habanalabs-next-2023-10-10' of...

Merge tag 'drm-habanalabs-next-2023-10-10' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into drm-next

This tag contains habanalabs driver changes for v6.7.

The notable changes are:

- uAPI changes:
  - Expose tsc clock sampling to better sync clock information in profiler.
  - Enhance engine error reporting in the info ioctl.
  - Block access to the eventfd operations through the control device.
  - Prevent the user from registering multiple times with the same offset for
    a timestamp dump by the driver. A user who wants to reuse an offset in the
    timestamp buffer for a different interrupt must first de-register that
    offset.
  - When exporting dma-buf (for p2p), force the user to specify size/offset
    in multiples of PAGE_SIZE. Previously the driver rounded these values to
    PAGE_SIZE itself, which caused it to map more memory than the user
    intended.

- New features and improvements:
  - Complete the move of the driver to the accel subsystem by removing the
    custom habanalabs class and major and registering with the accel subsystem.
  - Move the firmware interface files to include/linux/habanalabs. This is
    a pre-requisite for upstreaming the NIC drivers of Gaudi (as they need to
    include those files).
  - Perform device hard-reset upon PCIe AXI drain event to prevent the failure
    from cascading to different IP blocks in the SoC. In secured environments,
    this is done automatically by the firmware.
  - Print device name when it is removed for better debuggability.
  - Add support for trace of dma map sgtable operations.
  - Optimize handling of user interrupts by splitting the interrupts into two
    lists: one list for fast handling and a second list for handling with
    timestamp recording, which is slower.
  - Prevent double device hard-reset due to 2 adjacent H/W events.
  - Set device status 'malfunction' while in rmmod.

- Firmware related fixes:
  - Extend preboot timeout because preboot loading might take longer than
    expected in certain cases.
  - Add a protection mechanism for the Event Queue. In case it is full, the
    firmware will be able to notify about it through a dedicated interrupt.
  - Perform device hard-reset in case scrubbing of memory has failed.

- Bug fixes and code cleanups:
  - Small fixes of dma-buf handling in Gaudi2, such as handling an offset != 0,
    using the correct exported size, and creating the sg table.
  - Fix spmu mask creation.
  - Fix bug in wait for cs completion for decoder workloads.
  - Cleanup Greco name from documentation.
  - Fix bug in recording timestamp during cs completion interrupt handling.
  - Fix CoreSight ETF configuration and flush logic.
  - Fix small bug in hpriv_list handling (the list that contains the private
    data per process that opens our device).
Signed-off-by: Dave Airlie <airlied@redhat.com>

# -----BEGIN PGP SIGNATURE-----
#
# iQEzBAABCgAdFiEE7TEboABC71LctBLFZR1NuKta54AFAmUlHoQACgkQZR1NuKta
# 54DsXQf8CW+W4iWJf5UDTj/E/giu9rVRrsUsU0hhCcXbecIxRsLObYXtulENu5/u
# VuEAo/tAvo0LUKi8pdIv6ernDKaxZ1+fimlfXMCzllAA/ts3yp1NgunprsIsx3tv
# YgcJ2GNR8UlVZ1qYuZl+4dOTyD0yfRMROUXBe7wqKnUXOEepOiLBxq6W15tZiJnx
# L+V0yGkNk6pAoADIXLW9EgEXiN/bJZCXGPWp06i/Nz7cHIHJGoV59wAqftqllCtk
# 8ZMkLByjlQKPhc5AgWBtKE8EGVip3sm7b/Q2Gq0ZXdZiebyVJ+AjuuDOdtq1UCIw
# Rcp2576E7rByIBu3RAFlrioWhuR5Zw==
# =2ien
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 10 Oct 2023 19:51:00 AEST
# gpg:                using RSA key ED311BA00042EF52DCB412C5651D4DB8AB5AE780
# gpg: Can't check signature: No public key
From: Oded Gabbay <ogabbay@kernel.org>
Link: https://patchwork.freedesktop.org/patch/msgid/ZSUfiX4J7v4Wn0cU@ogabbay-vm-u22.habana-labs.com
parents 614351f4 4db74c0f
...@@ -9086,6 +9086,7 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git ...@@ -9086,6 +9086,7 @@ T: git https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux.git
F: Documentation/ABI/testing/debugfs-driver-habanalabs F: Documentation/ABI/testing/debugfs-driver-habanalabs
F: Documentation/ABI/testing/sysfs-driver-habanalabs F: Documentation/ABI/testing/sysfs-driver-habanalabs
F: drivers/accel/habanalabs/ F: drivers/accel/habanalabs/
F: include/linux/habanalabs/
F: include/trace/events/habanalabs.h F: include/trace/events/habanalabs.h
F: include/uapi/drm/habanalabs_accel.h F: include/uapi/drm/habanalabs_accel.h
......
...@@ -21,7 +21,6 @@ static DEFINE_SPINLOCK(accel_minor_lock); ...@@ -21,7 +21,6 @@ static DEFINE_SPINLOCK(accel_minor_lock);
static struct idr accel_minors_idr; static struct idr accel_minors_idr;
static struct dentry *accel_debugfs_root; static struct dentry *accel_debugfs_root;
static struct class *accel_class;
static struct device_type accel_sysfs_device_minor = { static struct device_type accel_sysfs_device_minor = {
.name = "accel_minor" .name = "accel_minor"
...@@ -32,23 +31,19 @@ static char *accel_devnode(const struct device *dev, umode_t *mode) ...@@ -32,23 +31,19 @@ static char *accel_devnode(const struct device *dev, umode_t *mode)
return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev)); return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev));
} }
static const struct class accel_class = {
.name = "accel",
.devnode = accel_devnode,
};
static int accel_sysfs_init(void) static int accel_sysfs_init(void)
{ {
accel_class = class_create("accel"); return class_register(&accel_class);
if (IS_ERR(accel_class))
return PTR_ERR(accel_class);
accel_class->devnode = accel_devnode;
return 0;
} }
static void accel_sysfs_destroy(void) static void accel_sysfs_destroy(void)
{ {
if (IS_ERR_OR_NULL(accel_class)) class_unregister(&accel_class);
return;
class_destroy(accel_class);
accel_class = NULL;
} }
static int accel_name_info(struct seq_file *m, void *data) static int accel_name_info(struct seq_file *m, void *data)
...@@ -117,7 +112,7 @@ void accel_debugfs_register(struct drm_device *dev) ...@@ -117,7 +112,7 @@ void accel_debugfs_register(struct drm_device *dev)
void accel_set_device_instance_params(struct device *kdev, int index) void accel_set_device_instance_params(struct device *kdev, int index)
{ {
kdev->devt = MKDEV(ACCEL_MAJOR, index); kdev->devt = MKDEV(ACCEL_MAJOR, index);
kdev->class = accel_class; kdev->class = &accel_class;
kdev->type = &accel_sysfs_device_minor; kdev->type = &accel_sysfs_device_minor;
} }
......
...@@ -361,10 +361,11 @@ static int hl_cb_info(struct hl_mem_mgr *mmg, ...@@ -361,10 +361,11 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
return rc; return rc;
} }
int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data) int hl_cb_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{ {
union hl_cb_args *args = data; struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
union hl_cb_args *args = data;
u64 handle = 0, device_va = 0; u64 handle = 0, device_va = 0;
enum hl_device_status status; enum hl_device_status status;
u32 usage_cnt = 0; u32 usage_cnt = 0;
......
...@@ -102,7 +102,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) ...@@ -102,7 +102,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
kfree(ctx->cs_pending); kfree(ctx->cs_pending);
if (ctx->asid != HL_KERNEL_ASID_ID) { if (ctx->asid != HL_KERNEL_ASID_ID) {
dev_dbg(hdev->dev, "closing user context %d\n", ctx->asid); dev_dbg(hdev->dev, "closing user context, asid=%u\n", ctx->asid);
/* The engines are stopped as there is no executing CS, but the /* The engines are stopped as there is no executing CS, but the
* Coresight might be still working by accessing addresses * Coresight might be still working by accessing addresses
...@@ -119,6 +119,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx) ...@@ -119,6 +119,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
hl_vm_ctx_fini(ctx); hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid); hl_asid_free(hdev, ctx->asid);
hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr); hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
mutex_destroy(&ctx->ts_reg_lock);
} else { } else {
dev_dbg(hdev->dev, "closing kernel context\n"); dev_dbg(hdev->dev, "closing kernel context\n");
hdev->asic_funcs->ctx_fini(ctx); hdev->asic_funcs->ctx_fini(ctx);
...@@ -198,6 +199,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv) ...@@ -198,6 +199,7 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{ {
char task_comm[TASK_COMM_LEN];
int rc = 0, i; int rc = 0, i;
ctx->hdev = hdev; ctx->hdev = hdev;
...@@ -267,7 +269,10 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx) ...@@ -267,7 +269,10 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
hl_encaps_sig_mgr_init(&ctx->sig_mgr); hl_encaps_sig_mgr_init(&ctx->sig_mgr);
dev_dbg(hdev->dev, "create user context %d\n", ctx->asid); mutex_init(&ctx->ts_reg_lock);
dev_dbg(hdev->dev, "create user context, comm=\"%s\", asid=%u\n",
get_task_comm(task_comm, current), ctx->asid);
} }
return 0; return 0;
......
...@@ -18,8 +18,6 @@ ...@@ -18,8 +18,6 @@
#define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE) #define MMU_KBUF_SIZE (MMU_ADDR_BUF_SIZE + MMU_ASID_BUF_SIZE)
#define I2C_MAX_TRANSACTION_LEN 8 #define I2C_MAX_TRANSACTION_LEN 8
static struct dentry *hl_debug_root;
static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr, static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
u8 i2c_reg, u8 i2c_len, u64 *val) u8 i2c_reg, u8 i2c_len, u64 *val)
{ {
...@@ -1788,20 +1786,14 @@ void hl_debugfs_add_device(struct hl_device *hdev) ...@@ -1788,20 +1786,14 @@ void hl_debugfs_add_device(struct hl_device *hdev)
{ {
struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs; struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
dev_entry->root = debugfs_create_dir(dev_name(hdev->dev), hl_debug_root); dev_entry->root = hdev->drm.accel->debugfs_root;
add_files_to_device(hdev, dev_entry, dev_entry->root); add_files_to_device(hdev, dev_entry, dev_entry->root);
if (!hdev->asic_prop.fw_security_enabled) if (!hdev->asic_prop.fw_security_enabled)
add_secured_nodes(dev_entry, dev_entry->root); add_secured_nodes(dev_entry, dev_entry->root);
} }
void hl_debugfs_remove_device(struct hl_device *hdev)
{
struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
debugfs_remove_recursive(entry->root);
}
void hl_debugfs_add_file(struct hl_fpriv *hpriv) void hl_debugfs_add_file(struct hl_fpriv *hpriv)
{ {
struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs; struct hl_dbg_device_entry *dev_entry = &hpriv->hdev->hl_debugfs;
...@@ -1932,13 +1924,3 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, ...@@ -1932,13 +1924,3 @@ void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
up_write(&dev_entry->state_dump_sem); up_write(&dev_entry->state_dump_sem);
} }
void __init hl_debugfs_init(void)
{
hl_debug_root = debugfs_create_dir("habanalabs", NULL);
}
void hl_debugfs_fini(void)
{
debugfs_remove_recursive(hl_debug_root);
}
This diff is collapsed.
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
*/ */
#include "habanalabs.h" #include "habanalabs.h"
#include "../include/common/hl_boot_if.h" #include <linux/habanalabs/hl_boot_if.h>
#include <linux/firmware.h> #include <linux/firmware.h>
#include <linux/crc32.h> #include <linux/crc32.h>
...@@ -724,6 +724,11 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val, ...@@ -724,6 +724,11 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
err_exists = true; err_exists = true;
} }
if (err_val & CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL) {
dev_err(hdev->dev, "Device boot error - Failed to set threshold for temperature sensor\n");
err_exists = true;
}
if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) { if (err_val & CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL) {
/* Ignore this bit, don't prevent driver loading */ /* Ignore this bit, don't prevent driver loading */
dev_dbg(hdev->dev, "device unusable status is set\n"); dev_dbg(hdev->dev, "device unusable status is set\n");
...@@ -1459,6 +1464,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) ...@@ -1459,6 +1464,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
dev_err(hdev->dev, dev_err(hdev->dev,
"Device boot progress - Stuck in preboot after security initialization\n"); "Device boot progress - Stuck in preboot after security initialization\n");
break; break;
case CPU_BOOT_STATUS_FW_SHUTDOWN_PREP:
dev_err(hdev->dev,
"Device boot progress - Stuck in preparation for shutdown\n");
break;
default: default:
dev_err(hdev->dev, dev_err(hdev->dev,
"Device boot progress - Invalid or unexpected status code %d\n", status); "Device boot progress - Invalid or unexpected status code %d\n", status);
...@@ -1469,8 +1478,9 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status) ...@@ -1469,8 +1478,9 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
int hl_fw_wait_preboot_ready(struct hl_device *hdev) int hl_fw_wait_preboot_ready(struct hl_device *hdev)
{ {
struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load; struct pre_fw_load_props *pre_fw_load = &hdev->fw_loader.pre_fw_load;
u32 status; u32 status = 0, timeout;
int rc; int rc, tries = 1;
bool preboot_still_runs;
/* Need to check two possible scenarios: /* Need to check two possible scenarios:
* *
...@@ -1480,6 +1490,8 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev) ...@@ -1480,6 +1490,8 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
* All other status values - for older firmwares where the uboot was * All other status values - for older firmwares where the uboot was
* loaded from the FLASH * loaded from the FLASH
*/ */
timeout = pre_fw_load->wait_for_preboot_timeout;
retry:
rc = hl_poll_timeout( rc = hl_poll_timeout(
hdev, hdev,
pre_fw_load->cpu_boot_status_reg, pre_fw_load->cpu_boot_status_reg,
...@@ -1488,7 +1500,24 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev) ...@@ -1488,7 +1500,24 @@ int hl_fw_wait_preboot_ready(struct hl_device *hdev)
(status == CPU_BOOT_STATUS_READY_TO_BOOT) || (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
(status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT), (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
hdev->fw_poll_interval_usec, hdev->fw_poll_interval_usec,
pre_fw_load->wait_for_preboot_timeout); timeout);
/*
* if F/W reports "security-ready" it means preboot might take longer.
* If the field 'wait_for_preboot_extended_timeout' is non 0 we wait again
* with that timeout
*/
preboot_still_runs = (status == CPU_BOOT_STATUS_SECURITY_READY ||
status == CPU_BOOT_STATUS_IN_PREBOOT ||
status == CPU_BOOT_STATUS_FW_SHUTDOWN_PREP ||
status == CPU_BOOT_STATUS_DRAM_RDY);
if (rc && tries && preboot_still_runs) {
tries--;
if (pre_fw_load->wait_for_preboot_extended_timeout) {
timeout = pre_fw_load->wait_for_preboot_extended_timeout;
goto retry;
}
}
if (rc) { if (rc) {
detect_cpu_boot_status(hdev, status); detect_cpu_boot_status(hdev, status);
...@@ -2743,7 +2772,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, ...@@ -2743,7 +2772,8 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) { if (!(hdev->fw_components & FW_TYPE_BOOT_CPU)) {
struct lkd_fw_binning_info *binning_info; struct lkd_fw_binning_info *binning_info;
rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader, 0); rc = hl_fw_dynamic_request_descriptor(hdev, fw_loader,
sizeof(struct lkd_msg_comms));
if (rc) if (rc)
goto protocol_err; goto protocol_err;
...@@ -2777,6 +2807,11 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev, ...@@ -2777,6 +2807,11 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
hdev->decoder_binning, hdev->rotator_binning); hdev->decoder_binning, hdev->rotator_binning);
} }
if (hdev->asic_prop.support_dynamic_resereved_fw_size) {
hdev->asic_prop.reserved_fw_mem_size =
le32_to_cpu(fw_loader->dynamic_loader.comm_desc.rsvd_mem_size_mb);
}
return 0; return 0;
} }
......
...@@ -14,6 +14,11 @@ ...@@ -14,6 +14,11 @@
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/version.h>
#include <drm/drm_accel.h>
#include <drm/drm_drv.h>
#include <drm/drm_ioctl.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/habanalabs.h> #include <trace/events/habanalabs.h>
...@@ -27,7 +32,6 @@ MODULE_DESCRIPTION(HL_DRIVER_DESC); ...@@ -27,7 +32,6 @@ MODULE_DESCRIPTION(HL_DRIVER_DESC);
MODULE_LICENSE("GPL v2"); MODULE_LICENSE("GPL v2");
static int hl_major; static int hl_major;
static struct class *hl_class;
static DEFINE_IDR(hl_devs_idr); static DEFINE_IDR(hl_devs_idr);
static DEFINE_MUTEX(hl_devs_idr_lock); static DEFINE_MUTEX(hl_devs_idr_lock);
...@@ -70,6 +74,42 @@ static const struct pci_device_id ids[] = { ...@@ -70,6 +74,42 @@ static const struct pci_device_id ids[] = {
}; };
MODULE_DEVICE_TABLE(pci, ids); MODULE_DEVICE_TABLE(pci, ids);
static const struct drm_ioctl_desc hl_drm_ioctls[] = {
DRM_IOCTL_DEF_DRV(HL_INFO, hl_info_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_CB, hl_cb_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_CS, hl_cs_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_WAIT_CS, hl_wait_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_MEMORY, hl_mem_ioctl, 0),
DRM_IOCTL_DEF_DRV(HL_DEBUG, hl_debug_ioctl, 0),
};
static const struct file_operations hl_fops = {
.owner = THIS_MODULE,
.open = accel_open,
.release = drm_release,
.unlocked_ioctl = drm_ioctl,
.compat_ioctl = drm_compat_ioctl,
.llseek = noop_llseek,
.mmap = hl_mmap
};
static const struct drm_driver hl_driver = {
.driver_features = DRIVER_COMPUTE_ACCEL,
.name = HL_NAME,
.desc = HL_DRIVER_DESC,
.major = LINUX_VERSION_MAJOR,
.minor = LINUX_VERSION_PATCHLEVEL,
.patchlevel = LINUX_VERSION_SUBLEVEL,
.date = "20190505",
.fops = &hl_fops,
.open = hl_device_open,
.postclose = hl_device_release,
.ioctls = hl_drm_ioctls,
.num_ioctls = ARRAY_SIZE(hl_drm_ioctls)
};
/* /*
* get_asic_type - translate device id to asic type * get_asic_type - translate device id to asic type
* *
...@@ -123,43 +163,28 @@ static bool is_asic_secured(enum hl_asic_type asic_type) ...@@ -123,43 +163,28 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
} }
/* /*
* hl_device_open - open function for habanalabs device * hl_device_open() - open function for habanalabs device.
* * @ddev: pointer to DRM device structure.
* @inode: pointer to inode structure * @file: pointer to DRM file private data structure.
* @filp: pointer to file structure
* *
* Called when process opens an habanalabs device. * Called when process opens an habanalabs device.
*/ */
int hl_device_open(struct inode *inode, struct file *filp) int hl_device_open(struct drm_device *ddev, struct drm_file *file_priv)
{ {
struct hl_device *hdev = to_hl_device(ddev);
enum hl_device_status status; enum hl_device_status status;
struct hl_device *hdev;
struct hl_fpriv *hpriv; struct hl_fpriv *hpriv;
int rc; int rc;
mutex_lock(&hl_devs_idr_lock);
hdev = idr_find(&hl_devs_idr, iminor(inode));
mutex_unlock(&hl_devs_idr_lock);
if (!hdev) {
pr_err("Couldn't find device %d:%d\n",
imajor(inode), iminor(inode));
return -ENXIO;
}
hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL); hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
if (!hpriv) if (!hpriv)
return -ENOMEM; return -ENOMEM;
hpriv->hdev = hdev; hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
mutex_init(&hpriv->notifier_event.lock); mutex_init(&hpriv->notifier_event.lock);
mutex_init(&hpriv->restore_phase_mutex); mutex_init(&hpriv->restore_phase_mutex);
mutex_init(&hpriv->ctx_lock); mutex_init(&hpriv->ctx_lock);
kref_init(&hpriv->refcount); kref_init(&hpriv->refcount);
nonseekable_open(inode, filp);
hl_ctx_mgr_init(&hpriv->ctx_mgr); hl_ctx_mgr_init(&hpriv->ctx_mgr);
hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr); hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
...@@ -225,6 +250,9 @@ int hl_device_open(struct inode *inode, struct file *filp) ...@@ -225,6 +250,9 @@ int hl_device_open(struct inode *inode, struct file *filp)
hdev->last_successful_open_jif = jiffies; hdev->last_successful_open_jif = jiffies;
hdev->last_successful_open_ktime = ktime_get(); hdev->last_successful_open_ktime = ktime_get();
file_priv->driver_priv = hpriv;
hpriv->file_priv = file_priv;
return 0; return 0;
out_err: out_err:
...@@ -232,7 +260,6 @@ int hl_device_open(struct inode *inode, struct file *filp) ...@@ -232,7 +260,6 @@ int hl_device_open(struct inode *inode, struct file *filp)
hl_mem_mgr_fini(&hpriv->mem_mgr); hl_mem_mgr_fini(&hpriv->mem_mgr);
hl_mem_mgr_idr_destroy(&hpriv->mem_mgr); hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
mutex_destroy(&hpriv->ctx_lock); mutex_destroy(&hpriv->ctx_lock);
mutex_destroy(&hpriv->restore_phase_mutex); mutex_destroy(&hpriv->restore_phase_mutex);
mutex_destroy(&hpriv->notifier_event.lock); mutex_destroy(&hpriv->notifier_event.lock);
...@@ -268,9 +295,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp) ...@@ -268,9 +295,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
*/ */
hpriv->hdev = hdev; hpriv->hdev = hdev;
filp->private_data = hpriv; filp->private_data = hpriv;
hpriv->filp = filp;
mutex_init(&hpriv->notifier_event.lock);
nonseekable_open(inode, filp); nonseekable_open(inode, filp);
hpriv->taskpid = get_task_pid(current, PIDTYPE_PID); hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
...@@ -317,7 +342,6 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev) ...@@ -317,7 +342,6 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type); hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
hdev->major = hl_major; hdev->major = hl_major;
hdev->hclass = hl_class;
hdev->memory_scrub = memory_scrub; hdev->memory_scrub = memory_scrub;
hdev->reset_on_lockup = reset_on_lockup; hdev->reset_on_lockup = reset_on_lockup;
hdev->boot_error_status_mask = boot_error_status_mask; hdev->boot_error_status_mask = boot_error_status_mask;
...@@ -383,6 +407,31 @@ static int fixup_device_params(struct hl_device *hdev) ...@@ -383,6 +407,31 @@ static int fixup_device_params(struct hl_device *hdev)
return 0; return 0;
} }
static int allocate_device_id(struct hl_device *hdev)
{
int id;
mutex_lock(&hl_devs_idr_lock);
id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
mutex_unlock(&hl_devs_idr_lock);
if (id < 0) {
if (id == -ENOSPC)
pr_err("too many devices in the system\n");
return -EBUSY;
}
hdev->id = id;
/*
* Firstly initialized with the internal device ID.
* Will be updated later after the DRM device registration to hold the minor ID.
*/
hdev->cdev_idx = hdev->id;
return 0;
}
/** /**
* create_hdev - create habanalabs device instance * create_hdev - create habanalabs device instance
* *
...@@ -395,27 +444,29 @@ static int fixup_device_params(struct hl_device *hdev) ...@@ -395,27 +444,29 @@ static int fixup_device_params(struct hl_device *hdev)
*/ */
static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
{ {
int main_id, ctrl_id = 0, rc = 0;
struct hl_device *hdev; struct hl_device *hdev;
int rc;
*dev = NULL; *dev = NULL;
hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); hdev = devm_drm_dev_alloc(&pdev->dev, &hl_driver, struct hl_device, drm);
if (!hdev) if (IS_ERR(hdev))
return -ENOMEM; return PTR_ERR(hdev);
hdev->dev = hdev->drm.dev;
/* Will be NULL in case of simulator device */ /* Will be NULL in case of simulator device */
hdev->pdev = pdev; hdev->pdev = pdev;
/* Assign status description string */ /* Assign status description string */
strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX); strscpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX); strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX); strscpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX); strscpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION], strscpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
"in device creation", HL_STR_MAX); "in device creation", HL_STR_MAX);
strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE], strscpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
"in reset after device release", HL_STR_MAX); "in reset after device release", HL_STR_MAX);
/* First, we must find out which ASIC are we handling. This is needed /* First, we must find out which ASIC are we handling. This is needed
...@@ -425,7 +476,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) ...@@ -425,7 +476,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
if (hdev->asic_type == ASIC_INVALID) { if (hdev->asic_type == ASIC_INVALID) {
dev_err(&pdev->dev, "Unsupported ASIC\n"); dev_err(&pdev->dev, "Unsupported ASIC\n");
rc = -ENODEV; rc = -ENODEV;
goto free_hdev; goto out_err;
} }
copy_kernel_module_params_to_device(hdev); copy_kernel_module_params_to_device(hdev);
...@@ -434,42 +485,15 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev) ...@@ -434,42 +485,15 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
fixup_device_params(hdev); fixup_device_params(hdev);
mutex_lock(&hl_devs_idr_lock); rc = allocate_device_id(hdev);
if (rc)
/* Always save 2 numbers, 1 for main device and 1 for control. goto out_err;
* They must be consecutive
*/
main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
if (main_id >= 0)
ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
main_id + 2, GFP_KERNEL);
mutex_unlock(&hl_devs_idr_lock);
if ((main_id < 0) || (ctrl_id < 0)) {
if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
pr_err("too many devices in the system\n");
if (main_id >= 0) {
mutex_lock(&hl_devs_idr_lock);
idr_remove(&hl_devs_idr, main_id);
mutex_unlock(&hl_devs_idr_lock);
}
rc = -EBUSY;
goto free_hdev;
}
hdev->id = main_id;
hdev->id_control = ctrl_id;
*dev = hdev; *dev = hdev;
return 0; return 0;
free_hdev: out_err:
kfree(hdev);
return rc; return rc;
} }
...@@ -484,10 +508,8 @@ static void destroy_hdev(struct hl_device *hdev) ...@@ -484,10 +508,8 @@ static void destroy_hdev(struct hl_device *hdev)
/* Remove device from the device list */ /* Remove device from the device list */
mutex_lock(&hl_devs_idr_lock); mutex_lock(&hl_devs_idr_lock);
idr_remove(&hl_devs_idr, hdev->id); idr_remove(&hl_devs_idr, hdev->id);
idr_remove(&hl_devs_idr, hdev->id_control);
mutex_unlock(&hl_devs_idr_lock); mutex_unlock(&hl_devs_idr_lock);
kfree(hdev);
} }
static int hl_pmops_suspend(struct device *dev) static int hl_pmops_suspend(struct device *dev)
...@@ -691,28 +713,16 @@ static int __init hl_init(void) ...@@ -691,28 +713,16 @@ static int __init hl_init(void)
hl_major = MAJOR(dev); hl_major = MAJOR(dev);
hl_class = class_create(HL_NAME);
if (IS_ERR(hl_class)) {
pr_err("failed to allocate class\n");
rc = PTR_ERR(hl_class);
goto remove_major;
}
hl_debugfs_init();
rc = pci_register_driver(&hl_pci_driver); rc = pci_register_driver(&hl_pci_driver);
if (rc) { if (rc) {
pr_err("failed to register pci device\n"); pr_err("failed to register pci device\n");
goto remove_debugfs; goto remove_major;
} }
pr_debug("driver loaded\n"); pr_debug("driver loaded\n");
return 0; return 0;
remove_debugfs:
hl_debugfs_fini();
class_destroy(hl_class);
remove_major: remove_major:
unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
return rc; return rc;
...@@ -725,14 +735,6 @@ static void __exit hl_exit(void) ...@@ -725,14 +735,6 @@ static void __exit hl_exit(void)
{ {
pci_unregister_driver(&hl_pci_driver); pci_unregister_driver(&hl_pci_driver);
/*
* Removing debugfs must be after all devices or simulator devices
* have been removed because otherwise we get a bug in the
* debugfs module for referencing NULL objects
*/
hl_debugfs_fini();
class_destroy(hl_class);
unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS); unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
idr_destroy(&hl_devs_idr); idr_destroy(&hl_devs_idr);
......
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <asm/msr.h>
static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = { static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr), [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
[HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf), [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
...@@ -320,6 +322,7 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args) ...@@ -320,6 +322,7 @@ static int time_sync_info(struct hl_device *hdev, struct hl_info_args *args)
time_sync.device_time = hdev->asic_funcs->get_device_time(hdev); time_sync.device_time = hdev->asic_funcs->get_device_time(hdev);
time_sync.host_time = ktime_get_raw_ns(); time_sync.host_time = ktime_get_raw_ns();
time_sync.tsc_time = rdtsc();
return copy_to_user(out, &time_sync, return copy_to_user(out, &time_sync,
min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0; min((size_t) max_size, sizeof(time_sync))) ? -EFAULT : 0;
...@@ -875,6 +878,28 @@ static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args) ...@@ -875,6 +878,28 @@ static int fw_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
return rc ? -EFAULT : 0; return rc ? -EFAULT : 0;
} }
static int engine_err_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
void __user *user_buf = (void __user *) (uintptr_t) args->return_pointer;
struct hl_device *hdev = hpriv->hdev;
u32 user_buf_size = args->return_size;
struct engine_err_info *info;
int rc;
if (!user_buf)
return -EINVAL;
info = &hdev->captured_err_info.engine_err;
if (!info->event_info_available)
return 0;
if (user_buf_size < sizeof(struct hl_info_engine_err_event))
return -ENOMEM;
rc = copy_to_user(user_buf, &info->event, sizeof(struct hl_info_engine_err_event));
return rc ? -EFAULT : 0;
}
static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args) static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args)
{ {
void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer; void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer;
...@@ -1001,6 +1026,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, ...@@ -1001,6 +1026,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_FW_ERR_EVENT: case HL_INFO_FW_ERR_EVENT:
return fw_err_info(hpriv, args); return fw_err_info(hpriv, args);
case HL_INFO_USER_ENGINE_ERR_EVENT:
return engine_err_info(hpriv, args);
case HL_INFO_DRAM_USAGE: case HL_INFO_DRAM_USAGE:
return dram_usage_info(hpriv, args); return dram_usage_info(hpriv, args);
default: default:
...@@ -1070,20 +1098,34 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, ...@@ -1070,20 +1098,34 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
return rc; return rc;
} }
static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data) int hl_info_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{ {
struct hl_fpriv *hpriv = file_priv->driver_priv;
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev); return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
} }
static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data) static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
{ {
struct hl_info_args *args = data;
switch (args->op) {
case HL_INFO_GET_EVENTS:
case HL_INFO_UNREGISTER_EVENTFD:
case HL_INFO_REGISTER_EVENTFD:
return -EOPNOTSUPP;
default:
break;
}
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl); return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
} }
static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) int hl_debug_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv)
{ {
struct hl_debug_args *args = data; struct hl_fpriv *hpriv = file_priv->driver_priv;
struct hl_device *hdev = hpriv->hdev; struct hl_device *hdev = hpriv->hdev;
struct hl_debug_args *args = data;
enum hl_device_status status; enum hl_device_status status;
int rc = 0; int rc = 0;
...@@ -1126,25 +1168,15 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data) ...@@ -1126,25 +1168,15 @@ static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
} }
#define HL_IOCTL_DEF(ioctl, _func) \ #define HL_IOCTL_DEF(ioctl, _func) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func} [_IOC_NR(ioctl) - HL_COMMAND_START] = {.cmd = ioctl, .func = _func}
/*
 * Table of ioctl descriptors used by hl_ioctl(). Each entry is built with
 * HL_IOCTL_DEF(), which pairs an ioctl command with the function that
 * handles it; hl_ioctl() selects an entry by the ioctl command number.
 */
static const struct hl_ioctl_desc hl_ioctls[] = {
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_wait_ioctl),
HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
};
static const struct hl_ioctl_desc hl_ioctls_control[] = { static const struct hl_ioctl_desc hl_ioctls_control[] = {
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control) HL_IOCTL_DEF(DRM_IOCTL_HL_INFO, hl_info_ioctl_control)
}; };
static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, static long _hl_ioctl(struct hl_fpriv *hpriv, unsigned int cmd, unsigned long arg,
const struct hl_ioctl_desc *ioctl, struct device *dev) const struct hl_ioctl_desc *ioctl, struct device *dev)
{ {
struct hl_fpriv *hpriv = filep->private_data;
unsigned int nr = _IOC_NR(cmd); unsigned int nr = _IOC_NR(cmd);
char stack_kdata[128] = {0}; char stack_kdata[128] = {0};
char *kdata = NULL; char *kdata = NULL;
...@@ -1194,9 +1226,13 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, ...@@ -1194,9 +1226,13 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
retcode = -EFAULT; retcode = -EFAULT;
out_err: out_err:
if (retcode) if (retcode) {
dev_dbg_ratelimited(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n", char task_comm[TASK_COMM_LEN];
task_pid_nr(current), cmd, nr);
dev_dbg_ratelimited(dev,
"error in ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
}
if (kdata != stack_kdata) if (kdata != stack_kdata)
kfree(kdata); kfree(kdata);
...@@ -1204,29 +1240,6 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg, ...@@ -1204,29 +1240,6 @@ static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
return retcode; return retcode;
} }
/**
 * hl_ioctl - validate an ioctl command number and dispatch it.
 *
 * @filep: file whose private_data holds the hl_fpriv of the caller
 * @cmd: ioctl command
 * @arg: ioctl argument, forwarded untouched to _hl_ioctl()
 *
 * Return: -ENODEV when the file no longer has a device attached, -ENOTTY
 * when the command number is outside [HL_COMMAND_START, HL_COMMAND_END),
 * otherwise whatever _hl_ioctl() returns for the matching hl_ioctls entry.
 */
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct hl_fpriv *hpriv = filep->private_data;
	struct hl_device *hdev = hpriv->hdev;
	unsigned int nr = _IOC_NR(cmd);

	if (!hdev) {
		pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n");
		return -ENODEV;
	}

	/* Only command numbers inside the supported window have a table entry */
	if (!((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END))) {
		dev_dbg_ratelimited(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
					task_pid_nr(current), nr);
		return -ENOTTY;
	}

	return _hl_ioctl(filep, cmd, arg, &hl_ioctls[nr], hdev->dev);
}
long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
{ {
struct hl_fpriv *hpriv = filep->private_data; struct hl_fpriv *hpriv = filep->private_data;
...@@ -1239,13 +1252,16 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) ...@@ -1239,13 +1252,16 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
return -ENODEV; return -ENODEV;
} }
if (nr == _IOC_NR(HL_IOCTL_INFO)) { if (nr == _IOC_NR(DRM_IOCTL_HL_INFO)) {
ioctl = &hl_ioctls_control[nr]; ioctl = &hl_ioctls_control[nr - HL_COMMAND_START];
} else { } else {
dev_dbg_ratelimited(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n", char task_comm[TASK_COMM_LEN];
task_pid_nr(current), nr);
dev_dbg_ratelimited(hdev->dev_ctrl,
"invalid ioctl: pid=%d, comm=\"%s\", cmd=%#010x, nr=%#04x\n",
task_pid_nr(current), get_task_comm(task_comm, current), cmd, nr);
return -ENOTTY; return -ENOTTY;
} }
return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl); return _hl_ioctl(hpriv, cmd, arg, ioctl, hdev->dev_ctrl);
} }
...@@ -204,8 +204,10 @@ static void hl_ts_free_objects(struct work_struct *work) ...@@ -204,8 +204,10 @@ static void hl_ts_free_objects(struct work_struct *work)
{ {
struct timestamp_reg_work_obj *job = struct timestamp_reg_work_obj *job =
container_of(work, struct timestamp_reg_work_obj, free_obj); container_of(work, struct timestamp_reg_work_obj, free_obj);
struct list_head *dynamic_alloc_free_list_head = job->dynamic_alloc_free_obj_head;
struct timestamp_reg_free_node *free_obj, *temp_free_obj; struct timestamp_reg_free_node *free_obj, *temp_free_obj;
struct list_head *free_list_head = job->free_obj_head; struct list_head *free_list_head = job->free_obj_head;
struct hl_device *hdev = job->hdev; struct hl_device *hdev = job->hdev;
list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) { list_for_each_entry_safe(free_obj, temp_free_obj, free_list_head, free_objects_node) {
...@@ -215,10 +217,28 @@ static void hl_ts_free_objects(struct work_struct *work) ...@@ -215,10 +217,28 @@ static void hl_ts_free_objects(struct work_struct *work)
hl_mmap_mem_buf_put(free_obj->buf); hl_mmap_mem_buf_put(free_obj->buf);
hl_cb_put(free_obj->cq_cb); hl_cb_put(free_obj->cq_cb);
kfree(free_obj); atomic_set(&free_obj->in_use, 0);
} }
kfree(free_list_head); kfree(free_list_head);
if (dynamic_alloc_free_list_head) {
list_for_each_entry_safe(free_obj, temp_free_obj, dynamic_alloc_free_list_head,
free_objects_node) {
dev_dbg(hdev->dev,
"Dynamic_Alloc list: About to put refcount to buf (%p) cq_cb(%p)\n",
free_obj->buf,
free_obj->cq_cb);
hl_mmap_mem_buf_put(free_obj->buf);
hl_cb_put(free_obj->cq_cb);
list_del(&free_obj->free_objects_node);
kfree(free_obj);
}
kfree(dynamic_alloc_free_list_head);
}
kfree(job); kfree(job);
} }
...@@ -233,11 +253,18 @@ static void hl_ts_free_objects(struct work_struct *work) ...@@ -233,11 +253,18 @@ static void hl_ts_free_objects(struct work_struct *work)
* list to a dedicated workqueue to do the actual put. * list to a dedicated workqueue to do the actual put.
*/ */
static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend, static int handle_registration_node(struct hl_device *hdev, struct hl_user_pending_interrupt *pend,
struct list_head **free_list, ktime_t now) struct list_head **free_list,
struct list_head **dynamic_alloc_list,
struct hl_user_interrupt *intr)
{ {
struct hl_ts_free_jobs *ts_free_jobs_data;
struct timestamp_reg_free_node *free_node; struct timestamp_reg_free_node *free_node;
u32 free_node_index;
u64 timestamp; u64 timestamp;
ts_free_jobs_data = &intr->ts_free_jobs_data;
free_node_index = ts_free_jobs_data->next_avail_free_node_idx;
if (!(*free_list)) { if (!(*free_list)) {
/* Alloc/Init the timestamp registration free objects list */ /* Alloc/Init the timestamp registration free objects list */
*free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC); *free_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
...@@ -247,39 +274,65 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi ...@@ -247,39 +274,65 @@ static int handle_registration_node(struct hl_device *hdev, struct hl_user_pendi
INIT_LIST_HEAD(*free_list); INIT_LIST_HEAD(*free_list);
} }
free_node = kmalloc(sizeof(*free_node), GFP_ATOMIC); free_node = &ts_free_jobs_data->free_nodes_pool[free_node_index];
if (!free_node) if (atomic_cmpxchg(&free_node->in_use, 0, 1)) {
return -ENOMEM; dev_dbg(hdev->dev,
"Timestamp free node pool is full, buff: %p, record: %p, irq: %u\n",
pend->ts_reg_info.buf,
pend,
intr->interrupt_id);
timestamp = ktime_to_ns(now); if (!(*dynamic_alloc_list)) {
*dynamic_alloc_list = kmalloc(sizeof(struct list_head), GFP_ATOMIC);
if (!(*dynamic_alloc_list))
return -ENOMEM;
*pend->ts_reg_info.timestamp_kernel_addr = timestamp; INIT_LIST_HEAD(*dynamic_alloc_list);
}
free_node = kmalloc(sizeof(struct timestamp_reg_free_node), GFP_ATOMIC);
if (!free_node)
return -ENOMEM;
free_node->dynamic_alloc = 1;
}
dev_dbg(hdev->dev, "Timestamp is set to ts cb address (%p), ts: 0x%llx\n", timestamp = ktime_to_ns(intr->timestamp);
pend->ts_reg_info.timestamp_kernel_addr,
*(u64 *)pend->ts_reg_info.timestamp_kernel_addr);
list_del(&pend->wait_list_node); *pend->ts_reg_info.timestamp_kernel_addr = timestamp;
dev_dbg(hdev->dev, "Irq handle: Timestamp record (%p) ts cb address (%p), interrupt_id: %u\n",
pend, pend->ts_reg_info.timestamp_kernel_addr, intr->interrupt_id);
/* Mark kernel CB node as free */ list_del(&pend->list_node);
pend->ts_reg_info.in_use = 0;
/* Putting the refcount for ts_buff and cq_cb objects will be handled /* Putting the refcount for ts_buff and cq_cb objects will be handled
* in workqueue context, just add job to free_list. * in workqueue context, just add job to free_list.
*/ */
free_node->buf = pend->ts_reg_info.buf; free_node->buf = pend->ts_reg_info.buf;
free_node->cq_cb = pend->ts_reg_info.cq_cb; free_node->cq_cb = pend->ts_reg_info.cq_cb;
list_add(&free_node->free_objects_node, *free_list);
if (free_node->dynamic_alloc) {
list_add(&free_node->free_objects_node, *dynamic_alloc_list);
} else {
ts_free_jobs_data->next_avail_free_node_idx =
(++free_node_index) % ts_free_jobs_data->free_nodes_length;
list_add(&free_node->free_objects_node, *free_list);
}
/* Mark TS record as free */
pend->ts_reg_info.in_use = false;
return 0; return 0;
} }
static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interrupt *intr) static void handle_user_interrupt_ts_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
{ {
struct list_head *ts_reg_free_list_head = NULL, *dynamic_alloc_list_head = NULL;
struct hl_user_pending_interrupt *pend, *temp_pend; struct hl_user_pending_interrupt *pend, *temp_pend;
struct list_head *ts_reg_free_list_head = NULL;
struct timestamp_reg_work_obj *job; struct timestamp_reg_work_obj *job;
bool reg_node_handle_fail = false; bool reg_node_handle_fail = false;
unsigned long flags;
int rc; int rc;
/* For registration nodes: /* For registration nodes:
...@@ -288,36 +341,32 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru ...@@ -288,36 +341,32 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
* or in irq handler context at all (since release functions are long and * or in irq handler context at all (since release functions are long and
* might sleep), so we will need to handle that part in workqueue context. * might sleep), so we will need to handle that part in workqueue context.
* To avoid handling kmalloc failure which compels us rolling back actions * To avoid handling kmalloc failure which compels us rolling back actions
* and move nodes hanged on the free list back to the interrupt wait list * and move nodes hanged on the free list back to the interrupt ts list
* we always alloc the job of the WQ at the beginning. * we always alloc the job of the WQ at the beginning.
*/ */
job = kmalloc(sizeof(*job), GFP_ATOMIC); job = kmalloc(sizeof(*job), GFP_ATOMIC);
if (!job) if (!job)
return; return;
spin_lock(&intr->wait_list_lock); spin_lock_irqsave(&intr->ts_list_lock, flags);
list_for_each_entry_safe(pend, temp_pend, &intr->wait_list_head, wait_list_node) { list_for_each_entry_safe(pend, temp_pend, &intr->ts_list_head, list_node) {
if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) || if ((pend->cq_kernel_addr && *(pend->cq_kernel_addr) >= pend->cq_target_value) ||
!pend->cq_kernel_addr) { !pend->cq_kernel_addr) {
if (pend->ts_reg_info.buf) { if (!reg_node_handle_fail) {
if (!reg_node_handle_fail) { rc = handle_registration_node(hdev, pend,
rc = handle_registration_node(hdev, pend, &ts_reg_free_list_head,
&ts_reg_free_list_head, intr->timestamp); &dynamic_alloc_list_head, intr);
if (rc) if (rc)
reg_node_handle_fail = true; reg_node_handle_fail = true;
}
} else {
/* Handle wait target value node */
pend->fence.timestamp = intr->timestamp;
complete_all(&pend->fence.completion);
} }
} }
} }
spin_unlock(&intr->wait_list_lock); spin_unlock_irqrestore(&intr->ts_list_lock, flags);
if (ts_reg_free_list_head) { if (ts_reg_free_list_head) {
INIT_WORK(&job->free_obj, hl_ts_free_objects); INIT_WORK(&job->free_obj, hl_ts_free_objects);
job->free_obj_head = ts_reg_free_list_head; job->free_obj_head = ts_reg_free_list_head;
job->dynamic_alloc_free_obj_head = dynamic_alloc_list_head;
job->hdev = hdev; job->hdev = hdev;
queue_work(hdev->ts_free_obj_wq, &job->free_obj); queue_work(hdev->ts_free_obj_wq, &job->free_obj);
} else { } else {
...@@ -325,6 +374,23 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru ...@@ -325,6 +374,23 @@ static void handle_user_interrupt(struct hl_device *hdev, struct hl_user_interru
} }
} }
/**
 * handle_user_interrupt_wait_list - complete the ready waiters of an interrupt.
 *
 * @hdev: habanalabs device structure (not used by this function)
 * @intr: user interrupt whose wait list is scanned
 *
 * Walks the interrupt's wait list under wait_list_lock (taken with
 * irqsave). A node is completed when it has no CQ counter address, or when
 * the counter it points to has reached the node's target value. Completed
 * nodes get the interrupt timestamp stored in their fence before all
 * waiters on the fence completion are released.
 */
static void handle_user_interrupt_wait_list(struct hl_device *hdev, struct hl_user_interrupt *intr)
{
	struct hl_user_pending_interrupt *waiter, *next_waiter;
	unsigned long irq_flags;

	spin_lock_irqsave(&intr->wait_list_lock, irq_flags);

	list_for_each_entry_safe(waiter, next_waiter, &intr->wait_list_head, list_node) {
		/* A node with a CQ counter stays pending until the target is reached */
		if (waiter->cq_kernel_addr &&
				*(waiter->cq_kernel_addr) < waiter->cq_target_value)
			continue;

		/* Handle wait target value node */
		waiter->fence.timestamp = intr->timestamp;
		complete_all(&waiter->fence.completion);
	}

	spin_unlock_irqrestore(&intr->wait_list_lock, irq_flags);
}
static void handle_tpc_interrupt(struct hl_device *hdev) static void handle_tpc_interrupt(struct hl_device *hdev)
{ {
u64 event_mask; u64 event_mask;
...@@ -346,19 +412,38 @@ static void handle_unexpected_user_interrupt(struct hl_device *hdev) ...@@ -346,19 +412,38 @@ static void handle_unexpected_user_interrupt(struct hl_device *hdev)
} }
/** /**
* hl_irq_handler_user_interrupt - irq handler for user interrupts * hl_irq_user_interrupt_handler - irq handler for user interrupts.
* *
* @irq: irq number * @irq: irq number
* @arg: pointer to user interrupt structure * @arg: pointer to user interrupt structure
*
*/ */
irqreturn_t hl_irq_handler_user_interrupt(int irq, void *arg) irqreturn_t hl_irq_user_interrupt_handler(int irq, void *arg)
{ {
struct hl_user_interrupt *user_int = arg; struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev;
user_int->timestamp = ktime_get(); user_int->timestamp = ktime_get();
switch (user_int->type) {
case HL_USR_INTERRUPT_CQ:
/* First handle user waiters threads */
handle_user_interrupt_wait_list(hdev, &hdev->common_user_cq_interrupt);
handle_user_interrupt_wait_list(hdev, user_int);
return IRQ_WAKE_THREAD; /* Second handle user timestamp registrations */
handle_user_interrupt_ts_list(hdev, &hdev->common_user_cq_interrupt);
handle_user_interrupt_ts_list(hdev, user_int);
break;
case HL_USR_INTERRUPT_DECODER:
handle_user_interrupt_wait_list(hdev, &hdev->common_decoder_interrupt);
/* Handle decoder interrupt registered on this specific irq */
handle_user_interrupt_wait_list(hdev, user_int);
break;
default:
break;
}
return IRQ_HANDLED;
} }
/** /**
...@@ -374,19 +459,8 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg) ...@@ -374,19 +459,8 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
struct hl_user_interrupt *user_int = arg; struct hl_user_interrupt *user_int = arg;
struct hl_device *hdev = user_int->hdev; struct hl_device *hdev = user_int->hdev;
user_int->timestamp = ktime_get();
switch (user_int->type) { switch (user_int->type) {
case HL_USR_INTERRUPT_CQ:
handle_user_interrupt(hdev, &hdev->common_user_cq_interrupt);
/* Handle user cq interrupt registered on this specific irq */
handle_user_interrupt(hdev, user_int);
break;
case HL_USR_INTERRUPT_DECODER:
handle_user_interrupt(hdev, &hdev->common_decoder_interrupt);
/* Handle decoder interrupt registered on this specific irq */
handle_user_interrupt(hdev, user_int);
break;
case HL_USR_INTERRUPT_TPC: case HL_USR_INTERRUPT_TPC:
handle_tpc_interrupt(hdev); handle_tpc_interrupt(hdev);
break; break;
...@@ -400,6 +474,18 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg) ...@@ -400,6 +474,18 @@ irqreturn_t hl_irq_user_interrupt_thread_handler(int irq, void *arg)
return IRQ_HANDLED; return IRQ_HANDLED;
} }
/**
 * hl_irq_eq_error_interrupt_thread_handler - handler for the EQ error interrupt.
 *
 * @irq: irq number (not used by this function)
 * @arg: pointer to the habanalabs device structure
 *
 * Logs the error and asks for a conditional hard reset of the device,
 * passing the device-reset and device-unavailable notifier events.
 *
 * Return: IRQ_HANDLED, always.
 */
irqreturn_t hl_irq_eq_error_interrupt_thread_handler(int irq, void *arg)
{
	struct hl_device *hdev = arg;
	u64 notifier_events;

	dev_err(hdev->dev, "EQ error interrupt received\n");

	notifier_events = HL_NOTIFIER_EVENT_DEVICE_RESET | HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
	hl_device_cond_reset(hdev, HL_DRV_RESET_HARD, notifier_events);

	return IRQ_HANDLED;
}
/** /**
* hl_irq_handler_eq - irq handler for event queue * hl_irq_handler_eq - irq handler for event queue
* *
......
This diff is collapsed.
...@@ -63,6 +63,10 @@ ...@@ -63,6 +63,10 @@
#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb" #define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin" #define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
MODULE_FIRMWARE(GAUDI_BOOT_FIT_FILE);
MODULE_FIRMWARE(GAUDI_LINUX_FW_FILE);
MODULE_FIRMWARE(GAUDI_TPC_FW_FILE);
#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */ #define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
#define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */ #define GAUDI_RESET_TIMEOUT_MSEC 2000 /* 2000ms */
...@@ -660,7 +664,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) ...@@ -660,7 +664,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN); CARD_NAME_MAX_LEN);
prop->max_pending_cs = GAUDI_MAX_PENDING_CS; prop->max_pending_cs = GAUDI_MAX_PENDING_CS;
...@@ -4619,8 +4623,7 @@ static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) ...@@ -4619,8 +4623,7 @@ static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val)
static int gaudi_scrub_device_mem(struct hl_device *hdev) static int gaudi_scrub_device_mem(struct hl_device *hdev)
{ {
struct asic_fixed_properties *prop = &hdev->asic_prop; struct asic_fixed_properties *prop = &hdev->asic_prop;
u64 wait_to_idle_time = hdev->pdev ? HBM_SCRUBBING_TIMEOUT_US : u64 wait_to_idle_time = HBM_SCRUBBING_TIMEOUT_US;
min_t(u64, HBM_SCRUBBING_TIMEOUT_US * 10, HL_SIM_MAX_TIMEOUT_US);
u64 addr, size, val = hdev->memory_scrub_val; u64 addr, size, val = hdev->memory_scrub_val;
ktime_t timeout; ktime_t timeout;
int rc = 0; int rc = 0;
...@@ -4904,7 +4907,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev, ...@@ -4904,7 +4907,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list); list_add_tail(&userptr->job_node, parser->job_userptr_list);
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) { if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n"); dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory; goto unpin_memory;
...@@ -8000,7 +8003,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) ...@@ -8000,7 +8003,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
return rc; return rc;
if (!strlen(prop->cpucp_info.card_name)) if (!strlen(prop->cpucp_info.card_name))
strncpy(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME, strscpy_pad(prop->cpucp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN); CARD_NAME_MAX_LEN);
hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type); hdev->card_type = le32_to_cpu(hdev->asic_prop.cpucp_info.card_type);
...@@ -9140,9 +9143,9 @@ static const struct hl_asic_funcs gaudi_funcs = { ...@@ -9140,9 +9143,9 @@ static const struct hl_asic_funcs gaudi_funcs = {
.asic_dma_pool_free = gaudi_dma_pool_free, .asic_dma_pool_free = gaudi_dma_pool_free,
.cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free, .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
.cs_parser = gaudi_cs_parser, .cs_parser = gaudi_cs_parser,
.asic_dma_map_sgtable = hl_dma_map_sgtable, .dma_map_sgtable = hl_asic_dma_map_sgtable,
.add_end_of_cb_packets = gaudi_add_end_of_cb_packets, .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
.update_eq_ci = gaudi_update_eq_ci, .update_eq_ci = gaudi_update_eq_ci,
.context_switch = gaudi_context_switch, .context_switch = gaudi_context_switch,
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
#include <uapi/drm/habanalabs_accel.h> #include <uapi/drm/habanalabs_accel.h>
#include "../common/habanalabs.h" #include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h" #include <linux/habanalabs/hl_boot_if.h>
#include "../include/gaudi/gaudi_packets.h" #include "../include/gaudi/gaudi_packets.h"
#include "../include/gaudi/gaudi.h" #include "../include/gaudi/gaudi.h"
#include "../include/gaudi/gaudi_async_events.h" #include "../include/gaudi/gaudi_async_events.h"
......
...@@ -482,6 +482,11 @@ static int gaudi_config_etf(struct hl_device *hdev, ...@@ -482,6 +482,11 @@ static int gaudi_config_etf(struct hl_device *hdev,
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK); WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
val = RREG32(base_reg + 0x20);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(base_reg + 0x304); val = RREG32(base_reg + 0x304);
val |= 0x1000; val |= 0x1000;
WREG32(base_reg + 0x304, val); WREG32(base_reg + 0x304, val);
...@@ -580,6 +585,13 @@ static int gaudi_config_etr(struct hl_device *hdev, ...@@ -580,6 +585,13 @@ static int gaudi_config_etr(struct hl_device *hdev,
WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK); WREG32(mmPSOC_ETR_LAR, CORESIGHT_UNLOCK);
val = RREG32(mmPSOC_ETR_CTL);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(mmPSOC_ETR_FFCR); val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000; val |= 0x1000;
WREG32(mmPSOC_ETR_FFCR, val); WREG32(mmPSOC_ETR_FFCR, val);
......
This diff is collapsed.
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
#include <uapi/drm/habanalabs_accel.h> #include <uapi/drm/habanalabs_accel.h>
#include "../common/habanalabs.h" #include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h" #include <linux/habanalabs/hl_boot_if.h>
#include "../include/gaudi2/gaudi2.h" #include "../include/gaudi2/gaudi2.h"
#include "../include/gaudi2/gaudi2_packets.h" #include "../include/gaudi2/gaudi2_packets.h"
#include "../include/gaudi2/gaudi2_fw_if.h" #include "../include/gaudi2/gaudi2_fw_if.h"
...@@ -84,6 +84,7 @@ ...@@ -84,6 +84,7 @@
#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */ #define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */
#define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC 25000000 /* 25s */ #define GAUDI2_PREBOOT_REQ_TIMEOUT_USEC 25000000 /* 25s */
#define GAUDI2_PREBOOT_EXTENDED_REQ_TIMEOUT_USEC 85000000 /* 85s */
#define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC 10000000 /* 10s */ #define GAUDI2_BOOT_FIT_REQ_TIMEOUT_USEC 10000000 /* 10s */
...@@ -419,6 +420,7 @@ enum gaudi2_irq_num { ...@@ -419,6 +420,7 @@ enum gaudi2_irq_num {
GAUDI2_IRQ_NUM_NIC_PORT_FIRST, GAUDI2_IRQ_NUM_NIC_PORT_FIRST,
GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1), GAUDI2_IRQ_NUM_NIC_PORT_LAST = (GAUDI2_IRQ_NUM_NIC_PORT_FIRST + NIC_NUMBER_OF_PORTS - 1),
GAUDI2_IRQ_NUM_TPC_ASSERT, GAUDI2_IRQ_NUM_TPC_ASSERT,
GAUDI2_IRQ_NUM_EQ_ERROR,
GAUDI2_IRQ_NUM_RESERVED_FIRST, GAUDI2_IRQ_NUM_RESERVED_FIRST,
GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1), GAUDI2_IRQ_NUM_RESERVED_LAST = (GAUDI2_MSIX_ENTRIES - GAUDI2_TOTAL_USER_INTERRUPTS - 1),
GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT, GAUDI2_IRQ_NUM_UNEXPECTED_ERROR = RESERVED_MSIX_UNEXPECTED_USER_ERROR_INTERRUPT,
......
...@@ -151,8 +151,8 @@ static u64 debug_stm_regs[GAUDI2_STM_LAST + 1] = { ...@@ -151,8 +151,8 @@ static u64 debug_stm_regs[GAUDI2_STM_LAST + 1] = {
[GAUDI2_STM_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_STM_BASE, [GAUDI2_STM_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_STM_BASE,
[GAUDI2_STM_PCIE] = mmPCIE_STM_BASE, [GAUDI2_STM_PCIE] = mmPCIE_STM_BASE,
[GAUDI2_STM_PSOC] = mmPSOC_STM_BASE, [GAUDI2_STM_PSOC] = mmPSOC_STM_BASE,
[GAUDI2_STM_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_STM_BASE, [GAUDI2_STM_PSOC_ARC0_CS] = 0,
[GAUDI2_STM_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_STM_BASE, [GAUDI2_STM_PSOC_ARC1_CS] = 0,
[GAUDI2_STM_PDMA0_CS] = mmPDMA0_CS_STM_BASE, [GAUDI2_STM_PDMA0_CS] = mmPDMA0_CS_STM_BASE,
[GAUDI2_STM_PDMA1_CS] = mmPDMA1_CS_STM_BASE, [GAUDI2_STM_PDMA1_CS] = mmPDMA1_CS_STM_BASE,
[GAUDI2_STM_CPU] = mmCPU_STM_BASE, [GAUDI2_STM_CPU] = mmCPU_STM_BASE,
...@@ -293,8 +293,8 @@ static u64 debug_etf_regs[GAUDI2_ETF_LAST + 1] = { ...@@ -293,8 +293,8 @@ static u64 debug_etf_regs[GAUDI2_ETF_LAST + 1] = {
[GAUDI2_ETF_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_ETF_BASE, [GAUDI2_ETF_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_ETF_BASE,
[GAUDI2_ETF_PCIE] = mmPCIE_ETF_BASE, [GAUDI2_ETF_PCIE] = mmPCIE_ETF_BASE,
[GAUDI2_ETF_PSOC] = mmPSOC_ETF_BASE, [GAUDI2_ETF_PSOC] = mmPSOC_ETF_BASE,
[GAUDI2_ETF_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_ETF_BASE, [GAUDI2_ETF_PSOC_ARC0_CS] = 0,
[GAUDI2_ETF_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_ETF_BASE, [GAUDI2_ETF_PSOC_ARC1_CS] = 0,
[GAUDI2_ETF_PDMA0_CS] = mmPDMA0_CS_ETF_BASE, [GAUDI2_ETF_PDMA0_CS] = mmPDMA0_CS_ETF_BASE,
[GAUDI2_ETF_PDMA1_CS] = mmPDMA1_CS_ETF_BASE, [GAUDI2_ETF_PDMA1_CS] = mmPDMA1_CS_ETF_BASE,
[GAUDI2_ETF_CPU_0] = mmCPU_ETF_0_BASE, [GAUDI2_ETF_CPU_0] = mmCPU_ETF_0_BASE,
...@@ -436,8 +436,8 @@ static u64 debug_funnel_regs[GAUDI2_FUNNEL_LAST + 1] = { ...@@ -436,8 +436,8 @@ static u64 debug_funnel_regs[GAUDI2_FUNNEL_LAST + 1] = {
[GAUDI2_FUNNEL_DCORE3_RTR6] = mmDCORE3_RTR6_FUNNEL_BASE, [GAUDI2_FUNNEL_DCORE3_RTR6] = mmDCORE3_RTR6_FUNNEL_BASE,
[GAUDI2_FUNNEL_DCORE3_RTR7] = mmDCORE3_RTR7_FUNNEL_BASE, [GAUDI2_FUNNEL_DCORE3_RTR7] = mmDCORE3_RTR7_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE, [GAUDI2_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE,
[GAUDI2_FUNNEL_PSOC_ARC0] = mmPSOC_ARC0_FUNNEL_BASE, [GAUDI2_FUNNEL_PSOC_ARC0] = 0,
[GAUDI2_FUNNEL_PSOC_ARC1] = mmPSOC_ARC1_FUNNEL_BASE, [GAUDI2_FUNNEL_PSOC_ARC1] = 0,
[GAUDI2_FUNNEL_XDMA] = mmXDMA_FUNNEL_BASE, [GAUDI2_FUNNEL_XDMA] = mmXDMA_FUNNEL_BASE,
[GAUDI2_FUNNEL_CPU] = mmCPU_FUNNEL_BASE, [GAUDI2_FUNNEL_CPU] = mmCPU_FUNNEL_BASE,
[GAUDI2_FUNNEL_PMMU] = mmPMMU_FUNNEL_BASE, [GAUDI2_FUNNEL_PMMU] = mmPMMU_FUNNEL_BASE,
...@@ -766,10 +766,10 @@ static u64 debug_bmon_regs[GAUDI2_BMON_LAST + 1] = { ...@@ -766,10 +766,10 @@ static u64 debug_bmon_regs[GAUDI2_BMON_LAST + 1] = {
[GAUDI2_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE, [GAUDI2_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE,
[GAUDI2_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE, [GAUDI2_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE,
[GAUDI2_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE, [GAUDI2_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE,
[GAUDI2_BMON_PSOC_ARC0_0] = mmPSOC_ARC0_BMON_0_BASE, [GAUDI2_BMON_PSOC_ARC0_0] = 0,
[GAUDI2_BMON_PSOC_ARC0_1] = mmPSOC_ARC0_BMON_1_BASE, [GAUDI2_BMON_PSOC_ARC0_1] = 0,
[GAUDI2_BMON_PSOC_ARC1_0] = mmPSOC_ARC1_BMON_0_BASE, [GAUDI2_BMON_PSOC_ARC1_0] = 0,
[GAUDI2_BMON_PSOC_ARC1_1] = mmPSOC_ARC1_BMON_1_BASE, [GAUDI2_BMON_PSOC_ARC1_1] = 0,
[GAUDI2_BMON_PDMA0_0] = mmPDMA0_BMON_0_BASE, [GAUDI2_BMON_PDMA0_0] = mmPDMA0_BMON_0_BASE,
[GAUDI2_BMON_PDMA0_1] = mmPDMA0_BMON_1_BASE, [GAUDI2_BMON_PDMA0_1] = mmPDMA0_BMON_1_BASE,
[GAUDI2_BMON_PDMA1_0] = mmPDMA1_BMON_0_BASE, [GAUDI2_BMON_PDMA1_0] = mmPDMA1_BMON_0_BASE,
...@@ -968,8 +968,8 @@ static u64 debug_spmu_regs[GAUDI2_SPMU_LAST + 1] = { ...@@ -968,8 +968,8 @@ static u64 debug_spmu_regs[GAUDI2_SPMU_LAST + 1] = {
[GAUDI2_SPMU_DCORE3_VDEC0_CS] = mmDCORE3_VDEC0_CS_SPMU_BASE, [GAUDI2_SPMU_DCORE3_VDEC0_CS] = mmDCORE3_VDEC0_CS_SPMU_BASE,
[GAUDI2_SPMU_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_SPMU_BASE, [GAUDI2_SPMU_DCORE3_VDEC1_CS] = mmDCORE3_VDEC1_CS_SPMU_BASE,
[GAUDI2_SPMU_PCIE] = mmPCIE_SPMU_BASE, [GAUDI2_SPMU_PCIE] = mmPCIE_SPMU_BASE,
[GAUDI2_SPMU_PSOC_ARC0_CS] = mmPSOC_ARC0_CS_SPMU_BASE, [GAUDI2_SPMU_PSOC_ARC0_CS] = 0,
[GAUDI2_SPMU_PSOC_ARC1_CS] = mmPSOC_ARC1_CS_SPMU_BASE, [GAUDI2_SPMU_PSOC_ARC1_CS] = 0,
[GAUDI2_SPMU_PDMA0_CS] = mmPDMA0_CS_SPMU_BASE, [GAUDI2_SPMU_PDMA0_CS] = mmPDMA0_CS_SPMU_BASE,
[GAUDI2_SPMU_PDMA1_CS] = mmPDMA1_CS_SPMU_BASE, [GAUDI2_SPMU_PDMA1_CS] = mmPDMA1_CS_SPMU_BASE,
[GAUDI2_SPMU_PMMU_CS] = mmPMMU_CS_SPMU_BASE, [GAUDI2_SPMU_PMMU_CS] = mmPMMU_CS_SPMU_BASE,
...@@ -2092,6 +2092,11 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par ...@@ -2092,6 +2092,11 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
if (rc) if (rc)
return -EIO; return -EIO;
val = RREG32(base_reg + mmETF_CTL_OFFSET);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(base_reg + mmETF_FFCR_OFFSET); val = RREG32(base_reg + mmETF_FFCR_OFFSET);
val |= 0x1000; val |= 0x1000;
WREG32(base_reg + mmETF_FFCR_OFFSET, val); WREG32(base_reg + mmETF_FFCR_OFFSET, val);
...@@ -2120,10 +2125,17 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par ...@@ -2120,10 +2125,17 @@ static int gaudi2_config_etf(struct hl_device *hdev, struct hl_debug_params *par
if (!input) if (!input)
return -EINVAL; return -EINVAL;
val = RREG32(base_reg + mmETF_RSZ_OFFSET) << 2;
if (val) {
val = ffs(val);
WREG32(base_reg + mmETF_PSCR_OFFSET, val);
} else {
WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
}
WREG32(base_reg + mmETF_BUFWM_OFFSET, 0x3FFC); WREG32(base_reg + mmETF_BUFWM_OFFSET, 0x3FFC);
WREG32(base_reg + mmETF_MODE_OFFSET, input->sink_mode); WREG32(base_reg + mmETF_MODE_OFFSET, input->sink_mode);
WREG32(base_reg + mmETF_FFCR_OFFSET, 0x4001); WREG32(base_reg + mmETF_FFCR_OFFSET, 0x4001);
WREG32(base_reg + mmETF_PSCR_OFFSET, 0x10);
WREG32(base_reg + mmETF_CTL_OFFSET, 1); WREG32(base_reg + mmETF_CTL_OFFSET, 1);
} else { } else {
WREG32(base_reg + mmETF_BUFWM_OFFSET, 0); WREG32(base_reg + mmETF_BUFWM_OFFSET, 0);
...@@ -2189,6 +2201,11 @@ static int gaudi2_config_etr(struct hl_device *hdev, struct hl_ctx *ctx, ...@@ -2189,6 +2201,11 @@ static int gaudi2_config_etr(struct hl_device *hdev, struct hl_ctx *ctx,
if (rc) if (rc)
return -EIO; return -EIO;
val = RREG32(mmPSOC_ETR_CTL);
if ((!params->enable && val == 0x0) || (params->enable && val != 0x0))
return 0;
val = RREG32(mmPSOC_ETR_FFCR); val = RREG32(mmPSOC_ETR_FFCR);
val |= 0x1000; val |= 0x1000;
WREG32(mmPSOC_ETR_FFCR, val); WREG32(mmPSOC_ETR_FFCR, val);
...@@ -2483,7 +2500,8 @@ static int gaudi2_config_spmu(struct hl_device *hdev, struct hl_debug_params *pa ...@@ -2483,7 +2500,8 @@ static int gaudi2_config_spmu(struct hl_device *hdev, struct hl_debug_params *pa
* set enabled events mask based on input->event_types_num * set enabled events mask based on input->event_types_num
*/ */
event_mask = 0x80000000; event_mask = 0x80000000;
event_mask |= GENMASK(input->event_types_num, 0); if (input->event_types_num)
event_mask |= GENMASK(input->event_types_num - 1, 0);
WREG32(base_reg + mmSPMU_PMCNTENSET_EL0_OFFSET, event_mask); WREG32(base_reg + mmSPMU_PMCNTENSET_EL0_OFFSET, event_mask);
} else { } else {
......
...@@ -466,7 +466,7 @@ int goya_set_fixed_properties(struct hl_device *hdev) ...@@ -466,7 +466,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
prop->pcie_dbi_base_address = mmPCIE_DBI_BASE; prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI; prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN); CARD_NAME_MAX_LEN);
prop->max_pending_cs = GOYA_MAX_PENDING_CS; prop->max_pending_cs = GOYA_MAX_PENDING_CS;
...@@ -3358,7 +3358,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev, ...@@ -3358,7 +3358,7 @@ static int goya_pin_memory_before_cs(struct hl_device *hdev,
list_add_tail(&userptr->job_node, parser->job_userptr_list); list_add_tail(&userptr->job_node, parser->job_userptr_list);
rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); rc = hl_dma_map_sgtable(hdev, userptr->sgt, dir);
if (rc) { if (rc) {
dev_err(hdev->dev, "failed to map sgt with DMA region\n"); dev_err(hdev->dev, "failed to map sgt with DMA region\n");
goto unpin_memory; goto unpin_memory;
...@@ -5122,7 +5122,7 @@ int goya_cpucp_info_get(struct hl_device *hdev) ...@@ -5122,7 +5122,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
} }
if (!strlen(prop->cpucp_info.card_name)) if (!strlen(prop->cpucp_info.card_name))
strncpy(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME, strscpy_pad(prop->cpucp_info.card_name, GOYA_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN); CARD_NAME_MAX_LEN);
return 0; return 0;
...@@ -5465,9 +5465,9 @@ static const struct hl_asic_funcs goya_funcs = { ...@@ -5465,9 +5465,9 @@ static const struct hl_asic_funcs goya_funcs = {
.asic_dma_pool_free = goya_dma_pool_free, .asic_dma_pool_free = goya_dma_pool_free,
.cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_alloc = goya_cpu_accessible_dma_pool_alloc,
.cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free, .cpu_accessible_dma_pool_free = goya_cpu_accessible_dma_pool_free,
.hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, .dma_unmap_sgtable = hl_asic_dma_unmap_sgtable,
.cs_parser = goya_cs_parser, .cs_parser = goya_cs_parser,
.asic_dma_map_sgtable = hl_dma_map_sgtable, .dma_map_sgtable = hl_asic_dma_map_sgtable,
.add_end_of_cb_packets = goya_add_end_of_cb_packets, .add_end_of_cb_packets = goya_add_end_of_cb_packets,
.update_eq_ci = goya_update_eq_ci, .update_eq_ci = goya_update_eq_ci,
.context_switch = goya_context_switch, .context_switch = goya_context_switch,
......
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
#define GOYAP_H_ #define GOYAP_H_
#include <uapi/drm/habanalabs_accel.h> #include <uapi/drm/habanalabs_accel.h>
#include <linux/habanalabs/hl_boot_if.h>
#include "../common/habanalabs.h" #include "../common/habanalabs.h"
#include "../include/common/hl_boot_if.h"
#include "../include/goya/goya_packets.h" #include "../include/goya/goya_packets.h"
#include "../include/goya/goya.h" #include "../include/goya/goya.h"
#include "../include/goya/goya_async_events.h" #include "../include/goya/goya_async_events.h"
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment