Commit 9b4a66fd authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman

Merge tag 'misc-habanalabs-next-2019-09-05' of...

Merge tag 'misc-habanalabs-next-2019-09-05' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.4:

- Create an additional char device per PCI device. The new char device
  allows any application to query the device for stats, information, idle
  state and more. This is needed to support system/monitoring
  applications, while also allowing the deep-learning application to send
  work to the ASIC through the main (original) char device.

- Fix possible kernel crash in case user supplies a smaller-than-required
  buffer to the DEBUG IOCTL.

- Expose the device to userspace only after initialization was done, to
  prevent a race between the initialization and user submitting workloads.

- Add uapi, as part of INFO IOCTL, to allow user to query the device
  utilization rate.

- Add uapi, as part of INFO IOCTL, to allow user to retrieve aggregate H/W
  events, i.e. counting H/W events from the loading of the driver.

- Register to the HWMON subsystem with the board's name, to allow the
  user to prepare a custom sensor file per board.

- Use correct macros for endian swapping.

- Improve error printing in multiple places.

- Small bug fixes.

* tag 'misc-habanalabs-next-2019-09-05' of git://people.freedesktop.org/~gabbayo/linux: (30 commits)
  habanalabs: correctly cast variable to __le32
  habanalabs: show correct id in error print
  habanalabs: stop using the acronym KMD
  habanalabs: display card name as sensors header
  habanalabs: add uapi to retrieve aggregate H/W events
  habanalabs: add uapi to retrieve device utilization
  habanalabs: Make the Coresight timestamp perpetual
  habanalabs: explicitly set the queue-id enumerated numbers
  habanalabs: print to kernel log when reset is finished
  habanalabs: replace __le32_to_cpu with le32_to_cpu
  habanalabs: replace __cpu_to_le32/64 with cpu_to_le32/64
  habanalabs: Handle HW_IP_INFO if device disabled or in reset
  habanalabs: Expose devices after initialization is done
  habanalabs: improve security in Debug IOCTL
  habanalabs: use default structure for user input in Debug IOCTL
  habanalabs: Add descriptive name to PSOC app status register
  habanalabs: Add descriptive names to PSOC scratch-pad registers
  habanalabs: create two char devices per ASIC
  habanalabs: change device_setup_cdev() to be more generic
  habanalabs: maintain a list of file private data objects
  ...
parents 25ec8710 6dc66f7c
......@@ -57,6 +57,7 @@ KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Allows the user to set the maximum clock frequency for MME, TPC
and IC when the power management profile is set to "automatic".
This property is valid only for the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/ic_clk
Date: Jan 2019
......@@ -127,8 +128,8 @@ Description: Power management profile. Values are "auto", "manual". In "auto"
the max clock frequency to a low value when there are no user
processes that are opened on the device's file. In "manual"
mode, the user sets the maximum clock frequency by writing to
ic_clk, mme_clk and tpc_clk
ic_clk, mme_clk and tpc_clk. This property is valid only for
the Goya ASIC family
What: /sys/class/habanalabs/hl<n>/preboot_btl_ver
Date: Jan 2019
......@@ -187,10 +188,3 @@ Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Version of the u-boot running on the device's CPU
\ No newline at end of file
What: /sys/class/habanalabs/hl<n>/write_open_cnt
Date: Jan 2019
KernelVersion: 5.1
Contact: oded.gabbay@gmail.com
Description: Displays the total number of user processes that are currently
opened on the device's file
......@@ -18,7 +18,7 @@ int hl_asid_init(struct hl_device *hdev)
mutex_init(&hdev->asid_mutex);
/* ASID 0 is reserved for KMD and device CPU */
/* ASID 0 is reserved for the kernel driver and device CPU */
set_bit(0, hdev->asid_bitmap);
return 0;
......
......@@ -397,7 +397,8 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size)
rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, cb_size, &cb_handle,
HL_KERNEL_ASID_ID);
if (rc) {
dev_err(hdev->dev, "Failed to allocate CB for KMD %d\n", rc);
dev_err(hdev->dev,
"Failed to allocate CB for the kernel driver %d\n", rc);
return NULL;
}
......
......@@ -178,11 +178,23 @@ static void cs_do_release(struct kref *ref)
/* We also need to update CI for internal queues */
if (cs->submitted) {
int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt);
hdev->asic_funcs->hw_queues_lock(hdev);
WARN_ONCE((cs_cnt < 0),
"hl%d: error in CS active cnt %d\n",
hdev->id, cs_cnt);
hdev->cs_active_cnt--;
if (!hdev->cs_active_cnt) {
struct hl_device_idle_busy_ts *ts;
ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
ts->busy_to_idle_ts = ktime_get();
if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
hdev->idle_busy_ts_idx = 0;
} else if (hdev->cs_active_cnt < 0) {
dev_crit(hdev->dev, "CS active cnt %d is negative\n",
hdev->cs_active_cnt);
}
hdev->asic_funcs->hw_queues_unlock(hdev);
hl_int_hw_queue_update_ci(cs);
......@@ -305,6 +317,8 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
other = ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)];
if ((other) && (!dma_fence_is_signaled(other))) {
spin_unlock(&ctx->cs_lock);
dev_dbg(hdev->dev,
"Rejecting CS because of too many in-flights CS\n");
rc = -EAGAIN;
goto free_fence;
}
......@@ -395,8 +409,9 @@ static struct hl_cb *validate_queue_index(struct hl_device *hdev,
return NULL;
}
if (hw_queue_prop->kmd_only) {
dev_err(hdev->dev, "Queue index %d is restricted for KMD\n",
if (hw_queue_prop->driver_only) {
dev_err(hdev->dev,
"Queue index %d is restricted for the kernel driver\n",
chunk->queue_index);
return NULL;
} else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
......
......@@ -26,12 +26,13 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
dma_fence_put(ctx->cs_pending[i]);
if (ctx->asid != HL_KERNEL_ASID_ID) {
/*
* The engines are stopped as there is no executing CS, but the
/* The engines are stopped as there is no executing CS, but the
* Coresight might be still working by accessing addresses
* related to the stopped engines. Hence stop it explicitly.
* Stop only if this is the compute context, as there can be
* only one compute context
*/
if (hdev->in_debug)
if ((hdev->in_debug) && (hdev->compute_ctx == ctx))
hl_device_set_debug_mode(hdev, false);
hl_vm_ctx_fini(ctx);
......@@ -67,29 +68,36 @@ int hl_ctx_create(struct hl_device *hdev, struct hl_fpriv *hpriv)
goto out_err;
}
mutex_lock(&mgr->ctx_lock);
rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
mutex_unlock(&mgr->ctx_lock);
if (rc < 0) {
dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
goto free_ctx;
}
ctx->handle = rc;
rc = hl_ctx_init(hdev, ctx, false);
if (rc)
goto free_ctx;
goto remove_from_idr;
hl_hpriv_get(hpriv);
ctx->hpriv = hpriv;
/* TODO: remove for multiple contexts */
/* TODO: remove for multiple contexts per process */
hpriv->ctx = ctx;
hdev->user_ctx = ctx;
mutex_lock(&mgr->ctx_lock);
rc = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
mutex_unlock(&mgr->ctx_lock);
if (rc < 0) {
dev_err(hdev->dev, "Failed to allocate IDR for a new CTX\n");
hl_ctx_free(hdev, ctx);
goto out_err;
}
/* TODO: remove the following line for multiple process support */
hdev->compute_ctx = ctx;
return 0;
remove_from_idr:
mutex_lock(&mgr->ctx_lock);
idr_remove(&mgr->ctx_handles, ctx->handle);
mutex_unlock(&mgr->ctx_lock);
free_ctx:
kfree(ctx);
out_err:
......@@ -120,7 +128,7 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
ctx->thread_ctx_switch_wait_token = 0;
if (is_kernel_ctx) {
ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */
ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
rc = hl_mmu_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "Failed to init mmu ctx module\n");
......
......@@ -29,7 +29,7 @@ static int hl_debugfs_i2c_read(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_I2C_RD <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_RD <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.i2c_bus = i2c_bus;
pkt.i2c_addr = i2c_addr;
......@@ -55,12 +55,12 @@ static int hl_debugfs_i2c_write(struct hl_device *hdev, u8 i2c_bus, u8 i2c_addr,
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_I2C_WR <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_I2C_WR <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.i2c_bus = i2c_bus;
pkt.i2c_addr = i2c_addr;
pkt.i2c_reg = i2c_reg;
pkt.value = __cpu_to_le64(val);
pkt.value = cpu_to_le64(val);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_DEVICE_TIMEOUT_USEC, NULL);
......@@ -81,10 +81,10 @@ static void hl_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state)
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_LED_SET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_LED_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.led_index = __cpu_to_le32(led);
pkt.value = __cpu_to_le64(state);
pkt.led_index = cpu_to_le32(led);
pkt.value = cpu_to_le64(state);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_DEVICE_TIMEOUT_USEC, NULL);
......@@ -370,7 +370,7 @@ static int mmu_show(struct seq_file *s, void *data)
if (dev_entry->mmu_asid == HL_KERNEL_ASID_ID)
ctx = hdev->kernel_ctx;
else
ctx = hdev->user_ctx;
ctx = hdev->compute_ctx;
if (!ctx) {
dev_err(hdev->dev, "no ctx available\n");
......@@ -533,7 +533,7 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
u64 *phys_addr)
{
struct hl_ctx *ctx = hdev->user_ctx;
struct hl_ctx *ctx = hdev->compute_ctx;
u64 hop_addr, hop_pte_addr, hop_pte;
u64 offset_mask = HOP4_MASK | OFFSET_MASK;
int rc = 0;
......
This diff is collapsed.
......@@ -9,6 +9,7 @@
#include "include/hw_ip/mmu/mmu_general.h"
#include "include/hw_ip/mmu/mmu_v1_0.h"
#include "include/goya/asic_reg/goya_masks.h"
#include "include/goya/goya_reg_map.h"
#include <linux/pci.h>
#include <linux/genalloc.h>
......@@ -41,8 +42,8 @@
* PQ, CQ and CP are not secured.
* PQ, CB and the data are on the SRAM/DRAM.
*
* Since QMAN DMA is secured, KMD is parsing the DMA CB:
* - KMD checks DMA pointer
* Since QMAN DMA is secured, the driver is parsing the DMA CB:
* - checks DMA pointer
* - WREG, MSG_PROT are not allowed.
* - MSG_LONG/SHORT are allowed.
*
......@@ -55,15 +56,15 @@
* QMAN DMA: PQ, CQ and CP are secured.
* MMU is set to bypass on the Secure props register of the QMAN.
* The reasons we don't enable MMU for PQ, CQ and CP are:
* - PQ entry is in kernel address space and KMD doesn't map it.
* - PQ entry is in kernel address space and the driver doesn't map it.
* - CP writes to MSIX register and to kernel address space (completion
* queue).
*
* DMA is not secured but because CP is secured, KMD still needs to parse the
* CB, but doesn't need to check the DMA addresses.
* DMA is not secured but because CP is secured, the driver still needs to parse
* the CB, but doesn't need to check the DMA addresses.
*
* For QMAN DMA 0, DMA is also secured because only KMD uses this DMA and KMD
* doesn't map memory in MMU.
* For QMAN DMA 0, DMA is also secured because only the driver uses this DMA and
* the driver doesn't map memory in MMU.
*
* QMAN TPC/MME: PQ, CQ and CP aren't secured (no change from MMU disabled mode)
*
......@@ -335,18 +336,18 @@ void goya_get_fixed_properties(struct hl_device *hdev)
for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
prop->hw_queues_props[i].kmd_only = 0;
prop->hw_queues_props[i].driver_only = 0;
}
for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES ; i++) {
prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
prop->hw_queues_props[i].kmd_only = 1;
prop->hw_queues_props[i].driver_only = 1;
}
for (; i < NUMBER_OF_EXT_HW_QUEUES + NUMBER_OF_CPU_HW_QUEUES +
NUMBER_OF_INT_HW_QUEUES; i++) {
prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
prop->hw_queues_props[i].kmd_only = 0;
prop->hw_queues_props[i].driver_only = 0;
}
for (; i < HL_MAX_QUEUES; i++)
......@@ -1006,36 +1007,34 @@ int goya_init_cpu_queues(struct hl_device *hdev)
eq = &hdev->event_queue;
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_0,
lower_32_bits(cpu_pq->bus_address));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_1,
upper_32_bits(cpu_pq->bus_address));
WREG32(mmCPU_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
WREG32(mmCPU_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_2, lower_32_bits(eq->bus_address));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_3, upper_32_bits(eq->bus_address));
WREG32(mmCPU_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
WREG32(mmCPU_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_8,
WREG32(mmCPU_CQ_BASE_ADDR_LOW,
lower_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_9,
WREG32(mmCPU_CQ_BASE_ADDR_HIGH,
upper_32_bits(VA_CPU_ACCESSIBLE_MEM_ADDR));
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES);
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, HL_CPU_ACCESSIBLE_MEM_SIZE);
WREG32(mmCPU_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
WREG32(mmCPU_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
WREG32(mmCPU_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
/* Used for EQ CI */
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0);
WREG32(mmCPU_EQ_CI, 0);
WREG32(mmCPU_IF_PF_PQ_PI, 0);
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_7, PQ_INIT_STATUS_READY_FOR_CP);
WREG32(mmCPU_PQ_INIT_STATUS, PQ_INIT_STATUS_READY_FOR_CP);
WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
GOYA_ASYNC_EVENT_ID_PI_UPDATE);
err = hl_poll_timeout(
hdev,
mmPSOC_GLOBAL_CONF_SCRATCHPAD_7,
mmCPU_PQ_INIT_STATUS,
status,
(status == PQ_INIT_STATUS_READY_FOR_HOST),
1000,
......@@ -2063,6 +2062,25 @@ static void goya_disable_msix(struct hl_device *hdev)
goya->hw_cap_initialized &= ~HW_CAP_MSIX;
}
static void goya_enable_timestamp(struct hl_device *hdev)
{
/* Disable the timestamp counter */
WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
/* Zero the lower/upper parts of the 64-bit counter */
WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
/* Enable the counter */
WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}
static void goya_disable_timestamp(struct hl_device *hdev)
{
/* Disable the timestamp counter */
WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
}
static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
{
u32 wait_timeout_ms, cpu_timeout_ms;
......@@ -2103,6 +2121,8 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
goya_disable_external_queues(hdev);
goya_disable_internal_queues(hdev);
goya_disable_timestamp(hdev);
if (hard_reset) {
goya_disable_msix(hdev);
goya_mmu_remove_device_cpu_mappings(hdev);
......@@ -2205,12 +2225,12 @@ static void goya_read_device_fw_version(struct hl_device *hdev,
switch (fwc) {
case FW_COMP_UBOOT:
ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_29);
ver_off = RREG32(mmUBOOT_VER_OFFSET);
dest = hdev->asic_prop.uboot_ver;
name = "U-Boot";
break;
case FW_COMP_PREBOOT:
ver_off = RREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_28);
ver_off = RREG32(mmPREBOOT_VER_OFFSET);
dest = hdev->asic_prop.preboot_ver;
name = "Preboot";
break;
......@@ -2469,7 +2489,7 @@ static int goya_hw_init(struct hl_device *hdev)
* we need to reset the chip before doing H/W init. This register is
* cleared by the H/W upon H/W reset
*/
WREG32(mmPSOC_GLOBAL_CONF_APP_STATUS, HL_DEVICE_HW_STATE_DIRTY);
WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
rc = goya_init_cpu(hdev, GOYA_CPU_TIMEOUT_USEC);
if (rc) {
......@@ -2505,6 +2525,8 @@ static int goya_hw_init(struct hl_device *hdev)
goya_init_tpc_qmans(hdev);
goya_enable_timestamp(hdev);
/* MSI-X must be enabled before CPU queues are initialized */
rc = goya_enable_msix(hdev);
if (rc)
......@@ -2831,7 +2853,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
dev_err_ratelimited(hdev->dev,
"Can't send KMD job on QMAN0 because the device is not idle\n");
"Can't send driver job on QMAN0 because the device is not idle\n");
return -EBUSY;
}
......@@ -3949,7 +3971,7 @@ void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
void goya_update_eq_ci(struct hl_device *hdev, u32 val)
{
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val);
WREG32(mmCPU_EQ_CI, val);
}
void goya_restore_phase_topology(struct hl_device *hdev)
......@@ -4447,6 +4469,7 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
struct goya_device *goya = hdev->asic_specific;
goya->events_stat[event_type]++;
goya->events_stat_aggregate[event_type]++;
switch (event_type) {
case GOYA_ASYNC_EVENT_ID_PCIE_IF:
......@@ -4528,12 +4551,16 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
}
}
void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
{
struct goya_device *goya = hdev->asic_specific;
*size = (u32) sizeof(goya->events_stat);
if (aggregate) {
*size = (u32) sizeof(goya->events_stat_aggregate);
return goya->events_stat_aggregate;
}
*size = (u32) sizeof(goya->events_stat);
return goya->events_stat;
}
......@@ -4934,6 +4961,10 @@ int goya_armcp_info_get(struct hl_device *hdev)
prop->dram_end_address = prop->dram_base_address + dram_size;
}
if (!strlen(prop->armcp_info.card_name))
strncpy(prop->armcp_info.card_name, GOYA_DEFAULT_CARD_NAME,
CARD_NAME_MAX_LEN);
return 0;
}
......@@ -5047,7 +5078,7 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
{
return RREG32(mmPSOC_GLOBAL_CONF_APP_STATUS);
return RREG32(mmHW_STATE);
}
static const struct hl_asic_funcs goya_funcs = {
......
......@@ -55,6 +55,8 @@
#define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */
#define GOYA_DEFAULT_CARD_NAME "HL1000"
/* DRAM Memory Map */
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
......@@ -68,19 +70,19 @@
MMU_PAGE_TABLES_SIZE)
#define MMU_CACHE_MNG_ADDR (MMU_DRAM_DEFAULT_PAGE_ADDR + \
MMU_DRAM_DEFAULT_PAGE_SIZE)
#define DRAM_KMD_END_ADDR (MMU_CACHE_MNG_ADDR + \
#define DRAM_DRIVER_END_ADDR (MMU_CACHE_MNG_ADDR + \
MMU_CACHE_MNG_SIZE)
#define DRAM_BASE_ADDR_USER 0x20000000
#if (DRAM_KMD_END_ADDR > DRAM_BASE_ADDR_USER)
#error "KMD must reserve no more than 512MB"
#if (DRAM_DRIVER_END_ADDR > DRAM_BASE_ADDR_USER)
#error "Driver must reserve no more than 512MB"
#endif
/*
* SRAM Memory Map for KMD
* SRAM Memory Map for Driver
*
* KMD occupies KMD_SRAM_SIZE bytes from the start of SRAM. It is used for
* Driver occupies DRIVER_SRAM_SIZE bytes from the start of SRAM. It is used for
* MME/TPC QMANs
*
*/
......@@ -106,10 +108,10 @@
#define TPC7_QMAN_BASE_OFFSET (TPC6_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define SRAM_KMD_RES_OFFSET (TPC7_QMAN_BASE_OFFSET + \
#define SRAM_DRIVER_RES_OFFSET (TPC7_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#if (SRAM_KMD_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START)
#if (SRAM_DRIVER_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START)
#error "MME/TPC QMANs SRAM space exceeds limit"
#endif
......@@ -162,6 +164,7 @@ struct goya_device {
u64 ddr_bar_cur_addr;
u32 events_stat[GOYA_ASYNC_EVENT_ID_SIZE];
u32 events_stat_aggregate[GOYA_ASYNC_EVENT_ID_SIZE];
u32 hw_cap_initialized;
u8 device_cpu_mmu_mappings_done;
};
......@@ -215,7 +218,7 @@ int goya_suspend(struct hl_device *hdev);
int goya_resume(struct hl_device *hdev);
void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry);
void *goya_get_events_stat(struct hl_device *hdev, u32 *size);
void *goya_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size);
void goya_add_end_of_cb_packets(struct hl_device *hdev, u64 kernel_address,
u32 len, u64 cq_addr, u32 cq_val, u32 msix_vec);
......
......@@ -15,6 +15,10 @@
#define GOYA_PLDM_CORESIGHT_TIMEOUT_USEC (CORESIGHT_TIMEOUT_USEC * 100)
#define SPMU_SECTION_SIZE DMA_CH_0_CS_SPMU_MAX_OFFSET
#define SPMU_EVENT_TYPES_OFFSET 0x400
#define SPMU_MAX_COUNTERS 6
static u64 debug_stm_regs[GOYA_STM_LAST + 1] = {
[GOYA_STM_CPU] = mmCPU_STM_BASE,
[GOYA_STM_DMA_CH_0_CS] = mmDMA_CH_0_CS_STM_BASE,
......@@ -226,9 +230,16 @@ static int goya_config_stm(struct hl_device *hdev,
struct hl_debug_params *params)
{
struct hl_debug_params_stm *input;
u64 base_reg = debug_stm_regs[params->reg_idx] - CFG_BASE;
u64 base_reg;
int rc;
if (params->reg_idx >= ARRAY_SIZE(debug_stm_regs)) {
dev_err(hdev->dev, "Invalid register index in STM\n");
return -EINVAL;
}
base_reg = debug_stm_regs[params->reg_idx] - CFG_BASE;
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
if (params->enable) {
......@@ -288,10 +299,17 @@ static int goya_config_etf(struct hl_device *hdev,
struct hl_debug_params *params)
{
struct hl_debug_params_etf *input;
u64 base_reg = debug_etf_regs[params->reg_idx] - CFG_BASE;
u64 base_reg;
u32 val;
int rc;
if (params->reg_idx >= ARRAY_SIZE(debug_etf_regs)) {
dev_err(hdev->dev, "Invalid register index in ETF\n");
return -EINVAL;
}
base_reg = debug_etf_regs[params->reg_idx] - CFG_BASE;
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
val = RREG32(base_reg + 0x304);
......@@ -445,11 +463,18 @@ static int goya_config_etr(struct hl_device *hdev,
static int goya_config_funnel(struct hl_device *hdev,
struct hl_debug_params *params)
{
WREG32(debug_funnel_regs[params->reg_idx] - CFG_BASE + 0xFB0,
CORESIGHT_UNLOCK);
u64 base_reg;
if (params->reg_idx >= ARRAY_SIZE(debug_funnel_regs)) {
dev_err(hdev->dev, "Invalid register index in FUNNEL\n");
return -EINVAL;
}
base_reg = debug_funnel_regs[params->reg_idx] - CFG_BASE;
WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
WREG32(debug_funnel_regs[params->reg_idx] - CFG_BASE,
params->enable ? 0x33F : 0);
WREG32(base_reg, params->enable ? 0x33F : 0);
return 0;
}
......@@ -458,9 +483,16 @@ static int goya_config_bmon(struct hl_device *hdev,
struct hl_debug_params *params)
{
struct hl_debug_params_bmon *input;
u64 base_reg = debug_bmon_regs[params->reg_idx] - CFG_BASE;
u64 base_reg;
u32 pcie_base = 0;
if (params->reg_idx >= ARRAY_SIZE(debug_bmon_regs)) {
dev_err(hdev->dev, "Invalid register index in BMON\n");
return -EINVAL;
}
base_reg = debug_bmon_regs[params->reg_idx] - CFG_BASE;
WREG32(base_reg + 0x104, 1);
if (params->enable) {
......@@ -522,7 +554,7 @@ static int goya_config_bmon(struct hl_device *hdev,
static int goya_config_spmu(struct hl_device *hdev,
struct hl_debug_params *params)
{
u64 base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE;
u64 base_reg;
struct hl_debug_params_spmu *input = params->input;
u64 *output;
u32 output_arr_len;
......@@ -531,6 +563,13 @@ static int goya_config_spmu(struct hl_device *hdev,
u32 cycle_cnt_idx;
int i;
if (params->reg_idx >= ARRAY_SIZE(debug_spmu_regs)) {
dev_err(hdev->dev, "Invalid register index in SPMU\n");
return -EINVAL;
}
base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE;
if (params->enable) {
input = params->input;
......@@ -539,7 +578,13 @@ static int goya_config_spmu(struct hl_device *hdev,
if (input->event_types_num < 3) {
dev_err(hdev->dev,
"not enough values for SPMU enable\n");
"not enough event types values for SPMU enable\n");
return -EINVAL;
}
if (input->event_types_num > SPMU_MAX_COUNTERS) {
dev_err(hdev->dev,
"too many event types values for SPMU enable\n");
return -EINVAL;
}
......@@ -547,7 +592,8 @@ static int goya_config_spmu(struct hl_device *hdev,
WREG32(base_reg + 0xE04, 0x41013040);
for (i = 0 ; i < input->event_types_num ; i++)
WREG32(base_reg + 0x400 + i * 4, input->event_types[i]);
WREG32(base_reg + SPMU_EVENT_TYPES_OFFSET + i * 4,
input->event_types[i]);
WREG32(base_reg + 0xE04, 0x41013041);
WREG32(base_reg + 0xC00, 0x8000003F);
......@@ -567,6 +613,12 @@ static int goya_config_spmu(struct hl_device *hdev,
return -EINVAL;
}
if (events_num > SPMU_MAX_COUNTERS) {
dev_err(hdev->dev,
"too many events values for SPMU disable\n");
return -EINVAL;
}
WREG32(base_reg + 0xE04, 0x41013040);
for (i = 0 ; i < events_num ; i++)
......@@ -584,24 +636,11 @@ static int goya_config_spmu(struct hl_device *hdev,
return 0;
}
static int goya_config_timestamp(struct hl_device *hdev,
struct hl_debug_params *params)
{
WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
if (params->enable) {
WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
}
return 0;
}
int goya_debug_coresight(struct hl_device *hdev, void *data)
{
struct hl_debug_params *params = data;
u32 val;
int rc;
int rc = 0;
switch (params->op) {
case HL_DEBUG_OP_STM:
......@@ -623,7 +662,7 @@ int goya_debug_coresight(struct hl_device *hdev, void *data)
rc = goya_config_spmu(hdev, params);
break;
case HL_DEBUG_OP_TIMESTAMP:
rc = goya_config_timestamp(hdev, params);
/* Do nothing as this opcode is deprecated */
break;
default:
......
......@@ -230,18 +230,127 @@ static ssize_t ic_clk_curr_show(struct device *dev,
return sprintf(buf, "%lu\n", value);
}
static ssize_t pm_mng_profile_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
if (hl_device_disabled_or_in_reset(hdev))
return -ENODEV;
return sprintf(buf, "%s\n",
(hdev->pm_mng_profile == PM_AUTO) ? "auto" :
(hdev->pm_mng_profile == PM_MANUAL) ? "manual" :
"unknown");
}
static ssize_t pm_mng_profile_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct hl_device *hdev = dev_get_drvdata(dev);
if (hl_device_disabled_or_in_reset(hdev)) {
count = -ENODEV;
goto out;
}
mutex_lock(&hdev->fpriv_list_lock);
if (hdev->compute_ctx) {
dev_err(hdev->dev,
"Can't change PM profile while compute context is opened on the device\n");
count = -EPERM;
goto unlock_mutex;
}
if (strncmp("auto", buf, strlen("auto")) == 0) {
/* Make sure we are in LOW PLL when changing modes */
if (hdev->pm_mng_profile == PM_MANUAL) {
hdev->curr_pll_profile = PLL_HIGH;
hl_device_set_frequency(hdev, PLL_LOW);
hdev->pm_mng_profile = PM_AUTO;
}
} else if (strncmp("manual", buf, strlen("manual")) == 0) {
if (hdev->pm_mng_profile == PM_AUTO) {
/* Must release the lock because the work thread also
* takes this lock. But before we release it, set
* the mode to manual so nothing will change if a user
* suddenly opens the device
*/
hdev->pm_mng_profile = PM_MANUAL;
mutex_unlock(&hdev->fpriv_list_lock);
/* Flush the current work so we can return to the user
* knowing that he is the only one changing frequencies
*/
flush_delayed_work(&hdev->work_freq);
return count;
}
} else {
dev_err(hdev->dev, "value should be auto or manual\n");
count = -EINVAL;
}
unlock_mutex:
mutex_unlock(&hdev->fpriv_list_lock);
out:
return count;
}
static ssize_t high_pll_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
if (hl_device_disabled_or_in_reset(hdev))
return -ENODEV;
return sprintf(buf, "%u\n", hdev->high_pll);
}
static ssize_t high_pll_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct hl_device *hdev = dev_get_drvdata(dev);
long value;
int rc;
if (hl_device_disabled_or_in_reset(hdev)) {
count = -ENODEV;
goto out;
}
rc = kstrtoul(buf, 0, &value);
if (rc) {
count = -EINVAL;
goto out;
}
hdev->high_pll = value;
out:
return count;
}
static DEVICE_ATTR_RW(high_pll);
static DEVICE_ATTR_RW(ic_clk);
static DEVICE_ATTR_RO(ic_clk_curr);
static DEVICE_ATTR_RW(mme_clk);
static DEVICE_ATTR_RO(mme_clk_curr);
static DEVICE_ATTR_RW(pm_mng_profile);
static DEVICE_ATTR_RW(tpc_clk);
static DEVICE_ATTR_RO(tpc_clk_curr);
static struct attribute *goya_dev_attrs[] = {
&dev_attr_high_pll.attr,
&dev_attr_ic_clk.attr,
&dev_attr_ic_clk_curr.attr,
&dev_attr_mme_clk.attr,
&dev_attr_mme_clk_curr.attr,
&dev_attr_pm_mng_profile.attr,
&dev_attr_tpc_clk.attr,
&dev_attr_tpc_clk_curr.attr,
NULL,
......
This diff is collapsed.
......@@ -95,80 +95,127 @@ int hl_device_open(struct inode *inode, struct file *filp)
return -ENXIO;
}
mutex_lock(&hdev->fd_open_cnt_lock);
hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
if (!hpriv)
return -ENOMEM;
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
mutex_init(&hpriv->restore_phase_mutex);
kref_init(&hpriv->refcount);
nonseekable_open(inode, filp);
hl_cb_mgr_init(&hpriv->cb_mgr);
hl_ctx_mgr_init(&hpriv->ctx_mgr);
hpriv->taskpid = find_get_pid(current->pid);
mutex_lock(&hdev->fpriv_list_lock);
if (hl_device_disabled_or_in_reset(hdev)) {
dev_err_ratelimited(hdev->dev,
"Can't open %s because it is disabled or in reset\n",
dev_name(hdev->dev));
mutex_unlock(&hdev->fd_open_cnt_lock);
return -EPERM;
rc = -EPERM;
goto out_err;
}
if (hdev->in_debug) {
dev_err_ratelimited(hdev->dev,
"Can't open %s because it is being debugged by another user\n",
dev_name(hdev->dev));
mutex_unlock(&hdev->fd_open_cnt_lock);
return -EPERM;
rc = -EPERM;
goto out_err;
}
if (atomic_read(&hdev->fd_open_cnt)) {
dev_info_ratelimited(hdev->dev,
if (hdev->compute_ctx) {
dev_dbg_ratelimited(hdev->dev,
"Can't open %s because another user is working on it\n",
dev_name(hdev->dev));
mutex_unlock(&hdev->fd_open_cnt_lock);
return -EBUSY;
}
atomic_inc(&hdev->fd_open_cnt);
mutex_unlock(&hdev->fd_open_cnt_lock);
hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
if (!hpriv) {
rc = -ENOMEM;
goto close_device;
rc = -EBUSY;
goto out_err;
}
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
mutex_init(&hpriv->restore_phase_mutex);
kref_init(&hpriv->refcount);
nonseekable_open(inode, filp);
hl_cb_mgr_init(&hpriv->cb_mgr);
hl_ctx_mgr_init(&hpriv->ctx_mgr);
rc = hl_ctx_create(hdev, hpriv);
if (rc) {
dev_err(hdev->dev, "Failed to open FD (CTX fail)\n");
dev_err(hdev->dev, "Failed to create context %d\n", rc);
goto out_err;
}
hpriv->taskpid = find_get_pid(current->pid);
/*
* Device is IDLE at this point so it is legal to change PLLs. There
* is no need to check anything because if the PLL is already HIGH, the
* set function will return without doing anything
/* Device is IDLE at this point so it is legal to change PLLs.
* There is no need to check anything because if the PLL is
* already HIGH, the set function will return without doing
* anything
*/
hl_device_set_frequency(hdev, PLL_HIGH);
list_add(&hpriv->dev_node, &hdev->fpriv_list);
mutex_unlock(&hdev->fpriv_list_lock);
hl_debugfs_add_file(hpriv);
return 0;
out_err:
filp->private_data = NULL;
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
mutex_unlock(&hdev->fpriv_list_lock);
hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
filp->private_data = NULL;
mutex_destroy(&hpriv->restore_phase_mutex);
put_pid(hpriv->taskpid);
kfree(hpriv);
return rc;
}
int hl_device_open_ctrl(struct inode *inode, struct file *filp)
{
struct hl_device *hdev;
struct hl_fpriv *hpriv;
int rc;
mutex_lock(&hl_devs_idr_lock);
hdev = idr_find(&hl_devs_idr, iminor(inode));
mutex_unlock(&hl_devs_idr_lock);
if (!hdev) {
pr_err("Couldn't find device %d:%d\n",
imajor(inode), iminor(inode));
return -ENXIO;
}
hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
if (!hpriv)
return -ENOMEM;
mutex_lock(&hdev->fpriv_list_lock);
if (hl_device_disabled_or_in_reset(hdev)) {
dev_err_ratelimited(hdev->dev_ctrl,
"Can't open %s because it is disabled or in reset\n",
dev_name(hdev->dev_ctrl));
rc = -EPERM;
goto out_err;
}
list_add(&hpriv->dev_node, &hdev->fpriv_list);
mutex_unlock(&hdev->fpriv_list_lock);
hpriv->hdev = hdev;
filp->private_data = hpriv;
hpriv->filp = filp;
hpriv->is_control = true;
nonseekable_open(inode, filp);
hpriv->taskpid = find_get_pid(current->pid);
close_device:
atomic_dec(&hdev->fd_open_cnt);
return 0;
out_err:
mutex_unlock(&hdev->fpriv_list_lock);
kfree(hpriv);
return rc;
}
......@@ -199,7 +246,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
enum hl_asic_type asic_type, int minor)
{
struct hl_device *hdev;
int rc;
int rc, main_id, ctrl_id = 0;
*dev = NULL;
......@@ -240,33 +287,34 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
mutex_lock(&hl_devs_idr_lock);
if (minor == -1) {
rc = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
/* Always save 2 numbers, 1 for main device and 1 for control.
* They must be consecutive
*/
main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
GFP_KERNEL);
} else {
void *old_idr = idr_replace(&hl_devs_idr, hdev, minor);
if (IS_ERR_VALUE(old_idr)) {
rc = PTR_ERR(old_idr);
pr_err("Error %d when trying to replace minor %d\n",
rc, minor);
mutex_unlock(&hl_devs_idr_lock);
goto free_hdev;
}
rc = minor;
}
if (main_id >= 0)
ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
main_id + 2, GFP_KERNEL);
mutex_unlock(&hl_devs_idr_lock);
if (rc < 0) {
if (rc == -ENOSPC) {
if ((main_id < 0) || (ctrl_id < 0)) {
if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
pr_err("too many devices in the system\n");
rc = -EBUSY;
if (main_id >= 0) {
mutex_lock(&hl_devs_idr_lock);
idr_remove(&hl_devs_idr, main_id);
mutex_unlock(&hl_devs_idr_lock);
}
rc = -EBUSY;
goto free_hdev;
}
hdev->id = rc;
hdev->id = main_id;
hdev->id_control = ctrl_id;
*dev = hdev;
......@@ -288,6 +336,7 @@ void destroy_hdev(struct hl_device *hdev)
/* Remove device from the device list */
mutex_lock(&hl_devs_idr_lock);
idr_remove(&hl_devs_idr, hdev->id);
idr_remove(&hl_devs_idr, hdev->id_control);
mutex_unlock(&hl_devs_idr_lock);
kfree(hdev);
......@@ -295,8 +344,7 @@ void destroy_hdev(struct hl_device *hdev)
static int hl_pmops_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct hl_device *hdev = pci_get_drvdata(pdev);
struct hl_device *hdev = dev_get_drvdata(dev);
pr_debug("Going to suspend PCI device\n");
......@@ -310,8 +358,7 @@ static int hl_pmops_suspend(struct device *dev)
static int hl_pmops_resume(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct hl_device *hdev = pci_get_drvdata(pdev);
struct hl_device *hdev = dev_get_drvdata(dev);
pr_debug("Going to resume PCI device\n");
......
......@@ -65,7 +65,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.num_of_events = prop->num_of_events;
memcpy(hw_ip.armcp_version,
prop->armcp_info.armcp_version, VERSION_MAX_LEN);
hw_ip.armcp_cpld_version = __le32_to_cpu(prop->armcp_info.cpld_version);
hw_ip.armcp_cpld_version = le32_to_cpu(prop->armcp_info.cpld_version);
hw_ip.psoc_pci_pll_nr = prop->psoc_pci_pll_nr;
hw_ip.psoc_pci_pll_nf = prop->psoc_pci_pll_nf;
hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
......@@ -75,7 +75,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
}
static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
static int hw_events_info(struct hl_device *hdev, bool aggregate,
struct hl_info_args *args)
{
u32 size, max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
......@@ -84,13 +85,14 @@ static int hw_events_info(struct hl_device *hdev, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
arr = hdev->asic_funcs->get_events_stat(hdev, &size);
arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size);
return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
}
static int dram_usage_info(struct hl_device *hdev, struct hl_info_args *args)
static int dram_usage_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
struct hl_device *hdev = hpriv->hdev;
struct hl_info_dram_usage dram_usage = {0};
u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
......@@ -104,7 +106,9 @@ static int dram_usage_info(struct hl_device *hdev, struct hl_info_args *args)
prop->dram_base_address);
dram_usage.dram_free_mem = (prop->dram_size - dram_kmd_size) -
atomic64_read(&hdev->dram_used_mem);
dram_usage.ctx_dram_mem = atomic64_read(&hdev->user_ctx->dram_phys_mem);
if (hpriv->ctx)
dram_usage.ctx_dram_mem =
atomic64_read(&hpriv->ctx->dram_phys_mem);
return copy_to_user(out, &dram_usage,
min((size_t) max_size, sizeof(dram_usage))) ? -EFAULT : 0;
......@@ -141,13 +145,16 @@ static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
params->op = args->op;
if (args->input_ptr && args->input_size) {
input = memdup_user(u64_to_user_ptr(args->input_ptr),
args->input_size);
if (IS_ERR(input)) {
rc = PTR_ERR(input);
input = NULL;
dev_err(hdev->dev,
"error %d when copying input debug data\n", rc);
input = kzalloc(hl_debug_struct_size[args->op], GFP_KERNEL);
if (!input) {
rc = -ENOMEM;
goto out;
}
if (copy_from_user(input, u64_to_user_ptr(args->input_ptr),
args->input_size)) {
rc = -EFAULT;
dev_err(hdev->dev, "failed to copy input debug data\n");
goto out;
}
......@@ -191,42 +198,81 @@ static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
return rc;
}
static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
static int device_utilization(struct hl_device *hdev, struct hl_info_args *args)
{
struct hl_info_device_utilization device_util = {0};
u32 max_size = args->return_size;
void __user *out = (void __user *) (uintptr_t) args->return_pointer;
if ((!max_size) || (!out))
return -EINVAL;
if ((args->period_ms < 100) || (args->period_ms > 1000) ||
(args->period_ms % 100)) {
dev_err(hdev->dev,
"period %u must be between 100 - 1000 and must be divisible by 100\n",
args->period_ms);
return -EINVAL;
}
device_util.utilization = hl_device_utilization(hdev, args->period_ms);
return copy_to_user(out, &device_util,
min((size_t) max_size, sizeof(device_util))) ? -EFAULT : 0;
}
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
struct hl_info_args *args = data;
struct hl_device *hdev = hpriv->hdev;
int rc;
/* We want to return device status even if it disabled or in reset */
if (args->op == HL_INFO_DEVICE_STATUS)
/*
* Information is returned for the following opcodes even if the device
* is disabled or in reset.
*/
switch (args->op) {
case HL_INFO_HW_IP_INFO:
return hw_ip_info(hdev, args);
case HL_INFO_DEVICE_STATUS:
return device_status_info(hdev, args);
default:
break;
}
if (hl_device_disabled_or_in_reset(hdev)) {
dev_warn_ratelimited(hdev->dev,
dev_warn_ratelimited(dev,
"Device is %s. Can't execute INFO IOCTL\n",
atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
return -EBUSY;
}
switch (args->op) {
case HL_INFO_HW_IP_INFO:
rc = hw_ip_info(hdev, args);
break;
case HL_INFO_HW_EVENTS:
rc = hw_events_info(hdev, args);
rc = hw_events_info(hdev, false, args);
break;
case HL_INFO_DRAM_USAGE:
rc = dram_usage_info(hdev, args);
rc = dram_usage_info(hpriv, args);
break;
case HL_INFO_HW_IDLE:
rc = hw_idle(hdev, args);
break;
case HL_INFO_DEVICE_UTILIZATION:
rc = device_utilization(hdev, args);
break;
case HL_INFO_HW_EVENTS_AGGREGATE:
rc = hw_events_info(hdev, true, args);
break;
default:
dev_err(hdev->dev, "Invalid request %d\n", args->op);
dev_err(dev, "Invalid request %d\n", args->op);
rc = -ENOTTY;
break;
}
......@@ -234,6 +280,16 @@ static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
return rc;
}
static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
{
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev);
}
static int hl_info_ioctl_control(struct hl_fpriv *hpriv, void *data)
{
return _hl_info_ioctl(hpriv, data, hpriv->hdev->dev_ctrl);
}
static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
{
struct hl_debug_args *args = data;
......@@ -288,30 +344,37 @@ static const struct hl_ioctl_desc hl_ioctls[] = {
HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
};
#define HL_CORE_IOCTL_COUNT ARRAY_SIZE(hl_ioctls)
static const struct hl_ioctl_desc hl_ioctls_control[] = {
HL_IOCTL_DEF(HL_IOCTL_INFO, hl_info_ioctl_control)
};
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
static long _hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg,
const struct hl_ioctl_desc *ioctl, struct device *dev)
{
struct hl_fpriv *hpriv = filep->private_data;
struct hl_device *hdev = hpriv->hdev;
hl_ioctl_t *func;
const struct hl_ioctl_desc *ioctl = NULL;
unsigned int nr = _IOC_NR(cmd);
char stack_kdata[128] = {0};
char *kdata = NULL;
unsigned int usize, asize;
hl_ioctl_t *func;
u32 hl_size;
int retcode;
if (hdev->hard_reset_pending) {
dev_crit_ratelimited(hdev->dev,
dev_crit_ratelimited(hdev->dev_ctrl,
"Device HARD reset pending! Please close FD\n");
return -ENODEV;
}
if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
u32 hl_size;
/* Do not trust userspace, use our own definition */
func = ioctl->func;
ioctl = &hl_ioctls[nr];
if (unlikely(!func)) {
dev_dbg(dev, "no function\n");
retcode = -ENOTTY;
goto out_err;
}
hl_size = _IOC_SIZE(ioctl->cmd);
usize = asize = _IOC_SIZE(cmd);
......@@ -319,20 +382,6 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
asize = hl_size;
cmd = ioctl->cmd;
} else {
dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
task_pid_nr(current), nr);
return -ENOTTY;
}
/* Do not trust userspace, use our own definition */
func = ioctl->func;
if (unlikely(!func)) {
dev_dbg(hdev->dev, "no function\n");
retcode = -ENOTTY;
goto out_err;
}
if (cmd & (IOC_IN | IOC_OUT)) {
if (asize <= sizeof(stack_kdata)) {
......@@ -363,8 +412,7 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
out_err:
if (retcode)
dev_dbg(hdev->dev,
"error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
dev_dbg(dev, "error in ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
task_pid_nr(current), cmd, nr);
if (kdata != stack_kdata)
......@@ -372,3 +420,39 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
return retcode;
}
long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct hl_fpriv *hpriv = filep->private_data;
struct hl_device *hdev = hpriv->hdev;
const struct hl_ioctl_desc *ioctl = NULL;
unsigned int nr = _IOC_NR(cmd);
if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
ioctl = &hl_ioctls[nr];
} else {
dev_err(hdev->dev, "invalid ioctl: pid=%d, nr=0x%02x\n",
task_pid_nr(current), nr);
return -ENOTTY;
}
return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev);
}
long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct hl_fpriv *hpriv = filep->private_data;
struct hl_device *hdev = hpriv->hdev;
const struct hl_ioctl_desc *ioctl = NULL;
unsigned int nr = _IOC_NR(cmd);
if (nr == _IOC_NR(HL_IOCTL_INFO)) {
ioctl = &hl_ioctls_control[nr];
} else {
dev_err(hdev->dev_ctrl, "invalid ioctl: pid=%d, nr=0x%02x\n",
task_pid_nr(current), nr);
return -ENOTTY;
}
return _hl_ioctl(filep, cmd, arg, ioctl, hdev->dev_ctrl);
}
......@@ -80,9 +80,9 @@ static void ext_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
bd = (struct hl_bd *) (uintptr_t) q->kernel_address;
bd += hl_pi_2_offset(q->pi);
bd->ctl = __cpu_to_le32(ctl);
bd->len = __cpu_to_le32(len);
bd->ptr = __cpu_to_le64(ptr);
bd->ctl = cpu_to_le32(ctl);
bd->len = cpu_to_le32(len);
bd->ptr = cpu_to_le64(ptr);
q->pi = hl_queue_inc_ptr(q->pi);
hdev->asic_funcs->ring_doorbell(hdev, q->hw_queue_id, q->pi);
......@@ -249,7 +249,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
len = job->job_cb_size;
ptr = cb->bus_address;
cq_pkt.data = __cpu_to_le32(
cq_pkt.data = cpu_to_le32(
((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
& CQ_ENTRY_SHADOW_INDEX_MASK) |
(1 << CQ_ENTRY_SHADOW_INDEX_VALID_SHIFT) |
......@@ -267,7 +267,7 @@ static void ext_hw_queue_schedule_job(struct hl_cs_job *job)
hdev->asic_funcs->add_end_of_cb_packets(hdev, cb->kernel_address, len,
cq_addr,
__le32_to_cpu(cq_pkt.data),
le32_to_cpu(cq_pkt.data),
q->hw_queue_id);
q->shadow_queue[hl_pi_2_offset(q->pi)] = job;
......@@ -364,7 +364,13 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
spin_unlock(&hdev->hw_queues_mirror_lock);
}
atomic_inc(&hdev->cs_active_cnt);
if (!hdev->cs_active_cnt++) {
struct hl_device_idle_busy_ts *ts;
ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx];
ts->busy_to_idle_ts = ktime_set(0, 0);
ts->idle_to_busy_ts = ktime_get();
}
list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
if (job->ext_queue)
......
......@@ -26,7 +26,7 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev,
int rc, i, j;
for (i = 0 ; i < ARMCP_MAX_SENSORS ; i++) {
type = __le32_to_cpu(sensors_arr[i].type);
type = le32_to_cpu(sensors_arr[i].type);
if ((type == 0) && (sensors_arr[i].flags == 0))
break;
......@@ -58,10 +58,10 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev,
}
for (i = 0 ; i < arr_size ; i++) {
type = __le32_to_cpu(sensors_arr[i].type);
type = le32_to_cpu(sensors_arr[i].type);
curr_arr = sensors_by_type[type];
curr_arr[sensors_by_type_next_index[type]++] =
__le32_to_cpu(sensors_arr[i].flags);
le32_to_cpu(sensors_arr[i].flags);
}
channels_info = kcalloc(num_active_sensor_types + 1,
......@@ -273,7 +273,7 @@ long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr)
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEMPERATURE_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
......@@ -299,7 +299,7 @@ long hl_get_voltage(struct hl_device *hdev, int sensor_index, u32 attr)
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_VOLTAGE_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
......@@ -325,7 +325,7 @@ long hl_get_current(struct hl_device *hdev, int sensor_index, u32 attr)
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_CURRENT_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
......@@ -351,7 +351,7 @@ long hl_get_fan_speed(struct hl_device *hdev, int sensor_index, u32 attr)
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FAN_SPEED_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
......@@ -377,7 +377,7 @@ long hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr)
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_PWM_GET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
......@@ -403,11 +403,11 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_PWM_SET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_PWM_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.sensor_index = __cpu_to_le16(sensor_index);
pkt.type = __cpu_to_le16(attr);
pkt.value = __cpu_to_le64(value);
pkt.value = cpu_to_le64(value);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
SENSORS_PKT_TIMEOUT, NULL);
......@@ -421,6 +421,7 @@ void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr,
int hl_hwmon_init(struct hl_device *hdev)
{
struct device *dev = hdev->pdev ? &hdev->pdev->dev : hdev->dev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
if ((hdev->hwmon_initialized) || !(hdev->fw_loading))
......@@ -430,7 +431,8 @@ int hl_hwmon_init(struct hl_device *hdev)
hdev->hl_chip_info->ops = &hl_hwmon_ops;
hdev->hwmon_dev = hwmon_device_register_with_info(dev,
"habanalabs", hdev, hdev->hl_chip_info, NULL);
prop->armcp_info.card_name, hdev,
hdev->hl_chip_info, NULL);
if (IS_ERR(hdev->hwmon_dev)) {
rc = PTR_ERR(hdev->hwmon_dev);
dev_err(hdev->dev,
......
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2018 HabanaLabs, Ltd.
* Copyright 2016-2019 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
......@@ -41,33 +41,34 @@ enum pq_init_status {
/*
* ArmCP Primary Queue Packets
*
* During normal operation, KMD needs to send various messages to ArmCP,
* usually either to SET some value into a H/W periphery or to GET the current
* value of some H/W periphery. For example, SET the frequency of MME/TPC and
* GET the value of the thermal sensor.
*
* These messages can be initiated either by the User application or by KMD
* itself, e.g. power management code. In either case, the communication from
* KMD to ArmCP will *always* be in synchronous mode, meaning that KMD will
* send a single message and poll until the message was acknowledged and the
* results are ready (if results are needed).
*
* This means that only a single message can be sent at a time and KMD must
* wait for its result before sending the next message. Having said that,
* because these are control messages which are sent in a relatively low
* During normal operation, the host's kernel driver needs to send various
* messages to ArmCP, usually either to SET some value into a H/W periphery or
* to GET the current value of some H/W periphery. For example, SET the
* frequency of MME/TPC and GET the value of the thermal sensor.
*
* These messages can be initiated either by the User application or by the
* host's driver itself, e.g. power management code. In either case, the
* communication from the host's driver to ArmCP will *always* be in
* synchronous mode, meaning that the host will send a single message and poll
* until the message was acknowledged and the results are ready (if results are
* needed).
*
* This means that only a single message can be sent at a time and the host's
* driver must wait for its result before sending the next message. Having said
* that, because these are control messages which are sent in a relatively low
* frequency, this limitation seems acceptable. It's important to note that
* in case of multiple devices, messages to different devices *can* be sent
* at the same time.
*
* The message, inputs/outputs (if relevant) and fence object will be located
* on the device DDR at an address that will be determined by KMD. During
* device initialization phase, KMD will pass to ArmCP that address. Most of
* the message types will contain inputs/outputs inside the message itself.
* The common part of each message will contain the opcode of the message (its
* type) and a field representing a fence object.
*
* When KMD wishes to send a message to ArmCP, it will write the message
* contents to the device DDR, clear the fence object and then write the
* on the device DDR at an address that will be determined by the host's driver.
* During device initialization phase, the host will pass to ArmCP that address.
* Most of the message types will contain inputs/outputs inside the message
* itself. The common part of each message will contain the opcode of the
* message (its type) and a field representing a fence object.
*
* When the host's driver wishes to send a message to ArmCP, it will write the
* message contents to the device DDR, clear the fence object and then write the
* value 484 to the mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR register to issue
* the 484 interrupt-id to the ARM core.
*
......@@ -78,12 +79,13 @@ enum pq_init_status {
* device DDR and then write to the fence object. If an error occurred, ArmCP
* will fill the rc field with the right error code.
*
* In the meantime, KMD will poll on the fence object. Once KMD sees that the
* fence object is signaled, it will read the results from the device DDR
* (if relevant) and resume the code execution in KMD.
* In the meantime, the host's driver will poll on the fence object. Once the
* host sees that the fence object is signaled, it will read the results from
* the device DDR (if relevant) and resume the code execution in the host's
* driver.
*
* To use QMAN packets, the opcode must be the QMAN opcode, shifted by 8
* so the value being put by the KMD matches the value read by ArmCP
* so the value being put by the host's driver matches the value read by ArmCP
*
* Non-QMAN packets should be limited to values 1 through (2^8 - 1)
*
......@@ -148,9 +150,9 @@ enum pq_init_status {
*
* ARMCP_PACKET_INFO_GET -
* Fetch information from the device as specified in the packet's
* structure. KMD passes the max size it allows the ArmCP to write to
* the structure, to prevent data corruption in case of mismatched
* KMD/FW versions.
* structure. The host's driver passes the max size it allows the ArmCP to
* write to the structure, to prevent data corruption in case of
* mismatched driver/FW versions.
*
* ARMCP_PACKET_FLASH_PROGRAM_REMOVED - this packet was removed
*
......@@ -183,9 +185,9 @@ enum pq_init_status {
* ARMCP_PACKET_EEPROM_DATA_GET -
* Get EEPROM data from the ArmCP kernel. The buffer is specified in the
* addr field. The CPU will put the returned data size in the result
* field. In addition, KMD passes the max size it allows the ArmCP to
* write to the structure, to prevent data corruption in case of
* mismatched KMD/FW versions.
* field. In addition, the host's driver passes the max size it allows the
* ArmCP to write to the structure, to prevent data corruption in case of
* mismatched driver/FW versions.
*
*/
......@@ -231,7 +233,7 @@ struct armcp_packet {
__le32 ctl;
__le32 fence; /* Signal to KMD that message is completed */
__le32 fence; /* Signal to host that message is completed */
union {
struct {/* For temperature/current/voltage/fan/pwm get/set */
......@@ -310,6 +312,7 @@ struct eq_generic_event {
* ArmCP info
*/
#define CARD_NAME_MAX_LEN 16
#define VERSION_MAX_LEN 128
#define ARMCP_MAX_SENSORS 128
......@@ -318,6 +321,19 @@ struct armcp_sensor {
__le32 flags;
};
/**
* struct armcp_info - Info from ArmCP that is necessary to the host's driver
* @sensors: available sensors description.
* @kernel_version: ArmCP linux kernel version.
* @reserved: reserved field.
* @cpld_version: CPLD programmed F/W version.
* @infineon_version: Infineon main DC-DC version.
* @fuse_version: silicon production FUSE information.
* @thermal_version: thermald S/W version.
* @armcp_version: ArmCP S/W version.
* @dram_size: available DRAM size.
* @card_name: card name that will be displayed in HWMON subsystem on the host
*/
struct armcp_info {
struct armcp_sensor sensors[ARMCP_MAX_SENSORS];
__u8 kernel_version[VERSION_MAX_LEN];
......@@ -328,6 +344,7 @@ struct armcp_info {
__u8 thermal_version[VERSION_MAX_LEN];
__u8 armcp_version[VERSION_MAX_LEN];
__le64 dram_size;
char card_name[CARD_NAME_MAX_LEN];
};
#endif /* ARMCP_IF_H */
......@@ -38,4 +38,6 @@
#define TPC_MAX_NUM 8
#define MME_MAX_NUM 1
#endif /* GOYA_H */
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2019 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#ifndef GOYA_REG_MAP_H_
#define GOYA_REG_MAP_H_
/*
* PSOC scratch-pad registers
*/
#define mmCPU_PQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
#define mmCPU_PQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
#define mmCPU_EQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
#define mmCPU_EQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
#define mmCPU_EQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_4
#define mmCPU_PQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_5
#define mmCPU_EQ_CI mmPSOC_GLOBAL_CONF_SCRATCHPAD_6
#define mmCPU_PQ_INIT_STATUS mmPSOC_GLOBAL_CONF_SCRATCHPAD_7
#define mmCPU_CQ_BASE_ADDR_LOW mmPSOC_GLOBAL_CONF_SCRATCHPAD_8
#define mmCPU_CQ_BASE_ADDR_HIGH mmPSOC_GLOBAL_CONF_SCRATCHPAD_9
#define mmCPU_CQ_LENGTH mmPSOC_GLOBAL_CONF_SCRATCHPAD_10
#define mmUPD_STS mmPSOC_GLOBAL_CONF_SCRATCHPAD_26
#define mmUPD_CMD mmPSOC_GLOBAL_CONF_SCRATCHPAD_27
#define mmPREBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_28
#define mmUBOOT_VER_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_29
#define mmUBOOT_OFFSET mmPSOC_GLOBAL_CONF_SCRATCHPAD_30
#define mmBTL_ID mmPSOC_GLOBAL_CONF_SCRATCHPAD_31
#define mmHW_STATE mmPSOC_GLOBAL_CONF_APP_STATUS
#endif /* GOYA_REG_MAP_H_ */
......@@ -160,7 +160,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
while (1) {
bool entry_ready =
((__le32_to_cpu(eq_base[eq->ci].hdr.ctl) &
((le32_to_cpu(eq_base[eq->ci].hdr.ctl) &
EQ_CTL_READY_MASK) >> EQ_CTL_READY_SHIFT);
if (!entry_ready)
......@@ -194,7 +194,7 @@ irqreturn_t hl_irq_handler_eq(int irq, void *arg)
skip_irq:
/* Clear EQ entry ready bit */
eq_entry->hdr.ctl =
__cpu_to_le32(__le32_to_cpu(eq_entry->hdr.ctl) &
cpu_to_le32(le32_to_cpu(eq_entry->hdr.ctl) &
~EQ_CTL_READY_MASK);
eq->ci = hl_eq_inc_ptr(eq->ci);
......
......@@ -21,12 +21,12 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
memset(&pkt, 0, sizeof(pkt));
if (curr)
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_CURR_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
else
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.pll_index = __cpu_to_le32(pll_index);
pkt.pll_index = cpu_to_le32(pll_index);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
SET_CLK_PKT_TIMEOUT, &result);
......@@ -48,10 +48,10 @@ void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_FREQUENCY_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.pll_index = __cpu_to_le32(pll_index);
pkt.value = __cpu_to_le64(freq);
pkt.pll_index = cpu_to_le32(pll_index);
pkt.value = cpu_to_le64(freq);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
SET_CLK_PKT_TIMEOUT, NULL);
......@@ -70,7 +70,7 @@ u64 hl_get_max_power(struct hl_device *hdev)
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_GET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
......@@ -91,9 +91,9 @@ void hl_set_max_power(struct hl_device *hdev, u64 value)
memset(&pkt, 0, sizeof(pkt));
pkt.ctl = __cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
pkt.ctl = cpu_to_le32(ARMCP_PACKET_MAX_POWER_SET <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.value = __cpu_to_le64(value);
pkt.value = cpu_to_le64(value);
rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
SET_PWR_PKT_TIMEOUT, NULL);
......@@ -102,100 +102,6 @@ void hl_set_max_power(struct hl_device *hdev, u64 value)
dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
}
static ssize_t pm_mng_profile_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
if (hl_device_disabled_or_in_reset(hdev))
return -ENODEV;
return sprintf(buf, "%s\n",
(hdev->pm_mng_profile == PM_AUTO) ? "auto" :
(hdev->pm_mng_profile == PM_MANUAL) ? "manual" :
"unknown");
}
static ssize_t pm_mng_profile_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct hl_device *hdev = dev_get_drvdata(dev);
if (hl_device_disabled_or_in_reset(hdev)) {
count = -ENODEV;
goto out;
}
mutex_lock(&hdev->fd_open_cnt_lock);
if (atomic_read(&hdev->fd_open_cnt) > 0) {
dev_err(hdev->dev,
"Can't change PM profile while user process is opened on the device\n");
count = -EPERM;
goto unlock_mutex;
}
if (strncmp("auto", buf, strlen("auto")) == 0) {
/* Make sure we are in LOW PLL when changing modes */
if (hdev->pm_mng_profile == PM_MANUAL) {
atomic_set(&hdev->curr_pll_profile, PLL_HIGH);
hl_device_set_frequency(hdev, PLL_LOW);
hdev->pm_mng_profile = PM_AUTO;
}
} else if (strncmp("manual", buf, strlen("manual")) == 0) {
/* Make sure we are in LOW PLL when changing modes */
if (hdev->pm_mng_profile == PM_AUTO) {
flush_delayed_work(&hdev->work_freq);
hdev->pm_mng_profile = PM_MANUAL;
}
} else {
dev_err(hdev->dev, "value should be auto or manual\n");
count = -EINVAL;
goto unlock_mutex;
}
unlock_mutex:
mutex_unlock(&hdev->fd_open_cnt_lock);
out:
return count;
}
static ssize_t high_pll_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
if (hl_device_disabled_or_in_reset(hdev))
return -ENODEV;
return sprintf(buf, "%u\n", hdev->high_pll);
}
static ssize_t high_pll_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
struct hl_device *hdev = dev_get_drvdata(dev);
long value;
int rc;
if (hl_device_disabled_or_in_reset(hdev)) {
count = -ENODEV;
goto out;
}
rc = kstrtoul(buf, 0, &value);
if (rc) {
count = -EINVAL;
goto out;
}
hdev->high_pll = value;
out:
return count;
}
static ssize_t uboot_ver_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
......@@ -351,14 +257,6 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%s\n", str);
}
static ssize_t write_open_cnt_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct hl_device *hdev = dev_get_drvdata(dev);
return sprintf(buf, "%d\n", hdev->user_ctx ? 1 : 0);
}
static ssize_t soft_reset_cnt_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
......@@ -450,18 +348,15 @@ static DEVICE_ATTR_RO(device_type);
static DEVICE_ATTR_RO(fuse_ver);
static DEVICE_ATTR_WO(hard_reset);
static DEVICE_ATTR_RO(hard_reset_cnt);
static DEVICE_ATTR_RW(high_pll);
static DEVICE_ATTR_RO(infineon_ver);
static DEVICE_ATTR_RW(max_power);
static DEVICE_ATTR_RO(pci_addr);
static DEVICE_ATTR_RW(pm_mng_profile);
static DEVICE_ATTR_RO(preboot_btl_ver);
static DEVICE_ATTR_WO(soft_reset);
static DEVICE_ATTR_RO(soft_reset_cnt);
static DEVICE_ATTR_RO(status);
static DEVICE_ATTR_RO(thermal_ver);
static DEVICE_ATTR_RO(uboot_ver);
static DEVICE_ATTR_RO(write_open_cnt);
static struct bin_attribute bin_attr_eeprom = {
.attr = {.name = "eeprom", .mode = (0444)},
......@@ -477,18 +372,15 @@ static struct attribute *hl_dev_attrs[] = {
&dev_attr_fuse_ver.attr,
&dev_attr_hard_reset.attr,
&dev_attr_hard_reset_cnt.attr,
&dev_attr_high_pll.attr,
&dev_attr_infineon_ver.attr,
&dev_attr_max_power.attr,
&dev_attr_pci_addr.attr,
&dev_attr_pm_mng_profile.attr,
&dev_attr_preboot_btl_ver.attr,
&dev_attr_soft_reset.attr,
&dev_attr_soft_reset_cnt.attr,
&dev_attr_status.attr,
&dev_attr_thermal_ver.attr,
&dev_attr_uboot_ver.attr,
&dev_attr_write_open_cnt.attr,
NULL,
};
......
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
*
* Copyright 2016-2018 HabanaLabs, Ltd.
* Copyright 2016-2019 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
......@@ -28,20 +28,20 @@
enum goya_queue_id {
GOYA_QUEUE_ID_DMA_0 = 0,
GOYA_QUEUE_ID_DMA_1,
GOYA_QUEUE_ID_DMA_2,
GOYA_QUEUE_ID_DMA_3,
GOYA_QUEUE_ID_DMA_4,
GOYA_QUEUE_ID_CPU_PQ,
GOYA_QUEUE_ID_MME, /* Internal queues start here */
GOYA_QUEUE_ID_TPC0,
GOYA_QUEUE_ID_TPC1,
GOYA_QUEUE_ID_TPC2,
GOYA_QUEUE_ID_TPC3,
GOYA_QUEUE_ID_TPC4,
GOYA_QUEUE_ID_TPC5,
GOYA_QUEUE_ID_TPC6,
GOYA_QUEUE_ID_TPC7,
GOYA_QUEUE_ID_DMA_1 = 1,
GOYA_QUEUE_ID_DMA_2 = 2,
GOYA_QUEUE_ID_DMA_3 = 3,
GOYA_QUEUE_ID_DMA_4 = 4,
GOYA_QUEUE_ID_CPU_PQ = 5,
GOYA_QUEUE_ID_MME = 6, /* Internal queues start here */
GOYA_QUEUE_ID_TPC0 = 7,
GOYA_QUEUE_ID_TPC1 = 8,
GOYA_QUEUE_ID_TPC2 = 9,
GOYA_QUEUE_ID_TPC3 = 10,
GOYA_QUEUE_ID_TPC4 = 11,
GOYA_QUEUE_ID_TPC5 = 12,
GOYA_QUEUE_ID_TPC6 = 13,
GOYA_QUEUE_ID_TPC7 = 14,
GOYA_QUEUE_ID_SIZE
};
......@@ -75,12 +75,34 @@ enum hl_device_status {
HL_DEVICE_STATUS_MALFUNCTION
};
/* Opcode for management ioctl */
/* Opcode for management ioctl
*
* HW_IP_INFO - Receive information about different IP blocks in the
* device.
* HL_INFO_HW_EVENTS - Receive an array describing how many times each event
* occurred since the last hard reset.
* HL_INFO_DRAM_USAGE - Retrieve the dram usage inside the device and of the
* specific context. This is relevant only for devices
* where the dram is managed by the kernel driver
* HL_INFO_HW_IDLE - Retrieve information about the idle status of each
* internal engine.
* HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't
* require an open context.
* HL_INFO_DEVICE_UTILIZATION - Retrieve the total utilization of the device
* over the last period specified by the user.
* The period can be between 100ms to 1s, in
* resolution of 100ms. The return value is a
* percentage of the utilization rate.
* HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each
* event occurred since the driver was loaded.
*/
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
#define HL_INFO_DRAM_USAGE 2
#define HL_INFO_HW_IDLE 3
#define HL_INFO_DEVICE_STATUS 4
#define HL_INFO_DEVICE_UTILIZATION 6
#define HL_INFO_HW_EVENTS_AGGREGATE 7
#define HL_INFO_VERSION_MAX_LEN 128
......@@ -122,6 +144,11 @@ struct hl_info_device_status {
__u32 pad;
};
struct hl_info_device_utilization {
__u32 utilization;
__u32 pad;
};
struct hl_info_args {
/* Location of relevant struct in userspace */
__u64 return_pointer;
......@@ -137,8 +164,15 @@ struct hl_info_args {
/* HL_INFO_* */
__u32 op;
union {
/* Context ID - Currently not in use */
__u32 ctx_id;
/* Period value for utilization rate (100ms - 1000ms, in 100ms
* resolution.
*/
__u32 period_ms;
};
__u32 pad;
};
......@@ -295,12 +329,12 @@ struct hl_mem_in {
struct {
/*
* Requested virtual address of mapped memory.
* KMD will try to map the requested region to this
* hint address, as long as the address is valid and
* not already mapped. The user should check the
* The driver will try to map the requested region to
* this hint address, as long as the address is valid
* and not already mapped. The user should check the
* returned address of the IOCTL to make sure he got
* the hint address. Passing 0 here means that KMD
* will choose the address itself.
* the hint address. Passing 0 here means that the
* driver will choose the address itself.
*/
__u64 hint_addr;
/* Handle returned from HL_MEM_OP_ALLOC */
......@@ -313,12 +347,12 @@ struct hl_mem_in {
__u64 host_virt_addr;
/*
* Requested virtual address of mapped memory.
* KMD will try to map the requested region to this
* hint address, as long as the address is valid and
* not already mapped. The user should check the
* The driver will try to map the requested region to
* this hint address, as long as the address is valid
* and not already mapped. The user should check the
* returned address of the IOCTL to make sure he got
* the hint address. Passing 0 here means that KMD
* will choose the address itself.
* the hint address. Passing 0 here means that the
* driver will choose the address itself.
*/
__u64 hint_addr;
/* Size of allocated host memory */
......@@ -439,7 +473,7 @@ struct hl_debug_params_spmu {
#define HL_DEBUG_OP_BMON 4
/* Opcode for SPMU component */
#define HL_DEBUG_OP_SPMU 5
/* Opcode for timestamp */
/* Opcode for timestamp (deprecated) */
#define HL_DEBUG_OP_TIMESTAMP 6
/* Opcode for setting the device into or out of debug mode. The enable
* variable should be 1 for enabling debug mode and 0 for disabling it
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment