Commit 99b9d7b4 authored by Oded Gabbay, committed by Greg Kroah-Hartman

habanalabs: add basic Goya support

This patch adds basic support for the Goya device. The code initializes
the device's PCI controller and PCI bars. It also initializes various S/W
structures and adds some basic helper functions.
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 1ea2a20e
...@@ -5,3 +5,6 @@ ...@@ -5,3 +5,6 @@
obj-m := habanalabs.o obj-m := habanalabs.o
habanalabs-y := habanalabs_drv.o device.o habanalabs-y := habanalabs_drv.o device.o
include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
...@@ -120,8 +120,11 @@ static int device_setup_cdev(struct hl_device *hdev, struct class *hclass, ...@@ -120,8 +120,11 @@ static int device_setup_cdev(struct hl_device *hdev, struct class *hclass,
*/ */
static int device_early_init(struct hl_device *hdev) static int device_early_init(struct hl_device *hdev)
{ {
int rc;
switch (hdev->asic_type) { switch (hdev->asic_type) {
case ASIC_GOYA: case ASIC_GOYA:
goya_set_asic_funcs(hdev);
strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name)); strlcpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
break; break;
default: default:
...@@ -130,6 +133,10 @@ static int device_early_init(struct hl_device *hdev) ...@@ -130,6 +133,10 @@ static int device_early_init(struct hl_device *hdev)
return -EINVAL; return -EINVAL;
} }
rc = hdev->asic_funcs->early_init(hdev);
if (rc)
return rc;
return 0; return 0;
} }
...@@ -141,6 +148,10 @@ static int device_early_init(struct hl_device *hdev) ...@@ -141,6 +148,10 @@ static int device_early_init(struct hl_device *hdev)
*/ */
static void device_early_fini(struct hl_device *hdev) static void device_early_fini(struct hl_device *hdev)
{ {
if (hdev->asic_funcs->early_fini)
hdev->asic_funcs->early_fini(hdev);
} }
/* /*
...@@ -154,8 +165,15 @@ static void device_early_fini(struct hl_device *hdev) ...@@ -154,8 +165,15 @@ static void device_early_fini(struct hl_device *hdev)
*/ */
int hl_device_suspend(struct hl_device *hdev) int hl_device_suspend(struct hl_device *hdev)
{ {
int rc;
pci_save_state(hdev->pdev); pci_save_state(hdev->pdev);
rc = hdev->asic_funcs->suspend(hdev);
if (rc)
dev_err(hdev->dev,
"Failed to disable PCI access of device CPU\n");
/* Shut down the device */ /* Shut down the device */
pci_disable_device(hdev->pdev); pci_disable_device(hdev->pdev);
pci_set_power_state(hdev->pdev, PCI_D3hot); pci_set_power_state(hdev->pdev, PCI_D3hot);
...@@ -185,6 +203,13 @@ int hl_device_resume(struct hl_device *hdev) ...@@ -185,6 +203,13 @@ int hl_device_resume(struct hl_device *hdev)
return rc; return rc;
} }
rc = hdev->asic_funcs->resume(hdev);
if (rc) {
dev_err(hdev->dev,
"Failed to enable PCI access from device CPU\n");
return rc;
}
return 0; return 0;
} }
...@@ -212,11 +237,21 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass) ...@@ -212,11 +237,21 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
if (rc) if (rc)
goto release_device; goto release_device;
/*
* Start calling ASIC initialization. First S/W then H/W and finally
* late init
*/
rc = hdev->asic_funcs->sw_init(hdev);
if (rc)
goto early_fini;
dev_notice(hdev->dev, dev_notice(hdev->dev,
"Successfully added device to habanalabs driver\n"); "Successfully added device to habanalabs driver\n");
return 0; return 0;
early_fini:
device_early_fini(hdev);
release_device: release_device:
device_destroy(hclass, hdev->dev->devt); device_destroy(hclass, hdev->dev->devt);
cdev_del(&hdev->cdev); cdev_del(&hdev->cdev);
...@@ -247,6 +282,9 @@ void hl_device_fini(struct hl_device *hdev) ...@@ -247,6 +282,9 @@ void hl_device_fini(struct hl_device *hdev)
/* Mark device as disabled */ /* Mark device as disabled */
hdev->disabled = true; hdev->disabled = true;
/* Call ASIC S/W finalize function */
hdev->asic_funcs->sw_fini(hdev);
device_early_fini(hdev); device_early_fini(hdev);
/* Hide device from user */ /* Hide device from user */
...@@ -338,3 +376,36 @@ int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr, ...@@ -338,3 +376,36 @@ int hl_poll_timeout_device_memory(struct hl_device *hdev, void __iomem *addr,
return *val ? 0 : -ETIMEDOUT; return *val ? 0 : -ETIMEDOUT;
} }
/*
* MMIO register access helper functions.
*/
/*
 * hl_rreg - Fetch the current value of an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: byte offset of the register, added to hdev->rmmio
 *
 * Issues a single readl() at hdev->rmmio + reg and hands the 32-bit result
 * back to the caller.
 *
 */
inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
{
	u32 val;

	val = readl(hdev->rmmio + reg);

	return val;
}
/*
 * hl_wreg - Write to an MMIO register
 *
 * @hdev: pointer to habanalabs device structure
 * @reg: MMIO register offset (in bytes), added to hdev->rmmio
 * @val: 32-bit value to store
 *
 * Writes the 32-bit value into the MMIO register with a single writel().
 * Nothing is returned and the write is not read back.
 *
 */
inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
{
writel(val, hdev->rmmio + reg);
}
subdir-ccflags-y += -I$(src)
HL_GOYA_FILES := goya/goya.o
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2019 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#ifndef GOYAP_H_
#define GOYAP_H_
#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"
#include "include/goya/goya.h"
#define NUMBER_OF_CMPLT_QUEUES 5
#define NUMBER_OF_EXT_HW_QUEUES 5
#define NUMBER_OF_CPU_HW_QUEUES 1
#define NUMBER_OF_INT_HW_QUEUES 9
#define NUMBER_OF_HW_QUEUES (NUMBER_OF_EXT_HW_QUEUES + \
NUMBER_OF_CPU_HW_QUEUES + \
NUMBER_OF_INT_HW_QUEUES)
/*
* Number of MSIX interrupts IDS:
* Each completion queue has 1 ID
* The event queue has 1 ID
*/
#define NUMBER_OF_INTERRUPTS (NUMBER_OF_CMPLT_QUEUES + 1)
#if (NUMBER_OF_HW_QUEUES >= HL_MAX_QUEUES)
#error "Number of H/W queues must be smaller than HL_MAX_QUEUES"
#endif
#if (NUMBER_OF_INTERRUPTS > GOYA_MSIX_ENTRIES)
#error "Number of MSIX interrupts must be smaller or equal to GOYA_MSIX_ENTRIES"
#endif
#define QMAN_FENCE_TIMEOUT_USEC 10000 /* 10 ms */
#define QMAN_STOP_TIMEOUT_USEC 100000 /* 100 ms */
#define TPC_ENABLED_MASK 0xFF
#define PLL_HIGH_DEFAULT 1575000000 /* 1.575 GHz */
#define GOYA_ARMCP_INFO_TIMEOUT 10000000 /* 10s */
#define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */
/* DRAM Memory Map */
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
#define MMU_PAGE_TABLES_SIZE 0x0E000000 /* 224MB */
#define MMU_CACHE_MNG_SIZE 0x00001000 /* 4KB */
#define CPU_PQ_PKT_SIZE 0x00001000 /* 4KB */
#define CPU_PQ_DATA_SIZE 0x01FFE000 /* 32MB - 8KB */
#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE
#define MMU_PAGE_TABLES_ADDR (CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE)
#define MMU_CACHE_MNG_ADDR (MMU_PAGE_TABLES_ADDR + MMU_PAGE_TABLES_SIZE)
#define CPU_PQ_PKT_ADDR (MMU_CACHE_MNG_ADDR + MMU_CACHE_MNG_SIZE)
#define CPU_PQ_DATA_ADDR (CPU_PQ_PKT_ADDR + CPU_PQ_PKT_SIZE)
#define DRAM_BASE_ADDR_USER (CPU_PQ_DATA_ADDR + CPU_PQ_DATA_SIZE)
#if (DRAM_BASE_ADDR_USER != 0x20000000)
#error "KMD must reserve 512MB"
#endif
/*
* SRAM Memory Map for KMD
*
* KMD occupies KMD_SRAM_SIZE bytes from the start of SRAM. It is used for
* MME/TPC QMANs
*
*/
#define MME_QMAN_BASE_OFFSET 0x000000 /* Must be 0 */
#define MME_QMAN_LENGTH 64
#define TPC_QMAN_LENGTH 64
#define TPC0_QMAN_BASE_OFFSET (MME_QMAN_BASE_OFFSET + \
(MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC1_QMAN_BASE_OFFSET (TPC0_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC2_QMAN_BASE_OFFSET (TPC1_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC3_QMAN_BASE_OFFSET (TPC2_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC4_QMAN_BASE_OFFSET (TPC3_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC5_QMAN_BASE_OFFSET (TPC4_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC6_QMAN_BASE_OFFSET (TPC5_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define TPC7_QMAN_BASE_OFFSET (TPC6_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#define SRAM_KMD_RES_OFFSET (TPC7_QMAN_BASE_OFFSET + \
(TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE))
#if (SRAM_KMD_RES_OFFSET >= GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START)
#error "MME/TPC QMANs SRAM space exceeds limit"
#endif
#define SRAM_USER_BASE_OFFSET GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START
/*
 * Virtual address space
 *
 * The *_SIZE macros are simply END - START; the size comments below are
 * derived from the literal values.
 */
#define VA_HOST_SPACE_START 0x1000000000000ull /* 256TB */
#define VA_HOST_SPACE_END 0x3FF8000000000ull /* 1PB - 512GB */
#define VA_HOST_SPACE_SIZE (VA_HOST_SPACE_END - \
VA_HOST_SPACE_START) /* 768TB - 512GB */
#define VA_DDR_SPACE_START 0x800000000ull /* 32GB */
#define VA_DDR_SPACE_END 0x2000000000ull /* 128GB */
#define VA_DDR_SPACE_SIZE (VA_DDR_SPACE_END - \
VA_DDR_SPACE_START) /* 96GB */
#define DMA_MAX_TRANSFER_SIZE 0xFFFFFFFF
#define HW_CAP_PLL 0x00000001
#define HW_CAP_DDR_0 0x00000002
#define HW_CAP_DDR_1 0x00000004
#define HW_CAP_MME 0x00000008
#define HW_CAP_CPU 0x00000010
#define HW_CAP_DMA 0x00000020
#define HW_CAP_MSIX 0x00000040
#define HW_CAP_CPU_Q 0x00000080
#define HW_CAP_MMU 0x00000100
#define HW_CAP_TPC_MBIST 0x00000200
#define HW_CAP_GOLDEN 0x00000400
#define HW_CAP_TPC 0x00000800
#define CPU_PKT_SHIFT 5
#define CPU_PKT_SIZE (1 << CPU_PKT_SHIFT)
#define CPU_PKT_MASK (~((1 << CPU_PKT_SHIFT) - 1))
#define CPU_MAX_PKTS_IN_CB 32
#define CPU_CB_SIZE (CPU_PKT_SIZE * CPU_MAX_PKTS_IN_CB)
#define CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * CPU_CB_SIZE)
/*
 * Identifiers for the firmware components the Goya code distinguishes.
 * NOTE(review): presumably these select the U-Boot and preboot images -
 * confirm against the users of this enum.
 */
enum goya_fw_component {
	FW_COMP_UBOOT = 0,
	FW_COMP_PREBOOT = 1,
};
/**
 * struct goya_device - Goya private device state.
 * @hw_queues_lock: spinlock; presumably serializes access to the H/W
 *                  queues - TODO confirm against its users. Slated for
 *                  removal, see the TODO next to the member.
 * @ddr_bar_cur_addr: presumably the device address the DDR PCI BAR is
 *                    currently pointing at - TODO confirm.
 * @hw_cap_initialized: presumably a bitmask of the HW_CAP_* flags recording
 *                      which H/W blocks were initialized - TODO confirm.
 */
struct goya_device {
/* TODO: remove hw_queues_lock after moving to scheduler code */
spinlock_t hw_queues_lock;
u64 ddr_bar_cur_addr;
u32 hw_cap_initialized;
};
#endif /* GOYAP_H_ */
...@@ -14,9 +14,62 @@ ...@@ -14,9 +14,62 @@
#define HL_NAME "habanalabs" #define HL_NAME "habanalabs"
#define HL_MAX_QUEUES 128
struct hl_device; struct hl_device;
/**
 * struct asic_fixed_properties - ASIC specific immutable properties.
 * @sram_base_address: SRAM physical start address.
 * @sram_end_address: SRAM physical end address.
 * @sram_user_base_address: SRAM physical start address for user access.
 * @dram_base_address: DRAM physical start address.
 * @dram_end_address: DRAM physical end address.
 * @dram_user_base_address: DRAM physical start address for user access.
 * @dram_size: DRAM total size.
 * @dram_pci_bar_size: size of PCI bar towards DRAM.
 * @host_phys_base_address: base physical address of host memory for
 *                          transactions that the device generates.
 * @va_space_host_start_address: base address of virtual memory range for
 *                               mapping host memory.
 * @va_space_host_end_address: end address of virtual memory range for
 *                             mapping host memory.
 * @va_space_dram_start_address: base address of virtual memory range for
 *                               mapping DRAM memory.
 * @va_space_dram_end_address: end address of virtual memory range for
 *                             mapping DRAM memory.
 * @cfg_size: configuration space size on SRAM.
 * @sram_size: total size of SRAM.
 * @max_asid: maximum number of open contexts (ASIDs).
 * @high_pll: high PLL frequency used by the device.
 * @completion_queues_count: number of completion queues.
 * @tpc_enabled_mask: which TPCs are enabled.
 */
struct asic_fixed_properties {
u64 sram_base_address;
u64 sram_end_address;
u64 sram_user_base_address;
u64 dram_base_address;
u64 dram_end_address;
u64 dram_user_base_address;
u64 dram_size;
u64 dram_pci_bar_size;
u64 host_phys_base_address;
u64 va_space_host_start_address;
u64 va_space_host_end_address;
u64 va_space_dram_start_address;
u64 va_space_dram_end_address;
u32 cfg_size;
u32 sram_size;
u32 max_asid;
u32 high_pll;
u8 completion_queues_count;
u8 tpc_enabled_mask;
};
#define HL_QUEUE_LENGTH 256
/* /*
* ASICs * ASICs
*/ */
...@@ -33,6 +86,36 @@ enum hl_asic_type { ...@@ -33,6 +86,36 @@ enum hl_asic_type {
ASIC_INVALID ASIC_INVALID
}; };
/**
 * struct hl_asic_funcs - ASIC specific functions that can be called from
 *                        common code.
 * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
 * @early_fini: tears down what was done in early_init. The common
 *              device_early_fini() checks this pointer for NULL before
 *              calling it and ignores the returned value.
 * @sw_init: sets up driver state, does not configure H/W.
 * @sw_fini: tears down driver state, does not configure H/W.
 * @suspend: handles IP specific H/W or SW changes for suspend.
 * @resume: handles IP specific H/W or SW changes for resume.
 * @dma_alloc_coherent: Allocate coherent DMA memory by calling
 *                      dma_alloc_coherent(). This is ASIC function because its
 *                      implementation is not trivial when the driver is loaded
 *                      in simulation mode (not upstreamed).
 * @dma_free_coherent: Free coherent DMA memory by calling dma_free_coherent().
 *                     This is ASIC function because its implementation is not
 *                     trivial when the driver is loaded in simulation mode
 *                     (not upstreamed).
 */
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
int (*early_fini)(struct hl_device *hdev);
int (*sw_init)(struct hl_device *hdev);
int (*sw_fini)(struct hl_device *hdev);
int (*suspend)(struct hl_device *hdev);
int (*resume)(struct hl_device *hdev);
void* (*dma_alloc_coherent)(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle, gfp_t flag);
void (*dma_free_coherent)(struct hl_device *hdev, size_t size,
void *cpu_addr, dma_addr_t dma_handle);
};
/* /*
* FILE PRIVATE STRUCTURE * FILE PRIVATE STRUCTURE
...@@ -62,26 +145,78 @@ struct hl_fpriv { ...@@ -62,26 +145,78 @@ struct hl_fpriv {
*/ */
#define HL_MAX_MINORS 256 #define HL_MAX_MINORS 256
/*
* Registers read & write functions.
*/
u32 hl_rreg(struct hl_device *hdev, u32 reg);
void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
/*
 * hl_poll_timeout - poll an MMIO register through readl_poll_timeout()
 *
 * @hdev: pointer to habanalabs device structure
 * @addr: register offset (in bytes), added to hdev->rmmio
 * @val, @cond, @sleep_us, @timeout_us: forwarded unchanged to
 *                                      readl_poll_timeout()
 *
 * Evaluates to whatever readl_poll_timeout() evaluates to.
 *
 * hdev and addr are parenthesized so that callers may pass arbitrary
 * expressions (e.g. "&dev" or "base & mask") without the expansion binding
 * differently than intended.
 */
#define hl_poll_timeout(hdev, addr, val, cond, sleep_us, timeout_us) \
	readl_poll_timeout((hdev)->rmmio + (addr), val, cond, sleep_us, \
				timeout_us)
/*
 * Register access shorthands. Every macro below expands to code that
 * references a variable named hdev, so they can only be used where a
 * struct hl_device pointer with that exact name is in scope.
 */
#define RREG32(reg) hl_rreg(hdev, (reg))
#define WREG32(reg, v) hl_wreg(hdev, (reg), (v))
/* Log the register expression as written by the caller, and its value */
#define DREG32(reg) pr_info("REGISTER: " #reg " : 0x%08X\n", \
hl_rreg(hdev, (reg)))
/*
 * Read-modify-write of a register. Note the mask polarity: bits that are SET
 * in mask are preserved from the current register value, bits that are CLEAR
 * in mask are taken from val. reg is evaluated twice (one read, one write).
 */
#define WREG32_P(reg, val, mask) \
do { \
u32 tmp_ = RREG32(reg); \
tmp_ &= (mask); \
tmp_ |= ((val) & ~(mask)); \
WREG32(reg, tmp_); \
} while (0)
/* reg &= and */
#define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
/* reg |= or */
#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
/* Token-paste helpers: build <reg>_<field>_SHIFT / <reg>_<field>_MASK */
#define REG_FIELD_SHIFT(reg, field) reg##_##field##_SHIFT
#define REG_FIELD_MASK(reg, field) reg##_##field##_MASK
/*
 * Replace one field of register mm<reg>: the field's bits are cleared using
 * <reg>_<field>_MASK and val is OR-ed in after shifting by
 * <reg>_<field>_SHIFT. val is not masked, so it must fit inside the field.
 */
#define WREG32_FIELD(reg, field, val) \
WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \
(val) << REG_FIELD_SHIFT(reg, field))
/** /**
* struct hl_device - habanalabs device structure. * struct hl_device - habanalabs device structure.
* @pdev: pointer to PCI device, can be NULL in case of simulator device. * @pdev: pointer to PCI device, can be NULL in case of simulator device.
* @pcie_bar: array of available PCIe bars.
* @rmmio: configuration area address on SRAM.
* @cdev: related char device. * @cdev: related char device.
 * @dev: related kernel basic device structure. * @dev: related kernel basic device structure.
 * @asic_name: ASIC specific name. * @asic_name: ASIC specific name.
* @asic_type: ASIC specific type. * @asic_type: ASIC specific type.
* @dma_pool: DMA pool for small allocations.
* @cpu_accessible_dma_mem: KMD <-> ArmCP shared memory CPU address.
* @cpu_accessible_dma_address: KMD <-> ArmCP shared memory DMA address.
* @cpu_accessible_dma_pool: KMD <-> ArmCP shared memory pool.
* @asic_prop: ASIC specific immutable properties.
* @asic_funcs: ASIC specific functions.
* @asic_specific: ASIC specific information to use only from ASIC files.
* @major: habanalabs KMD major. * @major: habanalabs KMD major.
* @id: device minor. * @id: device minor.
* @disabled: is device disabled. * @disabled: is device disabled.
*/ */
struct hl_device { struct hl_device {
struct pci_dev *pdev; struct pci_dev *pdev;
void __iomem *pcie_bar[6];
void __iomem *rmmio;
struct cdev cdev; struct cdev cdev;
struct device *dev; struct device *dev;
char asic_name[16]; char asic_name[16];
enum hl_asic_type asic_type; enum hl_asic_type asic_type;
struct dma_pool *dma_pool;
void *cpu_accessible_dma_mem;
dma_addr_t cpu_accessible_dma_address;
struct gen_pool *cpu_accessible_dma_pool;
struct asic_fixed_properties asic_prop;
const struct hl_asic_funcs *asic_funcs;
void *asic_specific;
u32 major; u32 major;
u16 id; u16 id;
u8 disabled; u8 disabled;
/* Parameters for bring-up */
u8 reset_pcilink;
}; };
...@@ -128,4 +263,6 @@ void hl_device_fini(struct hl_device *hdev); ...@@ -128,4 +263,6 @@ void hl_device_fini(struct hl_device *hdev);
int hl_device_suspend(struct hl_device *hdev); int hl_device_suspend(struct hl_device *hdev);
int hl_device_resume(struct hl_device *hdev); int hl_device_resume(struct hl_device *hdev);
void goya_set_asic_funcs(struct hl_device *hdev);
#endif /* HABANALABSP_H_ */ #endif /* HABANALABSP_H_ */
...@@ -122,6 +122,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, ...@@ -122,6 +122,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
hdev->major = hl_major; hdev->major = hl_major;
/* Parameters for bring-up - set them to defaults */
hdev->reset_pcilink = 0;
hdev->disabled = true; hdev->disabled = true;
hdev->pdev = pdev; /* can be NULL in case of simulator device */ hdev->pdev = pdev; /* can be NULL in case of simulator device */
......
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2019 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#ifndef GOYA_H
#define GOYA_H
#include "asic_reg/goya_regs.h"
#include <linux/types.h>
#define SRAM_CFG_BAR_ID 0
#define MSIX_BAR_ID 2
#define DDR_BAR_ID 4
#define CFG_BAR_SIZE 0x10000000ull /* 256MB */
#define MSIX_BAR_SIZE 0x1000ull /* 4KB */
#define CFG_BASE 0x7FFC000000ull
#define CFG_SIZE 0x4000000 /* 32MB CFG + 32MB DBG*/
#define SRAM_BASE_ADDR 0x7FF0000000ull
#define SRAM_SIZE 0x32A0000 /* 50.625MB */
#define DRAM_PHYS_BASE 0x0ull
#define HOST_PHYS_BASE 0x8000000000ull /* 0.5TB */
#define HOST_PHYS_SIZE 0x1000000000000ull /* 0.25PB (48 bits) */
#define GOYA_MSIX_ENTRIES 8
#define QMAN_PQ_ENTRY_SIZE 16 /* Bytes */
#define MAX_ASID 1024
#define PROT_BITS_OFFS 0xF80
#define DMA_MAX_NUM 5
#define TPC_MAX_NUM 8
#endif /* GOYA_H */
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
*
* Copyright 2016-2018 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#ifndef HABANALABS_H_
#define HABANALABS_H_
#include <linux/types.h>
#include <linux/ioctl.h>
/*
* Defines that are asic-specific but constitutes as ABI between kernel driver
* and userspace
*/
#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */
#endif /* HABANALABS_H_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment