Commit 0feaf86d authored by Omer Shpigelman, committed by Greg Kroah-Hartman

habanalabs: add virtual memory and MMU modules

This patch adds the Virtual Memory and MMU modules.

Goya has an internal MMU which provides process isolation on the internal
DDR. The internal MMU also performs translations for transactions that go
from Goya to the Host.

The driver is responsible for allocating and freeing memory on the DDR
upon user request. It also provides an interface to map and unmap DDR and
Host memory to the device address space.

The MMU in Goya supports 3-level and 4-level page tables. With 3-level, the
size of each page is 2MB, while with 4-level the size of each page is 4KB.

In the DDR, the physical pages are always 2MB.
Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent eff6f4a0
......@@ -6,7 +6,7 @@ obj-m := habanalabs.o
habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
command_submission.o
command_submission.o mmu.o
include $(src)/goya/Makefile
habanalabs-y += $(HL_GOYA_FILES)
......@@ -25,8 +25,10 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
for (i = 0 ; i < HL_MAX_PENDING_CS ; i++)
dma_fence_put(ctx->cs_pending[i]);
if (ctx->asid != HL_KERNEL_ASID_ID)
if (ctx->asid != HL_KERNEL_ASID_ID) {
hl_vm_ctx_fini(ctx);
hl_asid_free(hdev, ctx->asid);
}
}
void hl_ctx_do_release(struct kref *ref)
......@@ -96,6 +98,8 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
{
int rc = 0;
ctx->hdev = hdev;
kref_init(&ctx->refcount);
......@@ -113,9 +117,22 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
dev_err(hdev->dev, "No free ASID, failed to create context\n");
return -ENOMEM;
}
rc = hl_vm_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "Failed to init mem ctx module\n");
rc = -ENOMEM;
goto mem_ctx_err;
}
}
return 0;
mem_ctx_err:
if (ctx->asid != HL_KERNEL_ASID_ID)
hl_asid_free(hdev, ctx->asid);
return rc;
}
void hl_ctx_get(struct hl_device *hdev, struct hl_ctx *ctx)
......
......@@ -615,8 +615,10 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
/* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, hard_reset);
if (hard_reset)
if (hard_reset) {
hl_vm_fini(hdev);
hl_eq_reset(hdev, &hdev->event_queue);
}
/* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
hl_hw_queue_reset(hdev, hard_reset);
......@@ -677,6 +679,13 @@ int hl_device_reset(struct hl_device *hdev, bool hard_reset,
goto out_err;
}
rc = hl_vm_init(hdev);
if (rc) {
dev_err(hdev->dev,
"Failed to init memory module after hard reset\n");
goto out_err;
}
hl_set_max_power(hdev, hdev->max_power);
hdev->hard_reset_pending = false;
......@@ -861,6 +870,13 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
hdev->asic_name,
hdev->asic_prop.dram_size / 1024 / 1024 / 1024);
rc = hl_vm_init(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to initialize memory module\n");
rc = 0;
goto out_disabled;
}
/*
* hl_hwmon_init must be called after device_late_init, because only
* there we get the information from the device about which
......@@ -977,6 +993,8 @@ void hl_device_fini(struct hl_device *hdev)
/* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, true);
hl_vm_fini(hdev);
hl_eq_fini(hdev, &hdev->event_queue);
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
......
This diff is collapsed.
This diff is collapsed.
......@@ -188,7 +188,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
hdev->reset_on_lockup = reset_on_lockup;
/* Parameters for bring-up - set them to defaults */
hdev->mmu_enable = 0;
hdev->mmu_enable = 1;
hdev->cpu_enable = 1;
hdev->reset_pcilink = 0;
hdev->cpu_queues_enable = 1;
......
......@@ -18,7 +18,8 @@
static const struct hl_ioctl_desc hl_ioctls[] = {
HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl)
HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl),
HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl)
};
#define HL_CORE_IOCTL_COUNT ARRAY_SIZE(hl_ioctls)
......
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2018 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#ifndef INCLUDE_MMU_GENERAL_H_
#define INCLUDE_MMU_GENERAL_H_

/*
 * Generic MMU definitions: page sizes, PTE bit masks, and the mask/shift
 * pairs for the HOP0..HOP4 and OFFSET bit fields.
 *
 * Every bit mask below is an unsigned 64-bit constant. Keeping them all
 * 'ull' means that complementing or shifting a mask never goes through a
 * (possibly negative) 32-bit int, and sizeof() of any mask is 8.
 */

/* Page sizes */
#define PAGE_SHIFT_4KB			12
#define PAGE_SHIFT_2MB			21
#define PAGE_SIZE_2MB			(_AC(1, UL) << PAGE_SHIFT_2MB)
#define PAGE_SIZE_4KB			(_AC(1, UL) << PAGE_SHIFT_4KB)
#define PAGE_MASK_2MB			(~(PAGE_SIZE_2MB - 1))

/* Single-bit PTE flags (bit 0, bit 2 and bit 11 respectively) */
#define PAGE_PRESENT_MASK		0x0000000000001ull
#define SWAP_OUT_MASK			0x0000000000004ull
#define LAST_MASK			0x0000000000800ull

/* Bits 12..49 */
#define PHYS_ADDR_MASK			0x3FFFFFFFFF000ull

/*
 * Non-overlapping bit fields that together cover bits 0..49:
 * HOP0 = bits 48..49, HOP1 = 39..47, HOP2 = 30..38, HOP3 = 21..29,
 * HOP4 = 12..20, OFFSET = 0..11. HOPn_SHIFT is the position of the lowest
 * bit of the matching HOPn_MASK.
 */
#define HOP0_MASK			0x3000000000000ull
#define HOP1_MASK			0x0FF8000000000ull
#define HOP2_MASK			0x0007FC0000000ull
#define HOP3_MASK			0x000003FE00000ull
#define HOP4_MASK			0x00000001FF000ull
#define OFFSET_MASK			0x0000000000FFFull

#define HOP0_SHIFT			48
#define HOP1_SHIFT			39
#define HOP2_SHIFT			30
#define HOP3_SHIFT			21
#define HOP4_SHIFT			12

/*
 * Everything above the low 12 bits. Parenthesized and derived from the
 * 64-bit OFFSET_MASK so the result is 0xFFFFFFFFFFFFF000 as an unsigned
 * 64-bit value rather than the negative int that a bare ~0xFFF yields.
 */
#define PTE_PHYS_ADDR_SHIFT		12
#define PTE_PHYS_ADDR_MASK		(~OFFSET_MASK)

/* Table geometry */
#define HL_PTE_SIZE			sizeof(u64)
#define HOP_TABLE_SIZE			PAGE_SIZE_4KB
#define HOP0_TABLES_TOTAL_SIZE		(HOP_TABLE_SIZE * MAX_ASID)

#define MMU_HOP0_PA43_12_SHIFT		12
#define MMU_HOP0_PA49_44_SHIFT		(12 + 32)

#define MMU_CONFIG_TIMEOUT_USEC		2000 /* 2 ms */

#endif /* INCLUDE_MMU_GENERAL_H_ */
/* SPDX-License-Identifier: GPL-2.0
*
* Copyright 2016-2018 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
#ifndef INCLUDE_MMU_V1_0_H_
#define INCLUDE_MMU_V1_0_H_

/*
 * MMU v1.0 constants, listed in ascending numeric order.
 * NOTE(review): the values look like register offsets - confirm against
 * the hardware specification.
 */
#define MMU_ASID_BUSY			0x490000
#define MMU_HOP0_PA43_12		0x490004
#define MMU_HOP0_PA49_44		0x490008

#endif /* INCLUDE_MMU_V1_0_H_ */
This diff is collapsed.
This diff is collapsed.
......@@ -162,6 +162,108 @@ union hl_wait_cs_args {
struct hl_wait_cs_out out;
};
/*
 * Opcodes of the memory IOCTL (value of hl_mem_in.op):
 *   HL_MEM_OP_ALLOC - alloc device memory
 *   HL_MEM_OP_FREE  - free previously allocated device memory
 *   HL_MEM_OP_MAP   - map host memory
 *   HL_MEM_OP_UNMAP - unmap previously mapped host memory
 */
#define HL_MEM_OP_ALLOC			0
#define HL_MEM_OP_FREE			1
#define HL_MEM_OP_MAP			2
#define HL_MEM_OP_UNMAP			3

/* Memory flags (bit values for hl_mem_in.flags) */
#define HL_MEM_CONTIGUOUS		0x1
#define HL_MEM_SHARED			0x2
#define HL_MEM_USERPTR			0x4
/*
 * Input arguments of the memory IOCTL.
 *
 * The anonymous union carries the parameters of one specific operation;
 * the members that follow it (op, flags, ctx_id, pad) sit outside the union
 * and are therefore present for every operation.
 */
struct hl_mem_in {
	union {
		/* Parameters of HL_MEM_OP_ALLOC: allocate device memory */
		struct {
			/* Requested allocation size */
			__u32 mem_size;
			__u32 pad;
		} alloc;

		/* Parameters of HL_MEM_OP_FREE: free device memory */
		struct {
			/* The handle that HL_MEM_OP_ALLOC returned */
			__u64 handle;
		} free;

		/* Parameters of HL_MEM_OP_MAP when mapping device memory */
		struct {
			/*
			 * Virtual address the caller would like the mapping
			 * to get. KMD tries to place the mapping at this hint
			 * address, provided the address is valid and is not
			 * mapped already. The address returned by the IOCTL
			 * must be checked by the user to learn whether the
			 * hint was honored. A value of 0 lets KMD pick the
			 * address on its own.
			 */
			__u64 hint_addr;
			/* The handle that HL_MEM_OP_ALLOC returned */
			__u64 handle;
		} map_device;

		/* Parameters of HL_MEM_OP_MAP when mapping host memory */
		struct {
			/* Address of the allocated host memory */
			__u64 host_virt_addr;
			/*
			 * Virtual address the caller would like the mapping
			 * to get. KMD tries to place the mapping at this hint
			 * address, provided the address is valid and is not
			 * mapped already. The address returned by the IOCTL
			 * must be checked by the user to learn whether the
			 * hint was honored. A value of 0 lets KMD pick the
			 * address on its own.
			 */
			__u64 hint_addr;
			/* Size of the allocated host memory */
			__u32 mem_size;
			__u32 pad;
		} map_host;

		/* Parameters of HL_MEM_OP_UNMAP: unmap host memory */
		struct {
			/* The virtual address that HL_MEM_OP_MAP returned */
			__u64 device_virt_addr;
		} unmap;
	};

	/* One of HL_MEM_OP_* */
	__u32 op;
	/* HL_MEM_* flags */
	__u32 flags;
	/* Context ID - currently not in use */
	__u32 ctx_id;
	__u32 pad;
};
/*
 * Output of the memory IOCTL. Both members share the same 64 bits; which
 * one is meaningful depends on the operation that was requested.
 */
struct hl_mem_out {
	union {
		/*
		 * Result of HL_MEM_OP_MAP: the virtual address assigned in
		 * the device VA space. 0 means the requested operation
		 * failed.
		 */
		__u64 device_virt_addr;

		/*
		 * Result of HL_MEM_OP_ALLOC: the handle assigned to the
		 * allocated memory.
		 */
		__u64 handle;
	};
};
/*
 * Argument of HL_IOCTL_MEMORY. The input and output views overlay the same
 * storage.
 */
union hl_mem_args {
	struct hl_mem_in in;
	struct hl_mem_out out;
};
/*
* Command Buffer
* - Request a Command Buffer
......@@ -245,7 +347,25 @@ union hl_wait_cs_args {
#define HL_IOCTL_WAIT_CS \
_IOWR('H', 0x04, union hl_wait_cs_args)
/*
 * Memory
 * - Allocate device memory (HL_MEM_OP_ALLOC)
 * - Free previously allocated device memory (HL_MEM_OP_FREE)
 * - Map host memory or device memory to device MMU (HL_MEM_OP_MAP)
 * - Unmap memory from device MMU (HL_MEM_OP_UNMAP)
 *
 * The requested operation is selected by the 'op' member of struct
 * hl_mem_in, and its parameters are taken from the matching member of the
 * union inside that structure.
 *
 * For host memory, the IOCTL doesn't allocate memory. The user is supposed
 * to allocate the memory in user-space (malloc/new). The driver pins the
 * physical pages (up to the limit allowed by the OS), assigns a virtual
 * address in the device VA space and initializes the device MMU.
 *
 * When mapping, the user may specify a requested virtual address
 * (hint_addr); the address that was actually assigned is returned in
 * struct hl_mem_out.
 *
 */
#define HL_IOCTL_MEMORY \
_IOWR('H', 0x05, union hl_mem_args)
#define HL_COMMAND_START 0x02
#define HL_COMMAND_END 0x05
#define HL_COMMAND_END 0x06
#endif /* HABANALABS_H_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment