Commit 27ca384c, authored by Omer Shpigelman, committed by Greg Kroah-Hartman

habanalabs: add MMU DRAM default page mapping

This patch provides a workaround for a H/W bug in Goya, where access to
RAZWI from TPC can cause PCI completion timeout.

The WA is to use the device MMU to map any unmapped DRAM memory to a
default page in the DRAM. That way, the TPC will never reach RAZWI upon
accessing a bad address in the DRAM.

When a DRAM page is mapped by the user, its default mapping is
overwritten. Once that page is unmapped, the MMU driver will map that page
to the default page.

To help debugging, the driver will set the default page area to 0x99 on
device initialization.
Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 60b7dcca
......@@ -304,6 +304,7 @@ static u32 goya_non_fatal_events[GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE] = {
static int goya_armcp_info_get(struct hl_device *hdev);
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
u64 phys_addr);
......@@ -345,6 +346,7 @@ static void goya_get_fixed_properties(struct hl_device *hdev)
SRAM_USER_BASE_OFFSET;
prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
prop->mmu_dram_default_page_addr = MMU_DRAM_DEFAULT_PAGE_ADDR;
if (hdev->pldm)
prop->mmu_pgt_size = 0x800000; /* 8MB */
else
......@@ -359,6 +361,8 @@ static void goya_get_fixed_properties(struct hl_device *hdev)
prop->va_space_host_end_address = VA_HOST_SPACE_END;
prop->va_space_dram_start_address = VA_DDR_SPACE_START;
prop->va_space_dram_end_address = VA_DDR_SPACE_END;
prop->dram_size_for_default_page_mapping =
prop->va_space_dram_end_address;
prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID;
prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
......@@ -816,6 +820,12 @@ static int goya_late_init(struct hl_device *hdev)
goto disable_pci_access;
}
rc = goya_mmu_set_dram_default_page(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to set DRAM default page\n");
goto disable_pci_access;
}
return 0;
disable_pci_access:
......@@ -2648,6 +2658,7 @@ static int goya_mmu_init(struct hl_device *hdev)
return 0;
hdev->dram_supports_virtual_memory = true;
hdev->dram_default_page_mapping = true;
for (i = 0 ; i < prop->max_asid ; i++) {
hop0_addr = prop->mmu_pgt_addr +
......@@ -4303,98 +4314,6 @@ static void goya_update_eq_ci(struct hl_device *hdev, u32 val)
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, val);
}
/*
 * goya_context_switch() - pre-patch implementation, removed by this commit.
 *
 * Builds a single LIN_DMA packet with the MEMSET bit set that targets SRAM,
 * wraps it in a kernel command buffer and job, runs it through the ASIC's
 * cs_parser and sends it with goya_send_job_on_qman0(). When the job was sent
 * successfully, goya_mmu_prepare() is called with @asid.
 *
 * @hdev: habanalabs device structure.
 * @asid: address space ID handed to goya_mmu_prepare() on success.
 *
 * Return: 0 on success, -EFAULT if the kernel CB could not be created,
 *         -ENOMEM if the job could not be allocated, otherwise the error
 *         returned by cs_parser() or goya_send_job_on_qman0().
 */
static int goya_context_switch(struct hl_device *hdev, u32 asid)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct packet_lin_dma *clear_sram_pkt;
struct hl_cs_parser parser;
struct hl_cs_job *job;
u32 cb_size;
struct hl_cb *cb;
int rc;
/* One page is requested for the CB; only one packet is written into it */
cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
if (!cb)
return -EFAULT;
/* The packet is built in place at the start of the CB's kernel mapping */
clear_sram_pkt = (struct packet_lin_dma *)
(uintptr_t) cb->kernel_address;
memset(clear_sram_pkt, 0, sizeof(*clear_sram_pkt));
cb_size = sizeof(*clear_sram_pkt);
/* LIN_DMA opcode, host-to-SRAM direction, MEMSET/WO/RB/MB control bits */
clear_sram_pkt->ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
(DMA_HOST_TO_SRAM << GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT) |
(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
(1 << GOYA_PKT_CTL_RB_SHIFT) |
(1 << GOYA_PKT_CTL_MB_SHIFT));
/* With the MEMSET bit set, src_addr carries the 64-bit fill value */
clear_sram_pkt->src_addr = 0x7777777777777777ull;
clear_sram_pkt->dst_addr = prop->sram_base_address;
/* Only 0x10000 bytes are filled when hdev->pldm is set, else all of SRAM */
if (hdev->pldm)
clear_sram_pkt->tsize = 0x10000;
else
clear_sram_pkt->tsize = prop->sram_size;
job = hl_cs_allocate_job(hdev, true);
if (!job) {
dev_err(hdev->dev, "Failed to allocate a new job\n");
rc = -ENOMEM;
goto release_cb;
}
job->id = 0;
job->user_cb = cb;
/* Balanced by the cb->cs_cnt-- on the free_job path below */
job->user_cb->cs_cnt++;
job->user_cb_size = cb_size;
job->hw_queue_id = GOYA_QUEUE_ID_DMA_0;
hl_debugfs_add_job(hdev, job);
/* Fill the parser from the job's fields before handing it to cs_parser() */
parser.ctx_id = HL_KERNEL_ASID_ID;
parser.cs_sequence = 0;
parser.job_id = job->id;
parser.hw_queue_id = job->hw_queue_id;
parser.job_userptr_list = &job->userptr_list;
parser.user_cb = job->user_cb;
parser.user_cb_size = job->user_cb_size;
parser.ext_queue = job->ext_queue;
parser.use_virt_addr = hdev->mmu_enable;
rc = hdev->asic_funcs->cs_parser(hdev, &parser);
if (rc) {
dev_err(hdev->dev,
"Failed to parse kernel CB during context switch\n");
goto free_job;
}
/* The job is submitted using the patched CB produced by the parser */
job->patched_cb = parser.patched_cb;
job->job_cb_size = parser.patched_cb_size;
job->patched_cb->cs_cnt++;
rc = goya_send_job_on_qman0(hdev, job);
/* no point in setting the asid in case of failure */
if (!rc)
goya_mmu_prepare(hdev, asid);
/* Undo the cs_cnt increment and drop the patched CB on both outcomes */
job->patched_cb->cs_cnt--;
hl_cb_put(job->patched_cb);
free_job:
hl_userptr_delete_list(hdev, &job->userptr_list);
hl_debugfs_remove_job(hdev, job);
kfree(job);
cb->cs_cnt--;
release_cb:
/* hl_cb_put() followed by hl_cb_destroy() on the CB's handle */
hl_cb_put(cb);
hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
return rc;
}
static void goya_restore_phase_topology(struct hl_device *hdev)
{
int i, num_of_sob_in_longs, num_of_mon_in_longs;
......@@ -4864,41 +4783,37 @@ void *goya_get_events_stat(struct hl_device *hdev, u32 *size)
return goya->events_stat;
}
static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
static int goya_memset_device_memory(struct hl_device *hdev, u64 addr, u32 size,
u64 val, bool is_dram)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct goya_device *goya = hdev->asic_specific;
struct packet_lin_dma *clear_pgt_range_pkt;
struct packet_lin_dma *lin_dma_pkt;
struct hl_cs_parser parser;
struct hl_cs_job *job;
u32 cb_size;
struct hl_cb *cb;
int rc;
if (!(goya->hw_cap_initialized & HW_CAP_MMU))
return 0;
cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
if (!cb)
return -EFAULT;
clear_pgt_range_pkt = (struct packet_lin_dma *)
(uintptr_t) cb->kernel_address;
lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
cb_size = sizeof(*lin_dma_pkt);
memset(clear_pgt_range_pkt, 0, sizeof(*clear_pgt_range_pkt));
cb_size = sizeof(*clear_pgt_range_pkt);
lin_dma_pkt->ctl = ((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
(1 << GOYA_PKT_CTL_RB_SHIFT) |
(1 << GOYA_PKT_CTL_MB_SHIFT));
clear_pgt_range_pkt->ctl =
((PACKET_LIN_DMA << GOYA_PKT_CTL_OPCODE_SHIFT) |
(DMA_HOST_TO_DRAM << GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT) |
(1 << GOYA_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
(1 << GOYA_PKT_LIN_DMA_CTL_WO_SHIFT) |
(1 << GOYA_PKT_CTL_RB_SHIFT) |
(1 << GOYA_PKT_CTL_MB_SHIFT));
lin_dma_pkt->ctl |= (is_dram ? DMA_HOST_TO_DRAM : DMA_HOST_TO_SRAM) <<
GOYA_PKT_LIN_DMA_CTL_DMA_DIR_SHIFT;
clear_pgt_range_pkt->src_addr = 0;
clear_pgt_range_pkt->dst_addr = prop->mmu_pgt_addr;
clear_pgt_range_pkt->tsize = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
lin_dma_pkt->src_addr = val;
lin_dma_pkt->dst_addr = addr;
lin_dma_pkt->tsize = size;
job = hl_cs_allocate_job(hdev, true);
if (!job) {
......@@ -4927,8 +4842,7 @@ static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
rc = hdev->asic_funcs->cs_parser(hdev, &parser);
if (rc) {
dev_err(hdev->dev,
"Failed to parse kernel CB when clearing pgt\n");
dev_err(hdev->dev, "Failed to parse kernel CB\n");
goto free_job;
}
......@@ -4954,6 +4868,52 @@ static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
return rc;
}
/*
 * goya_context_switch() - fill SRAM with a fixed pattern, then set up the MMU
 *                         for the given ASID.
 *
 * @hdev: habanalabs device structure.
 * @asid: address space ID passed to goya_mmu_prepare().
 *
 * SRAM is filled from prop->sram_base_address with 0x7777777777777777 via
 * goya_memset_device_memory(). The fill length is 0x10000 bytes when
 * hdev->pldm is set and prop->sram_size otherwise. goya_mmu_prepare() is
 * called only if the fill succeeded.
 *
 * Return: 0 on success, otherwise the error from goya_memset_device_memory().
 */
static int goya_context_switch(struct hl_device *hdev, u32 asid)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 fill_pattern = 0x7777777777777777ull;
	u64 sram_base = prop->sram_base_address;
	u32 fill_size;
	int rc;

	if (hdev->pldm)
		fill_size = 0x10000;
	else
		fill_size = prop->sram_size;

	rc = goya_memset_device_memory(hdev, sram_base, fill_size,
					fill_pattern, false);
	if (!rc) {
		/* The ASID is programmed only after a successful SRAM fill */
		goya_mmu_prepare(hdev, asid);
		return 0;
	}

	dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
	return rc;
}
/*
 * goya_mmu_clear_pgt_range() - zero the MMU page-tables range in DRAM.
 *
 * @hdev: habanalabs device structure.
 *
 * Does nothing and returns 0 when HW_CAP_MMU is not set in
 * goya->hw_cap_initialized. Otherwise zeroes, starting at prop->mmu_pgt_addr,
 * a span of prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
 * MMU_CACHE_MNG_SIZE bytes.
 *
 * Return: 0 if the MMU capability is off, otherwise the result of
 *         goya_memset_device_memory().
 */
static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	struct asic_fixed_properties *prop;
	u32 range_size;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	prop = &hdev->asic_prop;
	range_size = prop->mmu_pgt_size + MMU_DRAM_DEFAULT_PAGE_SIZE +
			MMU_CACHE_MNG_SIZE;

	return goya_memset_device_memory(hdev, prop->mmu_pgt_addr, range_size,
						0, true);
}
/*
 * goya_mmu_set_dram_default_page() - fill the DRAM default page with 0x99.
 *
 * @hdev: habanalabs device structure.
 *
 * Does nothing and returns 0 when HW_CAP_MMU is not set in
 * goya->hw_cap_initialized. Otherwise writes the 0x9999999999999999 pattern
 * over MMU_DRAM_DEFAULT_PAGE_SIZE bytes at
 * hdev->asic_prop.mmu_dram_default_page_addr; the recognizable pattern is
 * there to help debugging.
 *
 * Return: 0 if the MMU capability is off, otherwise the result of
 *         goya_memset_device_memory().
 */
static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
	struct goya_device *goya = hdev->asic_specific;
	u64 pattern = 0x9999999999999999ull;
	u64 page_addr;

	if (!(goya->hw_cap_initialized & HW_CAP_MMU))
		return 0;

	page_addr = hdev->asic_prop.mmu_dram_default_page_addr;

	return goya_memset_device_memory(hdev, page_addr,
					MMU_DRAM_DEFAULT_PAGE_SIZE, pattern,
					true);
}
static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
{
struct goya_device *goya = hdev->asic_specific;
......
......@@ -56,18 +56,23 @@
/* DRAM Memory Map */
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
#define MMU_PAGE_TABLES_SIZE 0x0E000000 /* 224MB */
#define MMU_CACHE_MNG_SIZE 0x00001000 /* 4KB */
#define CPU_PQ_PKT_SIZE 0x00001000 /* 4KB */
#define CPU_PQ_DATA_SIZE 0x01FFE000 /* 32MB - 8KB */
#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE
#define MMU_PAGE_TABLES_ADDR (CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE)
#define MMU_CACHE_MNG_ADDR (MMU_PAGE_TABLES_ADDR + MMU_PAGE_TABLES_SIZE)
#define CPU_PQ_PKT_ADDR (MMU_CACHE_MNG_ADDR + MMU_CACHE_MNG_SIZE)
#define CPU_PQ_DATA_ADDR (CPU_PQ_PKT_ADDR + CPU_PQ_PKT_SIZE)
#define DRAM_BASE_ADDR_USER (CPU_PQ_DATA_ADDR + CPU_PQ_DATA_SIZE)
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
#define MMU_PAGE_TABLES_SIZE 0x0DE00000 /* 222MB */
#define MMU_DRAM_DEFAULT_PAGE_SIZE 0x00200000 /* 2MB */
#define MMU_CACHE_MNG_SIZE 0x00001000 /* 4KB */
#define CPU_PQ_PKT_SIZE 0x00001000 /* 4KB */
#define CPU_PQ_DATA_SIZE 0x01FFE000 /* 32MB - 8KB */
#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE
#define MMU_PAGE_TABLES_ADDR (CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE)
#define MMU_DRAM_DEFAULT_PAGE_ADDR (MMU_PAGE_TABLES_ADDR + \
MMU_PAGE_TABLES_SIZE)
#define MMU_CACHE_MNG_ADDR (MMU_DRAM_DEFAULT_PAGE_ADDR + \
MMU_DRAM_DEFAULT_PAGE_SIZE)
#define CPU_PQ_PKT_ADDR (MMU_CACHE_MNG_ADDR + \
MMU_CACHE_MNG_SIZE)
#define CPU_PQ_DATA_ADDR (CPU_PQ_PKT_ADDR + CPU_PQ_PKT_SIZE)
#define DRAM_BASE_ADDR_USER (CPU_PQ_DATA_ADDR + CPU_PQ_DATA_SIZE)
#if (DRAM_BASE_ADDR_USER != 0x20000000)
#error "KMD must reserve 512MB"
......
......@@ -143,7 +143,10 @@ enum hl_device_hw_state {
* mapping DRAM memory.
* @va_space_dram_end_address: end address of virtual memory range for
* mapping DRAM memory.
* @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
* fault.
* @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
* @mmu_dram_default_page_addr: DRAM default page physical address.
* @mmu_pgt_size: MMU page tables total size.
* @mmu_pte_size: PTE size in MMU page tables.
* @mmu_hop_table_size: MMU hop table size.
......@@ -182,7 +185,9 @@ struct asic_fixed_properties {
u64 va_space_host_end_address;
u64 va_space_dram_start_address;
u64 va_space_dram_end_address;
u64 dram_size_for_default_page_mapping;
u64 mmu_pgt_addr;
u64 mmu_dram_default_page_addr;
u32 mmu_pgt_size;
u32 mmu_pte_size;
u32 mmu_hop_table_size;
......@@ -592,6 +597,8 @@ struct hl_va_range {
* @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
* to user so user could inquire about CS. It is used as
* index to cs_pending array.
* @dram_default_hops: array that holds all hops addresses needed for default
* DRAM mapping.
* @cs_lock: spinlock to protect cs_sequence.
* @dram_phys_mem: amount of used physical DRAM memory by this context.
* @thread_restore_token: token to prevent multiple threads of the same context
......@@ -615,6 +622,7 @@ struct hl_ctx {
struct mutex mmu_lock;
struct list_head debugfs_list;
u64 cs_sequence;
u64 *dram_default_hops;
spinlock_t cs_lock;
atomic64_t dram_phys_mem;
atomic_t thread_restore_token;
......@@ -1068,6 +1076,7 @@ struct hl_device_reset_work {
* @reset_on_lockup: true if a reset should be done in case of stuck CS, false
* otherwise.
* @dram_supports_virtual_memory: is MMU enabled towards DRAM.
* @dram_default_page_mapping: is DRAM default page mapping enabled.
* @init_done: is the initialization of the device done.
* @mmu_enable: is MMU enabled.
*/
......@@ -1135,6 +1144,7 @@ struct hl_device {
u8 heartbeat;
u8 reset_on_lockup;
u8 dram_supports_virtual_memory;
u8 dram_default_page_mapping;
u8 init_done;
/* Parameters for bring-up */
......@@ -1329,7 +1339,7 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr, u32 size,
int hl_mmu_init(struct hl_device *hdev);
void hl_mmu_fini(struct hl_device *hdev);
void hl_mmu_ctx_init(struct hl_ctx *ctx);
int hl_mmu_ctx_init(struct hl_ctx *ctx);
void hl_mmu_ctx_fini(struct hl_ctx *ctx);
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
......
......@@ -36,6 +36,7 @@
#define HL_PTE_SIZE sizeof(u64)
#define HOP_TABLE_SIZE PAGE_SIZE_4KB
#define PTE_ENTRIES_IN_HOP (HOP_TABLE_SIZE / HL_PTE_SIZE)
#define HOP0_TABLES_TOTAL_SIZE (HOP_TABLE_SIZE * MAX_ASID)
#define MMU_HOP0_PA43_12_SHIFT 12
......
......@@ -925,8 +925,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
goto map_err;
}
hdev->asic_funcs->mmu_invalidate_cache_range(hdev, false, ctx->asid,
ret_vaddr, phys_pg_pack->total_size);
hdev->asic_funcs->mmu_invalidate_cache(hdev, false);
mutex_unlock(&ctx->mmu_lock);
......@@ -1050,8 +1049,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
dev_warn_ratelimited(hdev->dev,
"unmap failed for vaddr: 0x%llx\n", next_vaddr);
hdev->asic_funcs->mmu_invalidate_cache_range(hdev, true, ctx->asid,
vaddr, phys_pg_pack->total_size);
hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
mutex_unlock(&ctx->mmu_lock);
......@@ -1455,7 +1453,11 @@ static int hl_vm_ctx_init_with_ranges(struct hl_ctx *ctx, u64 host_range_start,
struct hl_device *hdev = ctx->hdev;
int rc;
hl_mmu_ctx_init(ctx);
rc = hl_mmu_ctx_init(ctx);
if (rc) {
dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
return rc;
}
mutex_init(&ctx->mem_hash_lock);
hash_init(ctx->mem_hash);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment