Commit 54bb6744, authored by Omer Shpigelman, committed by Oded Gabbay

habanalabs: split MMU properties to PCI/DRAM

Split the properties used for MMU mappings to DRAM and PCI (host) types.
This is a prerequisite for future ASICs support.
Note that in the Goya ASIC, the PMMU and DMMU are the same (except for page
sizes), as only one MMU mechanism is used for both mapping types.
Hence this patch should not have any effect on current behavior.
Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
parent 30919ede
...@@ -307,39 +307,51 @@ static inline u64 get_hop0_addr(struct hl_ctx *ctx) ...@@ -307,39 +307,51 @@ static inline u64 get_hop0_addr(struct hl_ctx *ctx)
(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
} }
static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
u64 virt_addr) u64 virt_addr, u64 mask, u64 shift)
{ {
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
((virt_addr & HOP0_MASK) >> HOP0_SHIFT); ((virt_addr & mask) >> shift);
} }
static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
u64 virt_addr) struct hl_mmu_properties *mmu_specs,
u64 hop_addr, u64 vaddr)
{ {
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
((virt_addr & HOP1_MASK) >> HOP1_SHIFT); mmu_specs->hop0_shift);
} }
static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
u64 virt_addr) struct hl_mmu_properties *mmu_specs,
u64 hop_addr, u64 vaddr)
{ {
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
((virt_addr & HOP2_MASK) >> HOP2_SHIFT); mmu_specs->hop1_shift);
} }
static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
u64 virt_addr) struct hl_mmu_properties *mmu_specs,
u64 hop_addr, u64 vaddr)
{ {
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
((virt_addr & HOP3_MASK) >> HOP3_SHIFT); mmu_specs->hop2_shift);
} }
static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
u64 virt_addr) struct hl_mmu_properties *mmu_specs,
u64 hop_addr, u64 vaddr)
{ {
return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
((virt_addr & HOP4_MASK) >> HOP4_SHIFT); mmu_specs->hop3_shift);
}
static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_specs,
u64 hop_addr, u64 vaddr)
{
return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
mmu_specs->hop4_shift);
} }
static inline u64 get_next_hop_addr(u64 curr_pte) static inline u64 get_next_hop_addr(u64 curr_pte)
...@@ -355,7 +367,10 @@ static int mmu_show(struct seq_file *s, void *data) ...@@ -355,7 +367,10 @@ static int mmu_show(struct seq_file *s, void *data)
struct hl_debugfs_entry *entry = s->private; struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev; struct hl_device *hdev = dev_entry->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
struct hl_ctx *ctx; struct hl_ctx *ctx;
bool is_dram_addr;
u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0, u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0, hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
...@@ -377,33 +392,39 @@ static int mmu_show(struct seq_file *s, void *data) ...@@ -377,33 +392,39 @@ static int mmu_show(struct seq_file *s, void *data)
return 0; return 0;
} }
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock); mutex_lock(&ctx->mmu_lock);
/* the following lookup is copied from unmap() in mmu.c */ /* the following lookup is copied from unmap() in mmu.c */
hop0_addr = get_hop0_addr(ctx); hop0_addr = get_hop0_addr(ctx);
hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr); hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
hop1_addr = get_next_hop_addr(hop0_pte); hop1_addr = get_next_hop_addr(hop0_pte);
if (hop1_addr == ULLONG_MAX) if (hop1_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr); hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
hop2_addr = get_next_hop_addr(hop1_pte); hop2_addr = get_next_hop_addr(hop1_pte);
if (hop2_addr == ULLONG_MAX) if (hop2_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr); hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
hop3_addr = get_next_hop_addr(hop2_pte); hop3_addr = get_next_hop_addr(hop2_pte);
if (hop3_addr == ULLONG_MAX) if (hop3_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr); hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
if (!(hop3_pte & LAST_MASK)) { if (!(hop3_pte & LAST_MASK)) {
...@@ -412,7 +433,8 @@ static int mmu_show(struct seq_file *s, void *data) ...@@ -412,7 +433,8 @@ static int mmu_show(struct seq_file *s, void *data)
if (hop4_addr == ULLONG_MAX) if (hop4_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
virt_addr);
hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr); hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
if (!(hop4_pte & PAGE_PRESENT_MASK)) if (!(hop4_pte & PAGE_PRESENT_MASK))
goto not_mapped; goto not_mapped;
...@@ -534,41 +556,50 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, ...@@ -534,41 +556,50 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
u64 *phys_addr) u64 *phys_addr)
{ {
struct hl_ctx *ctx = hdev->compute_ctx; struct hl_ctx *ctx = hdev->compute_ctx;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
u64 hop_addr, hop_pte_addr, hop_pte; u64 hop_addr, hop_pte_addr, hop_pte;
u64 offset_mask = HOP4_MASK | FLAGS_MASK; u64 offset_mask = HOP4_MASK | FLAGS_MASK;
int rc = 0; int rc = 0;
bool is_dram_addr;
if (!ctx) { if (!ctx) {
dev_err(hdev->dev, "no ctx available\n"); dev_err(hdev->dev, "no ctx available\n");
return -EINVAL; return -EINVAL;
} }
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
mutex_lock(&ctx->mmu_lock); mutex_lock(&ctx->mmu_lock);
/* hop 0 */ /* hop 0 */
hop_addr = get_hop0_addr(ctx); hop_addr = get_hop0_addr(ctx);
hop_pte_addr = get_hop0_pte_addr(ctx, hop_addr, virt_addr); hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
/* hop 1 */ /* hop 1 */
hop_addr = get_next_hop_addr(hop_pte); hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX) if (hop_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop_pte_addr = get_hop1_pte_addr(ctx, hop_addr, virt_addr); hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
/* hop 2 */ /* hop 2 */
hop_addr = get_next_hop_addr(hop_pte); hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX) if (hop_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop_pte_addr = get_hop2_pte_addr(ctx, hop_addr, virt_addr); hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
/* hop 3 */ /* hop 3 */
hop_addr = get_next_hop_addr(hop_pte); hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX) if (hop_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop_pte_addr = get_hop3_pte_addr(ctx, hop_addr, virt_addr); hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
if (!(hop_pte & LAST_MASK)) { if (!(hop_pte & LAST_MASK)) {
...@@ -576,7 +607,8 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, ...@@ -576,7 +607,8 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
hop_addr = get_next_hop_addr(hop_pte); hop_addr = get_next_hop_addr(hop_pte);
if (hop_addr == ULLONG_MAX) if (hop_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr); hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
virt_addr);
hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
offset_mask = FLAGS_MASK; offset_mask = FLAGS_MASK;
......
...@@ -380,6 +380,23 @@ void goya_get_fixed_properties(struct hl_device *hdev) ...@@ -380,6 +380,23 @@ void goya_get_fixed_properties(struct hl_device *hdev)
prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
prop->dram_page_size = PAGE_SIZE_2MB; prop->dram_page_size = PAGE_SIZE_2MB;
prop->dmmu.hop0_shift = HOP0_SHIFT;
prop->dmmu.hop1_shift = HOP1_SHIFT;
prop->dmmu.hop2_shift = HOP2_SHIFT;
prop->dmmu.hop3_shift = HOP3_SHIFT;
prop->dmmu.hop4_shift = HOP4_SHIFT;
prop->dmmu.hop0_mask = HOP0_MASK;
prop->dmmu.hop1_mask = HOP1_MASK;
prop->dmmu.hop2_mask = HOP2_MASK;
prop->dmmu.hop3_mask = HOP3_MASK;
prop->dmmu.hop4_mask = HOP4_MASK;
prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
/* No difference between PMMU and DMMU except for page size */
memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
prop->dmmu.page_size = PAGE_SIZE_2MB;
prop->pmmu.page_size = PAGE_SIZE_4KB;
prop->va_space_host_start_address = VA_HOST_SPACE_START; prop->va_space_host_start_address = VA_HOST_SPACE_START;
prop->va_space_host_end_address = VA_HOST_SPACE_END; prop->va_space_host_end_address = VA_HOST_SPACE_END;
prop->va_space_dram_start_address = VA_DDR_SPACE_START; prop->va_space_dram_start_address = VA_DDR_SPACE_START;
......
...@@ -130,6 +130,36 @@ enum hl_device_hw_state { ...@@ -130,6 +130,36 @@ enum hl_device_hw_state {
HL_DEVICE_HW_STATE_DIRTY HL_DEVICE_HW_STATE_DIRTY
}; };
/**
* struct hl_mmu_properties - ASIC specific MMU address translation properties.
* @hop0_shift: right shift applied to the virtual address bits selected by
*              @hop0_mask to get the PTE index in hop 0.
* @hop1_shift: right shift applied to the virtual address bits selected by
*              @hop1_mask to get the PTE index in hop 1.
* @hop2_shift: right shift applied to the virtual address bits selected by
*              @hop2_mask to get the PTE index in hop 2.
* @hop3_shift: right shift applied to the virtual address bits selected by
*              @hop3_mask to get the PTE index in hop 3.
* @hop4_shift: right shift applied to the virtual address bits selected by
*              @hop4_mask to get the PTE index in hop 4.
* @hop0_mask: mask of the virtual address bits that index the PTE in hop 0.
* @hop1_mask: mask of the virtual address bits that index the PTE in hop 1.
* @hop2_mask: mask of the virtual address bits that index the PTE in hop 2.
* @hop3_mask: mask of the virtual address bits that index the PTE in hop 3.
* @hop4_mask: mask of the virtual address bits that index the PTE in hop 4.
* @page_size: default page size used to allocate memory.
* @huge_page_size: page size used to allocate memory with huge pages.
*
* For hop N, the PTE address is computed as:
*   hop_addr + pte_size * ((virt_addr & hopN_mask) >> hopN_shift)
*
* struct asic_fixed_properties holds one instance of this structure for DRAM
* mappings (dmmu) and one for PCI (host) mappings (pmmu).
*/
struct hl_mmu_properties {
u64 hop0_shift;
u64 hop1_shift;
u64 hop2_shift;
u64 hop3_shift;
u64 hop4_shift;
u64 hop0_mask;
u64 hop1_mask;
u64 hop2_mask;
u64 hop3_mask;
u64 hop4_mask;
u32 page_size;
u32 huge_page_size;
};
/** /**
* struct asic_fixed_properties - ASIC specific immutable properties. * struct asic_fixed_properties - ASIC specific immutable properties.
* @hw_queues_props: H/W queues properties. * @hw_queues_props: H/W queues properties.
...@@ -137,6 +167,8 @@ enum hl_device_hw_state { ...@@ -137,6 +167,8 @@ enum hl_device_hw_state {
* available sensors. * available sensors.
* @uboot_ver: F/W U-boot version. * @uboot_ver: F/W U-boot version.
* @preboot_ver: F/W Preboot version. * @preboot_ver: F/W Preboot version.
* @dmmu: DRAM MMU address translation properties.
* @pmmu: PCI (host) MMU address translation properties.
* @sram_base_address: SRAM physical start address. * @sram_base_address: SRAM physical start address.
* @sram_end_address: SRAM physical end address. * @sram_end_address: SRAM physical end address.
* @sram_user_base_address - SRAM physical start address for user access. * @sram_user_base_address - SRAM physical start address for user access.
...@@ -173,53 +205,55 @@ enum hl_device_hw_state { ...@@ -173,53 +205,55 @@ enum hl_device_hw_state {
* @psoc_pci_pll_nf: PCI PLL NF value. * @psoc_pci_pll_nf: PCI PLL NF value.
* @psoc_pci_pll_od: PCI PLL OD value. * @psoc_pci_pll_od: PCI PLL OD value.
* @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value. * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
* @completion_queues_count: number of completion queues.
* @high_pll: high PLL frequency used by the device. * @high_pll: high PLL frequency used by the device.
* @cb_pool_cb_cnt: number of CBs in the CB pool. * @cb_pool_cb_cnt: number of CBs in the CB pool.
* @cb_pool_cb_size: size of each CB in the CB pool. * @cb_pool_cb_size: size of each CB in the CB pool.
* @tpc_enabled_mask: which TPCs are enabled. * @tpc_enabled_mask: which TPCs are enabled.
* @completion_queues_count: number of completion queues.
*/ */
struct asic_fixed_properties { struct asic_fixed_properties {
struct hw_queue_properties hw_queues_props[HL_MAX_QUEUES]; struct hw_queue_properties hw_queues_props[HL_MAX_QUEUES];
struct armcp_info armcp_info; struct armcp_info armcp_info;
char uboot_ver[VERSION_MAX_LEN]; char uboot_ver[VERSION_MAX_LEN];
char preboot_ver[VERSION_MAX_LEN]; char preboot_ver[VERSION_MAX_LEN];
u64 sram_base_address; struct hl_mmu_properties dmmu;
u64 sram_end_address; struct hl_mmu_properties pmmu;
u64 sram_user_base_address; u64 sram_base_address;
u64 dram_base_address; u64 sram_end_address;
u64 dram_end_address; u64 sram_user_base_address;
u64 dram_user_base_address; u64 dram_base_address;
u64 dram_size; u64 dram_end_address;
u64 dram_pci_bar_size; u64 dram_user_base_address;
u64 max_power_default; u64 dram_size;
u64 va_space_host_start_address; u64 dram_pci_bar_size;
u64 va_space_host_end_address; u64 max_power_default;
u64 va_space_dram_start_address; u64 va_space_host_start_address;
u64 va_space_dram_end_address; u64 va_space_host_end_address;
u64 dram_size_for_default_page_mapping; u64 va_space_dram_start_address;
u64 pcie_dbi_base_address; u64 va_space_dram_end_address;
u64 pcie_aux_dbi_reg_addr; u64 dram_size_for_default_page_mapping;
u64 mmu_pgt_addr; u64 pcie_dbi_base_address;
u64 mmu_dram_default_page_addr; u64 pcie_aux_dbi_reg_addr;
u32 mmu_pgt_size; u64 mmu_pgt_addr;
u32 mmu_pte_size; u64 mmu_dram_default_page_addr;
u32 mmu_hop_table_size; u32 mmu_pgt_size;
u32 mmu_hop0_tables_total_size; u32 mmu_pte_size;
u32 dram_page_size; u32 mmu_hop_table_size;
u32 cfg_size; u32 mmu_hop0_tables_total_size;
u32 sram_size; u32 dram_page_size;
u32 max_asid; u32 cfg_size;
u32 num_of_events; u32 sram_size;
u32 psoc_pci_pll_nr; u32 max_asid;
u32 psoc_pci_pll_nf; u32 num_of_events;
u32 psoc_pci_pll_od; u32 psoc_pci_pll_nr;
u32 psoc_pci_pll_div_factor; u32 psoc_pci_pll_nf;
u32 high_pll; u32 psoc_pci_pll_od;
u32 cb_pool_cb_cnt; u32 psoc_pci_pll_div_factor;
u32 cb_pool_cb_size; u32 high_pll;
u8 completion_queues_count; u32 cb_pool_cb_cnt;
u8 tpc_enabled_mask; u32 cb_pool_cb_size;
u8 tpc_enabled_mask;
u8 completion_queues_count;
}; };
/** /**
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
#define PAGE_SHIFT_2MB 21 #define PAGE_SHIFT_2MB 21
#define PAGE_SIZE_2MB (_AC(1, UL) << PAGE_SHIFT_2MB) #define PAGE_SIZE_2MB (_AC(1, UL) << PAGE_SHIFT_2MB)
#define PAGE_SIZE_4KB (_AC(1, UL) << PAGE_SHIFT_4KB) #define PAGE_SIZE_4KB (_AC(1, UL) << PAGE_SHIFT_4KB)
#define PAGE_MASK_2MB (~(PAGE_SIZE_2MB - 1))
#define PAGE_PRESENT_MASK 0x0000000000001ull #define PAGE_PRESENT_MASK 0x0000000000001ull
#define SWAP_OUT_MASK 0x0000000000004ull #define SWAP_OUT_MASK 0x0000000000004ull
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/genalloc.h> #include <linux/genalloc.h>
#define PGS_IN_2MB_PAGE (PAGE_SIZE_2MB >> PAGE_SHIFT)
#define HL_MMU_DEBUG 0 #define HL_MMU_DEBUG 0
/* /*
...@@ -516,8 +515,8 @@ static inline int add_va_block(struct hl_device *hdev, ...@@ -516,8 +515,8 @@ static inline int add_va_block(struct hl_device *hdev,
* - Return the start address of the virtual block * - Return the start address of the virtual block
*/ */
static u64 get_va_block(struct hl_device *hdev, static u64 get_va_block(struct hl_device *hdev,
struct hl_va_range *va_range, u64 size, u64 hint_addr, struct hl_va_range *va_range, u64 size, u64 hint_addr,
bool is_userptr) bool is_userptr)
{ {
struct hl_vm_va_block *va_block, *new_va_block = NULL; struct hl_vm_va_block *va_block, *new_va_block = NULL;
u64 valid_start, valid_size, prev_start, prev_end, page_mask, u64 valid_start, valid_size, prev_start, prev_end, page_mask,
...@@ -525,18 +524,17 @@ static u64 get_va_block(struct hl_device *hdev, ...@@ -525,18 +524,17 @@ static u64 get_va_block(struct hl_device *hdev,
u32 page_size; u32 page_size;
bool add_prev = false; bool add_prev = false;
if (is_userptr) { if (is_userptr)
/* /*
* We cannot know if the user allocated memory with huge pages * We cannot know if the user allocated memory with huge pages
* or not, hence we continue with the biggest possible * or not, hence we continue with the biggest possible
* granularity. * granularity.
*/ */
page_size = PAGE_SIZE_2MB; page_size = hdev->asic_prop.pmmu.huge_page_size;
page_mask = PAGE_MASK_2MB; else
} else { page_size = hdev->asic_prop.dmmu.page_size;
page_size = hdev->asic_prop.dram_page_size;
page_mask = ~((u64)page_size - 1); page_mask = ~((u64)page_size - 1);
}
mutex_lock(&va_range->lock); mutex_lock(&va_range->lock);
...@@ -558,7 +556,6 @@ static u64 get_va_block(struct hl_device *hdev, ...@@ -558,7 +556,6 @@ static u64 get_va_block(struct hl_device *hdev,
if (valid_size >= size && if (valid_size >= size &&
(!new_va_block || valid_size < res_valid_size)) { (!new_va_block || valid_size < res_valid_size)) {
new_va_block = va_block; new_va_block = va_block;
res_valid_start = valid_start; res_valid_start = valid_start;
res_valid_size = valid_size; res_valid_size = valid_size;
...@@ -629,7 +626,7 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) ...@@ -629,7 +626,7 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
/* /*
* init_phys_pg_pack_from_userptr - initialize physical page pack from host * init_phys_pg_pack_from_userptr - initialize physical page pack from host
* memory * memory
* @asid: current context ASID * @ctx: current context
* @userptr: userptr to initialize from * @userptr: userptr to initialize from
* @pphys_pg_pack: result pointer * @pphys_pg_pack: result pointer
* *
...@@ -638,16 +635,20 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr) ...@@ -638,16 +635,20 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
* - Create a physical page pack from the physical pages related to the given * - Create a physical page pack from the physical pages related to the given
* virtual block * virtual block
*/ */
static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr, static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
struct hl_userptr *userptr,
struct hl_vm_phys_pg_pack **pphys_pg_pack) struct hl_vm_phys_pg_pack **pphys_pg_pack)
{ {
struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu;
struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_vm_phys_pg_pack *phys_pg_pack;
struct scatterlist *sg; struct scatterlist *sg;
dma_addr_t dma_addr; dma_addr_t dma_addr;
u64 page_mask, total_npages; u64 page_mask, total_npages;
u32 npages, page_size = PAGE_SIZE; u32 npages, page_size = PAGE_SIZE,
huge_page_size = mmu_prop->huge_page_size;
bool first = true, is_huge_page_opt = true; bool first = true, is_huge_page_opt = true;
int rc, i, j; int rc, i, j;
u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL); phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
if (!phys_pg_pack) if (!phys_pg_pack)
...@@ -655,7 +656,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr, ...@@ -655,7 +656,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
phys_pg_pack->vm_type = userptr->vm_type; phys_pg_pack->vm_type = userptr->vm_type;
phys_pg_pack->created_from_userptr = true; phys_pg_pack->created_from_userptr = true;
phys_pg_pack->asid = asid; phys_pg_pack->asid = ctx->asid;
atomic_set(&phys_pg_pack->mapping_cnt, 1); atomic_set(&phys_pg_pack->mapping_cnt, 1);
/* Only if all dma_addrs are aligned to 2MB and their /* Only if all dma_addrs are aligned to 2MB and their
...@@ -670,14 +671,14 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr, ...@@ -670,14 +671,14 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
total_npages += npages; total_npages += npages;
if ((npages % PGS_IN_2MB_PAGE) || if ((npages % pgs_in_huge_page) ||
(dma_addr & (PAGE_SIZE_2MB - 1))) (dma_addr & (huge_page_size - 1)))
is_huge_page_opt = false; is_huge_page_opt = false;
} }
if (is_huge_page_opt) { if (is_huge_page_opt) {
page_size = PAGE_SIZE_2MB; page_size = huge_page_size;
total_npages /= PGS_IN_2MB_PAGE; do_div(total_npages, pgs_in_huge_page);
} }
page_mask = ~(((u64) page_size) - 1); page_mask = ~(((u64) page_size) - 1);
...@@ -709,7 +710,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr, ...@@ -709,7 +710,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
dma_addr += page_size; dma_addr += page_size;
if (is_huge_page_opt) if (is_huge_page_opt)
npages -= PGS_IN_2MB_PAGE; npages -= pgs_in_huge_page;
else else
npages--; npages--;
} }
...@@ -872,7 +873,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, ...@@ -872,7 +873,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
return rc; return rc;
} }
rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr, rc = init_phys_pg_pack_from_userptr(ctx, userptr,
&phys_pg_pack); &phys_pg_pack);
if (rc) { if (rc) {
dev_err(hdev->dev, dev_err(hdev->dev,
...@@ -1029,7 +1030,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) ...@@ -1029,7 +1030,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
if (*vm_type == VM_TYPE_USERPTR) { if (*vm_type == VM_TYPE_USERPTR) {
is_userptr = true; is_userptr = true;
userptr = hnode->ptr; userptr = hnode->ptr;
rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr, rc = init_phys_pg_pack_from_userptr(ctx, userptr,
&phys_pg_pack); &phys_pg_pack);
if (rc) { if (rc) {
dev_err(hdev->dev, dev_err(hdev->dev,
......
...@@ -171,29 +171,44 @@ static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, ...@@ -171,29 +171,44 @@ static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
((virt_addr & mask) >> shift); ((virt_addr & mask) >> shift);
} }
static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_prop,
u64 hop_addr, u64 vaddr)
{ {
return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT); return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
mmu_prop->hop0_shift);
} }
static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_prop,
u64 hop_addr, u64 vaddr)
{ {
return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT); return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
mmu_prop->hop1_shift);
} }
static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_prop,
u64 hop_addr, u64 vaddr)
{ {
return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT); return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
mmu_prop->hop2_shift);
} }
static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_prop,
u64 hop_addr, u64 vaddr)
{ {
return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT); return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
mmu_prop->hop3_shift);
} }
static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr) static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
struct hl_mmu_properties *mmu_prop,
u64 hop_addr, u64 vaddr)
{ {
return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT); return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
mmu_prop->hop4_shift);
} }
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
...@@ -513,24 +528,23 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) ...@@ -513,24 +528,23 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
mutex_destroy(&ctx->mmu_lock); mutex_destroy(&ctx->mmu_lock);
} }
static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
{ {
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop; struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
u64 hop0_addr = 0, hop0_pte_addr = 0, u64 hop0_addr = 0, hop0_pte_addr = 0,
hop1_addr = 0, hop1_pte_addr = 0, hop1_addr = 0, hop1_pte_addr = 0,
hop2_addr = 0, hop2_pte_addr = 0, hop2_addr = 0, hop2_pte_addr = 0,
hop3_addr = 0, hop3_pte_addr = 0, hop3_addr = 0, hop3_pte_addr = 0,
hop4_addr = 0, hop4_pte_addr = 0, hop4_addr = 0, hop4_pte_addr = 0,
curr_pte; curr_pte;
bool is_dram_addr, is_huge, clear_hop3 = true; bool is_huge, clear_hop3 = true;
is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB, mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
hop0_addr = get_hop0_addr(ctx); hop0_addr = get_hop0_addr(ctx);
hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
...@@ -539,7 +553,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) ...@@ -539,7 +553,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
if (hop1_addr == ULLONG_MAX) if (hop1_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
...@@ -548,7 +562,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) ...@@ -548,7 +562,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
if (hop2_addr == ULLONG_MAX) if (hop2_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
...@@ -557,7 +571,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) ...@@ -557,7 +571,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
if (hop3_addr == ULLONG_MAX) if (hop3_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
...@@ -575,7 +589,8 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) ...@@ -575,7 +589,8 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
if (hop4_addr == ULLONG_MAX) if (hop4_addr == ULLONG_MAX)
goto not_mapped; goto not_mapped;
hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
...@@ -667,25 +682,36 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr) ...@@ -667,25 +682,36 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
{ {
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr; u64 real_virt_addr;
u32 real_page_size, npages; u32 real_page_size, npages;
int i, rc; int i, rc;
bool is_dram_addr;
if (!hdev->mmu_enable) if (!hdev->mmu_enable)
return 0; return 0;
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
/* /*
* The H/W handles mapping of 4KB/2MB page. Hence if the host page size * The H/W handles mapping of specific page sizes. Hence if the page
* is bigger, we break it to sub-pages and unmap them separately. * size is bigger, we break it to sub-pages and unmap them separately.
*/ */
if ((page_size % PAGE_SIZE_2MB) == 0) { if ((page_size % mmu_prop->huge_page_size) == 0) {
real_page_size = PAGE_SIZE_2MB; real_page_size = mmu_prop->huge_page_size;
} else if ((page_size % PAGE_SIZE_4KB) == 0) { } else if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = PAGE_SIZE_4KB; real_page_size = mmu_prop->page_size;
} else { } else {
dev_err(hdev->dev, dev_err(hdev->dev,
"page size of %u is not 4KB nor 2MB aligned, can't unmap\n", "page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
page_size); page_size,
mmu_prop->page_size >> 10,
mmu_prop->huge_page_size >> 20);
return -EFAULT; return -EFAULT;
} }
...@@ -694,7 +720,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) ...@@ -694,7 +720,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
real_virt_addr = virt_addr; real_virt_addr = virt_addr;
for (i = 0 ; i < npages ; i++) { for (i = 0 ; i < npages ; i++) {
rc = _hl_mmu_unmap(ctx, real_virt_addr); rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
if (rc) if (rc)
return rc; return rc;
...@@ -705,10 +731,11 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size) ...@@ -705,10 +731,11 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
} }
static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
u32 page_size) u32 page_size, bool is_dram_addr)
{ {
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop; struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
u64 hop0_addr = 0, hop0_pte_addr = 0, u64 hop0_addr = 0, hop0_pte_addr = 0,
hop1_addr = 0, hop1_pte_addr = 0, hop1_addr = 0, hop1_pte_addr = 0,
hop2_addr = 0, hop2_pte_addr = 0, hop2_addr = 0, hop2_pte_addr = 0,
...@@ -716,21 +743,19 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, ...@@ -716,21 +743,19 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
hop4_addr = 0, hop4_pte_addr = 0, hop4_addr = 0, hop4_pte_addr = 0,
curr_pte = 0; curr_pte = 0;
bool hop1_new = false, hop2_new = false, hop3_new = false, bool hop1_new = false, hop2_new = false, hop3_new = false,
hop4_new = false, is_huge, is_dram_addr; hop4_new = false, is_huge;
int rc = -ENOMEM; int rc = -ENOMEM;
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
/* /*
* This mapping function can map a 4KB/2MB page. For 2MB page there are * This mapping function can map a page or a huge page. For huge page
* only 3 hops rather than 4. Currently the DRAM allocation uses 2MB * there are only 3 hops rather than 4. Currently the DRAM allocation
* pages only but user memory could have been allocated with one of the * uses huge pages only but user memory could have been allocated with
* two page sizes. Since this is a common code for all the three cases, * one of the two page sizes. Since this is a common code for all the
* we need this huge page check. * three cases, we need this huge page check.
*/ */
is_huge = page_size == PAGE_SIZE_2MB; is_huge = page_size == mmu_prop->huge_page_size;
is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
if (is_dram_addr && !is_huge) { if (is_dram_addr && !is_huge) {
dev_err(hdev->dev, "DRAM mapping should use huge pages only\n"); dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
...@@ -738,28 +763,28 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, ...@@ -738,28 +763,28 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
} }
hop0_addr = get_hop0_addr(ctx); hop0_addr = get_hop0_addr(ctx);
hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr); hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr; curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new); hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
if (hop1_addr == ULLONG_MAX) if (hop1_addr == ULLONG_MAX)
goto err; goto err;
hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr); hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr; curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new); hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
if (hop2_addr == ULLONG_MAX) if (hop2_addr == ULLONG_MAX)
goto err; goto err;
hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr); hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr; curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new); hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
if (hop3_addr == ULLONG_MAX) if (hop3_addr == ULLONG_MAX)
goto err; goto err;
hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr); hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr; curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
if (!is_huge) { if (!is_huge) {
...@@ -767,7 +792,8 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, ...@@ -767,7 +792,8 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
if (hop4_addr == ULLONG_MAX) if (hop4_addr == ULLONG_MAX)
goto err; goto err;
hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr); hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
virt_addr);
curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr; curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
} }
...@@ -890,25 +916,36 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, ...@@ -890,25 +916,36 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
{ {
struct hl_device *hdev = ctx->hdev; struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr, real_phys_addr; u64 real_virt_addr, real_phys_addr;
u32 real_page_size, npages; u32 real_page_size, npages;
int i, rc, mapped_cnt = 0; int i, rc, mapped_cnt = 0;
bool is_dram_addr;
if (!hdev->mmu_enable) if (!hdev->mmu_enable)
return 0; return 0;
is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
/* /*
* The H/W handles mapping of 4KB/2MB page. Hence if the host page size * The H/W handles mapping of specific page sizes. Hence if the page
* is bigger, we break it to sub-pages and map them separately. * size is bigger, we break it to sub-pages and map them separately.
*/ */
if ((page_size % PAGE_SIZE_2MB) == 0) { if ((page_size % mmu_prop->huge_page_size) == 0) {
real_page_size = PAGE_SIZE_2MB; real_page_size = mmu_prop->huge_page_size;
} else if ((page_size % PAGE_SIZE_4KB) == 0) { } else if ((page_size % mmu_prop->page_size) == 0) {
real_page_size = PAGE_SIZE_4KB; real_page_size = mmu_prop->page_size;
} else { } else {
dev_err(hdev->dev, dev_err(hdev->dev,
"page size of %u is not 4KB nor 2MB aligned, can't map\n", "page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
page_size); page_size,
mmu_prop->page_size >> 10,
mmu_prop->huge_page_size >> 20);
return -EFAULT; return -EFAULT;
} }
...@@ -923,7 +960,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) ...@@ -923,7 +960,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
for (i = 0 ; i < npages ; i++) { for (i = 0 ; i < npages ; i++) {
rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr, rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
real_page_size); real_page_size, is_dram_addr);
if (rc) if (rc)
goto err; goto err;
...@@ -937,7 +974,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) ...@@ -937,7 +974,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
err: err:
real_virt_addr = virt_addr; real_virt_addr = virt_addr;
for (i = 0 ; i < mapped_cnt ; i++) { for (i = 0 ; i < mapped_cnt ; i++) {
if (_hl_mmu_unmap(ctx, real_virt_addr)) if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr))
dev_warn_ratelimited(hdev->dev, dev_warn_ratelimited(hdev->dev,
"failed to unmap va: 0x%llx\n", real_virt_addr); "failed to unmap va: 0x%llx\n", real_virt_addr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment