Commit 5a364c2a authored by Vineet Gupta

ARC: mm: PAE40 support

This is the first working implementation of 40-bit physical address
extension on ARCv2.
Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
parent 25d46418
arch/arc/Kconfig
@@ -453,6 +453,21 @@ config HIGHMEM
 	  kernel. Enable this to potentially allow access to rest of 2G and PAE
 	  in future
 
+config ARC_HAS_PAE40
+	bool "Support for the 40-bit Physical Address Extension"
+	default n
+	depends on ISA_ARCV2
+	select HIGHMEM
+	help
+	  Enable access to physical memory beyond 4G, only supported on
+	  ARC cores with 40 bit Physical Addressing support
+
+config ARCH_PHYS_ADDR_T_64BIT
+	def_bool ARC_HAS_PAE40
+
+config ARCH_DMA_ADDR_T_64BIT
+	bool
+
 config ARC_CURR_IN_REG
 	bool "Dedicate Register r25 for current_task pointer"
 	default y
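
Selecting ARCH_PHYS_ADDR_T_64BIT is what widens phys_addr_t for generic mm code. A minimal sketch of the plumbing this relies on (simplified from include/linux/types.h and mm/Kconfig of this era, shown for orientation rather than as the literal definitions):

    /* mm/Kconfig: PHYS_ADDR_T_64BIT := 64BIT || ARCH_PHYS_ADDR_T_64BIT */
    #ifdef CONFIG_PHYS_ADDR_T_64BIT
    typedef u64 phys_addr_t;	/* wide enough for paddr[39:0] under PAE40 */
    #else
    typedef u32 phys_addr_t;
    #endif
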
arch/arc/include/asm/cache.h
@@ -65,6 +65,7 @@ extern int ioc_exists;
 #if defined(CONFIG_ARC_MMU_V3) || defined(CONFIG_ARC_MMU_V4)
 #define ARC_REG_IC_PTAG		0x1E
 #endif
+#define ARC_REG_IC_PTAG_HI	0x1F
 
 /* Bit val in IC_CTRL */
 #define IC_CTRL_CACHE_DISABLE	0x1
@@ -77,6 +78,7 @@ extern int ioc_exists;
 #define ARC_REG_DC_FLSH		0x4B
 #define ARC_REG_DC_FLDL		0x4C
 #define ARC_REG_DC_PTAG		0x5C
+#define ARC_REG_DC_PTAG_HI	0x5F
 
 /* Bit val in DC_CTRL */
 #define DC_CTRL_INV_MODE_FLUSH	0x40
...
arch/arc/include/asm/mmu.h
@@ -24,6 +24,7 @@
 #if (CONFIG_ARC_MMU_VER < 4)
 #define ARC_REG_TLBPD0		0x405
 #define ARC_REG_TLBPD1		0x406
+#define ARC_REG_TLBPD1HI	0	/* Dummy: allows code sharing with ARC700 */
 #define ARC_REG_TLBINDEX	0x407
 #define ARC_REG_TLBCOMMAND	0x408
 #define ARC_REG_PID		0x409
@@ -31,6 +32,7 @@
 #else
 #define ARC_REG_TLBPD0		0x460
 #define ARC_REG_TLBPD1		0x461
+#define ARC_REG_TLBPD1HI	0x463
 #define ARC_REG_TLBINDEX	0x464
 #define ARC_REG_TLBCOMMAND	0x465
 #define ARC_REG_PID		0x468
@@ -83,6 +85,11 @@ void arc_mmu_init(void);
 extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
 void read_decode_mmu_bcr(void);
 
+static inline int is_pae40_enabled(void)
+{
+	return IS_ENABLED(CONFIG_ARC_HAS_PAE40);
+}
+
 #endif	/* !__ASSEMBLY__ */
 
 #endif
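
is_pae40_enabled() wraps IS_ENABLED() rather than hiding behind an #ifdef, so callers can use a plain C conditional; the predicate constant-folds, and the PAE40 path is dead-code-eliminated on !CONFIG_ARC_HAS_PAE40 builds. A minimal sketch of the pattern the .c files below rely on:

    if (is_pae40_enabled())		/* folds to if (0) without PAE40 */
            write_aux_reg(ARC_REG_TLBPD1HI, 0);	/* compiled out entirely */
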
arch/arc/include/asm/page.h
@@ -56,7 +56,11 @@ typedef struct {
 
 #else /* !STRICT_MM_TYPECHECKS */
 
+#ifdef CONFIG_ARC_HAS_PAE40
+typedef unsigned long long pte_t;
+#else
 typedef unsigned long pte_t;
+#endif
 typedef unsigned long pgd_t;
 typedef unsigned long pgprot_t;
...
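
With pte_t widened to unsigned long long, a PAE40 PTE occupies two 32-bit words in memory. A sketch (not from the patch) of the layout the rest of the series assumes; ARC here is little-endian, so the word holding paddr[39:32] sits at byte offset 4, which is exactly what the tlbex.S change below depends on:

    /*
     * PAE40 pte_t as two 32-bit words (illustrative):
     *   word at offset 0: paddr[31:PAGE_SHIFT] | protection/state flags
     *   word at offset 4: paddr[39:32] (upper 8 physical address bits)
     */
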
arch/arc/include/asm/pgalloc.h
@@ -49,7 +49,7 @@ pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t ptep)
 
 static inline int __get_order_pgd(void)
 {
-	return get_order(PTRS_PER_PGD * 4);
+	return get_order(PTRS_PER_PGD * sizeof(pgd_t));
 }
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
@@ -87,7 +87,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline int __get_order_pte(void)
 {
-	return get_order(PTRS_PER_PTE * 4);
+	return get_order(PTRS_PER_PTE * sizeof(pte_t));
 }
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
@@ -110,7 +110,7 @@ pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	pte_pg = (pgtable_t)__get_free_pages(GFP_KERNEL | __GFP_REPEAT, __get_order_pte());
 	if (!pte_pg)
 		return 0;
-	memzero((void *)pte_pg, PTRS_PER_PTE * 4);
+	memzero((void *)pte_pg, PTRS_PER_PTE * sizeof(pte_t));
 	page = virt_to_page(pte_pg);
 	if (!pgtable_page_ctor(page)) {
 		__free_page(page);
...
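
The switch from the hard-coded 4 to sizeof(pte_t) matters because PAE40 doubles each PTE. A worked illustration, assuming the default 8 KB pages, where PTRS_PER_PTE comes out to 256 (actual values depend on the Kconfig page-size choice):

    /*
     * !PAE40: 256 * sizeof(u32) = 1 KB -> get_order() == 0 (one 8 KB page)
     *  PAE40: 256 * sizeof(u64) = 2 KB -> get_order() == 0 (still one page)
     */
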
arch/arc/include/asm/pgtable.h
@@ -134,7 +134,12 @@
 
 /* Masks for actual TLB "PD"s */
 #define PTE_BITS_IN_PD0		(_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
 #define PTE_BITS_RWX		(_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+
+#ifdef CONFIG_ARC_HAS_PAE40
+#define PTE_BITS_NON_RWX_IN_PD1	(0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
+#else
 #define PTE_BITS_NON_RWX_IN_PD1	(PAGE_MASK | _PAGE_CACHEABLE)
+#endif
 
 /**************************************************************************
  * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
@@ -272,7 +277,8 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 #define mk_pte(page, prot)	pfn_pte(page_to_pfn(page), prot)
 #define pte_pfn(pte)		(pte_val(pte) >> PAGE_SHIFT)
-#define pfn_pte(pfn, prot)	(__pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)))
+#define pfn_pte(pfn, prot)	(__pte(((pte_t)(pfn) << PAGE_SHIFT) | \
+				 pgprot_val(prot)))
 #define __pte_index(addr)	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
 /*
...
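
The added (pte_t) cast in pfn_pte() is the load-bearing part: pfn is a 32-bit unsigned long, so shifting before widening would truncate any page frame above 4 GB. A worked example, assuming PAGE_SHIFT == 13 (the default 8 KB pages):

    unsigned long pfn = 0x180000;		/* paddr 0x3_0000_0000, above 4 GB */
    u64 bad = (unsigned long)(pfn << 13);	/* 32-bit shift wraps to 0 */
    u64 ok  = ((pte_t)pfn << 13);		/* widened first: 0x3_0000_0000 */
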
arch/arc/mm/cache.c
@@ -253,6 +253,10 @@ void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
 	}
 }
 
+/*
+ * For ARC700 MMUv3 I-cache and D-cache flushes
+ * Also reused for HS38 aliasing I-cache configuration
+ */
 static inline
 void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 			  unsigned long sz, const int op)
@@ -289,6 +293,16 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 	if (full_page)
 		write_aux_reg(aux_tag, paddr);
 
+	/*
+	 * This is technically for MMU v4, using the MMU v3 programming model
+	 * Special work for HS38 aliasing I-cache configuration with PAE40
+	 *   - upper 8 bits of paddr need to be written into PTAG_HI
+	 *   - (and need to be written before the lower 32 bits)
+	 * Note that PTAG_HI is hoisted outside the line loop
+	 */
+	if (is_pae40_enabled() && op == OP_INV_IC)
+		write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+
 	while (num_lines-- > 0) {
 		if (!full_page) {
 			write_aux_reg(aux_tag, paddr);
@@ -301,11 +315,17 @@ void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
 }
 
 /*
- * In HS38x (MMU v4), although icache is VIPT, only paddr is needed for cache
- * maintenance ops (in IVIL reg), as long as icache doesn't alias.
+ * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
+ * Here's how cache ops are implemented
+ *
+ *  - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
+ *  - I-cache Non Aliasing: despite VIPT, only paddr needed (in IC_IVIL)
+ *  - I-cache Aliasing: both vaddr and paddr needed (in IC_IVIL, IC_PTAG
+ *    respectively, similar to MMU v3 programming model, hence
+ *    __cache_line_loop_v3() is used)
  *
- * For Aliasing icache, vaddr is also needed (in IVIL), while paddr is
- * specified in PTAG (similar to MMU v3)
+ * If PAE40 is enabled, independent of aliasing considerations, the higher
+ * bits need to be written into PTAG_HI
 */
 static inline
 void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
@@ -335,6 +355,22 @@ void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
 	num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
 
+	/*
+	 * For HS38 PAE40 configuration
+	 *   - upper 8 bits of paddr need to be written into PTAG_HI
+	 *   - (and need to be written before the lower 32 bits)
+	 */
+	if (is_pae40_enabled()) {
+		if (cacheop == OP_INV_IC)
+			/*
+			 * Non aliasing I-cache in HS38,
+			 * aliasing I-cache handled in __cache_line_loop_v3()
+			 */
+			write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+		else
+			write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
+	}
+
 	while (num_lines-- > 0) {
 		write_aux_reg(aux_cmd, paddr);
 		paddr += L1_CACHE_BYTES;
...
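
The net effect in __cache_line_loop_v4(), condensed: the high byte of paddr is latched once into PTAG_HI before the loop (the hardware wants the high bits written before the low 32 bits of each op), and the per-line loop then only rewrites the low word. A sketch for the D-cache case, using the names from the patch:

    write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);	/* paddr[39:32] first */
    while (num_lines-- > 0) {
            write_aux_reg(aux_cmd, paddr);	/* low 32 bits, per line */
            paddr += L1_CACHE_BYTES;
    }
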
arch/arc/mm/tlb.c
@@ -109,6 +109,10 @@ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
 static inline void __tlb_entry_erase(void)
 {
 	write_aux_reg(ARC_REG_TLBPD1, 0);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
 }
@@ -182,7 +186,7 @@ static void utlb_invalidate(void)
 
 }
 
-static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	unsigned int idx;
@@ -225,10 +229,14 @@ static void tlb_entry_erase(unsigned int vaddr_n_asid)
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
 }
 
-static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
 {
 	write_aux_reg(ARC_REG_TLBPD0, pd0);
 	write_aux_reg(ARC_REG_TLBPD1, pd1);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
+
 	write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
 }
@@ -249,6 +257,10 @@ noinline void local_flush_tlb_all(void)
 
 	/* Load PD0 and PD1 with template for a Blank Entry */
 	write_aux_reg(ARC_REG_TLBPD1, 0);
+
+	if (is_pae40_enabled())
+		write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
 	write_aux_reg(ARC_REG_TLBPD0, 0);
 
 	for (entry = 0; entry < num_tlb; entry++) {
@@ -503,7 +515,8 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
 {
 	unsigned long flags;
 	unsigned int asid_or_sasid, rwx;
-	unsigned long pd0, pd1;
+	unsigned long pd0;
+	pte_t pd1;
 
 	/*
 	 * create_tlb() assumes that current->mm == vma->mm, since
@@ -785,10 +798,11 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
 		      IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
 
 	n += scnprintf(buf + n, len - n,
-		      "MMU [v%x]\t: %dK PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d\n",
+		      "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n",
 		      p_mmu->ver, p_mmu->pg_sz_k, super_pg,
 		      p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
-		      p_mmu->u_dtlb, p_mmu->u_itlb);
+		      p_mmu->u_dtlb, p_mmu->u_itlb,
+		      IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40));
 
 	return buf;
 }
@@ -821,6 +835,9 @@ void arc_mmu_init(void)
 		panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n",
 		      (unsigned long)TO_MB(HPAGE_PMD_SIZE));
 
+	if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
+		panic("Hardware doesn't support PAE40\n");
+
 	/* Enable the MMU */
 	write_aux_reg(ARC_REG_PID, MMU_ENABLE);
...
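
Note the asymmetry in the tlb_entry_insert() signature: pd0 stays unsigned int because it carries the virtual address and ASID, which fit in 32 bits regardless of PAE40; only pd1, which carries the physical address, widens to pte_t and gets split across two aux registers. Condensed from the hunk above:

    write_aux_reg(ARC_REG_TLBPD1, pd1);			/* paddr[31:13] | flags */
    if (is_pae40_enabled())
            write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);	/* paddr[39:32] */
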
arch/arc/mm/tlbex.S
@@ -223,12 +223,16 @@ ex_saved_reg1:
 	; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
 	; (3) z = (pgtbl + y * 4)
 
+#ifdef CONFIG_ARC_HAS_PAE40
+#define PTE_SIZE_LOG		3	/* 8 == 2 ^ 3 */
+#else
 #define PTE_SIZE_LOG		2	/* 4 == 2 ^ 2 */
+#endif
 
 	; multiply in step (3) above avoided by shifting lesser in step (1)
 	lsr   r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG )
 	and   r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG )
-	ld.aw r0, [r1, r0]		; r0: PTE
+	ld.aw r0, [r1, r0]		; r0: PTE (lower word only for PAE40)
 					; r1: PTE ptr
 
 2:
@@ -247,6 +251,7 @@ ex_saved_reg1:
 ;-----------------------------------------------------------------
 ; Convert Linux PTE entry into TLB entry
 ; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu
+;    (for PAE40, two-word PTE, while three-word TLB Entry [PD0:PD1:PD1HI])
 ; IN: r0 = PTE, r1 = ptr to PTE
 
 .macro CONV_PTE_TO_TLB
@@ -259,6 +264,10 @@ ex_saved_reg1:
 	or    r3, r3, r2
 	sr    r3, [ARC_REG_TLBPD1]	; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C
 
+#ifdef CONFIG_ARC_HAS_PAE40
+	ld	r3, [r1, 4]		; paddr[39..32]
+	sr	r3, [ARC_REG_TLBPD1HI]
+#endif
 
 	and r2, r0, PTE_BITS_IN_PD0	; Extract other PTE flags: (V)alid, (G)lb
...
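
The refill handler's shift trick reads more easily in C: rather than computing an index and multiplying by sizeof(pte_t), it right-shifts by (PAGE_SHIFT - PTE_SIZE_LOG) so the index emerges pre-scaled to a byte offset. A rough C equivalent of steps (1)-(3), assuming PAE40 (PTE_SIZE_LOG == 3); note the ld.aw above fetches only the low word, with paddr[39:32] loaded separately from offset 4 in CONV_PTE_TO_TLB:

    unsigned long off = (vaddr >> (PAGE_SHIFT - PTE_SIZE_LOG)) &
                        ((PTRS_PER_PTE - 1) << PTE_SIZE_LOG);
    pte_t *slot = (pte_t *)((char *)pgtbl + off);	/* 8-byte slots under PAE40 */
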