Commit 84b04d3e authored by Ard Biesheuvel, committed by Catalin Marinas

arm64: kernel: Create initial ID map from C code

The asm code that creates the initial ID map is rather intricate and
hard to follow. This is problematic because it makes adding support for
things like LPA2 or WXN more difficult than necessary. Also, it is
parameterized like the rest of the MM code to run with a configurable
number of levels, which is rather pointless: all AArch64 CPUs implement
support for 48-bit virtual addressing, and many systems have DRAM
located outside of the 39-bit addressable range, which is the only
smaller VA size in wide use, so that combination needs additional
tricks to make things work anyway.

So let's bite the bullet, and rip out all the asm macros, and fiddly
code, and replace it with a C implementation based on the newly added
routines for creating the early kernel VA mappings. And while at it,
create the initial ID map based on 48-bit virtual addressing as well,
regardless of the number of configured levels for the kernel proper.

Note that this code may execute with the MMU and caches disabled, and is
therefore not permitted to make unaligned accesses. This shouldn't
generally happen in any case for the algorithm as implemented, but to be
sure, let's pass -mstrict-align to the compiler just in case.
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20240214122845.2033971-66-ardb+git@google.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
parent 34b98e55
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -345,20 +345,6 @@ alternative_cb_end
 	bfi	\valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH
 .endm
 
-/*
- * idmap_get_t0sz - get the T0SZ value needed to cover the ID map
- *
- * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
- * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
- * this number conveniently equals the number of leading zeroes in
- * the physical address of _end.
- */
-.macro	idmap_get_t0sz, reg
-	adrp	\reg, _end
-	orr	\reg, \reg, #(1 << VA_BITS_MIN) - 1
-	clz	\reg, \reg
-.endm
-
 /*
  * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
  *			ID_AA64MMFR0_EL1.PARange value
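
The macro removed above computed T0SZ as the number of leading zeroes in the
physical address of _end, capped so that at least VA_BITS_MIN bits are always
covered. A minimal C sketch of the same calculation (illustrative only;
legacy_idmap_t0sz is a made-up name, not a kernel symbol):

/* models: adrp reg, _end; orr reg, reg, #(1 << VA_BITS_MIN) - 1; clz reg, reg */
static inline unsigned int legacy_idmap_t0sz(unsigned long pa_of_end,
					     unsigned int va_bits_min)
{
	/* the orr step guarantees that at least va_bits_min bits are covered */
	return __builtin_clzl(pa_of_end | ((1UL << va_bits_min) - 1));
}

With _end below 1 << VA_BITS_MIN this yields T0SZ == 64 - VA_BITS_MIN; only when
RAM ends above that limit does the value shrink further. The new code sidesteps
the question entirely by always using TCR_T0SZ(IDMAP_VA_BITS), i.e. a 48-bit ID map.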
--- a/arch/arm64/include/asm/kernel-pgtable.h
+++ b/arch/arm64/include/asm/kernel-pgtable.h
@@ -29,6 +29,7 @@
 #define SWAPPER_TABLE_SHIFT	(SWAPPER_BLOCK_SHIFT + PAGE_SHIFT - 3)
 
 #define SWAPPER_PGTABLE_LEVELS	(CONFIG_PGTABLE_LEVELS - SWAPPER_SKIP_LEVEL)
+#define INIT_IDMAP_PGTABLE_LEVELS	(IDMAP_LEVELS - SWAPPER_SKIP_LEVEL)
 
 #define IDMAP_VA_BITS		48
 #define IDMAP_LEVELS		ARM64_HW_PGTABLE_LEVELS(IDMAP_VA_BITS)
@@ -48,44 +49,39 @@
 #define EARLY_ENTRIES(vstart, vend, shift, add) \
 	(SPAN_NR_ENTRIES(vstart, vend, shift) + (add))
 
-#define EARLY_LEVEL(lvl, vstart, vend, add) \
-	(SWAPPER_PGTABLE_LEVELS > lvl ? EARLY_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * (PAGE_SHIFT - 3), add) : 0)
+#define EARLY_LEVEL(lvl, lvls, vstart, vend, add) \
+	(lvls > lvl ? EARLY_ENTRIES(vstart, vend, SWAPPER_BLOCK_SHIFT + lvl * (PAGE_SHIFT - 3), add) : 0)
 
-#define EARLY_PAGES(vstart, vend, add) (1 /* PGDIR page */ \
-	+ EARLY_LEVEL(3, (vstart), (vend), add) /* each entry needs a next level page table */ \
-	+ EARLY_LEVEL(2, (vstart), (vend), add) /* each entry needs a next level page table */ \
-	+ EARLY_LEVEL(1, (vstart), (vend), add))/* each entry needs a next level page table */
-#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(KIMAGE_VADDR, _end, EXTRA_PAGE) + EARLY_SEGMENT_EXTRA_PAGES))
-
-/* the initial ID map may need two extra pages if it needs to be extended */
-#if VA_BITS < 48
-#define INIT_IDMAP_DIR_SIZE	((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
-#else
-#define INIT_IDMAP_DIR_SIZE	(INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
-#endif
-#define INIT_IDMAP_DIR_PAGES	EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE, 1)
+#define EARLY_PAGES(lvls, vstart, vend, add) (1 /* PGDIR page */ \
+	+ EARLY_LEVEL(3, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+	+ EARLY_LEVEL(2, (lvls), (vstart), (vend), add) /* each entry needs a next level page table */ \
+	+ EARLY_LEVEL(1, (lvls), (vstart), (vend), add))/* each entry needs a next level page table */
+#define INIT_DIR_SIZE (PAGE_SIZE * (EARLY_PAGES(SWAPPER_PGTABLE_LEVELS, KIMAGE_VADDR, _end, EXTRA_PAGE) \
+				    + EARLY_SEGMENT_EXTRA_PAGES))
+
+#define INIT_IDMAP_DIR_PAGES	(EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, KIMAGE_VADDR, _end, 1))
+#define INIT_IDMAP_DIR_SIZE	((INIT_IDMAP_DIR_PAGES + EARLY_IDMAP_EXTRA_PAGES) * PAGE_SIZE)
+
+#define INIT_IDMAP_FDT_PAGES	(EARLY_PAGES(INIT_IDMAP_PGTABLE_LEVELS, 0UL, UL(MAX_FDT_SIZE), 1) - 1)
+#define INIT_IDMAP_FDT_SIZE	((INIT_IDMAP_FDT_PAGES + EARLY_IDMAP_EXTRA_FDT_PAGES) * PAGE_SIZE)
 
 /* The number of segments in the kernel image (text, rodata, inittext, initdata, data+bss) */
 #define KERNEL_SEGMENT_COUNT	5
 
 #if SWAPPER_BLOCK_SIZE > SEGMENT_ALIGN
 #define EARLY_SEGMENT_EXTRA_PAGES (KERNEL_SEGMENT_COUNT + 1)
-#else
-#define EARLY_SEGMENT_EXTRA_PAGES 0
-#endif
-
 /*
- * Initial memory map attributes.
+ * The initial ID map consists of the kernel image, mapped as two separate
+ * segments, and may appear misaligned wrt the swapper block size. This means
+ * we need 3 additional pages. The DT could straddle a swapper block boundary,
+ * so it may need 2.
  */
-#define SWAPPER_PTE_FLAGS	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED | PTE_UXN)
-#define SWAPPER_PMD_FLAGS	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S | PTE_UXN)
-
-#ifdef CONFIG_ARM64_4K_PAGES
-#define SWAPPER_RW_MMUFLAGS	(PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS | PTE_WRITE)
-#define SWAPPER_RX_MMUFLAGS	(SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
+#define EARLY_IDMAP_EXTRA_PAGES		3
+#define EARLY_IDMAP_EXTRA_FDT_PAGES	2
 #else
-#define SWAPPER_RW_MMUFLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
-#define SWAPPER_RX_MMUFLAGS	(SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
+#define EARLY_SEGMENT_EXTRA_PAGES	0
+#define EARLY_IDMAP_EXTRA_PAGES		0
+#define EARLY_IDMAP_EXTRA_FDT_PAGES	0
 #endif
 
 #endif	/* __ASM_KERNEL_PGTABLE_H */
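
To make the macro arithmetic above easier to follow, the page-count logic can be
modelled in plain C roughly as below. This is a sketch, not kernel code: it
assumes SPAN_NR_ENTRIES(vstart, vend, shift) counts how many entries of size
1 << shift the [vstart, vend) range touches, and the helper names are invented
for illustration.

/* entries of size (1 << shift) touched by [start, end) */
static unsigned long span_nr_entries(unsigned long start, unsigned long end,
				     unsigned int shift)
{
	return ((end - 1) >> shift) - (start >> shift) + 1;
}

/*
 * model of EARLY_PAGES(lvls, vstart, vend, add): one PGDIR page, plus one
 * next-level table page per entry at each level the configuration uses
 */
static unsigned long early_pages(unsigned int lvls, unsigned long vstart,
				 unsigned long vend, unsigned int add,
				 unsigned int page_shift, unsigned int block_shift)
{
	unsigned long pages = 1;	/* PGDIR page */
	unsigned int lvl;

	for (lvl = 1; lvl <= 3; lvl++)
		if (lvls > lvl)
			pages += span_nr_entries(vstart, vend,
					block_shift + lvl * (page_shift - 3)) + add;

	return pages;
}

INIT_IDMAP_DIR_SIZE and INIT_IDMAP_FDT_SIZE then add the EARLY_IDMAP_EXTRA_*
pages on top, to cover the kernel's two ID-mapped segments being misaligned with
respect to the swapper block size and the DT straddling a block boundary, as the
new comment in the hunk explains.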
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -61,11 +61,9 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
 }
 
 /*
- * TCR.T0SZ value to use when the ID map is active. Usually equals
- * TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
- * physical memory, in which case it will be smaller.
+ * TCR.T0SZ value to use when the ID map is active.
  */
-extern int idmap_t0sz;
+#define idmap_t0sz	TCR_T0SZ(IDMAP_VA_BITS)
 
 /*
  * Ensure TCR.T0SZ is set to the provided value.
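
With the ID map now always built for 48-bit virtual addressing, the T0SZ to use
while it is active becomes a compile-time constant rather than a value that
depends on where RAM ends. A quick illustrative check, assuming TCR_T0SZ(x)
expands to (64 - x) << TCR_T0SZ_OFFSET as in pgtable-hwdef.h:

/* illustrative sanity check only: 64 - 48 gives T0SZ == 16 */
static_assert(TCR_T0SZ(IDMAP_VA_BITS) == ((UL(64) - 48) << TCR_T0SZ_OFFSET));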
arch/arm64/kernel/head.S: diff collapsed (not shown)
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -52,6 +52,7 @@ PROVIDE(__pi_cavium_erratum_27456_cpus = cavium_erratum_27456_cpus);
 PROVIDE(__pi__ctype			= _ctype);
 PROVIDE(__pi_memstart_offset_seed	= memstart_offset_seed);
 
+PROVIDE(__pi_init_idmap_pg_dir		= init_idmap_pg_dir);
 PROVIDE(__pi_init_pg_dir		= init_pg_dir);
 PROVIDE(__pi_init_pg_end		= init_pg_end);
--- a/arch/arm64/kernel/pi/Makefile
+++ b/arch/arm64/kernel/pi/Makefile
@@ -11,6 +11,9 @@ KBUILD_CFLAGS	:= $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
 		   -fno-asynchronous-unwind-tables -fno-unwind-tables \
 		   $(call cc-option,-fno-addrsig)
 
+# this code may run with the MMU off so disable unaligned accesses
+CFLAGS_map_range.o += -mstrict-align
+
 # remove SCS flags from all objects in this directory
 KBUILD_CFLAGS	:= $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
 
 # disable LTO
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -128,6 +128,22 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
 	}
 }
 
+static void __init map_fdt(u64 fdt)
+{
+	static u8 ptes[INIT_IDMAP_FDT_SIZE] __initdata __aligned(PAGE_SIZE);
+	u64 efdt = fdt + MAX_FDT_SIZE;
+	u64 ptep = (u64)ptes;
+
+	/*
+	 * Map up to MAX_FDT_SIZE bytes, but avoid overlap with
+	 * the kernel image.
+	 */
+	map_range(&ptep, fdt, (u64)_text > fdt ? min((u64)_text, efdt) : efdt,
+		  fdt, PAGE_KERNEL, IDMAP_ROOT_LEVEL,
+		  (pte_t *)init_idmap_pg_dir, false, 0);
+	dsb(ishst);
+}
+
 asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
 {
 	static char const chosen_str[] __initconst = "/chosen";
@@ -136,6 +152,8 @@ asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
 	int root_level = 4 - CONFIG_PGTABLE_LEVELS;
 	int chosen;
 
+	map_fdt((u64)fdt);
+
 	/* Clear BSS and the initial page tables */
 	memset(__bss_start, 0, (u64)init_pg_end - (u64)__bss_start);
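
One detail in map_fdt() above is the upper bound passed to map_range(): the FDT
window is clipped at _text when the kernel image lies above the FDT, so this
mapping never overlaps the image, which create_init_idmap() covers separately
with its own permissions. A sketch of just that clamp, with a made-up helper
name and example values that are not taken from the patch:

/* illustrative helper, not part of the patch */
static u64 fdt_map_end(u64 fdt, u64 text)
{
	u64 efdt = fdt + MAX_FDT_SIZE;

	/* e.g. fdt == 0x48000000, _text == 0x48100000: map only up to _text */
	return text > fdt ? min(text, efdt) : efdt;
}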
--- a/arch/arm64/kernel/pi/map_range.c
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -86,3 +86,15 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
 		tbl++;
 	}
 }
+
+asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir)
+{
+	u64 ptep = (u64)pg_dir + PAGE_SIZE;
+
+	map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
+		  PAGE_KERNEL_ROX, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
+	map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
+		  PAGE_KERNEL, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
+
+	return ptep;
+}
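
create_init_idmap() returns the address just past the last page-table page it
populated, starting from the page after pg_dir. The real caller lives in the
collapsed head.S hunk and may run with the MMU and caches off; the sketch below
is a hypothetical C rendering of that calling contract (the function name is
invented, and the cache maintenance shown is an assumption about what the asm
side needs to do before enabling the MMU on freshly written tables):

void __init build_boot_idmap(void)
{
	/* populate the 48-bit ID map for the kernel image */
	u64 tables_end = create_init_idmap(init_idmap_pg_dir);

	/*
	 * Tables written with the MMU and caches off must be invalidated
	 * to the PoC before the MMU is enabled on top of them.
	 */
	dcache_inval_poc((unsigned long)init_idmap_pg_dir, tables_end);
}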
--- a/arch/arm64/kernel/pi/pi.h
+++ b/arch/arm64/kernel/pi/pi.h
@@ -21,6 +21,8 @@ static inline void *prel64_to_pointer(const prel64_t *offset)
 
 extern bool dynamic_scs_is_enabled;
 
+extern pgd_t init_idmap_pg_dir[];
+
 void init_feature_override(u64 boot_status, const void *fdt, int chosen);
 u64 kaslr_early_init(void *fdt, int chosen);
 void relocate_kernel(u64 offset);
@@ -30,3 +32,5 @@ void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
 	       int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
 
 asmlinkage void early_map_kernel(u64 boot_status, void *fdt);
+
+asmlinkage u64 create_init_idmap(pgd_t *pgd);
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -45,8 +45,6 @@
 #define NO_CONT_MAPPINGS	BIT(1)
 #define NO_EXEC_MAPPINGS	BIT(2)	/* assumes FEAT_HPDS is not used */
 
-int idmap_t0sz __ro_after_init;
-
 #if VA_BITS > 48
 u64 vabits_actual __ro_after_init = VA_BITS_MIN;
 EXPORT_SYMBOL(vabits_actual);
@@ -793,8 +791,6 @@ void __init paging_init(void)
 	pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
 	extern pgd_t init_idmap_pg_dir[];
 
-	idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
-
 	map_kernel(pgdp);
 	map_mem(pgdp);
@@ -809,7 +805,6 @@ void __init paging_init(void)
 	memblock_allow_resize();
 
 	create_idmap();
-	idmap_t0sz = TCR_T0SZ(IDMAP_VA_BITS);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -200,7 +200,8 @@ SYM_FUNC_ALIAS(__pi_idmap_cpu_replace_ttbr1, idmap_cpu_replace_ttbr1)
 
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
 
-#define KPTI_NG_PTE_FLAGS	(PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS | PTE_WRITE)
+#define KPTI_NG_PTE_FLAGS	(PTE_ATTRINDX(MT_NORMAL) | PTE_TYPE_PAGE | \
+				 PTE_AF | PTE_SHARED | PTE_UXN | PTE_WRITE)
 
 	.pushsection ".idmap.text", "a"