Commit fa2a8445, authored by Kristina Martsenko, committed by Catalin Marinas

arm64: allow ID map to be extended to 52 bits

Currently, when using VA_BITS < 48, if the ID map text happens to be
placed in physical memory above VA_BITS, we increase the VA size (up to
48) and create a new table level, in order to map in the ID map text.
This is okay because the system always supports 48 bits of VA.

This patch extends the code such that if the system supports 52 bits of
VA, and the ID map text is placed that high up, then we increase the VA
size accordingly, up to 52.

One difference from the current implementation is that so far the
condition of VA_BITS < 48 has meant that the top level table is always
"full", with the maximum number of entries, and an extra table level is
always needed. Now, when VA_BITS = 48 (and using 64k pages), the top
level table is not full, and we simply need to increase the number of
entries in it, instead of creating a new table level.

Tested-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Bob Picco <bob.picco@oracle.com>
Reviewed-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[catalin.marinas@arm.com: reduce arguments to __create_hyp_mappings()]
[catalin.marinas@arm.com: reworked/renamed __cpu_uses_extended_idmap_level()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
parent 75387b92
...@@ -211,6 +211,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void) ...@@ -211,6 +211,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void)
return false; return false;
} }
/*
 * Number of top-level (PGD) entries to assume for the KVM ID map.
 * This variant has no run-time override and always reports the
 * compile-time PTRS_PER_PGD.
 */
static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
{
	const unsigned long nr_pgd_entries = PTRS_PER_PGD;

	return nr_pgd_entries;
}
static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd,
pgd_t *hyp_pgd, pgd_t *hyp_pgd,
pgd_t *merged_hyp_pgd, pgd_t *merged_hyp_pgd,
......
...@@ -344,10 +344,8 @@ alternative_endif ...@@ -344,10 +344,8 @@ alternative_endif
* tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
*/ */
.macro tcr_set_idmap_t0sz, valreg, tmpreg .macro tcr_set_idmap_t0sz, valreg, tmpreg
#ifndef CONFIG_ARM64_VA_BITS_48
ldr_l \tmpreg, idmap_t0sz ldr_l \tmpreg, idmap_t0sz
bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
#endif
.endm .endm
/* /*
......
...@@ -273,7 +273,12 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); ...@@ -273,7 +273,12 @@ void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled);
static inline bool __kvm_cpu_uses_extended_idmap(void) static inline bool __kvm_cpu_uses_extended_idmap(void)
{ {
return __cpu_uses_extended_idmap(); return __cpu_uses_extended_idmap_level();
}
/*
 * Number of top-level (PGD) entries to assume for the KVM ID map.
 * Reports the current value of the idmap_ptrs_per_pgd variable.
 */
static inline unsigned long __kvm_idmap_ptrs_per_pgd(void)
{
	const unsigned long nr_pgd_entries = idmap_ptrs_per_pgd;

	return nr_pgd_entries;
}
/* /*
......
...@@ -63,6 +63,7 @@ static inline void cpu_set_reserved_ttbr0(void) ...@@ -63,6 +63,7 @@ static inline void cpu_set_reserved_ttbr0(void)
* physical memory, in which case it will be smaller. * physical memory, in which case it will be smaller.
*/ */
extern u64 idmap_t0sz; extern u64 idmap_t0sz;
extern u64 idmap_ptrs_per_pgd;
static inline bool __cpu_uses_extended_idmap(void) static inline bool __cpu_uses_extended_idmap(void)
{ {
...@@ -70,6 +71,15 @@ static inline bool __cpu_uses_extended_idmap(void) ...@@ -70,6 +71,15 @@ static inline bool __cpu_uses_extended_idmap(void)
unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)));
} }
/*
 * Report whether the extended ID map needs one more translation table
 * level than the kernel is configured with. The number of levels for
 * the ID map's VA size (64 - idmap_t0sz bits) is compared against
 * CONFIG_PGTABLE_LEVELS; only a strictly larger count returns true.
 */
static inline bool __cpu_uses_extended_idmap_level(void)
{
	const bool needs_extra_level =
		ARM64_HW_PGTABLE_LEVELS((64 - idmap_t0sz)) > CONFIG_PGTABLE_LEVELS;

	return needs_extra_level;
}
/* /*
* Set TCR.T0SZ to its default value (based on VA_BITS) * Set TCR.T0SZ to its default value (based on VA_BITS)
*/ */
......
...@@ -176,7 +176,7 @@ ENDPROC(preserve_boot_args) ...@@ -176,7 +176,7 @@ ENDPROC(preserve_boot_args)
* ptrs: #imm pointers per table page * ptrs: #imm pointers per table page
* *
* Preserves: virt * Preserves: virt
* Corrupts: tmp1, tmp2 * Corrupts: ptrs, tmp1, tmp2
* Returns: tbl -> next level table page address * Returns: tbl -> next level table page address
*/ */
.macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
...@@ -184,7 +184,8 @@ ENDPROC(preserve_boot_args) ...@@ -184,7 +184,8 @@ ENDPROC(preserve_boot_args)
phys_to_pte \tmp1, \tmp2 phys_to_pte \tmp1, \tmp2
orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
lsr \tmp1, \virt, #\shift lsr \tmp1, \virt, #\shift
and \tmp1, \tmp1, #\ptrs - 1 // table index sub \ptrs, \ptrs, #1
and \tmp1, \tmp1, \ptrs // table index
str \tmp2, [\tbl, \tmp1, lsl #3] str \tmp2, [\tbl, \tmp1, lsl #3]
add \tbl, \tbl, #PAGE_SIZE // next level table page add \tbl, \tbl, #PAGE_SIZE // next level table page
.endm .endm
...@@ -194,15 +195,17 @@ ENDPROC(preserve_boot_args) ...@@ -194,15 +195,17 @@ ENDPROC(preserve_boot_args)
* block entry in the next level (tbl) for the given virtual address. * block entry in the next level (tbl) for the given virtual address.
* *
* Preserves: tbl, next, virt * Preserves: tbl, next, virt
* Corrupts: tmp1, tmp2 * Corrupts: ptrs_per_pgd, tmp1, tmp2
*/ */
.macro create_pgd_entry, tbl, virt, tmp1, tmp2 .macro create_pgd_entry, tbl, virt, ptrs_per_pgd, tmp1, tmp2
create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 create_table_entry \tbl, \virt, PGDIR_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
#if SWAPPER_PGTABLE_LEVELS > 3 #if SWAPPER_PGTABLE_LEVELS > 3
create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2 mov \ptrs_per_pgd, PTRS_PER_PUD
create_table_entry \tbl, \virt, PUD_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
#endif #endif
#if SWAPPER_PGTABLE_LEVELS > 2 #if SWAPPER_PGTABLE_LEVELS > 2
create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2 mov \ptrs_per_pgd, PTRS_PER_PTE
create_table_entry \tbl, \virt, SWAPPER_TABLE_SHIFT, \ptrs_per_pgd, \tmp1, \tmp2
#endif #endif
.endm .endm
...@@ -266,26 +269,13 @@ __create_page_tables: ...@@ -266,26 +269,13 @@ __create_page_tables:
adrp x0, idmap_pg_dir adrp x0, idmap_pg_dir
adrp x3, __idmap_text_start // __pa(__idmap_text_start) adrp x3, __idmap_text_start // __pa(__idmap_text_start)
#ifndef CONFIG_ARM64_VA_BITS_48
#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
#define EXTRA_PTRS (1 << (48 - EXTRA_SHIFT))
/*
* If VA_BITS < 48, it may be too small to allow for an ID mapping to be
* created that covers system RAM if that is located sufficiently high
* in the physical address space. So for the ID map, use an extended
* virtual range in that case, by configuring an additional translation
* level.
* First, we have to verify our assumption that the current value of
* VA_BITS was chosen such that all translation levels are fully
* utilised, and that lowering T0SZ will always result in an additional
* translation level to be configured.
*/
#if VA_BITS != EXTRA_SHIFT
#error "Mismatch between VA_BITS and page size/number of translation levels"
#endif
/* /*
* VA_BITS may be too small to allow for an ID mapping to be created
* that covers system RAM if that is located sufficiently high in the
* physical address space. So for the ID map, use an extended virtual
* range in that case, and configure an additional translation level
* if needed.
*
* Calculate the maximum allowed value for TCR_EL1.T0SZ so that the * Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
* entire ID map region can be mapped. As T0SZ == (64 - #bits used), * entire ID map region can be mapped. As T0SZ == (64 - #bits used),
* this number conveniently equals the number of leading zeroes in * this number conveniently equals the number of leading zeroes in
...@@ -294,18 +284,41 @@ __create_page_tables: ...@@ -294,18 +284,41 @@ __create_page_tables:
adrp x5, __idmap_text_end adrp x5, __idmap_text_end
clz x5, x5 clz x5, x5
cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough?
b.ge 1f // .. then skip additional level b.ge 1f // .. then skip VA range extension
adr_l x6, idmap_t0sz adr_l x6, idmap_t0sz
str x5, [x6] str x5, [x6]
dmb sy dmb sy
dc ivac, x6 // Invalidate potentially stale cache line dc ivac, x6 // Invalidate potentially stale cache line
create_table_entry x0, x3, EXTRA_SHIFT, EXTRA_PTRS, x5, x6 #if (VA_BITS < 48)
1: #define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
/*
* If VA_BITS < 48, we have to configure an additional table level.
* First, we have to verify our assumption that the current value of
* VA_BITS was chosen such that all translation levels are fully
* utilised, and that lowering T0SZ will always result in an additional
* translation level to be configured.
*/
#if VA_BITS != EXTRA_SHIFT
#error "Mismatch between VA_BITS and page size/number of translation levels"
#endif #endif
create_pgd_entry x0, x3, x5, x6 mov x4, EXTRA_PTRS
create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
#else
/*
* If VA_BITS == 48, we don't have to configure an additional
* translation level, but the top-level table has more entries.
*/
mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
str_l x4, idmap_ptrs_per_pgd, x5
#endif
1:
ldr_l x4, idmap_ptrs_per_pgd
create_pgd_entry x0, x3, x4, x5, x6
mov x5, x3 // __pa(__idmap_text_start) mov x5, x3 // __pa(__idmap_text_start)
adr_l x6, __idmap_text_end // __pa(__idmap_text_end) adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
create_block_map x0, x7, x3, x5, x6, x4 create_block_map x0, x7, x3, x5, x6, x4
...@@ -316,7 +329,8 @@ __create_page_tables: ...@@ -316,7 +329,8 @@ __create_page_tables:
adrp x0, swapper_pg_dir adrp x0, swapper_pg_dir
mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text) mov_q x5, KIMAGE_VADDR + TEXT_OFFSET // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement add x5, x5, x23 // add KASLR displacement
create_pgd_entry x0, x5, x3, x6 mov x4, PTRS_PER_PGD
create_pgd_entry x0, x5, x4, x3, x6
adrp x6, _end // runtime __pa(_end) adrp x6, _end // runtime __pa(_end)
adrp x3, _text // runtime __pa(_text) adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text sub x6, x6, x3 // _end - _text
......
...@@ -72,24 +72,23 @@ __do_hyp_init: ...@@ -72,24 +72,23 @@ __do_hyp_init:
mov x5, #TCR_EL2_RES1 mov x5, #TCR_EL2_RES1
orr x4, x4, x5 orr x4, x4, x5
#ifndef CONFIG_ARM64_VA_BITS_48
/* /*
* If we are running with VA_BITS < 48, we may be running with an extra * The ID map may be configured to use an extended virtual address
* level of translation in the ID map. This is only the case if system * range. This is only the case if system RAM is out of range for the
* RAM is out of range for the currently configured page size and number * currently configured page size and VA_BITS, in which case we will
* of translation levels, in which case we will also need the extra * also need the extended virtual range for the HYP ID map, or we won't
* level for the HYP ID map, or we won't be able to enable the EL2 MMU. * be able to enable the EL2 MMU.
* *
* However, at EL2, there is only one TTBR register, and we can't switch * However, at EL2, there is only one TTBR register, and we can't switch
* between translation tables *and* update TCR_EL2.T0SZ at the same * between translation tables *and* update TCR_EL2.T0SZ at the same
* time. Bottom line: we need the extra level in *both* our translation * time. Bottom line: we need to use the extended range with *both* our
* tables. * translation tables.
* *
* So use the same T0SZ value we use for the ID map. * So use the same T0SZ value we use for the ID map.
*/ */
ldr_l x5, idmap_t0sz ldr_l x5, idmap_t0sz
bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
#endif
/* /*
* Set the PS bits in TCR_EL2. * Set the PS bits in TCR_EL2.
*/ */
......
...@@ -50,6 +50,7 @@ ...@@ -50,6 +50,7 @@
#define NO_CONT_MAPPINGS BIT(1) #define NO_CONT_MAPPINGS BIT(1)
u64 idmap_t0sz = TCR_T0SZ(VA_BITS); u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
u64 kimage_voffset __ro_after_init; u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset); EXPORT_SYMBOL(kimage_voffset);
......
...@@ -629,14 +629,20 @@ static int __create_hyp_mappings(pgd_t *pgdp, ...@@ -629,14 +629,20 @@ static int __create_hyp_mappings(pgd_t *pgdp,
{ {
pgd_t *pgd; pgd_t *pgd;
pud_t *pud; pud_t *pud;
unsigned long addr, next; unsigned long addr, next, ptrs_per_pgd = PTRS_PER_PGD;
int err = 0; int err = 0;
/*
* If it's not the hyp_pgd, fall back to the kvm idmap layout.
*/
if (pgdp != hyp_pgd)
ptrs_per_pgd = __kvm_idmap_ptrs_per_pgd();
mutex_lock(&kvm_hyp_pgd_mutex); mutex_lock(&kvm_hyp_pgd_mutex);
addr = start & PAGE_MASK; addr = start & PAGE_MASK;
end = PAGE_ALIGN(end); end = PAGE_ALIGN(end);
do { do {
pgd = pgdp + pgd_index(addr); pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1));
if (pgd_none(*pgd)) { if (pgd_none(*pgd)) {
pud = pud_alloc_one(NULL, addr); pud = pud_alloc_one(NULL, addr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment