Commit d9e9a641, authored by Dave Hansen, committed by Ingo Molnar

x86/mm/pti: Allocate a separate user PGD

Kernel page table isolation requires two PGDs: one for the kernel, which
contains the full kernel mapping plus the user space mapping, and one for
user space, which contains the user space mappings and the minimal set of
kernel mappings which are required by the architecture to be able to
transition from and to user space.

Add the necessary preliminaries.

[ tglx: Split out from the big kaiser dump. EFI fixup from Kirill ]
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: David Laight <David.Laight@aculab.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 1c4de1ff
...@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {} ...@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
*/ */
extern gfp_t __userpte_alloc_gfp; extern gfp_t __userpte_alloc_gfp;
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* Instead of one PGD, we acquire two PGDs. Being order-1, it is
* both 8k in size and 8k-aligned. That lets us just flip bit 12
* in a pointer to swap between the two 4k halves.
*/
#define PGD_ALLOCATION_ORDER 1
#else
#define PGD_ALLOCATION_ORDER 0
#endif
/* /*
* Allocate and free page tables. * Allocate and free page tables.
*/ */
......
...@@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag) ...@@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag)
.balign PAGE_SIZE; \ .balign PAGE_SIZE; \
GLOBAL(name) GLOBAL(name)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* Each PGD needs to be 8k long and 8k aligned. We do not
* ever go out to userspace with these, so we do not
* strictly *need* the second page, but this allows us to
* have a single set_pgd() implementation that does not
* need to worry about whether it has 4k or 8k to work
* with.
*
* This ensures PGDs are 8k long:
*/
#define PTI_USER_PGD_FILL 512
/* This ensures they are 8k-aligned: */
#define NEXT_PGD_PAGE(name) \
.balign 2 * PAGE_SIZE; \
GLOBAL(name)
#else
#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
#define PTI_USER_PGD_FILL 0
#endif
/* Automate the creation of 1 to 1 mapping pmd entries */ /* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \ #define PMDS(START, PERM, COUNT) \
i = 0 ; \ i = 0 ; \
...@@ -350,13 +371,14 @@ GLOBAL(name) ...@@ -350,13 +371,14 @@ GLOBAL(name)
.endr .endr
__INITDATA __INITDATA
NEXT_PAGE(early_top_pgt) NEXT_PGD_PAGE(early_top_pgt)
.fill 511,8,0 .fill 511,8,0
#ifdef CONFIG_X86_5LEVEL #ifdef CONFIG_X86_5LEVEL
.quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC .quad level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#else #else
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
#endif #endif
.fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(early_dynamic_pgts) NEXT_PAGE(early_dynamic_pgts)
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
...@@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts) ...@@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts)
.data .data
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
NEXT_PAGE(init_top_pgt) NEXT_PGD_PAGE(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0 .org init_top_pgt + PGD_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + PGD_START_KERNEL*8, 0 .org init_top_pgt + PGD_START_KERNEL*8, 0
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
.fill PTI_USER_PGD_FILL,8,0
NEXT_PAGE(level3_ident_pgt) NEXT_PAGE(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
...@@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt) ...@@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt)
*/ */
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
#else #else
NEXT_PAGE(init_top_pgt) NEXT_PGD_PAGE(init_top_pgt)
.fill 512,8,0 .fill 512,8,0
.fill PTI_USER_PGD_FILL,8,0
#endif #endif
#ifdef CONFIG_X86_5LEVEL #ifdef CONFIG_X86_5LEVEL
......
...@@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd) ...@@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd)
kmem_cache_free(pgd_cache, pgd); kmem_cache_free(pgd_cache, pgd);
} }
#else #else
static inline pgd_t *_pgd_alloc(void) static inline pgd_t *_pgd_alloc(void)
{ {
return (pgd_t *)__get_free_page(PGALLOC_GFP); return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
} }
static inline void _pgd_free(pgd_t *pgd) static inline void _pgd_free(pgd_t *pgd)
{ {
free_page((unsigned long)pgd); free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
} }
#endif /* CONFIG_X86_PAE */ #endif /* CONFIG_X86_PAE */
......
...@@ -195,6 +195,9 @@ static pgd_t *efi_pgd; ...@@ -195,6 +195,9 @@ static pgd_t *efi_pgd;
* because we want to avoid inserting EFI region mappings (EFI_VA_END * because we want to avoid inserting EFI region mappings (EFI_VA_END
* to EFI_VA_START) into the standard kernel page tables. Everything * to EFI_VA_START) into the standard kernel page tables. Everything
* else can be shared, see efi_sync_low_kernel_mappings(). * else can be shared, see efi_sync_low_kernel_mappings().
*
* We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
* allocation.
*/ */
int __init efi_alloc_page_tables(void) int __init efi_alloc_page_tables(void)
{ {
...@@ -207,7 +210,7 @@ int __init efi_alloc_page_tables(void) ...@@ -207,7 +210,7 @@ int __init efi_alloc_page_tables(void)
return 0; return 0;
gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO; gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
efi_pgd = (pgd_t *)__get_free_page(gfp_mask); efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
if (!efi_pgd) if (!efi_pgd)
return -ENOMEM; return -ENOMEM;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment