Commit 4a692ae3 authored by Linus Torvalds

Merge tag 'x86_mm_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm updates from Borislav Petkov:

 - Flush *all* mappings from the TLB after switching to the trampoline
   page table, so that no stale entries remain

 - Flush global mappings from the TLB, in addition to the CR3 write,
   after switching away from the trampoline_pgd during boot, to clear
   the identity mappings (the PGE toggle behind all three points is
   sketched in C right after the commit header below)

 - Prevent instrumentation issues resulting from the above changes

* tag 'x86_mm_for_v5.17_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Prevent early boot triple-faults with instrumentation
  x86/mm: Include spinlock_t definition in pgtable.
  x86/mm: Flush global TLB when switching to trampoline page-table
  x86/mm/64: Flush global TLB on boot and AP bringup
  x86/realmode: Add comment for Global bit usage in trampoline_pgd
  x86/mm: Add missing <asm/cpufeatures.h> dependency to <asm/page_64.h>
parents bfed6efb b64dfcde
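
All of these changes hinge on one architectural detail: a CR3 write does not evict TLB entries created from PTEs with the Global bit set, so such entries can outlive the page table they came from. Short of INVPCID, the way to drop them is to toggle CR4.PGE off and back on. A minimal sketch of that mechanism (the helper name is hypothetical; native_write_cr4() and X86_CR4_PGE are the kernel's own, and the same two writes appear in the __native_tlb_flush_global() hunk below):

static inline void flush_global_tlb_sketch(unsigned long cr4)
{
        /* Clearing CR4.PGE invalidates the whole TLB, global entries included */
        native_write_cr4(cr4 ^ X86_CR4_PGE);
        /* Restore the original value; PGE is set again for new global entries */
        native_write_cr4(cr4);
}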
@@ -5,6 +5,7 @@
 #include <asm/page_64_types.h>

 #ifndef __ASSEMBLY__
+#include <asm/cpufeatures.h>
 #include <asm/alternative.h>

 /* duplicated to the one in bootmem.h */
...
@@ -22,6 +22,7 @@
 #define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot)))

 #ifndef __ASSEMBLY__
+#include <linux/spinlock.h>
 #include <asm/x86_init.h>
 #include <asm/pkru.h>
 #include <asm/fpu/api.h>
...
@@ -89,6 +89,7 @@ static inline void set_real_mode_mem(phys_addr_t mem)
 }

 void reserve_real_mode(void);
+void load_trampoline_pgtable(void);

 #endif /* __ASSEMBLY__ */
...
@@ -261,4 +261,9 @@ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
 #endif /* !MODULE */

+static inline void __native_tlb_flush_global(unsigned long cr4)
+{
+        native_write_cr4(cr4 ^ X86_CR4_PGE);
+        native_write_cr4(cr4);
+}
 #endif /* _ASM_X86_TLBFLUSH_H */
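
A note on this helper: it is a static inline built only on native_write_cr4(), which the next hunk marks __no_profile so that profiling hooks cannot fire during the very early flushes. Callers pass the CPU's cached CR4 value rather than reading the register; a usage sketch matching the call sites added later in this diff:

        unsigned long cr4 = this_cpu_read(cpu_tlbstate.cr4);   /* cached CR4 */

        __native_tlb_flush_global(cr4);                        /* toggle PGE twice */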
@@ -384,7 +384,7 @@ void native_write_cr0(unsigned long val)
 }
 EXPORT_SYMBOL(native_write_cr0);

-void native_write_cr4(unsigned long val)
+void __no_profile native_write_cr4(unsigned long val)
 {
         unsigned long bits_changed = 0;
...
@@ -487,6 +487,10 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
         clear_bss();

+        /*
+         * This needs to happen *before* kasan_early_init() because the latter
+         * maps stuff into that page.
+         */
         clear_page(init_top_pgt);

         /*
@@ -498,6 +502,16 @@
         kasan_early_init();

+        /*
+         * Flush global TLB entries which could be left over from the
+         * trampoline page table.
+         *
+         * This needs to happen *after* kasan_early_init() as KASAN-enabled
+         * .configs instrument native_write_cr4(), so KASAN must be
+         * initialized for that instrumentation to work.
+         */
+        __native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));
+
         idt_setup_early_handler();
         copy_bootdata(__va(real_mode_data));
...
@@ -166,9 +166,26 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
         call    sev_verify_cbit
         popq    %rsi

-        /* Switch to new page-table */
+        /*
+         * Switch to new page-table
+         *
+         * For the boot CPU this switches to early_top_pgt which still has the
+         * identity mappings present. The secondary CPUs will switch to the
+         * init_top_pgt here, away from the trampoline_pgd and unmap the
+         * identity mapped ranges.
+         */
         movq    %rax, %cr3

+        /*
+         * Do a global TLB flush after the CR3 switch to make sure the TLB
+         * entries from the identity mapping are flushed.
+         */
+        movq    %cr4, %rcx
+        movq    %rcx, %rax
+        xorq    $X86_CR4_PGE, %rcx
+        movq    %rcx, %cr4
+        movq    %rax, %cr4
+
         /* Ensure I am executing from virtual addresses */
         movq    $1f, %rax
         ANNOTATE_RETPOLINE_SAFE
...
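The five new instructions are the same CR4.PGE toggle, open-coded because this runs in the early assembly path, before the jump to virtual addresses, where calling the C helper is not an option. A rough C equivalent (sketch only, not code from the series):

        unsigned long cr4 = native_read_cr4();  /* movq %cr4, %rcx / movq %rcx, %rax */

        native_write_cr4(cr4 ^ X86_CR4_PGE);    /* xorq + movq %rcx, %cr4: PGE off  */
        native_write_cr4(cr4);                  /* movq %rax, %cr4: PGE restored    */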
@@ -113,17 +113,9 @@ void __noreturn machine_real_restart(unsigned int type)
         spin_unlock(&rtc_lock);

         /*
-         * Switch back to the initial page table.
+         * Switch to the trampoline page table.
         */
-#ifdef CONFIG_X86_32
-        load_cr3(initial_page_table);
-#else
-        write_cr3(real_mode_header->trampoline_pgd);
-
-        /* Exiting long mode will fail if CR4.PCIDE is set. */
-        if (boot_cpu_has(X86_FEATURE_PCID))
-                cr4_clear_bits(X86_CR4_PCIDE);
-#endif
+        load_trampoline_pgtable();

         /* Jump to the identity-mapped low memory code */
 #ifdef CONFIG_X86_32
...
@@ -714,6 +714,11 @@ static void __init memory_map_bottom_up(unsigned long map_start,
 static void __init init_trampoline(void)
 {
 #ifdef CONFIG_X86_64
+        /*
+         * The code below will alias kernel page-tables in the user-range of
+         * the address space, including the Global bit. So global TLB entries
+         * will be created when using the trampoline page-table.
+         */
         if (!kaslr_memory_enabled())
                 trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
         else
...
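For context (not part of this diff): the realmode setup code later installs this entry into the lowest, user-range slot of the trampoline PGD, which is how the kernel's lower-level page tables, Global PTEs included, become reachable from the trampoline. Roughly, from memory of setup_real_mode(), so treat the details as an assumption:

        u64 *trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);

        /* Alias the kernel mapping; the Global bit travels with the PTEs */
        trampoline_pgd[0] = trampoline_pgd_entry.pgd;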
@@ -1148,7 +1148,7 @@ void flush_tlb_one_user(unsigned long addr)
  */
 STATIC_NOPV void native_flush_tlb_global(void)
 {
-        unsigned long cr4, flags;
+        unsigned long flags;

         if (static_cpu_has(X86_FEATURE_INVPCID)) {
                 /*
@@ -1168,11 +1168,7 @@ STATIC_NOPV void native_flush_tlb_global(void)
         */
         raw_local_irq_save(flags);

-        cr4 = this_cpu_read(cpu_tlbstate.cr4);
-        /* toggle PGE */
-        native_write_cr4(cr4 ^ X86_CR4_PGE);
-        /* write old PGE again and flush TLBs */
-        native_write_cr4(cr4);
+        __native_tlb_flush_global(this_cpu_read(cpu_tlbstate.cr4));

         raw_local_irq_restore(flags);
 }
...
@@ -17,6 +17,32 @@ u32 *trampoline_cr4_features;
 /* Hold the pgd entry used on booting additional CPUs */
 pgd_t trampoline_pgd_entry;

+void load_trampoline_pgtable(void)
+{
+#ifdef CONFIG_X86_32
+        load_cr3(initial_page_table);
+#else
+        /*
+         * This function is called before exiting to real-mode and that will
+         * fail with CR4.PCIDE still set.
+         */
+        if (boot_cpu_has(X86_FEATURE_PCID))
+                cr4_clear_bits(X86_CR4_PCIDE);
+
+        write_cr3(real_mode_header->trampoline_pgd);
+#endif
+
+        /*
+         * The CR3 write above will not flush global TLB entries. Stale,
+         * global entries from previous page tables may still be present.
+         * Flush those stale entries.
+         *
+         * This ensures that memory accessed while running with
+         * trampoline_pgd is *actually* mapped into trampoline_pgd.
+         */
+        __flush_tlb_all();
+}
+
 void __init reserve_real_mode(void)
 {
         phys_addr_t mem;
...
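With this helper in place, the reboot path shown earlier shrinks to a single call, and __flush_tlb_all() picks the global PGE-toggle flush whenever the CPU supports PGE. The resulting call site, as in the machine_real_restart() hunk above:

        /* Switch to the trampoline page table. */
        load_trampoline_pgtable();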