Commit d099637d authored by Linus Torvalds

Merge tag 'x86-urgent-2024-05-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull misc x86 fixes from Ingo Molnar:

 - Remove the broken vsyscall emulation code from the page fault code

 - Fix kexec crash triggered by certain SEV RMP table layouts

 - Fix unchecked MSR access error when disabling the x2APIC via iommu=off

* tag 'x86-urgent-2024-05-05' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Remove broken vsyscall emulation code from the page fault code
  x86/apic: Don't access the APIC when disabling x2APIC
  x86/sev: Add callback to apply RMP table fixups for kexec
  x86/e820: Add a new e820 table update helper
parents 80f8b450 02b670c1
@@ -98,11 +98,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
/*
* XXX: if access_ok, get_user, and put_user handled
* sig_on_uaccess_err, this could go away.
*/
if (!access_ok((void __user *)ptr, size)) {
struct thread_struct *thread = &current->thread;
@@ -120,10 +115,8 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size)
bool emulate_vsyscall(unsigned long error_code,
struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr, syscall_nr, tmp;
int prev_sig_on_uaccess_err;
long ret;
unsigned long orig_dx;
@@ -172,8 +165,6 @@ bool emulate_vsyscall(unsigned long error_code,
goto sigsegv;
}
tsk = current;
/*
* Check for access_ok violations and find the syscall nr.
*
@@ -234,12 +225,8 @@ bool emulate_vsyscall(unsigned long error_code,
goto do_ret; /* skip requested */
/*
* With a real vsyscall, page faults cause SIGSEGV. We want to
* preserve that behavior to make writing exploits harder.
* With a real vsyscall, page faults cause SIGSEGV.
*/
prev_sig_on_uaccess_err = current->thread.sig_on_uaccess_err;
current->thread.sig_on_uaccess_err = 1;
ret = -EFAULT;
switch (vsyscall_nr) {
case 0:
@@ -262,23 +249,12 @@ bool emulate_vsyscall(unsigned long error_code,
break;
}
current->thread.sig_on_uaccess_err = prev_sig_on_uaccess_err;
check_fault:
if (ret == -EFAULT) {
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
"vsyscall fault (exploit attempt?)");
/*
* If we failed to generate a signal for any reason,
* generate one here. (This should be impossible.)
*/
if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
!sigismember(&tsk->pending.signal, SIGSEGV)))
goto sigsegv;
return true; /* Don't emulate the ret. */
goto sigsegv;
}
regs->ax = ret;
......
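
With sig_on_uaccess_err gone, emulate_vsyscall() no longer depends on the page fault handler to deliver the signal for a bad user pointer: a uaccess failure now jumps straight to the function's own sigsegv handling. A condensed view of how the surviving error path reads after the change (the sigsegv label sits outside the hunk and is assumed, as in the existing code, to raise SIGSEGV and return true):

check_fault:
	if (ret == -EFAULT) {
		/* Bad news -- userspace fed a bad pointer to a vsyscall. */
		warn_bad_vsyscall(KERN_INFO, regs,
				  "vsyscall fault (exploit attempt?)");
		goto sigsegv;	/* SIGSEGV is raised here, not via the #PF path */
	}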
@@ -17,6 +17,7 @@ extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
extern void e820__range_add (u64 start, u64 size, enum e820_type type);
extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern void e820__print_table(char *who);
extern int e820__update_table(struct e820_table *table);
......
@@ -472,7 +472,6 @@ struct thread_struct {
unsigned long iopl_emul;
unsigned int iopl_warn:1;
unsigned int sig_on_uaccess_err:1;
/*
* Protection Keys Register for Userspace. Loaded immediately on
......
@@ -269,6 +269,7 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut
int rmp_make_shared(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
void kdump_sev_callback(void);
void snp_fixup_e820_tables(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
@@ -282,6 +283,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
static inline void kdump_sev_callback(void) { }
static inline void snp_fixup_e820_tables(void) {}
#endif
#endif
@@ -1771,7 +1771,7 @@ void x2apic_setup(void)
__x2apic_enable();
}
static __init void apic_set_fixmap(void);
static __init void apic_set_fixmap(bool read_apic);
static __init void x2apic_disable(void)
{
@@ -1793,7 +1793,12 @@ static __init void x2apic_disable(void)
}
__x2apic_disable();
apic_set_fixmap();
/*
* Don't reread the APIC ID as it was already done from
* check_x2apic() and the APIC driver still is an x2APIC variant,
* which fails to do the read after x2APIC was disabled.
*/
apic_set_fixmap(false);
}
static __init void x2apic_enable(void)
@@ -2057,13 +2062,14 @@ void __init init_apic_mappings(void)
}
}
static __init void apic_set_fixmap(void)
static __init void apic_set_fixmap(bool read_apic)
{
set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
apic_mmio_base = APIC_BASE;
apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
apic_mmio_base, mp_lapic_addr);
apic_read_boot_cpu_id(false);
if (read_apic)
apic_read_boot_cpu_id(false);
}
void __init register_lapic_address(unsigned long address)
@@ -2073,7 +2079,7 @@ void __init register_lapic_address(unsigned long address)
mp_lapic_addr = address;
if (!x2apic_mode)
apic_set_fixmap();
apic_set_fixmap(true);
}
/*
......
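
The new read_apic argument lets the two apic_set_fixmap() callers diverge: register_lapic_address() still wants the boot CPU's APIC ID read through the freshly mapped MMIO window, while x2apic_disable() must skip that read because the ID was already obtained by check_x2apic() and the still-installed x2APIC driver would issue the read through the x2APIC MSRs, which no longer work once x2APIC is turned off (the unchecked MSR access error this merge fixes). For illustration, the two call sites after the change:

	/* x2apic_disable(): restore the MMIO fixmap, but do not reread the APIC ID */
	apic_set_fixmap(false);

	/* register_lapic_address(): map the APIC and read the boot CPU's APIC ID */
	if (!x2apic_mode)
		apic_set_fixmap(true);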
@@ -532,9 +532,10 @@ u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum
return __e820__range_update(e820_table, start, size, old_type, new_type);
}
static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type)
u64 __init e820__range_update_table(struct e820_table *t, u64 start, u64 size,
enum e820_type old_type, enum e820_type new_type)
{
return __e820__range_update(e820_table_kexec, start, size, old_type, new_type);
return __e820__range_update(t, start, size, old_type, new_type);
}
/* Remove a range of memory from the E820 table: */
@@ -806,7 +807,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align)
addr = memblock_phys_alloc(size, align);
if (addr) {
e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
e820__range_update_table(e820_table_kexec, addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n");
e820__update_table_kexec();
}
......
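
e820__range_update_table() is the old kexec-only wrapper with the target table made explicit, so one helper now serves e820_table_kexec, e820_table_firmware, or any other struct e820_table. For illustration, the converted caller above and the new SEV-SNP usage further down both reduce to:

	/* was: e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); */
	e820__range_update_table(e820_table_kexec, addr, size,
				 E820_TYPE_RAM, E820_TYPE_RESERVED);

	/* the RMP fixup can patch the firmware copy of the table the same way */
	e820__range_update_table(e820_table_firmware, pa, PMD_SIZE,
				 E820_TYPE_RAM, E820_TYPE_RESERVED);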
@@ -723,39 +723,8 @@ kernelmode_fixup_or_oops(struct pt_regs *regs, unsigned long error_code,
WARN_ON_ONCE(user_mode(regs));
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
/*
* Any interrupt that takes a fault gets the fixup. This makes
* the below recursive fault logic only apply to faults from
* task context.
*/
if (in_interrupt())
return;
/*
* Per the above we're !in_interrupt(), aka. task context.
*
* In this case we need to make sure we're not recursively
* faulting through the emulate_vsyscall() logic.
*/
if (current->thread.sig_on_uaccess_err && signal) {
sanitize_error_code(address, &error_code);
set_signal_archinfo(address, error_code);
if (si_code == SEGV_PKUERR) {
force_sig_pkuerr((void __user *)address, pkey);
} else {
/* XXX: hwpoison faults will set the wrong code. */
force_sig_fault(signal, si_code, (void __user *)address);
}
}
/*
* Barring that, we can do the fixup and be happy.
*/
if (fixup_exception(regs, X86_TRAP_PF, error_code, address))
return;
}
/*
* AMD erratum #91 manifests as a spurious page fault on a PREFETCH
......
@@ -102,6 +102,13 @@ void __init mem_encrypt_setup_arch(void)
phys_addr_t total_mem = memblock_phys_mem_size();
unsigned long size;
/*
* Do RMP table fixups after the e820 tables have been setup by
* e820__memory_setup().
*/
if (cc_platform_has(CC_ATTR_HOST_SEV_SNP))
snp_fixup_e820_tables();
if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return;
......
@@ -163,6 +163,42 @@ bool snp_probe_rmptable_info(void)
return true;
}
static void __init __snp_fixup_e820_tables(u64 pa)
{
if (IS_ALIGNED(pa, PMD_SIZE))
return;
/*
* Handle cases where the RMP table placement by the BIOS is not
* 2M aligned and the kexec kernel could try to allocate
* from within that chunk which then causes a fatal RMP fault.
*
* The e820_table needs to be updated as it is converted to
* kernel memory resources and used by the KEXEC_FILE_LOAD syscall
* to load kexec segments.
*
* The e820_table_firmware needs to be updated as it is exposed
* to sysfs and used by the KEXEC_LOAD syscall to load kexec
* segments.
*
* The e820_table_kexec needs to be updated as it is passed to
* the kexec-ed kernel.
*/
pa = ALIGN_DOWN(pa, PMD_SIZE);
if (e820__mapped_any(pa, pa + PMD_SIZE, E820_TYPE_RAM)) {
pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa);
e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
}
}
void __init snp_fixup_e820_tables(void)
{
__snp_fixup_e820_tables(probed_rmp_base);
__snp_fixup_e820_tables(probed_rmp_base + probed_rmp_size);
}
/*
* Do the necessary preparations which are verified by the firmware as
* described in the SNP_INIT_EX firmware command description in the SNP
......
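
The fixup only acts on an RMP table end that is not already 2MB aligned: that end is rounded down to a 2MB boundary and the whole 2MB chunk is flipped from RAM to reserved (the real code additionally checks with e820__mapped_any() that the chunk is currently RAM). A self-contained sketch of the arithmetic, with a hypothetical, deliberately misaligned RMP placement:

#include <stdint.h>
#include <stdio.h>

#define PMD_SIZE		0x200000ULL		/* 2MB */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))

static void fixup_end(uint64_t pa)
{
	if (IS_ALIGNED(pa, PMD_SIZE))
		return;					/* nothing to reserve */
	pa = ALIGN_DOWN(pa, PMD_SIZE);
	printf("reserve [%#llx, %#llx)\n",
	       (unsigned long long)pa,
	       (unsigned long long)(pa + PMD_SIZE));
}

int main(void)
{
	/* Hypothetical RMP table placement by the BIOS (not 2MB aligned). */
	uint64_t rmp_base = 0x7f5d4000ULL;
	uint64_t rmp_size = 0x01004000ULL;

	fixup_end(rmp_base);			/* reserve [0x7f400000, 0x7f600000) */
	fixup_end(rmp_base + rmp_size);		/* reserve [0x80400000, 0x80600000) */
	return 0;
}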