Commit e53000b1 authored by Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "Misc fixes:

   - fix the s2ram regression related to confusion around segment
     register restoration, plus related cleanups that make the code more
     robust

   - a guess-unwinder Kconfig dependency fix

   - an isoimage build target fix for certain tool chain combinations

   - instruction decoder opcode map fixes+updates, and the syncing of
     the kernel decoder headers to the objtool headers

   - a kmmio tracing fix

   - two 5-level paging related fixes

   - a topology enumeration fix on certain SMP systems"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  objtool: Resync objtool's instruction decoder source code copy with the kernel's latest version
  x86/decoder: Fix and update the opcodes map
  x86/power: Make restore_processor_context() sane
  x86/power/32: Move SYSENTER MSR restoration to fix_processor_context()
  x86/power/64: Use struct desc_ptr for the IDT in struct saved_context
  x86/unwinder/guess: Prevent using CONFIG_UNWINDER_GUESS=y with CONFIG_STACKDEPOT=y
  x86/build: Don't verify mtools configuration file for isoimage
  x86/mm/kmmio: Fix mmiotrace for page unaligned addresses
  x86/boot/compressed/64: Print error if 5-level paging is not supported
  x86/boot/compressed/64: Detect and handle 5-level paging at boot-time
  x86/smpboot: Do not use smp_num_siblings in __max_logical_packages calculation
parents 1f76a755 215eada7
......@@ -400,6 +400,7 @@ config UNWINDER_FRAME_POINTER
config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
depends on !STACKDEPOT
---help---
This option enables the "guess" unwinder for unwinding kernel stack
traces. It scans the stack and reports every kernel text address it
......
......@@ -80,6 +80,7 @@ vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/kaslr.o
ifdef CONFIG_X86_64
vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/pagetable.o
vmlinux-objs-y += $(obj)/mem_encrypt.o
vmlinux-objs-y += $(obj)/pgtable_64.o
endif
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
......
......@@ -305,10 +305,18 @@ ENTRY(startup_64)
leaq boot_stack_end(%rbx), %rsp
#ifdef CONFIG_X86_5LEVEL
/* Check if 5-level paging has already been enabled */
movq %cr4, %rax
testl $X86_CR4_LA57, %eax
jnz lvl5
/*
* Check if we need to enable 5-level paging.
* RSI holds real mode data and needs to be preserved across
* a function call.
*/
pushq %rsi
call l5_paging_required
popq %rsi
/* If l5_paging_required() returned zero, we're done here. */
cmpq $0, %rax
je lvl5
/*
* At this point we are in long mode with 4-level paging enabled,
......
......@@ -169,6 +169,16 @@ void __puthex(unsigned long value)
}
}
static bool l5_supported(void)
{
/* Check if leaf 7 is supported. */
if (native_cpuid_eax(0) < 7)
return 0;
/* Check if la57 is supported. */
return native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31));
}
#if CONFIG_X86_NEED_RELOCS
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
......@@ -362,6 +372,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
console_init();
debug_putstr("early console in extract_kernel\n");
if (IS_ENABLED(CONFIG_X86_5LEVEL) && !l5_supported()) {
error("This linux kernel as configured requires 5-level paging\n"
"This CPU does not support the required 'cr4.la57' feature\n"
"Unable to boot - please use a kernel appropriate for your CPU\n");
}
free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
......
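
A minimal user-space sketch of the same LA57 check as l5_supported() above (illustration only, not part of the patch). It assumes the <cpuid.h> helpers shipped with GCC/clang, and that LA57 is reported in CPUID.(EAX=7,ECX=0):ECX bit 16, which is exactly what the (X86_FEATURE_LA57 & 31) expression evaluates to:

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* Mirror l5_supported(): first make sure leaf 7 exists at all. */
		if (__get_cpuid_max(0, NULL) < 7) {
			puts("CPUID leaf 7 not supported");
			return 1;
		}

		__cpuid_count(7, 0, eax, ebx, ecx, edx);

		/* LA57 (5-level paging) is bit 16 of ECX in leaf 7, sub-leaf 0. */
		printf("la57 %ssupported\n", (ecx & (1u << 16)) ? "" : "not ");
		return 0;
	}
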
#include <asm/processor.h>
/*
* __force_order is used by special_insns.h asm code to force instruction
* serialization.
*
* It is not referenced from the code, but GCC < 5 with -fPIE would fail
* due to an undefined symbol. Define it to make these ancient GCCs work.
*/
unsigned long __force_order;
int l5_paging_required(void)
{
/* Check if leaf 7 is supported. */
if (native_cpuid_eax(0) < 7)
return 0;
/* Check if la57 is supported. */
if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
return 0;
/* Check if 5-level paging has already been enabled. */
if (native_read_cr4() & X86_CR4_LA57)
return 0;
return 1;
}
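
The __force_order definition above only exists to satisfy the linker for old compilers; the idiom it supports lives in <asm/special_insns.h>, where the control-register accessors pass __force_order as a dummy "m" operand so the compiler treats every CR access as touching the same memory and cannot reorder them. A rough sketch of that existing kernel pattern (illustration only, not code added by this patch):

	extern unsigned long __force_order;

	static inline void sketch_write_cr3(unsigned long val)
	{
		/* The fake memory operand orders this asm against the other CR accessors. */
		asm volatile("mov %0, %%cr3" : : "r" (val), "m" (__force_order));
	}
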
......@@ -44,9 +44,9 @@ FDINITRD=$6
# Make sure the files actually exist
verify "$FBZIMAGE"
verify "$MTOOLSRC"
genbzdisk() {
verify "$MTOOLSRC"
mformat a:
syslinux $FIMAGE
echo "$KCMDLINE" | mcopy - a:syslinux.cfg
......@@ -57,6 +57,7 @@ genbzdisk() {
}
genfdimage144() {
verify "$MTOOLSRC"
dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null
mformat v:
syslinux $FIMAGE
......@@ -68,6 +69,7 @@ genfdimage144() {
}
genfdimage288() {
verify "$MTOOLSRC"
dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null
mformat w:
syslinux $FIMAGE
......
......@@ -12,7 +12,13 @@
/* image of the saved processor state */
struct saved_context {
u16 es, fs, gs, ss;
/*
* On x86_32, all segment registers, with the possible exception of
* gs, are saved at kernel entry in pt_regs.
*/
#ifdef CONFIG_X86_32_LAZY_GS
u16 gs;
#endif
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
......
......@@ -20,8 +20,20 @@
*/
struct saved_context {
struct pt_regs regs;
u16 ds, es, fs, gs, ss;
unsigned long gs_base, gs_kernel_base, fs_base;
/*
* User CS and SS are saved in current_pt_regs(). The rest of the
* segment selectors need to be saved and restored here.
*/
u16 ds, es, fs, gs;
/*
* Usermode FSBASE and GSBASE may not match the fs and gs selectors,
* so we save them separately. We save the kernelmode GSBASE to
* restore percpu access after resume.
*/
unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
unsigned long cr0, cr2, cr3, cr4, cr8;
u64 misc_enable;
bool misc_enable_saved;
......@@ -30,8 +42,7 @@ struct saved_context {
u16 gdt_pad; /* Unused */
struct desc_ptr gdt_desc;
u16 idt_pad;
u16 idt_limit;
unsigned long idt_base;
struct desc_ptr idt;
u16 ldt;
u16 tss;
unsigned long tr;
......
......@@ -106,7 +106,7 @@ EXPORT_SYMBOL(__max_logical_packages);
static unsigned int logical_packages __read_mostly;
/* Maximum number of SMT threads on any online core */
int __max_smt_threads __read_mostly;
int __read_mostly __max_smt_threads = 1;
/* Flag to indicate if a complete sched domain rebuild is required */
bool x86_topology_update;
......@@ -1304,7 +1304,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
* Today neither Intel nor AMD support heterogeneous systems so
* extrapolate the boot cpu's data to all packages.
*/
ncpus = cpu_data(0).booted_cores * smp_num_siblings;
ncpus = cpu_data(0).booted_cores * topology_max_smt_threads();
__max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);
pr_info("Max logical packages: %u\n", __max_logical_packages);
......
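
A worked example of the new calculation, with purely hypothetical numbers: on a two-socket box with 28 booted cores per package, 2 SMT threads per core, and nr_cpu_ids = 112,

	#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

	int booted_cores    = 28;   /* cpu_data(0).booted_cores, hypothetical   */
	int max_smt_threads = 2;    /* topology_max_smt_threads(), hypothetical */
	int nr_cpu_ids      = 112;  /* hypothetical                             */

	int ncpus        = booted_cores * max_smt_threads;    /* 56 */
	int max_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus);   /* 2  */

Since topology_max_smt_threads() reflects the maximum number of SMT threads actually seen on an online core (see the __max_smt_threads comment above), the divisor now tracks real thread counts rather than the CPUID-reported sibling count carried in smp_num_siblings.
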
......@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
ff:
ff: UD0
EndTable
Table: 3-byte opcode 1 (0x0f 0x38)
......@@ -717,7 +717,7 @@ AVXcode: 2
7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
80: INVEPT Gy,Mdq (66)
81: INVPID Gy,Mdq (66)
81: INVVPID Gy,Mdq (66)
82: INVPCID Gy,Mdq (66)
83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
88: vexpandps/d Vpd,Wpd (66),(ev)
......@@ -970,6 +970,15 @@ GrpTable: Grp9
EndTable
GrpTable: Grp10
# all are UD1
0: UD1
1: UD1
2: UD1
3: UD1
4: UD1
5: UD1
6: UD1
7: UD1
EndTable
# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
......
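
These opcode-map files are the source from which the kernel's instruction decoder tables (inat) are generated, and objtool carries its own copy of the same decoder, which is why the resync commit ships the identical update for the tools side. As a rough sketch of what consumes the tables, in the style of arch/x86/tools/insn_sanity.c (illustration only; it builds only against the kernel's insn.c and the generated inat tables, the way the in-tree tools do, and is not part of the patch):

	#include <asm/insn.h>	/* struct insn, insn_init(), insn_get_length() */

	static int decode_ud0_length(void)
	{
		const unsigned char buf[] = { 0x0f, 0xff };	/* UD0, per the new "ff: UD0" entry */
		struct insn insn;

		insn_init(&insn, buf, sizeof(buf), /* x86_64= */ 1);
		insn_get_length(&insn);

		/* With the map entry above, 0F FF decodes as a 2-byte UD0. */
		return insn.length;
	}
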
......@@ -404,11 +404,11 @@ void iounmap(volatile void __iomem *addr)
return;
}
mmiotrace_iounmap(addr);
addr = (volatile void __iomem *)
(PAGE_MASK & (unsigned long __force)addr);
mmiotrace_iounmap(addr);
/* Use the vm area unlocked, assuming the caller
ensures there isn't another iounmap for the same address
in parallel. Reuse of the virtual address is prevented by
......
......@@ -435,17 +435,18 @@ int register_kmmio_probe(struct kmmio_probe *p)
unsigned long flags;
int ret = 0;
unsigned long size = 0;
unsigned long addr = p->addr & PAGE_MASK;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
unsigned int l;
pte_t *pte;
spin_lock_irqsave(&kmmio_lock, flags);
if (get_kmmio_probe(p->addr)) {
if (get_kmmio_probe(addr)) {
ret = -EEXIST;
goto out;
}
pte = lookup_address(p->addr, &l);
pte = lookup_address(addr, &l);
if (!pte) {
ret = -EINVAL;
goto out;
......@@ -454,7 +455,7 @@ int register_kmmio_probe(struct kmmio_probe *p)
kmmio_count++;
list_add_rcu(&p->list, &kmmio_probes);
while (size < size_lim) {
if (add_kmmio_fault_page(p->addr + size))
if (add_kmmio_fault_page(addr + size))
pr_err("Unable to set page fault.\n");
size += page_level_size(l);
}
......@@ -528,19 +529,20 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
{
unsigned long flags;
unsigned long size = 0;
unsigned long addr = p->addr & PAGE_MASK;
const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
struct kmmio_fault_page *release_list = NULL;
struct kmmio_delayed_release *drelease;
unsigned int l;
pte_t *pte;
pte = lookup_address(p->addr, &l);
pte = lookup_address(addr, &l);
if (!pte)
return;
spin_lock_irqsave(&kmmio_lock, flags);
while (size < size_lim) {
release_kmmio_fault_page(p->addr + size, &release_list);
release_kmmio_fault_page(addr + size, &release_list);
size += page_level_size(l);
}
list_del_rcu(&p->list);
......
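
To see what the new alignment math buys, here is a small self-contained sketch with hypothetical numbers (it assumes 4 KiB pages, whereas the kernel code steps by page_level_size(l)):

	#include <stdio.h>

	#define PAGE_SIZE 4096UL
	#define PAGE_MASK (~(PAGE_SIZE - 1))

	int main(void)
	{
		unsigned long probe_addr = 0xffffc90000001e00UL;  /* p->addr, hypothetical */
		unsigned long probe_len  = 0x400;                  /* p->len,  hypothetical */

		unsigned long addr     = probe_addr & PAGE_MASK;
		unsigned long size_lim = probe_len + (probe_addr & ~PAGE_MASK);
		unsigned long size, pages = 0;

		for (size = 0; size < size_lim; size += PAGE_SIZE)
			pages++;	/* one fault page armed per page touched */

		printf("aligned addr=%#lx size_lim=%#lx pages=%lu\n",
		       addr, size_lim, pages);	/* ...1000, 0x1200, 2 */
		return 0;
	}

A probe that starts mid-page and crosses into the next page now arms (and later releases) a fault page for every page it touches, and the lookups use the page-aligned address rather than the raw, possibly unaligned p->addr.
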
......@@ -82,12 +82,8 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* descriptor tables
*/
#ifdef CONFIG_X86_32
store_idt(&ctxt->idt);
#else
/* CONFIG_X86_64 */
store_idt((struct desc_ptr *)&ctxt->idt_limit);
#endif
/*
* We save it here, but restore it only in the hibernate case.
* For ACPI S3 resume, this is loaded via 'early_gdt_desc' in 64-bit
......@@ -103,22 +99,18 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* segment registers
*/
#ifdef CONFIG_X86_32
savesegment(es, ctxt->es);
savesegment(fs, ctxt->fs);
#ifdef CONFIG_X86_32_LAZY_GS
savesegment(gs, ctxt->gs);
savesegment(ss, ctxt->ss);
#else
/* CONFIG_X86_64 */
asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds));
asm volatile ("movw %%es, %0" : "=m" (ctxt->es));
asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs));
asm volatile ("movw %%gs, %0" : "=m" (ctxt->gs));
asm volatile ("movw %%ss, %0" : "=m" (ctxt->ss));
#endif
#ifdef CONFIG_X86_64
savesegment(gs, ctxt->gs);
savesegment(fs, ctxt->fs);
savesegment(ds, ctxt->ds);
savesegment(es, ctxt->es);
rdmsrl(MSR_FS_BASE, ctxt->fs_base);
rdmsrl(MSR_GS_BASE, ctxt->gs_base);
rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
rdmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
rdmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
mtrr_save_fixed_ranges(NULL);
rdmsrl(MSR_EFER, ctxt->efer);
......@@ -178,6 +170,9 @@ static void fix_processor_context(void)
write_gdt_entry(desc, GDT_ENTRY_TSS, &tss, DESC_TSS);
syscall_init(); /* This sets MSR_*STAR and related */
#else
if (boot_cpu_has(X86_FEATURE_SEP))
enable_sep_cpu();
#endif
load_TR_desc(); /* This does ltr */
load_mm_ldt(current->active_mm); /* This does lldt */
......@@ -190,9 +185,12 @@ static void fix_processor_context(void)
}
/**
* __restore_processor_state - restore the contents of CPU registers saved
* by __save_processor_state()
* @ctxt - structure to load the registers contents from
* __restore_processor_state - restore the contents of CPU registers saved
* by __save_processor_state()
* @ctxt - structure to load the registers contents from
*
* The asm code that gets us here will have restored a usable GDT, although
* it will be pointing to the wrong alias.
*/
static void notrace __restore_processor_state(struct saved_context *ctxt)
{
......@@ -215,57 +213,50 @@ static void notrace __restore_processor_state(struct saved_context *ctxt)
write_cr2(ctxt->cr2);
write_cr0(ctxt->cr0);
/* Restore the IDT. */
load_idt(&ctxt->idt);
/*
* now restore the descriptor tables to their proper values
* ltr is done in fix_processor_context().
* Just in case the asm code got us here with the SS, DS, or ES
* out of sync with the GDT, update them.
*/
#ifdef CONFIG_X86_32
load_idt(&ctxt->idt);
#else
/* CONFIG_X86_64 */
load_idt((const struct desc_ptr *)&ctxt->idt_limit);
#endif
loadsegment(ss, __KERNEL_DS);
loadsegment(ds, __USER_DS);
loadsegment(es, __USER_DS);
#ifdef CONFIG_X86_64
/*
* We need GSBASE restored before percpu access can work.
* percpu access can happen in exception handlers or in complicated
* helpers like load_gs_index().
* Restore percpu access. Percpu access can happen in exception
* handlers or in complicated helpers like load_gs_index().
*/
wrmsrl(MSR_GS_BASE, ctxt->gs_base);
#ifdef CONFIG_X86_64
wrmsrl(MSR_GS_BASE, ctxt->kernelmode_gs_base);
#else
loadsegment(fs, __KERNEL_PERCPU);
loadsegment(gs, __KERNEL_STACK_CANARY);
#endif
/* Restore the TSS, RO GDT, LDT, and usermode-relevant MSRs. */
fix_processor_context();
/*
* Restore segment registers. This happens after restoring the GDT
* and LDT, which happen in fix_processor_context().
* Now that we have descriptor tables fully restored and working
* exception handling, restore the usermode segments.
*/
#ifdef CONFIG_X86_32
#ifdef CONFIG_X86_64
loadsegment(ds, ctxt->es);
loadsegment(es, ctxt->es);
loadsegment(fs, ctxt->fs);
loadsegment(gs, ctxt->gs);
loadsegment(ss, ctxt->ss);
/*
* sysenter MSRs
*/
if (boot_cpu_has(X86_FEATURE_SEP))
enable_sep_cpu();
#else
/* CONFIG_X86_64 */
asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds));
asm volatile ("movw %0, %%es" :: "r" (ctxt->es));
asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs));
load_gs_index(ctxt->gs);
asm volatile ("movw %0, %%ss" :: "r" (ctxt->ss));
/*
* Restore FSBASE and user GSBASE after reloading the respective
* segment selectors.
* Restore FSBASE and GSBASE after restoring the selectors, since
* restoring the selectors clobbers the bases. Keep in mind
* that MSR_KERNEL_GS_BASE is horribly misnamed.
*/
wrmsrl(MSR_FS_BASE, ctxt->fs_base);
wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
wrmsrl(MSR_KERNEL_GS_BASE, ctxt->usermode_gs_base);
#elif defined(CONFIG_X86_32_LAZY_GS)
loadsegment(gs, ctxt->gs);
#endif
do_fpu_end();
......
......@@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
ff:
ff: UD0
EndTable
Table: 3-byte opcode 1 (0x0f 0x38)
......@@ -717,7 +717,7 @@ AVXcode: 2
7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
80: INVEPT Gy,Mdq (66)
81: INVPID Gy,Mdq (66)
81: INVVPID Gy,Mdq (66)
82: INVPCID Gy,Mdq (66)
83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
88: vexpandps/d Vpd,Wpd (66),(ev)
......@@ -896,7 +896,7 @@ EndTable
GrpTable: Grp3_1
0: TEST Eb,Ib
1:
1: TEST Eb,Ib
2: NOT Eb
3: NEG Eb
4: MUL AL,Eb
......@@ -970,6 +970,15 @@ GrpTable: Grp9
EndTable
GrpTable: Grp10
# all are UD1
0: UD1
1: UD1
2: UD1
3: UD1
4: UD1
5: UD1
6: UD1
7: UD1
EndTable
# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
......