Commit e24f9c5f authored by Linus Torvalds

Merge tag 'x86_urgent_for_v5.11_rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:
 "I hope this is the last batch of x86/urgent updates for this round:

   - Remove superfluous EFI PGD range checks which lead to those
     assertions failing with certain kernel configs and LLVM.

   - Disable setting breakpoints on facilities involved in #DB exception
     handling to avoid infinite loops.

   - Add extra serialization to non-serializing MSRs (IA32_TSC_DEADLINE
     and x2 APIC MSRs) to adhere to SDM's recommendation and avoid any
     theoretical issues.

   - Re-add the EPB MSR reading on turbostat so that it works on older
     kernels which don't have the corresponding EPB sysfs file.

   - Add Alder Lake to the list of CPUs which support split lock.

   - Fix %dr6 register handling in order to be able to set watchpoints
     with gdb again.

   - Disable CET instrumentation in the kernel so that gcc doesn't add
     ENDBR64 to kernel code and thus confuse tracing"

* tag 'x86_urgent_for_v5.11_rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/efi: Remove EFI PGD build time checks
  x86/debug: Prevent data breakpoints on cpu_dr7
  x86/debug: Prevent data breakpoints on __per_cpu_offset
  x86/apic: Add extra serialization for non-serializing MSRs
  tools/power/turbostat: Fallback to an MSR read for EPB
  x86/split_lock: Enable the split lock feature on another Alder Lake CPU
  x86/debug: Fix DR6 handling
  x86/build: Disable CET instrumentation in the kernel
parents 2db138bb 816ef8d7
...@@ -949,12 +949,6 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
 # change __FILE__ to the relative path from the srctree
 KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
-# ensure -fcf-protection is disabled when using retpoline as it is
-# incompatible with -mindirect-branch=thunk-extern
-ifdef CONFIG_RETPOLINE
-KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
-endif
 # include additional Makefiles when needed
 include-y := scripts/Makefile.extrawarn
 include-$(CONFIG_KASAN) += scripts/Makefile.kasan
...
...@@ -120,6 +120,9 @@ else
 KBUILD_CFLAGS += -mno-red-zone
 KBUILD_CFLAGS += -mcmodel=kernel
+# Intel CET isn't enabled in the kernel
+KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none)
 endif
 ifdef CONFIG_X86_X32
...
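(Aside, not part of the commit: the effect the last bullet of the merge message describes can be reproduced by compiling any C function with gcc -fcf-protection=branch; the generated code then begins with an ENDBR64 instruction, which breaks kernel code that expects a function's first bytes to be its normal prologue, e.g. for tracing. A minimal illustrative sketch:)

/* Illustrative only: build once with -fcf-protection=branch and once with
 * -fcf-protection=none, then compare the disassembly of the entry point. */
int add(int a, int b)
{
	return a + b;	/* with CET enabled, "endbr64" is emitted before the body */
}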
...@@ -197,16 +197,6 @@ static inline bool apic_needs_pit(void) { return true; }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 #ifdef CONFIG_X86_X2APIC
-/*
- * Make previous memory operations globally visible before
- * sending the IPI through x2apic wrmsr. We need a serializing instruction or
- * mfence for this.
- */
-static inline void x2apic_wrmsr_fence(void)
-{
-	asm volatile("mfence" : : : "memory");
-}
 static inline void native_apic_msr_write(u32 reg, u32 v)
 {
 	if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
...
...@@ -84,4 +84,22 @@ do { \
 #include <asm-generic/barrier.h>
+/*
+ * Make previous memory operations globally visible before
+ * a WRMSR.
+ *
+ * MFENCE makes writes visible, but only affects load/store
+ * instructions. WRMSR is unfortunately not a load/store
+ * instruction and is unaffected by MFENCE. The LFENCE ensures
+ * that the WRMSR is not reordered.
+ *
+ * Most WRMSRs are full serializing instructions themselves and
+ * do not require this barrier. This is only required for the
+ * IA32_TSC_DEADLINE and X2APIC MSRs.
+ */
+static inline void weak_wrmsr_fence(void)
+{
+	asm volatile("mfence; lfence" : : : "memory");
+}
 #endif /* _ASM_X86_BARRIER_H */
...@@ -41,6 +41,7 @@
 #include <asm/perf_event.h>
 #include <asm/x86_init.h>
 #include <linux/atomic.h>
+#include <asm/barrier.h>
 #include <asm/mpspec.h>
 #include <asm/i8259.h>
 #include <asm/proto.h>
...@@ -477,6 +478,9 @@ static int lapic_next_deadline(unsigned long delta,
 {
 	u64 tsc;
+	/* This MSR is special and need a special fence: */
+	weak_wrmsr_fence();
 	tsc = rdtsc();
 	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
 	return 0;
...
...@@ -29,7 +29,8 @@ static void x2apic_send_IPI(int cpu, int vector)
 {
 	u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu);
-	x2apic_wrmsr_fence();
+	/* x2apic MSRs are special and need a special fence: */
+	weak_wrmsr_fence();
 	__x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
 }
...@@ -41,7 +42,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 	unsigned long flags;
 	u32 dest;
-	x2apic_wrmsr_fence();
+	/* x2apic MSRs are special and need a special fence: */
+	weak_wrmsr_fence();
 	local_irq_save(flags);
 	tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask);
...
...@@ -43,7 +43,8 @@ static void x2apic_send_IPI(int cpu, int vector)
 {
 	u32 dest = per_cpu(x86_cpu_to_apicid, cpu);
-	x2apic_wrmsr_fence();
+	/* x2apic MSRs are special and need a special fence: */
+	weak_wrmsr_fence();
 	__x2apic_send_IPI_dest(dest, vector, APIC_DEST_PHYSICAL);
 }
...@@ -54,7 +55,8 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
 	unsigned long this_cpu;
 	unsigned long flags;
-	x2apic_wrmsr_fence();
+	/* x2apic MSRs are special and need a special fence: */
+	weak_wrmsr_fence();
 	local_irq_save(flags);
...@@ -125,7 +127,8 @@ void __x2apic_send_IPI_shorthand(int vector, u32 which)
 {
 	unsigned long cfg = __prepare_ICR(which, vector, 0);
-	x2apic_wrmsr_fence();
+	/* x2apic MSRs are special and need a special fence: */
+	weak_wrmsr_fence();
 	native_x2apic_icr_write(cfg, 0);
 }
...
...@@ -1159,6 +1159,7 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
 	X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1),
 	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1),
 	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1),
+	X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1),
 	{}
 };
...
...@@ -269,6 +269,20 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
 			CPU_ENTRY_AREA_TOTAL_SIZE))
 		return true;
+	/*
+	 * When FSGSBASE is enabled, paranoid_entry() fetches the per-CPU
+	 * GSBASE value via __per_cpu_offset or pcpu_unit_offsets.
+	 */
+#ifdef CONFIG_SMP
+	if (within_area(addr, end, (unsigned long)__per_cpu_offset,
+			sizeof(unsigned long) * nr_cpu_ids))
+		return true;
+#else
+	if (within_area(addr, end, (unsigned long)&pcpu_unit_offsets,
+			sizeof(pcpu_unit_offsets)))
+		return true;
+#endif
 	for_each_possible_cpu(cpu) {
 		/* The original rw GDT is being used after load_direct_gdt() */
 		if (within_area(addr, end, (unsigned long)get_cpu_gdt_rw(cpu),
...@@ -293,6 +307,14 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
 			    (unsigned long)&per_cpu(cpu_tlbstate, cpu),
 			    sizeof(struct tlb_state)))
 			return true;
+		/*
+		 * When in guest (X86_FEATURE_HYPERVISOR), local_db_save()
+		 * will read per-cpu cpu_dr7 before clear dr7 register.
+		 */
+		if (within_area(addr, end, (unsigned long)&per_cpu(cpu_dr7, cpu),
+				sizeof(cpu_dr7)))
+			return true;
 	}
 	return false;
...@@ -491,15 +513,12 @@ static int hw_breakpoint_handler(struct die_args *args)
 	struct perf_event *bp;
 	unsigned long *dr6_p;
 	unsigned long dr6;
+	bool bpx;
 	/* The DR6 value is pointed by args->err */
 	dr6_p = (unsigned long *)ERR_PTR(args->err);
 	dr6 = *dr6_p;
-	/* If it's a single step, TRAP bits are random */
-	if (dr6 & DR_STEP)
-		return NOTIFY_DONE;
 	/* Do an early return if no trap bits are set in DR6 */
 	if ((dr6 & DR_TRAP_BITS) == 0)
 		return NOTIFY_DONE;
...@@ -509,28 +528,29 @@ static int hw_breakpoint_handler(struct die_args *args)
 		if (likely(!(dr6 & (DR_TRAP0 << i))))
 			continue;
-		/*
-		 * The counter may be concurrently released but that can only
-		 * occur from a call_rcu() path. We can then safely fetch
-		 * the breakpoint, use its callback, touch its counter
-		 * while we are in an rcu_read_lock() path.
-		 */
-		rcu_read_lock();
-		bp = this_cpu_read(bp_per_reg[i]);
+		bp = this_cpu_read(bp_per_reg[i]);
+		if (!bp)
+			continue;
+		bpx = bp->hw.info.type == X86_BREAKPOINT_EXECUTE;
+		/*
+		 * TF and data breakpoints are traps and can be merged, however
+		 * instruction breakpoints are faults and will be raised
+		 * separately.
+		 *
+		 * However DR6 can indicate both TF and instruction
+		 * breakpoints. In that case take TF as that has precedence and
+		 * delay the instruction breakpoint for the next exception.
+		 */
+		if (bpx && (dr6 & DR_STEP))
+			continue;
 		/*
 		 * Reset the 'i'th TRAP bit in dr6 to denote completion of
 		 * exception handling
 		 */
 		(*dr6_p) &= ~(DR_TRAP0 << i);
-		/*
-		 * bp can be NULL due to lazy debug register switching
-		 * or due to concurrent perf counter removing.
-		 */
-		if (!bp) {
-			rcu_read_unlock();
-			break;
-		}
 		perf_bp_event(bp, args->regs);
...@@ -538,11 +558,10 @@
 		 * Set up resume flag to avoid breakpoint recursion when
 		 * returning back to origin.
 		 */
-		if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
+		if (bpx)
 			args->regs->flags |= X86_EFLAGS_RF;
-		rcu_read_unlock();
 	}
 	/*
 	 * Further processing in do_debug() is needed for a) user-space
 	 * breakpoints (to generate signals) and b) when the system has
...
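(Aside, not part of the diff: the DR6 bits tested above are the standard x86 debug-status bits; the values below mirror the kernel's debugreg definitions and show why an instruction breakpoint is deferred whenever a single-step (DR_STEP) is pending in the same exception.)

/* DR6 bits used by hw_breakpoint_handler() (as in the x86 debugreg headers): */
#define DR_TRAP0	0x1	/* B0: hardware breakpoint 0 triggered */
#define DR_TRAP1	0x2	/* B1 */
#define DR_TRAP2	0x4	/* B2 */
#define DR_TRAP3	0x8	/* B3 */
#define DR_TRAP_BITS	(DR_TRAP0 | DR_TRAP1 | DR_TRAP2 | DR_TRAP3)
#define DR_STEP		0x4000	/* BS: single-step trap */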
...@@ -115,31 +115,12 @@ void efi_sync_low_kernel_mappings(void)
 	pud_t *pud_k, *pud_efi;
 	pgd_t *efi_pgd = efi_mm.pgd;
-	/*
-	 * We can share all PGD entries apart from the one entry that
-	 * covers the EFI runtime mapping space.
-	 *
-	 * Make sure the EFI runtime region mappings are guaranteed to
-	 * only span a single PGD entry and that the entry also maps
-	 * other important kernel regions.
-	 */
-	MAYBE_BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END));
-	MAYBE_BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) !=
-			(EFI_VA_END & PGDIR_MASK));
 	pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET);
 	pgd_k = pgd_offset_k(PAGE_OFFSET);
 	num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET);
 	memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries);
-	/*
-	 * As with PGDs, we share all P4D entries apart from the one entry
-	 * that covers the EFI runtime mapping space.
-	 */
-	BUILD_BUG_ON(p4d_index(EFI_VA_END) != p4d_index(MODULES_END));
-	BUILD_BUG_ON((EFI_VA_START & P4D_MASK) != (EFI_VA_END & P4D_MASK));
 	pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
 	pgd_k = pgd_offset_k(EFI_VA_END);
 	p4d_efi = p4d_offset(pgd_efi, 0);
...
...@@ -1834,12 +1834,15 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 int get_epb(int cpu)
 {
 	char path[128 + PATH_BYTES];
+	unsigned long long msr;
 	int ret, epb = -1;
 	FILE *fp;
 	sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu);
-	fp = fopen_or_die(path, "r");
+	fp = fopen(path, "r");
+	if (!fp)
+		goto msr_fallback;
 	ret = fscanf(fp, "%d", &epb);
 	if (ret != 1)
...@@ -1848,6 +1851,11 @@ int get_epb(int cpu)
 	fclose(fp);
 	return epb;
+msr_fallback:
+	get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
+	return msr & 0xf;
 }
 void get_apic_id(struct thread_data *t)
...
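(Aside, not part of the commit: a minimal sketch of the kind of read the get_msr() fallback performs, assuming the msr driver is loaded and /dev/cpu/N/msr is readable as root. Illustrative only, not the turbostat implementation; MSR_IA32_ENERGY_PERF_BIAS is 0x1b0 and the EPB value lives in its low four bits, matching the "msr & 0xf" above.)

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Read MSR 0x1b0 (IA32_ENERGY_PERF_BIAS) for one CPU via the msr driver. */
static int read_epb_fallback(int cpu)
{
	char path[64];
	uint64_t msr;
	int fd;

	snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return -1;

	/* The msr driver uses the file offset as the MSR index. */
	if (pread(fd, &msr, sizeof(msr), 0x1b0) != sizeof(msr)) {
		close(fd);
		return -1;
	}
	close(fd);

	return (int)(msr & 0xf);	/* EPB is in bits 3:0 */
}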