Commit 63d1cb53 authored by Linus Torvalds

Merge tag 'powerpc-5.13-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:

 - Fix a regression in the conversion of the 64-bit BookE interrupt
   entry to C.

 - Fix KVM hosts running with the hash MMU since the recent KVM gfn
   changes.

 - Fix a deadlock in our paravirt spinlocks when hcall tracing is
   enabled.

 - Several fixes for oopses in our runtime code patching for security
   mitigations.

 - A couple of minor fixes for the recent conversion of 32-bit interrupt
   entry/exit to C.

 - Fix __get_user() causing spurious crashes in sigreturn due to a bad
   inline asm constraint, spotted with GCC 11.

 - A fix for the way we track IRQ masking state vs NMI interrupts when
   using the new scv system call entry path.

 - A couple more minor fixes.

Thanks to Cédric Le Goater, Christian Zigotzky, Christophe Leroy,
Naveen N. Rao, Nicholas Piggin, Paul Menzel, and Sean Christopherson.

* tag 'powerpc-5.13-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/64e/interrupt: Fix nvgprs being clobbered
  powerpc/64s: Make NMI record implicitly soft-masked code as irqs disabled
  powerpc/64s: Fix stf mitigation patching w/strict RWX & hash
  powerpc/64s: Fix entry flush patching w/strict RWX & hash
  powerpc/64s: Fix crashes when toggling entry flush barrier
  powerpc/64s: Fix crashes when toggling stf barrier
  KVM: PPC: Book3S HV: Fix kvm_unmap_gfn_range_hv() for Hash MMU
  powerpc/legacy_serial: Fix UBSAN: array-index-out-of-bounds
  powerpc/signal: Fix possible build failure with unsafe_copy_fpr_{to/from}_user
  powerpc/uaccess: Fix __get_user() with CONFIG_CC_HAS_ASM_GOTO_OUTPUT
  powerpc/pseries: warn if recursing into the hcall tracing code
  powerpc/pseries: use notrace hcall variant for H_CEDE idle
  powerpc/pseries: Don't trace hcall tracing wrapper
  powerpc/pseries: Fix hcall tracing recursion in pv queued spinlocks
  powerpc/syscall: Calling kuap_save_and_lock() is wrong
  powerpc/interrupts: Fix kuep_unlock() call
parents c12a29ed c6ac667b
@@ -448,6 +448,9 @@
  */
 long plpar_hcall_norets(unsigned long opcode, ...);
 
+/* Variant which does not do hcall tracing */
+long plpar_hcall_norets_notrace(unsigned long opcode, ...);
+
 /**
  * plpar_hcall: - Make a pseries hypervisor call
  * @opcode: The hypervisor call to make.
@@ -153,8 +153,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
  */
 static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
 {
-	if (user_mode(regs))
-		kuep_unlock();
 }
 
 static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
@@ -222,6 +220,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
 	local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
 	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) &&
+				regs->nip < (unsigned long)__end_interrupts) {
+		// Kernel code running below __end_interrupts is
+		// implicitly soft-masked.
+		regs->softe = IRQS_ALL_DISABLED;
+	}
+
 	/* Don't do any per-CPU operations until interrupt state is fixed */
 
 	if (nmi_disables_ftrace(regs)) {
@@ -28,19 +28,35 @@ static inline u32 yield_count_of(int cpu)
 	return be32_to_cpu(yield_count);
 }
 
+/*
+ * Spinlock code confers and prods, so don't trace the hcalls because the
+ * tracing code takes spinlocks which can cause recursion deadlocks.
+ *
+ * These calls are made while the lock is not held: the lock slowpath yields if
+ * it can not acquire the lock, and unlock slow path might prod if a waiter has
+ * yielded). So this may not be a problem for simple spin locks because the
+ * tracing does not technically recurse on the lock, but we avoid it anyway.
+ *
+ * However the queued spin lock contended path is more strictly ordered: the
+ * H_CONFER hcall is made after the task has queued itself on the lock, so then
+ * recursing on that lock will cause the task to then queue up again behind the
+ * first instance (or worse: queued spinlocks use tricks that assume a context
+ * never waits on more than one spinlock, so such recursion may cause random
+ * corruption in the lock code).
+ */
 static inline void yield_to_preempted(int cpu, u32 yield_count)
 {
-	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
+	plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
 }
 
 static inline void prod_cpu(int cpu)
 {
-	plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+	plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu));
 }
 
 static inline void yield_to_any(void)
 {
-	plpar_hcall_norets(H_CONFER, -1, 0);
+	plpar_hcall_norets_notrace(H_CONFER, -1, 0);
 }
 
 #else
 static inline bool is_shared_processor(void)
@@ -28,7 +28,11 @@ static inline void set_cede_latency_hint(u8 latency_hint)
 
 static inline long cede_processor(void)
 {
-	return plpar_hcall_norets(H_CEDE);
+	/*
+	 * We cannot call tracepoints inside RCU idle regions which
+	 * means we must not trace H_CEDE.
+	 */
+	return plpar_hcall_norets_notrace(H_CEDE);
 }
 
 static inline long extended_cede_processor(unsigned long latency_hint)
@@ -157,7 +157,7 @@ do {								\
		"2:	lwz%X1 %L0, %L1\n"			\
		EX_TABLE(1b, %l2)				\
		EX_TABLE(2b, %l2)				\
-		: "=r" (x)					\
+		: "=&r" (x)					\
		: "m" (*addr)					\
		:						\
		: label)
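Why the "&" matters: with asm goto output operands, a plain "=r" output can be
allocated to the same register as an input. The 32-bit __get_user() sequence
above writes half of the output register pair with the first lwz and then uses
the address input again for the second lwz, so an aliased register means the
second load dereferences a clobbered pointer. The earlyclobber "&" forces a
distinct register. A minimal sketch of the hazard and the fix, not the kernel
macro (illustrative names, assumes ppc32 big-endian and a compiler with
asm-goto-output support, i.e. GCC 11+ / CONFIG_CC_HAS_ASM_GOTO_OUTPUT):

	/* Sketch only: mirrors the macro above, but with a
	 * register-addressed load instead of the "m" constraint. */
	static int get_u64_sketch(unsigned long long *x, const void *p)
	{
		asm goto(
			"1:	lwz %0, 0(%1)\n"	/* writes half of x... */
			"2:	lwz %L0, 4(%1)\n"	/* ...then reads p again */
			: "=&r" (*x)	/* "&": x must not share p's register */
			: "r" (p)
			:
			: fault);
		return 0;
	fault:
		return -14;	/* -EFAULT */
	}

With plain "=r" the compiler is free to give x's register pair the same
register as p; instruction 1 then corrupts the address before instruction 2
uses it, which is exactly the spurious sigreturn crash GCC 11 exposed.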
@@ -340,6 +340,12 @@ ret_from_mc_except:
	andi.	r10,r10,IRQS_DISABLED;	/* yes -> go out of line */ \
	bne	masked_interrupt_book3e_##n
 
+/*
+ * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is
+ * called, because that does SAVE_NVGPRS which must see the original register
+ * values, otherwise the scratch values might be restored when exiting the
+ * interrupt.
+ */
 #define PROLOG_ADDITION_2REGS_GEN(n)					\
	std	r14,PACA_EXGEN+EX_R14(r13);				\
	std	r15,PACA_EXGEN+EX_R15(r13)
@@ -535,6 +541,10 @@ __end_interrupts:
		PROLOG_ADDITION_2REGS)
	mfspr	r14,SPRN_DEAR
	mfspr	r15,SPRN_ESR
+	std	r14,_DAR(r1)
+	std	r15,_DSISR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
	EXCEPTION_COMMON(0x300)
	b	storage_fault_common
@@ -544,6 +554,10 @@ __end_interrupts:
		PROLOG_ADDITION_2REGS)
	li	r15,0
	mr	r14,r10
+	std	r14,_DAR(r1)
+	std	r15,_DSISR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
	EXCEPTION_COMMON(0x400)
	b	storage_fault_common
@@ -557,6 +571,10 @@ __end_interrupts:
		PROLOG_ADDITION_2REGS)
	mfspr	r14,SPRN_DEAR
	mfspr	r15,SPRN_ESR
+	std	r14,_DAR(r1)
+	std	r15,_DSISR(r1)
+	ld	r14,PACA_EXGEN+EX_R14(r13)
+	ld	r15,PACA_EXGEN+EX_R15(r13)
	EXCEPTION_COMMON(0x600)
	b	alignment_more	/* no room, go out of line */
@@ -565,10 +583,10 @@ __end_interrupts:
	NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
				PROLOG_ADDITION_1REG)
	mfspr	r14,SPRN_ESR
-	EXCEPTION_COMMON(0x700)
	std	r14,_DSISR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
	ld	r14,PACA_EXGEN+EX_R14(r13)
+	EXCEPTION_COMMON(0x700)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	program_check_exception
	REST_NVGPRS(r1)
	b	interrupt_return
@@ -725,11 +743,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
	 * normal exception
	 */
	mfspr	r14,SPRN_DBSR
-	EXCEPTION_COMMON_CRIT(0xd00)
	std	r14,_DSISR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
	ld	r14,PACA_EXCRIT+EX_R14(r13)
	ld	r15,PACA_EXCRIT+EX_R15(r13)
+	EXCEPTION_COMMON_CRIT(0xd00)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	DebugException
	REST_NVGPRS(r1)
	b	interrupt_return
@@ -796,11 +814,11 @@ kernel_dbg_exc:
	 * normal exception
	 */
	mfspr	r14,SPRN_DBSR
-	EXCEPTION_COMMON_DBG(0xd08)
	std	r14,_DSISR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
	ld	r14,PACA_EXDBG+EX_R14(r13)
	ld	r15,PACA_EXDBG+EX_R15(r13)
+	EXCEPTION_COMMON_DBG(0xd08)
+	addi	r3,r1,STACK_FRAME_OVERHEAD
	bl	DebugException
	REST_NVGPRS(r1)
	b	interrupt_return
@@ -931,11 +949,7 @@ masked_interrupt_book3e_0x2c0:
 * original values stashed away in the PACA
 */
storage_fault_common:
-	std	r14,_DAR(r1)
-	std	r15,_DSISR(r1)
	addi	r3,r1,STACK_FRAME_OVERHEAD
-	ld	r14,PACA_EXGEN+EX_R14(r13)
-	ld	r15,PACA_EXGEN+EX_R15(r13)
	bl	do_page_fault
	b	interrupt_return
@@ -944,11 +958,7 @@ storage_fault_common:
 * continues here.
 */
alignment_more:
-	std	r14,_DAR(r1)
-	std	r15,_DSISR(r1)
	addi	r3,r1,STACK_FRAME_OVERHEAD
-	ld	r14,PACA_EXGEN+EX_R14(r13)
-	ld	r15,PACA_EXGEN+EX_R15(r13)
	bl	alignment_exception
	REST_NVGPRS(r1)
	b	interrupt_return
@@ -34,9 +34,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
	syscall_fn f;
 
	kuep_lock();
-#ifdef CONFIG_PPC32
-	kuap_save_and_lock(regs);
-#endif
 
	regs->orig_gpr3 = r3;
@@ -427,6 +424,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr)
	/* Restore user access locks last */
	kuap_user_restore(regs);
+	kuep_unlock();
 
	return ret;
 }
@@ -356,13 +356,16 @@ static void __init setup_legacy_serial_console(int console)
 
 static int __init ioremap_legacy_serial_console(void)
 {
-	struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console];
-	struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console];
+	struct plat_serial8250_port *port;
+	struct legacy_serial_info *info;
	void __iomem *vaddr;
 
	if (legacy_serial_console < 0)
		return 0;
 
+	info = &legacy_serial_infos[legacy_serial_console];
+	port = &legacy_serial_ports[legacy_serial_console];
+
	if (!info->early_addr)
		return 0;
@@ -166,9 +166,9 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 #else
-#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
+#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0)
 
-#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0)
 
 static inline unsigned long
 copy_fpr_to_user(void __user *to, struct task_struct *task)
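The `if (0) goto label;` bodies deserve a word: callers of these macros pass an
error label for the user-access fault path, and when the FPU code is configured
out an empty macro body can leave that label unreferenced, which trips
-Wunused-label and can break the build. `if (0) goto label;` compiles to
nothing but still marks the label as used. A standalone sketch of the idiom
with illustrative names (not kernel code; HAVE_FPU and copy_fpr stand in for
the real config symbol and helper):

	#ifdef HAVE_FPU	/* stand-in for the real config symbol */
	#define unsafe_copy_fpr_to_user(to, task, label)	\
		do { if (copy_fpr(to, task)) goto label; } while (0)
	#else
	/* No FPU: do nothing, but keep 'label' referenced so callers
	 * whose only use of the label is this macro still compile. */
	#define unsafe_copy_fpr_to_user(to, task, label)	\
		do { if (0) goto label; } while (0)
	#endif

	int save_context(void *to, void *task)
	{
		unsafe_copy_fpr_to_user(to, task, failed);
		return 0;
	failed:	/* unused-label warning here without the if (0) goto */
		return -1;
	}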
@@ -840,7 +840,7 @@ bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
			kvm_unmap_radix(kvm, range->slot, gfn);
	} else {
		for (gfn = range->start; gfn < range->end; gfn++)
-			kvm_unmap_rmapp(kvm, range->slot, range->start);
+			kvm_unmap_rmapp(kvm, range->slot, gfn);
	}
 
	return false;
@@ -14,6 +14,7 @@
 #include <linux/string.h>
 #include <linux/init.h>
 #include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
 #include <asm/cputable.h>
 #include <asm/code-patching.h>
 #include <asm/page.h>
@@ -149,17 +150,17 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
 
		pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-		if (types & STF_BARRIER_FALLBACK)
+		// See comment in do_entry_flush_fixups() RE order of patching
+		if (types & STF_BARRIER_FALLBACK) {
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
			patch_branch((struct ppc_inst *)(dest + 1),
-				     (unsigned long)&stf_barrier_fallback,
-				     BRANCH_SET_LINK);
-		else
-			patch_instruction((struct ppc_inst *)(dest + 1),
-					  ppc_inst(instrs[1]));
-
-		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+				     (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
+		} else {
+			patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+		}
	}
 
	printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
@@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
		   : "unknown");
 }
 
+static int __do_stf_barrier_fixups(void *data)
+{
+	enum stf_barrier_type *types = data;
+
+	do_stf_entry_barrier_fixups(*types);
+	do_stf_exit_barrier_fixups(*types);
+
+	return 0;
+}
+
 void do_stf_barrier_fixups(enum stf_barrier_type types)
 {
-	do_stf_entry_barrier_fixups(types);
-	do_stf_exit_barrier_fixups(types);
+	/*
+	 * The call to the fallback entry flush, and the fallback/sync-ori exit
+	 * flush can not be safely patched in/out while other CPUs are executing
+	 * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs
+	 * spin in the stop machine core with interrupts hard disabled.
+	 */
+	stop_machine(__do_stf_barrier_fixups, &types, NULL);
 }
 
 void do_uaccess_flush_fixups(enum l1d_flush_type types)
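For reference, stop_machine() is what makes the multi-instruction patching
above safe: stop_machine(fn, data, cpus) runs fn on one CPU while every other
online CPU spins in the stop-machine core with interrupts hard disabled, so no
CPU can be part-way through executing the sequence being rewritten. A minimal
sketch of the pattern with illustrative names (not this file's code; it reuses
the patch_instruction()/ppc_inst() helpers visible in the hunks above):

	#include <linux/stop_machine.h>
	#include <asm/code-patching.h>

	struct patch_args {		/* hypothetical argument block */
		struct ppc_inst *site;	/* instruction to rewrite */
		unsigned int insn;	/* new instruction word */
	};

	static int __apply_patch(void *data)
	{
		struct patch_args *args = data;

		/* Runs on exactly one CPU; the others are spinning with
		 * interrupts hard disabled, so none can execute 'site'. */
		return patch_instruction(args->site, ppc_inst(args->insn));
	}

	static void apply_patch(struct patch_args *args)
	{
		/* NULL cpumask: the callback may run on any one CPU */
		stop_machine(__apply_patch, args, NULL);
	}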
@@ -284,8 +299,9 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
		   : "unknown");
 }
 
-void do_entry_flush_fixups(enum l1d_flush_type types)
+static int __do_entry_flush_fixups(void *data)
 {
+	enum l1d_flush_type types = *(enum l1d_flush_type *)data;
	unsigned int instrs[3], *dest;
	long *start, *end;
	int i;
@@ -309,6 +325,31 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
	if (types & L1D_FLUSH_MTTRIG)
		instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
 
+	/*
+	 * If we're patching in or out the fallback flush we need to be careful about the
+	 * order in which we patch instructions. That's because it's possible we could
+	 * take a page fault after patching one instruction, so the sequence of
+	 * instructions must be safe even in a half patched state.
+	 *
+	 * To make that work, when patching in the fallback flush we patch in this order:
+	 * - the mflr		(dest)
+	 * - the mtlr		(dest + 2)
+	 * - the branch		(dest + 1)
+	 *
+	 * That ensures the sequence is safe to execute at any point. In contrast if we
+	 * patch the mtlr last, it's possible we could return from the branch and not
+	 * restore LR, leading to a crash later.
+	 *
+	 * When patching out the fallback flush (either with nops or another flush type),
+	 * we patch in this order:
+	 * - the branch		(dest + 1)
+	 * - the mtlr		(dest + 2)
+	 * - the mflr		(dest)
+	 *
+	 * Note we are protected by stop_machine() from other CPUs executing the code in a
+	 * semi-patched state.
+	 */
+
	start = PTRRELOC(&__start___entry_flush_fixup);
	end = PTRRELOC(&__stop___entry_flush_fixup);
	for (i = 0; start < end; start++, i++) {
@@ -316,15 +357,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
 
		pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-		if (types == L1D_FLUSH_FALLBACK)
-			patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback,
-				     BRANCH_SET_LINK);
-		else
+		if (types == L1D_FLUSH_FALLBACK) {
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_branch((struct ppc_inst *)(dest + 1),
+				     (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
+		} else {
			patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
-		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+		}
	}
 
	start = PTRRELOC(&__start___scv_entry_flush_fixup);
@@ -334,15 +376,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
 
		pr_devel("patching dest %lx\n", (unsigned long)dest);
 
-		patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-		if (types == L1D_FLUSH_FALLBACK)
-			patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback,
-				     BRANCH_SET_LINK);
-		else
+		if (types == L1D_FLUSH_FALLBACK) {
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_branch((struct ppc_inst *)(dest + 1),
+				     (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
+		} else {
			patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
-		patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+			patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+		}
	}
@@ -354,6 +397,19 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
							: "ori type" :
		(types &  L1D_FLUSH_MTTRIG)	? "mttrig type"
							: "unknown");
+
+	return 0;
+}
+
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+	/*
+	 * The call to the fallback flush can not be safely patched in/out while
+	 * other CPUs are executing it. So call __do_entry_flush_fixups() on one
+	 * CPU while all other CPUs spin in the stop machine core with interrupts
+	 * hard disabled.
+	 */
+	stop_machine(__do_entry_flush_fixups, &types, NULL);
+}
 
 void do_rfi_flush_fixups(enum l1d_flush_type types)
@@ -102,6 +102,16 @@ END_FTR_SECTION(0, 1);						\
 #define HCALL_BRANCH(LABEL)
 #endif
 
+_GLOBAL_TOC(plpar_hcall_norets_notrace)
+	HMT_MEDIUM
+
+	mfcr	r0
+	stw	r0,8(r1)
+	HVSC				/* invoke the hypervisor */
+	lwz	r0,8(r1)
+	mtcrf	0xff,r0
+	blr				/* return r3 = status */
+
 _GLOBAL_TOC(plpar_hcall_norets)
	HMT_MEDIUM
@@ -1829,30 +1829,28 @@ void hcall_tracepoint_unregfunc(void)
 #endif
 
 /*
- * Since the tracing code might execute hcalls we need to guard against
- * recursion. One example of this are spinlocks calling H_YIELD on
- * shared processor partitions.
+ * Keep track of hcall tracing depth and prevent recursion. Warn if any is
+ * detected because it may indicate a problem. This will not catch all
+ * problems with tracing code making hcalls, because the tracing might have
+ * been invoked from a non-hcall, so the first hcall could recurse into it
+ * without warning here, but this better than nothing.
+ *
+ * Hcalls with specific problems being traced should use the _notrace
+ * plpar_hcall variants.
 */
 static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
 
-void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
 {
	unsigned long flags;
	unsigned int *depth;
 
-	/*
-	 * We cannot call tracepoints inside RCU idle regions which
-	 * means we must not trace H_CEDE.
-	 */
-	if (opcode == H_CEDE)
-		return;
-
	local_irq_save(flags);
 
	depth = this_cpu_ptr(&hcall_trace_depth);
 
-	if (*depth)
+	if (WARN_ON_ONCE(*depth))
		goto out;
 
	(*depth)++;
@@ -1864,19 +1862,16 @@ void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
	local_irq_restore(flags);
 }
 
-void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
+notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
 {
	unsigned long flags;
	unsigned int *depth;
 
-	if (opcode == H_CEDE)
-		return;
-
	local_irq_save(flags);
 
	depth = this_cpu_ptr(&hcall_trace_depth);
 
-	if (*depth)
+	if (*depth) /* Don't warn again on the way out */
		goto out;
 
	(*depth)++;