Commit 8001a975 authored by Linus Torvalds

Merge tag 'powerpc-4.13-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
 "All fixes for code that went in this cycle.

   - a revert of an optimisation to the syscall exit path, which could
     lead to an oops on either older machines or machines with > 1TB of
     memory

   - disable some deep idle states if the firmware configuration for
     them fails

   - re-enable HARD/SOFT lockup detectors in defconfigs after a Kconfig
     change

   - six fairly small patches fixing bugs in our new watchdog code

  Thanks to: Gautham R Shenoy, Nicholas Piggin"

* tag 'powerpc-4.13-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/watchdog: add locking around init/exit functions
  powerpc/watchdog: Fix marking of stuck CPUs
  powerpc/watchdog: Fix final-check recovered case
  powerpc/watchdog: Moderate touch_nmi_watchdog overhead
  powerpc/watchdog: Improve watchdog lock primitive
  powerpc: NMI IPI improve lock primitive
  powerpc/configs: Re-enable HARD/SOFT lockup detectors
  powerpc/powernv/idle: Disable LOSE_FULL_CONTEXT states when stop-api fails
  Revert "powerpc/64: Avoid restore_math call if possible in syscall exit"
parents b2dbdf2c 96ea91e7
@@ -293,7 +293,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
@@ -324,7 +324,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
@@ -291,7 +291,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
@@ -223,17 +223,27 @@ system_call_exit:
         andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
         bne- .Lsyscall_exit_work
 
-        /* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
-        li r7,MSR_FP
+        andi. r0,r8,MSR_FP
+        beq 2f
 #ifdef CONFIG_ALTIVEC
-        oris r7,r7,MSR_VEC@h
+        andis. r0,r8,MSR_VEC@h
+        bne 3f
 #endif
-        and r0,r8,r7
-        cmpd r0,r7
-        bne .Lsyscall_restore_math
-.Lsyscall_restore_math_cont:
+2:      addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_BOOK3S
+        li r10,MSR_RI
+        mtmsrd r10,1            /* Restore RI */
+#endif
+        bl restore_math
+#ifdef CONFIG_PPC_BOOK3S
+        li r11,0
+        mtmsrd r11,1
+#endif
+        ld r8,_MSR(r1)
+        ld r3,RESULT(r1)
+        li r11,-MAX_ERRNO
 
-        cmpld r3,r11
+3:      cmpld r3,r11
         ld r5,_CCR(r1)
         bge- .Lsyscall_error
 .Lsyscall_error_cont:
@@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
         std r5,_CCR(r1)
         b .Lsyscall_error_cont
 
-.Lsyscall_restore_math:
-        /*
-         * Some initial tests from restore_math to avoid the heavyweight
-         * C code entry and MSR manipulations.
-         */
-        LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
-        and. r0,r0,r8
-        bne 1f
-
-        ld r7,PACACURRENT(r13)
-        lbz r0,THREAD+THREAD_LOAD_FP(r7)
-#ifdef CONFIG_ALTIVEC
-        lbz r6,THREAD+THREAD_LOAD_VEC(r7)
-        add r0,r0,r6
-#endif
-        cmpdi r0,0
-        beq .Lsyscall_restore_math_cont
-
-1:      addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
-        li r10,MSR_RI
-        mtmsrd r10,1            /* Restore RI */
-#endif
-        bl restore_math
-#ifdef CONFIG_PPC_BOOK3S
-        li r11,0
-        mtmsrd r11,1
-#endif
-        /* Restore volatiles, reload MSR from updated one */
-        ld r8,_MSR(r1)
-        ld r3,RESULT(r1)
-        li r11,-MAX_ERRNO
-        b .Lsyscall_restore_math_cont
-
 /* Traced system call support */
 .Lsyscall_dotrace:
         bl save_nvgprs
@@ -511,10 +511,6 @@ void restore_math(struct pt_regs *regs)
 {
         unsigned long msr;
 
-        /*
-         * Syscall exit makes a similar initial check before branching
-         * to restore_math. Keep them in synch.
-         */
         if (!msr_tm_active(regs->msr) &&
                 !current->thread.load_fp && !loadvec(current->thread))
                 return;
@@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
         hard_irq_disable();
         while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
                 raw_local_irq_restore(*flags);
-                cpu_relax();
+                spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
                 raw_local_irq_save(*flags);
                 hard_irq_disable();
         }
@@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
 static void nmi_ipi_lock(void)
 {
         while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
-                cpu_relax();
+                spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
 }
 
 static void nmi_ipi_unlock(void)
@@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
         nmi_ipi_lock_start(&flags);
         while (nmi_ipi_busy_count) {
                 nmi_ipi_unlock_end(&flags);
-                cpu_relax();
+                spin_until_cond(nmi_ipi_busy_count == 0);
                 nmi_ipi_lock_start(&flags);
         }
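The three hunks above share one idea: instead of re-trying atomic_cmpxchg() in a tight loop (each attempt bounces the lock's cache line between CPUs), a waiter now spins on a plain read until the lock looks free and only then retries the atomic operation. Below is a minimal user-space sketch of that test-and-test-and-set pattern in C11 atomics; it is illustrative only and stands in for the kernel's atomic_t, cpu_relax() and spin_until_cond() helpers rather than reproducing them.

#include <stdatomic.h>

static atomic_int lock_word;    /* 0 = unlocked, 1 = locked */

static void lock_acquire(void)
{
        for (;;) {
                int expected = 0;

                /* Only the cmpxchg generates exclusive (invalidating) traffic. */
                if (atomic_compare_exchange_strong_explicit(&lock_word, &expected, 1,
                                                            memory_order_acquire,
                                                            memory_order_relaxed))
                        return;

                /* Wait on an ordinary load until the lock looks free again,
                 * analogous to spin_until_cond(atomic_read(&lock) == 0). */
                while (atomic_load_explicit(&lock_word, memory_order_relaxed) != 0)
                        ;       /* cpu_relax() would go here in kernel code */
        }
}

static void lock_release(void)
{
        atomic_store_explicit(&lock_word, 0, memory_order_release);
}

The read-only inner loop keeps waiting CPUs from generating write traffic on the contended line, which matters most on the NMI IPI paths above where many CPUs can queue up on the same lock at once.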
@@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags)
          * This may be called from low level interrupt handlers at some
          * point in future.
          */
-        local_irq_save(*flags);
-        while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
-                cpu_relax();
+        raw_local_irq_save(*flags);
+        hard_irq_disable(); /* Make it soft-NMI safe */
+        while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
+                raw_local_irq_restore(*flags);
+                spin_until_cond(!test_bit(0, &__wd_smp_lock));
+                raw_local_irq_save(*flags);
+                hard_irq_disable();
+        }
 }
 
 static inline void wd_smp_unlock(unsigned long *flags)
 {
         clear_bit_unlock(0, &__wd_smp_lock);
-        local_irq_restore(*flags);
+        raw_local_irq_restore(*flags);
 }
 
 static void wd_lockup_ipi(struct pt_regs *regs)
@@ -96,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs)
                 nmi_panic(regs, "Hard LOCKUP");
 }
 
-static void set_cpu_stuck(int cpu, u64 tb)
+static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
 {
-        cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
-        cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+        cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
+        cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
         if (cpumask_empty(&wd_smp_cpus_pending)) {
                 wd_smp_last_reset_tb = tb;
                 cpumask_andnot(&wd_smp_cpus_pending,
@@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb)
                                 &wd_smp_cpus_stuck);
         }
 }
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+        set_cpumask_stuck(cpumask_of(cpu), tb);
+}
 
 static void watchdog_smp_panic(int cpu, u64 tb)
 {
@@ -135,11 +144,9 @@ static void watchdog_smp_panic(int cpu, u64 tb)
         }
         smp_flush_nmi_ipi(1000000);
 
-        /* Take the stuck CPU out of the watch group */
-        for_each_cpu(c, &wd_smp_cpus_pending)
-                set_cpu_stuck(c, tb);
+        /* Take the stuck CPUs out of the watch group */
+        set_cpumask_stuck(&wd_smp_cpus_pending, tb);
 
-out:
         wd_smp_unlock(&flags);
 
         printk_safe_flush();
@@ -152,6 +159,11 @@ static void watchdog_smp_panic(int cpu, u64 tb)
 
         if (hardlockup_panic)
                 nmi_panic(NULL, "Hard LOCKUP");
+
+        return;
+
+out:
+        wd_smp_unlock(&flags);
 }
 
 static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
@@ -258,9 +270,11 @@ static void wd_timer_fn(unsigned long data)
 
 void arch_touch_nmi_watchdog(void)
 {
+        unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
         int cpu = smp_processor_id();
 
-        watchdog_timer_interrupt(cpu);
+        if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
+                watchdog_timer_interrupt(cpu);
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
@@ -283,6 +297,8 @@ static void stop_watchdog_timer_on(unsigned int cpu)
 
 static int start_wd_on_cpu(unsigned int cpu)
 {
+        unsigned long flags;
+
         if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
                 WARN_ON(1);
                 return 0;
@@ -297,12 +313,14 @@ static int start_wd_on_cpu(unsigned int cpu)
         if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
                 return 0;
 
+        wd_smp_lock(&flags);
         cpumask_set_cpu(cpu, &wd_cpus_enabled);
         if (cpumask_weight(&wd_cpus_enabled) == 1) {
                 cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
                 wd_smp_last_reset_tb = get_tb();
         }
-        smp_wmb();
+        wd_smp_unlock(&flags);
+
         start_watchdog_timer_on(cpu);
 
         return 0;
@@ -310,12 +328,17 @@ static int start_wd_on_cpu(unsigned int cpu)
 
 static int stop_wd_on_cpu(unsigned int cpu)
 {
+        unsigned long flags;
+
         if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
                 return 0; /* Can happen in CPU unplug case */
 
         stop_watchdog_timer_on(cpu);
 
+        wd_smp_lock(&flags);
         cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+        wd_smp_unlock(&flags);
+
         wd_smp_clear_cpu_pending(cpu, get_tb());
 
         return 0;
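The arch_touch_nmi_watchdog() hunk above rate-limits the refresh: code that calls touch_nmi_watchdog() in a tight loop (for example while dumping a long trace) now only pays for watchdog_timer_interrupt() once per watchdog period, measured in timebase ticks. A stand-alone sketch of that check, using hypothetical names (wd_last_tb, period_ticks, heavy_refresh) in place of the kernel's per-CPU wd_timer_tb, get_tb() and watchdog_timer_interrupt():

#include <stdint.h>

#define EXAMPLE_NR_CPUS 8

/* Hypothetical stand-in for per_cpu(wd_timer_tb, cpu). */
static uint64_t wd_last_tb[EXAMPLE_NR_CPUS];

static void heavy_refresh(int cpu, uint64_t now)
{
        wd_last_tb[cpu] = now;
        /* ... the expensive per-CPU and SMP bookkeeping would go here ... */
}

static void touch_watchdog(int cpu, uint64_t now, uint64_t period_ticks)
{
        /* Skip the heavy work unless a full watchdog period has elapsed
         * since this CPU last refreshed its timestamp. */
        if (now - wd_last_tb[cpu] >= period_ticks)
                heavy_refresh(cpu, now);
}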
@@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
  */
 static u64 pnv_deepest_stop_psscr_val;
 static u64 pnv_deepest_stop_psscr_mask;
+static u64 pnv_deepest_stop_flag;
 static bool deepest_stop_found;
 
 static int pnv_save_sprs_for_deep_states(void)
@@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void)
 
         update_subcore_sibling_mask();
 
-        if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
-                pnv_save_sprs_for_deep_states();
+        if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
+                int rc = pnv_save_sprs_for_deep_states();
+
+                if (likely(!rc))
+                        return;
+
+                /*
+                 * The stop-api is unable to restore hypervisor
+                 * resources on wakeup from platform idle states which
+                 * lose full context. So disable such states.
+                 */
+                supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
+                pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
+                pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
+
+                if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+                    (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
+                        /*
+                         * Use the default stop state for CPU-Hotplug
+                         * if available.
+                         */
+                        if (default_stop_found) {
+                                pnv_deepest_stop_psscr_val =
+                                        pnv_default_stop_val;
+                                pnv_deepest_stop_psscr_mask =
+                                        pnv_default_stop_mask;
+                                pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
+                                        pnv_deepest_stop_psscr_val);
+                        } else { /* Fallback to snooze loop for CPU-Hotplug */
+                                deepest_stop_found = false;
+                                pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
+                        }
+                }
+        }
 }
 
 u32 pnv_get_supported_cpuidle_states(void)
@@ -375,7 +408,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
                                                 pnv_deepest_stop_psscr_val;
                 srr1 = power9_idle_stop(psscr);
 
-        } else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+        } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
+                   (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
                 srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
         } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
                    (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
@@ -553,6 +587,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
                         max_residency_ns = residency_ns[i];
                         pnv_deepest_stop_psscr_val = psscr_val[i];
                         pnv_deepest_stop_psscr_mask = psscr_mask[i];
+                        pnv_deepest_stop_flag = flags[i];
                         deepest_stop_found = true;
                 }
@@ -235,6 +235,7 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
         return -1;
 }
 
+extern u32 pnv_get_supported_cpuidle_states(void);
 static int powernv_add_idle_states(void)
 {
         struct device_node *power_mgt;
@@ -248,6 +249,8 @@ static int powernv_add_idle_states(void)
         const char *names[CPUIDLE_STATE_MAX];
         u32 has_stop_states = 0;
         int i, rc;
+        u32 supported_flags = pnv_get_supported_cpuidle_states();
+
 
         /* Currently we have snooze statically defined */
@@ -362,6 +365,13 @@ static int powernv_add_idle_states(void)
         for (i = 0; i < dt_idle_states; i++) {
                 unsigned int exit_latency, target_residency;
                 bool stops_timebase = false;
+
+                /*
+                 * Skip the platform idle state whose flag isn't in
+                 * the supported_cpuidle_states flag mask.
+                 */
+                if ((flags[i] & supported_flags) != flags[i])
+                        continue;
                 /*
                  * If an idle state has exit latency beyond
                  * POWERNV_THRESHOLD_LATENCY_NS then don't use it
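The added check (flags[i] & supported_flags) != flags[i] reads as: skip any device-tree idle state that requires a capability the platform did not report as supported, which after the stop-api failure handled in the powernv idle hunks above means any state that loses full context. A small self-contained example of the same mask test, with made-up flag values rather than the real OPAL_PM_* constants:

#include <stdio.h>
#include <stdint.h>

/* Illustrative flag bits, not the real OPAL_PM_* values. */
#define PM_NAP            0x01u
#define PM_SLEEP          0x02u
#define PM_LOSE_FULL_CTX  0x04u

int main(void)
{
        uint32_t state_flags[] = { PM_NAP, PM_SLEEP, PM_SLEEP | PM_LOSE_FULL_CTX };
        uint32_t supported = PM_NAP | PM_SLEEP; /* LOSE_FULL_CONTEXT disabled */
        size_t n = sizeof(state_flags) / sizeof(state_flags[0]);

        for (size_t i = 0; i < n; i++) {
                /* Register a state only if every flag it needs is supported. */
                if ((state_flags[i] & supported) != state_flags[i]) {
                        printf("state %zu skipped\n", i);
                        continue;
                }
                printf("state %zu registered\n", i);
        }
        return 0;
}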