Commit 8001a975 authored by Linus Torvalds

Merge tag 'powerpc-4.13-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
 "All fixes for code that went in this cycle.

   - a revert of an optimisation to the syscall exit path, which could
     lead to an oops on either older machines or machines with > 1TB of
     memory

   - disable some deep idle states if the firmware configuration for
     them fails

   - re-enable HARD/SOFT lockup detectors in defconfigs after a Kconfig
     change

   - six fairly small patches fixing bugs in our new watchdog code

  Thanks to: Gautham R Shenoy, Nicholas Piggin"

* tag 'powerpc-4.13-6' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/watchdog: add locking around init/exit functions
  powerpc/watchdog: Fix marking of stuck CPUs
  powerpc/watchdog: Fix final-check recovered case
  powerpc/watchdog: Moderate touch_nmi_watchdog overhead
  powerpc/watchdog: Improve watchdog lock primitive
  powerpc: NMI IPI improve lock primitive
  powerpc/configs: Re-enable HARD/SOFT lockup detectors
  powerpc/powernv/idle: Disable LOSE_FULL_CONTEXT states when stop-api fails
  Revert "powerpc/64: Avoid restore_math call if possible in syscall exit"
parents b2dbdf2c 96ea91e7
@@ -293,7 +293,8 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
@@ -324,7 +324,8 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_DEBUG_MUTEXES=y
CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
@@ -291,7 +291,8 @@ CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_LOCKUP_DETECTOR=y
CONFIG_SOFTLOCKUP_DETECTOR=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
@@ -223,17 +223,27 @@ system_call_exit:
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
bne- .Lsyscall_exit_work
/* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
li r7,MSR_FP
andi. r0,r8,MSR_FP
beq 2f
#ifdef CONFIG_ALTIVEC
oris r7,r7,MSR_VEC@h
andis. r0,r8,MSR_VEC@h
bne 3f
#endif
and r0,r8,r7
cmpd r0,r7
bne .Lsyscall_restore_math
.Lsyscall_restore_math_cont:
2: addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_BOOK3S
li r10,MSR_RI
mtmsrd r10,1 /* Restore RI */
#endif
bl restore_math
#ifdef CONFIG_PPC_BOOK3S
li r11,0
mtmsrd r11,1
#endif
ld r8,_MSR(r1)
ld r3,RESULT(r1)
li r11,-MAX_ERRNO
cmpld r3,r11
3: cmpld r3,r11
ld r5,_CCR(r1)
bge- .Lsyscall_error
.Lsyscall_error_cont:
@@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r5,_CCR(r1)
b .Lsyscall_error_cont
.Lsyscall_restore_math:
/*
* Some initial tests from restore_math to avoid the heavyweight
* C code entry and MSR manipulations.
*/
LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
and. r0,r0,r8
bne 1f
ld r7,PACACURRENT(r13)
lbz r0,THREAD+THREAD_LOAD_FP(r7)
#ifdef CONFIG_ALTIVEC
lbz r6,THREAD+THREAD_LOAD_VEC(r7)
add r0,r0,r6
#endif
cmpdi r0,0
beq .Lsyscall_restore_math_cont
1: addi r3,r1,STACK_FRAME_OVERHEAD
#ifdef CONFIG_PPC_BOOK3S
li r10,MSR_RI
mtmsrd r10,1 /* Restore RI */
#endif
bl restore_math
#ifdef CONFIG_PPC_BOOK3S
li r11,0
mtmsrd r11,1
#endif
/* Restore volatiles, reload MSR from updated one */
ld r8,_MSR(r1)
ld r3,RESULT(r1)
li r11,-MAX_ERRNO
b .Lsyscall_restore_math_cont
/* Traced system call support */
.Lsyscall_dotrace:
bl save_nvgprs
@@ -511,10 +511,6 @@ void restore_math(struct pt_regs *regs)
{
unsigned long msr;
/*
* Syscall exit makes a similar initial check before branching
* to restore_math. Keep them in synch.
*/
if (!msr_tm_active(regs->msr) &&
!current->thread.load_fp && !loadvec(current->thread))
return;
@@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
hard_irq_disable();
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
raw_local_irq_restore(*flags);
cpu_relax();
spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
raw_local_irq_save(*flags);
hard_irq_disable();
}
@@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
static void nmi_ipi_lock(void)
{
while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
cpu_relax();
spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
}
static void nmi_ipi_unlock(void)
@@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
nmi_ipi_lock_start(&flags);
while (nmi_ipi_busy_count) {
nmi_ipi_unlock_end(&flags);
cpu_relax();
spin_until_cond(nmi_ipi_busy_count == 0);
nmi_ipi_lock_start(&flags);
}
@@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags)
* This may be called from low level interrupt handlers at some
* point in future.
*/
local_irq_save(*flags);
while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
cpu_relax();
raw_local_irq_save(*flags);
hard_irq_disable(); /* Make it soft-NMI safe */
while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
raw_local_irq_restore(*flags);
spin_until_cond(!test_bit(0, &__wd_smp_lock));
raw_local_irq_save(*flags);
hard_irq_disable();
}
}
static inline void wd_smp_unlock(unsigned long *flags)
{
clear_bit_unlock(0, &__wd_smp_lock);
local_irq_restore(*flags);
raw_local_irq_restore(*flags);
}
static void wd_lockup_ipi(struct pt_regs *regs)
@@ -96,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs)
nmi_panic(regs, "Hard LOCKUP");
}
static void set_cpu_stuck(int cpu, u64 tb)
static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
{
cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
if (cpumask_empty(&wd_smp_cpus_pending)) {
wd_smp_last_reset_tb = tb;
cpumask_andnot(&wd_smp_cpus_pending,
@@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb)
&wd_smp_cpus_stuck);
}
}
static void set_cpu_stuck(int cpu, u64 tb)
{
set_cpumask_stuck(cpumask_of(cpu), tb);
}
static void watchdog_smp_panic(int cpu, u64 tb)
{
@@ -135,11 +144,9 @@ static void watchdog_smp_panic(int cpu, u64 tb)
}
smp_flush_nmi_ipi(1000000);
/* Take the stuck CPU out of the watch group */
for_each_cpu(c, &wd_smp_cpus_pending)
set_cpu_stuck(c, tb);
/* Take the stuck CPUs out of the watch group */
set_cpumask_stuck(&wd_smp_cpus_pending, tb);
out:
wd_smp_unlock(&flags);
printk_safe_flush();
@@ -152,6 +159,11 @@ static void watchdog_smp_panic(int cpu, u64 tb)
if (hardlockup_panic)
nmi_panic(NULL, "Hard LOCKUP");
return;
out:
wd_smp_unlock(&flags);
}
static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
@@ -258,9 +270,11 @@ static void wd_timer_fn(unsigned long data)
void arch_touch_nmi_watchdog(void)
{
unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
int cpu = smp_processor_id();
watchdog_timer_interrupt(cpu);
if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
watchdog_timer_interrupt(cpu);
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
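
The hunk above moderates arch_touch_nmi_watchdog(): instead of unconditionally calling watchdog_timer_interrupt() on every touch, it only does so once a full timer period of timebase ticks has elapsed since the per-CPU timestamp was last updated. A small stand-alone sketch of that rate-limiting pattern (generic C with a monotonic clock standing in for the timebase; every name below is hypothetical):

#include <stdint.h>
#include <time.h>

#define DEMO_PERIOD_NS (100ull * 1000 * 1000)	/* pretend the timer period is 100ms */

static uint64_t demo_last_ns;			/* per-CPU (wd_timer_tb) in the real code */

static uint64_t demo_now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

/* The expensive part; in the kernel this role is played by watchdog_timer_interrupt(cpu). */
static void demo_heavy_update(void)
{
	demo_last_ns = demo_now_ns();
}

/* Called very frequently from busy code paths, like touch_nmi_watchdog(). */
static void demo_touch(void)
{
	if (demo_now_ns() - demo_last_ns >= DEMO_PERIOD_NS)
		demo_heavy_update();
}
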
@@ -283,6 +297,8 @@ static void stop_watchdog_timer_on(unsigned int cpu)
static int start_wd_on_cpu(unsigned int cpu)
{
unsigned long flags;
if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
WARN_ON(1);
return 0;
@@ -297,12 +313,14 @@ static int start_wd_on_cpu(unsigned int cpu)
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
return 0;
wd_smp_lock(&flags);
cpumask_set_cpu(cpu, &wd_cpus_enabled);
if (cpumask_weight(&wd_cpus_enabled) == 1) {
cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
wd_smp_last_reset_tb = get_tb();
}
smp_wmb();
wd_smp_unlock(&flags);
start_watchdog_timer_on(cpu);
return 0;
@@ -310,12 +328,17 @@ static int stop_wd_on_cpu(unsigned int cpu)
static int stop_wd_on_cpu(unsigned int cpu)
{
unsigned long flags;
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
return 0; /* Can happen in CPU unplug case */
stop_watchdog_timer_on(cpu);
wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
wd_smp_unlock(&flags);
wd_smp_clear_cpu_pending(cpu, get_tb());
return 0;
@@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
*/
static u64 pnv_deepest_stop_psscr_val;
static u64 pnv_deepest_stop_psscr_mask;
static u64 pnv_deepest_stop_flag;
static bool deepest_stop_found;
static int pnv_save_sprs_for_deep_states(void)
@@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void)
update_subcore_sibling_mask();
if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
pnv_save_sprs_for_deep_states();
if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
int rc = pnv_save_sprs_for_deep_states();
if (likely(!rc))
return;
/*
* The stop-api is unable to restore hypervisor
* resources on wakeup from platform idle states which
* lose full context. So disable such states.
*/
supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
if (cpu_has_feature(CPU_FTR_ARCH_300) &&
(pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
/*
* Use the default stop state for CPU-Hotplug
* if available.
*/
if (default_stop_found) {
pnv_deepest_stop_psscr_val =
pnv_default_stop_val;
pnv_deepest_stop_psscr_mask =
pnv_default_stop_mask;
pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
pnv_deepest_stop_psscr_val);
} else { /* Fallback to snooze loop for CPU-Hotplug */
deepest_stop_found = false;
pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
}
}
}
}
u32 pnv_get_supported_cpuidle_states(void)
@@ -375,7 +408,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
pnv_deepest_stop_psscr_val;
srr1 = power9_idle_stop(psscr);
} else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
} else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
(idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
} else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
(idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
@@ -553,6 +587,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
max_residency_ns = residency_ns[i];
pnv_deepest_stop_psscr_val = psscr_val[i];
pnv_deepest_stop_psscr_mask = psscr_mask[i];
pnv_deepest_stop_flag = flags[i];
deepest_stop_found = true;
}
@@ -235,6 +235,7 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
return -1;
}
extern u32 pnv_get_supported_cpuidle_states(void);
static int powernv_add_idle_states(void)
{
struct device_node *power_mgt;
@@ -248,6 +249,8 @@ static int powernv_add_idle_states(void)
const char *names[CPUIDLE_STATE_MAX];
u32 has_stop_states = 0;
int i, rc;
u32 supported_flags = pnv_get_supported_cpuidle_states();
/* Currently we have snooze statically defined */
@@ -362,6 +365,13 @@ static int powernv_add_idle_states(void)
for (i = 0; i < dt_idle_states; i++) {
unsigned int exit_latency, target_residency;
bool stops_timebase = false;
/*
* Skip the platform idle state whose flag isn't in
* the supported_cpuidle_states flag mask.
*/
if ((flags[i] & supported_flags) != flags[i])
continue;
/*
* If an idle state has exit latency beyond
* POWERNV_THRESHOLD_LATENCY_NS then don't use it
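
The new check above filters device-tree idle states against the mask returned by pnv_get_supported_cpuidle_states(): a state is kept only if every flag bit it carries is also present in the supported mask, which is what the (flags[i] & supported_flags) != flags[i] comparison tests. A small self-contained illustration of that subset test (plain C; the DEMO_* bits are hypothetical stand-ins for the OPAL_PM_* flags):

#include <assert.h>
#include <stdint.h>

/* Hypothetical flag bits standing in for the OPAL_PM_* idle-state flags. */
#define DEMO_PM_NAP			0x1u
#define DEMO_PM_LOSE_FULL_CONTEXT	0x4u

/* True when every bit set in state_flags is also set in supported. */
static int demo_state_supported(uint32_t state_flags, uint32_t supported)
{
	return (state_flags & supported) == state_flags;
}

int main(void)
{
	/* As if the stop-api failed and LOSE_FULL_CONTEXT was cleared from the mask. */
	uint32_t supported = DEMO_PM_NAP;

	assert(demo_state_supported(DEMO_PM_NAP, supported));
	assert(!demo_state_supported(DEMO_PM_NAP | DEMO_PM_LOSE_FULL_CONTEXT, supported));
	return 0;
}

This is how clearing OPAL_PM_LOSE_FULL_CONTEXT in the powernv idle code propagates to cpuidle: any device-tree state carrying that flag is skipped.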