Commit 15c659ff authored by Michael Ellerman

Merge branch 'fixes' into next

There's a non-trivial dependency between some commits we want to put in
next and the KVM prefetch workaround that went into fixes. So merge
fixes into next.
parents 516fa8d0 1a92a80a
@@ -59,6 +59,19 @@ machine-$(CONFIG_PPC64) += 64
 machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le
 UTS_MACHINE := $(subst $(space),,$(machine-y))
 
+# XXX This needs to be before we override LD below
+ifdef CONFIG_PPC32
+KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
+else
+ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
+# Have the linker provide sfpr if possible.
+# There is a corresponding test in arch/powerpc/lib/Makefile
+KBUILD_LDFLAGS_MODULE += --save-restore-funcs
+else
+KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
+endif
+endif
+
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
 override LD += -EL
 LDEMULATION := lppc
@@ -190,18 +203,6 @@ else
 CHECKFLAGS += -D__LITTLE_ENDIAN__
 endif
 
-ifdef CONFIG_PPC32
-KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
-else
-ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
-# Have the linker provide sfpr if possible.
-# There is a corresponding test in arch/powerpc/lib/Makefile
-KBUILD_LDFLAGS_MODULE += --save-restore-funcs
-else
-KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
-endif
-endif
-
 ifeq ($(CONFIG_476FPE_ERR46),y)
 KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
 -T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds
......
@@ -25,12 +25,20 @@ compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ
 BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
 -fno-strict-aliasing -Os -msoft-float -pipe \
 -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
--isystem $(shell $(CROSS32CC) -print-file-name=include) \
 -D$(compress-y)
 
+BOOTCC := $(CC)
 ifdef CONFIG_PPC64_BOOT_WRAPPER
 BOOTCFLAGS += -m64
+else
+BOOTCFLAGS += -m32
+ifdef CROSS32_COMPILE
+BOOTCC := $(CROSS32_COMPILE)gcc
+endif
 endif
 
+BOOTCFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include)
+
 ifdef CONFIG_CPU_BIG_ENDIAN
 BOOTCFLAGS += -mbig-endian
 else
@@ -183,10 +191,10 @@ clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \
 empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
 
 quiet_cmd_bootcc = BOOTCC $@
-cmd_bootcc = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
+cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
 
 quiet_cmd_bootas = BOOTAS $@
-cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
+cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
 
 quiet_cmd_bootar = BOOTAR $@
 cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
......
@@ -293,7 +293,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
......
@@ -324,7 +324,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_DEBUG_MUTEXES=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
......
@@ -291,7 +291,8 @@ CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
 CONFIG_LATENCYTOP=y
 CONFIG_SCHED_TRACER=y
 CONFIG_BLK_DEV_IO_TRACE=y
......
@@ -59,13 +59,14 @@ extern struct patb_entry *partition_tb;
 #define PRTS_MASK 0x1f /* process table size field */
 #define PRTB_MASK 0x0ffffffffffff000UL
 
-/*
- * Limit process table to PAGE_SIZE table. This
- * also limit the max pid we can support.
- * MAX_USER_CONTEXT * 16 bytes of space.
- */
-#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4)
-#define PRTB_ENTRIES (1ul << CONTEXT_BITS)
+/* Number of supported PID bits */
+extern unsigned int mmu_pid_bits;
+
+/* Base PID to allocate from */
+extern unsigned int mmu_base_pid;
+
+#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4)
+#define PRTB_ENTRIES (1ul << mmu_pid_bits)
 
 /*
  * Power9 currently only support 64K partition table size.
......
@@ -614,9 +614,17 @@ static inline pte_t pte_mkdevmap(pte_t pte)
 return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
 }
 
+/*
+ * This is potentially called with a pmd as the argument, in which case it's not
+ * safe to check _PAGE_DEVMAP unless we also confirm that _PAGE_PTE is set.
+ * That's because the bit we use for _PAGE_DEVMAP is not reserved for software
+ * use in page directory entries (ie. non-ptes).
+ */
 static inline int pte_devmap(pte_t pte)
 {
-return !!(pte_raw(pte) & cpu_to_be64(_PAGE_DEVMAP));
+u64 mask = cpu_to_be64(_PAGE_DEVMAP | _PAGE_PTE);
+
+return (pte_raw(pte) & mask) == mask;
 }
 
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
......
@@ -45,7 +45,7 @@ extern void set_context(unsigned long id, pgd_t *pgd);
 #ifdef CONFIG_PPC_BOOK3S_64
 extern void radix__switch_mmu_context(struct mm_struct *prev,
 struct mm_struct *next);
 static inline void switch_mmu_context(struct mm_struct *prev,
 struct mm_struct *next,
 struct task_struct *tsk)
@@ -67,6 +67,12 @@ extern void __destroy_context(unsigned long context_id);
 extern void mmu_context_init(void);
 #endif
 
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
+extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
+#else
+static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
+#endif
+
 extern void switch_cop(struct mm_struct *next);
 extern int use_cop(unsigned long acop, struct mm_struct *mm);
 extern void drop_cop(unsigned long acop, struct mm_struct *mm);
@@ -79,10 +85,32 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
 struct mm_struct *next,
 struct task_struct *tsk)
 {
+bool new_on_cpu = false;
+
 /* Mark this context has been used on the new CPU */
-if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next)))
+if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) {
 cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+
+/*
+ * This full barrier orders the store to the cpumask above vs
+ * a subsequent operation which allows this CPU to begin loading
+ * translations for next.
+ *
+ * When using the radix MMU that operation is the load of the
+ * MMU context id, which is then moved to SPRN_PID.
+ *
+ * For the hash MMU it is either the first load from slb_cache
+ * in switch_slb(), and/or the store of paca->mm_ctx_id in
+ * copy_mm_to_paca().
+ *
+ * On the read side the barrier is in pte_xchg(), which orders
+ * the store to the PTE vs the load of mm_cpumask.
+ */
+smp_mb();
+
+new_on_cpu = true;
+}
 
 /* 32-bit keeps track of the current PGDIR in the thread struct */
 #ifdef CONFIG_PPC32
 tsk->thread.pgdir = next->pgd;
@@ -103,6 +131,10 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev,
 if (cpu_has_feature(CPU_FTR_ALTIVEC))
 asm volatile ("dssall");
 #endif /* CONFIG_ALTIVEC */
+
+if (new_on_cpu)
+radix_kvm_prefetch_workaround(next);
+
 /*
 * The actual HW switching method differs between the various
 * sub architectures. Out of line for now
......
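The smp_mb() added in the hunk above pairs with the full barrier in pte_xchg(). As a reading aid only, here is a minimal standalone C11 sketch of that store-then-barrier-then-load pattern; the names (cpu_in_mm, shared_pte, switch_side, flush_side) are illustrative stand-ins rather than kernel APIs, and the fences stand in for smp_mb() and the barrier implied by __cmpxchg_u64().

#include <stdatomic.h>
#include <stdbool.h>

/* Illustrative stand-ins only, not kernel objects. */
atomic_bool cpu_in_mm;   /* plays the role of this CPU's bit in mm_cpumask */
atomic_long shared_pte;  /* plays the role of a page table entry */

/* switch_mm side: publish "this CPU now uses the mm", then begin loading
 * translations (modelled here as reading the PTE). */
long switch_side(void)
{
	atomic_store_explicit(&cpu_in_mm, true, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);           /* the new smp_mb() */
	return atomic_load_explicit(&shared_pte, memory_order_relaxed);
}

/* pte_xchg side: update the PTE with a full-barrier read-modify-write, then
 * read the cpumask to decide which CPUs need a TLB flush. */
bool flush_side(long oldval, long newval)
{
	long expected = oldval;
	atomic_compare_exchange_strong(&shared_pte, &expected, newval);
	return atomic_load_explicit(&cpu_in_mm, memory_order_relaxed);
}

int main(void)
{
	flush_side(0, 42);                  /* one side publishes a PTE...      */
	return switch_side() == 42 ? 0 : 1; /* ...the other side observes it    */
}

Because each side orders its own store before its read of the other side's flag, at least one of the two CPUs is guaranteed to see the other's update, which is the property the new kernel comment relies on.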
@@ -87,6 +87,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
 unsigned long *p = (unsigned long *)ptep;
 __be64 prev;
 
+/* See comment in switch_mm_irqs_off() */
 prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old),
 (__force unsigned long)pte_raw(new));
......
@@ -62,6 +62,7 @@ static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
 {
 unsigned long *p = (unsigned long *)ptep;
 
+/* See comment in switch_mm_irqs_off() */
 return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new));
 }
 #endif
......
@@ -223,17 +223,27 @@ system_call_exit:
 andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
 bne- .Lsyscall_exit_work
 
-/* If MSR_FP and MSR_VEC are set in user msr, then no need to restore */
-li r7,MSR_FP
+andi. r0,r8,MSR_FP
+beq 2f
 #ifdef CONFIG_ALTIVEC
-oris r7,r7,MSR_VEC@h
+andis. r0,r8,MSR_VEC@h
+bne 3f
 #endif
-and r0,r8,r7
-cmpd r0,r7
-bne .Lsyscall_restore_math
-.Lsyscall_restore_math_cont:
-cmpld r3,r11
+2: addi r3,r1,STACK_FRAME_OVERHEAD
+#ifdef CONFIG_PPC_BOOK3S
+li r10,MSR_RI
+mtmsrd r10,1 /* Restore RI */
+#endif
+bl restore_math
+#ifdef CONFIG_PPC_BOOK3S
+li r11,0
+mtmsrd r11,1
+#endif
+ld r8,_MSR(r1)
+ld r3,RESULT(r1)
+li r11,-MAX_ERRNO
+
+3: cmpld r3,r11
 ld r5,_CCR(r1)
 bge- .Lsyscall_error
 .Lsyscall_error_cont:
@@ -267,40 +277,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 std r5,_CCR(r1)
 b .Lsyscall_error_cont
 
-.Lsyscall_restore_math:
-/*
- * Some initial tests from restore_math to avoid the heavyweight
- * C code entry and MSR manipulations.
- */
-LOAD_REG_IMMEDIATE(r0, MSR_TS_MASK)
-and. r0,r0,r8
-bne 1f
-
-ld r7,PACACURRENT(r13)
-lbz r0,THREAD+THREAD_LOAD_FP(r7)
-#ifdef CONFIG_ALTIVEC
-lbz r6,THREAD+THREAD_LOAD_VEC(r7)
-add r0,r0,r6
-#endif
-cmpdi r0,0
-beq .Lsyscall_restore_math_cont
-
-1: addi r3,r1,STACK_FRAME_OVERHEAD
-#ifdef CONFIG_PPC_BOOK3S
-li r10,MSR_RI
-mtmsrd r10,1 /* Restore RI */
-#endif
-bl restore_math
-#ifdef CONFIG_PPC_BOOK3S
-li r11,0
-mtmsrd r11,1
-#endif
-/* Restore volatiles, reload MSR from updated one */
-ld r8,_MSR(r1)
-ld r3,RESULT(r1)
-li r11,-MAX_ERRNO
-b .Lsyscall_restore_math_cont
-
 /* Traced system call support */
 .Lsyscall_dotrace:
 bl save_nvgprs
......
@@ -1325,10 +1325,18 @@ EXC_VIRT_NONE(0x5800, 0x100)
 std r10,PACA_EXGEN+EX_R13(r13); \
 EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H)
 
+/*
+ * Branch to soft_nmi_interrupt using the emergency stack. The emergency
+ * stack is one that is usable by maskable interrupts so long as MSR_EE
+ * remains off. It is used for recovery when something has corrupted the
+ * normal kernel stack, for example. The "soft NMI" must not use the process
+ * stack because we want irq disabled sections to avoid touching the stack
+ * at all (other than PMU interrupts), so use the emergency stack for this,
+ * and run it entirely with interrupts hard disabled.
+ */
 EXC_COMMON_BEGIN(soft_nmi_common)
 mr r10,r1
-ld r1,PACAEMERGSP(r13)
+ld r1,PACA_NMI_EMERG_SP(r13)
 subi r1,r1,INT_FRAME_SIZE
 EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900,
 system_reset, soft_nmi_interrupt,
......
@@ -514,11 +514,17 @@ pnv_restore_hyp_resource_arch300:
 /*
 * Workaround for POWER9, if we lost resources, the ERAT
 * might have been mixed up and needs flushing. We also need
- * to reload MMCR0 (see comment above).
+ * to reload MMCR0 (see comment above). We also need to set
+ * then clear bit 60 in MMCRA to ensure the PMU starts running.
 */
 blt cr3,1f
 PPC_INVALIDATE_ERAT
 ld r1,PACAR1(r13)
+mfspr r4,SPRN_MMCRA
+ori r4,r4,(1 << (63-60))
+mtspr SPRN_MMCRA,r4
+xori r4,r4,(1 << (63-60))
+mtspr SPRN_MMCRA,r4
 ld r4,_MMCR0(r1)
 mtspr SPRN_MMCR0,r4
 1:
......
@@ -145,6 +145,19 @@ notrace unsigned int __check_irq_replay(void)
 /* Clear bit 0 which we wouldn't clear otherwise */
 local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
 
+if (happened & PACA_IRQ_HARD_DIS) {
+/*
+ * We may have missed a decrementer interrupt if hard disabled.
+ * Check the decrementer register in case we had a rollover
+ * while hard disabled.
+ */
+if (!(happened & PACA_IRQ_DEC)) {
+if (decrementer_check_overflow()) {
+local_paca->irq_happened |= PACA_IRQ_DEC;
+happened |= PACA_IRQ_DEC;
+}
+}
+}
+
 /*
 * Force the delivery of pending soft-disabled interrupts on PS3.
@@ -170,7 +183,7 @@ notrace unsigned int __check_irq_replay(void)
 * in case we also had a rollover while hard disabled
 */
 local_paca->irq_happened &= ~PACA_IRQ_DEC;
-if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow())
+if (happened & PACA_IRQ_DEC)
 return 0x900;
 
 /* Finally check if an external interrupt happened */
......
@@ -363,7 +363,8 @@ void enable_kernel_vsx(void)
 cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
 
-if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) {
+if (current->thread.regs &&
+(current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) {
 check_if_tm_restore_required(current);
 /*
 * If a thread has already been reclaimed then the
@@ -383,7 +384,7 @@ void flush_vsx_to_thread(struct task_struct *tsk)
 {
 if (tsk->thread.regs) {
 preempt_disable();
-if (tsk->thread.regs->msr & MSR_VSX) {
+if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) {
 BUG_ON(tsk != current);
 giveup_vsx(tsk);
 }
@@ -505,10 +506,6 @@ void restore_math(struct pt_regs *regs)
 {
 unsigned long msr;
 
-/*
- * Syscall exit makes a similar initial check before branching
- * to restore_math. Keep them in synch.
- */
 if (!msr_tm_active(regs->msr) &&
 !current->thread.load_fp && !loadvec(current->thread))
 return;
......
@@ -127,12 +127,19 @@ static void flush_tmregs_to_thread(struct task_struct *tsk)
 * If task is not current, it will have been flushed already to
 * it's thread_struct during __switch_to().
 *
- * A reclaim flushes ALL the state.
+ * A reclaim flushes ALL the state or if not in TM save TM SPRs
+ * in the appropriate thread structures from live.
 */
-if (tsk == current && MSR_TM_SUSPENDED(mfmsr()))
-tm_reclaim_current(TM_CAUSE_SIGNAL);
+if (tsk != current)
+return;
+
+if (MSR_TM_SUSPENDED(mfmsr())) {
+tm_reclaim_current(TM_CAUSE_SIGNAL);
+} else {
+tm_enable();
+tm_save_sprs(&(tsk->thread));
+}
 }
 #else
 static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
......
@@ -351,7 +351,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
 hard_irq_disable();
 while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
 raw_local_irq_restore(*flags);
-cpu_relax();
+spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
 raw_local_irq_save(*flags);
 hard_irq_disable();
 }
@@ -360,7 +360,7 @@ static void nmi_ipi_lock_start(unsigned long *flags)
 static void nmi_ipi_lock(void)
 {
 while (atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
-cpu_relax();
+spin_until_cond(atomic_read(&__nmi_ipi_lock) == 0);
 }
 
 static void nmi_ipi_unlock(void)
@@ -475,7 +475,7 @@ int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
 nmi_ipi_lock_start(&flags);
 while (nmi_ipi_busy_count) {
 nmi_ipi_unlock_end(&flags);
-cpu_relax();
+spin_until_cond(nmi_ipi_busy_count == 0);
 nmi_ipi_lock_start(&flags);
 }
@@ -1003,21 +1003,13 @@ static struct sched_domain_topology_level powerpc_topology[] = {
 { NULL, },
 };
 
-static __init long smp_setup_cpu_workfn(void *data __always_unused)
-{
-smp_ops->setup_cpu(boot_cpuid);
-return 0;
-}
-
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 /*
- * We want the setup_cpu() here to be called on the boot CPU, but
- * init might run on any CPU, so make sure it's invoked on the boot
- * CPU.
+ * We are running pinned to the boot CPU, see rest_init().
 */
 if (smp_ops && smp_ops->setup_cpu)
-work_on_cpu_safe(boot_cpuid, smp_setup_cpu_workfn, NULL);
+smp_ops->setup_cpu(boot_cpuid);
 
 if (smp_ops && smp_ops->bringup_done)
 smp_ops->bringup_done();
......
@@ -71,15 +71,20 @@ static inline void wd_smp_lock(unsigned long *flags)
 * This may be called from low level interrupt handlers at some
 * point in future.
 */
-local_irq_save(*flags);
-while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock)))
-cpu_relax();
+raw_local_irq_save(*flags);
+hard_irq_disable(); /* Make it soft-NMI safe */
+while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
+raw_local_irq_restore(*flags);
+spin_until_cond(!test_bit(0, &__wd_smp_lock));
+raw_local_irq_save(*flags);
+hard_irq_disable();
+}
 }
 
 static inline void wd_smp_unlock(unsigned long *flags)
 {
 clear_bit_unlock(0, &__wd_smp_lock);
-local_irq_restore(*flags);
+raw_local_irq_restore(*flags);
 }
 
 static void wd_lockup_ipi(struct pt_regs *regs)
@@ -96,10 +101,10 @@ static void wd_lockup_ipi(struct pt_regs *regs)
 nmi_panic(regs, "Hard LOCKUP");
 }
 
-static void set_cpu_stuck(int cpu, u64 tb)
+static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb)
 {
-cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
-cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask);
+cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask);
 if (cpumask_empty(&wd_smp_cpus_pending)) {
 wd_smp_last_reset_tb = tb;
 cpumask_andnot(&wd_smp_cpus_pending,
@@ -107,6 +112,10 @@ static void set_cpu_stuck(int cpu, u64 tb)
 &wd_smp_cpus_stuck);
 }
 }
+static void set_cpu_stuck(int cpu, u64 tb)
+{
+set_cpumask_stuck(cpumask_of(cpu), tb);
+}
 
 static void watchdog_smp_panic(int cpu, u64 tb)
 {
@@ -135,11 +144,9 @@ static void watchdog_smp_panic(int cpu, u64 tb)
 }
 smp_flush_nmi_ipi(1000000);
 
-/* Take the stuck CPU out of the watch group */
-for_each_cpu(c, &wd_smp_cpus_pending)
-set_cpu_stuck(c, tb);
-out:
+/* Take the stuck CPUs out of the watch group */
+set_cpumask_stuck(&wd_smp_cpus_pending, tb);
+
 wd_smp_unlock(&flags);
 
 printk_safe_flush();
@@ -152,6 +159,11 @@ static void watchdog_smp_panic(int cpu, u64 tb)
 if (hardlockup_panic)
 nmi_panic(NULL, "Hard LOCKUP");
+
+return;
+
+out:
+wd_smp_unlock(&flags);
 }
 
 static void wd_smp_clear_cpu_pending(int cpu, u64 tb)
@@ -261,9 +273,11 @@ static void wd_timer_fn(unsigned long data)
 void arch_touch_nmi_watchdog(void)
 {
+unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
 int cpu = smp_processor_id();
 
-watchdog_timer_interrupt(cpu);
+if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
+watchdog_timer_interrupt(cpu);
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
@@ -286,6 +300,8 @@ static void stop_watchdog_timer_on(unsigned int cpu)
 static int start_wd_on_cpu(unsigned int cpu)
 {
+unsigned long flags;
+
 if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
 WARN_ON(1);
 return 0;
@@ -300,12 +316,14 @@ static int start_wd_on_cpu(unsigned int cpu)
 if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
 return 0;
 
+wd_smp_lock(&flags);
 cpumask_set_cpu(cpu, &wd_cpus_enabled);
 if (cpumask_weight(&wd_cpus_enabled) == 1) {
 cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
 wd_smp_last_reset_tb = get_tb();
 }
-smp_wmb();
+wd_smp_unlock(&flags);
+
 start_watchdog_timer_on(cpu);
 
 return 0;
@@ -313,12 +331,17 @@ static int start_wd_on_cpu(unsigned int cpu)
 static int stop_wd_on_cpu(unsigned int cpu)
 {
+unsigned long flags;
+
 if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
 return 0; /* Can happen in CPU unplug case */
 
 stop_watchdog_timer_on(cpu);
 
+wd_smp_lock(&flags);
 cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+wd_smp_unlock(&flags);
+
 wd_smp_clear_cpu_pending(cpu, get_tb());
 
 return 0;
......
@@ -1443,12 +1443,14 @@ mc_cont:
 ori r6,r6,1
 mtspr SPRN_CTRLT,r6
 4:
-/* Read the guest SLB and save it away */
+/* Check if we are running hash or radix and store it in cr2 */
 ld r5, VCPU_KVM(r9)
 lbz r0, KVM_RADIX(r5)
-cmpwi r0, 0
+cmpwi cr2,r0,0
+
+/* Read the guest SLB and save it away */
 li r5, 0
-bne 3f /* for radix, save 0 entries */
+bne cr2, 3f /* for radix, save 0 entries */
 lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
 mtctr r0
 li r6,0
@@ -1712,11 +1714,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 22:
 
-/* Clear out SLB */
-li r5,0
-slbmte r5,r5
-slbia
-ptesync
-
 /* Restore host values of some registers */
 BEGIN_FTR_SECTION
@@ -1737,10 +1734,56 @@ BEGIN_FTR_SECTION
 mtspr SPRN_PID, r7
 mtspr SPRN_IAMR, r8
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
+#ifdef CONFIG_PPC_RADIX_MMU
+/*
+ * Are we running hash or radix ?
+ */
+beq cr2,3f
+
+/* Radix: Handle the case where the guest used an illegal PID */
+LOAD_REG_ADDR(r4, mmu_base_pid)
+lwz r3, VCPU_GUEST_PID(r9)
+lwz r5, 0(r4)
+cmpw cr0,r3,r5
+blt 2f
+
+/*
+ * Illegal PID, the HW might have prefetched and cached in the TLB
+ * some translations for the LPID 0 / guest PID combination which
+ * Linux doesn't know about, so we need to flush that PID out of
+ * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
+ * the right context.
+ */
+li r0,0
+mtspr SPRN_LPID,r0
+isync
+
+/* Then do a congruence class local flush */
+ld r6,VCPU_KVM(r9)
+lwz r0,KVM_TLB_SETS(r6)
+mtctr r0
+li r7,0x400 /* IS field = 0b01 */
+ptesync
+sldi r0,r3,32 /* RS has PID */
+1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
+addi r7,r7,0x1000
+bdnz 1b
+ptesync
+
+2: /* Flush the ERAT on radix P9 DD1 guest exit */
 BEGIN_FTR_SECTION
 PPC_INVALIDATE_ERAT
 END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1)
+b 4f
+#endif /* CONFIG_PPC_RADIX_MMU */
+
+/* Hash: clear out SLB */
+3: li r5,0
+slbmte r5,r5
+slbia
+ptesync
+4:
 
 /*
 * POWER7/POWER8 guest -> host partition switch code.
 * We don't have to lock against tlbies but we do
......
@@ -124,9 +124,10 @@ static int hash__init_new_context(struct mm_struct *mm)
 static int radix__init_new_context(struct mm_struct *mm)
 {
 unsigned long rts_field;
-int index;
+int index, max_id;
 
-index = alloc_context_id(1, PRTB_ENTRIES - 1);
+max_id = (1 << mmu_pid_bits) - 1;
+index = alloc_context_id(mmu_base_pid, max_id);
 if (index < 0)
 return index;
......
@@ -25,6 +25,9 @@
 #include <trace/events/thp.h>
 
+unsigned int mmu_pid_bits;
+unsigned int mmu_base_pid;
+
 static int native_register_process_table(unsigned long base, unsigned long pg_sz,
 unsigned long table_size)
 {
@@ -265,11 +268,34 @@ static void __init radix_init_pgtable(void)
 for_each_memblock(memory, reg)
 WARN_ON(create_physical_mapping(reg->base,
 reg->base + reg->size));
+
+/* Find out how many PID bits are supported */
+if (cpu_has_feature(CPU_FTR_HVMODE)) {
+if (!mmu_pid_bits)
+mmu_pid_bits = 20;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * When KVM is possible, we only use the top half of the
+ * PID space to avoid collisions between host and guest PIDs
+ * which can cause problems due to prefetch when exiting the
+ * guest with AIL=3
+ */
+mmu_base_pid = 1 << (mmu_pid_bits - 1);
+#else
+mmu_base_pid = 1;
+#endif
+} else {
+/* The guest uses the bottom half of the PID space */
+if (!mmu_pid_bits)
+mmu_pid_bits = 19;
+mmu_base_pid = 1;
+}
+
 /*
 * Allocate Partition table and process table for the
 * host.
 */
-BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large.");
+BUG_ON(PRTB_SIZE_SHIFT > 36);
 process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT);
 /*
 * Fill in the process table.
@@ -343,6 +369,12 @@ static int __init radix_dt_scan_page_sizes(unsigned long node,
 if (type == NULL || strcmp(type, "cpu") != 0)
 return 0;
 
+/* Find MMU PID size */
+prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
+if (prop && size == 4)
+mmu_pid_bits = be32_to_cpup(prop);
+
+/* Grab page size encodings */
 prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
 if (!prop)
 return 0;
......
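As a quick sanity check of the numbers implied by the hunks above (mmu_pid_bits defaulting to 20 on a hypervisor-capable host, mmu_base_pid sitting at the top half of the PID space when KVM is possible, and the new PRTB_* definitions), the following standalone C snippet just recomputes them. It is illustrative arithmetic, not kernel code.

#include <stdio.h>

int main(void)
{
	/* Default when the device tree provides no ibm,mmu-pid-bits property
	 * and the CPU runs in hypervisor mode. */
	unsigned int mmu_pid_bits = 20;

	/* With KVM possible the host allocates from the top half of the PID
	 * space; guests (and the non-KVM case) start at 1. */
	unsigned int mmu_base_pid = 1u << (mmu_pid_bits - 1);
	unsigned int max_id = (1u << mmu_pid_bits) - 1;  /* as in radix__init_new_context() */

	unsigned int prtb_size_shift = mmu_pid_bits + 4; /* 16-byte process table entries */
	unsigned long prtb_entries = 1ul << mmu_pid_bits;

	printf("host PIDs: %u..%u\n", mmu_base_pid, max_id);     /* 524288..1048575 */
	printf("guest PIDs: 1..%u\n", mmu_base_pid - 1);         /* 1..524287 */
	printf("process table: %lu entries, 2^%u bytes\n", prtb_entries, prtb_size_shift);
	return 0;
}

Splitting the PID space this way is what lets the host avoid ever reusing a PID value a guest might have used, which is the collision the prefetch workaround in the KVM exit path and in radix_kvm_prefetch_workaround() is guarding against.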
@@ -36,7 +36,7 @@ void subpage_prot_free(struct mm_struct *mm)
 }
 }
 addr = 0;
-for (i = 0; i < 2; ++i) {
+for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) {
 p = spt->protptrs[i];
 if (!p)
 continue;
......
@@ -12,12 +12,12 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/memblock.h>
-#include <asm/ppc-opcode.h>
 
+#include <asm/ppc-opcode.h>
 #include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/trace.h>
+#include <asm/cputhreads.h>
 
 #define RIC_FLUSH_TLB 0
 #define RIC_FLUSH_PWC 1
@@ -478,3 +478,44 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm,
 else
 radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize);
 }
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
+{
+unsigned int pid = mm->context.id;
+
+if (unlikely(pid == MMU_NO_CONTEXT))
+return;
+
+/*
+ * If this context hasn't run on that CPU before and KVM is
+ * around, there's a slim chance that the guest on another
+ * CPU just brought in obsolete translation into the TLB of
+ * this CPU due to a bad prefetch using the guest PID on
+ * the way into the hypervisor.
+ *
+ * We work around this here. If KVM is possible, we check if
+ * any sibling thread is in KVM. If it is, the window may exist
+ * and thus we flush that PID from the core.
+ *
+ * A potential future improvement would be to mark which PIDs
+ * have never been used on the system and avoid it if the PID
+ * is new and the process has no other cpumask bit set.
+ */
+if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
+int cpu = smp_processor_id();
+int sib = cpu_first_thread_sibling(cpu);
+bool flush = false;
+
+for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
+if (sib == cpu)
+continue;
+if (paca[sib].kvm_hstate.kvm_vcpu)
+flush = true;
+}
+if (flush)
+_tlbiel_pid(pid, RIC_FLUSH_ALL);
+}
+}
+EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
@@ -89,7 +89,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
 goto err;
 
 ret = of_irq_to_resource(np, 0, &res[1]);
-if (!ret)
+if (ret <= 0)
 goto err;
 
 pdev = platform_device_alloc("mpc83xx_spi", i);
......
@@ -56,6 +56,7 @@ u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
 */
 static u64 pnv_deepest_stop_psscr_val;
 static u64 pnv_deepest_stop_psscr_mask;
+static u64 pnv_deepest_stop_flag;
 static bool deepest_stop_found;
 
 static int pnv_save_sprs_for_deep_states(void)
@@ -185,8 +186,40 @@ static void pnv_alloc_idle_core_states(void)
 update_subcore_sibling_mask();
 
-if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT)
-pnv_save_sprs_for_deep_states();
+if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
+int rc = pnv_save_sprs_for_deep_states();
+
+if (likely(!rc))
+return;
+
+/*
+ * The stop-api is unable to restore hypervisor
+ * resources on wakeup from platform idle states which
+ * lose full context. So disable such states.
+ */
+supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
+pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
+pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
+
+if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+(pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
+/*
+ * Use the default stop state for CPU-Hotplug
+ * if available.
+ */
+if (default_stop_found) {
+pnv_deepest_stop_psscr_val =
+pnv_default_stop_val;
+pnv_deepest_stop_psscr_mask =
+pnv_default_stop_mask;
+pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
+pnv_deepest_stop_psscr_val);
+} else { /* Fallback to snooze loop for CPU-Hotplug */
+deepest_stop_found = false;
+pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
+}
+}
+}
 }
 
 u32 pnv_get_supported_cpuidle_states(void)
@@ -397,7 +430,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
 pnv_deepest_stop_psscr_val;
 srr1 = power9_idle_stop(psscr);
-} else if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+} else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
+(idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
 srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
 } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
 (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
@@ -585,6 +619,7 @@ static int __init pnv_power9_idle_init(struct device_node *np, u32 *flags,
 max_residency_ns = residency_ns[i];
 pnv_deepest_stop_psscr_val = psscr_val[i];
 pnv_deepest_stop_psscr_mask = psscr_mask[i];
+pnv_deepest_stop_flag = flags[i];
 deepest_stop_found = true;
 }
......
@@ -1851,6 +1851,14 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
 /* 4GB offset bypasses 32-bit space */
 set_dma_offset(&pdev->dev, (1ULL << 32));
 set_dma_ops(&pdev->dev, &dma_direct_ops);
+} else if (dma_mask >> 32 && dma_mask != DMA_BIT_MASK(64)) {
+/*
+ * Fail the request if a DMA mask between 32 and 64 bits
+ * was requested but couldn't be fulfilled. Ideally we
+ * would do this for 64-bits but historically we have
+ * always fallen back to 32-bits.
+ */
+return -ENOMEM;
 } else {
 dev_info(&pdev->dev, "Using 32-bit DMA via iommu\n");
 set_dma_ops(&pdev->dev, &dma_iommu_ops);
......
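The new branch in the hunk above fires only for DMA masks that are wider than 32 bits yet narrower than a full 64-bit mask. Below is a tiny standalone restatement of just that predicate, for illustration; the surrounding bypass logic is not reproduced, and the helper name is made up here.

#include <stdint.h>
#include <stdio.h>

/* Same formula the kernel's DMA_BIT_MASK() macro expands to. */
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

/* True when a mask can be served neither by the 32-bit IOMMU window nor by
 * the 64-bit direct bypass, i.e. the case the new code rejects with -ENOMEM. */
static int ioda_mask_unserviceable(uint64_t dma_mask)
{
	return (dma_mask >> 32) && dma_mask != DMA_BIT_MASK(64);
}

int main(void)
{
	printf("32-bit mask rejected? %d\n", ioda_mask_unserviceable(DMA_BIT_MASK(32))); /* 0: IOMMU path */
	printf("40-bit mask rejected? %d\n", ioda_mask_unserviceable(DMA_BIT_MASK(40))); /* 1: now fails */
	printf("64-bit mask rejected? %d\n", ioda_mask_unserviceable(DMA_BIT_MASK(64))); /* 0: direct bypass */
	return 0;
}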
@@ -82,7 +82,6 @@ static int pSeries_reconfig_remove_node(struct device_node *np)
 of_detach_node(np);
 of_node_put(parent);
-of_node_put(np); /* Must decrement the refcount */
 return 0;
 }
......
@@ -235,6 +235,7 @@ static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
 return -1;
 }
 
+extern u32 pnv_get_supported_cpuidle_states(void);
 static int powernv_add_idle_states(void)
 {
 struct device_node *power_mgt;
@@ -248,6 +249,8 @@ static int powernv_add_idle_states(void)
 const char *names[CPUIDLE_STATE_MAX];
 u32 has_stop_states = 0;
 int i, rc;
+u32 supported_flags = pnv_get_supported_cpuidle_states();
+
 /* Currently we have snooze statically defined */
@@ -362,6 +365,13 @@ static int powernv_add_idle_states(void)
 for (i = 0; i < dt_idle_states; i++) {
 unsigned int exit_latency, target_residency;
 bool stops_timebase = false;
+
+/*
+ * Skip the platform idle state whose flag isn't in
+ * the supported_cpuidle_states flag mask.
+ */
+if ((flags[i] & supported_flags) != flags[i])
+continue;
 /*
 * If an idle state has exit latency beyond
 * POWERNV_THRESHOLD_LATENCY_NS then don't use it
......