Commit 5e220483 authored by Linus Torvalds

Merge tag 'powerpc-4.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:

 - a fix for hugetlb with 4K pages, broken by our recent changes for
   split PMD PTL.

 - set the correct assembler machine type on e500mc, needed since
   binutils 2.26 introduced two forms for the "wait" instruction.

 - a fix for potential missed TLB flushes with MADV_[FREE|DONTNEED] etc.
   and THP on Power9 Radix.

 - three fixes to try and make our panic handling more robust by hard
   disabling interrupts, and not marking stopped CPUs as offline because
   they haven't been properly offlined.

 - three other minor fixes.

Thanks to: Aneesh Kumar K.V, Michael Jeanson, Nicholas Piggin.

* tag 'powerpc-4.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/mm/hash/4k: Free hugetlb page table caches correctly.
  powerpc/64s/radix: Fix radix_kvm_prefetch_workaround paca access of not possible CPU
  powerpc/64s: Fix build failures with CONFIG_NMI_IPI=n
  powerpc/64: hard disable irqs on the panic()ing CPU
  powerpc: smp_send_stop do not offline stopped CPUs
  powerpc/64: hard disable irqs in panic_smp_self_stop
  powerpc/64s: Fix DT CPU features Power9 DD2.1 logic
  powerpc/64s/radix: Fix MADV_[FREE|DONTNEED] TLB flush miss problem with THP
  powerpc/e500mc: Set assembler machine type to e500mc
parents 7ab366e4 fadd03c6
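
For context on the MADV_[FREE|DONTNEED] item above: a minimal userspace sketch (illustration only, not part of this series) of the madvise() call whose kernel-side TLB flushing the Radix/THP fix below repairs. Everything in it is standard libc/syscall usage; the mapping size and fill pattern are arbitrary choices.

/*
 * Sketch: touch a THP-sized anonymous mapping, then drop it with
 * MADV_DONTNEED. The kernel must flush any TLB entries (including 2M
 * THP entries) covering the range; a concurrent invalidation on the
 * same mm is the kind of race the Radix fix below guards against.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 16 * 1024 * 1024;	/* large enough to be THP-backed */
	char *p;

	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	memset(p, 0xab, len);		/* fault the pages in */

	if (madvise(p, len, MADV_DONTNEED))	/* zap ptes; forces a TLB flush */
		perror("madvise(MADV_DONTNEED)");

	munmap(p, len);
	return 0;
}
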
@@ -244,6 +244,7 @@ cpu-as-$(CONFIG_4xx) += -Wa,-m405
 cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec)
 cpu-as-$(CONFIG_E200) += -Wa,-me200
 cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4
+cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc)
 
 KBUILD_AFLAGS += $(cpu-as-y)
 KBUILD_CFLAGS += $(cpu-as-y)
......
@@ -108,6 +108,7 @@ static inline void pgtable_free(void *table, unsigned index_size)
 }
 
 #define check_pgt_cache() do { } while (0)
+#define get_hugepd_cache_index(x) (x)
 #ifdef CONFIG_SMP
 static inline void pgtable_free_tlb(struct mmu_gather *tlb,
......
@@ -49,6 +49,27 @@ static inline int hugepd_ok(hugepd_t hpd)
 }
 
 #define is_hugepd(hpd) (hugepd_ok(hpd))
+
+/*
+ * 16M and 16G huge page directory tables are allocated from slab cache
+ *
+ */
+#define H_16M_CACHE_INDEX (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE - 24)
+#define H_16G_CACHE_INDEX \
+	(PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + H_PUD_INDEX_SIZE - 34)
+
+static inline int get_hugepd_cache_index(int index)
+{
+	switch (index) {
+	case H_16M_CACHE_INDEX:
+		return HTLB_16M_INDEX;
+	case H_16G_CACHE_INDEX:
+		return HTLB_16G_INDEX;
+	default:
+		BUG();
+	}
+	/* should not reach */
+}
 #else /* !CONFIG_HUGETLB_PAGE */
 static inline int pmd_huge(pmd_t pmd) { return 0; }
 static inline int pud_huge(pud_t pud) { return 0; }
......
@@ -45,8 +45,17 @@ static inline int hugepd_ok(hugepd_t hpd)
 {
 	return 0;
 }
 
 #define is_hugepd(pdep) 0
+
+/*
+ * This should never get called
+ */
+static inline int get_hugepd_cache_index(int index)
+{
+	BUG();
+}
+
 #else /* !CONFIG_HUGETLB_PAGE */
 static inline int pmd_huge(pmd_t pmd) { return 0; }
 static inline int pud_huge(pud_t pud) { return 0; }
......
@@ -287,6 +287,11 @@ enum pgtable_index {
 	PMD_INDEX,
 	PUD_INDEX,
 	PGD_INDEX,
+	/*
+	 * Below are used with 4k page size and hugetlb
+	 */
+	HTLB_16M_INDEX,
+	HTLB_16G_INDEX,
 };
 
 extern unsigned long __vmalloc_start;
......
@@ -8,7 +8,7 @@ extern void arch_touch_nmi_watchdog(void);
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
 
-#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_STACKTRACE)
+#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE)
 extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
 					   bool exclude_self);
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
......
@@ -109,6 +109,7 @@ static inline void pgtable_free(void *table, unsigned index_size)
 }
 
 #define check_pgt_cache() do { } while (0)
+#define get_hugepd_cache_index(x) (x)
 #ifdef CONFIG_SMP
 static inline void pgtable_free_tlb(struct mmu_gather *tlb,
......
@@ -141,6 +141,7 @@ static inline void pgtable_free(void *table, int shift)
 	}
 }
+#define get_hugepd_cache_index(x) (x)
 #ifdef CONFIG_SMP
 static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 {
......
@@ -711,7 +711,8 @@ static __init void cpufeatures_cpu_quirks(void)
 		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
 		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
 		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
-	} else /* DD2.1 and up have DD2_1 */
+	} else if ((version & 0xffff0000) == 0x004e0000)
+		/* DD2.1 and up have DD2_1 */
 		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
 
 	if ((version & 0xffff0000) == 0x004e0000) {
......
@@ -700,12 +700,19 @@ EXPORT_SYMBOL(check_legacy_ioport);
 static int ppc_panic_event(struct notifier_block *this,
                              unsigned long event, void *ptr)
 {
+	/*
+	 * panic does a local_irq_disable, but we really
+	 * want interrupts to be hard disabled.
+	 */
+	hard_irq_disable();
+
 	/*
 	 * If firmware-assisted dump has been registered then trigger
 	 * firmware-assisted dump and let firmware handle everything else.
 	 */
 	crash_fadump(NULL, ptr);
-	ppc_md.panic(ptr);  /* May not return */
+	if (ppc_md.panic)
+		ppc_md.panic(ptr);  /* May not return */
 	return NOTIFY_DONE;
 }
 
@@ -716,7 +723,8 @@ static struct notifier_block ppc_panic_block = {
 void __init setup_panic(void)
 {
-	if (!ppc_md.panic)
+	/* PPC64 always does a hard irq disable in its panic handler */
+	if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic)
 		return;
 	atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
 }
......
@@ -387,6 +387,14 @@ void early_setup_secondary(void)
 #endif /* CONFIG_SMP */
 
+void panic_smp_self_stop(void)
+{
+	hard_irq_disable();
+	spin_begin();
+	while (1)
+		spin_cpu_relax();
+}
+
 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
 static bool use_spinloop(void)
 {
......
@@ -600,9 +600,6 @@ static void nmi_stop_this_cpu(struct pt_regs *regs)
 	nmi_ipi_busy_count--;
 	nmi_ipi_unlock();
 
-	/* Remove this CPU */
-	set_cpu_online(smp_processor_id(), false);
-
 	spin_begin();
 	while (1)
 		spin_cpu_relax();
@@ -617,9 +614,6 @@ void smp_send_stop(void)
 static void stop_this_cpu(void *dummy)
 {
-	/* Remove this CPU */
-	set_cpu_online(smp_processor_id(), false);
-
 	hard_irq_disable();
 	spin_begin();
 	while (1)
......
@@ -196,7 +196,7 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
 #endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
 
-#ifdef CONFIG_PPC_BOOK3S_64
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
 static void handle_backtrace_ipi(struct pt_regs *regs)
 {
 	nmi_cpu_backtrace(regs);
@@ -242,4 +242,4 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
 {
 	nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
 }
-#endif /* CONFIG_PPC64 */
+#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
@@ -337,7 +337,8 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
 	if (shift >= pdshift)
 		hugepd_free(tlb, hugepte);
 	else
-		pgtable_free_tlb(tlb, hugepte, pdshift - shift);
+		pgtable_free_tlb(tlb, hugepte,
+				 get_hugepd_cache_index(pdshift - shift));
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
......
@@ -409,6 +409,18 @@ static inline void pgtable_free(void *table, int index)
 	case PUD_INDEX:
 		kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table);
 		break;
+#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
+	/* 16M hugepd directory at pud level */
+	case HTLB_16M_INDEX:
+		BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
+		kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
+		break;
+	/* 16G hugepd directory at the pgd level */
+	case HTLB_16G_INDEX:
+		BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
+		kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
+		break;
+#endif
 	/* We don't free pgd table via RCU callback */
 	default:
 		BUG();
......
@@ -689,22 +689,17 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
 
-void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-			    unsigned long end)
+static inline void __radix__flush_tlb_range(struct mm_struct *mm,
+					unsigned long start, unsigned long end,
+					bool flush_all_sizes)
 {
-	struct mm_struct *mm = vma->vm_mm;
 	unsigned long pid;
 	unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
 	unsigned long page_size = 1UL << page_shift;
 	unsigned long nr_pages = (end - start) >> page_shift;
 	bool local, full;
 
-#ifdef CONFIG_HUGETLB_PAGE
-	if (is_vm_hugetlb_page(vma))
-		return radix__flush_hugetlb_tlb_range(vma, start, end);
-#endif
-
 	pid = mm->context.id;
 	if (unlikely(pid == MMU_NO_CONTEXT))
 		return;
@@ -738,37 +733,64 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			_tlbie_pid(pid, RIC_FLUSH_TLB);
 		}
 	} else {
-		bool hflush = false;
+		bool hflush = flush_all_sizes;
+		bool gflush = flush_all_sizes;
 		unsigned long hstart, hend;
+		unsigned long gstart, gend;
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
-		hend = end >> HPAGE_PMD_SHIFT;
-		if (hstart < hend) {
-			hstart <<= HPAGE_PMD_SHIFT;
-			hend <<= HPAGE_PMD_SHIFT;
+		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 			hflush = true;
+
+		if (hflush) {
+			hstart = (start + PMD_SIZE - 1) & PMD_MASK;
+			hend = end & PMD_MASK;
+			if (hstart == hend)
+				hflush = false;
+		}
+
+		if (gflush) {
+			gstart = (start + PUD_SIZE - 1) & PUD_MASK;
+			gend = end & PUD_MASK;
+			if (gstart == gend)
+				gflush = false;
 		}
-#endif
 
 		asm volatile("ptesync": : :"memory");
 		if (local) {
 			__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
 			if (hflush)
 				__tlbiel_va_range(hstart, hend, pid,
-						HPAGE_PMD_SIZE, MMU_PAGE_2M);
+						PMD_SIZE, MMU_PAGE_2M);
+			if (gflush)
+				__tlbiel_va_range(gstart, gend, pid,
+						PUD_SIZE, MMU_PAGE_1G);
 			asm volatile("ptesync": : :"memory");
 		} else {
 			__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
 			if (hflush)
 				__tlbie_va_range(hstart, hend, pid,
-						HPAGE_PMD_SIZE, MMU_PAGE_2M);
+						PMD_SIZE, MMU_PAGE_2M);
+			if (gflush)
+				__tlbie_va_range(gstart, gend, pid,
+						PUD_SIZE, MMU_PAGE_1G);
 			fixup_tlbie();
 			asm volatile("eieio; tlbsync; ptesync": : :"memory");
 		}
 	}
 	preempt_enable();
 }
+
+void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			    unsigned long end)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	if (is_vm_hugetlb_page(vma))
+		return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+	__radix__flush_tlb_range(vma->vm_mm, start, end, false);
+}
 EXPORT_SYMBOL(radix__flush_tlb_range);
 
 static int radix_get_mmu_psize(int page_size)
@@ -837,6 +859,8 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	int psize = 0;
 	struct mm_struct *mm = tlb->mm;
 	int page_size = tlb->page_size;
+	unsigned long start = tlb->start;
+	unsigned long end = tlb->end;
 
 	/*
 	 * if page size is not something we understand, do a full mm flush
@@ -847,15 +871,45 @@ void radix__tlb_flush(struct mmu_gather *tlb)
 	 */
 	if (tlb->fullmm) {
 		__flush_all_mm(mm, true);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+	} else if (mm_tlb_flush_nested(mm)) {
+		/*
+		 * If there is a concurrent invalidation that is clearing ptes,
+		 * then it's possible this invalidation will miss one of those
+		 * cleared ptes and miss flushing the TLB. If this invalidate
+		 * returns before the other one flushes TLBs, that can result
+		 * in it returning while there are still valid TLBs inside the
+		 * range to be invalidated.
+		 *
+		 * See mm/memory.c:tlb_finish_mmu() for more details.
+		 *
+		 * The solution to this is ensure the entire range is always
+		 * flushed here. The problem for powerpc is that the flushes
+		 * are page size specific, so this "forced flush" would not
+		 * do the right thing if there are a mix of page sizes in
+		 * the range to be invalidated. So use __flush_tlb_range
+		 * which invalidates all possible page sizes in the range.
+		 *
+		 * PWC flush probably is not be required because the core code
+		 * shouldn't free page tables in this path, but accounting
+		 * for the possibility makes us a bit more robust.
+		 *
+		 * need_flush_all is an uncommon case because page table
+		 * teardown should be done with exclusive locks held (but
+		 * after locks are dropped another invalidate could come
+		 * in), it could be optimized further if necessary.
+		 */
+		if (!tlb->need_flush_all)
+			__radix__flush_tlb_range(mm, start, end, true);
+		else
+			radix__flush_all_mm(mm);
+#endif
 	} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
 		if (!tlb->need_flush_all)
 			radix__flush_tlb_mm(mm);
 		else
 			radix__flush_all_mm(mm);
 	} else {
-		unsigned long start = tlb->start;
-		unsigned long end = tlb->end;
-
 		if (!tlb->need_flush_all)
 			radix__flush_tlb_range_psize(mm, start, end, psize);
 		else
@@ -1043,6 +1097,8 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
 	for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
 		if (sib == cpu)
 			continue;
+		if (!cpu_possible(sib))
+			continue;
 		if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
 			flush = true;
 	}
......