Commit b30e7590 authored by Aneesh Kumar K.V, committed by Michael Ellerman

powerpc/mm: Switch to generic RCU get_user_pages_fast

This patch switches the ppc arch to use the generic RCU-based
gup implementation.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent f30c59e9
...@@ -149,6 +149,7 @@ config PPC ...@@ -149,6 +149,7 @@ config PPC
select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_ATOMIC_RMW
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
select NO_BOOTMEM select NO_BOOTMEM
select HAVE_GENERIC_RCU_GUP
config GENERIC_CSUM config GENERIC_CSUM
def_bool CPU_LITTLE_ENDIAN def_bool CPU_LITTLE_ENDIAN
......
...@@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd) ...@@ -48,7 +48,7 @@ static inline unsigned int hugepd_shift(hugepd_t hpd)
#endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_BOOK3S_64 */
static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
unsigned pdshift) unsigned pdshift)
{ {
/* /*
...@@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, ...@@ -58,9 +58,9 @@ static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
*/ */
unsigned long idx = 0; unsigned long idx = 0;
pte_t *dir = hugepd_page(*hpdp); pte_t *dir = hugepd_page(hpd);
#ifndef CONFIG_PPC_FSL_BOOK3E #ifndef CONFIG_PPC_FSL_BOOK3E
idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
#endif #endif
return dir + idx; return dir + idx;
...@@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma, ...@@ -193,7 +193,7 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
} }
#define hugepd_shift(x) 0 #define hugepd_shift(x) 0
static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
unsigned pdshift) unsigned pdshift)
{ {
return 0; return 0;
......
...@@ -379,13 +379,14 @@ static inline int hugepd_ok(hugepd_t hpd) ...@@ -379,13 +379,14 @@ static inline int hugepd_ok(hugepd_t hpd)
} }
#endif #endif
#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep)))) #define is_hugepd(hpd) (hugepd_ok(hpd))
#define pgd_huge pgd_huge #define pgd_huge pgd_huge
int pgd_huge(pgd_t pgd); int pgd_huge(pgd_t pgd);
#else /* CONFIG_HUGETLB_PAGE */ #else /* CONFIG_HUGETLB_PAGE */
#define is_hugepd(pdep) 0 #define is_hugepd(pdep) 0
#define pgd_huge(pgd) 0 #define pgd_huge(pgd) 0
#endif /* CONFIG_HUGETLB_PAGE */ #endif /* CONFIG_HUGETLB_PAGE */
#define __hugepd(x) ((hugepd_t) { (x) })
struct page; struct page;
extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
......
...@@ -600,6 +600,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, ...@@ -600,6 +600,5 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
*/ */
return true; return true;
} }
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */ #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
...@@ -274,11 +274,9 @@ extern void paging_init(void); ...@@ -274,11 +274,9 @@ extern void paging_init(void);
*/ */
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *); extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr);
extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr); unsigned long end, int write,
struct page **pages, int *nr);
#ifndef CONFIG_TRANSPARENT_HUGEPAGE #ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_large(pmd) 0 #define pmd_large(pmd) 0
#define has_transparent_hugepage() 0 #define has_transparent_hugepage() 0
......
...@@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror ...@@ -6,7 +6,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-y := fault.o mem.o pgtable.o gup.o mmap.o \ obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(CONFIG_WORD_SIZE).o \ init_$(CONFIG_WORD_SIZE).o \
pgtable_$(CONFIG_WORD_SIZE).o pgtable_$(CONFIG_WORD_SIZE).o
obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \ obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
......
/*
* Lockless get_user_pages_fast for powerpc
*
* Copyright (C) 2008 Nick Piggin
* Copyright (C) 2008 Novell Inc.
*/
#undef DEBUG
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
#include <asm/pgtable.h>
#ifdef __HAVE_ARCH_PTE_SPECIAL
/*
 * The performance-critical leaf walkers are kept noinline; otherwise gcc
 * folds everything into a single function and register pressure becomes
 * too high.
 *
 * gup_pte_range - take references on the pages mapped by a run of PTEs.
 * @pmd:   snapshot of the PMD entry whose PTE page is walked
 * @addr:  first address to look up
 * @end:   address at which the walk stops (exclusive)
 * @write: non-zero if each PTE must also have _PAGE_RW set
 * @pages: output array; each page found is stored at index *nr
 * @nr:    running count of stored pages, bumped once per page
 *
 * Returns 1 if every PTE in the range yielded a page, 0 as soon as one
 * cannot be handled here. Pages stored before the failing PTE are left
 * referenced and counted in *nr.
 */
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	/* Bits a usable PTE must have set. */
	unsigned long want = _PAGE_PRESENT | _PAGE_USER | (write ? _PAGE_RW : 0);
	/* Bits inspected: the wanted ones plus _PAGE_SPECIAL, which must be clear. */
	unsigned long check = want | _PAGE_SPECIAL;
	pte_t *cur = pte_offset_kernel(&pmd, addr);

	do {
		pte_t snap = ACCESS_ONCE(*cur);
		struct page *pg;

		/*
		 * As in the PMD case, NUMA hinting entries must take the slow
		 * path; so must anything whose permission bits do not match.
		 */
		if (pte_numa(snap) || (pte_val(snap) & check) != want)
			return 0;

		VM_BUG_ON(!pfn_valid(pte_pfn(snap)));
		pg = pte_page(snap);
		if (!page_cache_get_speculative(pg))
			return 0;

		/* The PTE changed after it was sampled: drop the ref and give up. */
		if (unlikely(pte_val(snap) != pte_val(*cur))) {
			put_page(pg);
			return 0;
		}

		pages[*nr] = pg;
		*nr += 1;

		cur++;
		addr += PAGE_SIZE;
	} while (addr != end);

	return 1;
}
/*
 * gup_pmd_range - walk the PMD entries covering [addr, end).
 * @pud:   snapshot of the PUD entry whose PMD page is walked
 * @addr:  first address to look up
 * @end:   address at which the walk stops (exclusive)
 * @write: passed through to the leaf walkers
 * @pages: output array, passed through to the leaf walkers
 * @nr:    running page count, passed through to the leaf walkers
 *
 * Each entry is dispatched to gup_hugepte(), gup_hugepd() or
 * gup_pte_range() depending on what it maps.
 *
 * Returns 1 if the whole range was handled, 0 on the first entry that
 * could not be.
 */
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	pmd_t *slot = pmd_offset(&pud, addr);
	unsigned long stop;

	do {
		pmd_t entry = ACCESS_ONCE(*slot);

		stop = pmd_addr_end(addr, end);

		/*
		 * A splitting transparent hugepage makes us return zero, which
		 * sends the caller down the slow path; that path will call
		 * wait_split_huge_page() if the pmd is still splitting.
		 */
		if (pmd_none(entry) || pmd_trans_splitting(entry))
			return 0;

		if (pmd_huge(entry) || pmd_large(entry)) {
			/*
			 * NUMA hinting faults are handled in the GUP slow path
			 * for accounting purposes and so they can be serialised
			 * against THP migration.
			 */
			if (pmd_numa(entry) ||
			    !gup_hugepte((pte_t *)slot, PMD_SIZE, addr, stop,
					 write, pages, nr))
				return 0;
			continue;
		}

		if (is_hugepd(slot)) {
			if (!gup_hugepd((hugepd_t *)slot, PMD_SHIFT,
					addr, stop, write, pages, nr))
				return 0;
			continue;
		}

		if (!gup_pte_range(entry, addr, stop, write, pages, nr))
			return 0;
	} while (slot++, addr = stop, addr != end);

	return 1;
}
/*
 * gup_pud_range - walk the PUD entries covering [addr, end).
 * @pgd:   snapshot of the PGD entry whose PUD page is walked
 * @addr:  first address to look up
 * @end:   address at which the walk stops (exclusive)
 * @write: passed through to the lower-level walkers
 * @pages: output array, passed through to the lower-level walkers
 * @nr:    running page count, passed through to the lower-level walkers
 *
 * Each entry is dispatched to gup_hugepte(), gup_hugepd() or
 * gup_pmd_range() depending on what it maps.
 *
 * Returns 1 if the whole range was handled, 0 on the first entry that
 * could not be.
 */
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	pud_t *slot = pud_offset(&pgd, addr);
	unsigned long stop;

	do {
		pud_t entry = ACCESS_ONCE(*slot);

		stop = pud_addr_end(addr, end);

		if (pud_none(entry))
			return 0;

		if (pud_huge(entry)) {
			if (!gup_hugepte((pte_t *)slot, PUD_SIZE, addr, stop,
					 write, pages, nr))
				return 0;
			continue;
		}

		if (is_hugepd(slot)) {
			if (!gup_hugepd((hugepd_t *)slot, PUD_SHIFT,
					addr, stop, write, pages, nr))
				return 0;
			continue;
		}

		if (!gup_pmd_range(entry, addr, stop, write, pages, nr))
			return 0;
	} while (slot++, addr = stop, addr != end);

	return 1;
}
/*
 * __get_user_pages_fast - look up and reference user pages without mmap_sem.
 * @start:    user virtual address; rounded down to a page boundary
 * @nr_pages: number of pages to look up
 * @write:    non-zero if the pages must be writable
 * @pages:    output array filled in by the page-table walkers
 *
 * Walks the current task's page tables with local interrupts disabled,
 * stopping at the first entry the lockless walkers cannot handle.
 *
 * Returns the page count accumulated by the walk, which may be smaller
 * than @nr_pages. Returns 0 if @nr_pages is not positive or if the range
 * fails access_ok().
 */
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
		struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	unsigned long flags;
	pgd_t *pgdp;
	int nr = 0;

	pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");

	/*
	 * The do/while walkers below run their body once before testing
	 * addr != end, so an empty range would still be walked and could
	 * store into @pages. Reject it up front.
	 */
	if (nr_pages <= 0)
		return 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
				start, len)))
		return 0;

	pr_devel(" aligned: %lx .. %lx\n", start, end);

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch size
	 * will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables from being freed on powerpc.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_save(flags);

	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = ACCESS_ONCE(*pgdp);

		pr_devel(" %016lx: normal pgd %p\n", addr,
			 (void *)pgd_val(pgd));
		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			break;
		if (pgd_huge(pgd)) {
			if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
					 write, pages, &nr))
				break;
		} else if (is_hugepd(pgdp)) {
			if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
					addr, next, write, pages, &nr))
				break;
		} else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);

	local_irq_restore(flags);

	return nr;
}
/*
 * get_user_pages_fast - reference user pages, falling back to the slow path.
 * @start:    user virtual address; rounded down to a page boundary
 * @nr_pages: number of pages to look up
 * @write:    non-zero if the pages must be writable
 * @pages:    output array receiving the pages
 *
 * Tries __get_user_pages_fast() first. If that yields fewer than @nr_pages
 * pages, the remainder is requested from get_user_pages() with mmap_sem
 * held for read.
 *
 * Returns the total number of pages obtained. If the fast path obtained
 * nothing, the get_user_pages() result is returned as-is, including a
 * negative value. Returns 0 if @nr_pages is not positive.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
		struct page **pages)
{
	struct mm_struct *mm = current->mm;
	int nr, ret;

	/*
	 * Nothing to do for an empty request. Bail out before the lockless
	 * walk, whose do/while loops visit one entry before checking the end.
	 */
	if (nr_pages <= 0)
		return 0;

	start &= PAGE_MASK;
	nr = __get_user_pages_fast(start, nr_pages, write, pages);
	ret = nr;

	if (nr < nr_pages) {
		pr_devel(" slow path ! nr = %d\n", nr);

		/* Try to get the remaining pages with get_user_pages */
		/*
		 * Widen before shifting: nr is an int, and nr << PAGE_SHIFT
		 * computed in int can overflow for large page counts.
		 */
		start += (unsigned long)nr << PAGE_SHIFT;
		pages += nr;

		down_read(&mm->mmap_sem);
		ret = get_user_pages(current, mm, start,
				     nr_pages - nr, write, 0, pages, NULL);
		up_read(&mm->mmap_sem);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			/* Pages from the fast path are kept even if the slow path failed. */
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}
	}

	return ret;
}
#endif /* __HAVE_ARCH_PTE_SPECIAL */
...@@ -233,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz ...@@ -233,7 +233,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
return NULL; return NULL;
return hugepte_offset(hpdp, addr, pdshift); return hugepte_offset(*hpdp, addr, pdshift);
} }
#else #else
...@@ -273,7 +273,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz ...@@ -273,7 +273,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz
if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift)) if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
return NULL; return NULL;
return hugepte_offset(hpdp, addr, pdshift); return hugepte_offset(*hpdp, addr, pdshift);
} }
#endif #endif
...@@ -541,7 +541,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, ...@@ -541,7 +541,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
do { do {
pmd = pmd_offset(pud, addr); pmd = pmd_offset(pud, addr);
next = pmd_addr_end(addr, end); next = pmd_addr_end(addr, end);
if (!is_hugepd(pmd)) { if (!is_hugepd(__hugepd(pmd_val(*pmd)))) {
/* /*
* if it is not hugepd pointer, we should already find * if it is not hugepd pointer, we should already find
* it cleared. * it cleared.
...@@ -590,7 +590,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, ...@@ -590,7 +590,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
do { do {
pud = pud_offset(pgd, addr); pud = pud_offset(pgd, addr);
next = pud_addr_end(addr, end); next = pud_addr_end(addr, end);
if (!is_hugepd(pud)) { if (!is_hugepd(__hugepd(pud_val(*pud)))) {
if (pud_none_or_clear_bad(pud)) if (pud_none_or_clear_bad(pud))
continue; continue;
hugetlb_free_pmd_range(tlb, pud, addr, next, floor, hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
...@@ -656,7 +656,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb, ...@@ -656,7 +656,7 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
do { do {
next = pgd_addr_end(addr, end); next = pgd_addr_end(addr, end);
pgd = pgd_offset(tlb->mm, addr); pgd = pgd_offset(tlb->mm, addr);
if (!is_hugepd(pgd)) { if (!is_hugepd(__hugepd(pgd_val(*pgd)))) {
if (pgd_none_or_clear_bad(pgd)) if (pgd_none_or_clear_bad(pgd))
continue; continue;
hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling); hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
...@@ -716,12 +716,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end, ...@@ -716,12 +716,11 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
return (__boundary - 1 < end - 1) ? __boundary : end; return (__boundary - 1 < end - 1) ? __boundary : end;
} }
int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift,
unsigned long addr, unsigned long end, unsigned long end, int write, struct page **pages, int *nr)
int write, struct page **pages, int *nr)
{ {
pte_t *ptep; pte_t *ptep;
unsigned long sz = 1UL << hugepd_shift(*hugepd); unsigned long sz = 1UL << hugepd_shift(hugepd);
unsigned long next; unsigned long next;
ptep = hugepte_offset(hugepd, addr, pdshift); ptep = hugepte_offset(hugepd, addr, pdshift);
...@@ -964,7 +963,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift ...@@ -964,7 +963,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
else if (pgd_huge(pgd)) { else if (pgd_huge(pgd)) {
ret_pte = (pte_t *) pgdp; ret_pte = (pte_t *) pgdp;
goto out; goto out;
} else if (is_hugepd(&pgd)) } else if (is_hugepd(__hugepd(pgd_val(pgd))))
hpdp = (hugepd_t *)&pgd; hpdp = (hugepd_t *)&pgd;
else { else {
/* /*
...@@ -981,7 +980,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift ...@@ -981,7 +980,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
else if (pud_huge(pud)) { else if (pud_huge(pud)) {
ret_pte = (pte_t *) pudp; ret_pte = (pte_t *) pudp;
goto out; goto out;
} else if (is_hugepd(&pud)) } else if (is_hugepd(__hugepd(pud_val(pud))))
hpdp = (hugepd_t *)&pud; hpdp = (hugepd_t *)&pud;
else { else {
pdshift = PMD_SHIFT; pdshift = PMD_SHIFT;
...@@ -1002,7 +1001,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift ...@@ -1002,7 +1001,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
if (pmd_huge(pmd) || pmd_large(pmd)) { if (pmd_huge(pmd) || pmd_large(pmd)) {
ret_pte = (pte_t *) pmdp; ret_pte = (pte_t *) pmdp;
goto out; goto out;
} else if (is_hugepd(&pmd)) } else if (is_hugepd(__hugepd(pmd_val(pmd))))
hpdp = (hugepd_t *)&pmd; hpdp = (hugepd_t *)&pmd;
else else
return pte_offset_kernel(&pmd, ea); return pte_offset_kernel(&pmd, ea);
...@@ -1011,7 +1010,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift ...@@ -1011,7 +1010,7 @@ pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift
if (!hpdp) if (!hpdp)
return NULL; return NULL;
ret_pte = hugepte_offset(hpdp, ea, pdshift); ret_pte = hugepte_offset(*hpdp, ea, pdshift);
pdshift = hugepd_shift(*hpdp); pdshift = hugepd_shift(*hpdp);
out: out:
if (shift) if (shift)
...@@ -1041,14 +1040,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, ...@@ -1041,14 +1040,6 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
if ((pte_val(pte) & mask) != mask) if ((pte_val(pte) & mask) != mask)
return 0; return 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* check for splitting here
*/
if (pmd_trans_splitting(pte_pmd(pte)))
return 0;
#endif
/* hugepages are never "special" */ /* hugepages are never "special" */
VM_BUG_ON(!pfn_valid(pte_pfn(pte))); VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment