Commit 376a34ef authored by John Hubbard, committed by Linus Torvalds

mm/gup: refactor and de-duplicate gup_fast() code

There were two nearly identical sets of code for gup_fast() style of
walking the page tables with interrupts disabled.  This has led to the
usual maintenance problems that arise from having duplicated code.

There is already a core internal routine in gup.c for gup_fast(), so just
enhance it very slightly: allow skipping the fall-back to "slow" (regular)
get_user_pages(), via the new FOLL_FAST_ONLY flag.  Then, just call
internal_get_user_pages_fast() from __get_user_pages_fast(), and adjust
the API to match pre-existing API behavior.

There is a change in behavior from this refactoring: the nested form of
interrupt disabling is used in all gup_fast() variants now.  That's
because there is only one place that interrupt disabling for page walking
is done, and so the safer form is required.  This should, if anything,
eliminate possible (rare) bugs, because the non-nested form of enabling
interrupts was fragile at best.

[jhubbard@nvidia.com: fixup]
  Link: http://lkml.kernel.org/r/20200521233841.1279742-1-jhubbard@nvidia.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: David Airlie <airlied@linux.ie>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: "Joonas Lahtinen" <joonas.lahtinen@linux.intel.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://lkml.kernel.org/r/20200519002124.2025955-3-jhubbard@nvidia.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 9e1f0580
...@@ -2816,6 +2816,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, ...@@ -2816,6 +2816,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */ #define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ #define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */
#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */
/* /*
* FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
......
...@@ -2731,10 +2731,12 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages, ...@@ -2731,10 +2731,12 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
struct page **pages) struct page **pages)
{ {
unsigned long addr, len, end; unsigned long addr, len, end;
unsigned long flags;
int nr_pinned = 0, ret = 0; int nr_pinned = 0, ret = 0;
if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM | if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
FOLL_FORCE | FOLL_PIN | FOLL_GET))) FOLL_FORCE | FOLL_PIN | FOLL_GET |
FOLL_FAST_ONLY)))
return -EINVAL; return -EINVAL;
start = untagged_addr(start) & PAGE_MASK; start = untagged_addr(start) & PAGE_MASK;
...@@ -2753,16 +2755,36 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages, ...@@ -2753,16 +2755,36 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
* order to avoid confusing the normal COW routines. So only * order to avoid confusing the normal COW routines. So only
* targets that are already writable are safe to do by just * targets that are already writable are safe to do by just
* looking at the page tables. * looking at the page tables.
*
* NOTE! With FOLL_FAST_ONLY we allow read-only gup_fast() here,
* because there is no slow path to fall back on. But you'd
* better be careful about possible COW pages - you'll get _a_
* COW page, but not necessarily the one you intended to get
* depending on what COW event happens after this. COW may break
* the page copy in a random direction.
*
* Disable interrupts. The nested form is used, in order to allow
* full, general purpose use of this routine.
*
* With interrupts disabled, we block page table pages from being
* freed from under us. See struct mmu_table_batch comments in
* include/asm-generic/tlb.h for more details.
*
* We do not adopt an rcu_read_lock(.) here as we also want to
* block IPIs that come from THPs splitting.
*/ */
if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) && if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) && gup_fast_permitted(start, end)) {
gup_fast_permitted(start, end)) { unsigned long fast_flags = gup_flags;
local_irq_disable(); if (!(gup_flags & FOLL_FAST_ONLY))
gup_pgd_range(addr, end, gup_flags | FOLL_WRITE, pages, &nr_pinned); fast_flags |= FOLL_WRITE;
local_irq_enable();
local_irq_save(flags);
gup_pgd_range(addr, end, fast_flags, pages, &nr_pinned);
local_irq_restore(flags);
ret = nr_pinned; ret = nr_pinned;
} }
if (nr_pinned < nr_pages) { if (nr_pinned < nr_pages && !(gup_flags & FOLL_FAST_ONLY)) {
/* Try to get the remaining pages with get_user_pages */ /* Try to get the remaining pages with get_user_pages */
start += nr_pinned << PAGE_SHIFT; start += nr_pinned << PAGE_SHIFT;
pages += nr_pinned; pages += nr_pinned;
...@@ -2798,51 +2820,30 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages, ...@@ -2798,51 +2820,30 @@ static int internal_get_user_pages_fast(unsigned long start, int nr_pages,
int __get_user_pages_fast(unsigned long start, int nr_pages, int write, int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages) struct page **pages)
{ {
unsigned long len, end; int nr_pinned;
unsigned long flags;
int nr_pinned = 0;
/* /*
* Internally (within mm/gup.c), gup fast variants must set FOLL_GET, * Internally (within mm/gup.c), gup fast variants must set FOLL_GET,
* because gup fast is always a "pin with a +1 page refcount" request. * because gup fast is always a "pin with a +1 page refcount" request.
*
* FOLL_FAST_ONLY is required in order to match the API description of
* this routine: no fall back to regular ("slow") GUP.
*/ */
unsigned int gup_flags = FOLL_GET; unsigned int gup_flags = FOLL_GET | FOLL_FAST_ONLY;
if (write) if (write)
gup_flags |= FOLL_WRITE; gup_flags |= FOLL_WRITE;
start = untagged_addr(start) & PAGE_MASK; nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
len = (unsigned long) nr_pages << PAGE_SHIFT; pages);
end = start + len;
if (end <= start)
return 0;
if (unlikely(!access_ok((void __user *)start, len)))
return 0;
/* /*
* Disable interrupts. We use the nested form as we can already have * As specified in the API description above, this routine is not
* interrupts disabled by get_futex_key. * allowed to return negative values. However, the common core
* * routine internal_get_user_pages_fast() *can* return -errno.
* With interrupts disabled, we block page table pages from being * Therefore, correct for that here:
* freed from under us. See struct mmu_table_batch comments in
* include/asm-generic/tlb.h for more details.
*
* We do not adopt an rcu_read_lock(.) here as we also want to
* block IPIs that come from THPs splitting.
*
* NOTE! We allow read-only gup_fast() here, but you'd better be
* careful about possible COW pages. You'll get _a_ COW page, but
* not necessarily the one you intended to get depending on what
* COW event happens after this. COW may break the page copy in a
* random direction.
*/ */
if (nr_pinned < 0)
if (IS_ENABLED(CONFIG_HAVE_FAST_GUP) && nr_pinned = 0;
gup_fast_permitted(start, end)) {
local_irq_save(flags);
gup_pgd_range(start, end, gup_flags, pages, &nr_pinned);
local_irq_restore(flags);
}
return nr_pinned; return nr_pinned;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment