Commit d64e2dbc authored by Jason Gunthorpe's avatar Jason Gunthorpe Committed by Andrew Morton

mm/gup: simplify the external interface functions and consolidate invariants

The GUP family of functions have a complex, but fairly well defined, set
of invariants for their arguments.  Currently these are sprinkled about,
sometimes in duplicate through many functions.

Internally we don't follow all the invariants that the external interface
has to follow, so place these checks directly at the exported interface. 
This ensures the internal functions never reach a violated invariant.

Remove the duplicated invariant checks.

The end result is to make these functions fully internal:
 __get_user_pages_locked()
 internal_get_user_pages_fast()
 __gup_longterm_locked()

And all the other functions call directly into one of these.

Link: https://lkml.kernel.org/r/5-v2-987e91b59705+36b-gup_tidy_jgg@nvidia.comSigned-off-by: default avatarJason Gunthorpe <jgg@nvidia.com>
Suggested-by: default avatarJohn Hubbard <jhubbard@nvidia.com>
Reviewed-by: default avatarJohn Hubbard <jhubbard@nvidia.com>
Acked-by: default avatarMike Rapoport (IBM) <rppt@kernel.org>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 7ce154fe
...@@ -215,7 +215,6 @@ int __must_check try_grab_page(struct page *page, unsigned int flags) ...@@ -215,7 +215,6 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
{ {
struct folio *folio = page_folio(page); struct folio *folio = page_folio(page);
WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
if (WARN_ON_ONCE(folio_ref_count(folio) <= 0)) if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
return -ENOMEM; return -ENOMEM;
...@@ -818,7 +817,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, ...@@ -818,7 +817,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
if (vma_is_secretmem(vma)) if (vma_is_secretmem(vma))
return NULL; return NULL;
if (foll_flags & FOLL_PIN) if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
return NULL; return NULL;
page = follow_page_mask(vma, address, foll_flags, &ctx); page = follow_page_mask(vma, address, foll_flags, &ctx);
...@@ -975,9 +974,6 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) ...@@ -975,9 +974,6 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma)) if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
return -EOPNOTSUPP; return -EOPNOTSUPP;
if ((gup_flags & FOLL_LONGTERM) && (gup_flags & FOLL_PCI_P2PDMA))
return -EOPNOTSUPP;
if (vma_is_secretmem(vma)) if (vma_is_secretmem(vma))
return -EFAULT; return -EFAULT;
...@@ -1354,11 +1350,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm, ...@@ -1354,11 +1350,6 @@ static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
long ret, pages_done; long ret, pages_done;
bool must_unlock = false; bool must_unlock = false;
if (locked) {
/* if VM_FAULT_RETRY can be returned, vmas become invalid */
BUG_ON(vmas);
}
/* /*
* The internal caller expects GUP to manage the lock internally and the * The internal caller expects GUP to manage the lock internally and the
* lock must be released when this returns. * lock must be released when this returns.
...@@ -2087,16 +2078,6 @@ static long __gup_longterm_locked(struct mm_struct *mm, ...@@ -2087,16 +2078,6 @@ static long __gup_longterm_locked(struct mm_struct *mm,
return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
locked, gup_flags); locked, gup_flags);
/*
* If we get to this point then FOLL_LONGTERM is set, and FOLL_LONGTERM
* implies FOLL_PIN (although the reverse is not true). Therefore it is
* correct to unconditionally call check_and_migrate_movable_pages()
* which assumes pages have been pinned via FOLL_PIN.
*
* Enforce the above reasoning by asserting that FOLL_PIN is set.
*/
if (WARN_ON(!(gup_flags & FOLL_PIN)))
return -EINVAL;
flags = memalloc_pin_save(); flags = memalloc_pin_save();
do { do {
nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages, nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages,
...@@ -2106,28 +2087,66 @@ static long __gup_longterm_locked(struct mm_struct *mm, ...@@ -2106,28 +2087,66 @@ static long __gup_longterm_locked(struct mm_struct *mm,
rc = nr_pinned_pages; rc = nr_pinned_pages;
break; break;
} }
/* FOLL_LONGTERM implies FOLL_PIN */
rc = check_and_migrate_movable_pages(nr_pinned_pages, pages); rc = check_and_migrate_movable_pages(nr_pinned_pages, pages);
} while (rc == -EAGAIN); } while (rc == -EAGAIN);
memalloc_pin_restore(flags); memalloc_pin_restore(flags);
return rc ? rc : nr_pinned_pages; return rc ? rc : nr_pinned_pages;
} }
static bool is_valid_gup_flags(unsigned int gup_flags) /*
* Check that the given flags are valid for the exported gup/pup interface, and
* update them with the required flags that the caller must have set.
*/
static bool is_valid_gup_args(struct page **pages, struct vm_area_struct **vmas,
int *locked, unsigned int *gup_flags_p,
unsigned int to_set)
{ {
unsigned int gup_flags = *gup_flags_p;
/* /*
* FOLL_PIN must only be set internally by the pin_user_pages*() APIs, * These flags not allowed to be specified externally to the gup
* never directly by the caller, so enforce that with an assertion: * interfaces:
* - FOLL_PIN/FOLL_TRIED/FOLL_FAST_ONLY are internal only
* - FOLL_REMOTE is internal only and used on follow_page()
*/ */
if (WARN_ON_ONCE(gup_flags & FOLL_PIN)) if (WARN_ON_ONCE(gup_flags & (FOLL_PIN | FOLL_TRIED |
FOLL_REMOTE | FOLL_FAST_ONLY)))
return false; return false;
gup_flags |= to_set;
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
return false;
/* LONGTERM can only be specified when pinning */
if (WARN_ON_ONCE(!(gup_flags & FOLL_PIN) && (gup_flags & FOLL_LONGTERM)))
return false;
/* Pages input must be given if using GET/PIN */
if (WARN_ON_ONCE((gup_flags & (FOLL_GET | FOLL_PIN)) && !pages))
return false;
/* At the external interface locked must be set */
if (WARN_ON_ONCE(locked && *locked != 1))
return false;
/* We want to allow the pgmap to be hot-unplugged at all times */
if (WARN_ON_ONCE((gup_flags & FOLL_LONGTERM) &&
(gup_flags & FOLL_PCI_P2PDMA)))
return false;
/* /*
* FOLL_PIN is a prerequisite to FOLL_LONGTERM. Another way of saying * Can't use VMAs with locked, as locked allows GUP to unlock
* that is, FOLL_LONGTERM is a specific case, more restrictive case of * which invalidates the vmas array
* FOLL_PIN.
*/ */
if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM)) if (WARN_ON_ONCE(vmas && locked))
return false; return false;
*gup_flags_p = gup_flags;
return true; return true;
} }
...@@ -2197,11 +2216,12 @@ long get_user_pages_remote(struct mm_struct *mm, ...@@ -2197,11 +2216,12 @@ long get_user_pages_remote(struct mm_struct *mm,
unsigned int gup_flags, struct page **pages, unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked) struct vm_area_struct **vmas, int *locked)
{ {
if (!is_valid_gup_flags(gup_flags)) if (!is_valid_gup_args(pages, vmas, locked, &gup_flags,
FOLL_TOUCH | FOLL_REMOTE))
return -EINVAL; return -EINVAL;
return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, locked, return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, locked,
gup_flags | FOLL_TOUCH | FOLL_REMOTE); gup_flags);
} }
EXPORT_SYMBOL(get_user_pages_remote); EXPORT_SYMBOL(get_user_pages_remote);
...@@ -2235,11 +2255,11 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, ...@@ -2235,11 +2255,11 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages, unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas) struct vm_area_struct **vmas)
{ {
if (!is_valid_gup_flags(gup_flags)) if (!is_valid_gup_args(pages, vmas, NULL, &gup_flags, FOLL_TOUCH))
return -EINVAL; return -EINVAL;
return __get_user_pages_locked(current->mm, start, nr_pages, pages, return __get_user_pages_locked(current->mm, start, nr_pages, pages,
vmas, NULL, gup_flags | FOLL_TOUCH); vmas, NULL, gup_flags);
} }
EXPORT_SYMBOL(get_user_pages); EXPORT_SYMBOL(get_user_pages);
...@@ -2263,8 +2283,11 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, ...@@ -2263,8 +2283,11 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
{ {
int locked = 0; int locked = 0;
if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_TOUCH))
return -EINVAL;
return __get_user_pages_locked(current->mm, start, nr_pages, pages, return __get_user_pages_locked(current->mm, start, nr_pages, pages,
NULL, &locked, gup_flags | FOLL_TOUCH); NULL, &locked, gup_flags);
} }
EXPORT_SYMBOL(get_user_pages_unlocked); EXPORT_SYMBOL(get_user_pages_unlocked);
...@@ -2992,7 +3015,9 @@ int get_user_pages_fast_only(unsigned long start, int nr_pages, ...@@ -2992,7 +3015,9 @@ int get_user_pages_fast_only(unsigned long start, int nr_pages,
* FOLL_FAST_ONLY is required in order to match the API description of * FOLL_FAST_ONLY is required in order to match the API description of
* this routine: no fall back to regular ("slow") GUP. * this routine: no fall back to regular ("slow") GUP.
*/ */
gup_flags |= FOLL_GET | FOLL_FAST_ONLY; if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
FOLL_GET | FOLL_FAST_ONLY))
return -EINVAL;
nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags, nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
pages); pages);
...@@ -3029,16 +3054,14 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast_only); ...@@ -3029,16 +3054,14 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
int get_user_pages_fast(unsigned long start, int nr_pages, int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages) unsigned int gup_flags, struct page **pages)
{ {
if (!is_valid_gup_flags(gup_flags))
return -EINVAL;
/* /*
* The caller may or may not have explicitly set FOLL_GET; either way is * The caller may or may not have explicitly set FOLL_GET; either way is
* OK. However, internally (within mm/gup.c), gup fast variants must set * OK. However, internally (within mm/gup.c), gup fast variants must set
* FOLL_GET, because gup fast is always a "pin with a +1 page refcount" * FOLL_GET, because gup fast is always a "pin with a +1 page refcount"
* request. * request.
*/ */
gup_flags |= FOLL_GET; if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_GET))
return -EINVAL;
return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
} }
EXPORT_SYMBOL_GPL(get_user_pages_fast); EXPORT_SYMBOL_GPL(get_user_pages_fast);
...@@ -3062,14 +3085,8 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast); ...@@ -3062,14 +3085,8 @@ EXPORT_SYMBOL_GPL(get_user_pages_fast);
int pin_user_pages_fast(unsigned long start, int nr_pages, int pin_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages) unsigned int gup_flags, struct page **pages)
{ {
/* FOLL_GET and FOLL_PIN are mutually exclusive. */ if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags, FOLL_PIN))
if (WARN_ON_ONCE(gup_flags & FOLL_GET))
return -EINVAL; return -EINVAL;
if (WARN_ON_ONCE(!pages))
return -EINVAL;
gup_flags |= FOLL_PIN;
return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages); return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
} }
EXPORT_SYMBOL_GPL(pin_user_pages_fast); EXPORT_SYMBOL_GPL(pin_user_pages_fast);
...@@ -3085,20 +3102,14 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages, ...@@ -3085,20 +3102,14 @@ int pin_user_pages_fast_only(unsigned long start, int nr_pages,
{ {
int nr_pinned; int nr_pinned;
/*
* FOLL_GET and FOLL_PIN are mutually exclusive. Note that the API
* rules require returning 0, rather than -errno:
*/
if (WARN_ON_ONCE(gup_flags & FOLL_GET))
return 0;
if (WARN_ON_ONCE(!pages))
return 0;
/* /*
* FOLL_FAST_ONLY is required in order to match the API description of * FOLL_FAST_ONLY is required in order to match the API description of
* this routine: no fall back to regular ("slow") GUP. * this routine: no fall back to regular ("slow") GUP.
*/ */
gup_flags |= (FOLL_PIN | FOLL_FAST_ONLY); if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
FOLL_PIN | FOLL_FAST_ONLY))
return 0;
nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags, nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
pages); pages);
/* /*
...@@ -3140,16 +3151,11 @@ long pin_user_pages_remote(struct mm_struct *mm, ...@@ -3140,16 +3151,11 @@ long pin_user_pages_remote(struct mm_struct *mm,
unsigned int gup_flags, struct page **pages, unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *locked) struct vm_area_struct **vmas, int *locked)
{ {
/* FOLL_GET and FOLL_PIN are mutually exclusive. */ if (!is_valid_gup_args(pages, vmas, locked, &gup_flags,
if (WARN_ON_ONCE(gup_flags & FOLL_GET)) FOLL_PIN | FOLL_TOUCH | FOLL_REMOTE))
return -EINVAL; return 0;
if (WARN_ON_ONCE(!pages))
return -EINVAL;
return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked, return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
gup_flags | FOLL_PIN | FOLL_TOUCH | gup_flags);
FOLL_REMOTE);
} }
EXPORT_SYMBOL(pin_user_pages_remote); EXPORT_SYMBOL(pin_user_pages_remote);
...@@ -3174,14 +3180,8 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages, ...@@ -3174,14 +3180,8 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages, unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas) struct vm_area_struct **vmas)
{ {
/* FOLL_GET and FOLL_PIN are mutually exclusive. */ if (!is_valid_gup_args(pages, vmas, NULL, &gup_flags, FOLL_PIN))
if (WARN_ON_ONCE(gup_flags & FOLL_GET)) return 0;
return -EINVAL;
if (WARN_ON_ONCE(!pages))
return -EINVAL;
gup_flags |= FOLL_PIN;
return __gup_longterm_locked(current->mm, start, nr_pages, return __gup_longterm_locked(current->mm, start, nr_pages,
pages, vmas, NULL, gup_flags); pages, vmas, NULL, gup_flags);
} }
...@@ -3195,15 +3195,12 @@ EXPORT_SYMBOL(pin_user_pages); ...@@ -3195,15 +3195,12 @@ EXPORT_SYMBOL(pin_user_pages);
long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages, long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags) struct page **pages, unsigned int gup_flags)
{ {
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE(gup_flags & FOLL_GET))
return -EINVAL;
int locked = 0; int locked = 0;
if (WARN_ON_ONCE(!pages)) if (!is_valid_gup_args(pages, NULL, NULL, &gup_flags,
return -EINVAL; FOLL_PIN | FOLL_TOUCH))
return 0;
gup_flags |= FOLL_PIN | FOLL_TOUCH;
return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL, return __gup_longterm_locked(current->mm, start, nr_pages, pages, NULL,
&locked, gup_flags); &locked, gup_flags);
} }
......
...@@ -1042,11 +1042,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, ...@@ -1042,11 +1042,6 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
assert_spin_locked(pmd_lockptr(mm, pmd)); assert_spin_locked(pmd_lockptr(mm, pmd));
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
return NULL;
if (flags & FOLL_WRITE && !pmd_write(*pmd)) if (flags & FOLL_WRITE && !pmd_write(*pmd))
return NULL; return NULL;
...@@ -1205,11 +1200,6 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, ...@@ -1205,11 +1200,6 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
if (flags & FOLL_WRITE && !pud_write(*pud)) if (flags & FOLL_WRITE && !pud_write(*pud))
return NULL; return NULL;
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
return NULL;
if (pud_present(*pud) && pud_devmap(*pud)) if (pud_present(*pud) && pud_devmap(*pud))
/* pass */; /* pass */;
else else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment