Commit 2a15efc9 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds

mm: follow_hugetlb_page flags

follow_hugetlb_page() shouldn't be guessing about the coredump case
either: pass the foll_flags down to it, instead of just the write bit.

Remove that obscure huge_zeropage_ok() test.  The decision is easy,
though unlike the non-huge case - here vm_ops->fault is always set.
But we know that a fault would serve up zeroes, unless there's
already a hugetlbfs pagecache page to back the range.

(Alternatively, since hugetlb pages aren't swapped out under pressure,
you could save more dump space by arguing that a page not yet faulted
into this process cannot be relevant to the dump; but that would be
more surprising.)
Signed-off-by: default avatarHugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: default avatarRik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 8e4b9a60
...@@ -24,7 +24,9 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user * ...@@ -24,7 +24,9 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *
int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int hugetlb_overcommit_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int, int); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **,
unsigned long *, int *, int, unsigned int flags);
void unmap_hugepage_range(struct vm_area_struct *, void unmap_hugepage_range(struct vm_area_struct *,
unsigned long, unsigned long, struct page *); unsigned long, unsigned long, struct page *);
void __unmap_hugepage_range(struct vm_area_struct *, void __unmap_hugepage_range(struct vm_area_struct *,
......
...@@ -2016,6 +2016,23 @@ static struct page *hugetlbfs_pagecache_page(struct hstate *h, ...@@ -2016,6 +2016,23 @@ static struct page *hugetlbfs_pagecache_page(struct hstate *h,
return find_lock_page(mapping, idx); return find_lock_page(mapping, idx);
} }
/* Return whether there is a pagecache page to back given address within VMA */
static bool hugetlbfs_backed(struct hstate *h,
struct vm_area_struct *vma, unsigned long address)
{
struct address_space *mapping;
pgoff_t idx;
struct page *page;
mapping = vma->vm_file->f_mapping;
idx = vma_hugecache_offset(h, vma, address);
page = find_get_page(mapping, idx);
if (page)
put_page(page);
return page != NULL;
}
static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *ptep, unsigned int flags) unsigned long address, pte_t *ptep, unsigned int flags)
{ {
...@@ -2211,54 +2228,52 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address, ...@@ -2211,54 +2228,52 @@ follow_huge_pud(struct mm_struct *mm, unsigned long address,
return NULL; return NULL;
} }
static int huge_zeropage_ok(pte_t *ptep, int write, int shared)
{
if (!ptep || write || shared)
return 0;
else
return huge_pte_none(huge_ptep_get(ptep));
}
int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas, struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, int *length, int i, unsigned long *position, int *length, int i,
int write) unsigned int flags)
{ {
unsigned long pfn_offset; unsigned long pfn_offset;
unsigned long vaddr = *position; unsigned long vaddr = *position;
int remainder = *length; int remainder = *length;
struct hstate *h = hstate_vma(vma); struct hstate *h = hstate_vma(vma);
int zeropage_ok = 0;
int shared = vma->vm_flags & VM_SHARED;
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
while (vaddr < vma->vm_end && remainder) { while (vaddr < vma->vm_end && remainder) {
pte_t *pte; pte_t *pte;
int absent;
struct page *page; struct page *page;
/* /*
* Some archs (sparc64, sh*) have multiple pte_ts to * Some archs (sparc64, sh*) have multiple pte_ts to
* each hugepage. We have to make * sure we get the * each hugepage. We have to make sure we get the
* first, for the page indexing below to work. * first, for the page indexing below to work.
*/ */
pte = huge_pte_offset(mm, vaddr & huge_page_mask(h)); pte = huge_pte_offset(mm, vaddr & huge_page_mask(h));
if (huge_zeropage_ok(pte, write, shared)) absent = !pte || huge_pte_none(huge_ptep_get(pte));
zeropage_ok = 1;
/*
* When coredumping, it suits get_dump_page if we just return
* an error if there's a hole and no huge pagecache to back it.
*/
if (absent &&
((flags & FOLL_DUMP) && !hugetlbfs_backed(h, vma, vaddr))) {
remainder = 0;
break;
}
if (!pte || if (absent ||
(huge_pte_none(huge_ptep_get(pte)) && !zeropage_ok) || ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) {
(write && !pte_write(huge_ptep_get(pte)))) {
int ret; int ret;
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
ret = hugetlb_fault(mm, vma, vaddr, write); ret = hugetlb_fault(mm, vma, vaddr,
(flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
if (!(ret & VM_FAULT_ERROR)) if (!(ret & VM_FAULT_ERROR))
continue; continue;
remainder = 0; remainder = 0;
if (!i)
i = -EFAULT;
break; break;
} }
...@@ -2266,9 +2281,6 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2266,9 +2281,6 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
page = pte_page(huge_ptep_get(pte)); page = pte_page(huge_ptep_get(pte));
same_page: same_page:
if (pages) { if (pages) {
if (zeropage_ok)
pages[i] = ZERO_PAGE(0);
else
pages[i] = mem_map_offset(page, pfn_offset); pages[i] = mem_map_offset(page, pfn_offset);
get_page(pages[i]); get_page(pages[i]);
} }
...@@ -2293,7 +2305,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2293,7 +2305,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
*length = remainder; *length = remainder;
*position = vaddr; *position = vaddr;
return i; return i ? i : -EFAULT;
} }
void hugetlb_change_protection(struct vm_area_struct *vma, void hugetlb_change_protection(struct vm_area_struct *vma,
......
...@@ -1260,17 +1260,19 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -1260,17 +1260,19 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
!(vm_flags & vma->vm_flags)) !(vm_flags & vma->vm_flags))
return i ? : -EFAULT; return i ? : -EFAULT;
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i, write);
continue;
}
foll_flags = FOLL_TOUCH; foll_flags = FOLL_TOUCH;
if (pages) if (pages)
foll_flags |= FOLL_GET; foll_flags |= FOLL_GET;
if (flags & GUP_FLAGS_DUMP) if (flags & GUP_FLAGS_DUMP)
foll_flags |= FOLL_DUMP; foll_flags |= FOLL_DUMP;
if (write)
foll_flags |= FOLL_WRITE;
if (is_vm_hugetlb_page(vma)) {
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i, foll_flags);
continue;
}
do { do {
struct page *page; struct page *page;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment