Commit bebeb3d6 authored by Michel Lespinasse, committed by Linus Torvalds

mm: introduce mm_populate() for populating new vmas

When creating new mappings using the MAP_POPULATE / MAP_LOCKED flags (or
with MCL_FUTURE in effect), we want to populate the pages within the
newly created vmas.  This may take a while as we may have to read pages
from disk, so ideally we want to do this outside of the write-locked
mmap_sem region.

This change introduces mm_populate(), which is used to defer populating
such mappings until after the mmap_sem write lock has been released.
This is implemented as a generalization of the former do_mlock_pages(),
which accomplished the same task but was only used during mlock() /
mlockall().

Signed-off-by: Michel Lespinasse <walken@google.com>
Reported-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Andy Lutomirski <luto@amacapital.net>
Cc: Greg Ungerer <gregungerer@westnet.com.au>
Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 940e7da5
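
In outline, every do_mmap_pgoff() caller now follows the same deferred-population
pattern: create the mapping under the mmap_sem write lock, then populate it after
the lock is dropped. A minimal sketch of the convention, modeled on the patched
vm_mmap_pgoff() in mm/util.c below (the "hint" identifier is a placeholder, not
verbatim patch code):

	bool populate;
	unsigned long addr;

	down_write(&mm->mmap_sem);
	addr = do_mmap_pgoff(file, hint, len, prot, flags, pgoff, &populate);
	up_write(&mm->mmap_sem);		/* drop the write lock first... */

	if (!IS_ERR_VALUE(addr) && populate)
		mm_populate(addr, len);		/* ...then fault the pages in */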
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -103,6 +103,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 	unsigned nr_events = ctx->max_reqs;
 	unsigned long size;
 	int nr_pages;
+	bool populate;
 
 	/* Compensate for the ring buffer's head/tail overlap entry */
 	nr_events += 2;	/* 1 is required, 2 for good luck */
@@ -129,7 +130,8 @@ static int aio_setup_ring(struct kioctx *ctx)
 	down_write(&ctx->mm->mmap_sem);
 	info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
 					PROT_READ|PROT_WRITE,
-					MAP_ANONYMOUS|MAP_PRIVATE, 0);
+					MAP_ANONYMOUS|MAP_PRIVATE, 0,
+					&populate);
 	if (IS_ERR((void *)info->mmap_base)) {
 		up_write(&ctx->mm->mmap_sem);
 		info->mmap_size = 0;
@@ -147,6 +149,8 @@ static int aio_setup_ring(struct kioctx *ctx)
 		aio_free_ring(ctx);
 		return -EAGAIN;
 	}
+	if (populate)
+		mm_populate(info->mmap_base, info->mmap_size);
 
 	ctx->user_id = info->mmap_base;
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1474,11 +1474,23 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff);
-extern unsigned long do_mmap_pgoff(struct file *, unsigned long,
-	unsigned long, unsigned long,
-	unsigned long, unsigned long);
+extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+	unsigned long len, unsigned long prot, unsigned long flags,
+	unsigned long pgoff, bool *populate);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+
+#ifdef CONFIG_MMU
+extern int __mm_populate(unsigned long addr, unsigned long len,
+			 int ignore_errors);
+static inline void mm_populate(unsigned long addr, unsigned long len)
+{
+	/* Ignore errors */
+	(void) __mm_populate(addr, len, 1);
+}
+#else
+static inline void mm_populate(unsigned long addr, unsigned long len) {}
+#endif
 
 /* These take the mm semaphore themselves */
 extern unsigned long vm_brk(unsigned long, unsigned long);
 extern int vm_munmap(unsigned long, size_t);
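
Note the split above: __mm_populate() returns an error for callers that must
report failure, while the mm_populate() wrapper is deliberately fire-and-forget
(and compiles to a no-op on !CONFIG_MMU). A sketch of the two calling styles,
assuming start/len describe an already-mapped range:

	/* mlock()-style caller: populate failures must be reported. */
	int err = __mm_populate(start, len, 0);
	if (err)
		return err;

	/* mmap()-style caller: population is best-effort; errors ignored. */
	mm_populate(start, len);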
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -967,11 +967,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 	unsigned long flags;
 	unsigned long prot;
 	int acc_mode;
-	unsigned long user_addr;
 	struct ipc_namespace *ns;
 	struct shm_file_data *sfd;
 	struct path path;
 	fmode_t f_mode;
+	bool populate = false;
 
 	err = -EINVAL;
 	if (shmid < 0)
@@ -1070,13 +1070,15 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
 		goto invalid;
 	}
 
-	user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0);
-	*raddr = user_addr;
+	addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
+	*raddr = addr;
 	err = 0;
-	if (IS_ERR_VALUE(user_addr))
-		err = (long)user_addr;
+	if (IS_ERR_VALUE(addr))
+		err = (long)addr;
 invalid:
 	up_write(&current->mm->mmap_sem);
+	if (populate)
+		mm_populate(addr, size);
 
 out_fput:
 	fput(file);
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -416,7 +416,14 @@ static int do_mlock(unsigned long start, size_t len, int on)
 	return error;
 }
 
-static int do_mlock_pages(unsigned long start, size_t len, int ignore_errors)
+/*
+ * __mm_populate - populate and/or mlock pages within a range of address space.
+ *
+ * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
+ * flags. VMAs must be already marked with the desired vm_flags, and
+ * mmap_sem must not be held.
+ */
+int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long end, nstart, nend;
@@ -498,7 +505,7 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len)
 	error = do_mlock(start, len, 1);
 	up_write(&current->mm->mmap_sem);
 	if (!error)
-		error = do_mlock_pages(start, len, 0);
+		error = __mm_populate(start, len, 0);
 	return error;
 }
@@ -564,10 +571,8 @@ SYSCALL_DEFINE1(mlockall, int, flags)
 	    capable(CAP_IPC_LOCK))
 		ret = do_mlockall(flags);
 	up_write(&current->mm->mmap_sem);
-	if (!ret && (flags & MCL_CURRENT)) {
-		/* Ignore errors */
-		do_mlock_pages(0, TASK_SIZE, 1);
-	}
+	if (!ret && (flags & MCL_CURRENT))
+		mm_populate(0, TASK_SIZE);
 out:
 	return ret;
 }
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1154,12 +1154,15 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
 
 unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			unsigned long len, unsigned long prot,
-			unsigned long flags, unsigned long pgoff)
+			unsigned long flags, unsigned long pgoff,
+			bool *populate)
 {
 	struct mm_struct * mm = current->mm;
 	struct inode *inode;
 	vm_flags_t vm_flags;
 
+	*populate = false;
+
 	/*
 	 * Does the application expect PROT_READ to imply PROT_EXEC?
 	 *
@@ -1280,7 +1283,12 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		}
 	}
 
-	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
+	addr = mmap_region(file, addr, len, flags, vm_flags, pgoff);
+	if (!IS_ERR_VALUE(addr) &&
+	    ((vm_flags & VM_LOCKED) ||
+	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+		*populate = true;
+	return addr;
 }
 
 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
@@ -1531,10 +1539,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
-		if (!mlock_vma_pages_range(vma, addr, addr + len))
+		if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
+					vma == get_gate_vma(current->mm)))
 			mm->locked_vm += (len >> PAGE_SHIFT);
-	} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
-		make_pages_present(addr, addr + len);
+		else
+			vma->vm_flags &= ~VM_LOCKED;
+	}
 
 	if (file)
 		uprobe_mmap(vma);
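
From userspace, the deferred path is taken whenever the new vma is VM_LOCKED
(MAP_LOCKED, or mlockall(MCL_FUTURE)) or was requested with MAP_POPULATE but not
MAP_NONBLOCK. A small illustrative program (not part of the patch), assuming an
MMU kernel with this change applied:

	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>

	int main(void)
	{
		size_t len = 1 << 20;	/* 1 MiB */

		/* MAP_POPULATE without MAP_NONBLOCK sets *populate in
		 * do_mmap_pgoff(), so the kernel prefaults the range via
		 * mm_populate() after releasing mmap_sem. */
		char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
			       MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE,
			       -1, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}

		/* The range should already be resident, so touching it
		 * should incur no (or few) minor faults. */
		memset(p, 0, len);
		munmap(p, len);
		return 0;
	}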
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1250,7 +1250,8 @@ unsigned long do_mmap_pgoff(struct file *file,
 			    unsigned long len,
 			    unsigned long prot,
 			    unsigned long flags,
-			    unsigned long pgoff)
+			    unsigned long pgoff,
+			    bool *populate)
 {
 	struct vm_area_struct *vma;
 	struct vm_region *region;
@@ -1260,6 +1261,8 @@ unsigned long do_mmap_pgoff(struct file *file,
 	kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
 
+	*populate = false;
+
 	/* decide whether we should attempt the mapping, and if so what sort of
 	 * mapping */
 	ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
--- a/mm/util.c
+++ b/mm/util.c
@@ -355,12 +355,16 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
 {
 	unsigned long ret;
 	struct mm_struct *mm = current->mm;
+	bool populate;
 
 	ret = security_mmap_file(file, prot, flag);
 	if (!ret) {
 		down_write(&mm->mmap_sem);
-		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff);
+		ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
+				    &populate);
 		up_write(&mm->mmap_sem);
+		if (!IS_ERR_VALUE(ret) && populate)
+			mm_populate(ret, len);
 	}
 	return ret;
 }