Commit c78b023f authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] numa api: Add VMA hooks for policy

From: Andi Kleen <ak@suse.de>

NUMA API adds a policy to each VMA.  During VMA creation, merging and
splitting, these policies must be handled properly.  This patch adds the
calls for this.

It is a no-op when CONFIG_NUMA is not defined.
parent 490b582a
...@@ -104,6 +104,7 @@ ia64_elf32_init (struct pt_regs *regs) ...@@ -104,6 +104,7 @@ ia64_elf32_init (struct pt_regs *regs)
vma->vm_pgoff = 0; vma->vm_pgoff = 0;
vma->vm_file = NULL; vma->vm_file = NULL;
vma->vm_private_data = NULL; vma->vm_private_data = NULL;
mpol_set_vma_default(vma);
down_write(&current->mm->mmap_sem); down_write(&current->mm->mmap_sem);
{ {
insert_vm_struct(current->mm, vma); insert_vm_struct(current->mm, vma);
...@@ -190,6 +191,7 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack) ...@@ -190,6 +191,7 @@ ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
mpnt->vm_pgoff = 0; mpnt->vm_pgoff = 0;
mpnt->vm_file = NULL; mpnt->vm_file = NULL;
mpnt->vm_private_data = 0; mpnt->vm_private_data = 0;
mpol_set_vma_default(mpnt);
insert_vm_struct(current->mm, mpnt); insert_vm_struct(current->mm, mpnt);
current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
} }
......
...@@ -2321,6 +2321,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon ...@@ -2321,6 +2321,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon
vma->vm_ops = NULL; vma->vm_ops = NULL;
vma->vm_pgoff = 0; vma->vm_pgoff = 0;
vma->vm_file = NULL; vma->vm_file = NULL;
mpol_set_vma_default(vma);
vma->vm_private_data = NULL; vma->vm_private_data = NULL;
/* /*
......
...@@ -132,6 +132,7 @@ ia64_init_addr_space (void) ...@@ -132,6 +132,7 @@ ia64_init_addr_space (void)
vma->vm_pgoff = 0; vma->vm_pgoff = 0;
vma->vm_file = NULL; vma->vm_file = NULL;
vma->vm_private_data = NULL; vma->vm_private_data = NULL;
mpol_set_vma_default(vma);
insert_vm_struct(current->mm, vma); insert_vm_struct(current->mm, vma);
} }
...@@ -144,6 +145,7 @@ ia64_init_addr_space (void) ...@@ -144,6 +145,7 @@ ia64_init_addr_space (void)
vma->vm_end = PAGE_SIZE; vma->vm_end = PAGE_SIZE;
vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED; vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
mpol_set_vma_default(vma);
insert_vm_struct(current->mm, vma); insert_vm_struct(current->mm, vma);
} }
} }
......
...@@ -752,7 +752,7 @@ static int unswap_by_read(unsigned short *map, unsigned long max, ...@@ -752,7 +752,7 @@ static int unswap_by_read(unsigned short *map, unsigned long max,
/* Get a page for the entry, using the existing /* Get a page for the entry, using the existing
swap cache page if there is one. Otherwise, swap cache page if there is one. Otherwise,
get a clean page and read the swap into it. */ get a clean page and read the swap into it. */
page = read_swap_cache_async(entry); page = read_swap_cache_async(entry, NULL, 0);
if (!page) { if (!page) {
swap_free(entry); swap_free(entry);
return -ENOMEM; return -ENOMEM;
......
...@@ -69,6 +69,7 @@ int setup_arg_pages32(struct linux_binprm *bprm, int executable_stack) ...@@ -69,6 +69,7 @@ int setup_arg_pages32(struct linux_binprm *bprm, int executable_stack)
mpnt->vm_ops = NULL; mpnt->vm_ops = NULL;
mpnt->vm_pgoff = 0; mpnt->vm_pgoff = 0;
mpnt->vm_file = NULL; mpnt->vm_file = NULL;
mpol_set_vma_default(mpnt);
INIT_LIST_HEAD(&mpnt->shared); INIT_LIST_HEAD(&mpnt->shared);
mpnt->vm_private_data = (void *) 0; mpnt->vm_private_data = (void *) 0;
insert_vm_struct(mm, mpnt); insert_vm_struct(mm, mpnt);
......
...@@ -365,6 +365,7 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) ...@@ -365,6 +365,7 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
mpnt->vm_ops = NULL; mpnt->vm_ops = NULL;
mpnt->vm_pgoff = 0; mpnt->vm_pgoff = 0;
mpnt->vm_file = NULL; mpnt->vm_file = NULL;
mpol_set_vma_default(mpnt);
INIT_LIST_HEAD(&mpnt->shared); INIT_LIST_HEAD(&mpnt->shared);
mpnt->vm_private_data = (void *) 0; mpnt->vm_private_data = (void *) 0;
insert_vm_struct(mm, mpnt); insert_vm_struct(mm, mpnt);
......
...@@ -427,6 +427,7 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack) ...@@ -427,6 +427,7 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
mpnt->vm_ops = NULL; mpnt->vm_ops = NULL;
mpnt->vm_pgoff = 0; mpnt->vm_pgoff = 0;
mpnt->vm_file = NULL; mpnt->vm_file = NULL;
mpol_set_vma_default(mpnt);
INIT_LIST_HEAD(&mpnt->shared); INIT_LIST_HEAD(&mpnt->shared);
mpnt->vm_private_data = (void *) 0; mpnt->vm_private_data = (void *) 0;
insert_vm_struct(mm, mpnt); insert_vm_struct(mm, mpnt);
......
...@@ -791,6 +791,7 @@ asmlinkage NORET_TYPE void do_exit(long code) ...@@ -791,6 +791,7 @@ asmlinkage NORET_TYPE void do_exit(long code)
__exit_fs(tsk); __exit_fs(tsk);
exit_namespace(tsk); exit_namespace(tsk);
exit_thread(); exit_thread();
mpol_free(tsk->mempolicy);
if (tsk->signal->leader) if (tsk->signal->leader)
disassociate_ctty(1); disassociate_ctty(1);
......
...@@ -271,6 +271,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm) ...@@ -271,6 +271,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
struct rb_node **rb_link, *rb_parent; struct rb_node **rb_link, *rb_parent;
int retval; int retval;
unsigned long charge = 0; unsigned long charge = 0;
struct mempolicy *pol;
down_write(&oldmm->mmap_sem); down_write(&oldmm->mmap_sem);
flush_cache_mm(current->mm); flush_cache_mm(current->mm);
...@@ -312,6 +313,11 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm) ...@@ -312,6 +313,11 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
if (!tmp) if (!tmp)
goto fail_nomem; goto fail_nomem;
*tmp = *mpnt; *tmp = *mpnt;
pol = mpol_copy(vma_policy(mpnt));
retval = PTR_ERR(pol);
if (IS_ERR(pol))
goto fail_nomem_policy;
vma_set_policy(tmp, pol);
tmp->vm_flags &= ~VM_LOCKED; tmp->vm_flags &= ~VM_LOCKED;
tmp->vm_mm = mm; tmp->vm_mm = mm;
tmp->vm_next = NULL; tmp->vm_next = NULL;
...@@ -358,6 +364,8 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm) ...@@ -358,6 +364,8 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
flush_tlb_mm(current->mm); flush_tlb_mm(current->mm);
up_write(&oldmm->mmap_sem); up_write(&oldmm->mmap_sem);
return retval; return retval;
fail_nomem_policy:
kmem_cache_free(vm_area_cachep, tmp);
fail_nomem: fail_nomem:
retval = -ENOMEM; retval = -ENOMEM;
fail: fail:
...@@ -964,10 +972,16 @@ struct task_struct *copy_process(unsigned long clone_flags, ...@@ -964,10 +972,16 @@ struct task_struct *copy_process(unsigned long clone_flags,
p->security = NULL; p->security = NULL;
p->io_context = NULL; p->io_context = NULL;
p->audit_context = NULL; p->audit_context = NULL;
p->mempolicy = mpol_copy(p->mempolicy);
if (IS_ERR(p->mempolicy)) {
retval = PTR_ERR(p->mempolicy);
p->mempolicy = NULL;
goto bad_fork_cleanup;
}
retval = -ENOMEM; retval = -ENOMEM;
if ((retval = security_task_alloc(p))) if ((retval = security_task_alloc(p)))
goto bad_fork_cleanup; goto bad_fork_cleanup_policy;
if ((retval = audit_alloc(p))) if ((retval = audit_alloc(p)))
goto bad_fork_cleanup_security; goto bad_fork_cleanup_security;
/* copy all the process information */ /* copy all the process information */
...@@ -1113,6 +1127,8 @@ struct task_struct *copy_process(unsigned long clone_flags, ...@@ -1113,6 +1127,8 @@ struct task_struct *copy_process(unsigned long clone_flags,
audit_free(p); audit_free(p);
bad_fork_cleanup_security: bad_fork_cleanup_security:
security_task_free(p); security_task_free(p);
bad_fork_cleanup_policy:
mpol_free(p->mempolicy);
bad_fork_cleanup: bad_fork_cleanup:
if (p->pid > 0) if (p->pid > 0)
free_pidmap(p->pid); free_pidmap(p->pid);
......
...@@ -387,7 +387,8 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm, ...@@ -387,7 +387,8 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
struct vm_area_struct *prev, struct vm_area_struct *prev,
struct rb_node *rb_parent, unsigned long addr, struct rb_node *rb_parent, unsigned long addr,
unsigned long end, unsigned long vm_flags, unsigned long end, unsigned long vm_flags,
struct file *file, unsigned long pgoff) struct file *file, unsigned long pgoff,
struct mempolicy *policy)
{ {
spinlock_t *lock = &mm->page_table_lock; spinlock_t *lock = &mm->page_table_lock;
struct inode *inode = file ? file->f_dentry->d_inode : NULL; struct inode *inode = file ? file->f_dentry->d_inode : NULL;
...@@ -411,6 +412,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm, ...@@ -411,6 +412,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
* Can it merge with the predecessor? * Can it merge with the predecessor?
*/ */
if (prev->vm_end == addr && if (prev->vm_end == addr &&
mpol_equal(vma_policy(prev), policy) &&
can_vma_merge_after(prev, vm_flags, file, pgoff)) { can_vma_merge_after(prev, vm_flags, file, pgoff)) {
struct vm_area_struct *next; struct vm_area_struct *next;
int need_up = 0; int need_up = 0;
...@@ -428,6 +430,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm, ...@@ -428,6 +430,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
*/ */
next = prev->vm_next; next = prev->vm_next;
if (next && prev->vm_end == next->vm_start && if (next && prev->vm_end == next->vm_start &&
vma_mpol_equal(prev, next) &&
can_vma_merge_before(next, vm_flags, file, can_vma_merge_before(next, vm_flags, file,
pgoff, (end - addr) >> PAGE_SHIFT)) { pgoff, (end - addr) >> PAGE_SHIFT)) {
prev->vm_end = next->vm_end; prev->vm_end = next->vm_end;
...@@ -440,6 +443,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm, ...@@ -440,6 +443,7 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
fput(file); fput(file);
mm->map_count--; mm->map_count--;
mpol_free(vma_policy(next));
kmem_cache_free(vm_area_cachep, next); kmem_cache_free(vm_area_cachep, next);
return prev; return prev;
} }
...@@ -455,6 +459,8 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm, ...@@ -455,6 +459,8 @@ static struct vm_area_struct *vma_merge(struct mm_struct *mm,
prev = prev->vm_next; prev = prev->vm_next;
if (prev) { if (prev) {
merge_next: merge_next:
if (!mpol_equal(policy, vma_policy(prev)))
return 0;
if (!can_vma_merge_before(prev, vm_flags, file, if (!can_vma_merge_before(prev, vm_flags, file,
pgoff, (end - addr) >> PAGE_SHIFT)) pgoff, (end - addr) >> PAGE_SHIFT))
return NULL; return NULL;
...@@ -631,7 +637,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, ...@@ -631,7 +637,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
/* Can we just expand an old anonymous mapping? */ /* Can we just expand an old anonymous mapping? */
if (!file && !(vm_flags & VM_SHARED) && rb_parent) if (!file && !(vm_flags & VM_SHARED) && rb_parent)
if (vma_merge(mm, prev, rb_parent, addr, addr + len, if (vma_merge(mm, prev, rb_parent, addr, addr + len,
vm_flags, NULL, 0)) vm_flags, NULL, pgoff, NULL))
goto out; goto out;
/* /*
...@@ -654,6 +660,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, ...@@ -654,6 +660,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
vma->vm_file = NULL; vma->vm_file = NULL;
vma->vm_private_data = NULL; vma->vm_private_data = NULL;
vma->vm_next = NULL; vma->vm_next = NULL;
mpol_set_vma_default(vma);
INIT_LIST_HEAD(&vma->shared); INIT_LIST_HEAD(&vma->shared);
if (file) { if (file) {
...@@ -693,7 +700,9 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, ...@@ -693,7 +700,9 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
addr = vma->vm_start; addr = vma->vm_start;
if (!file || !rb_parent || !vma_merge(mm, prev, rb_parent, addr, if (!file || !rb_parent || !vma_merge(mm, prev, rb_parent, addr,
addr + len, vma->vm_flags, file, pgoff)) { vma->vm_end,
vma->vm_flags, file, pgoff,
vma_policy(vma))) {
vma_link(mm, vma, prev, rb_link, rb_parent); vma_link(mm, vma, prev, rb_link, rb_parent);
if (correct_wcount) if (correct_wcount)
atomic_inc(&inode->i_writecount); atomic_inc(&inode->i_writecount);
...@@ -703,6 +712,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, ...@@ -703,6 +712,7 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
atomic_inc(&inode->i_writecount); atomic_inc(&inode->i_writecount);
fput(file); fput(file);
} }
mpol_free(vma_policy(vma));
kmem_cache_free(vm_area_cachep, vma); kmem_cache_free(vm_area_cachep, vma);
} }
out: out:
...@@ -1118,6 +1128,7 @@ static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area) ...@@ -1118,6 +1128,7 @@ static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
remove_shared_vm_struct(area); remove_shared_vm_struct(area);
mpol_free(vma_policy(area));
if (area->vm_ops && area->vm_ops->close) if (area->vm_ops && area->vm_ops->close)
area->vm_ops->close(area); area->vm_ops->close(area);
if (area->vm_file) if (area->vm_file)
...@@ -1200,6 +1211,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1200,6 +1211,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
unsigned long addr, int new_below) unsigned long addr, int new_below)
{ {
struct mempolicy *pol;
struct vm_area_struct *new; struct vm_area_struct *new;
struct address_space *mapping = NULL; struct address_space *mapping = NULL;
...@@ -1222,6 +1234,13 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma, ...@@ -1222,6 +1234,13 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT); new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
} }
pol = mpol_copy(vma_policy(vma));
if (IS_ERR(pol)) {
kmem_cache_free(vm_area_cachep, new);
return PTR_ERR(pol);
}
vma_set_policy(new, pol);
if (new->vm_file) if (new->vm_file)
get_file(new->vm_file); get_file(new->vm_file);
...@@ -1391,7 +1410,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) ...@@ -1391,7 +1410,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
/* Can we just expand an old anonymous mapping? */ /* Can we just expand an old anonymous mapping? */
if (rb_parent && vma_merge(mm, prev, rb_parent, addr, addr + len, if (rb_parent && vma_merge(mm, prev, rb_parent, addr, addr + len,
flags, NULL, 0)) flags, NULL, 0, NULL))
goto out; goto out;
/* /*
...@@ -1412,6 +1431,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len) ...@@ -1412,6 +1431,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
vma->vm_pgoff = 0; vma->vm_pgoff = 0;
vma->vm_file = NULL; vma->vm_file = NULL;
vma->vm_private_data = NULL; vma->vm_private_data = NULL;
mpol_set_vma_default(vma);
INIT_LIST_HEAD(&vma->shared); INIT_LIST_HEAD(&vma->shared);
vma_link(mm, vma, prev, rb_link, rb_parent); vma_link(mm, vma, prev, rb_link, rb_parent);
...@@ -1472,6 +1492,7 @@ void exit_mmap(struct mm_struct *mm) ...@@ -1472,6 +1492,7 @@ void exit_mmap(struct mm_struct *mm)
} }
if (vma->vm_file) if (vma->vm_file)
fput(vma->vm_file); fput(vma->vm_file);
mpol_free(vma_policy(vma));
kmem_cache_free(vm_area_cachep, vma); kmem_cache_free(vm_area_cachep, vma);
vma = next; vma = next;
} }
...@@ -1508,7 +1529,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, ...@@ -1508,7 +1529,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent); find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
new_vma = vma_merge(mm, prev, rb_parent, addr, addr + len, new_vma = vma_merge(mm, prev, rb_parent, addr, addr + len,
vma->vm_flags, vma->vm_file, pgoff); vma->vm_flags, vma->vm_file, pgoff, vma_policy(vma));
if (new_vma) { if (new_vma) {
/* /*
* Source vma may have been merged into new_vma * Source vma may have been merged into new_vma
......
...@@ -125,6 +125,8 @@ mprotect_attempt_merge(struct vm_area_struct *vma, struct vm_area_struct *prev, ...@@ -125,6 +125,8 @@ mprotect_attempt_merge(struct vm_area_struct *vma, struct vm_area_struct *prev,
return 0; return 0;
if (vma->vm_file || (vma->vm_flags & VM_SHARED)) if (vma->vm_file || (vma->vm_flags & VM_SHARED))
return 0; return 0;
if (!vma_mpol_equal(vma, prev))
return 0;
/* /*
* If the whole area changes to the protection of the previous one * If the whole area changes to the protection of the previous one
...@@ -136,6 +138,7 @@ mprotect_attempt_merge(struct vm_area_struct *vma, struct vm_area_struct *prev, ...@@ -136,6 +138,7 @@ mprotect_attempt_merge(struct vm_area_struct *vma, struct vm_area_struct *prev,
__vma_unlink(mm, vma, prev); __vma_unlink(mm, vma, prev);
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
mpol_free(vma_policy(vma));
kmem_cache_free(vm_area_cachep, vma); kmem_cache_free(vm_area_cachep, vma);
mm->map_count--; mm->map_count--;
return 1; return 1;
...@@ -317,12 +320,14 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot) ...@@ -317,12 +320,14 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot)
if (next && prev->vm_end == next->vm_start && if (next && prev->vm_end == next->vm_start &&
can_vma_merge(next, prev->vm_flags) && can_vma_merge(next, prev->vm_flags) &&
vma_mpol_equal(prev, next) &&
!prev->vm_file && !(prev->vm_flags & VM_SHARED)) { !prev->vm_file && !(prev->vm_flags & VM_SHARED)) {
spin_lock(&prev->vm_mm->page_table_lock); spin_lock(&prev->vm_mm->page_table_lock);
prev->vm_end = next->vm_end; prev->vm_end = next->vm_end;
__vma_unlink(prev->vm_mm, next, prev); __vma_unlink(prev->vm_mm, next, prev);
spin_unlock(&prev->vm_mm->page_table_lock); spin_unlock(&prev->vm_mm->page_table_lock);
mpol_free(vma_policy(next));
kmem_cache_free(vm_area_cachep, next); kmem_cache_free(vm_area_cachep, next);
prev->vm_mm->map_count--; prev->vm_mm->map_count--;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment