Commit c40f6f8b authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-nommu

* git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-2.6-nommu:
  NOMMU: Support XIP on initramfs
  NOMMU: Teach kobjsize() about VMA regions.
  FLAT: Don't attempt to expand the userspace stack to fill the space allocated
  FDPIC: Don't attempt to expand the userspace stack to fill the space allocated
  NOMMU: Improve procfs output using per-MM VMAs
  NOMMU: Make mmap allocation page trimming behaviour configurable.
  NOMMU: Make VMAs per MM as for MMU-mode linux
  NOMMU: Delete askedalloc and realalloc variables
  NOMMU: Rename ARM's struct vm_region
  NOMMU: Fix cleanup handling in ramfs_nommu_get_umapped_area()
parents 1a7d0f0b cb6ff208
......@@ -109,12 +109,18 @@ and it's also much more restricted in the latter case:
FURTHER NOTES ON NO-MMU MMAP
============================
(*) A request for a private mapping of less than a page in size may not return
a page-aligned buffer. This is because the kernel calls kmalloc() to
allocate the buffer, not get_free_page().
(*) A request for a private mapping of a file may return a buffer that is not
page-aligned. This is because XIP may take place, and the data may not be
page-aligned in the backing store.
(*) A list of all the mappings on the system is visible through /proc/maps in
no-MMU mode.
(*) A request for an anonymous mapping will always be page aligned. If
possible, the size of the request should be a power of two; otherwise, some
of the space may be wasted, as the kernel must allocate a power-of-2
granule but will only discard the excess if appropriately configured, as
this has an effect on fragmentation.
(*) A list of all the private copy and anonymous mappings on the system is
visible through /proc/maps in no-MMU mode.
(*) A list of all the mappings in use by a process is visible through
/proc/<pid>/maps in no-MMU mode.
......@@ -242,3 +248,18 @@ PROVIDING SHAREABLE BLOCK DEVICE SUPPORT
Provision of shared mappings on block device files is exactly the same as for
character devices. If there isn't a real device underneath, then the driver
should allocate sufficient contiguous memory to honour any supported mapping.
=================================
ADJUSTING PAGE TRIMMING BEHAVIOUR
=================================
NOMMU mmap automatically rounds up to the nearest power-of-2 number of pages
when performing an allocation. This can have adverse effects on memory
fragmentation, so the trimming of the excess pages is left configurable. The
default behaviour is to aggressively trim allocations and return any excess
pages to the page allocator. In order to retain finer-grained control over
fragmentation, this behaviour can either be disabled completely, or bumped up
to a higher page watermark where trimming begins.
Page trimming behaviour is configurable via the sysctl `vm.nr_trim_pages'.
......@@ -38,6 +38,7 @@ Currently, these files are in /proc/sys/vm:
- numa_zonelist_order
- nr_hugepages
- nr_overcommit_hugepages
- nr_trim_pages (only if CONFIG_MMU=n)
==============================================================
......@@ -348,3 +349,20 @@ Change the maximum size of the hugepage pool. The maximum is
nr_hugepages + nr_overcommit_hugepages.
See Documentation/vm/hugetlbpage.txt
==============================================================
nr_trim_pages
This is available only on NOMMU kernels.
This value adjusts the excess page trimming behaviour of power-of-2 aligned
NOMMU mmap allocations.
A value of 0 disables trimming of allocations entirely, while a value of 1
trims excess pages aggressively. Any value >= 1 acts as the watermark where
trimming of allocations is initiated.
The default value is 1.
See Documentation/nommu-mmap.txt for more information.
......@@ -24,7 +24,6 @@ typedef struct {
* modified for 2.6 by Hyok S. Choi <hyok.choi@samsung.com>
*/
typedef struct {
struct vm_list_struct *vmlist;
unsigned long end_brk;
} mm_context_t;
......
......@@ -71,7 +71,7 @@ static DEFINE_SPINLOCK(consistent_lock);
* the amount of RAM found at boot time.) I would imagine that get_vm_area()
* would have to initialise this each time prior to calling vm_region_alloc().
*/
struct vm_region {
struct arm_vm_region {
struct list_head vm_list;
unsigned long vm_start;
unsigned long vm_end;
......@@ -79,20 +79,20 @@ struct vm_region {
int vm_active;
};
static struct vm_region consistent_head = {
static struct arm_vm_region consistent_head = {
.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
.vm_start = CONSISTENT_BASE,
.vm_end = CONSISTENT_END,
};
static struct vm_region *
vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
static struct arm_vm_region *
arm_vm_region_alloc(struct arm_vm_region *head, size_t size, gfp_t gfp)
{
unsigned long addr = head->vm_start, end = head->vm_end - size;
unsigned long flags;
struct vm_region *c, *new;
struct arm_vm_region *c, *new;
new = kmalloc(sizeof(struct vm_region), gfp);
new = kmalloc(sizeof(struct arm_vm_region), gfp);
if (!new)
goto out;
......@@ -127,9 +127,9 @@ vm_region_alloc(struct vm_region *head, size_t size, gfp_t gfp)
return NULL;
}
static struct vm_region *vm_region_find(struct vm_region *head, unsigned long addr)
static struct arm_vm_region *arm_vm_region_find(struct arm_vm_region *head, unsigned long addr)
{
struct vm_region *c;
struct arm_vm_region *c;
list_for_each_entry(c, &head->vm_list, vm_list) {
if (c->vm_active && c->vm_start == addr)
......@@ -149,7 +149,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
pgprot_t prot)
{
struct page *page;
struct vm_region *c;
struct arm_vm_region *c;
unsigned long order;
u64 mask = ISA_DMA_THRESHOLD, limit;
......@@ -214,7 +214,7 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
/*
* Allocate a virtual address in the consistent mapping region.
*/
c = vm_region_alloc(&consistent_head, size,
c = arm_vm_region_alloc(&consistent_head, size,
gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
if (c) {
pte_t *pte;
......@@ -311,13 +311,13 @@ static int dma_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size)
{
unsigned long flags, user_size, kern_size;
struct vm_region *c;
struct arm_vm_region *c;
int ret = -ENXIO;
user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
spin_lock_irqsave(&consistent_lock, flags);
c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
spin_unlock_irqrestore(&consistent_lock, flags);
if (c) {
......@@ -359,7 +359,7 @@ EXPORT_SYMBOL(dma_mmap_writecombine);
*/
void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle)
{
struct vm_region *c;
struct arm_vm_region *c;
unsigned long flags, addr;
pte_t *ptep;
int idx;
......@@ -378,7 +378,7 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr
size = PAGE_ALIGN(size);
spin_lock_irqsave(&consistent_lock, flags);
c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
c = arm_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
if (!c)
goto no_area;
......
......@@ -10,7 +10,6 @@ struct sram_list_struct {
};
typedef struct {
struct vm_list_struct *vmlist;
unsigned long end_brk;
unsigned long stack_start;
......
......@@ -160,15 +160,15 @@ put_reg(struct task_struct *task, int regno, unsigned long data)
static inline int is_user_addr_valid(struct task_struct *child,
unsigned long start, unsigned long len)
{
struct vm_list_struct *vml;
struct vm_area_struct *vma;
struct sram_list_struct *sraml;
/* overflow */
if (start + len < start)
return -EIO;
for (vml = child->mm->context.vmlist; vml; vml = vml->next)
if (start >= vml->vma->vm_start && start + len < vml->vma->vm_end)
vma = find_vma(child->mm, start);
if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
return 0;
for (sraml = child->mm->context.sram_list; sraml; sraml = sraml->next)
......
......@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/fs.h>
#include <linux/rbtree.h>
#include <asm/traps.h>
#include <asm/cacheflush.h>
#include <asm/cplb.h>
......@@ -83,6 +84,7 @@ static void decode_address(char *buf, unsigned long address)
struct mm_struct *mm;
unsigned long flags, offset;
unsigned char in_atomic = (bfin_read_IPEND() & 0x10) || in_atomic();
struct rb_node *n;
#ifdef CONFIG_KALLSYMS
unsigned long symsize;
......@@ -128,9 +130,10 @@ static void decode_address(char *buf, unsigned long address)
if (!mm)
continue;
vml = mm->context.vmlist;
while (vml) {
struct vm_area_struct *vma = vml->vma;
for (n = rb_first(&mm->mm_rb); n; n = rb_next(n)) {
struct vm_area_struct *vma;
vma = rb_entry(n, struct vm_area_struct, vm_rb);
if (address >= vma->vm_start && address < vma->vm_end) {
char _tmpbuf[256];
......@@ -176,8 +179,6 @@ static void decode_address(char *buf, unsigned long address)
goto done;
}
vml = vml->next;
}
if (!in_atomic)
mmput(mm);
......
......@@ -69,7 +69,8 @@ static inline int put_reg(struct task_struct *task, int regno,
}
/*
* check that an address falls within the bounds of the target process's memory mappings
* check that an address falls within the bounds of the target process's memory
* mappings
*/
static inline int is_user_addr_valid(struct task_struct *child,
unsigned long start, unsigned long len)
......@@ -79,11 +80,11 @@ static inline int is_user_addr_valid(struct task_struct *child,
return -EIO;
return 0;
#else
struct vm_list_struct *vml;
struct vm_area_struct *vma;
for (vml = child->mm->context.vmlist; vml; vml = vml->next)
if (start >= vml->vma->vm_start && start + len <= vml->vma->vm_end)
return 0;
vma = find_vma(child->mm, start);
if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
return 0;
return -EIO;
#endif
......
......@@ -4,7 +4,6 @@
/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
typedef struct {
struct vm_list_struct *vmlist;
unsigned long end_brk;
} mm_context_t;
......
......@@ -4,7 +4,6 @@
/* Copyright (C) 2002, David McCullough <davidm@snapgear.com> */
typedef struct {
struct vm_list_struct *vmlist;
unsigned long end_brk;
} mm_context_t;
......
......@@ -9,7 +9,6 @@ typedef struct {
mm_context_id_t id;
void *vdso;
#else
struct vm_list_struct *vmlist;
unsigned long end_brk;
#endif
#ifdef CONFIG_BINFMT_ELF_FDPIC
......
......@@ -168,9 +168,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
struct elf_fdpic_params exec_params, interp_params;
struct elf_phdr *phdr;
unsigned long stack_size, entryaddr;
#ifndef CONFIG_MMU
unsigned long fullsize;
#endif
#ifdef ELF_FDPIC_PLAT_INIT
unsigned long dynaddr;
#endif
......@@ -390,11 +387,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
goto error_kill;
}
/* expand the stack mapping to use up the entire allocation granule */
fullsize = kobjsize((char *) current->mm->start_brk);
if (!IS_ERR_VALUE(do_mremap(current->mm->start_brk, stack_size,
fullsize, 0, 0)))
stack_size = fullsize;
up_write(&current->mm->mmap_sem);
current->mm->brk = current->mm->start_brk;
......@@ -1567,11 +1559,9 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
static int elf_fdpic_dump_segments(struct file *file, size_t *size,
unsigned long *limit, unsigned long mm_flags)
{
struct vm_list_struct *vml;
for (vml = current->mm->context.vmlist; vml; vml = vml->next) {
struct vm_area_struct *vma = vml->vma;
struct vm_area_struct *vma;
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
if (!maydump(vma, mm_flags))
continue;
......@@ -1617,9 +1607,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
elf_fpxregset_t *xfpu = NULL;
#endif
int thread_status_size = 0;
#ifndef CONFIG_MMU
struct vm_list_struct *vml;
#endif
elf_addr_t *auxv;
unsigned long mm_flags;
......@@ -1685,13 +1672,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
fill_prstatus(prstatus, current, signr);
elf_core_copy_regs(&prstatus->pr_reg, regs);
#ifdef CONFIG_MMU
segs = current->mm->map_count;
#else
segs = 0;
for (vml = current->mm->context.vmlist; vml; vml = vml->next)
segs++;
#endif
#ifdef ELF_CORE_EXTRA_PHDRS
segs += ELF_CORE_EXTRA_PHDRS;
#endif
......@@ -1766,20 +1747,10 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
mm_flags = current->mm->flags;
/* write program headers for segments dump */
for (
#ifdef CONFIG_MMU
vma = current->mm->mmap; vma; vma = vma->vm_next
#else
vml = current->mm->context.vmlist; vml; vml = vml->next
#endif
) {
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
struct elf_phdr phdr;
size_t sz;
#ifndef CONFIG_MMU
vma = vml->vma;
#endif
sz = vma->vm_end - vma->vm_start;
phdr.p_type = PT_LOAD;
......
......@@ -417,8 +417,8 @@ static int load_flat_file(struct linux_binprm * bprm,
unsigned long textpos = 0, datapos = 0, result;
unsigned long realdatastart = 0;
unsigned long text_len, data_len, bss_len, stack_len, flags;
unsigned long len, reallen, memp = 0;
unsigned long extra, rlim;
unsigned long len, memp = 0;
unsigned long memp_size, extra, rlim;
unsigned long *reloc = 0, *rp;
struct inode *inode;
int i, rev, relocs = 0;
......@@ -543,17 +543,10 @@ static int load_flat_file(struct linux_binprm * bprm,
}
len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
down_write(&current->mm->mmap_sem);
realdatastart = do_mmap(0, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
/* Remap to use all availabe slack region space */
if (realdatastart && (realdatastart < (unsigned long)-4096)) {
reallen = kobjsize((void *)realdatastart);
if (reallen > len) {
realdatastart = do_mremap(realdatastart, len,
reallen, MREMAP_FIXED, realdatastart);
}
}
up_write(&current->mm->mmap_sem);
if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) {
......@@ -591,21 +584,14 @@ static int load_flat_file(struct linux_binprm * bprm,
reloc = (unsigned long *) (datapos+(ntohl(hdr->reloc_start)-text_len));
memp = realdatastart;
memp_size = len;
} else {
len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
down_write(&current->mm->mmap_sem);
textpos = do_mmap(0, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
/* Remap to use all availabe slack region space */
if (textpos && (textpos < (unsigned long) -4096)) {
reallen = kobjsize((void *)textpos);
if (reallen > len) {
textpos = do_mremap(textpos, len, reallen,
MREMAP_FIXED, textpos);
}
}
up_write(&current->mm->mmap_sem);
if (!textpos || textpos >= (unsigned long) -4096) {
......@@ -622,7 +608,7 @@ static int load_flat_file(struct linux_binprm * bprm,
reloc = (unsigned long *) (textpos + ntohl(hdr->reloc_start) +
MAX_SHARED_LIBS * sizeof(unsigned long));
memp = textpos;
memp_size = len;
#ifdef CONFIG_BINFMT_ZFLAT
/*
* load it all in and treat it like a RAM load from now on
......@@ -680,10 +666,12 @@ static int load_flat_file(struct linux_binprm * bprm,
* set up the brk stuff, uses any slack left in data/bss/stack
* allocation. We put the brk after the bss (between the bss
* and stack) like other platforms.
* Userspace code relies on the stack pointer starting out at
* an address right at the end of a page.
*/
current->mm->start_brk = datapos + data_len + bss_len;
current->mm->brk = (current->mm->start_brk + 3) & ~3;
current->mm->context.end_brk = memp + kobjsize((void *) memp) - stack_len;
current->mm->context.end_brk = memp + memp_size - stack_len;
}
if (flags & FLAT_FLAG_KTRACE)
......@@ -790,8 +778,8 @@ static int load_flat_file(struct linux_binprm * bprm,
/* zero the BSS, BRK and stack areas */
memset((void*)(datapos + data_len), 0, bss_len +
(memp + kobjsize((void *) memp) - stack_len - /* end brk */
libinfo->lib_list[id].start_brk) + /* start brk */
(memp + memp_size - stack_len - /* end brk */
libinfo->lib_list[id].start_brk) + /* start brk */
stack_len);
return 0;
......
......@@ -41,8 +41,6 @@ do { \
(vmi)->used = 0; \
(vmi)->largest_chunk = 0; \
} while(0)
extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *);
#endif
extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
......
......@@ -73,6 +73,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
"HighFree: %8lu kB\n"
"LowTotal: %8lu kB\n"
"LowFree: %8lu kB\n"
#endif
#ifndef CONFIG_MMU
"MmapCopy: %8lu kB\n"
#endif
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n"
......@@ -115,6 +118,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
K(i.freehigh),
K(i.totalram-i.totalhigh),
K(i.freeram-i.freehigh),
#endif
#ifndef CONFIG_MMU
K((unsigned long) atomic_read(&mmap_pages_allocated)),
#endif
K(i.totalswap),
K(i.freeswap),
......
......@@ -33,33 +33,33 @@
#include "internal.h"
/*
* display a single VMA to a sequenced file
* display a single region to a sequenced file
*/
int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
static int nommu_region_show(struct seq_file *m, struct vm_region *region)
{
unsigned long ino = 0;
struct file *file;
dev_t dev = 0;
int flags, len;
flags = vma->vm_flags;
file = vma->vm_file;
flags = region->vm_flags;
file = region->vm_file;
if (file) {
struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
struct inode *inode = region->vm_file->f_path.dentry->d_inode;
dev = inode->i_sb->s_dev;
ino = inode->i_ino;
}
seq_printf(m,
"%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
vma->vm_start,
vma->vm_end,
region->vm_start,
region->vm_end,
flags & VM_READ ? 'r' : '-',
flags & VM_WRITE ? 'w' : '-',
flags & VM_EXEC ? 'x' : '-',
flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
((loff_t)vma->vm_pgoff) << PAGE_SHIFT,
((loff_t)region->vm_pgoff) << PAGE_SHIFT,
MAJOR(dev), MINOR(dev), ino, &len);
if (file) {
......@@ -75,61 +75,54 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
}
/*
* display a list of all the VMAs the kernel knows about
* display a list of all the REGIONs the kernel knows about
* - nommu kernels have a single flat list
*/
static int nommu_vma_list_show(struct seq_file *m, void *v)
static int nommu_region_list_show(struct seq_file *m, void *_p)
{
struct vm_area_struct *vma;
struct rb_node *p = _p;
vma = rb_entry((struct rb_node *) v, struct vm_area_struct, vm_rb);
return nommu_vma_show(m, vma);
return nommu_region_show(m, rb_entry(p, struct vm_region, vm_rb));
}
static void *nommu_vma_list_start(struct seq_file *m, loff_t *_pos)
static void *nommu_region_list_start(struct seq_file *m, loff_t *_pos)
{
struct rb_node *_rb;
struct rb_node *p;
loff_t pos = *_pos;
void *next = NULL;
down_read(&nommu_vma_sem);
down_read(&nommu_region_sem);
for (_rb = rb_first(&nommu_vma_tree); _rb; _rb = rb_next(_rb)) {
if (pos == 0) {
next = _rb;
break;
}
pos--;
}
return next;
for (p = rb_first(&nommu_region_tree); p; p = rb_next(p))
if (pos-- == 0)
return p;
return NULL;
}
static void nommu_vma_list_stop(struct seq_file *m, void *v)
static void nommu_region_list_stop(struct seq_file *m, void *v)
{
up_read(&nommu_vma_sem);
up_read(&nommu_region_sem);
}
static void *nommu_vma_list_next(struct seq_file *m, void *v, loff_t *pos)
static void *nommu_region_list_next(struct seq_file *m, void *v, loff_t *pos)
{
(*pos)++;
return rb_next((struct rb_node *) v);
}
static const struct seq_operations proc_nommu_vma_list_seqop = {
.start = nommu_vma_list_start,
.next = nommu_vma_list_next,
.stop = nommu_vma_list_stop,
.show = nommu_vma_list_show
static struct seq_operations proc_nommu_region_list_seqop = {
.start = nommu_region_list_start,
.next = nommu_region_list_next,
.stop = nommu_region_list_stop,
.show = nommu_region_list_show
};
static int proc_nommu_vma_list_open(struct inode *inode, struct file *file)
static int proc_nommu_region_list_open(struct inode *inode, struct file *file)
{
return seq_open(file, &proc_nommu_vma_list_seqop);
return seq_open(file, &proc_nommu_region_list_seqop);
}
static const struct file_operations proc_nommu_vma_list_operations = {
.open = proc_nommu_vma_list_open,
static const struct file_operations proc_nommu_region_list_operations = {
.open = proc_nommu_region_list_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
......@@ -137,7 +130,7 @@ static const struct file_operations proc_nommu_vma_list_operations = {
static int __init proc_nommu_init(void)
{
proc_create("maps", S_IRUGO, NULL, &proc_nommu_vma_list_operations);
proc_create("maps", S_IRUGO, NULL, &proc_nommu_region_list_operations);
return 0;
}
......
......@@ -15,25 +15,32 @@
*/
void task_mem(struct seq_file *m, struct mm_struct *mm)
{
struct vm_list_struct *vml;
unsigned long bytes = 0, sbytes = 0, slack = 0;
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *p;
unsigned long bytes = 0, sbytes = 0, slack = 0, size;
down_read(&mm->mmap_sem);
for (vml = mm->context.vmlist; vml; vml = vml->next) {
if (!vml->vma)
continue;
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
bytes += kobjsize(vma);
region = vma->vm_region;
if (region) {
size = kobjsize(region);
size += region->vm_end - region->vm_start;
} else {
size = vma->vm_end - vma->vm_start;
}
bytes += kobjsize(vml);
if (atomic_read(&mm->mm_count) > 1 ||
atomic_read(&vml->vma->vm_usage) > 1
) {
sbytes += kobjsize((void *) vml->vma->vm_start);
sbytes += kobjsize(vml->vma);
vma->vm_flags & VM_MAYSHARE) {
sbytes += size;
} else {
bytes += kobjsize((void *) vml->vma->vm_start);
bytes += kobjsize(vml->vma);
slack += kobjsize((void *) vml->vma->vm_start) -
(vml->vma->vm_end - vml->vma->vm_start);
bytes += size;
if (region)
slack = region->vm_end - vma->vm_end;
}
}
......@@ -70,13 +77,14 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
unsigned long task_vsize(struct mm_struct *mm)
{
struct vm_list_struct *tbp;
struct vm_area_struct *vma;
struct rb_node *p;
unsigned long vsize = 0;
down_read(&mm->mmap_sem);
for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) {
if (tbp->vma)
vsize += kobjsize((void *) tbp->vma->vm_start);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
vsize += vma->vm_end - vma->vm_start;
}
up_read(&mm->mmap_sem);
return vsize;
......@@ -85,15 +93,19 @@ unsigned long task_vsize(struct mm_struct *mm)
int task_statm(struct mm_struct *mm, int *shared, int *text,
int *data, int *resident)
{
struct vm_list_struct *tbp;
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *p;
int size = kobjsize(mm);
down_read(&mm->mmap_sem);
for (tbp = mm->context.vmlist; tbp; tbp = tbp->next) {
size += kobjsize(tbp);
if (tbp->vma) {
size += kobjsize(tbp->vma);
size += kobjsize((void *) tbp->vma->vm_start);
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) {
vma = rb_entry(p, struct vm_area_struct, vm_rb);
size += kobjsize(vma);
region = vma->vm_region;
if (region) {
size += kobjsize(region);
size += region->vm_end - region->vm_start;
}
}
......@@ -104,21 +116,63 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
return size;
}
/*
 * display a single VMA to a sequenced file
 * - emits one /proc/<pid>/maps style line: address range, permission flags,
 *   file offset, device major:minor and inode number, followed by the path
 *   of the backing file when there is one
 * - the page offset is widened to 64 bits before being shifted so that a
 *   byte offset that does not fit in an unsigned long is not truncated; this
 *   matches the %08llx formatting used for regions in /proc/maps
 * - always returns 0
 */
static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
{
	unsigned long ino = 0;
	struct file *file;
	dev_t dev = 0;
	int flags, len;

	flags = vma->vm_flags;
	file = vma->vm_file;

	/* device and inode numbers stay 0 for mappings with no backing file */
	if (file) {
		struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
		dev = inode->i_sb->s_dev;
		ino = inode->i_ino;
	}

	seq_printf(m,
		   "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
		   vma->vm_start,
		   vma->vm_end,
		   flags & VM_READ ? 'r' : '-',
		   flags & VM_WRITE ? 'w' : '-',
		   flags & VM_EXEC ? 'x' : '-',
		   flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
		   (unsigned long long) vma->vm_pgoff << PAGE_SHIFT,
		   MAJOR(dev), MINOR(dev), ino, &len);

	if (file) {
		/* the %n above stored the number of characters emitted so far
		 * in len; turn that into the amount of padding needed to start
		 * the path at a fixed column (at least one space) */
		len = 25 + sizeof(void *) * 6 - len;
		if (len < 1)
			len = 1;
		seq_printf(m, "%*c", len, ' ');
		seq_path(m, &file->f_path, "");
	}

	seq_putc(m, '\n');
	return 0;
}
/*
* display mapping lines for a particular process's /proc/pid/maps
*/
static int show_map(struct seq_file *m, void *_vml)
static int show_map(struct seq_file *m, void *_p)
{
struct vm_list_struct *vml = _vml;
struct rb_node *p = _p;
return nommu_vma_show(m, vml->vma);
return nommu_vma_show(m, rb_entry(p, struct vm_area_struct, vm_rb));
}
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct proc_maps_private *priv = m->private;
struct vm_list_struct *vml;
struct mm_struct *mm;
struct rb_node *p;
loff_t n = *pos;
/* pin the task and mm whilst we play with them */
......@@ -134,9 +188,9 @@ static void *m_start(struct seq_file *m, loff_t *pos)
}
/* start from the Nth VMA */
for (vml = mm->context.vmlist; vml; vml = vml->next)
for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
if (n-- == 0)
return vml;
return p;
return NULL;
}
......@@ -152,12 +206,12 @@ static void m_stop(struct seq_file *m, void *_vml)
}
}
static void *m_next(struct seq_file *m, void *_vml, loff_t *pos)
static void *m_next(struct seq_file *m, void *_p, loff_t *pos)
{
struct vm_list_struct *vml = _vml;
struct rb_node *p = _p;
(*pos)++;
return vml ? vml->next : NULL;
return p ? rb_next(p) : NULL;
}
static const struct seq_operations proc_pid_maps_ops = {
......
......@@ -262,11 +262,11 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
ret = -ENOMEM;
pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL);
if (!pages)
goto out;
goto out_free;
nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages);
if (nr != lpages)
goto out; /* leave if some pages were missing */
goto out_free_pages; /* leave if some pages were missing */
/* check the pages for physical adjacency */
ptr = pages;
......@@ -274,19 +274,18 @@ unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
page++;
for (loop = lpages; loop > 1; loop--)
if (*ptr++ != page++)
goto out;
goto out_free_pages;
/* okay - all conditions fulfilled */
ret = (unsigned long) page_address(pages[0]);
out:
if (pages) {
ptr = pages;
for (loop = lpages; loop > 0; loop--)
put_page(*ptr++);
kfree(pages);
}
out_free_pages:
ptr = pages;
for (loop = nr; loop > 0; loop--)
put_page(*ptr++);
out_free:
kfree(pages);
out:
return ret;
}
......
......@@ -22,7 +22,6 @@ typedef struct {
unsigned long dtlb_ptd_mapping; /* [DAMR5] PTD mapping for dtlb cached PGE */
#else
struct vm_list_struct *vmlist;
unsigned long end_brk;
#endif
......
......@@ -4,7 +4,6 @@
#if !defined(CONFIG_MMU)
typedef struct {
struct vm_list_struct *vmlist;
unsigned long end_brk;
} mm_context_t;
......
......@@ -56,19 +56,9 @@ extern unsigned long mmap_min_addr;
extern struct kmem_cache *vm_area_cachep;
/*
* This struct defines the per-mm list of VMAs for uClinux. If CONFIG_MMU is
* disabled, then there's a single shared list of VMAs maintained by the
* system, and mm's subscribe to these individually
*/
struct vm_list_struct {
struct vm_list_struct *next;
struct vm_area_struct *vma;
};
#ifndef CONFIG_MMU
extern struct rb_root nommu_vma_tree;
extern struct rw_semaphore nommu_vma_sem;
extern struct rb_root nommu_region_tree;
extern struct rw_semaphore nommu_region_sem;
extern unsigned int kobjsize(const void *objp);
#endif
......@@ -1061,6 +1051,7 @@ extern void memmap_init_zone(unsigned long, int, unsigned long,
unsigned long, enum memmap_context);
extern void setup_per_zone_pages_min(void);
extern void mem_init(void);
extern void __init mmap_init(void);
extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
......@@ -1072,6 +1063,9 @@ extern void setup_per_cpu_pageset(void);
static inline void setup_per_cpu_pageset(void) {}
#endif
/* nommu.c */
extern atomic_t mmap_pages_allocated;
/* prio_tree.c */
void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
......
......@@ -96,6 +96,23 @@ struct page {
#endif /* WANT_PAGE_VIRTUAL */
};
/*
 * A region containing a mapping of a non-memory backed file under NOMMU
 * conditions. These are held in a global tree and are pinned by the VMAs that
 * map parts of them.
 *
 * vm_start..vm_end is the part of the region that has been initialised,
 * whereas vm_top records where the underlying allocation actually ends.
 * NOTE(review): presumably vm_start <= vm_end <= vm_top always holds - confirm
 * against the code that sets these fields.
 */
struct vm_region {
struct rb_node vm_rb; /* link in global region tree */
unsigned long vm_flags; /* VMA vm_flags */
unsigned long vm_start; /* start address of region */
unsigned long vm_end; /* region initialised to here */
unsigned long vm_top; /* region allocated to here */
unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
struct file *vm_file; /* the backing file or NULL */
atomic_t vm_usage; /* region usage count */
};
/*
* This struct defines a VMM memory area. There is one of these
* per VM-area/task. A VM area is any part of the process virtual memory
......@@ -152,7 +169,7 @@ struct vm_area_struct {
unsigned long vm_truncate_count;/* truncate_count or restart_addr */
#ifndef CONFIG_MMU
atomic_t vm_usage; /* refcount (VMAs shared if !MMU) */
struct vm_region *vm_region; /* NOMMU mapping region */
#endif
#ifdef CONFIG_NUMA
struct mempolicy *vm_policy; /* NUMA policy for the VMA */
......
......@@ -317,6 +317,7 @@ static int __init do_name(void)
if (wfd >= 0) {
sys_fchown(wfd, uid, gid);
sys_fchmod(wfd, mode);
sys_ftruncate(wfd, body_len);
vcollected = kstrdup(collected, GFP_KERNEL);
state = CopyFile;
}
......
......@@ -990,6 +990,7 @@ asmlinkage long sys_shmdt(char __user *shmaddr)
*/
vma = find_vma(mm, addr);
#ifdef CONFIG_MMU
while (vma) {
next = vma->vm_next;
......@@ -1034,6 +1035,17 @@ asmlinkage long sys_shmdt(char __user *shmaddr)
vma = next;
}
#else /* CONFIG_MMU */
/* under NOMMU conditions, the exact address to be destroyed must be
 * given
 * - find_vma() may not find anything, in which case vma is NULL and must not
 *   be dereferenced; -EINVAL is returned just as for a non-matching VMA */
retval = -EINVAL;
if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
	do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
	retval = 0;
}
#endif
up_write(&mm->mmap_sem);
return retval;
}
......
......@@ -1481,12 +1481,10 @@ void __init proc_caches_init(void)
fs_cachep = kmem_cache_create("fs_cache",
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
vm_area_cachep = kmem_cache_create("vm_area_struct",
sizeof(struct vm_area_struct), 0,
SLAB_PANIC, NULL);
mm_cachep = kmem_cache_create("mm_struct",
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
mmap_init();
}
/*
......
......@@ -82,6 +82,9 @@ extern int percpu_pagelist_fraction;
extern int compat_log;
extern int latencytop_enabled;
extern int sysctl_nr_open_min, sysctl_nr_open_max;
#ifndef CONFIG_MMU
extern int sysctl_nr_trim_pages;
#endif
#ifdef CONFIG_RCU_TORTURE_TEST
extern int rcutorture_runnable;
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
......@@ -1102,6 +1105,17 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
#else
{
.ctl_name = CTL_UNNUMBERED,
.procname = "nr_trim_pages",
.data = &sysctl_nr_trim_pages,
.maxlen = sizeof(sysctl_nr_trim_pages),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &zero,
},
#endif
{
.ctl_name = VM_LAPTOP_MODE,
......
......@@ -512,6 +512,13 @@ config DEBUG_VIRTUAL
If unsure, say N.
config DEBUG_NOMMU_REGIONS
bool "Debug the global anon/private NOMMU mapping region tree"
depends on DEBUG_KERNEL && !MMU
help
This option causes the global tree of anonymous and private mapping
regions to be regularly checked for invalid topology.
config DEBUG_WRITECOUNT
bool "Debug filesystem writers count"
depends on DEBUG_KERNEL
......
......@@ -2472,3 +2472,13 @@ void mm_drop_all_locks(struct mm_struct *mm)
mutex_unlock(&mm_all_locks_mutex);
}
/*
 * set up the slab cache that vm_area_struct objects are allocated from
 * - the cache is named "vm_area_struct" and is created with SLAB_PANIC
 */
void __init mmap_init(void)
{
	const size_t vma_size = sizeof(struct vm_area_struct);

	vm_area_cachep = kmem_cache_create("vm_area_struct", vma_size, 0,
					   SLAB_PANIC, NULL);
}
......@@ -6,11 +6,11 @@
*
* See Documentation/nommu-mmap.txt
*
* Copyright (c) 2004-2005 David Howells <dhowells@redhat.com>
* Copyright (c) 2004-2008 David Howells <dhowells@redhat.com>
* Copyright (c) 2000-2003 David McCullough <davidm@snapgear.com>
* Copyright (c) 2000-2001 D Jeff Dionne <jeff@uClinux.org>
* Copyright (c) 2002 Greg Ungerer <gerg@snapgear.com>
* Copyright (c) 2007 Paul Mundt <lethal@linux-sh.org>
* Copyright (c) 2007-2008 Paul Mundt <lethal@linux-sh.org>
*/
#include <linux/module.h>
......@@ -33,6 +33,28 @@
#include <asm/uaccess.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include "internal.h"
/*
 * Debug tracing helpers.
 *
 * no_printk() deliberately does nothing.  It exists so that the disabled
 * tracing macros below still have their format strings checked against their
 * arguments by the compiler (through the printf format attribute).  As it is
 * a real function call, the arguments passed to it are still evaluated.
 */
static inline __attribute__((format(printf, 1, 2)))
void no_printk(const char *fmt, ...)
{
}

/*
 * kenter() - trace entry to a function; FMT describes the arguments
 * kleave() - trace exit from a function; FMT describes the result
 * kdebug() - emit a free-form debugging message
 *
 * Change the "#if 0" below to "#if 1" to route these to printk().  Both
 * variants build exactly the same format string, so what is type-checked
 * while tracing is disabled is what gets printed once it is enabled.
 */
#if 0
#define kenter(FMT, ...) \
	printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) \
	printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) \
	printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__)
#else
#define kenter(FMT, ...) \
	no_printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
#define kleave(FMT, ...) \
	no_printk(KERN_DEBUG "<== %s()"FMT"\n", __func__, ##__VA_ARGS__)
#define kdebug(FMT, ...) \
	no_printk(KERN_DEBUG FMT"\n", ##__VA_ARGS__)
#endif
#include "internal.h"
......@@ -40,19 +62,22 @@ void *high_memory;
struct page *mem_map;
unsigned long max_mapnr;
unsigned long num_physpages;
unsigned long askedalloc, realalloc;
atomic_long_t vm_committed_space = ATOMIC_LONG_INIT(0);
int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
int sysctl_overcommit_ratio = 50; /* default is 50% */
int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT;
int sysctl_nr_trim_pages = 1; /* page trimming behaviour */
int heap_stack_gap = 0;
atomic_t mmap_pages_allocated;
EXPORT_SYMBOL(mem_map);
EXPORT_SYMBOL(num_physpages);
/* list of shareable VMAs */
struct rb_root nommu_vma_tree = RB_ROOT;
DECLARE_RWSEM(nommu_vma_sem);
/* list of mapped, potentially shareable regions */
static struct kmem_cache *vm_region_jar;
struct rb_root nommu_region_tree = RB_ROOT;
DECLARE_RWSEM(nommu_region_sem);
struct vm_operations_struct generic_file_vm_ops = {
};
......@@ -123,6 +148,20 @@ unsigned int kobjsize(const void *objp)
if (PageSlab(page))
return ksize(objp);
/*
* If it's not a compound page, see if we have a matching VMA
* region. This test is intentionally done in reverse order,
* so if there's no VMA, we still fall through and hand back
* PAGE_SIZE for 0-order pages.
*/
if (!PageCompound(page)) {
struct vm_area_struct *vma;
vma = find_vma(current->mm, (unsigned long)objp);
if (vma)
return vma->vm_end - vma->vm_start;
}
/*
* The ksize() function is only guaranteed to work for pointers
* returned by kmalloc(). So handle arbitrary pointers here.
......@@ -401,129 +440,178 @@ asmlinkage unsigned long sys_brk(unsigned long brk)
return mm->brk = brk;
}
#ifdef DEBUG
static void show_process_blocks(void)
/*
* initialise the VMA and region record slabs
*/
void __init mmap_init(void)
{
struct vm_list_struct *vml;
printk("Process blocks %d:", current->pid);
for (vml = &current->mm->context.vmlist; vml; vml = vml->next) {
printk(" %p: %p", vml, vml->vma);
if (vml->vma)
printk(" (%d @%lx #%d)",
kobjsize((void *) vml->vma->vm_start),
vml->vma->vm_start,
atomic_read(&vml->vma->vm_usage));
printk(vml->next ? " ->" : ".\n");
}
vm_region_jar = kmem_cache_create("vm_region_jar",
sizeof(struct vm_region), 0,
SLAB_PANIC, NULL);
vm_area_cachep = kmem_cache_create("vm_area_struct",
sizeof(struct vm_area_struct), 0,
SLAB_PANIC, NULL);
}
#endif /* DEBUG */
/*
* add a VMA into a process's mm_struct in the appropriate place in the list
* - should be called with mm->mmap_sem held writelocked
* validate the region tree
* - the caller must hold the region lock
*/
static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml)
#ifdef CONFIG_DEBUG_NOMMU_REGIONS
static noinline void validate_nommu_regions(void)
{
struct vm_list_struct **ppv;
for (ppv = &current->mm->context.vmlist; *ppv; ppv = &(*ppv)->next)
if ((*ppv)->vma->vm_start > vml->vma->vm_start)
break;
vml->next = *ppv;
*ppv = vml;
struct vm_region *region, *last;
struct rb_node *p, *lastp;
lastp = rb_first(&nommu_region_tree);
if (!lastp)
return;
last = rb_entry(lastp, struct vm_region, vm_rb);
if (unlikely(last->vm_end <= last->vm_start))
BUG();
if (unlikely(last->vm_top < last->vm_end))
BUG();
while ((p = rb_next(lastp))) {
region = rb_entry(p, struct vm_region, vm_rb);
last = rb_entry(lastp, struct vm_region, vm_rb);
if (unlikely(region->vm_end <= region->vm_start))
BUG();
if (unlikely(region->vm_top < region->vm_end))
BUG();
if (unlikely(region->vm_start < last->vm_top))
BUG();
lastp = p;
}
}
#else
#define validate_nommu_regions() do {} while(0)
#endif
/*
* look up the first VMA in which addr resides, NULL if none
* - should be called with mm->mmap_sem at least held readlocked
* add a region into the global tree
*/
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
static void add_nommu_region(struct vm_region *region)
{
struct vm_list_struct *loop, *vml;
struct vm_region *pregion;
struct rb_node **p, *parent;
/* search the vm_start ordered list */
vml = NULL;
for (loop = mm->context.vmlist; loop; loop = loop->next) {
if (loop->vma->vm_start > addr)
break;
vml = loop;
validate_nommu_regions();
BUG_ON(region->vm_start & ~PAGE_MASK);
parent = NULL;
p = &nommu_region_tree.rb_node;
while (*p) {
parent = *p;
pregion = rb_entry(parent, struct vm_region, vm_rb);
if (region->vm_start < pregion->vm_start)
p = &(*p)->rb_left;
else if (region->vm_start > pregion->vm_start)
p = &(*p)->rb_right;
else if (pregion == region)
return;
else
BUG();
}
if (vml && vml->vma->vm_end > addr)
return vml->vma;
rb_link_node(&region->vm_rb, parent, p);
rb_insert_color(&region->vm_rb, &nommu_region_tree);
return NULL;
validate_nommu_regions();
}
EXPORT_SYMBOL(find_vma);
/*
* find a VMA
* - we don't extend stack VMAs under NOMMU conditions
* delete a region from the global tree
*/
struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
static void delete_nommu_region(struct vm_region *region)
{
return find_vma(mm, addr);
}
BUG_ON(!nommu_region_tree.rb_node);
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
return -ENOMEM;
validate_nommu_regions();
rb_erase(&region->vm_rb, &nommu_region_tree);
validate_nommu_regions();
}
/*
* look up the first VMA exactly that exactly matches addr
* - should be called with mm->mmap_sem at least held readlocked
* free a contiguous series of pages
*/
static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
unsigned long addr)
static void free_page_series(unsigned long from, unsigned long to)
{
struct vm_list_struct *vml;
/* search the vm_start ordered list */
for (vml = mm->context.vmlist; vml; vml = vml->next) {
if (vml->vma->vm_start == addr)
return vml->vma;
if (vml->vma->vm_start > addr)
break;
for (; from < to; from += PAGE_SIZE) {
struct page *page = virt_to_page(from);
kdebug("- free %lx", from);
atomic_dec(&mmap_pages_allocated);
if (page_count(page) != 1)
kdebug("free page %p [%d]", page, page_count(page));
put_page(page);
}
return NULL;
}
/*
* find a VMA in the global tree
* release a reference to a region
* - the caller must hold the region semaphore, which this releases
* - the region may not have been added to the tree yet, in which case vm_top
* will equal vm_start
*/
static inline struct vm_area_struct *find_nommu_vma(unsigned long start)
static void __put_nommu_region(struct vm_region *region)
__releases(nommu_region_sem)
{
struct vm_area_struct *vma;
struct rb_node *n = nommu_vma_tree.rb_node;
kenter("%p{%d}", region, atomic_read(&region->vm_usage));
while (n) {
vma = rb_entry(n, struct vm_area_struct, vm_rb);
BUG_ON(!nommu_region_tree.rb_node);
if (start < vma->vm_start)
n = n->rb_left;
else if (start > vma->vm_start)
n = n->rb_right;
else
return vma;
if (atomic_dec_and_test(&region->vm_usage)) {
if (region->vm_top > region->vm_start)
delete_nommu_region(region);
up_write(&nommu_region_sem);
if (region->vm_file)
fput(region->vm_file);
/* IO memory and memory shared directly out of the pagecache
* from ramfs/tmpfs mustn't be released here */
if (region->vm_flags & VM_MAPPED_COPY) {
kdebug("free series");
free_page_series(region->vm_start, region->vm_top);
}
kmem_cache_free(vm_region_jar, region);
} else {
up_write(&nommu_region_sem);
}
}
return NULL;
/*
 * release a reference to a region
 * - takes nommu_region_sem for writing and then hands over to
 *   __put_nommu_region(), which drops the semaphore on every path; that is
 *   why there is no matching up_write() in this function
 */
static void put_nommu_region(struct vm_region *region)
{
down_write(&nommu_region_sem);
__put_nommu_region(region);
}
/*
* add a VMA in the global tree
* add a VMA into a process's mm_struct in the appropriate place in the list
* and tree and add to the address space's page tree also if not an anonymous
* page
* - should be called with mm->mmap_sem held writelocked
*/
static void add_nommu_vma(struct vm_area_struct *vma)
static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
{
struct vm_area_struct *pvma;
struct vm_area_struct *pvma, **pp;
struct address_space *mapping;
struct rb_node **p = &nommu_vma_tree.rb_node;
struct rb_node *parent = NULL;
struct rb_node **p, *parent;
kenter(",%p", vma);
BUG_ON(!vma->vm_region);
mm->map_count++;
vma->vm_mm = mm;
/* add the VMA to the mapping */
if (vma->vm_file) {
......@@ -534,42 +622,62 @@ static void add_nommu_vma(struct vm_area_struct *vma)
flush_dcache_mmap_unlock(mapping);
}
/* add the VMA to the master list */
/* add the VMA to the tree */
parent = NULL;
p = &mm->mm_rb.rb_node;
while (*p) {
parent = *p;
pvma = rb_entry(parent, struct vm_area_struct, vm_rb);
if (vma->vm_start < pvma->vm_start) {
/* sort by: start addr, end addr, VMA struct addr in that order
* (the latter is necessary as we may get identical VMAs) */
if (vma->vm_start < pvma->vm_start)
p = &(*p)->rb_left;
}
else if (vma->vm_start > pvma->vm_start) {
else if (vma->vm_start > pvma->vm_start)
p = &(*p)->rb_right;
}
else {
/* mappings are at the same address - this can only
* happen for shared-mem chardevs and shared file
* mappings backed by ramfs/tmpfs */
BUG_ON(!(pvma->vm_flags & VM_SHARED));
if (vma < pvma)
p = &(*p)->rb_left;
else if (vma > pvma)
p = &(*p)->rb_right;
else
BUG();
}
else if (vma->vm_end < pvma->vm_end)
p = &(*p)->rb_left;
else if (vma->vm_end > pvma->vm_end)
p = &(*p)->rb_right;
else if (vma < pvma)
p = &(*p)->rb_left;
else if (vma > pvma)
p = &(*p)->rb_right;
else
BUG();
}
rb_link_node(&vma->vm_rb, parent, p);
rb_insert_color(&vma->vm_rb, &nommu_vma_tree);
rb_insert_color(&vma->vm_rb, &mm->mm_rb);
/* add VMA to the VMA list also */
for (pp = &mm->mmap; (pvma = *pp); pp = &(*pp)->vm_next) {
if (pvma->vm_start > vma->vm_start)
break;
if (pvma->vm_start < vma->vm_start)
continue;
if (pvma->vm_end < vma->vm_end)
break;
}
vma->vm_next = *pp;
*pp = vma;
}
/*
* delete a VMA from the global list
* delete a VMA from its owning mm_struct and address space
*/
static void delete_nommu_vma(struct vm_area_struct *vma)
static void delete_vma_from_mm(struct vm_area_struct *vma)
{
struct vm_area_struct **pp;
struct address_space *mapping;
struct mm_struct *mm = vma->vm_mm;
kenter("%p", vma);
mm->map_count--;
if (mm->mmap_cache == vma)
mm->mmap_cache = NULL;
/* remove the VMA from the mapping */
if (vma->vm_file) {
......@@ -580,8 +688,115 @@ static void delete_nommu_vma(struct vm_area_struct *vma)
flush_dcache_mmap_unlock(mapping);
}
/* remove from the master list */
rb_erase(&vma->vm_rb, &nommu_vma_tree);
/* remove from the MM's tree and list */
rb_erase(&vma->vm_rb, &mm->mm_rb);
for (pp = &mm->mmap; *pp; pp = &(*pp)->vm_next) {
if (*pp == vma) {
*pp = vma->vm_next;
break;
}
}
vma->vm_mm = NULL;
}
/*
* destroy a VMA record
*/
static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
{
kenter("%p", vma);
if (vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
if (vma->vm_file) {
fput(vma->vm_file);
if (vma->vm_flags & VM_EXECUTABLE)
removed_exe_file_vma(mm);
}
put_nommu_region(vma->vm_region);
kmem_cache_free(vm_area_cachep, vma);
}
/*
 * find the first VMA in which addr resides
 * - returns NULL if no VMA covers addr
 * - the mm's cached VMA is tried first; a successful tree lookup replaces it
 * - should be called with mm->mmap_sem at least held readlocked
 */
struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma = mm->mmap_cache;
	struct rb_node *node;

	/* fast path: does the cached VMA cover the address? */
	if (vma && addr >= vma->vm_start && addr < vma->vm_end)
		return vma;

	/* slow path: walk the tree in order, as several mappings may cover
	 * addr; once a VMA starts beyond addr the walk gives up */
	node = rb_first(&mm->mm_rb);
	while (node) {
		vma = rb_entry(node, struct vm_area_struct, vm_rb);
		if (addr < vma->vm_start)
			break;
		if (addr < vma->vm_end) {
			mm->mmap_cache = vma;
			return vma;
		}
		node = rb_next(node);
	}

	return NULL;
}
EXPORT_SYMBOL(find_vma);
/*
 * find a VMA
 * - we don't extend stack VMAs under NOMMU conditions, so this is a plain
 *   find_vma() lookup and returns whatever that returns (NULL if no VMA
 *   covers addr)
 */
struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
{
return find_vma(mm, addr);
}
/*
 * expand a stack to a given address
 * - not supported under NOMMU conditions: both arguments are ignored and
 *   -ENOMEM is returned unconditionally
 */
int expand_stack(struct vm_area_struct *vma, unsigned long address)
{
return -ENOMEM;
}
/*
 * look up the first VMA that exactly matches the range addr..addr+len
 * - a match must start at addr and end at addr + len
 * - returns NULL if there is no such VMA
 * - the mm's cached VMA is tried first; a successful tree lookup replaces it
 * - should be called with mm->mmap_sem at least held readlocked
 */
static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
					     unsigned long addr,
					     unsigned long len)
{
	struct vm_area_struct *vma = mm->mmap_cache;
	struct rb_node *node;
	unsigned long end = addr + len;

	/* fast path: is the cached VMA the one being asked for? */
	if (vma && vma->vm_start == addr && vma->vm_end == end)
		return vma;

	/* slow path: walk the tree in order; several VMAs may start at addr,
	 * so keep going until one also ends in the right place or the walk
	 * has moved on to VMAs that start beyond addr */
	for (node = rb_first(&mm->mm_rb); node; node = rb_next(node)) {
		vma = rb_entry(node, struct vm_area_struct, vm_rb);

		if (vma->vm_start > addr)
			break;
		if (vma->vm_start == addr && vma->vm_end == end) {
			mm->mmap_cache = vma;
			return vma;
		}
	}

	return NULL;
}
/*
......@@ -596,7 +811,7 @@ static int validate_mmap_request(struct file *file,
unsigned long pgoff,
unsigned long *_capabilities)
{
unsigned long capabilities;
unsigned long capabilities, rlen;
unsigned long reqprot = prot;
int ret;
......@@ -616,12 +831,12 @@ static int validate_mmap_request(struct file *file,
return -EINVAL;
/* Careful about overflows.. */
len = PAGE_ALIGN(len);
if (!len || len > TASK_SIZE)
rlen = PAGE_ALIGN(len);
if (!rlen || rlen > TASK_SIZE)
return -ENOMEM;
/* offset overflow? */
if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
if ((pgoff + (rlen >> PAGE_SHIFT)) < pgoff)
return -EOVERFLOW;
if (file) {
......@@ -795,13 +1010,18 @@ static unsigned long determine_vm_flags(struct file *file,
}
/*
* set up a shared mapping on a file
* set up a shared mapping on a file (the driver or filesystem provides and
* pins the storage)
*/
static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
static int do_mmap_shared_file(struct vm_area_struct *vma)
{
int ret;
ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
if (ret == 0) {
vma->vm_region->vm_top = vma->vm_region->vm_end;
return ret;
}
if (ret != -ENOSYS)
return ret;
......@@ -815,10 +1035,14 @@ static int do_mmap_shared_file(struct vm_area_struct *vma, unsigned long len)
/*
* set up a private mapping or an anonymous shared mapping
*/
static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
static int do_mmap_private(struct vm_area_struct *vma,
struct vm_region *region,
unsigned long len)
{
struct page *pages;
unsigned long total, point, n, rlen;
void *base;
int ret;
int ret, order;
/* invoke the file's mapping function so that it can keep track of
* shared mappings on devices or memory
......@@ -826,34 +1050,63 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
*/
if (vma->vm_file) {
ret = vma->vm_file->f_op->mmap(vma->vm_file, vma);
if (ret != -ENOSYS) {
if (ret == 0) {
/* shouldn't return success if we're not sharing */
BUG_ON(ret == 0 && !(vma->vm_flags & VM_MAYSHARE));
return ret; /* success or a real error */
BUG_ON(!(vma->vm_flags & VM_MAYSHARE));
vma->vm_region->vm_top = vma->vm_region->vm_end;
return ret;
}
if (ret != -ENOSYS)
return ret;
/* getting an ENOSYS error indicates that direct mmap isn't
* possible (as opposed to tried but failed) so we'll try to
* make a private copy of the data and map that instead */
}
rlen = PAGE_ALIGN(len);
/* allocate some memory to hold the mapping
* - note that this may not return a page-aligned address if the object
* we're allocating is smaller than a page
*/
base = kmalloc(len, GFP_KERNEL|__GFP_COMP);
if (!base)
order = get_order(rlen);
kdebug("alloc order %d for %lx", order, len);
pages = alloc_pages(GFP_KERNEL, order);
if (!pages)
goto enomem;
vma->vm_start = (unsigned long) base;
vma->vm_end = vma->vm_start + len;
vma->vm_flags |= VM_MAPPED_COPY;
total = 1 << order;
atomic_add(total, &mmap_pages_allocated);
point = rlen >> PAGE_SHIFT;
/* we allocated a power-of-2 sized page set, so we may want to trim off
* the excess */
if (sysctl_nr_trim_pages && total - point >= sysctl_nr_trim_pages) {
while (total > point) {
order = ilog2(total - point);
n = 1 << order;
kdebug("shave %lu/%lu @%lu", n, total - point, total);
atomic_sub(n, &mmap_pages_allocated);
total -= n;
set_page_refcounted(pages + total);
__free_pages(pages + total, order);
}
}
for (point = 1; point < total; point++)
set_page_refcounted(&pages[point]);
#ifdef WARN_ON_SLACK
if (len + WARN_ON_SLACK <= kobjsize(result))
printk("Allocation of %lu bytes from process %d has %lu bytes of slack\n",
len, current->pid, kobjsize(result) - len);
#endif
base = page_address(pages);
region->vm_flags = vma->vm_flags |= VM_MAPPED_COPY;
region->vm_start = (unsigned long) base;
region->vm_end = region->vm_start + rlen;
region->vm_top = region->vm_start + (total << PAGE_SHIFT);
vma->vm_start = region->vm_start;
vma->vm_end = region->vm_start + len;
if (vma->vm_file) {
/* read the contents of a file into the copy */
......@@ -865,26 +1118,28 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
old_fs = get_fs();
set_fs(KERNEL_DS);
ret = vma->vm_file->f_op->read(vma->vm_file, base, len, &fpos);
ret = vma->vm_file->f_op->read(vma->vm_file, base, rlen, &fpos);
set_fs(old_fs);
if (ret < 0)
goto error_free;
/* clear the last little bit */
if (ret < len)
memset(base + ret, 0, len - ret);
if (ret < rlen)
memset(base + ret, 0, rlen - ret);
} else {
/* if it's an anonymous mapping, then just clear it */
memset(base, 0, len);
memset(base, 0, rlen);
}
return 0;
error_free:
kfree(base);
vma->vm_start = 0;
free_page_series(region->vm_start, region->vm_end);
region->vm_start = vma->vm_start = 0;
region->vm_end = vma->vm_end = 0;
region->vm_top = 0;
return ret;
enomem:
......@@ -904,13 +1159,14 @@ unsigned long do_mmap_pgoff(struct file *file,
unsigned long flags,
unsigned long pgoff)
{
struct vm_list_struct *vml = NULL;
struct vm_area_struct *vma = NULL;
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *rb;
unsigned long capabilities, vm_flags;
void *result;
unsigned long capabilities, vm_flags, result;
int ret;
kenter(",%lx,%lx,%lx,%lx,%lx", addr, len, prot, flags, pgoff);
if (!(flags & MAP_FIXED))
addr = round_hint_to_min(addr);
......@@ -918,73 +1174,120 @@ unsigned long do_mmap_pgoff(struct file *file,
* mapping */
ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
&capabilities);
if (ret < 0)
if (ret < 0) {
kleave(" = %d [val]", ret);
return ret;
}
/* we've determined that we can make the mapping, now translate what we
* now know into VMA flags */
vm_flags = determine_vm_flags(file, prot, flags, capabilities);
/* we're going to need to record the mapping if it works */
vml = kzalloc(sizeof(struct vm_list_struct), GFP_KERNEL);
if (!vml)
goto error_getting_vml;
/* we're going to need to record the mapping */
region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
if (!region)
goto error_getting_region;
vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!vma)
goto error_getting_vma;
atomic_set(&region->vm_usage, 1);
region->vm_flags = vm_flags;
region->vm_pgoff = pgoff;
INIT_LIST_HEAD(&vma->anon_vma_node);
vma->vm_flags = vm_flags;
vma->vm_pgoff = pgoff;
down_write(&nommu_vma_sem);
if (file) {
region->vm_file = file;
get_file(file);
vma->vm_file = file;
get_file(file);
if (vm_flags & VM_EXECUTABLE) {
added_exe_file_vma(current->mm);
vma->vm_mm = current->mm;
}
}
/* if we want to share, we need to check for VMAs created by other
down_write(&nommu_region_sem);
/* if we want to share, we need to check for regions created by other
* mmap() calls that overlap with our proposed mapping
* - we can only share with an exact match on most regular files
* - we can only share with a superset match on most regular files
* - shared mappings on character devices and memory backed files are
* permitted to overlap inexactly as far as we are concerned for in
* these cases, sharing is handled in the driver or filesystem rather
* than here
*/
if (vm_flags & VM_MAYSHARE) {
unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
unsigned long vmpglen;
struct vm_region *pregion;
unsigned long pglen, rpglen, pgend, rpgend, start;
/* suppress VMA sharing for shared regions */
if (vm_flags & VM_SHARED &&
capabilities & BDI_CAP_MAP_DIRECT)
goto dont_share_VMAs;
pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
pgend = pgoff + pglen;
for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
vma = rb_entry(rb, struct vm_area_struct, vm_rb);
for (rb = rb_first(&nommu_region_tree); rb; rb = rb_next(rb)) {
pregion = rb_entry(rb, struct vm_region, vm_rb);
if (!(vma->vm_flags & VM_MAYSHARE))
if (!(pregion->vm_flags & VM_MAYSHARE))
continue;
/* search for overlapping mappings on the same file */
if (vma->vm_file->f_path.dentry->d_inode != file->f_path.dentry->d_inode)
if (pregion->vm_file->f_path.dentry->d_inode !=
file->f_path.dentry->d_inode)
continue;
if (vma->vm_pgoff >= pgoff + pglen)
if (pregion->vm_pgoff >= pgend)
continue;
vmpglen = vma->vm_end - vma->vm_start + PAGE_SIZE - 1;
vmpglen >>= PAGE_SHIFT;
if (pgoff >= vma->vm_pgoff + vmpglen)
rpglen = pregion->vm_end - pregion->vm_start;
rpglen = (rpglen + PAGE_SIZE - 1) >> PAGE_SHIFT;
rpgend = pregion->vm_pgoff + rpglen;
if (pgoff >= rpgend)
continue;
/* handle inexactly overlapping matches between mappings */
if (vma->vm_pgoff != pgoff || vmpglen != pglen) {
/* handle inexactly overlapping matches between
* mappings */
if ((pregion->vm_pgoff != pgoff || rpglen != pglen) &&
!(pgoff >= pregion->vm_pgoff && pgend <= rpgend)) {
/* new mapping is not a subset of the region */
if (!(capabilities & BDI_CAP_MAP_DIRECT))
goto sharing_violation;
continue;
}
/* we've found a VMA we can share */
atomic_inc(&vma->vm_usage);
vml->vma = vma;
result = (void *) vma->vm_start;
goto shared;
/* we've found a region we can share */
atomic_inc(&pregion->vm_usage);
vma->vm_region = pregion;
start = pregion->vm_start;
start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT;
vma->vm_start = start;
vma->vm_end = start + len;
if (pregion->vm_flags & VM_MAPPED_COPY) {
kdebug("share copy");
vma->vm_flags |= VM_MAPPED_COPY;
} else {
kdebug("share mmap");
ret = do_mmap_shared_file(vma);
if (ret < 0) {
vma->vm_region = NULL;
vma->vm_start = 0;
vma->vm_end = 0;
atomic_dec(&pregion->vm_usage);
pregion = NULL;
goto error_just_free;
}
}
fput(region->vm_file);
kmem_cache_free(vm_region_jar, region);
region = pregion;
result = start;
goto share;
}
dont_share_VMAs:
vma = NULL;
/* obtain the address at which to make a shared mapping
* - this is the hook for quasi-memory character devices to
* tell us the location of a shared mapping
......@@ -995,113 +1298,93 @@ unsigned long do_mmap_pgoff(struct file *file,
if (IS_ERR((void *) addr)) {
ret = addr;
if (ret != (unsigned long) -ENOSYS)
goto error;
goto error_just_free;
/* the driver refused to tell us where to site
* the mapping so we'll have to attempt to copy
* it */
ret = (unsigned long) -ENODEV;
if (!(capabilities & BDI_CAP_MAP_COPY))
goto error;
goto error_just_free;
capabilities &= ~BDI_CAP_MAP_DIRECT;
} else {
vma->vm_start = region->vm_start = addr;
vma->vm_end = region->vm_end = addr + len;
}
}
}
/* we're going to need a VMA struct as well */
vma = kzalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
if (!vma)
goto error_getting_vma;
INIT_LIST_HEAD(&vma->anon_vma_node);
atomic_set(&vma->vm_usage, 1);
if (file) {
get_file(file);
if (vm_flags & VM_EXECUTABLE) {
added_exe_file_vma(current->mm);
vma->vm_mm = current->mm;
}
}
vma->vm_file = file;
vma->vm_flags = vm_flags;
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_pgoff = pgoff;
vml->vma = vma;
vma->vm_region = region;
/* set up the mapping */
if (file && vma->vm_flags & VM_SHARED)
ret = do_mmap_shared_file(vma, len);
ret = do_mmap_shared_file(vma);
else
ret = do_mmap_private(vma, len);
ret = do_mmap_private(vma, region, len);
if (ret < 0)
goto error;
/* okay... we have a mapping; now we have to register it */
result = (void *) vma->vm_start;
goto error_put_region;
if (vma->vm_flags & VM_MAPPED_COPY) {
realalloc += kobjsize(result);
askedalloc += len;
}
add_nommu_region(region);
realalloc += kobjsize(vma);
askedalloc += sizeof(*vma);
/* okay... we have a mapping; now we have to register it */
result = vma->vm_start;
current->mm->total_vm += len >> PAGE_SHIFT;
add_nommu_vma(vma);
shared:
realalloc += kobjsize(vml);
askedalloc += sizeof(*vml);
add_vma_to_mm(current->mm, vml);
share:
add_vma_to_mm(current->mm, vma);
up_write(&nommu_vma_sem);
up_write(&nommu_region_sem);
if (prot & PROT_EXEC)
flush_icache_range((unsigned long) result,
(unsigned long) result + len);
flush_icache_range(result, result + len);
#ifdef DEBUG
printk("do_mmap:\n");
show_process_blocks();
#endif
return (unsigned long) result;
kleave(" = %lx", result);
return result;
error:
up_write(&nommu_vma_sem);
kfree(vml);
error_put_region:
__put_nommu_region(region);
if (vma) {
if (vma->vm_file) {
fput(vma->vm_file);
if (vma->vm_flags & VM_EXECUTABLE)
removed_exe_file_vma(vma->vm_mm);
}
kfree(vma);
kmem_cache_free(vm_area_cachep, vma);
}
kleave(" = %d [pr]", ret);
return ret;
sharing_violation:
up_write(&nommu_vma_sem);
printk("Attempt to share mismatched mappings\n");
kfree(vml);
return -EINVAL;
error_just_free:
up_write(&nommu_region_sem);
error:
fput(region->vm_file);
kmem_cache_free(vm_region_jar, region);
fput(vma->vm_file);
if (vma->vm_flags & VM_EXECUTABLE)
removed_exe_file_vma(vma->vm_mm);
kmem_cache_free(vm_area_cachep, vma);
kleave(" = %d", ret);
return ret;
error_getting_vma:
up_write(&nommu_vma_sem);
kfree(vml);
printk("Allocation of vma for %lu byte allocation from process %d failed\n",
sharing_violation:
up_write(&nommu_region_sem);
printk(KERN_WARNING "Attempt to share mismatched mappings\n");
ret = -EINVAL;
goto error;
error_getting_vma:
kmem_cache_free(vm_region_jar, region);
printk(KERN_WARNING "Allocation of vma for %lu byte allocation"
" from process %d failed\n",
len, current->pid);
show_free_areas();
return -ENOMEM;
error_getting_vml:
printk("Allocation of vml for %lu byte allocation from process %d failed\n",
error_getting_region:
printk(KERN_WARNING "Allocation of vm region for %lu byte allocation"
" from process %d failed\n",
len, current->pid);
show_free_areas();
return -ENOMEM;
......@@ -1109,85 +1392,183 @@ unsigned long do_mmap_pgoff(struct file *file,
EXPORT_SYMBOL(do_mmap_pgoff);
/*
* handle mapping disposal for uClinux
* split a vma into two pieces at address 'addr', a new vma is allocated either
* for the first part or the tail.
*/
static void put_vma(struct mm_struct *mm, struct vm_area_struct *vma)
int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, int new_below)
{
if (vma) {
down_write(&nommu_vma_sem);
struct vm_area_struct *new;
struct vm_region *region;
unsigned long npages;
if (atomic_dec_and_test(&vma->vm_usage)) {
delete_nommu_vma(vma);
kenter("");
if (vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
/* we're only permitted to split anonymous regions that have a single
* owner */
if (vma->vm_file ||
atomic_read(&vma->vm_region->vm_usage) != 1)
return -ENOMEM;
/* IO memory and memory shared directly out of the pagecache from
* ramfs/tmpfs mustn't be released here */
if (vma->vm_flags & VM_MAPPED_COPY) {
realalloc -= kobjsize((void *) vma->vm_start);
askedalloc -= vma->vm_end - vma->vm_start;
kfree((void *) vma->vm_start);
}
if (mm->map_count >= sysctl_max_map_count)
return -ENOMEM;
realalloc -= kobjsize(vma);
askedalloc -= sizeof(*vma);
region = kmem_cache_alloc(vm_region_jar, GFP_KERNEL);
if (!region)
return -ENOMEM;
if (vma->vm_file) {
fput(vma->vm_file);
if (vma->vm_flags & VM_EXECUTABLE)
removed_exe_file_vma(mm);
}
kfree(vma);
}
new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!new) {
kmem_cache_free(vm_region_jar, region);
return -ENOMEM;
}
/* most fields are the same, copy all, and then fixup */
*new = *vma;
*region = *vma->vm_region;
new->vm_region = region;
npages = (addr - vma->vm_start) >> PAGE_SHIFT;
up_write(&nommu_vma_sem);
if (new_below) {
region->vm_top = region->vm_end = new->vm_end = addr;
} else {
region->vm_start = new->vm_start = addr;
region->vm_pgoff = new->vm_pgoff += npages;
}
if (new->vm_ops && new->vm_ops->open)
new->vm_ops->open(new);
delete_vma_from_mm(vma);
down_write(&nommu_region_sem);
delete_nommu_region(vma->vm_region);
if (new_below) {
vma->vm_region->vm_start = vma->vm_start = addr;
vma->vm_region->vm_pgoff = vma->vm_pgoff += npages;
} else {
vma->vm_region->vm_end = vma->vm_end = addr;
vma->vm_region->vm_top = addr;
}
add_nommu_region(vma->vm_region);
add_nommu_region(new->vm_region);
up_write(&nommu_region_sem);
add_vma_to_mm(mm, vma);
add_vma_to_mm(mm, new);
return 0;
}
/*
* release a mapping
* - under NOMMU conditions the parameters must match exactly to the mapping to
* be removed
* shrink a VMA by removing the specified chunk from either the beginning or
* the end
*/
int do_munmap(struct mm_struct *mm, unsigned long addr, size_t len)
static int shrink_vma(struct mm_struct *mm,
struct vm_area_struct *vma,
unsigned long from, unsigned long to)
{
struct vm_list_struct *vml, **parent;
unsigned long end = addr + len;
struct vm_region *region;
#ifdef DEBUG
printk("do_munmap:\n");
#endif
kenter("");
for (parent = &mm->context.vmlist; *parent; parent = &(*parent)->next) {
if ((*parent)->vma->vm_start > addr)
break;
if ((*parent)->vma->vm_start == addr &&
((len == 0) || ((*parent)->vma->vm_end == end)))
goto found;
/* adjust the VMA's pointers, which may reposition it in the MM's tree
* and list */
delete_vma_from_mm(vma);
if (from > vma->vm_start)
vma->vm_end = from;
else
vma->vm_start = to;
add_vma_to_mm(mm, vma);
/* cut the backing region down to size */
region = vma->vm_region;
BUG_ON(atomic_read(&region->vm_usage) != 1);
down_write(&nommu_region_sem);
delete_nommu_region(region);
if (from > region->vm_start) {
to = region->vm_top;
region->vm_top = region->vm_end = from;
} else {
region->vm_start = to;
}
add_nommu_region(region);
up_write(&nommu_region_sem);
printk("munmap of non-mmaped memory by process %d (%s): %p\n",
current->pid, current->comm, (void *) addr);
return -EINVAL;
free_page_series(from, to);
return 0;
}
found:
vml = *parent;
/*
 * release a mapping
 * - under NOMMU conditions the chunk to be unmapped must be backed by a single
 *   VMA, though it need not cover the whole VMA
 * - returns 0 when a whole VMA is erased, the result of shrink_vma() when only
 *   part of an anonymous VMA is released, the error from split_vma() if the
 *   split fails, and -EINVAL when the request is rejected
 * - NOTE(review): the len == 0, "no VMA found" and shrink_vma() return paths
 *   do not call kleave(), unlike the other exits
 */
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
{
struct vm_area_struct *vma;
struct rb_node *rb;
unsigned long end = start + len;
int ret;
/* NOTE(review): vml and parent are not declared in this function; this line
 * and the vml/realalloc/askedalloc lines below look like remnants of an
 * earlier list-based implementation - confirm before relying on them */
put_vma(mm, vml->vma);
kenter(",%lx,%zx", start, len);
*parent = vml->next;
realalloc -= kobjsize(vml);
askedalloc -= sizeof(*vml);
kfree(vml);
/* a zero-length request is rejected outright */
if (len == 0)
return -EINVAL;
/* NOTE(review): this accounting runs before the range has been checked
 * against any VMA - confirm that this is intended */
update_hiwater_vm(mm);
mm->total_vm -= len >> PAGE_SHIFT;
/* find the first potentially overlapping VMA */
vma = find_vma(mm, start);
if (!vma) {
printk(KERN_WARNING
"munmap of memory not mmapped by process %d (%s):"
" 0x%lx-0x%lx\n",
current->pid, current->comm, start, start + len - 1);
return -EINVAL;
}
#ifdef DEBUG
show_process_blocks();
#endif
/* we're allowed to split an anonymous VMA but not a file-backed one */
if (vma->vm_file) {
do {
/* the request may not begin part-way into this VMA */
if (start > vma->vm_start) {
kleave(" = -EINVAL [miss]");
return -EINVAL;
}
/* a VMA that ends exactly at the end of the request is erased whole */
if (end == vma->vm_end)
goto erase_whole_vma;
/* step to the following node; vma is derived from rb before rb is
 * tested, but is only used again if the loop condition finds rb
 * non-NULL */
rb = rb_next(&vma->vm_rb);
vma = rb_entry(rb, struct vm_area_struct, vm_rb);
} while (rb);
/* no VMA ended at 'end', so honouring the request would need a split */
kleave(" = -EINVAL [split file]");
return -EINVAL;
} else {
/* the chunk must be a subset of the VMA found */
if (start == vma->vm_start && end == vma->vm_end)
goto erase_whole_vma;
if (start < vma->vm_start || end > vma->vm_end) {
kleave(" = -EINVAL [superset]");
return -EINVAL;
}
/* a partial release has to start on a page boundary */
if (start & ~PAGE_MASK) {
kleave(" = -EINVAL [unaligned start]");
return -EINVAL;
}
/* an end that falls inside the VMA has to be page-aligned as well */
if (end != vma->vm_end && end & ~PAGE_MASK) {
kleave(" = -EINVAL [unaligned split]");
return -EINVAL;
}
/* the chunk touches neither edge of the VMA, so split the VMA at 'start'
 * first (NOTE(review): the meaning of the final argument, 1, is not
 * visible here) */
if (start != vma->vm_start && end != vma->vm_end) {
ret = split_vma(mm, vma, start, 1);
if (ret < 0) {
kleave(" = %d [split]", ret);
return ret;
}
}
/* hand the range start..end to shrink_vma() and pass back its result */
return shrink_vma(mm, vma, start, end);
}
erase_whole_vma:
/* the request covers the whole VMA: detach it from the mm, then delete it */
delete_vma_from_mm(vma);
delete_vma(mm, vma);
kleave(" = 0");
return 0;
}
EXPORT_SYMBOL(do_munmap);
......@@ -1204,32 +1585,26 @@ asmlinkage long sys_munmap(unsigned long addr, size_t len)
}
/*
 * Release all mappings
 * release all the mappings made in a process's VM space
 * - does nothing when mm is NULL
 * - NOTE(review): this span carries two signature lines, two teardown loops
 *   (one over mm->context.vmlist, one over mm->mmap) and sets mm->total_vm to
 *   zero twice; it looks like two generations of the function overlaid -
 *   confirm which lines are current before changing anything
 */
void exit_mmap(struct mm_struct * mm)
void exit_mmap(struct mm_struct *mm)
{
struct vm_list_struct *tmp;
if (mm) {
#ifdef DEBUG
printk("Exit_mmap:\n");
#endif
struct vm_area_struct *vma;
mm->total_vm = 0;
/* nothing to tear down without an mm */
if (!mm)
return;
/* pop each entry off the head of mm->context.vmlist, drop its VMA via
 * put_vma(), adjust the realalloc/askedalloc counters and free the entry */
while ((tmp = mm->context.vmlist)) {
mm->context.vmlist = tmp->next;
put_vma(mm, tmp->vma);
kenter("");
realalloc -= kobjsize(tmp);
askedalloc -= sizeof(*tmp);
kfree(tmp);
}
mm->total_vm = 0;
#ifdef DEBUG
show_process_blocks();
#endif
/* pop each VMA off the head of mm->mmap, advancing the head before the VMA
 * is detached from the mm and deleted */
while ((vma = mm->mmap)) {
mm->mmap = vma->vm_next;
delete_vma_from_mm(vma);
delete_vma(mm, vma);
}
kleave("");
}
unsigned long do_brk(unsigned long addr, unsigned long len)
......@@ -1242,8 +1617,8 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
* time (controlled by the MREMAP_MAYMOVE flag and available VM space)
*
* under NOMMU conditions, we only permit changing a mapping's size, and only
* as long as it stays within the hole allocated by the kmalloc() call in
* do_mmap_pgoff() and the block is not shareable
* as long as it stays within the region allocated by do_mmap_private() and the
* block is not shareable
*
* MREMAP_FIXED is not supported under NOMMU conditions
*/
......@@ -1254,13 +1629,16 @@ unsigned long do_mremap(unsigned long addr,
struct vm_area_struct *vma;
/* insanity checks first */
if (new_len == 0)
if (old_len == 0 || new_len == 0)
return (unsigned long) -EINVAL;
if (addr & ~PAGE_MASK)
return -EINVAL;
if (flags & MREMAP_FIXED && new_addr != addr)
return (unsigned long) -EINVAL;
vma = find_vma_exact(current->mm, addr);
vma = find_vma_exact(current->mm, addr, old_len);
if (!vma)
return (unsigned long) -EINVAL;
......@@ -1270,22 +1648,19 @@ unsigned long do_mremap(unsigned long addr,
if (vma->vm_flags & VM_MAYSHARE)
return (unsigned long) -EPERM;
if (new_len > kobjsize((void *) addr))
if (new_len > vma->vm_region->vm_end - vma->vm_region->vm_start)
return (unsigned long) -ENOMEM;
/* all checks complete - do it */
vma->vm_end = vma->vm_start + new_len;
askedalloc -= old_len;
askedalloc += new_len;
return vma->vm_start;
}
EXPORT_SYMBOL(do_mremap);
asmlinkage unsigned long sys_mremap(unsigned long addr,
unsigned long old_len, unsigned long new_len,
unsigned long flags, unsigned long new_addr)
asmlinkage
unsigned long sys_mremap(unsigned long addr,
unsigned long old_len, unsigned long new_len,
unsigned long flags, unsigned long new_addr)
{
unsigned long ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment