Commit d6b7a781 authored by Prasanna Meda, committed by Linus Torvalds

[PATCH] Speed up /proc/pid/maps

This patch uses find_vma() to improve the read response of /proc/pid/maps.
It attempts to make a linear scan instead of a quadratic walk and to utilise
the rb tree.  Reading the file was doing a sequential scan from the beginning
to the file position every time, and taking quite a long time.

The improvements come from f_version/m_version, which result in an mmap_cache
match.  Even if mmap_cache does not match, an rb tree walk should be faster
than a sequential walk.  The first attempt was to put the state across read
system calls into private data.  Later I got inspiration from wli's pid patch,
which uses f_version in readdir of /proc.  Another advantage is that f_version
will be cleared automatically by lseek.

The test program creates 32K maps and splits them into two (limited by the
max_map_count sysctl) using mprotect(0).  After the patch, the read time
improves from many seconds to milliseconds, and does not grow superlinearly
with the number of read calls.

Help taken from Peter Swain in idea and testing.

After the patch:
Reading /proc/self/maps:65528 time: 0 secs and   780728 usecs buf:4096 bytes:3811362
Reading /proc/self/maps:65528 time: 1 secs and   117573 usecs buf:1024 bytes:3866627
Reading /proc/self/maps:65528 time: 0 secs and   473459 usecs buf: 256 bytes:3866627
Reading /proc/self/maps:65528 time: 0 secs and   901288 usecs buf:  64 bytes:3866627
Reading /proc/self/maps:65528 time: 1 secs and   480185 usecs buf:  16 bytes:3866627
Reading /proc/self/maps:65528 time: 1 secs and   636268 usecs buf:   4 bytes:3866627
Reading /proc/self/maps:65528 time: 4 secs and   118327 usecs buf:   1 bytes:3866627

Before the patch:
Reading /proc/self/maps:65528 time: 4 secs and   359556 usecs buf:4096 bytes:3866647
Reading /proc/self/maps:65528 time:16 secs and   218584 usecs buf:1024 bytes:3866688
Reading /proc/self/maps:65528 time:67 secs and   870200 usecs buf: 256 bytes:3866688
Reading /proc/self/maps:65528 time:255 secs and   186934 usecs buf:  64 bytes:3866688
Small reads never completed.
Signed-off-by: Prasanna Meda <pmeda@akamai.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 27f04a42
......@@ -87,6 +87,7 @@ static void pad_len_spaces(struct seq_file *m, int len)
static int show_map(struct seq_file *m, void *v)
{
struct task_struct *task = m->private;
struct vm_area_struct *map = v;
struct mm_struct *mm = map->vm_mm;
struct file *file = map->vm_file;
......@@ -138,30 +139,66 @@ static int show_map(struct seq_file *m, void *v)
}
}
seq_putc(m, '\n');
if (m->count < m->size) /* map is copied successfully */
m->version = (map != get_gate_vma(task))? map->vm_start: 0;
return 0;
}
static void *m_start(struct seq_file *m, loff_t *pos)
{
struct task_struct *task = m->private;
struct mm_struct *mm = get_task_mm(task);
struct vm_area_struct * map;
unsigned long last_addr = m->version;
struct mm_struct *mm;
struct vm_area_struct *map, *tail_map;
loff_t l = *pos;
/*
* We remember last_addr rather than next_addr to hit with
* mmap_cache most of the time. We have zero last_addr at
* the beginning and also after lseek. We will have -1 last_addr
* after the end of the maps.
*/
if (last_addr == -1UL)
return NULL;
mm = get_task_mm(task);
if (!mm)
return NULL;
tail_map = get_gate_vma(task);
down_read(&mm->mmap_sem);
map = mm->mmap;
while (l-- && map)
/* Start with last addr hint */
if (last_addr && (map = find_vma(mm, last_addr))) {
map = map->vm_next;
if (!map) {
up_read(&mm->mmap_sem);
mmput(mm);
if (l == -1)
map = get_gate_vma(task);
goto out;
}
/*
* Check the map index is within the range and do
* sequential scan until m_index.
*/
map = NULL;
if ((unsigned long)l < mm->map_count) {
map = mm->mmap;
while (l-- && map)
map = map->vm_next;
goto out;
}
return map;
if (l != mm->map_count)
tail_map = NULL; /* After gate map */
out:
if (map)
return map;
/* End of maps has reached */
m->version = (tail_map != NULL)? 0: -1UL;
up_read(&mm->mmap_sem);
mmput(mm);
return tail_map;
}
static void m_stop(struct seq_file *m, void *v)
......@@ -179,13 +216,13 @@ static void *m_next(struct seq_file *m, void *v, loff_t *pos)
{
struct task_struct *task = m->private;
struct vm_area_struct *map = v;
struct vm_area_struct *tail_map = get_gate_vma(task);
(*pos)++;
if (map->vm_next)
if (map && (map != tail_map) && map->vm_next)
return map->vm_next;
m_stop(m, v);
if (map != get_gate_vma(task))
return get_gate_vma(task);
return NULL;
return (map != tail_map)? tail_map: NULL;
}
struct seq_operations proc_pid_maps_op = {
......
......@@ -36,6 +36,13 @@ int seq_open(struct file *file, struct seq_operations *op)
p->op = op;
file->private_data = p;
/*
* Wrappers around seq_open(e.g. swaps_open) need to be
* aware of this. If they set f_version themselves, they
* should call seq_open first and then set f_version.
*/
file->f_version = 0;
/* SEQ files support lseek, but not pread/pwrite */
file->f_mode &= ~(FMODE_PREAD | FMODE_PWRITE);
return 0;
......@@ -58,6 +65,18 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
int err = 0;
down(&m->sem);
/*
* seq_file->op->..m_start/m_stop/m_next may do special actions
* or optimisations based on the file->f_version, so we want to
* pass the file->f_version to those methods.
*
* seq_file->version is just copy of f_version, and seq_file
* methods can treat it simply as file version.
* It is copied in first and copied out after all operations.
* It is convenient to have it as part of structure to avoid the
* need of passing another argument to all the seq_file methods.
*/
m->version = file->f_version;
/* grab buffer if we didn't have one */
if (!m->buf) {
m->buf = kmalloc(m->size = PAGE_SIZE, GFP_KERNEL);
......@@ -98,6 +117,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
if (!m->buf)
goto Enomem;
m->count = 0;
m->version = 0;
}
m->op->stop(m, p);
m->count = 0;
......@@ -136,6 +156,7 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
copied = err;
else
*ppos += copied;
file->f_version = m->version;
up(&m->sem);
return copied;
Enomem:
......@@ -153,6 +174,7 @@ static int traverse(struct seq_file *m, loff_t offset)
int error = 0;
void *p;
m->version = 0;
m->index = 0;
m->count = m->from = 0;
if (!offset)
......@@ -207,6 +229,7 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin)
long long retval = -EINVAL;
down(&m->sem);
m->version = file->f_version;
switch (origin) {
case 1:
offset += file->f_pos;
......@@ -220,6 +243,7 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin)
if (retval) {
/* with extreme prejudice... */
file->f_pos = 0;
m->version = 0;
m->index = 0;
m->count = 0;
} else {
......@@ -228,6 +252,7 @@ loff_t seq_lseek(struct file *file, loff_t offset, int origin)
}
}
up(&m->sem);
file->f_version = m->version;
return retval;
}
EXPORT_SYMBOL(seq_lseek);
......
......@@ -18,6 +18,7 @@ struct seq_file {
size_t from;
size_t count;
loff_t index;
loff_t version;
struct semaphore sem;
struct seq_operations *op;
void *private;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment