Commit 671ccb4b authored by Oleg Nesterov, committed by Linus Torvalds

[PATCH] readahead: improve sequential read detection

1. The current code can't always detect sequential reading when the read
   size is not PAGE_CACHE_SIZE aligned.

   If an application reads the file in 4096+512-byte chunks, we have:
   1st read: first read detected, prev_page = 2.
   2nd read: offset == 2, the read is considered random.

   page_cache_readahead() should treat prev_page == offset as sequential
   access. In this case it is better to ++offset, because of the
   subsequent blockable_page_cache_readahead(offset, size) call.
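   For illustration only, a minimal user-space sketch of the new check
   (is_sequential() and the stripped-down ra_state are made-up stand-ins,
   not kernel code):

   #include <stdio.h>

   /* Hypothetical, stripped-down stand-in for struct file_ra_state. */
   struct ra_state {
           unsigned long prev_page;
   };

   /*
    * Mirrors the new logic: a read that starts on the page we stopped in
    * (offset == prev_page) is nudged forward one page, so it is classified
    * as sequential when more than one page was requested.
    */
   static int is_sequential(struct ra_state *ra, unsigned long offset,
                            unsigned long req_size)
   {
           if (offset == ra->prev_page && --req_size)
                   ++offset;
           return offset == ra->prev_page + 1;
   }

   int main(void)
   {
           struct ra_state ra = { .prev_page = 2 };

           /* 2nd 4096+512 read from the example above: offset == 2. */
           printf("sequential: %d\n", is_sequential(&ra, 2, 2));
           return 0;
   }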

2. If an application reads 4096 bytes with *ppos == 512, two pages have
   to be read, but req_size == 1 in do_generic_mapping_read().

   Usually this is not a problem, but in the random-read case it results
   in unnecessary page cache misses.
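   A rough illustration of the arithmetic, assuming PAGE_CACHE_SIZE == 4096
   (PAGE_CACHE_SHIFT == 12); the values mirror the example above:

   #include <stdio.h>

   #define PAGE_CACHE_SHIFT 12
   #define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)

   int main(void)
   {
           unsigned long ppos = 512, count = 4096;
           unsigned long index = ppos >> PAGE_CACHE_SHIFT;

           /* Old code: rounds up the byte count, ignoring the in-page offset. */
           unsigned long req_size =
                   (count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

           /* New code: rounds up the end position, so both touched pages count. */
           unsigned long last_index =
                   (ppos + count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

           printf("old: req_size           = %lu page(s)\n", req_size);
           printf("new: last_index - index = %lu page(s)\n", last_index - index);
           return 0;
   }

   This prints 1 page for the old calculation and 2 for the new one.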

~$ time dd conv=notrunc if=/tmp/GIG of=/tmp/dummy bs=$((4096+512))

2.6.11-clean:	real=370.35 user=0.16 sys=14.66
2.6.11-patched:	real=234.49 user=0.19 sys=12.41
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent c8ca47af
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -707,7 +707,7 @@ void do_generic_mapping_read(struct address_space *mapping,
         unsigned long index;
         unsigned long end_index;
         unsigned long offset;
-        unsigned long req_size;
+        unsigned long last_index;
         unsigned long next_index;
         unsigned long prev_index;
         loff_t isize;
@@ -719,7 +719,7 @@ void do_generic_mapping_read(struct address_space *mapping,
         index = *ppos >> PAGE_CACHE_SHIFT;
         next_index = index;
         prev_index = ra.prev_page;
-        req_size = (desc->count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+        last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
         offset = *ppos & ~PAGE_CACHE_MASK;
 
         isize = i_size_read(inode);
@@ -729,7 +729,7 @@ void do_generic_mapping_read(struct address_space *mapping,
         end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
         for (;;) {
                 struct page *page;
-                unsigned long ret_size, nr, ret;
+                unsigned long nr, ret;
 
                 /* nr is the maximum number of bytes to copy from this page */
                 nr = PAGE_CACHE_SIZE;
@@ -744,12 +744,9 @@ void do_generic_mapping_read(struct address_space *mapping,
                 nr = nr - offset;
 
                 cond_resched();
-                if (index == next_index && req_size) {
-                        ret_size = page_cache_readahead(mapping, &ra,
-                                        filp, index, req_size);
-                        next_index += ret_size;
-                        req_size -= ret_size;
-                }
+                if (index == next_index)
+                        next_index = page_cache_readahead(mapping, &ra, filp,
+                                        index, last_index - index);
 find_page:
                 page = find_get_page(mapping, index);
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -443,26 +443,26 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
                 struct file *filp, unsigned long offset,
                 unsigned long req_size)
 {
-        unsigned long max, newsize = req_size;
-        int sequential = (offset == ra->prev_page + 1);
+        unsigned long max, newsize;
+        int sequential;
 
         /*
          * Here we detect the case where the application is performing
          * sub-page sized reads. We avoid doing extra work and bogusly
          * perturbing the readahead window expansion logic.
+         * If size is zero, there is no read ahead window so we need one
          */
-        if (offset == ra->prev_page && req_size == 1)
-                goto out;
+        if (offset == ra->prev_page && --req_size)
+                ++offset;
 
+        sequential = (offset == ra->prev_page + 1);
         ra->prev_page = offset;
         max = get_max_readahead(ra);
         newsize = min(req_size, max);
 
-        if (newsize == 0 || (ra->flags & RA_FLAG_INCACHE)) {
-                newsize = 1;
-                goto out;        /* No readahead or file already in cache */
-        }
+        /* No readahead or file already in cache or sub-page sized read */
+        if (newsize == 0 || (ra->flags & RA_FLAG_INCACHE))
+                goto out;
 
         ra->prev_page += newsize - 1;
@@ -527,7 +527,7 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
         }
 out:
-        return newsize;
+        return ra->prev_page + 1;
 }
 
 /*
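A hedged user-space sketch of the changed contract between the two hunks above
(toy_readahead() and its stripped-down state are hypothetical stand-ins):
page_cache_readahead() now returns the first page index it has not handled
(ra->prev_page + 1), and the caller stores that value directly in next_index
instead of advancing next_index by a returned page count.

#include <stdio.h>

/*
 * Toy stand-in for page_cache_readahead(): submits nr_pages starting at
 * index, records the last submitted page in *prev_page, and returns the
 * first index it did not handle.
 */
static unsigned long toy_readahead(unsigned long *prev_page,
                                   unsigned long index,
                                   unsigned long nr_pages)
{
        *prev_page = index + nr_pages - 1;
        return *prev_page + 1;
}

int main(void)
{
        unsigned long prev_page = 0;
        unsigned long index = 0, next_index = 0, last_index = 2;

        /* Mirrors the new caller in do_generic_mapping_read(). */
        if (index == next_index)
                next_index = toy_readahead(&prev_page, index,
                                           last_index - index);

        printf("next_index = %lu\n", next_index);        /* 2 */
        return 0;
}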