Commit b2768587 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] speed up readahead for seeky loads

From: Ram Pai <linuxram@us.ibm.com>

Currently the readahead code tends to read one more page than it should with
seeky database-style loads.  The extra page was a workaround to prevent bogus
readahead triggering when we step into the last page of the current window.

The patch removes that workaround and fixes up the suboptimal logic instead.


With regard to the "rounding errors" mentioned in this patch, Ram provided the
following description:

  Say the i/o size is 20 pages.

  Our algorithm starts with an initial average i/o size of 'ra_pages/2', which
  is, say, 16 in most cases.

  Now every time we take an average, the 'average' progresses as follows:
  (16+20)/2=18
  (18+20)/2=19
  (19+20)/2=19
  (19+20)/2=19.....
  and the rounding error (integer division truncates 19.5 down to 19) means
  it never reaches 20



Benchmarking sitrep:

			IOZONE

	run on a nfs mounted filesystem:
	client machine 2proc, 733MHz, 2GB memory
	server machine 8proc, 700MHz, 8GB memory

./iozone -c -t1 -s 4096m -r 128k
parent f71e4184
...@@ -353,7 +353,7 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, ...@@ -353,7 +353,7 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
unsigned orig_next_size; unsigned orig_next_size;
unsigned actual; unsigned actual;
int first_access=0; int first_access=0;
unsigned long preoffset=0; unsigned long average;
/* /*
* Here we detect the case where the application is performing * Here we detect the case where the application is performing
...@@ -394,10 +394,17 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, ...@@ -394,10 +394,17 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
if (ra->serial_cnt <= (max * 2)) if (ra->serial_cnt <= (max * 2))
ra->serial_cnt++; ra->serial_cnt++;
} else { } else {
ra->average = (ra->average + ra->serial_cnt) / 2; /*
* to avoid rounding errors, ensure that 'average'
* tends towards the value of ra->serial_cnt.
*/
average = ra->average;
if (average < ra->serial_cnt) {
average++;
}
ra->average = (average + ra->serial_cnt) / 2;
ra->serial_cnt = 1; ra->serial_cnt = 1;
} }
preoffset = ra->prev_page;
ra->prev_page = offset; ra->prev_page = offset;
if (offset >= ra->start && offset <= (ra->start + ra->size)) { if (offset >= ra->start && offset <= (ra->start + ra->size)) {
...@@ -457,18 +464,13 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, ...@@ -457,18 +464,13 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
* ahead window and get some I/O underway for the new * ahead window and get some I/O underway for the new
* current window. * current window.
*/ */
if (!first_access && preoffset >= ra->start && if (!first_access) {
preoffset < (ra->start + ra->size)) { /* Heuristic: there is a high probability
/* Heuristic: If 'n' pages were * that around ra->average number of
* accessed in the current window, there * pages shall be accessed in the next
* is a high probability that around 'n' pages * current window.
* shall be used in the next current window.
*
* To minimize lazy-readahead triggered
* in the next current window, read in
* an extra page.
*/ */
ra->next_size = preoffset - ra->start + 2; ra->next_size = min(ra->average , (unsigned long)max);
} }
ra->start = offset; ra->start = offset;
ra->size = ra->next_size; ra->size = ra->next_size;
...@@ -492,21 +494,19 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra, ...@@ -492,21 +494,19 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
*/ */
if (ra->ahead_start == 0) { if (ra->ahead_start == 0) {
/* /*
* if the average io-size is less than maximum * If the average io-size is more than maximum
* readahead size of the file the io pattern is * readahead size of the file the io pattern is
* sequential. Hence bring in the readahead window * sequential. Hence bring in the readahead window
* immediately. * immediately.
* Else the i/o pattern is random. Bring * If the average io-size is less than maximum
* in the readahead window only if the last page of * readahead size of the file the io pattern is
* the current window is accessed (lazy readahead). * random. Hence don't bother to readahead.
*/ */
unsigned long average = ra->average; average = ra->average;
if (ra->serial_cnt > average) if (ra->serial_cnt > average)
average = (ra->serial_cnt + ra->average) / 2; average = (ra->serial_cnt + ra->average + 1) / 2;
if ((average >= max) || (offset == (ra->start + if (average > max) {
ra->size - 1))) {
ra->ahead_start = ra->start + ra->size; ra->ahead_start = ra->start + ra->size;
ra->ahead_size = ra->next_size; ra->ahead_size = ra->next_size;
actual = do_page_cache_readahead(mapping, filp, actual = do_page_cache_readahead(mapping, filp,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment