Commit 46fc3e7b authored by Fengguang Wu, committed by Linus Torvalds

readahead: add look-ahead support to __do_page_cache_readahead()

Add look-ahead support to __do_page_cache_readahead().

It works by
	- marking the Nth page counting backwards from the end of the chunk
	  being read with PG_readahead
	  (which instructs the page's first reader to invoke readahead),
	- and doing the marking only for newly allocated pages
	  (to prevent blindly doing readahead on already cached pages).

Look-ahead is a technique to achieve I/O pipelining:

While the application is working through a chunk of cached pages, the kernel
reads-ahead the next chunk of pages _before_ time of need.  It effectively
hides low-level I/O latencies from high-level applications.

Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Cc: Steven Pratt <slpratt@austin.ibm.com>
Cc: Ram Pai <linuxram@us.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent d77c2d7c
...@@ -265,7 +265,8 @@ static int read_pages(struct address_space *mapping, struct file *filp, ...@@ -265,7 +265,8 @@ static int read_pages(struct address_space *mapping, struct file *filp,
*/ */
static int static int
__do_page_cache_readahead(struct address_space *mapping, struct file *filp, __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
pgoff_t offset, unsigned long nr_to_read) pgoff_t offset, unsigned long nr_to_read,
unsigned long lookahead_size)
{ {
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct page *page; struct page *page;
...@@ -278,7 +279,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, ...@@ -278,7 +279,7 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
if (isize == 0) if (isize == 0)
goto out; goto out;
end_index = ((isize - 1) >> PAGE_CACHE_SHIFT); end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
/* /*
* Preallocate as many pages as we will need. * Preallocate as many pages as we will need.
...@@ -301,6 +302,8 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp, ...@@ -301,6 +302,8 @@ __do_page_cache_readahead(struct address_space *mapping, struct file *filp,
break; break;
page->index = page_offset; page->index = page_offset;
list_add(&page->lru, &page_pool); list_add(&page->lru, &page_pool);
if (page_idx == nr_to_read - lookahead_size)
SetPageReadahead(page);
ret++; ret++;
} }
read_unlock_irq(&mapping->tree_lock); read_unlock_irq(&mapping->tree_lock);
...@@ -337,7 +340,7 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp, ...@@ -337,7 +340,7 @@ int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
if (this_chunk > nr_to_read) if (this_chunk > nr_to_read)
this_chunk = nr_to_read; this_chunk = nr_to_read;
err = __do_page_cache_readahead(mapping, filp, err = __do_page_cache_readahead(mapping, filp,
offset, this_chunk); offset, this_chunk, 0);
if (err < 0) { if (err < 0) {
ret = err; ret = err;
break; break;
...@@ -384,7 +387,7 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp, ...@@ -384,7 +387,7 @@ int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
if (bdi_read_congested(mapping->backing_dev_info)) if (bdi_read_congested(mapping->backing_dev_info))
return -1; return -1;
return __do_page_cache_readahead(mapping, filp, offset, nr_to_read); return __do_page_cache_readahead(mapping, filp, offset, nr_to_read, 0);
} }
/* /*
...@@ -404,7 +407,7 @@ blockable_page_cache_readahead(struct address_space *mapping, struct file *filp, ...@@ -404,7 +407,7 @@ blockable_page_cache_readahead(struct address_space *mapping, struct file *filp,
if (!block && bdi_read_congested(mapping->backing_dev_info)) if (!block && bdi_read_congested(mapping->backing_dev_info))
return 0; return 0;
actual = __do_page_cache_readahead(mapping, filp, offset, nr_to_read); actual = __do_page_cache_readahead(mapping, filp, offset, nr_to_read, 0);
return check_ra_success(ra, nr_to_read, actual); return check_ra_success(ra, nr_to_read, actual);
} }
...@@ -449,7 +452,7 @@ static int make_ahead_window(struct address_space *mapping, struct file *filp, ...@@ -449,7 +452,7 @@ static int make_ahead_window(struct address_space *mapping, struct file *filp,
* @req_size: hint: total size of the read which the caller is performing in * @req_size: hint: total size of the read which the caller is performing in
* PAGE_CACHE_SIZE units * PAGE_CACHE_SIZE units
* *
* page_cache_readahead() is the main function. If performs the adaptive * page_cache_readahead() is the main function. It performs the adaptive
* readahead window size management and submits the readahead I/O. * readahead window size management and submits the readahead I/O.
* *
* Note that @filp is purely used for passing on to the ->readpage[s]() * Note that @filp is purely used for passing on to the ->readpage[s]()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment