Commit 82a333fa authored by Linus Torvalds

Simplify and speed up mmap read-around handling

This improves cold-cache program startup noticeably for me, and
simplifies the read-ahead logic at the same time. The rules for
read-ahead are:

 - if the vma is marked random, we just do the regular one-page case. 
   Obvious.

 - if the vma is marked "linear access", we use the regular readahead
   code. No change in behaviour there (well, we also only consider it a 
   _miss_ if it was marked linear access - the "readahead" and
   "readaround"  things are now totally independent of each other)

 - otherwise, we look at how many hits/misses we've had for this 
   particular file open for mmap, and if we've had noticeably more
   misses than hits, we don't bother with read-around.

In particular, this means that the "real" read-ahead logic literally
only needs to worry about finding sequential accesses, and does not
have to worry about the common executable mmap access patterns that
have very different behaviour.

Some constant tweaking may be a good idea.
parent e939c913
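
The per-vma hints referred to above are set from userspace with madvise(2): MADV_RANDOM and MADV_SEQUENTIAL set the flags that VM_RandomReadHint() and VM_SequentialReadHint() test in the fault path, and MADV_NORMAL clears both, leaving the new read-around heuristic in charge. A minimal userspace sketch, for illustration only (/bin/ls is just an arbitrary file, and error handling is abbreviated):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/bin/ls", O_RDONLY);	/* arbitrary example file */
	off_t len;
	void *map;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	len = lseek(fd, 0, SEEK_END);
	map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
	if (map == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* MADV_RANDOM: faults stay in the plain one-page case. */
	madvise(map, len, MADV_RANDOM);

	/* MADV_SEQUENTIAL: faults feed the regular readahead code. */
	madvise(map, len, MADV_SEQUENTIAL);

	/* MADV_NORMAL: neither hint, so the read-around heuristic applies. */
	madvise(map, len, MADV_NORMAL);

	munmap(map, len);
	close(fd);
	return 0;
}

Mappings created without any madvise() call fall under the third rule, which is the common case for executables and shared libraries mapped by the dynamic linker.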
@@ -420,6 +420,8 @@ struct file_ra_state {
 	unsigned long ahead_start;	/* Ahead window */
 	unsigned long ahead_size;
 	unsigned long ra_pages;		/* Maximum readahead window */
+	unsigned long mmap_hit;		/* Cache hit stat for mmap accesses */
+	unsigned long mmap_miss;	/* Cache miss stat for mmap accesses */
 };
 
 struct file {
......
@@ -571,10 +571,6 @@ void page_cache_readahead(struct address_space *mapping,
 			  struct file_ra_state *ra,
 			  struct file *filp,
 			  unsigned long offset);
-void page_cache_readaround(struct address_space *mapping,
-			   struct file_ra_state *ra,
-			   struct file *filp,
-			   unsigned long offset);
 void handle_ra_miss(struct address_space *mapping,
 		    struct file_ra_state *ra, pgoff_t offset);
 unsigned long max_sane_readahead(unsigned long nr);
......
@@ -925,6 +925,9 @@ static int page_cache_read(struct file * file, unsigned long offset)
 	return error == -EEXIST ? 0 : error;
 }
 
+#define MMAP_READAROUND	(16UL)
+#define MMAP_LOTSAMISS	(100)
+
 /*
  * filemap_nopage() is invoked via the vma operations vector for a
  * mapped memory region to read in file data during a page fault.
@@ -942,19 +945,19 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
 	struct inode *inode = mapping->host;
 	struct page *page;
 	unsigned long size, pgoff, endoff;
-	int did_readahead;
+	int did_readaround = 0;
 
 	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 	endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
 retry_all:
-	/*
-	 * An external ptracer can access pages that normally aren't
-	 * accessible..
-	 */
 	size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if ((pgoff >= size) && (area->vm_mm == current->mm))
-		return NULL;
+	if (pgoff >= size)
+		goto outside_data_content;
+
+	/* If we don't want any read-ahead, don't bother */
+	if (VM_RandomReadHint(area))
+		goto no_cached_page;
 
 	/*
 	 * The "size" of the file, as far as mmap is concerned, isn't bigger
@@ -963,25 +966,14 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
 	if (size > endoff)
 		size = endoff;
 
-	did_readahead = 0;
-
 	/*
 	 * The readahead code wants to be told about each and every page
 	 * so it can build and shrink its windows appropriately
+	 *
+	 * For sequential accesses, we use the generic readahead logic.
 	 */
-	if (VM_SequentialReadHint(area)) {
-		did_readahead = 1;
+	if (VM_SequentialReadHint(area))
 		page_cache_readahead(mapping, ra, file, pgoff);
-	}
-
-	/*
-	 * If the offset is outside the mapping size we're off the end
-	 * of a privately mapped file, so we need to map a zero page.
-	 */
-	if ((pgoff < size) && !VM_RandomReadHint(area)) {
-		did_readahead = 1;
-		page_cache_readaround(mapping, ra, file, pgoff);
-	}
 
 	/*
 	 * Do we have something in the page cache already?
@@ -989,13 +981,27 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
 retry_find:
 	page = find_get_page(mapping, pgoff);
 	if (!page) {
-		if (did_readahead) {
+		if (VM_SequentialReadHint(area)) {
 			handle_ra_miss(mapping, ra, pgoff);
-			did_readahead = 0;
+			goto no_cached_page;
 		}
-		goto no_cached_page;
+		ra->mmap_miss++;
+
+		/*
+		 * Do we miss much more than hit in this file? If so,
+		 * stop bothering with read-ahead. It will only hurt.
+		 */
+		if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
+			goto no_cached_page;
+
+		did_readaround = 1;
+		do_page_cache_readahead(mapping, file, pgoff & ~(MMAP_READAROUND-1), MMAP_READAROUND);
+		goto retry_find;
 	}
 
+	if (!did_readaround)
+		ra->mmap_hit++;
+
 	/*
 	 * Ok, found a page in the page cache, now we need to check
 	 * that it's up-to-date.
@@ -1010,6 +1016,14 @@ struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address
 	mark_page_accessed(page);
 	return page;
 
+outside_data_content:
+	/*
+	 * An external ptracer can access pages that normally aren't
+	 * accessible..
+	 */
+	if (area->vm_mm == current->mm)
+		return NULL;
+	/* Fall through to the non-read-ahead case */
 no_cached_page:
 	/*
 	 * We're only likely to ever get here if MADV_RANDOM is in
......
@@ -437,37 +437,6 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
 	return;
 }
 
-/*
- * For mmap reads (typically executables) the access pattern is fairly random,
- * but somewhat ascending. So readaround favours pages beyond the target one.
- * We also boost the window size, as it can easily shrink due to misses.
- */
-void
-page_cache_readaround(struct address_space *mapping, struct file_ra_state *ra,
-			struct file *filp, unsigned long offset)
-{
-	if (ra->next_size != -1UL) {
-		const unsigned long min = get_min_readahead(ra) * 4;
-		unsigned long target;
-		unsigned long backward;
-
-		/*
-		 * If next_size is zero then leave it alone, because that's a
-		 * readahead startup state.
-		 */
-		if (ra->next_size && ra->next_size < min)
-			ra->next_size = min;
-
-		target = offset;
-		backward = ra->next_size / 4;
-		if (backward > target)
-			target = 0;
-		else
-			target -= backward;
-		page_cache_readahead(mapping, ra, filp, target);
-	}
-}
-
 /*
  * handle_ra_miss() is called when it is known that a page which should have
......
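
To make the numbers concrete, here is a small userspace sketch of the miss bookkeeping and window alignment that the patched filemap_nopage() performs on a page-cache miss for a mapping with neither hint. Only MMAP_READAROUND, MMAP_LOTSAMISS and the mmap_hit/mmap_miss counters are taken from the patch; the struct and function names below are invented for illustration:

#include <stdio.h>

#define MMAP_READAROUND	(16UL)
#define MMAP_LOTSAMISS	(100)

/* Stand-in for the two counters added to struct file_ra_state. */
struct mmap_ra_stats {
	unsigned long mmap_hit;
	unsigned long mmap_miss;
};

/*
 * Mirror of the non-sequential miss path in filemap_nopage():
 * count the miss, give up if misses outnumber hits by more than
 * MMAP_LOTSAMISS, otherwise read MMAP_READAROUND pages starting at
 * the aligned window that contains pgoff.
 */
static int want_readaround(struct mmap_ra_stats *ra, unsigned long pgoff,
			   unsigned long *start)
{
	ra->mmap_miss++;
	if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
		return 0;
	*start = pgoff & ~(MMAP_READAROUND - 1);
	return 1;
}

int main(void)
{
	struct mmap_ra_stats ra = { 0, 0 };
	unsigned long start;

	/* A fault at page 37 reads around pages 32..47. */
	if (want_readaround(&ra, 37, &start))
		printf("read %lu pages starting at page %lu\n",
		       MMAP_READAROUND, start);

	/* Once misses exceed hits by more than 100, read-around stops. */
	ra.mmap_hit = 20;
	ra.mmap_miss = 150;
	if (!want_readaround(&ra, 37, &start))
		printf("%lu misses vs %lu hits: skip read-around\n",
		       ra.mmap_miss, ra.mmap_hit);
	return 0;
}

With the default constants this means a mapping can take roughly a hundred more stray faults than hits before the kernel stops speculatively reading 16-page (64KB with 4KB pages) chunks around each fault.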