Commit 92216226 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] permit zero-length readahead, and tidy up readahead

- Initialise the per-request_queue readahead parameter properly,
  rather than the dopey "if it's zero you get the default"
  approach.

- Permit zero-length readahead.

- 80-columnify mm/readahead.c
parent 49d90743
@@ -187,6 +187,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	q->max_phys_segments = MAX_PHYS_SEGMENTS;
 	q->max_hw_segments = MAX_HW_SEGMENTS;
 	q->make_request_fn = mfn;
+	q->ra_sectors = VM_MAX_READAHEAD << (10 - 9);	/* kbytes->sectors */
 	blk_queue_max_sectors(q, MAX_SECTORS);
 	blk_queue_hardsect_size(q, 512);
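The shift in the new line is just a unit conversion: ra_sectors is kept in traditional 512-byte sectors (the queue's hardsect size above is 512) while VM_MAX_READAHEAD is in kbytes, so kbytes are scaled by 1024/512 = 2, which is what << (10 - 9) expresses. With VM_MAX_READAHEAD at 128, the queue starts out with ra_sectors = 256, i.e. 128 kbytes of readahead.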
@@ -854,7 +855,6 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
 	q->plug_tq.data = q;
 	q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
 	q->queue_lock = lock;
-	q->ra_sectors = 0;	/* Use VM default */
 	blk_queue_segment_boundary(q, 0xffffffff);
......
@@ -539,6 +539,8 @@ extern int filemap_sync(struct vm_area_struct *, unsigned long, size_t, unsigned
 extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int);
 /* readahead.c */
+#define VM_MAX_READAHEAD	128	/* kbytes */
+#define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 void do_page_cache_readahead(struct file *file,
 		unsigned long offset, unsigned long nr_to_read);
 void page_cache_readahead(struct file *file, unsigned long offset);
......
@@ -25,9 +25,6 @@
  * has a zero value of ra_sectors.
  */
-#define VM_MAX_READAHEAD	128	/* kbytes */
-#define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 /*
  * Return max readahead size for this inode in number-of-pages.
  */
@@ -37,8 +34,6 @@ static int get_max_readahead(struct inode *inode)
 	if (inode->i_sb->s_bdev) {
 		blk_ra_kbytes = blk_get_readahead(inode->i_sb->s_bdev) / 2;
-		if (blk_ra_kbytes < VM_MIN_READAHEAD)
-			blk_ra_kbytes = VM_MAX_READAHEAD;
 	}
 	return blk_ra_kbytes >> (PAGE_CACHE_SHIFT - 10);
 }
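For a sense of the units in get_max_readahead(): blk_get_readahead() reports the queue's ra_sectors, dividing by 2 turns 512-byte sectors into kbytes, and shifting right by (PAGE_CACHE_SHIFT - 10) turns kbytes into pages. Assuming 4 kbyte pages, the default 256 sectors becomes 128 kbytes and then 32 pages; with the VM_MIN_READAHEAD fallback removed, a queue whose readahead is set to zero now yields zero pages rather than being bumped back up to VM_MAX_READAHEAD.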
@@ -62,11 +57,12 @@ static int get_min_readahead(struct inode *inode)
  * size: Number of pages in that read
- *       Together, these form the "current window".
+ *       Together, start and size represent the `readahead window'.
- * next_size: The number of pages to read when we get the next readahead miss.
+ * next_size: The number of pages to read on the next readahead miss.
  * prev_page: The page which the readahead algorithm most-recently inspected.
- *       prev_page is mainly an optimisation: if page_cache_readahead sees
- *       that it is again being called for a page which it just looked at,
- *       it can return immediately without making any state changes.
+ *       prev_page is mainly an optimisation: if page_cache_readahead
+ *       sees that it is again being called for a page which it just
+ *       looked at, it can return immediately without making any state
+ *       changes.
  * ahead_start,
  * ahead_size: Together, these form the "ahead window".
  *
@@ -88,38 +84,39 @@ static int get_min_readahead(struct inode *inode)
  * ahead window.
  *
  * A `readahead hit' occurs when a read request is made against a page which is
- * inside the current window. Hits are good, and the window size (next_size) is
- * grown aggressively when hits occur. Two pages are added to the next window
- * size on each hit, which will end up doubling the next window size by the time
- * I/O is submitted for it.
+ * inside the current window. Hits are good, and the window size (next_size)
+ * is grown aggressively when hits occur. Two pages are added to the next
+ * window size on each hit, which will end up doubling the next window size by
+ * the time I/O is submitted for it.
  *
- * If readahead hits are more sparse (say, the application is only reading every
- * second page) then the window will build more slowly.
+ * If readahead hits are more sparse (say, the application is only reading
+ * every second page) then the window will build more slowly.
  *
- * On a readahead miss (the application seeked away) the readahead window is shrunk
- * by 25%. We don't want to drop it too aggressively, because it's a good assumption
- * that an application which has built a good readahead window will continue to
- * perform linear reads. Either at the new file position, or at the old one after
- * another seek.
+ * On a readahead miss (the application seeked away) the readahead window is
+ * shrunk by 25%. We don't want to drop it too aggressively, because it is a
+ * good assumption that an application which has built a good readahead window
+ * will continue to perform linear reads. Either at the new file position, or
+ * at the old one after another seek.
  *
- * There is a special-case: if the first page which the application tries to read
- * happens to be the first page of the file, it is assumed that a linear read is
- * about to happen and the window is immediately set to half of the device maximum.
+ * There is a special-case: if the first page which the application tries to
+ * read happens to be the first page of the file, it is assumed that a linear
+ * read is about to happen and the window is immediately set to half of the
+ * device maximum.
  *
  * A page request at (start + size) is not a miss at all - it's just a part of
  * sequential file reading.
  *
  * This function is to be called for every page which is read, rather than when
- * it is time to perform readahead. This is so the readahead algorithm can centrally
- * work out the access patterns. This could be costly with many tiny read()s, so
- * we specifically optimise for that case with prev_page.
+ * it is time to perform readahead. This is so the readahead algorithm can
+ * centrally work out the access patterns. This could be costly with many tiny
+ * read()s, so we specifically optimise for that case with prev_page.
  */
 /*
- * do_page_cache_readahead actually reads a chunk of disk. It allocates all the
- * pages first, then submits them all for I/O. This avoids the very bad behaviour
- * which would occur if page allocations are causing VM writeback. We really don't
- * want to intermingle reads and writes like that.
+ * do_page_cache_readahead actually reads a chunk of disk. It allocates all
+ * the pages first, then submits them all for I/O. This avoids the very bad
+ * behaviour which would occur if page allocations are causing VM writeback.
+ * We really don't want to intermingle reads and writes like that.
 */
 void do_page_cache_readahead(struct file *file,
 		unsigned long offset, unsigned long nr_to_read)
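The sizing rules spelled out in the comment above boil down to a little arithmetic on next_size. The following is a minimal standalone sketch of those rules, assuming 4 kbyte pages; the function name, parameters and the explicit cap at the device maximum are illustrative, not the kernel code itself.

#include <stdio.h>

/*
 * Illustrative-only sketch of the next_size rules described in the comment
 * above; the standalone form and names are hypothetical.
 */
static unsigned long next_window_size(unsigned long next_size, int hit,
                                      int first_page_of_file,
                                      unsigned long min, unsigned long max)
{
        if (first_page_of_file && next_size == 0)
                return max / 2;                 /* assume a linear read is starting */
        if (hit)
                next_size += 2;                 /* grows to ~2x by the time I/O is submitted */
        else {
                next_size -= next_size / 4;     /* a miss (seek) shrinks the window by 25% */
                if (next_size < min)
                        next_size = min;
        }
        if (next_size > max)                    /* assumed cap at the device maximum */
                next_size = max;
        return next_size;
}

int main(void)
{
        unsigned long size = 0, min = 4, max = 32;      /* pages: 16/128 kbytes with 4k pages */
        size = next_window_size(size, 0, 1, min, max);  /* first page of file: jump to max/2 = 16 */
        size = next_window_size(size, 1, 0, min, max);  /* hit: 16 + 2 = 18 */
        size = next_window_size(size, 0, 0, min, max);  /* miss: 18 - 18/4 = 14 */
        printf("%lu\n", size);
        return 0;
}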
@@ -209,8 +206,10 @@ void page_cache_readahead(struct file *file, unsigned long offset)
 		goto out;
 	}
-	min = get_min_readahead(inode);
 	max = get_max_readahead(inode);
+	if (max == 0)
+		goto out;	/* No readahead */
+	min = get_min_readahead(inode);
 	if (ra->next_size == 0 && offset == 0) {
 		/*
@@ -232,7 +231,8 @@ void page_cache_readahead(struct file *file, unsigned long offset)
 		ra->next_size += 2;
 	} else {
 		/*
-		 * A miss - lseek, pread, etc. Shrink the readahead window by 25%.
+		 * A miss - lseek, pread, etc. Shrink the readahead
+		 * window by 25%.
 		 */
 		ra->next_size -= ra->next_size / 4;
 		if (ra->next_size < min)
@@ -332,8 +332,9 @@ void page_cache_readaround(struct file *file, unsigned long offset)
  * the VM.
  *
  * We shrink the readahead window by three pages. This is because we grow it
- * by two pages on a readahead hit. Theory being that the readahead window size
- * will stabilise around the maximum level at which there isn't any thrashing.
+ * by two pages on a readahead hit. Theory being that the readahead window
+ * size will stabilise around the maximum level at which there isn't any
+ * thrashing.
  */
 void handle_ra_thrashing(struct file *file)
 {
......
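Seen together with the hit rule above, the arithmetic in handle_ra_thrashing is deliberately lopsided: a hit adds two pages but a thrash subtracts three, so a workload that keeps pushing the window into thrashing loses ground on each round and next_size settles just under the largest size the system can sustain without thrashing.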