Commit 671ccb4b authored by Oleg Nesterov, committed by Linus Torvalds

[PATCH] readahead: improve sequential read detection

1. The current code can't always detect sequential reading when the read
   size is not PAGE_CACHE_SIZE aligned.

   If an application reads the file in 4096+512-byte chunks, we have:
   1st read: first read detected, prev_page = 2.
   2nd read: offset == 2, the read is considered random.

   page_cache_readahead() should treat prev_page == offset as sequential
   access. In this case it is better to ++offset, because of the
   subsequent blockable_page_cache_readahead(offset, size) call.
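   For illustration only, a minimal user-space sketch of the new check
   (is_sequential() and the stripped-down ra_state are made-up stand-ins,
   not kernel code):

   #include <stdio.h>

   /* Hypothetical, stripped-down stand-in for struct file_ra_state. */
   struct ra_state {
           unsigned long prev_page;
   };

   /*
    * Mirrors the new logic: a read that starts on the page we stopped in
    * (offset == prev_page) is nudged forward one page, so it is classified
    * as sequential when more than one page was requested.
    */
   static int is_sequential(struct ra_state *ra, unsigned long offset,
                            unsigned long req_size)
   {
           if (offset == ra->prev_page && --req_size)
                   ++offset;
           return offset == ra->prev_page + 1;
   }

   int main(void)
   {
           struct ra_state ra = { .prev_page = 2 };

           /* 2nd 4096+512 read from the example above: offset == 2. */
           printf("sequential: %d\n", is_sequential(&ra, 2, 2));
           return 0;
   }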

2. If an application reads 4096 bytes with *ppos == 512, two pages have
   to be read, but req_size == 1 in do_generic_mapping_read().

   Usually this is not a problem, but in the random-read case it results
   in unnecessary page cache misses.
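   A rough illustration of the arithmetic, assuming PAGE_CACHE_SIZE == 4096
   (PAGE_CACHE_SHIFT == 12); the values mirror the example above:

   #include <stdio.h>

   #define PAGE_CACHE_SHIFT 12
   #define PAGE_CACHE_SIZE  (1UL << PAGE_CACHE_SHIFT)

   int main(void)
   {
           unsigned long ppos = 512, count = 4096;
           unsigned long index = ppos >> PAGE_CACHE_SHIFT;

           /* Old code: rounds up the byte count, ignoring the in-page offset. */
           unsigned long req_size =
                   (count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

           /* New code: rounds up the end position, so both touched pages count. */
           unsigned long last_index =
                   (ppos + count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

           printf("old: req_size           = %lu page(s)\n", req_size);
           printf("new: last_index - index = %lu page(s)\n", last_index - index);
           return 0;
   }

   This prints 1 page for the old calculation and 2 for the new one.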

~$ time dd conv=notrunc if=/tmp/GIG of=/tmp/dummy bs=$((4096+512))

2.6.11-clean:	real=370.35 user=0.16 sys=14.66
2.6.11-patched:	real=234.49 user=0.19 sys=12.41
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent c8ca47af
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -707,7 +707,7 @@ void do_generic_mapping_read(struct address_space *mapping,
         unsigned long index;
         unsigned long end_index;
         unsigned long offset;
-        unsigned long req_size;
+        unsigned long last_index;
         unsigned long next_index;
         unsigned long prev_index;
         loff_t isize;
@@ -719,7 +719,7 @@ void do_generic_mapping_read(struct address_space *mapping,
         index = *ppos >> PAGE_CACHE_SHIFT;
         next_index = index;
         prev_index = ra.prev_page;
-        req_size = (desc->count + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+        last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
         offset = *ppos & ~PAGE_CACHE_MASK;
 
         isize = i_size_read(inode);
@@ -729,7 +729,7 @@ void do_generic_mapping_read(struct address_space *mapping,
         end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
         for (;;) {
                 struct page *page;
-                unsigned long ret_size, nr, ret;
+                unsigned long nr, ret;
 
                 /* nr is the maximum number of bytes to copy from this page */
                 nr = PAGE_CACHE_SIZE;
@@ -744,12 +744,9 @@ void do_generic_mapping_read(struct address_space *mapping,
                 nr = nr - offset;
 
                 cond_resched();
-                if (index == next_index && req_size) {
-                        ret_size = page_cache_readahead(mapping, &ra,
-                                        filp, index, req_size);
-                        next_index += ret_size;
-                        req_size -= ret_size;
-                }
+                if (index == next_index)
+                        next_index = page_cache_readahead(mapping, &ra, filp,
+                                        index, last_index - index);
 find_page:
                 page = find_get_page(mapping, index);
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -443,26 +443,26 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
                 struct file *filp, unsigned long offset,
                 unsigned long req_size)
 {
-        unsigned long max, newsize = req_size;
-        int sequential = (offset == ra->prev_page + 1);
+        unsigned long max, newsize;
+        int sequential;
 
         /*
          * Here we detect the case where the application is performing
          * sub-page sized reads. We avoid doing extra work and bogusly
          * perturbing the readahead window expansion logic.
+         * If size is zero, there is no read ahead window so we need one
          */
-        if (offset == ra->prev_page && req_size == 1)
-                goto out;
+        if (offset == ra->prev_page && --req_size)
+                ++offset;
 
+        sequential = (offset == ra->prev_page + 1);
         ra->prev_page = offset;
         max = get_max_readahead(ra);
         newsize = min(req_size, max);
 
-        if (newsize == 0 || (ra->flags & RA_FLAG_INCACHE)) {
-                newsize = 1;
-                goto out;        /* No readahead or file already in cache */
-        }
+        /* No readahead or file already in cache or sub-page sized read */
+        if (newsize == 0 || (ra->flags & RA_FLAG_INCACHE))
+                goto out;
 
         ra->prev_page += newsize - 1;
@@ -527,7 +527,7 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
         }
 out:
-        return newsize;
+        return ra->prev_page + 1;
 }
 
 /*
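A hedged user-space sketch of the changed contract between the two hunks above
(toy_readahead() and its stripped-down state are hypothetical stand-ins):
page_cache_readahead() now returns the first page index it has not handled
(ra->prev_page + 1), and the caller stores that value directly in next_index
instead of advancing next_index by a returned page count.

#include <stdio.h>

/*
 * Toy stand-in for page_cache_readahead(): submits nr_pages starting at
 * index, records the last submitted page in *prev_page, and returns the
 * first index it did not handle.
 */
static unsigned long toy_readahead(unsigned long *prev_page,
                                   unsigned long index,
                                   unsigned long nr_pages)
{
        *prev_page = index + nr_pages - 1;
        return *prev_page + 1;
}

int main(void)
{
        unsigned long prev_page = 0;
        unsigned long index = 0, next_index = 0, last_index = 2;

        /* Mirrors the new caller in do_generic_mapping_read(). */
        if (index == next_index)
                next_index = toy_readahead(&prev_page, index,
                                           last_index - index);

        printf("next_index = %lu\n", next_index);        /* 2 */
        return 0;
}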