Commit 4bd9607e authored by Nick Piggin's avatar Nick Piggin Committed by Linus Torvalds

[PATCH] Fix read() vs truncate race

do_generic_mapping_read()
{
	isize1 = i_size_read();
	...
	readpage
	copy_to_user up to isize1;
}

readpage()
{
	isize2 = i_size_read();
	...
	read blocks
	...
	zero-fill all blocks past isize2
}

If a second thread runs truncate and shrinks i_size, so isize1 and isize2 are
different, the read can return up to a page of zero-fill that shouldn't really
exist.

The trick is to read isize1 after doing the readpage.  I realised this is the
right way to do it without having to change the readpage API.

The patch should not cost any cycles when reading from pagecache.
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 3cf8b87b
...@@ -650,7 +650,8 @@ void do_generic_mapping_read(struct address_space *mapping, ...@@ -650,7 +650,8 @@ void do_generic_mapping_read(struct address_space *mapping,
read_actor_t actor) read_actor_t actor)
{ {
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
unsigned long index, offset; unsigned long index, end_index, offset;
loff_t isize;
struct page *cached_page; struct page *cached_page;
int error; int error;
struct file_ra_state ra = *_ra; struct file_ra_state ra = *_ra;
...@@ -659,26 +660,18 @@ void do_generic_mapping_read(struct address_space *mapping, ...@@ -659,26 +660,18 @@ void do_generic_mapping_read(struct address_space *mapping,
index = *ppos >> PAGE_CACHE_SHIFT; index = *ppos >> PAGE_CACHE_SHIFT;
offset = *ppos & ~PAGE_CACHE_MASK; offset = *ppos & ~PAGE_CACHE_MASK;
for (;;) { isize = i_size_read(inode);
struct page *page;
unsigned long end_index, nr, ret;
loff_t isize = i_size_read(inode);
end_index = isize >> PAGE_CACHE_SHIFT; end_index = isize >> PAGE_CACHE_SHIFT;
if (index > end_index) if (index > end_index)
break; goto out;
nr = PAGE_CACHE_SIZE;
if (index == end_index) { for (;;) {
nr = isize & ~PAGE_CACHE_MASK; struct page *page;
if (nr <= offset) unsigned long nr, ret;
break;
}
cond_resched(); cond_resched();
page_cache_readahead(mapping, &ra, filp, index); page_cache_readahead(mapping, &ra, filp, index);
nr = nr - offset;
find_page: find_page:
page = find_get_page(mapping, index); page = find_get_page(mapping, index);
if (unlikely(page == NULL)) { if (unlikely(page == NULL)) {
...@@ -688,6 +681,17 @@ void do_generic_mapping_read(struct address_space *mapping, ...@@ -688,6 +681,17 @@ void do_generic_mapping_read(struct address_space *mapping,
if (!PageUptodate(page)) if (!PageUptodate(page))
goto page_not_up_to_date; goto page_not_up_to_date;
page_ok: page_ok:
/* nr is the maximum number of bytes to copy from this page */
nr = PAGE_CACHE_SIZE;
if (index == end_index) {
nr = isize & ~PAGE_CACHE_MASK;
if (nr <= offset) {
page_cache_release(page);
goto out;
}
}
nr = nr - offset;
/* If users can be writing to this page using arbitrary /* If users can be writing to this page using arbitrary
* virtual addresses, take care about potential aliasing * virtual addresses, take care about potential aliasing
* before reading the page on the kernel side. * before reading the page on the kernel side.
...@@ -719,7 +723,7 @@ void do_generic_mapping_read(struct address_space *mapping, ...@@ -719,7 +723,7 @@ void do_generic_mapping_read(struct address_space *mapping,
page_cache_release(page); page_cache_release(page);
if (ret == nr && desc->count) if (ret == nr && desc->count)
continue; continue;
break; goto out;
page_not_up_to_date: page_not_up_to_date:
/* Get exclusive access to the page ... */ /* Get exclusive access to the page ... */
...@@ -739,22 +743,41 @@ void do_generic_mapping_read(struct address_space *mapping, ...@@ -739,22 +743,41 @@ void do_generic_mapping_read(struct address_space *mapping,
} }
readpage: readpage:
/* ... and start the actual read. The read will unlock the page. */ /* Start the actual read. The read will unlock the page. */
error = mapping->a_ops->readpage(filp, page); error = mapping->a_ops->readpage(filp, page);
if (!error) { if (unlikely(error))
if (PageUptodate(page)) goto readpage_error;
goto page_ok;
if (!PageUptodate(page)) {
wait_on_page_locked(page); wait_on_page_locked(page);
if (PageUptodate(page)) if (!PageUptodate(page)) {
goto page_ok;
error = -EIO; error = -EIO;
goto readpage_error;
}
}
/*
* i_size must be checked after we have done ->readpage.
*
* Checking i_size after the readpage allows us to calculate
* the correct value for "nr", which means the zero-filled
* part of the page is not copied back to userspace (unless
* another truncate extends the file - this is desired though).
*/
isize = i_size_read(inode);
end_index = isize >> PAGE_CACHE_SHIFT;
if (index > end_index) {
page_cache_release(page);
goto out;
} }
goto page_ok;
readpage_error:
/* UHHUH! A synchronous read error occurred. Report it */ /* UHHUH! A synchronous read error occurred. Report it */
desc->error = error; desc->error = error;
page_cache_release(page); page_cache_release(page);
break; goto out;
no_cached_page: no_cached_page:
/* /*
...@@ -765,7 +788,7 @@ void do_generic_mapping_read(struct address_space *mapping, ...@@ -765,7 +788,7 @@ void do_generic_mapping_read(struct address_space *mapping,
cached_page = page_cache_alloc_cold(mapping); cached_page = page_cache_alloc_cold(mapping);
if (!cached_page) { if (!cached_page) {
desc->error = -ENOMEM; desc->error = -ENOMEM;
break; goto out;
} }
} }
error = add_to_page_cache_lru(cached_page, mapping, error = add_to_page_cache_lru(cached_page, mapping,
...@@ -774,13 +797,14 @@ void do_generic_mapping_read(struct address_space *mapping, ...@@ -774,13 +797,14 @@ void do_generic_mapping_read(struct address_space *mapping,
if (error == -EEXIST) if (error == -EEXIST)
goto find_page; goto find_page;
desc->error = error; desc->error = error;
break; goto out;
} }
page = cached_page; page = cached_page;
cached_page = NULL; cached_page = NULL;
goto readpage; goto readpage;
} }
out:
*_ra = ra; *_ra = ra;
*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment