Commit c5070032, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] reiserfs v3 readpages support

Patch from Chris Mason <mason@suse.com>

The patch below is against 2.5.59.  Various forms of it have been floating
around for a while, and Andrew recently included this fixed version in
2.5.55-mm.  The end result is faster reads and writes for reiserfs.

This adds reiserfs support for readpages, along with a support function in
fs/mpage.c to deal with the reiserfs_get_block call sending back
up-to-date buffers with packed tails copied into them.

Most of the changes are to reiserfs_writepage, which still had many
2.4isms in the way it started io, dealt with errors and handled the bh
state bits.  I've also added an optimization so it only starts
transactions when we need to copy a packed tail into the btree or fill a
hole, instead of any time reiserfs_writepage hits an unmapped buffer.
parent 07285c80
...@@ -116,6 +116,49 @@ mpage_alloc(struct block_device *bdev, ...@@ -116,6 +116,49 @@ mpage_alloc(struct block_device *bdev,
return bio; return bio;
} }
/*
 * Helper for mpage_readpages.  A filesystem's get_block may hand back a
 * buffer that is already up to date.  This records that buffer's state
 * and mapping in the matching buffer of the page, so readpage does not
 * have to call get_block a second time for the same block.
 *
 * Pages without buffers should stay that way when possible: if a single
 * block covers the whole page and the buffer is up to date, the page
 * itself is marked up to date and no buffers are attached.
 *
 * page:       page being read
 * bh:         buffer head filled in by get_block
 * page_block: index of the block within the page
 */
static void
map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
{
	struct inode *inode = page->mapping->host;
	struct buffer_head *head, *cur;
	int idx;

	if (!page_has_buffers(page)) {
		/*
		 * one block per page and the data is already valid:
		 * flag the page and skip buffer creation entirely
		 */
		if (inode->i_blkbits == PAGE_CACHE_SHIFT &&
		    buffer_uptodate(bh)) {
			SetPageUptodate(page);
			return;
		}
		create_empty_buffers(page, 1 << inode->i_blkbits, 0);
	}

	/* step around the page's buffer ring to slot number page_block */
	head = page_buffers(page);
	cur = head;
	for (idx = 0; idx != page_block; idx++) {
		cur = cur->b_this_page;
		if (cur == head)
			return;	/* wrapped around: no such slot on this page */
	}

	/* copy what get_block told us into the page's own buffer */
	cur->b_state = bh->b_state;
	cur->b_bdev = bh->b_bdev;
	cur->b_blocknr = bh->b_blocknr;
}
/** /**
* mpage_readpages - populate an address space with some pages, and * mpage_readpages - populate an address space with some pages, and
* start reads against them. * start reads against them.
...@@ -186,6 +229,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, ...@@ -186,6 +229,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
last_block = (inode->i_size + blocksize - 1) >> blkbits; last_block = (inode->i_size + blocksize - 1) >> blkbits;
bh.b_page = page;
for (page_block = 0; page_block < blocks_per_page; for (page_block = 0; page_block < blocks_per_page;
page_block++, block_in_file++) { page_block++, block_in_file++) {
bh.b_state = 0; bh.b_state = 0;
...@@ -200,6 +244,17 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, ...@@ -200,6 +244,17 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
first_hole = page_block; first_hole = page_block;
continue; continue;
} }
/* some filesystems will copy data into the page during
* the get_block call, in which case we don't want to
* read it again. map_buffer_to_page copies the data
* we just collected from get_block into the page's buffers
* so readpage doesn't have to repeat the get_block call
*/
if (buffer_uptodate(&bh)) {
map_buffer_to_page(page, &bh, page_block);
goto confused;
}
if (first_hole != blocks_per_page) if (first_hole != blocks_per_page)
goto confused; /* hole -> non-hole */ goto confused; /* hole -> non-hole */
...@@ -256,7 +311,10 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, ...@@ -256,7 +311,10 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
confused: confused:
if (bio) if (bio)
bio = mpage_bio_submit(READ, bio); bio = mpage_bio_submit(READ, bio);
block_read_full_page(page, get_block); if (!PageUptodate(page))
block_read_full_page(page, get_block);
else
unlock_page(page);
goto out; goto out;
} }
...@@ -344,6 +402,7 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, ...@@ -344,6 +402,7 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
sector_t boundary_block = 0; sector_t boundary_block = 0;
struct block_device *boundary_bdev = NULL; struct block_device *boundary_bdev = NULL;
int length; int length;
struct buffer_head map_bh;
if (page_has_buffers(page)) { if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page); struct buffer_head *head = page_buffers(page);
...@@ -401,8 +460,8 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, ...@@ -401,8 +460,8 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
BUG_ON(!PageUptodate(page)); BUG_ON(!PageUptodate(page));
block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
last_block = (inode->i_size - 1) >> blkbits; last_block = (inode->i_size - 1) >> blkbits;
map_bh.b_page = page;
for (page_block = 0; page_block < blocks_per_page; ) { for (page_block = 0; page_block < blocks_per_page; ) {
struct buffer_head map_bh;
map_bh.b_state = 0; map_bh.b_state = 0;
if (get_block(inode, block_in_file, &map_bh, 1)) if (get_block(inode, block_in_file, &map_bh, 1))
......
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/unaligned.h> #include <asm/unaligned.h>
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
/* args for the create parameter of reiserfs_get_block */ /* args for the create parameter of reiserfs_get_block */
#define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */
...@@ -262,7 +264,10 @@ static int _get_block_create_0 (struct inode * inode, long block, ...@@ -262,7 +264,10 @@ static int _get_block_create_0 (struct inode * inode, long block,
blocknr = get_block_num(ind_item, path.pos_in_item) ; blocknr = get_block_num(ind_item, path.pos_in_item) ;
ret = 0 ; ret = 0 ;
if (blocknr) { if (blocknr) {
map_bh(bh_result, inode->i_sb, blocknr); map_bh(bh_result, inode->i_sb, blocknr);
if (path.pos_in_item == ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) {
set_buffer_boundary(bh_result);
}
} else } else
// We do not return -ENOENT if there is a hole but page is uptodate, because it means // We do not return -ENOENT if there is a hole but page is uptodate, because it means
// That there is some MMAPED data associated with it that is yet to be written to disk. // That there is some MMAPED data associated with it that is yet to be written to disk.
...@@ -286,7 +291,7 @@ static int _get_block_create_0 (struct inode * inode, long block, ...@@ -286,7 +291,7 @@ static int _get_block_create_0 (struct inode * inode, long block,
return -ENOENT; return -ENOENT;
} }
/* if we've got a direct item, and the buffer was uptodate, /* if we've got a direct item, and the buffer or page was uptodate,
** we don't want to pull data off disk again. skip to the ** we don't want to pull data off disk again. skip to the
** end, where we map the buffer and return ** end, where we map the buffer and return
*/ */
...@@ -367,7 +372,9 @@ static int _get_block_create_0 (struct inode * inode, long block, ...@@ -367,7 +372,9 @@ static int _get_block_create_0 (struct inode * inode, long block,
finished: finished:
pathrelse (&path); pathrelse (&path);
/* I _really_ doubt that you want it. Chris? */ /* this buffer has valid data, but isn't valid for io. mapping it to
* block #0 tells the rest of reiserfs it just has a tail in it
*/
map_bh(bh_result, inode->i_sb, 0); map_bh(bh_result, inode->i_sb, 0);
set_buffer_uptodate (bh_result); set_buffer_uptodate (bh_result);
return 0; return 0;
...@@ -842,6 +849,12 @@ int reiserfs_get_block (struct inode * inode, sector_t block, ...@@ -842,6 +849,12 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
return retval; return retval;
} }
/*
 * readpages entry point for reiserfs.  Hands the whole request to the
 * generic mpage_readpages(), passing reiserfs_get_block as the block
 * mapping callback.  The file argument is not used here.
 *
 * Returns whatever mpage_readpages() returns.
 */
static int
reiserfs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
}
// //
// BAD: new directories have stat data of new type and all other items // BAD: new directories have stat data of new type and all other items
...@@ -1809,13 +1822,19 @@ static int map_block_for_writepage(struct inode *inode, ...@@ -1809,13 +1822,19 @@ static int map_block_for_writepage(struct inode *inode,
int use_get_block = 0 ; int use_get_block = 0 ;
int bytes_copied = 0 ; int bytes_copied = 0 ;
int copy_size ; int copy_size ;
int trans_running = 0;
/* catch places below that try to log something without starting a trans */
th.t_trans_id = 0;
if (!buffer_uptodate(bh_result)) {
buffer_error();
return -EIO;
}
kmap(bh_result->b_page) ; kmap(bh_result->b_page) ;
start_over: start_over:
reiserfs_write_lock(inode->i_sb); reiserfs_write_lock(inode->i_sb);
journal_begin(&th, inode->i_sb, jbegin_count) ;
reiserfs_update_inode_transaction(inode) ;
make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ; make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ;
research: research:
...@@ -1841,7 +1860,6 @@ static int map_block_for_writepage(struct inode *inode, ...@@ -1841,7 +1860,6 @@ static int map_block_for_writepage(struct inode *inode,
goto out ; goto out ;
} }
set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode); set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode);
set_buffer_uptodate(bh_result);
} else if (is_direct_le_ih(ih)) { } else if (is_direct_le_ih(ih)) {
char *p ; char *p ;
p = page_address(bh_result->b_page) ; p = page_address(bh_result->b_page) ;
...@@ -1850,7 +1868,20 @@ static int map_block_for_writepage(struct inode *inode, ...@@ -1850,7 +1868,20 @@ static int map_block_for_writepage(struct inode *inode,
fs_gen = get_generation(inode->i_sb) ; fs_gen = get_generation(inode->i_sb) ;
copy_item_head(&tmp_ih, ih) ; copy_item_head(&tmp_ih, ih) ;
if (!trans_running) {
/* vs-3050 is gone, no need to drop the path */
journal_begin(&th, inode->i_sb, jbegin_count) ;
reiserfs_update_inode_transaction(inode) ;
trans_running = 1;
if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) {
reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
goto research;
}
}
reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ;
if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) {
reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; reiserfs_restore_prepared_buffer(inode->i_sb, bh) ;
goto research; goto research;
...@@ -1861,7 +1892,6 @@ static int map_block_for_writepage(struct inode *inode, ...@@ -1861,7 +1892,6 @@ static int map_block_for_writepage(struct inode *inode,
journal_mark_dirty(&th, inode->i_sb, bh) ; journal_mark_dirty(&th, inode->i_sb, bh) ;
bytes_copied += copy_size ; bytes_copied += copy_size ;
set_block_dev_mapped(bh_result, 0, inode); set_block_dev_mapped(bh_result, 0, inode);
set_buffer_uptodate(bh_result);
/* are there still bytes left? */ /* are there still bytes left? */
if (bytes_copied < bh_result->b_size && if (bytes_copied < bh_result->b_size &&
...@@ -1878,7 +1908,10 @@ static int map_block_for_writepage(struct inode *inode, ...@@ -1878,7 +1908,10 @@ static int map_block_for_writepage(struct inode *inode,
out: out:
pathrelse(&path) ; pathrelse(&path) ;
journal_end(&th, inode->i_sb, jbegin_count) ; if (trans_running) {
journal_end(&th, inode->i_sb, jbegin_count) ;
trans_running = 0;
}
reiserfs_write_unlock(inode->i_sb); reiserfs_write_unlock(inode->i_sb);
/* this is where we fill in holes in the file. */ /* this is where we fill in holes in the file. */
...@@ -1894,49 +1927,77 @@ static int map_block_for_writepage(struct inode *inode, ...@@ -1894,49 +1927,77 @@ static int map_block_for_writepage(struct inode *inode,
} }
} }
kunmap(bh_result->b_page) ; kunmap(bh_result->b_page) ;
if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) {
/* we've copied data from the page into the direct item, so the
* buffer in the page is now clean, mark it to reflect that.
*/
lock_buffer(bh_result);
clear_buffer_dirty(bh_result);
unlock_buffer(bh_result);
}
return retval ; return retval ;
} }
/* helper func to get a buffer head ready for writepage to send to /*
** ll_rw_block * does the right thing for deciding when to lock a buffer and
*/ * mark it for io during a writepage. make sure the buffer is
static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) { * dirty before sending it here though.
struct buffer_head *bh ; */
int i; static void lock_buffer_for_writepage(struct page *page,
for(i = 0 ; i < nr ; i++) { struct writeback_control *wbc,
bh = bhp[i] ; struct buffer_head *bh)
lock_buffer(bh) ; {
mark_buffer_async_write(bh) ; if (wbc->sync_mode != WB_SYNC_NONE) {
/* submit_bh doesn't care if the buffer is dirty, but nobody lock_buffer(bh);
** later on in the call chain will be cleaning it. So, we } else {
** clean the buffer here, it still gets written either way. if (test_set_buffer_locked(bh)) {
*/ __set_page_dirty_nobuffers(page);
clear_buffer_dirty(bh) ; return;
set_buffer_uptodate(bh) ; }
submit_bh(WRITE, bh) ; }
if (test_clear_buffer_dirty(bh)) {
if (!buffer_uptodate(bh))
buffer_error();
mark_buffer_async_write(bh);
} else {
unlock_buffer(bh);
} }
} }
/*
* mason@suse.com: updated in 2.5.54 to follow the same general io
* start/recovery path as __block_write_full_page, along with special
* code to handle reiserfs tails.
*/
static int reiserfs_write_full_page(struct page *page, struct writeback_control *wbc) { static int reiserfs_write_full_page(struct page *page, struct writeback_control *wbc) {
struct inode *inode = page->mapping->host ; struct inode *inode = page->mapping->host ;
unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ; unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ;
unsigned last_offset = PAGE_CACHE_SIZE;
int error = 0; int error = 0;
unsigned long block ; unsigned long block ;
unsigned cur_offset = 0 ; struct buffer_head *head, *bh;
struct buffer_head *head, *bh ;
int partial = 0 ; int partial = 0 ;
struct buffer_head *arr[PAGE_CACHE_SIZE/512] ; int nr = 0;
int nr = 0 ;
if (!page_has_buffers(page)) /* The page dirty bit is cleared before writepage is called, which
block_prepare_write(page, 0, 0, NULL) ; * means we have to tell create_empty_buffers to make dirty buffers
* The page really should be up to date at this point, so tossing
* in the BH_Uptodate is just a sanity check.
*/
if (!page_has_buffers(page)) {
if (!PageUptodate(page))
buffer_error();
create_empty_buffers(page, inode->i_sb->s_blocksize,
(1 << BH_Dirty) | (1 << BH_Uptodate));
}
head = page_buffers(page) ;
/* last page in the file, zero out any contents past the /* last page in the file, zero out any contents past the
** last byte in the file ** last byte in the file
*/ */
if (page->index >= end_index) { if (page->index >= end_index) {
char *kaddr; char *kaddr;
unsigned last_offset;
last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ; last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ;
/* no file contents in this page */ /* no file contents in this page */
...@@ -1949,66 +2010,107 @@ static int reiserfs_write_full_page(struct page *page, struct writeback_control ...@@ -1949,66 +2010,107 @@ static int reiserfs_write_full_page(struct page *page, struct writeback_control
flush_dcache_page(page) ; flush_dcache_page(page) ;
kunmap_atomic(kaddr, KM_USER0) ; kunmap_atomic(kaddr, KM_USER0) ;
} }
head = page_buffers(page) ;
bh = head ; bh = head ;
block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ; block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ;
do { do {
/* if this offset in the page is outside the file */ get_bh(bh);
if (cur_offset >= last_offset) { if (buffer_dirty(bh)) {
if (!buffer_uptodate(bh))
partial = 1 ;
} else {
/* fast path, buffer mapped to an unformatted node */
if (buffer_mapped(bh) && bh->b_blocknr != 0) { if (buffer_mapped(bh) && bh->b_blocknr != 0) {
arr[nr++] = bh ; /* buffer mapped to an unformatted node */
lock_buffer_for_writepage(page, wbc, bh);
} else { } else {
/* buffer not mapped yet, or points to a direct item. /* not mapped yet, or it points to a direct item, search
** search and dirty or log * the btree for the mapping info, and log any direct
*/ * items found
*/
if ((error = map_block_for_writepage(inode, bh, block))) { if ((error = map_block_for_writepage(inode, bh, block))) {
goto fail ; goto fail ;
} }
/* map_block_for_writepage either found an unformatted node if (buffer_mapped(bh) && bh->b_blocknr != 0) {
** and mapped it for us, or it found a direct item lock_buffer_for_writepage(page, wbc, bh);
** and logged the changes. }
*/
if (buffer_mapped(bh) && bh->b_blocknr != 0) {
arr[nr++] = bh ;
}
} }
} }
bh = bh->b_this_page ; bh = bh->b_this_page;
cur_offset += bh->b_size ; block++;
block++ ;
} while(bh != head) ; } while(bh != head) ;
if (!partial)
SetPageUptodate(page) ;
BUG_ON(PageWriteback(page)); BUG_ON(PageWriteback(page));
SetPageWriteback(page); SetPageWriteback(page);
unlock_page(page); unlock_page(page);
/* if this page only had a direct item, it is very possible for /*
** nr == 0 without there being any kind of error. * since any buffer might be the only dirty buffer on the page,
*/ * the first submit_bh can bring the page out of writeback.
if (nr) { * be careful with the buffers.
submit_bh_for_writepage(arr, nr) ; */
} else { do {
end_page_writeback(page) ; struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
submit_bh(WRITE, bh);
nr++;
}
put_bh(bh);
bh = next;
} while(bh != head);
error = 0;
done:
if (nr == 0) {
/*
* if this page only had a direct item, it is very possible for
* no io to be required without there being an error. Or,
* someone else could have locked them and sent them down the
* pipe without locking the page
*/
do {
if (!buffer_uptodate(bh)) {
partial = 1;
break;
}
} while(bh != head);
if (!partial)
SetPageUptodate(page);
end_page_writeback(page);
} }
return error;
return 0 ;
fail: fail:
if (nr) { /* catches various errors, we need to make sure any valid dirty blocks
SetPageWriteback(page); * get to the media. The page is currently locked and not marked for
unlock_page(page); * writeback
submit_bh_for_writepage(arr, nr) ; */
} else { ClearPageUptodate(page);
unlock_page(page) ; bh = head;
} do {
ClearPageUptodate(page) ; get_bh(bh);
return error ; if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) {
lock_buffer(bh);
mark_buffer_async_write(bh);
} else {
/*
* clear any dirty bits that might have come from getting
* attached to a dirty page
*/
clear_buffer_dirty(bh);
}
bh = bh->b_this_page;
} while(bh != head);
SetPageError(page);
BUG_ON(PageWriteback(page));
SetPageWriteback(page);
unlock_page(page);
do {
struct buffer_head *next = bh->b_this_page;
if (buffer_async_write(bh)) {
clear_buffer_dirty(bh);
submit_bh(WRITE, bh);
nr++;
}
put_bh(bh);
bh = next;
} while(bh != head);
goto done;
} }
...@@ -2115,6 +2217,7 @@ static int reiserfs_releasepage(struct page *page, int unused_gfp_flags) ...@@ -2115,6 +2217,7 @@ static int reiserfs_releasepage(struct page *page, int unused_gfp_flags)
struct address_space_operations reiserfs_address_space_operations = { struct address_space_operations reiserfs_address_space_operations = {
.writepage = reiserfs_writepage, .writepage = reiserfs_writepage,
.readpage = reiserfs_readpage, .readpage = reiserfs_readpage,
.readpages = reiserfs_readpages,
.releasepage = reiserfs_releasepage, .releasepage = reiserfs_releasepage,
.sync_page = block_sync_page, .sync_page = block_sync_page,
.prepare_write = reiserfs_prepare_write, .prepare_write = reiserfs_prepare_write,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment