Commit 359a5de1, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] direct-io support for XFS unwritten extents

From: Nathan Scott <nathans@sgi.com>

This patch adds a mechanism by which a filesystem can register an interest in
the completion of direct I/O.  The completion routine will be given the
inode, an offset and a length, and an optional filesystem-private field.

We have extended the use of the buffer_head-based interface (i.e.
get_block_t) for direct I/O such that the b_private field is now utilised.
It is defined to be initially zero at the start of I/O, and will be passed
into the filesystem unmodified by the VFS with each map request, while
setting up the direct I/O.  Once I/O has completed the final value of this
pointer will be passed into the filesystem's I/O completion handler.  This
mechanism can be used to keep track of all of the mapping requests which
encompass an individual direct I/O request.

This has been implemented specifically for XFS, but is done so as to be as
generic as possible.  XFS uses this mechanism to provide support for
unwritten extents - these are file extents which have been pre-allocated
on-disk, but not yet written to (once written, these become regular file
extents, but only once I/O is complete).
parent 14d927a3
...@@ -125,7 +125,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, ...@@ -125,7 +125,7 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct inode *inode = file->f_dentry->d_inode->i_mapping->host; struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return blockdev_direct_IO(rw, iocb, inode, inode->i_bdev, iov, offset, return blockdev_direct_IO(rw, iocb, inode, inode->i_bdev, iov, offset,
nr_segs, blkdev_get_blocks); nr_segs, blkdev_get_blocks, NULL);
} }
static int blkdev_writepage(struct page *page, struct writeback_control *wbc) static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
* added support for non-aligned IO. * added support for non-aligned IO.
* 06Nov2002 pbadari@us.ibm.com * 06Nov2002 pbadari@us.ibm.com
* added asynchronous IO support. * added asynchronous IO support.
* 21Jul2003 nathans@sgi.com
* added IO completion notifier.
*/ */
#include <linux/kernel.h> #include <linux/kernel.h>
...@@ -74,6 +76,7 @@ struct dio { ...@@ -74,6 +76,7 @@ struct dio {
int boundary; /* prev block is at a boundary */ int boundary; /* prev block is at a boundary */
int reap_counter; /* rate limit reaping */ int reap_counter; /* rate limit reaping */
get_blocks_t *get_blocks; /* block mapping function */ get_blocks_t *get_blocks; /* block mapping function */
dio_iodone_t *end_io; /* IO completion function */
sector_t final_block_in_bio; /* current final block in bio + 1 */ sector_t final_block_in_bio; /* current final block in bio + 1 */
sector_t next_block_for_io; /* next block to be put under IO, sector_t next_block_for_io; /* next block to be put under IO,
in dio_blocks units */ in dio_blocks units */
...@@ -192,6 +195,18 @@ static struct page *dio_get_page(struct dio *dio) ...@@ -192,6 +195,18 @@ static struct page *dio_get_page(struct dio *dio)
return dio->pages[dio->head++]; return dio->pages[dio->head++];
} }
/*
 * Completion hook for a direct I/O request.
 *
 * If an end_io callback was stored in the dio, invoke it with the inode,
 * the caller-supplied offset and byte count, and the b_private pointer of
 * the dio's mapping buffer_head.  That pointer lets a filesystem carry its
 * own state from its get_blocks calls through to I/O completion.
 *
 * Does nothing when no end_io callback is set.
 */
static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
{
	/* No completion notifier registered for this request. */
	if (!dio->end_io)
		return;

	dio->end_io(dio->inode, offset, bytes, dio->map_bh.b_private);
}
/* /*
* Called when a BIO has been processed. If the count goes to zero then IO is * Called when a BIO has been processed. If the count goes to zero then IO is
* complete and we can signal this to the AIO layer. * complete and we can signal this to the AIO layer.
...@@ -199,7 +214,9 @@ static struct page *dio_get_page(struct dio *dio) ...@@ -199,7 +214,9 @@ static struct page *dio_get_page(struct dio *dio)
static void finished_one_bio(struct dio *dio) static void finished_one_bio(struct dio *dio)
{ {
if (atomic_dec_and_test(&dio->bio_count)) { if (atomic_dec_and_test(&dio->bio_count)) {
if(dio->is_async) { if (dio->is_async) {
dio_complete(dio, dio->block_in_file << dio->blkbits,
dio->result);
aio_complete(dio->iocb, dio->result, 0); aio_complete(dio->iocb, dio->result, 0);
kfree(dio); kfree(dio);
} }
...@@ -824,7 +841,7 @@ static int do_direct_IO(struct dio *dio) ...@@ -824,7 +841,7 @@ static int do_direct_IO(struct dio *dio)
static int static int
direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
const struct iovec *iov, loff_t offset, unsigned long nr_segs, const struct iovec *iov, loff_t offset, unsigned long nr_segs,
unsigned blkbits, get_blocks_t get_blocks) unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io)
{ {
unsigned long user_addr; unsigned long user_addr;
int seg; int seg;
...@@ -852,6 +869,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -852,6 +869,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
dio->boundary = 0; dio->boundary = 0;
dio->reap_counter = 0; dio->reap_counter = 0;
dio->get_blocks = get_blocks; dio->get_blocks = get_blocks;
dio->end_io = end_io;
dio->map_bh.b_private = NULL;
dio->final_block_in_bio = -1; dio->final_block_in_bio = -1;
dio->next_block_for_io = -1; dio->next_block_for_io = -1;
...@@ -953,6 +972,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -953,6 +972,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
if (rw == READ && (offset + ret > i_size)) if (rw == READ && (offset + ret > i_size))
ret = i_size - offset; ret = i_size - offset;
} }
dio_complete(dio, offset, ret);
kfree(dio); kfree(dio);
} }
return ret; return ret;
...@@ -964,7 +984,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -964,7 +984,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
int int
blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset, struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_blocks_t get_blocks) unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io)
{ {
int seg; int seg;
size_t size; size_t size;
...@@ -999,7 +1019,7 @@ blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, ...@@ -999,7 +1019,7 @@ blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
} }
retval = direct_io_worker(rw, iocb, inode, iov, offset, retval = direct_io_worker(rw, iocb, inode, iov, offset,
nr_segs, blkbits, get_blocks); nr_segs, blkbits, get_blocks, end_io);
out: out:
return retval; return retval;
} }
...@@ -662,7 +662,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, ...@@ -662,7 +662,7 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct inode *inode = file->f_dentry->d_inode->i_mapping->host; struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, ext2_get_blocks); offset, nr_segs, ext2_get_blocks, NULL);
} }
static int static int
......
...@@ -1562,7 +1562,8 @@ static int ext3_direct_IO(int rw, struct kiocb *iocb, ...@@ -1562,7 +1562,8 @@ static int ext3_direct_IO(int rw, struct kiocb *iocb,
} }
ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, ext3_direct_io_get_blocks); offset, nr_segs,
ext3_direct_io_get_blocks, NULL);
out_stop: out_stop:
if (handle) { if (handle) {
......
...@@ -308,7 +308,7 @@ static int jfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, ...@@ -308,7 +308,7 @@ static int jfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct inode *inode = file->f_dentry->d_inode->i_mapping->host; struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, jfs_get_blocks); offset, nr_segs, jfs_get_blocks, NULL);
} }
struct address_space_operations jfs_aops = { struct address_space_operations jfs_aops = {
......
...@@ -76,10 +76,10 @@ linvfs_unwritten_done( ...@@ -76,10 +76,10 @@ linvfs_unwritten_done(
/* /*
* Issue transactions to convert a buffer range from unwritten * Issue transactions to convert a buffer range from unwritten
* to written extents. * to written extents (buffered IO).
*/ */
STATIC void STATIC void
linvfs_unwritten_conv( linvfs_unwritten_convert(
xfs_buf_t *bp) xfs_buf_t *bp)
{ {
vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *); vnode_t *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
...@@ -96,6 +96,30 @@ linvfs_unwritten_conv( ...@@ -96,6 +96,30 @@ linvfs_unwritten_conv(
pagebuf_iodone(bp, 0, 0); pagebuf_iodone(bp, 0, 0);
} }
/*
 * Direct IO completion handler: convert the byte range covered by the
 * finished IO from unwritten to written extents.
 *
 * A non-NULL private pointer means an unwritten extent lay beneath this
 * IO; linvfs_get_block_core stores the inode there when it maps an
 * unwritten extent for a direct IO with create set.  Nothing is done when
 * private is NULL or when size is not positive.
 */
STATIC void
linvfs_unwritten_convert_direct(
	struct inode	*inode,
	loff_t		offset,
	ssize_t		size,
	void		*private)
{
	vnode_t		*vp;
	int		error;

	/* When set, private must be the very inode we were called for. */
	ASSERT(!private || inode == (struct inode *)private);

	/* No unwritten extent was mapped, or no bytes were transferred. */
	if (!private || size <= 0)
		return;

	vp = LINVFS_GET_VP(inode);

	/*
	 * NOTE(review): error is handed to VOP_BMAP but never examined
	 * afterwards; this handler returns void, so a failed conversion
	 * is not reported to anyone - confirm this is acceptable.
	 */
	VOP_BMAP(vp, offset, size, BMAP_UNWRITTEN, NULL, NULL, error);
}
STATIC int STATIC int
map_blocks( map_blocks(
struct inode *inode, struct inode *inode,
...@@ -456,7 +480,7 @@ map_unwritten( ...@@ -456,7 +480,7 @@ map_unwritten(
XFS_BUF_SET_SIZE(pb, size); XFS_BUF_SET_SIZE(pb, size);
XFS_BUF_SET_OFFSET(pb, offset); XFS_BUF_SET_OFFSET(pb, offset);
XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)); XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_conv); XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_convert);
if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
pagebuf_iodone(pb, 1, 1); pagebuf_iodone(pb, 1, 1);
...@@ -804,7 +828,7 @@ STATIC int ...@@ -804,7 +828,7 @@ STATIC int
linvfs_get_block_core( linvfs_get_block_core(
struct inode *inode, struct inode *inode,
sector_t iblock, sector_t iblock,
int blocks, unsigned long blocks,
struct buffer_head *bh_result, struct buffer_head *bh_result,
int create, int create,
int direct, int direct,
...@@ -854,8 +878,11 @@ linvfs_get_block_core( ...@@ -854,8 +878,11 @@ linvfs_get_block_core(
set_buffer_mapped(bh_result); set_buffer_mapped(bh_result);
} }
if (pbmap.pbm_flags & PBMF_UNWRITTEN) { if (pbmap.pbm_flags & PBMF_UNWRITTEN) {
if (create) if (create) {
if (direct)
bh_result->b_private = inode;
set_buffer_mapped(bh_result); set_buffer_mapped(bh_result);
}
set_buffer_unwritten(bh_result); set_buffer_unwritten(bh_result);
set_buffer_delay(bh_result); set_buffer_delay(bh_result);
} }
...@@ -935,8 +962,8 @@ linvfs_direct_IO( ...@@ -935,8 +962,8 @@ linvfs_direct_IO(
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = file->f_dentry->d_inode->i_mapping->host; struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return blockdev_direct_IO(rw, iocb, inode, NULL, return blockdev_direct_IO(rw, iocb, inode, NULL, iov, offset, nr_segs,
iov, offset, nr_segs, linvfs_get_blocks_direct); linvfs_get_blocks_direct, linvfs_unwritten_convert_direct);
} }
......
...@@ -219,6 +219,8 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock, ...@@ -219,6 +219,8 @@ typedef int (get_block_t)(struct inode *inode, sector_t iblock,
typedef int (get_blocks_t)(struct inode *inode, sector_t iblock, typedef int (get_blocks_t)(struct inode *inode, sector_t iblock,
unsigned long max_blocks, unsigned long max_blocks,
struct buffer_head *bh_result, int create); struct buffer_head *bh_result, int create);
/*
 * Direct I/O completion callback type.  A function of this type may be
 * passed to blockdev_direct_IO as end_io (NULL means no callback); it is
 * invoked with the inode, an offset and byte count describing the I/O, and
 * the b_private pointer taken from the buffer_head used for block mapping.
 */
typedef void (dio_iodone_t)(struct inode *inode, loff_t offset,
ssize_t bytes, void *private);
/* /*
* Attribute flags. These should be or-ed together to figure out what * Attribute flags. These should be or-ed together to figure out what
...@@ -1291,7 +1293,7 @@ extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb, ...@@ -1291,7 +1293,7 @@ extern ssize_t generic_file_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset, unsigned long nr_segs); const struct iovec *iov, loff_t offset, unsigned long nr_segs);
extern int blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, extern int blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset, struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_blocks_t *get_blocks); unsigned long nr_segs, get_blocks_t *get_blocks, dio_iodone_t *end_io);
extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos); unsigned long nr_segs, loff_t *ppos);
ssize_t generic_file_writev(struct file *filp, const struct iovec *iov, ssize_t generic_file_writev(struct file *filp, const struct iovec *iov,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment