Commit 8d5d02e6, authored by Mingming Cao, committed by Theodore Ts'o

ext4: async direct IO for holes and fallocate support

For async direct IO that covers holes or fallocated space, the end_io
callback function now queues the conversion work on a workqueue but
doesn't flush the work right away, as that might take too long.

But when fsync() is called after all the data IO has completed, the user
expects the metadata to be updated as well before fsync() returns.

Thus we need to flush the conversion work when fsync() is called.
This patch keeps track of a list of completed async direct IOs that
have work queued on the workqueue.  When fsync() is called, it will go
through the list and do the conversion.
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
parent 4c0425ff
...@@ -127,10 +127,11 @@ struct mpage_da_data { ...@@ -127,10 +127,11 @@ struct mpage_da_data {
int pages_written; int pages_written;
int retval; int retval;
}; };
#define DIO_AIO_UNWRITTEN 0x1
typedef struct ext4_io_end { typedef struct ext4_io_end {
struct list_head list; /* per-file finished AIO list */
struct inode *inode; /* file being written to */ struct inode *inode; /* file being written to */
unsigned int flag; /* sync IO or AIO */ unsigned int flag; /* unwritten or not */
int error; /* I/O error code */ int error; /* I/O error code */
ext4_lblk_t offset; /* offset in the file */ ext4_lblk_t offset; /* offset in the file */
size_t size; /* size of the extent */ size_t size; /* size of the extent */
...@@ -690,6 +691,11 @@ struct ext4_inode_info { ...@@ -690,6 +691,11 @@ struct ext4_inode_info {
__u16 i_extra_isize; __u16 i_extra_isize;
spinlock_t i_block_reservation_lock; spinlock_t i_block_reservation_lock;
/* completed async DIOs that might need unwritten extents handling */
struct list_head i_aio_dio_complete_list;
/* current io_end structure for async DIO write*/
ext4_io_end_t *cur_aio_dio;
}; };
/* /*
...@@ -1419,7 +1425,7 @@ extern int ext4_block_truncate_page(handle_t *handle, ...@@ -1419,7 +1425,7 @@ extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from); struct address_space *mapping, loff_t from);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t ext4_get_reserved_space(struct inode *inode); extern qsize_t ext4_get_reserved_space(struct inode *inode);
extern int flush_aio_dio_completed_IO(struct inode *inode);
/* ioctl.c */ /* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
......
...@@ -3033,6 +3033,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ...@@ -3033,6 +3033,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
{ {
int ret = 0; int ret = 0;
int err = 0; int err = 0;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical"
"block %llu, max_blocks %u, flags %d, allocated %u", "block %llu, max_blocks %u, flags %d, allocated %u",
...@@ -3045,6 +3046,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ...@@ -3045,6 +3046,9 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
ret = ext4_split_unwritten_extents(handle, ret = ext4_split_unwritten_extents(handle,
inode, path, iblock, inode, path, iblock,
max_blocks, flags); max_blocks, flags);
/* flag the io_end struct that we need convert when IO done */
if (io)
io->flag = DIO_AIO_UNWRITTEN;
goto out; goto out;
} }
/* DIO end_io complete, convert the filled extent to written */ /* DIO end_io complete, convert the filled extent to written */
...@@ -3130,6 +3134,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3130,6 +3134,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
int err = 0, depth, ret, cache_type; int err = 0, depth, ret, cache_type;
unsigned int allocated = 0; unsigned int allocated = 0;
struct ext4_allocation_request ar; struct ext4_allocation_request ar;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
__clear_bit(BH_New, &bh_result->b_state); __clear_bit(BH_New, &bh_result->b_state);
ext_debug("blocks %u/%u requested for inode %lu\n", ext_debug("blocks %u/%u requested for inode %lu\n",
...@@ -3279,8 +3284,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ...@@ -3279,8 +3284,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* try to insert new extent into found leaf and return */ /* try to insert new extent into found leaf and return */
ext4_ext_store_pblock(&newex, newblock); ext4_ext_store_pblock(&newex, newblock);
newex.ee_len = cpu_to_le16(ar.len); newex.ee_len = cpu_to_le16(ar.len);
if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) /* Mark uninitialized */ /* Mark uninitialized */
if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){
ext4_ext_mark_uninitialized(&newex); ext4_ext_mark_uninitialized(&newex);
/*
* io_end structure was created for every async
* direct IO write to the middle of the file.
* To avoid unecessary convertion for every aio dio rewrite
* to the mid of file, here we flag the IO that is really
* need the convertion.
*
*/
if (io && flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT)
io->flag = DIO_AIO_UNWRITTEN;
}
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err) { if (err) {
/* free data blocks we just allocated */ /* free data blocks we just allocated */
......
...@@ -44,6 +44,8 @@ ...@@ -44,6 +44,8 @@
* *
* What we do is just kick off a commit and wait on it. This will snapshot the * What we do is just kick off a commit and wait on it. This will snapshot the
* inode to disk. * inode to disk.
*
* i_mutex lock is held when entering and exiting this function
*/ */
int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
...@@ -56,6 +58,9 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) ...@@ -56,6 +58,9 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
trace_ext4_sync_file(file, dentry, datasync); trace_ext4_sync_file(file, dentry, datasync);
ret = flush_aio_dio_completed_IO(inode);
if (ret < 0)
goto out;
/* /*
* data=writeback: * data=writeback:
* The caller's filemap_fdatawrite()/wait will sync the data. * The caller's filemap_fdatawrite()/wait will sync the data.
......
This diff is collapsed.
...@@ -687,6 +687,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) ...@@ -687,6 +687,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->i_allocated_meta_blocks = 0; ei->i_allocated_meta_blocks = 0;
ei->i_delalloc_reserved_flag = 0; ei->i_delalloc_reserved_flag = 0;
spin_lock_init(&(ei->i_block_reservation_lock)); spin_lock_init(&(ei->i_block_reservation_lock));
INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
ei->cur_aio_dio = NULL;
return &ei->vfs_inode; return &ei->vfs_inode;
} }
...@@ -3375,11 +3377,13 @@ static int ext4_sync_fs(struct super_block *sb, int wait) ...@@ -3375,11 +3377,13 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
{ {
int ret = 0; int ret = 0;
tid_t target; tid_t target;
struct ext4_sb_info *sbi = EXT4_SB(sb);
trace_ext4_sync_fs(sb, wait); trace_ext4_sync_fs(sb, wait);
if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { flush_workqueue(sbi->dio_unwritten_wq);
if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
if (wait) if (wait)
jbd2_log_wait_commit(EXT4_SB(sb)->s_journal, target); jbd2_log_wait_commit(sbi->s_journal, target);
} }
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment