Commit 729f52c6 authored by Zheng Liu, committed by Theodore Ts'o

ext4: add a new nolock flag in ext4_map_blocks

The EXT4_GET_BLOCKS_NO_LOCK flag is added to indicate that we don't need
to acquire the i_data_sem lock in ext4_map_blocks.  Meanwhile, it changes
ext4_get_block() to not start a new journal because when we do an
overwrite dio, there is no metadata that needs to be modified.

We define a new function called ext4_get_block_write_nolock, which is
used for the dio overwrite nolock case.  This function doesn't try to
acquire the i_data_sem lock and doesn't start a new journal, since it
only does a lookup.

CC: Tao Ma <tm@tao.ma>
CC: Eric Sandeen <sandeen@redhat.com>
CC: Robin Dong <hao.bigrat@gmail.com>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent fbe10494
...@@ -571,6 +571,8 @@ enum { ...@@ -571,6 +571,8 @@ enum {
#define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040
/* Request will not result in inode size update (user for fallocate) */ /* Request will not result in inode size update (user for fallocate) */
#define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080
/* Do not take the i_data_sem lock in ext4_map_blocks */
#define EXT4_GET_BLOCKS_NO_LOCK 0x0100
/* /*
* Flags used by ext4_free_blocks * Flags used by ext4_free_blocks
......
...@@ -544,6 +544,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -544,6 +544,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* Try to see if we can get the block without requesting a new * Try to see if we can get the block without requesting a new
* file system block. * file system block.
*/ */
if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
down_read((&EXT4_I(inode)->i_data_sem)); down_read((&EXT4_I(inode)->i_data_sem));
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
retval = ext4_ext_map_blocks(handle, inode, map, flags & retval = ext4_ext_map_blocks(handle, inode, map, flags &
...@@ -552,6 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, ...@@ -552,6 +553,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
retval = ext4_ind_map_blocks(handle, inode, map, flags & retval = ext4_ind_map_blocks(handle, inode, map, flags &
EXT4_GET_BLOCKS_KEEP_SIZE); EXT4_GET_BLOCKS_KEEP_SIZE);
} }
if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
up_read((&EXT4_I(inode)->i_data_sem)); up_read((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
...@@ -2818,6 +2820,32 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock, ...@@ -2818,6 +2820,32 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
EXT4_GET_BLOCKS_IO_CREATE_EXT); EXT4_GET_BLOCKS_IO_CREATE_EXT);
} }
/*
 * get_block-style callback that maps @iblock of @inode into @bh_result by
 * calling ext4_map_blocks() with only EXT4_GET_BLOCKS_NO_LOCK set.
 *
 * The incoming @flags value is only reported in the debug message; it is
 * not forwarded to ext4_map_blocks().  The handle passed down is whatever
 * ext4_journal_current_handle() returns; this function does not start one
 * itself.
 *
 * Returns 0 when ext4_map_blocks() reports a positive number of mapped
 * blocks (after filling in @bh_result), otherwise returns the zero or
 * negative value from ext4_map_blocks() unchanged.
 */
static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
		   struct buffer_head *bh_result, int flags)
{
	struct ext4_map_blocks map;
	handle_t *cur_handle;
	int nr_mapped;

	cur_handle = ext4_journal_current_handle();

	ext4_debug("ext4_get_block_write_nolock: inode %lu, flag %d\n",
		   inode->i_ino, flags);

	/* Request covers as many blocks as the caller's buffer size spans. */
	map.m_lblk = iblock;
	map.m_len = bh_result->b_size >> inode->i_blkbits;

	nr_mapped = ext4_map_blocks(cur_handle, inode, &map,
				    EXT4_GET_BLOCKS_NO_LOCK);
	if (nr_mapped <= 0)
		return nr_mapped;

	map_bh(bh_result, inode->i_sb, map.m_pblk);

	/* Replace the mapping-related state bits with those just returned. */
	bh_result->b_state &= ~EXT4_MAP_FLAGS;
	bh_result->b_state |= map.m_flags;

	/* Report back how many bytes were actually mapped. */
	bh_result->b_size = inode->i_sb->s_blocksize * map.m_len;

	return 0;
}
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private, int ret, ssize_t size, void *private, int ret,
bool is_async) bool is_async)
...@@ -2966,6 +2994,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ...@@ -2966,6 +2994,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
loff_t final_size = offset + count; loff_t final_size = offset + count;
if (rw == WRITE && final_size <= inode->i_size) { if (rw == WRITE && final_size <= inode->i_size) {
int overwrite = 0;
/* /*
* We could direct write to holes and fallocate. * We could direct write to holes and fallocate.
* *
...@@ -3005,6 +3035,15 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ...@@ -3005,6 +3035,15 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
EXT4_I(inode)->cur_aio_dio = iocb->private; EXT4_I(inode)->cur_aio_dio = iocb->private;
} }
if (overwrite)
ret = __blockdev_direct_IO(rw, iocb, inode,
inode->i_sb->s_bdev, iov,
offset, nr_segs,
ext4_get_block_write_nolock,
ext4_end_io_dio,
NULL,
0);
else
ret = __blockdev_direct_IO(rw, iocb, inode, ret = __blockdev_direct_IO(rw, iocb, inode,
inode->i_sb->s_bdev, iov, inode->i_sb->s_bdev, iov,
offset, nr_segs, offset, nr_segs,
...@@ -3031,7 +3070,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, ...@@ -3031,7 +3070,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
ext4_free_io_end(iocb->private); ext4_free_io_end(iocb->private);
iocb->private = NULL; iocb->private = NULL;
} else if (ret > 0 && ext4_test_inode_state(inode, } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN)) { EXT4_STATE_DIO_UNWRITTEN)) {
int err; int err;
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment