Commit 16c54688, authored by Jan Kara, committed by Theodore Ts'o

ext4: Allow parallel DIO reads

We can easily support parallel direct IO reads. We only have to make
sure we cannot expose uninitialized data by reading allocated block to
which data was not written yet, or which was already truncated. That is
easily achieved by holding inode_lock in shared mode - that excludes all
writes, truncates, hole punches. We also have to guard against page
writeback allocating blocks for delay-allocated pages - that race is
handled by the fact that we writeback all the pages in the affected
range and the lock protects us from new pages being created there.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent cca32b7e
@@ -3528,35 +3528,31 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
 
 static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
 {
-	int unlocked = 0;
-	struct inode *inode = iocb->ki_filp->f_mapping->host;
+	struct address_space *mapping = iocb->ki_filp->f_mapping;
+	struct inode *inode = mapping->host;
 	ssize_t ret;
 
-	if (ext4_should_dioread_nolock(inode)) {
-		/*
-		 * Nolock dioread optimization may be dynamically disabled
-		 * via ext4_inode_block_unlocked_dio(). Check inode's state
-		 * while holding extra i_dio_count ref.
-		 */
-		inode_dio_begin(inode);
-		smp_mb();
-		if (unlikely(ext4_test_inode_state(inode,
-						    EXT4_STATE_DIOREAD_LOCK)))
-			inode_dio_end(inode);
-		else
-			unlocked = 1;
-	}
+	/*
+	 * Shared inode_lock is enough for us - it protects against concurrent
+	 * writes & truncates and since we take care of writing back page cache,
+	 * we are protected against page writeback as well.
+	 */
+	inode_lock_shared(inode);
 	if (IS_DAX(inode)) {
-		ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block,
-				NULL, unlocked ? 0 : DIO_LOCKING);
+		ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0);
 	} else {
+		size_t count = iov_iter_count(iter);
+
+		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
+						   iocb->ki_pos + count);
+		if (ret)
+			goto out_unlock;
 		ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
 					   iter, ext4_dio_get_block,
-					   NULL, NULL,
-					   unlocked ? 0 : DIO_LOCKING);
+					   NULL, NULL, 0);
 	}
-	if (unlocked)
-		inode_dio_end(inode);
+out_unlock:
+	inode_unlock_shared(inode);
 	return ret;
 }
 
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment