Commit 2e60a51e authored by Miao Xie, committed by Josef Bacik

Btrfs: serialize unlocked dio reads with truncate

Currently, we can do unlocked dio reads, but the following race
is possible:

dio_read_task			truncate_task
				->btrfs_setattr()
->btrfs_direct_IO
    ->__blockdev_direct_IO
      ->btrfs_get_block
				  ->btrfs_truncate()
				 #alloc truncated blocks
				 #to other inode
      ->submit_io()
     #INFORMATION LEAK

In order to avoid this problem, we must serialize unlocked dio reads with
truncate. There are two approaches:
- use extent lock to protect the extent that we truncate
- use inode_dio_wait() to make sure the truncating task will wait for
  the read DIO.

If we use the 1st one, we will hit the endless truncation problem caused by
nonlocked read DIO once we implement nonlocked write DIO. This is because
we would still need to invoke inode_dio_wait() to avoid the race between
write DIO and truncation. At that point, we would have to introduce

  btrfs_inode_{block, resume}_unlocked_dio()

again. That is, we would have to implement this patch again anyway, so I
chose the 2nd way to fix the problem.
Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
parent 0934856d
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
#define BTRFS_INODE_NEEDS_FULL_SYNC 7 #define BTRFS_INODE_NEEDS_FULL_SYNC 7
#define BTRFS_INODE_COPY_EVERYTHING 8 #define BTRFS_INODE_COPY_EVERYTHING 8
#define BTRFS_INODE_IN_DELALLOC_LIST 9 #define BTRFS_INODE_IN_DELALLOC_LIST 9
#define BTRFS_INODE_READDIO_NEED_LOCK 10
/* in memory btrfs inode */ /* in memory btrfs inode */
struct btrfs_inode { struct btrfs_inode {
...@@ -217,4 +218,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) ...@@ -217,4 +218,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
return 0; return 0;
} }
/*
 * Disable DIO read nolock optimization, so new dio readers will be forced
 * to grab i_mutex. It is used to avoid the endless truncate due to
 * nonlocked dio read.
 *
 * This sets BTRFS_INODE_READDIO_NEED_LOCK in the inode's runtime_flags.
 * The read path of btrfs_direct_IO() tests that bit after incrementing
 * i_dio_count; when the bit is set it calls inode_dio_done() and passes
 * DIO_LOCKING | DIO_SKIP_HOLES to __blockdev_direct_IO() instead of 0.
 *
 * btrfs_setsize() uses it as: block, inode_dio_wait(), resume.
 */
static inline void btrfs_inode_block_unlocked_dio(struct inode *inode)
{
set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags);
/*
 * Full barrier after publishing the flag, before the caller goes on to
 * wait for in-flight DIO.
 * NOTE(review): presumably pairs with smp_mb__after_atomic_inc() in
 * btrfs_direct_IO(), so that either the reader observes the flag or the
 * waiter observes the reader's i_dio_count increment -- confirm.
 */
smp_mb();
}
/*
 * Re-enable the DIO read nolock optimization by clearing
 * BTRFS_INODE_READDIO_NEED_LOCK in the inode's runtime_flags. This is the
 * counterpart of btrfs_inode_block_unlocked_dio().
 */
static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
{
/*
 * clear_bit() by itself implies no memory barrier, so issue one first:
 * memory operations made before this point are ordered ahead of the
 * flag being cleared.
 */
smp_mb__before_clear_bit();
clear_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags);
}
#endif #endif
...@@ -3888,6 +3888,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) ...@@ -3888,6 +3888,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
/* we don't support swapfiles, so vmtruncate shouldn't fail */ /* we don't support swapfiles, so vmtruncate shouldn't fail */
truncate_setsize(inode, newsize); truncate_setsize(inode, newsize);
/* Disable nonlocked read DIO to avoid the endless truncate */
btrfs_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
btrfs_inode_resume_unlocked_dio(inode);
ret = btrfs_truncate(inode); ret = btrfs_truncate(inode);
if (ret && inode->i_nlink) if (ret && inode->i_nlink)
btrfs_orphan_del(NULL, inode); btrfs_orphan_del(NULL, inode);
...@@ -6670,6 +6676,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ...@@ -6670,6 +6676,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
size_t count = 0; size_t count = 0;
int flags = 0;
bool wakeup = false;
ssize_t ret; ssize_t ret;
if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
...@@ -6681,13 +6689,22 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ...@@ -6681,13 +6689,22 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
ret = btrfs_delalloc_reserve_space(inode, count); ret = btrfs_delalloc_reserve_space(inode, count);
if (ret) if (ret)
return ret; return ret;
} else {
atomic_inc(&inode->i_dio_count);
smp_mb__after_atomic_inc();
if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags))) {
inode_dio_done(inode);
flags = DIO_LOCKING | DIO_SKIP_HOLES;
} else {
wakeup = true;
}
} }
ret = __blockdev_direct_IO(rw, iocb, inode, ret = __blockdev_direct_IO(rw, iocb, inode,
BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, 0); btrfs_submit_direct, flags);
if (rw & WRITE) { if (rw & WRITE) {
if (ret < 0 && ret != -EIOCBQUEUED) if (ret < 0 && ret != -EIOCBQUEUED)
btrfs_delalloc_release_space(inode, count); btrfs_delalloc_release_space(inode, count);
...@@ -6700,6 +6717,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ...@@ -6700,6 +6717,8 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
} }
btrfs_delalloc_release_metadata(inode, 0); btrfs_delalloc_release_metadata(inode, 0);
} }
if (wakeup)
inode_dio_done(inode);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment