Commit 97dcdea0 authored by Chandan Rajendra's avatar Chandan Rajendra Committed by David Sterba

Btrfs: Fix deadlock between direct IO and fast fsync

The following deadlock is seen when executing generic/113 test,

 ---------------------------------------------------------+----------------------------------------------------
  Direct I/O task                                           Fast fsync task
 ---------------------------------------------------------+----------------------------------------------------
  btrfs_direct_IO
    __blockdev_direct_IO
     do_blockdev_direct_IO
      do_direct_IO
       btrfs_get_blocks_direct
        while (blocks needs to written)
         get_more_blocks (first iteration)
          btrfs_get_blocks_direct
           btrfs_create_dio_extent
             down_read(&BTRFS_I(inode) >dio_sem)
             Create and add extent map and ordered extent
             up_read(&BTRFS_I(inode) >dio_sem)
                                                            btrfs_sync_file
                                                              btrfs_log_dentry_safe
                                                               btrfs_log_inode_parent
                                                                btrfs_log_inode
                                                                 btrfs_log_changed_extents
                                                                  down_write(&BTRFS_I(inode) >dio_sem)
                                                                   Collect new extent maps and ordered extents
                                                                    wait for ordered extent completion
         get_more_blocks (second iteration)
          btrfs_get_blocks_direct
           btrfs_create_dio_extent
             down_read(&BTRFS_I(inode) >dio_sem)
 --------------------------------------------------------------------------------------------------------------

In the above description, Btrfs direct I/O code path has not yet started
submitting bios for file range covered by the initial ordered
extent. Meanwhile, The fast fsync task obtains the write semaphore and
waits for I/O on the ordered extent to get completed. However, the
Direct I/O task is now blocked on obtaining the read semaphore.

To resolve the deadlock, this commit modifies the Direct I/O code path
to obtain the read semaphore before invoking
__blockdev_direct_IO(). The semaphore is then given up after
__blockdev_direct_IO() returns. This allows the Direct I/O code to
complete I/O on all the ordered extents it creates.
Signed-off-by: default avatarChandan Rajendra <chandan@linux.vnet.ibm.com>
Reviewed-by: default avatarFilipe Manana <fdmanana@suse.com>
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 47b5d646
...@@ -7215,7 +7215,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, ...@@ -7215,7 +7215,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
struct extent_map *em = NULL; struct extent_map *em = NULL;
int ret; int ret;
down_read(&BTRFS_I(inode)->dio_sem);
if (type != BTRFS_ORDERED_NOCOW) { if (type != BTRFS_ORDERED_NOCOW) {
em = create_pinned_em(inode, start, len, orig_start, em = create_pinned_em(inode, start, len, orig_start,
block_start, block_len, orig_block_len, block_start, block_len, orig_block_len,
...@@ -7234,7 +7233,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode, ...@@ -7234,7 +7233,6 @@ static struct extent_map *btrfs_create_dio_extent(struct inode *inode,
em = ERR_PTR(ret); em = ERR_PTR(ret);
} }
out: out:
up_read(&BTRFS_I(inode)->dio_sem);
return em; return em;
} }
...@@ -8695,6 +8693,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ...@@ -8695,6 +8693,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
dio_data.unsubmitted_oe_range_start = (u64)offset; dio_data.unsubmitted_oe_range_start = (u64)offset;
dio_data.unsubmitted_oe_range_end = (u64)offset; dio_data.unsubmitted_oe_range_end = (u64)offset;
current->journal_info = &dio_data; current->journal_info = &dio_data;
down_read(&BTRFS_I(inode)->dio_sem);
} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
&BTRFS_I(inode)->runtime_flags)) { &BTRFS_I(inode)->runtime_flags)) {
inode_dio_end(inode); inode_dio_end(inode);
...@@ -8707,6 +8706,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ...@@ -8707,6 +8706,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
iter, btrfs_get_blocks_direct, NULL, iter, btrfs_get_blocks_direct, NULL,
btrfs_submit_direct, flags); btrfs_submit_direct, flags);
if (iov_iter_rw(iter) == WRITE) { if (iov_iter_rw(iter) == WRITE) {
up_read(&BTRFS_I(inode)->dio_sem);
current->journal_info = NULL; current->journal_info = NULL;
if (ret < 0 && ret != -EIOCBQUEUED) { if (ret < 0 && ret != -EIOCBQUEUED) {
if (dio_data.reserve) if (dio_data.reserve)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment