Commit ffe51f01 authored by Lukas Czerner, committed by Jens Axboe

fs: Avoid invalidation in interrupt context in dio_complete()

Currently we try to defer completion of async DIO to process context
when there are any mapped pages associated with the inode, so that we
can invalidate those pages when the IO completes. However, the check is
racy and pages can be mapped after it is made. If that happens we can
end up calling invalidate_inode_pages2_range() from dio_complete() in
interrupt context, even though that function may sleep. This can be
reproduced by generic/451.
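
For illustration, a simplified sketch of the pre-fix async completion
path in fs/direct-io.c (locals and error handling elided, so the
fragment is not standalone-compilable):

	static void dio_bio_end_aio(struct bio *bio)
	{
		struct dio *dio = bio->bi_private;
		...
		if (dio->result)
			/*
			 * Racy: i_mapping->nrpages can become non-zero
			 * right after this check if the file is mmap'ed
			 * concurrently.
			 */
			defer_completion = dio->defer_completion ||
					   (dio->op == REQ_OP_WRITE &&
					    dio->inode->i_mapping->nrpages);

		if (defer_completion) {
			/* Workqueue runs in process context; sleeping is fine. */
			INIT_WORK(&dio->complete_work, dio_aio_complete_work);
			queue_work(dio->inode->i_sb->s_dio_done_wq,
				   &dio->complete_work);
		} else {
			/*
			 * Still in interrupt context; if pages were mapped
			 * after the check above, dio_complete() reaches
			 * invalidate_inode_pages2_range(), which may sleep.
			 */
			dio_complete(dio, 0, true);
		}
	}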

Fix this by passing the information about whether we can or can't
invalidate to dio_complete(). Thanks to Eryu Guan for reporting this
and Jan Kara for suggesting a fix.

Fixes: 332391a9 ("fs: Fix page cache inconsistency when mixing buffered and AIO DIO")
Reported-by: Eryu Guan <eguan@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Tested-by: Eryu Guan <eguan@redhat.com>
Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 639812a1
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -44,6 +44,12 @@
  */
 #define DIO_PAGES	64
 
+/*
+ * Flags for dio_complete()
+ */
+#define DIO_COMPLETE_ASYNC		0x01	/* This is async IO */
+#define DIO_COMPLETE_INVALIDATE	0x02	/* Can invalidate pages */
+
 /*
  * This code generally works in units of "dio_blocks".  A dio_block is
  * somewhere between the hard sector size and the filesystem block size.  it
@@ -225,7 +231,7 @@ static inline struct page *dio_get_page(struct dio *dio,
  * filesystems can use it to hold additional state between get_block calls and
  * dio_complete.
  */
-static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
+static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
 {
 	loff_t offset = dio->iocb->ki_pos;
 	ssize_t transferred = 0;
@@ -266,7 +272,8 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
 	 * one is a pretty crazy thing to do, so we don't support it 100%.  If
 	 * this invalidation fails, tough, the write still worked...
 	 */
-	if (ret > 0 && dio->op == REQ_OP_WRITE &&
+	if (flags & DIO_COMPLETE_INVALIDATE &&
+	    ret > 0 && dio->op == REQ_OP_WRITE &&
 	    dio->inode->i_mapping->nrpages) {
 		err = invalidate_inode_pages2_range(dio->inode->i_mapping,
 					offset >> PAGE_SHIFT,
@@ -285,7 +292,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
 	if (!(dio->flags & DIO_SKIP_DIO_COUNT))
 		inode_dio_end(dio->inode);
 
-	if (is_async) {
+	if (flags & DIO_COMPLETE_ASYNC) {
 		/*
 		 * generic_write_sync expects ki_pos to have been updated
 		 * already, but the submission path only does this for
@@ -306,7 +313,7 @@ static void dio_aio_complete_work(struct work_struct *work)
 {
 	struct dio *dio = container_of(work, struct dio, complete_work);
 
-	dio_complete(dio, 0, true);
+	dio_complete(dio, 0, DIO_COMPLETE_ASYNC | DIO_COMPLETE_INVALIDATE);
 }
 
 static blk_status_t dio_bio_complete(struct dio *dio, struct bio *bio);
@@ -348,7 +355,7 @@ static void dio_bio_end_aio(struct bio *bio)
 			queue_work(dio->inode->i_sb->s_dio_done_wq,
 				   &dio->complete_work);
 		} else {
-			dio_complete(dio, 0, true);
+			dio_complete(dio, 0, DIO_COMPLETE_ASYNC);
 		}
 	}
 }
@@ -1359,7 +1366,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	dio_await_completion(dio);
 
 	if (drop_refcount(dio) == 0) {
-		retval = dio_complete(dio, retval, false);
+		retval = dio_complete(dio, retval, DIO_COMPLETE_INVALIDATE);
 	} else
 		BUG_ON(retval != -EIOCBQUEUED);