Commit 6dfa1b67 authored by Dave Chinner, committed by Dave Chinner

xfs: handle DIO overwrite EOF update completion correctly

Currently a DIO overwrite that extends the EOF (e.g. a sub-block IO or
a write into allocated blocks beyond EOF) requires a transaction for
the EOF update. This is done in IO completion context, but we aren't
explicitly handling this situation properly and so it can run in
interrupt context. Ensure that we defer IO that spans EOF correctly
to the DIO completion workqueue, and now that we have an ioend in IO
completion we can use the common ioend completion path to do all the
work.

Note: we do not preallocate the append transaction as we can have
multiple mapping and allocation calls per direct IO. Hence
preallocating can still leave us with nested transactions by
attempting to map and allocate more blocks after we've preallocated
an append transaction.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
parent d5cc2e3f
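
The hunks below gate both deferred completion and the in-core size update on
xfs_ioend_is_append(), a helper that is not itself touched by this patch. As a
rough sketch for reference (not part of this diff; assuming the xfs_ioend
fields of this era: io_offset, io_size and io_inode), it simply checks whether
the ioend ends beyond the current on-disk inode size, i.e. whether completing
it will need an EOF update:

/*
 * Sketch for reference only -- existing helper, not changed by this patch.
 * An ioend "is an append" when the range it covers extends past the
 * on-disk inode size, so completing it requires a file size update.
 */
static inline bool
xfs_ioend_is_append(struct xfs_ioend *ioend)
{
	return ioend->io_offset + ioend->io_size >
			XFS_I(ioend->io_inode)->i_d.di_size;
}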
fs/xfs/xfs_aops.c

@@ -1293,7 +1293,7 @@ xfs_map_direct(
 			imap);
 	}
 
-	if (ioend->io_type == XFS_IO_UNWRITTEN)
+	if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend))
 		set_buffer_defer_completion(bh_result);
 }
 
@@ -1535,8 +1535,10 @@ xfs_end_io_direct_write(
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_ioend	*ioend = private;
 
+	trace_xfs_gbmap_direct_endio(ip, offset, size, ioend->io_type, NULL);
+
 	if (XFS_FORCED_SHUTDOWN(mp))
-		goto out_destroy_ioend;
+		goto out_end_io;
 
 	/*
 	 * dio completion end_io functions are only called on writes if more
@@ -1557,40 +1559,37 @@ xfs_end_io_direct_write(
 	ioend->io_offset = offset;
 
 	/*
-	 * While the generic direct I/O code updates the inode size, it does
-	 * so only after the end_io handler is called, which means our
-	 * end_io handler thinks the on-disk size is outside the in-core
-	 * size. To prevent this just update it a little bit earlier here.
+	 * The ioend tells us whether we are doing unwritten extent conversion
+	 * or an append transaction that updates the on-disk file size. These
+	 * cases are the only cases where we should *potentially* be needing
+	 * to update the VFS inode size. When the ioend indicates this, we
+	 * are *guaranteed* to be running in non-interrupt context.
+	 *
+	 * We need to update the in-core inode size here so that we don't end up
+	 * with the on-disk inode size being outside the in-core inode size.
+	 * While we can do this in the process context after the IO has
+	 * completed, this does not work for AIO and hence we always update
+	 * the in-core inode size here if necessary.
 	 */
-	if (offset + size > i_size_read(inode))
-		i_size_write(inode, offset + size);
+	if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend)) {
+		if (offset + size > i_size_read(inode))
+			i_size_write(inode, offset + size);
+	} else
+		ASSERT(offset + size <= i_size_read(inode));
 
 	/*
-	 * For direct I/O we do not know if we need to allocate blocks or not,
-	 * so we can't preallocate an append transaction, as that results in
-	 * nested reservations and log space deadlocks. Hence allocate the
-	 * transaction here. While this is sub-optimal and can block IO
-	 * completion for some time, we're stuck with doing it this way until
-	 * we can pass the ioend to the direct IO allocation callbacks and
-	 * avoid nesting that way.
+	 * If we are doing an append IO that needs to update the EOF on disk,
+	 * do the transaction reserve now so we can use common end io
+	 * processing. Stashing the error (if there is one) in the ioend will
+	 * result in the ioend processing passing on the error if it is
+	 * possible as we can't return it from here.
 	 */
-	if (ioend->io_type == XFS_IO_UNWRITTEN) {
-		xfs_iomap_write_unwritten(ip, offset, size);
-	} else if (offset + size > ip->i_d.di_size) {
-		struct xfs_trans	*tp;
-		int			error;
-
-		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
-		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
-		if (error) {
-			xfs_trans_cancel(tp, 0);
-			goto out_destroy_ioend;
-		}
-
-		xfs_setfilesize(ip, tp, offset, size);
-	}
+	if (ioend->io_type == XFS_IO_OVERWRITE && xfs_ioend_is_append(ioend))
+		ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
 
-out_destroy_ioend:
-	xfs_destroy_ioend(ioend);
+out_end_io:
+	xfs_end_io(&ioend->io_work);
+	return;
 }
 
 STATIC ssize_t

fs/xfs/xfs_trace.h

@@ -1220,6 +1220,7 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
 DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
 DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
+DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
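
For context, the new XFS_IO_OVERWRITE branch hands the EOF update to
xfs_setfilesize_trans_alloc(), the same helper the buffered writeback path
already uses. A simplified sketch of what that call amounts to (omitting the
freeze-protection bookkeeping in the real function, so not the verbatim
source): reserve the fsync-timestamp transaction now, in process context, and
stash it in the ioend so the common xfs_end_io() path can commit it via
xfs_setfilesize():

/*
 * Simplified sketch of the existing helper -- reserve the size-update
 * transaction up front and park it in the ioend; the common ioend
 * completion path commits it when it updates the on-disk file size.
 */
STATIC int
xfs_setfilesize_trans_alloc(
	struct xfs_ioend	*ioend)
{
	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);

	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	ioend->io_append_trans = tp;
	return 0;
}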