Commit 281627df authored by Christoph Hellwig, committed by Ben Myers

xfs: log file size updates at I/O completion time

Do not use unlogged metadata updates and the VFS dirty bit for updating
the file size after writeback.  In addition to causing various problems
with updates getting delayed for far too long, this also drags in the
unscalable VFS dirty tracking, and is one of the few remaining unlogged
metadata updates.
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Mark Tinguely <tinguely@sgi.com>
Signed-off-by: Ben Myers <bpm@sgi.com>
parent 84803fb7
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "xfs_bmap_btree.h" #include "xfs_bmap_btree.h"
#include "xfs_dinode.h" #include "xfs_dinode.h"
#include "xfs_inode.h" #include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h" #include "xfs_alloc.h"
#include "xfs_error.h" #include "xfs_error.h"
#include "xfs_rw.h" #include "xfs_rw.h"
...@@ -107,25 +108,65 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) ...@@ -107,25 +108,65 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
XFS_I(ioend->io_inode)->i_d.di_size; XFS_I(ioend->io_inode)->i_d.di_size;
} }
/*
 * Preallocate the transaction used for a later on-disk file size update.
 *
 * Allocates an XFS_TRANS_FSYNC_TS transaction on the mount of the ioend's
 * inode, reserves XFS_FSYNC_TS_LOG_RES(mp) of log space for it and stores it
 * in ioend->io_append_trans.
 *
 * Returns 0 on success.  If the log reservation fails, the transaction is
 * cancelled, ioend->io_append_trans is left untouched and the error from
 * xfs_trans_reserve() is returned.
 */
STATIC int
xfs_setfilesize_trans_alloc(
struct xfs_ioend *ioend)
{
struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
struct xfs_trans *tp;
int error;
tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
/* Only log space is reserved; every other reservation argument is 0. */
error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
return error;
}
/* Publish the reserved transaction on the ioend for the completion side. */
ioend->io_append_trans = tp;
/*
* We hand off the transaction to the completion thread now, so
* clear the flag here.
*/
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
return 0;
}
/* /*
* Update on-disk file size now that data has been written to disk. * Update on-disk file size now that data has been written to disk.
*/ */
STATIC void STATIC int
xfs_setfilesize( xfs_setfilesize(
struct xfs_ioend *ioend) struct xfs_ioend *ioend)
{ {
struct xfs_inode *ip = XFS_I(ioend->io_inode); struct xfs_inode *ip = XFS_I(ioend->io_inode);
struct xfs_trans *tp = ioend->io_append_trans;
xfs_fsize_t isize; xfs_fsize_t isize;
/*
* The transaction was allocated in the I/O submission thread,
* thus we need to mark ourselves as being in a transaction
* manually.
*/
current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
if (isize) { if (!isize) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_trans_cancel(tp, 0);
return 0;
}
trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
ip->i_d.di_size = isize; ip->i_d.di_size = isize;
xfs_mark_inode_dirty(ip); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
} xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_iunlock(ip, XFS_ILOCK_EXCL); return xfs_trans_commit(tp, 0);
} }
/* /*
...@@ -143,7 +184,7 @@ xfs_finish_ioend( ...@@ -143,7 +184,7 @@ xfs_finish_ioend(
if (ioend->io_type == IO_UNWRITTEN) if (ioend->io_type == IO_UNWRITTEN)
queue_work(mp->m_unwritten_workqueue, &ioend->io_work); queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
else if (xfs_ioend_is_append(ioend)) else if (ioend->io_append_trans)
queue_work(mp->m_data_workqueue, &ioend->io_work); queue_work(mp->m_data_workqueue, &ioend->io_work);
else else
xfs_destroy_ioend(ioend); xfs_destroy_ioend(ioend);
...@@ -173,18 +214,32 @@ xfs_end_io( ...@@ -173,18 +214,32 @@ xfs_end_io(
* range to normal written extents after the data I/O has finished. * range to normal written extents after the data I/O has finished.
*/ */
if (ioend->io_type == IO_UNWRITTEN) { if (ioend->io_type == IO_UNWRITTEN) {
/*
* For buffered I/O we never preallocate a transaction when
* doing the unwritten extent conversion, but for direct I/O
* we do not know if we are converting an unwritten extent
* or not at the point where we preallocate the transaction.
*/
if (ioend->io_append_trans) {
ASSERT(ioend->io_isdirect);
current_set_flags_nested(
&ioend->io_append_trans->t_pflags, PF_FSTRANS);
xfs_trans_cancel(ioend->io_append_trans, 0);
}
error = xfs_iomap_write_unwritten(ip, ioend->io_offset, error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size); ioend->io_size);
if (error) { if (error) {
ioend->io_error = -error; ioend->io_error = -error;
goto done; goto done;
} }
} else if (ioend->io_append_trans) {
error = xfs_setfilesize(ioend);
if (error)
ioend->io_error = -error;
} else { } else {
/* ASSERT(!xfs_ioend_is_append(ioend));
* We might have to update the on-disk file size after
* extending writes.
*/
xfs_setfilesize(ioend);
} }
done: done:
...@@ -224,6 +279,7 @@ xfs_alloc_ioend( ...@@ -224,6 +279,7 @@ xfs_alloc_ioend(
*/ */
atomic_set(&ioend->io_remaining, 1); atomic_set(&ioend->io_remaining, 1);
ioend->io_isasync = 0; ioend->io_isasync = 0;
ioend->io_isdirect = 0;
ioend->io_error = 0; ioend->io_error = 0;
ioend->io_list = NULL; ioend->io_list = NULL;
ioend->io_type = type; ioend->io_type = type;
...@@ -234,6 +290,7 @@ xfs_alloc_ioend( ...@@ -234,6 +290,7 @@ xfs_alloc_ioend(
ioend->io_size = 0; ioend->io_size = 0;
ioend->io_iocb = NULL; ioend->io_iocb = NULL;
ioend->io_result = 0; ioend->io_result = 0;
ioend->io_append_trans = NULL;
INIT_WORK(&ioend->io_work, xfs_end_io); INIT_WORK(&ioend->io_work, xfs_end_io);
return ioend; return ioend;
...@@ -341,18 +398,9 @@ xfs_submit_ioend_bio( ...@@ -341,18 +398,9 @@ xfs_submit_ioend_bio(
xfs_ioend_t *ioend, xfs_ioend_t *ioend,
struct bio *bio) struct bio *bio)
{ {
struct xfs_inode *ip = XFS_I(ioend->io_inode);
atomic_inc(&ioend->io_remaining); atomic_inc(&ioend->io_remaining);
bio->bi_private = ioend; bio->bi_private = ioend;
bio->bi_end_io = xfs_end_bio; bio->bi_end_io = xfs_end_bio;
/*
* If the I/O is beyond EOF we mark the inode dirty immediately
* but don't update the inode size until I/O completion.
*/
if (xfs_new_eof(ip, ioend->io_offset + ioend->io_size))
xfs_mark_inode_dirty(ip);
submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
} }
...@@ -999,8 +1047,20 @@ xfs_vm_writepage( ...@@ -999,8 +1047,20 @@ xfs_vm_writepage(
wbc, end_index); wbc, end_index);
} }
if (iohead) if (iohead) {
/*
* Reserve log space if we might write beyond the on-disk
* inode size.
*/
if (ioend->io_type != IO_UNWRITTEN &&
xfs_ioend_is_append(ioend)) {
err = xfs_setfilesize_trans_alloc(ioend);
if (err)
goto error;
}
xfs_submit_ioend(wbc, iohead); xfs_submit_ioend(wbc, iohead);
}
return 0; return 0;
...@@ -1280,17 +1340,32 @@ xfs_vm_direct_IO( ...@@ -1280,17 +1340,32 @@ xfs_vm_direct_IO(
{ {
struct inode *inode = iocb->ki_filp->f_mapping->host; struct inode *inode = iocb->ki_filp->f_mapping->host;
struct block_device *bdev = xfs_find_bdev_for_inode(inode); struct block_device *bdev = xfs_find_bdev_for_inode(inode);
struct xfs_ioend *ioend = NULL;
ssize_t ret; ssize_t ret;
if (rw & WRITE) { if (rw & WRITE) {
iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); size_t size = iov_length(iov, nr_segs);
/*
* We need to preallocate a transaction for a size update
* here. In the case that this write both updates the size
* and converts at least one unwritten extent we will cancel
* the still clean transaction after the I/O has finished.
*/
iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
if (offset + size > XFS_I(inode)->i_d.di_size) {
ret = xfs_setfilesize_trans_alloc(ioend);
if (ret)
goto out_destroy_ioend;
ioend->io_isdirect = 1;
}
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
offset, nr_segs, offset, nr_segs,
xfs_get_blocks_direct, xfs_get_blocks_direct,
xfs_end_io_direct_write, NULL, 0); xfs_end_io_direct_write, NULL, 0);
if (ret != -EIOCBQUEUED && iocb->private) if (ret != -EIOCBQUEUED && iocb->private)
xfs_destroy_ioend(iocb->private); goto out_trans_cancel;
} else { } else {
ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
offset, nr_segs, offset, nr_segs,
...@@ -1299,6 +1374,16 @@ xfs_vm_direct_IO( ...@@ -1299,6 +1374,16 @@ xfs_vm_direct_IO(
} }
return ret; return ret;
out_trans_cancel:
if (ioend->io_append_trans) {
current_set_flags_nested(&ioend->io_append_trans->t_pflags,
PF_FSTRANS);
xfs_trans_cancel(ioend->io_append_trans, 0);
}
out_destroy_ioend:
xfs_destroy_ioend(ioend);
return ret;
} }
STATIC void STATIC void
......
...@@ -46,12 +46,14 @@ typedef struct xfs_ioend { ...@@ -46,12 +46,14 @@ typedef struct xfs_ioend {
int io_error; /* I/O error code */ int io_error; /* I/O error code */
atomic_t io_remaining; /* hold count */ atomic_t io_remaining; /* hold count */
unsigned int io_isasync : 1; /* needs aio_complete */ unsigned int io_isasync : 1; /* needs aio_complete */
unsigned int io_isdirect : 1;/* direct I/O */
struct inode *io_inode; /* file being written to */ struct inode *io_inode; /* file being written to */
struct buffer_head *io_buffer_head;/* buffer linked list head */ struct buffer_head *io_buffer_head;/* buffer linked list head */
struct buffer_head *io_buffer_tail;/* buffer linked list tail */ struct buffer_head *io_buffer_tail;/* buffer linked list tail */
size_t io_size; /* size of the extent */ size_t io_size; /* size of the extent */
xfs_off_t io_offset; /* offset in the file */ xfs_off_t io_offset; /* offset in the file */
struct work_struct io_work; /* xfsdatad work queue */ struct work_struct io_work; /* xfsdatad work queue */
struct xfs_trans *io_append_trans;/* xact. for size update */
struct kiocb *io_iocb; struct kiocb *io_iocb;
int io_result; int io_result;
} xfs_ioend_t; } xfs_ioend_t;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment