Commit 22b4eb5e authored by Linus Torvalds

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: cleanup xfs_file_aio_write
  xfs: always return with the iolock held from xfs_file_aio_write_checks
  xfs: remove the i_new_size field in struct xfs_inode
  xfs: remove the i_size field in struct xfs_inode
  xfs: replace i_pin_wait with a bit waitqueue
  xfs: replace i_flock with a sleeping bitlock
  xfs: make i_flags an unsigned long
  xfs: remove the if_ext_max field in struct xfs_ifork
  xfs: remove the unused dm_attrs structure
  xfs: cleanup xfs_iomap_eof_align_last_fsb
  xfs: remove xfs_itruncate_data
parents d65773b2 d0606464
...@@ -111,8 +111,7 @@ xfs_ioend_new_eof( ...@@ -111,8 +111,7 @@ xfs_ioend_new_eof(
xfs_fsize_t bsize; xfs_fsize_t bsize;
bsize = ioend->io_offset + ioend->io_size; bsize = ioend->io_offset + ioend->io_size;
isize = MAX(ip->i_size, ip->i_new_size); isize = MIN(i_size_read(VFS_I(ip)), bsize);
isize = MIN(isize, bsize);
return isize > ip->i_d.di_size ? isize : 0; return isize > ip->i_d.di_size ? isize : 0;
} }
...@@ -126,11 +125,7 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) ...@@ -126,11 +125,7 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
} }
/* /*
* Update on-disk file size now that data has been written to disk. The * Update on-disk file size now that data has been written to disk.
* current in-memory file size is i_size. If a write is beyond eof i_new_size
* will be the intended file size until i_size is updated. If this write does
* not extend all the way to the valid file size then restrict this update to
* the end of the write.
* *
* This function does not block as blocking on the inode lock in IO completion * This function does not block as blocking on the inode lock in IO completion
* can lead to IO completion order dependency deadlocks.. If it can't get the * can lead to IO completion order dependency deadlocks.. If it can't get the
...@@ -1278,6 +1273,15 @@ xfs_end_io_direct_write( ...@@ -1278,6 +1273,15 @@ xfs_end_io_direct_write(
{ {
struct xfs_ioend *ioend = iocb->private; struct xfs_ioend *ioend = iocb->private;
/*
* While the generic direct I/O code updates the inode size, it does
* so only after the end_io handler is called, which means our
* end_io handler thinks the on-disk size is outside the in-core
* size. To prevent this just update it a little bit earlier here.
*/
if (offset + size > i_size_read(ioend->io_inode))
i_size_write(ioend->io_inode, offset + size);
/* /*
* blockdev_direct_IO can return an error even after the I/O * blockdev_direct_IO can return an error even after the I/O
* completion handler was called. Thus we need to protect * completion handler was called. Thus we need to protect
...@@ -1340,12 +1344,11 @@ xfs_vm_write_failed( ...@@ -1340,12 +1344,11 @@ xfs_vm_write_failed(
if (to > inode->i_size) { if (to > inode->i_size) {
/* /*
* punch out the delalloc blocks we have already allocated. We * Punch out the delalloc blocks we have already allocated.
* don't call xfs_setattr() to do this as we may be in the *
* middle of a multi-iovec write and so the vfs inode->i_size * Don't bother with xfs_setattr given that nothing can have
* will not match the xfs ip->i_size and so it will zero too * made it to disk yet as the page is still locked at this
* much. Hence we jus truncate the page cache to zero what is * point.
* necessary and punch the delalloc blocks directly.
*/ */
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
xfs_fileoff_t start_fsb; xfs_fileoff_t start_fsb;
......
...@@ -827,10 +827,6 @@ xfs_attr_inactive(xfs_inode_t *dp) ...@@ -827,10 +827,6 @@ xfs_attr_inactive(xfs_inode_t *dp)
if (error) if (error)
goto out; goto out;
/*
* Commit the last in the sequence of transactions.
*/
xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL);
......
...@@ -271,10 +271,6 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) ...@@ -271,10 +271,6 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
dp = args->dp; dp = args->dp;
mp = dp->i_mount; mp = dp->i_mount;
dp->i_d.di_forkoff = forkoff; dp->i_d.di_forkoff = forkoff;
dp->i_df.if_ext_max =
XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
dp->i_afp->if_ext_max =
XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
ifp = dp->i_afp; ifp = dp->i_afp;
ASSERT(ifp->if_flags & XFS_IFINLINE); ASSERT(ifp->if_flags & XFS_IFINLINE);
...@@ -326,7 +322,6 @@ xfs_attr_fork_reset( ...@@ -326,7 +322,6 @@ xfs_attr_fork_reset(
ASSERT(ip->i_d.di_anextents == 0); ASSERT(ip->i_d.di_anextents == 0);
ASSERT(ip->i_afp == NULL); ASSERT(ip->i_afp == NULL);
ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
} }
...@@ -389,10 +384,6 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) ...@@ -389,10 +384,6 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
(args->op_flags & XFS_DA_OP_ADDNAME) || (args->op_flags & XFS_DA_OP_ADDNAME) ||
!(mp->m_flags & XFS_MOUNT_ATTR2) || !(mp->m_flags & XFS_MOUNT_ATTR2) ||
dp->i_d.di_format == XFS_DINODE_FMT_BTREE); dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
dp->i_afp->if_ext_max =
XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
dp->i_df.if_ext_max =
XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
xfs_trans_log_inode(args->trans, dp, xfs_trans_log_inode(args->trans, dp,
XFS_ILOG_CORE | XFS_ILOG_ADATA); XFS_ILOG_CORE | XFS_ILOG_ADATA);
} }
......
This diff is collapsed.
...@@ -163,12 +163,14 @@ xfs_swap_extents_check_format( ...@@ -163,12 +163,14 @@ xfs_swap_extents_check_format(
/* Check temp in extent form to max in target */ /* Check temp in extent form to max in target */
if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max) XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
return EINVAL; return EINVAL;
/* Check target in extent form to max in temp */ /* Check target in extent form to max in temp */
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
return EINVAL; return EINVAL;
/* /*
...@@ -180,18 +182,25 @@ xfs_swap_extents_check_format( ...@@ -180,18 +182,25 @@ xfs_swap_extents_check_format(
* (a common defrag case) which will occur when the temp inode is in * (a common defrag case) which will occur when the temp inode is in
* extent format... * extent format...
*/ */
if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
((XFS_IFORK_BOFF(ip) && if (XFS_IFORK_BOFF(ip) &&
tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) || tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max)) return EINVAL;
return EINVAL; if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
return EINVAL;
}
/* Reciprocal target->temp btree format checks */ /* Reciprocal target->temp btree format checks */
if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
((XFS_IFORK_BOFF(tip) && if (XFS_IFORK_BOFF(tip) &&
ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) || ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max)) return EINVAL;
return EINVAL;
if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
return EINVAL;
}
return 0; return 0;
} }
...@@ -348,16 +357,6 @@ xfs_swap_extents( ...@@ -348,16 +357,6 @@ xfs_swap_extents(
*ifp = *tifp; /* struct copy */ *ifp = *tifp; /* struct copy */
*tifp = *tempifp; /* struct copy */ *tifp = *tempifp; /* struct copy */
/*
* Fix the in-memory data fork values that are dependent on the fork
* offset in the inode. We can't assume they remain the same as attr2
* has dynamic fork offsets.
*/
ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
(uint)sizeof(xfs_bmbt_rec_t);
tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
(uint)sizeof(xfs_bmbt_rec_t);
/* /*
* Fix the on-disk inode values * Fix the on-disk inode values
*/ */
......
...@@ -327,7 +327,7 @@ xfs_file_aio_read( ...@@ -327,7 +327,7 @@ xfs_file_aio_read(
mp->m_rtdev_targp : mp->m_ddev_targp; mp->m_rtdev_targp : mp->m_ddev_targp;
if ((iocb->ki_pos & target->bt_smask) || if ((iocb->ki_pos & target->bt_smask) ||
(size & target->bt_smask)) { (size & target->bt_smask)) {
if (iocb->ki_pos == ip->i_size) if (iocb->ki_pos == i_size_read(inode))
return 0; return 0;
return -XFS_ERROR(EINVAL); return -XFS_ERROR(EINVAL);
} }
...@@ -412,51 +412,6 @@ xfs_file_splice_read( ...@@ -412,51 +412,6 @@ xfs_file_splice_read(
return ret; return ret;
} }
/*
 * Post-write bookkeeping: account the bytes written, clamp the file
 * position after a failed write, and push the XFS in-core size
 * (ip->i_size) forward if the position now lies beyond it.
 *
 * @inode:         VFS inode that was written to
 * @ppos:          in/out file position; may be pulled back to the VFS
 *                 i_size when the write failed
 * @bytes_written: result of the write; a positive value is a byte count,
 *                 a negative value is an error code
 */
STATIC void
xfs_aio_write_isize_update(
struct inode *inode,
loff_t *ppos,
ssize_t bytes_written)
{
struct xfs_inode *ip = XFS_I(inode);
xfs_fsize_t isize = i_size_read(inode);
/* Only successful writes contribute to the xs_write_bytes statistic. */
if (bytes_written > 0)
XFS_STATS_ADD(xs_write_bytes, bytes_written);
/*
 * On any error other than -EFAULT, do not leave the position beyond
 * the VFS inode size sampled above.
 */
if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
*ppos > isize))
*ppos = isize;
/*
 * Unlocked check first, then re-check under XFS_ILOCK_EXCL before
 * storing, so ip->i_size is only ever moved forward while the lock
 * is held.
 */
if (*ppos > ip->i_size) {
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
if (*ppos > ip->i_size)
ip->i_size = *ppos;
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
}
}
/*
 * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
 * part of the I/O may have been written to disk before the error occurred. In
 * this case the on-disk file size may have been adjusted beyond the in-memory
 * file size and now needs to be truncated back.
 *
 * @ip:       XFS inode the write was issued against
 * @new_size: the new size this writer computed for its I/O; nothing is done
 *            unless ip->i_new_size still equals this value
 *
 * Note that the di_size clamp below sits inside the outer i_new_size check,
 * so it is only performed when this writer's new_size matches.
 */
STATIC void
xfs_aio_write_newsize_update(
struct xfs_inode *ip,
xfs_fsize_t new_size)
{
/* Unlocked check first; repeated under XFS_ILOCK_EXCL before modifying. */
if (new_size == ip->i_new_size) {
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
if (new_size == ip->i_new_size)
ip->i_new_size = 0;
/* Pull the on-disk size back so it never exceeds the in-core size. */
if (ip->i_d.di_size > ip->i_size)
ip->i_d.di_size = ip->i_size;
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
}
}
/* /*
* xfs_file_splice_write() does not use xfs_rw_ilock() because * xfs_file_splice_write() does not use xfs_rw_ilock() because
* generic_file_splice_write() takes the i_mutex itself. This, in theory, * generic_file_splice_write() takes the i_mutex itself. This, in theory,
...@@ -475,7 +430,6 @@ xfs_file_splice_write( ...@@ -475,7 +430,6 @@ xfs_file_splice_write(
{ {
struct inode *inode = outfilp->f_mapping->host; struct inode *inode = outfilp->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
xfs_fsize_t new_size;
int ioflags = 0; int ioflags = 0;
ssize_t ret; ssize_t ret;
...@@ -489,19 +443,12 @@ xfs_file_splice_write( ...@@ -489,19 +443,12 @@ xfs_file_splice_write(
xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_IOLOCK_EXCL);
new_size = *ppos + count;
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (new_size > ip->i_size)
ip->i_new_size = new_size;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
trace_xfs_file_splice_write(ip, count, *ppos, ioflags); trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
if (ret > 0)
XFS_STATS_ADD(xs_write_bytes, ret);
xfs_aio_write_isize_update(inode, ppos, ret);
xfs_aio_write_newsize_update(ip, new_size);
xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret; return ret;
} }
...@@ -689,28 +636,26 @@ xfs_zero_eof( ...@@ -689,28 +636,26 @@ xfs_zero_eof(
/* /*
* Common pre-write limit and setup checks. * Common pre-write limit and setup checks.
* *
* Returns with iolock held according to @iolock. * Called with the iolocked held either shared and exclusive according to
* @iolock, and returns with it held. Might upgrade the iolock to exclusive
* if called for a direct write beyond i_size.
*/ */
STATIC ssize_t STATIC ssize_t
xfs_file_aio_write_checks( xfs_file_aio_write_checks(
struct file *file, struct file *file,
loff_t *pos, loff_t *pos,
size_t *count, size_t *count,
xfs_fsize_t *new_sizep,
int *iolock) int *iolock)
{ {
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
xfs_fsize_t new_size;
int error = 0; int error = 0;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL); xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
*new_sizep = 0;
restart: restart:
error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
if (error) { if (error) {
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
*iolock = 0;
return error; return error;
} }
...@@ -720,36 +665,21 @@ xfs_file_aio_write_checks( ...@@ -720,36 +665,21 @@ xfs_file_aio_write_checks(
/* /*
* If the offset is beyond the size of the file, we need to zero any * If the offset is beyond the size of the file, we need to zero any
* blocks that fall between the existing EOF and the start of this * blocks that fall between the existing EOF and the start of this
* write. There is no need to issue zeroing if another in-flght IO ends * write. If zeroing is needed and we are currently holding the
* at or before this one If zeronig is needed and we are currently * iolock shared, we need to update it to exclusive which involves
* holding the iolock shared, we need to update it to exclusive which * dropping all locks and relocking to maintain correct locking order.
* involves dropping all locks and relocking to maintain correct locking * If we do this, restart the function to ensure all checks and values
* order. If we do this, restart the function to ensure all checks and * are still valid.
* values are still valid.
*/ */
if ((ip->i_new_size && *pos > ip->i_new_size) || if (*pos > i_size_read(inode)) {
(!ip->i_new_size && *pos > ip->i_size)) {
if (*iolock == XFS_IOLOCK_SHARED) { if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
*iolock = XFS_IOLOCK_EXCL; *iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
goto restart; goto restart;
} }
error = -xfs_zero_eof(ip, *pos, ip->i_size); error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
} }
/*
* If this IO extends beyond EOF, we may need to update ip->i_new_size.
* We have already zeroed space beyond EOF (if necessary). Only update
* ip->i_new_size if this IO ends beyond any other in-flight writes.
*/
new_size = *pos + *count;
if (new_size > ip->i_size) {
if (new_size > ip->i_new_size)
ip->i_new_size = new_size;
*new_sizep = new_size;
}
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
if (error) if (error)
return error; return error;
...@@ -794,9 +724,7 @@ xfs_file_dio_aio_write( ...@@ -794,9 +724,7 @@ xfs_file_dio_aio_write(
const struct iovec *iovp, const struct iovec *iovp,
unsigned long nr_segs, unsigned long nr_segs,
loff_t pos, loff_t pos,
size_t ocount, size_t ocount)
xfs_fsize_t *new_size,
int *iolock)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
...@@ -806,10 +734,10 @@ xfs_file_dio_aio_write( ...@@ -806,10 +734,10 @@ xfs_file_dio_aio_write(
ssize_t ret = 0; ssize_t ret = 0;
size_t count = ocount; size_t count = ocount;
int unaligned_io = 0; int unaligned_io = 0;
int iolock;
struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp; mp->m_rtdev_targp : mp->m_ddev_targp;
*iolock = 0;
if ((pos & target->bt_smask) || (count & target->bt_smask)) if ((pos & target->bt_smask) || (count & target->bt_smask))
return -XFS_ERROR(EINVAL); return -XFS_ERROR(EINVAL);
...@@ -824,31 +752,31 @@ xfs_file_dio_aio_write( ...@@ -824,31 +752,31 @@ xfs_file_dio_aio_write(
* EOF zeroing cases and fill out the new inode size as appropriate. * EOF zeroing cases and fill out the new inode size as appropriate.
*/ */
if (unaligned_io || mapping->nrpages) if (unaligned_io || mapping->nrpages)
*iolock = XFS_IOLOCK_EXCL; iolock = XFS_IOLOCK_EXCL;
else else
*iolock = XFS_IOLOCK_SHARED; iolock = XFS_IOLOCK_SHARED;
xfs_rw_ilock(ip, *iolock); xfs_rw_ilock(ip, iolock);
/* /*
* Recheck if there are cached pages that need invalidate after we got * Recheck if there are cached pages that need invalidate after we got
* the iolock to protect against other threads adding new pages while * the iolock to protect against other threads adding new pages while
* we were waiting for the iolock. * we were waiting for the iolock.
*/ */
if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) { if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, *iolock); xfs_rw_iunlock(ip, iolock);
*iolock = XFS_IOLOCK_EXCL; iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock); xfs_rw_ilock(ip, iolock);
} }
ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
if (ret) if (ret)
return ret; goto out;
if (mapping->nrpages) { if (mapping->nrpages) {
ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
FI_REMAPF_LOCKED); FI_REMAPF_LOCKED);
if (ret) if (ret)
return ret; goto out;
} }
/* /*
...@@ -857,15 +785,18 @@ xfs_file_dio_aio_write( ...@@ -857,15 +785,18 @@ xfs_file_dio_aio_write(
*/ */
if (unaligned_io) if (unaligned_io)
inode_dio_wait(inode); inode_dio_wait(inode);
else if (*iolock == XFS_IOLOCK_EXCL) { else if (iolock == XFS_IOLOCK_EXCL) {
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
*iolock = XFS_IOLOCK_SHARED; iolock = XFS_IOLOCK_SHARED;
} }
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
ret = generic_file_direct_write(iocb, iovp, ret = generic_file_direct_write(iocb, iovp,
&nr_segs, pos, &iocb->ki_pos, count, ocount); &nr_segs, pos, &iocb->ki_pos, count, ocount);
out:
xfs_rw_iunlock(ip, iolock);
/* No fallback to buffered IO on errors for XFS. */ /* No fallback to buffered IO on errors for XFS. */
ASSERT(ret < 0 || ret == count); ASSERT(ret < 0 || ret == count);
return ret; return ret;
...@@ -877,9 +808,7 @@ xfs_file_buffered_aio_write( ...@@ -877,9 +808,7 @@ xfs_file_buffered_aio_write(
const struct iovec *iovp, const struct iovec *iovp,
unsigned long nr_segs, unsigned long nr_segs,
loff_t pos, loff_t pos,
size_t ocount, size_t ocount)
xfs_fsize_t *new_size,
int *iolock)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
...@@ -887,14 +816,14 @@ xfs_file_buffered_aio_write( ...@@ -887,14 +816,14 @@ xfs_file_buffered_aio_write(
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
ssize_t ret; ssize_t ret;
int enospc = 0; int enospc = 0;
int iolock = XFS_IOLOCK_EXCL;
size_t count = ocount; size_t count = ocount;
*iolock = XFS_IOLOCK_EXCL; xfs_rw_ilock(ip, iolock);
xfs_rw_ilock(ip, *iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock); ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
if (ret) if (ret)
return ret; goto out;
/* We can write back this queue in page reclaim */ /* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info; current->backing_dev_info = mapping->backing_dev_info;
...@@ -908,13 +837,15 @@ xfs_file_buffered_aio_write( ...@@ -908,13 +837,15 @@ xfs_file_buffered_aio_write(
* page locks and retry *once* * page locks and retry *once*
*/ */
if (ret == -ENOSPC && !enospc) { if (ret == -ENOSPC && !enospc) {
ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
if (ret)
return ret;
enospc = 1; enospc = 1;
goto write_retry; ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
if (!ret)
goto write_retry;
} }
current->backing_dev_info = NULL; current->backing_dev_info = NULL;
out:
xfs_rw_iunlock(ip, iolock);
return ret; return ret;
} }
...@@ -930,9 +861,7 @@ xfs_file_aio_write( ...@@ -930,9 +861,7 @@ xfs_file_aio_write(
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
ssize_t ret; ssize_t ret;
int iolock;
size_t ocount = 0; size_t ocount = 0;
xfs_fsize_t new_size = 0;
XFS_STATS_INC(xs_write_calls); XFS_STATS_INC(xs_write_calls);
...@@ -951,33 +880,22 @@ xfs_file_aio_write( ...@@ -951,33 +880,22 @@ xfs_file_aio_write(
return -EIO; return -EIO;
if (unlikely(file->f_flags & O_DIRECT)) if (unlikely(file->f_flags & O_DIRECT))
ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
ocount, &new_size, &iolock);
else else
ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
ocount, &new_size, &iolock); ocount);
xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
if (ret <= 0) if (ret > 0) {
goto out_unlock; ssize_t err;
/* Handle various SYNC-type writes */ XFS_STATS_ADD(xs_write_bytes, ret);
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
loff_t end = pos + ret - 1;
int error;
xfs_rw_iunlock(ip, iolock); /* Handle various SYNC-type writes */
error = xfs_file_fsync(file, pos, end, err = generic_write_sync(file, pos, ret);
(file->f_flags & __O_SYNC) ? 0 : 1); if (err < 0)
xfs_rw_ilock(ip, iolock); ret = err;
if (error)
ret = error;
} }
out_unlock:
xfs_aio_write_newsize_update(ip, new_size);
xfs_rw_iunlock(ip, iolock);
return ret; return ret;
} }
......
...@@ -90,7 +90,7 @@ xfs_wait_on_pages( ...@@ -90,7 +90,7 @@ xfs_wait_on_pages(
if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
return -filemap_fdatawait_range(mapping, first, return -filemap_fdatawait_range(mapping, first,
last == -1 ? ip->i_size - 1 : last); last == -1 ? XFS_ISIZE(ip) - 1 : last);
} }
return 0; return 0;
} }
...@@ -77,7 +77,7 @@ xfs_inode_alloc( ...@@ -77,7 +77,7 @@ xfs_inode_alloc(
ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush)); ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0); ASSERT(ip->i_ino == 0);
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
...@@ -94,8 +94,6 @@ xfs_inode_alloc( ...@@ -94,8 +94,6 @@ xfs_inode_alloc(
ip->i_update_core = 0; ip->i_update_core = 0;
ip->i_delayed_blks = 0; ip->i_delayed_blks = 0;
memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
ip->i_size = 0;
ip->i_new_size = 0;
return ip; return ip;
} }
...@@ -150,7 +148,7 @@ xfs_inode_free( ...@@ -150,7 +148,7 @@ xfs_inode_free(
/* asserts to verify all state is correct here */ /* asserts to verify all state is correct here */
ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush)); ASSERT(!xfs_isiflocked(ip));
/* /*
* Because we use RCU freeing we need to ensure the inode always * Because we use RCU freeing we need to ensure the inode always
...@@ -450,8 +448,6 @@ xfs_iget( ...@@ -450,8 +448,6 @@ xfs_iget(
*ipp = ip; *ipp = ip;
ASSERT(ip->i_df.if_ext_max ==
XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
/* /*
* If we have a real type for an on-disk inode, we can set ops(&unlock) * If we have a real type for an on-disk inode, we can set ops(&unlock)
* now. If it's a new inode being created, xfs_ialloc will handle it. * now. If it's a new inode being created, xfs_ialloc will handle it.
...@@ -715,3 +711,19 @@ xfs_isilocked( ...@@ -715,3 +711,19 @@ xfs_isilocked(
return 0; return 0;
} }
#endif #endif
/*
 * Blocking acquire path for the inode flush lock, which is represented by
 * the __XFS_IFLOCK_BIT bit in ip->i_flags.
 *
 * The caller is queued on the bit waitqueue for that bit and the loop keeps
 * retrying xfs_iflock_nowait() until it returns non-zero (presumably meaning
 * the bit was acquired - confirm against its definition). The wait is
 * TASK_UNINTERRUPTIBLE.
 */
void
__xfs_iflock(
struct xfs_inode *ip)
{
wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
do {
/*
 * Register on the waitqueue before testing the bit, then only
 * call io_schedule() while the lock still appears to be held.
 */
prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
if (xfs_isiflocked(ip))
io_schedule();
} while (!xfs_iflock_nowait(ip));
/* Lock obtained: remove ourselves from the waitqueue. */
finish_wait(wq, &wait.wait);
}
...@@ -299,11 +299,8 @@ xfs_iformat( ...@@ -299,11 +299,8 @@ xfs_iformat(
{ {
xfs_attr_shortform_t *atp; xfs_attr_shortform_t *atp;
int size; int size;
int error; int error = 0;
xfs_fsize_t di_size; xfs_fsize_t di_size;
ip->i_df.if_ext_max =
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
error = 0;
if (unlikely(be32_to_cpu(dip->di_nextents) + if (unlikely(be32_to_cpu(dip->di_nextents) +
be16_to_cpu(dip->di_anextents) > be16_to_cpu(dip->di_anextents) >
...@@ -350,7 +347,6 @@ xfs_iformat( ...@@ -350,7 +347,6 @@ xfs_iformat(
return XFS_ERROR(EFSCORRUPTED); return XFS_ERROR(EFSCORRUPTED);
} }
ip->i_d.di_size = 0; ip->i_d.di_size = 0;
ip->i_size = 0;
ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip); ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
break; break;
...@@ -409,10 +405,10 @@ xfs_iformat( ...@@ -409,10 +405,10 @@ xfs_iformat(
} }
if (!XFS_DFORK_Q(dip)) if (!XFS_DFORK_Q(dip))
return 0; return 0;
ASSERT(ip->i_afp == NULL); ASSERT(ip->i_afp == NULL);
ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS); ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
ip->i_afp->if_ext_max =
XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
switch (dip->di_aformat) { switch (dip->di_aformat) {
case XFS_DINODE_FMT_LOCAL: case XFS_DINODE_FMT_LOCAL:
atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip); atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
...@@ -604,10 +600,11 @@ xfs_iformat_btree( ...@@ -604,10 +600,11 @@ xfs_iformat_btree(
* or the number of extents is greater than the number of * or the number of extents is greater than the number of
* blocks. * blocks.
*/ */
if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
|| XFS_BMDR_SPACE_CALC(nrecs) > XFS_IFORK_MAXEXT(ip, whichfork) ||
XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) XFS_BMDR_SPACE_CALC(nrecs) >
|| XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) { XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) ||
XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).", xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
(unsigned long long) ip->i_ino); (unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW, XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
...@@ -835,12 +832,6 @@ xfs_iread( ...@@ -835,12 +832,6 @@ xfs_iread(
* with the uninitialized part of it. * with the uninitialized part of it.
*/ */
ip->i_d.di_mode = 0; ip->i_d.di_mode = 0;
/*
* Initialize the per-fork minima and maxima for a new
* inode here. xfs_iformat will do it for old inodes.
*/
ip->i_df.if_ext_max =
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
} }
/* /*
...@@ -861,7 +852,6 @@ xfs_iread( ...@@ -861,7 +852,6 @@ xfs_iread(
} }
ip->i_delayed_blks = 0; ip->i_delayed_blks = 0;
ip->i_size = ip->i_d.di_size;
/* /*
* Mark the buffer containing the inode as something to keep * Mark the buffer containing the inode as something to keep
...@@ -1051,7 +1041,6 @@ xfs_ialloc( ...@@ -1051,7 +1041,6 @@ xfs_ialloc(
} }
ip->i_d.di_size = 0; ip->i_d.di_size = 0;
ip->i_size = 0;
ip->i_d.di_nextents = 0; ip->i_d.di_nextents = 0;
ASSERT(ip->i_d.di_nblocks == 0); ASSERT(ip->i_d.di_nblocks == 0);
...@@ -1165,52 +1154,6 @@ xfs_ialloc( ...@@ -1165,52 +1154,6 @@ xfs_ialloc(
return 0; return 0;
} }
/*
 * Check to make sure that there are no blocks allocated to the
 * file beyond the size of the file. We don't check this for
 * files with fixed size extents or real time extents, but we
 * at least do it for regular files.
 *
 * This is a DEBUG-only check: in non-DEBUG builds xfs_isize_check()
 * is defined as an empty macro and compiles to nothing.
 */
#ifdef DEBUG
STATIC void
xfs_isize_check(
struct xfs_inode *ip,
xfs_fsize_t isize)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t map_first;
int nimaps;
xfs_bmbt_irec_t imaps[2];
int error;
/* Only regular, non-realtime files without XFS_DIFLAG_EXTSIZE are checked. */
if (!S_ISREG(ip->i_d.di_mode))
return;
if (XFS_IS_REALTIME_INODE(ip))
return;
if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
return;
/* Ask for up to two mappings so that more than one extent is detectable. */
nimaps = 2;
/* First filesystem block to examine, derived from the byte size isize. */
map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
/*
 * The filesystem could be shutting down, so bmapi may return
 * an error.
 */
error = xfs_bmapi_read(ip, map_first,
(XFS_B_TO_FSB(mp,
(xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first),
imaps, &nimaps, XFS_BMAPI_ENTIRE);
if (error)
return;
/* Everything from map_first onwards must come back as one single hole. */
ASSERT(nimaps == 1);
ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
}
#else /* DEBUG */
#define xfs_isize_check(ip, isize)
#endif /* DEBUG */
/* /*
* Free up the underlying blocks past new_size. The new size must be smaller * Free up the underlying blocks past new_size. The new size must be smaller
* than the current size. This routine can be used both for the attribute and * than the current size. This routine can be used both for the attribute and
...@@ -1252,12 +1195,14 @@ xfs_itruncate_extents( ...@@ -1252,12 +1195,14 @@ xfs_itruncate_extents(
int done = 0; int done = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
ASSERT(new_size <= ip->i_size); ASSERT(new_size <= XFS_ISIZE(ip));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_itemp != NULL); ASSERT(ip->i_itemp != NULL);
ASSERT(ip->i_itemp->ili_lock_flags == 0); ASSERT(ip->i_itemp->ili_lock_flags == 0);
ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
trace_xfs_itruncate_extents_start(ip, new_size);
/* /*
* Since it is possible for space to become allocated beyond * Since it is possible for space to become allocated beyond
* the end of the file (in a crash where the space is allocated * the end of the file (in a crash where the space is allocated
...@@ -1325,6 +1270,14 @@ xfs_itruncate_extents( ...@@ -1325,6 +1270,14 @@ xfs_itruncate_extents(
goto out; goto out;
} }
/*
* Always re-log the inode so that our permanent transaction can keep
* on rolling it forward in the log.
*/
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
trace_xfs_itruncate_extents_end(ip, new_size);
out: out:
*tpp = tp; *tpp = tp;
return error; return error;
...@@ -1338,74 +1291,6 @@ xfs_itruncate_extents( ...@@ -1338,74 +1291,6 @@ xfs_itruncate_extents(
goto out; goto out;
} }
int
xfs_itruncate_data(
struct xfs_trans **tpp,
struct xfs_inode *ip,
xfs_fsize_t new_size)
{
int error;
trace_xfs_itruncate_data_start(ip, new_size);
/*
* The first thing we do is set the size to new_size permanently on
* disk. This way we don't have to worry about anyone ever being able
* to look at the data being freed even in the face of a crash.
* What we're getting around here is the case where we free a block, it
* is allocated to another file, it is written to, and then we crash.
* If the new data gets written to the file but the log buffers
* containing the free and reallocation don't, then we'd end up with
* garbage in the blocks being freed. As long as we make the new_size
* permanent before actually freeing any blocks it doesn't matter if
* they get written to.
*/
if (ip->i_d.di_nextents > 0) {
/*
* If we are not changing the file size then do not update
* the on-disk file size - we may be called from
* xfs_inactive_free_eofblocks(). If we update the on-disk
* file size and then the system crashes before the contents
* of the file are flushed to disk then the files may be
* full of holes (ie NULL files bug).
*/
if (ip->i_size != new_size) {
ip->i_d.di_size = new_size;
ip->i_size = new_size;
xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
}
}
error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
if (error)
return error;
/*
* If we are not changing the file size then do not update the on-disk
* file size - we may be called from xfs_inactive_free_eofblocks().
* If we update the on-disk file size and then the system crashes
* before the contents of the file are flushed to disk then the files
* may be full of holes (ie NULL files bug).
*/
xfs_isize_check(ip, new_size);
if (ip->i_size != new_size) {
ip->i_d.di_size = new_size;
ip->i_size = new_size;
}
ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
/*
* Always re-log the inode so that our permanent transaction can keep
* on rolling it forward in the log.
*/
xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
trace_xfs_itruncate_data_end(ip, new_size);
return 0;
}
/* /*
* This is called when the inode's link count goes to 0. * This is called when the inode's link count goes to 0.
* We place the on-disk inode on a list in the AGI. It * We place the on-disk inode on a list in the AGI. It
...@@ -1824,8 +1709,7 @@ xfs_ifree( ...@@ -1824,8 +1709,7 @@ xfs_ifree(
ASSERT(ip->i_d.di_nlink == 0); ASSERT(ip->i_d.di_nlink == 0);
ASSERT(ip->i_d.di_nextents == 0); ASSERT(ip->i_d.di_nextents == 0);
ASSERT(ip->i_d.di_anextents == 0); ASSERT(ip->i_d.di_anextents == 0);
ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) || ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode));
(!S_ISREG(ip->i_d.di_mode)));
ASSERT(ip->i_d.di_nblocks == 0); ASSERT(ip->i_d.di_nblocks == 0);
/* /*
...@@ -1844,8 +1728,6 @@ xfs_ifree( ...@@ -1844,8 +1728,6 @@ xfs_ifree(
ip->i_d.di_flags = 0; ip->i_d.di_flags = 0;
ip->i_d.di_dmevmask = 0; ip->i_d.di_dmevmask = 0;
ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */ ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
ip->i_df.if_ext_max =
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
/* /*
...@@ -2151,7 +2033,7 @@ xfs_idestroy_fork( ...@@ -2151,7 +2033,7 @@ xfs_idestroy_fork(
* once someone is waiting for it to be unpinned. * once someone is waiting for it to be unpinned.
*/ */
static void static void
xfs_iunpin_nowait( xfs_iunpin(
struct xfs_inode *ip) struct xfs_inode *ip)
{ {
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
...@@ -2163,14 +2045,29 @@ xfs_iunpin_nowait( ...@@ -2163,14 +2045,29 @@ xfs_iunpin_nowait(
} }
/*
 * Sleep until the pin count of the inode has dropped to zero.
 *
 * The task waits on the bit waitqueue keyed by the __XFS_IPINNED_BIT bit of
 * ip->i_flags and re-checks xfs_ipincount() after every wakeup, so a wakeup
 * for an unrelated user of the same hashed waitqueue simply loops again.
 */
static void
__xfs_iunpin_wait(
struct xfs_inode *ip)
{
wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
/*
 * NOTE(review): the body of xfs_iunpin() is not visible here; presumably it
 * starts whatever is needed to get the inode unpinned - confirm.
 */
xfs_iunpin(ip);
do {
/*
 * Queue ourselves before testing the pin count so that the count is
 * checked only after we are already on the waitqueue.
 */
prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
if (xfs_ipincount(ip))
io_schedule();
} while (xfs_ipincount(ip));
finish_wait(wq, &wait.wait);
}
void void
xfs_iunpin_wait( xfs_iunpin_wait(
struct xfs_inode *ip) struct xfs_inode *ip)
{ {
if (xfs_ipincount(ip)) { if (xfs_ipincount(ip))
xfs_iunpin_nowait(ip); __xfs_iunpin_wait(ip);
wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0));
}
} }
/* /*
...@@ -2510,9 +2407,9 @@ xfs_iflush( ...@@ -2510,9 +2407,9 @@ xfs_iflush(
XFS_STATS_INC(xs_iflush_count); XFS_STATS_INC(xs_iflush_count);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush)); ASSERT(xfs_isiflocked(ip));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max); ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
iip = ip->i_itemp; iip = ip->i_itemp;
mp = ip->i_mount; mp = ip->i_mount;
...@@ -2529,7 +2426,7 @@ xfs_iflush( ...@@ -2529,7 +2426,7 @@ xfs_iflush(
* out for us if they occur after the log force completes. * out for us if they occur after the log force completes.
*/ */
if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) { if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
xfs_iunpin_nowait(ip); xfs_iunpin(ip);
xfs_ifunlock(ip); xfs_ifunlock(ip);
return EAGAIN; return EAGAIN;
} }
...@@ -2626,9 +2523,9 @@ xfs_iflush_int( ...@@ -2626,9 +2523,9 @@ xfs_iflush_int(
#endif #endif
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush)); ASSERT(xfs_isiflocked(ip));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max); ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
iip = ip->i_itemp; iip = ip->i_itemp;
mp = ip->i_mount; mp = ip->i_mount;
......
...@@ -66,7 +66,6 @@ typedef struct xfs_ifork { ...@@ -66,7 +66,6 @@ typedef struct xfs_ifork {
struct xfs_btree_block *if_broot; /* file's incore btree root */ struct xfs_btree_block *if_broot; /* file's incore btree root */
short if_broot_bytes; /* bytes allocated for root */ short if_broot_bytes; /* bytes allocated for root */
unsigned char if_flags; /* per-fork flags */ unsigned char if_flags; /* per-fork flags */
unsigned char if_ext_max; /* max # of extent records */
union { union {
xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */ xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
xfs_ext_irec_t *if_ext_irec; /* irec map file exts */ xfs_ext_irec_t *if_ext_irec; /* irec map file exts */
...@@ -206,12 +205,12 @@ typedef struct xfs_icdinode { ...@@ -206,12 +205,12 @@ typedef struct xfs_icdinode {
((w) == XFS_DATA_FORK ? \ ((w) == XFS_DATA_FORK ? \
((ip)->i_d.di_nextents = (n)) : \ ((ip)->i_d.di_nextents = (n)) : \
((ip)->i_d.di_anextents = (n))) ((ip)->i_d.di_anextents = (n)))
/*
 * Number of xfs_bmbt_rec_t extent records that fit into fork "w" of inode
 * "ip": the fork size reported by XFS_IFORK_SIZE() divided by the size of
 * one record.
 */
#define XFS_IFORK_MAXEXT(ip, w) \
(XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
#ifdef __KERNEL__ #ifdef __KERNEL__
struct bhv_desc;
struct xfs_buf; struct xfs_buf;
struct xfs_bmap_free; struct xfs_bmap_free;
struct xfs_bmbt_irec; struct xfs_bmbt_irec;
...@@ -220,12 +219,6 @@ struct xfs_mount; ...@@ -220,12 +219,6 @@ struct xfs_mount;
struct xfs_trans; struct xfs_trans;
struct xfs_dquot; struct xfs_dquot;
typedef struct dm_attrs_s {
__uint32_t da_dmevmask; /* DMIG event mask */
__uint16_t da_dmstate; /* DMIG state info */
__uint16_t da_pad; /* DMIG extra padding */
} dm_attrs_t;
typedef struct xfs_inode { typedef struct xfs_inode {
/* Inode linking and identification information. */ /* Inode linking and identification information. */
struct xfs_mount *i_mount; /* fs mount struct ptr */ struct xfs_mount *i_mount; /* fs mount struct ptr */
...@@ -244,27 +237,19 @@ typedef struct xfs_inode { ...@@ -244,27 +237,19 @@ typedef struct xfs_inode {
struct xfs_inode_log_item *i_itemp; /* logging information */ struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */ mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */ mrlock_t i_iolock; /* inode IO lock */
struct completion i_flush; /* inode flush completion q */
atomic_t i_pincount; /* inode pin count */ atomic_t i_pincount; /* inode pin count */
wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
spinlock_t i_flags_lock; /* inode i_flags lock */ spinlock_t i_flags_lock; /* inode i_flags lock */
/* Miscellaneous state. */ /* Miscellaneous state. */
unsigned short i_flags; /* see defined flags below */ unsigned long i_flags; /* see defined flags below */
unsigned char i_update_core; /* timestamps/size is dirty */ unsigned char i_update_core; /* timestamps/size is dirty */
unsigned int i_delayed_blks; /* count of delay alloc blks */ unsigned int i_delayed_blks; /* count of delay alloc blks */
xfs_icdinode_t i_d; /* most of ondisk inode */ xfs_icdinode_t i_d; /* most of ondisk inode */
xfs_fsize_t i_size; /* in-memory size */
xfs_fsize_t i_new_size; /* size when write completes */
/* VFS inode */ /* VFS inode */
struct inode i_vnode; /* embedded VFS inode */ struct inode i_vnode; /* embedded VFS inode */
} xfs_inode_t; } xfs_inode_t;
#define XFS_ISIZE(ip) S_ISREG((ip)->i_d.di_mode) ? \
(ip)->i_size : (ip)->i_d.di_size;
/* Convert from vfs inode to xfs inode */ /* Convert from vfs inode to xfs inode */
static inline struct xfs_inode *XFS_I(struct inode *inode) static inline struct xfs_inode *XFS_I(struct inode *inode)
{ {
...@@ -277,6 +262,18 @@ static inline struct inode *VFS_I(struct xfs_inode *ip) ...@@ -277,6 +262,18 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
return &ip->i_vnode; return &ip->i_vnode;
} }
/*
 * Return the size of the inode that callers should treat as current.
 *
 * The on-disk size (i_d.di_size) of a regular file is only brought up to
 * date when data is actually written back, so for regular files the size
 * held in the VFS inode is the authoritative one.  All other file types
 * use di_size directly.
 */
static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
{
	return S_ISREG(ip->i_d.di_mode) ? i_size_read(VFS_I(ip))
					: ip->i_d.di_size;
}
/* /*
* i_flags helper functions * i_flags helper functions
*/ */
...@@ -331,6 +328,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) ...@@ -331,6 +328,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
return ret; return ret;
} }
/*
 * Test for and set inode flags while holding i_flags_lock.
 *
 * Returns the subset of @flags that was already set in ip->i_flags.  When
 * that subset is empty (return value 0) every bit in @flags is set before
 * the lock is dropped; otherwise i_flags is left unmodified.
 *
 * NOTE(review): @flags is an unsigned short and the result is an int, so
 * only flag bits 0-15 can be passed through this helper - confirm that is
 * intended if i_flags is wider than that.
 */
static inline int
xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
{
	int	already_set;

	spin_lock(&ip->i_flags_lock);
	already_set = ip->i_flags & flags;
	if (already_set == 0)
		ip->i_flags |= flags;
	spin_unlock(&ip->i_flags_lock);

	return already_set;
}
/* /*
* Project quota id helpers (previously projid was 16bit only * Project quota id helpers (previously projid was 16bit only
* and using two 16bit values to hold new 32bit projid was chosen * and using two 16bit values to hold new 32bit projid was chosen
...@@ -350,36 +360,20 @@ xfs_set_projid(struct xfs_inode *ip, ...@@ -350,36 +360,20 @@ xfs_set_projid(struct xfs_inode *ip,
ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
} }
/*
* Manage the i_flush queue embedded in the inode. This completion
* queue synchronizes processes attempting to flush the in-core
* inode back to disk.
*/
static inline void xfs_iflock(xfs_inode_t *ip)
{
wait_for_completion(&ip->i_flush);
}
static inline int xfs_iflock_nowait(xfs_inode_t *ip)
{
return try_wait_for_completion(&ip->i_flush);
}
static inline void xfs_ifunlock(xfs_inode_t *ip)
{
complete(&ip->i_flush);
}
/* /*
* In-core inode flags. * In-core inode flags.
*/ */
#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */ #define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
#define XFS_ISTALE 0x0002 /* inode has been staled */ #define XFS_ISTALE (1 << 1) /* inode has been staled */
#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */ #define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
#define XFS_INEW 0x0008 /* inode has just been allocated */ #define XFS_INEW (1 << 3) /* inode has just been allocated */
#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */ #define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */
#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */ #define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */ #define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
/*
 * The following two flags are defined both as a bit number and as a mask.
 * The __XFS_*_BIT bit numbers are the keys handed to bit_waitqueue() and
 * wake_up_bit() on ip->i_flags; the XFS_* values are the matching masks for
 * the xfs_iflags_*() helpers.
 */
#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */
#define XFS_IPINNED (1 << __XFS_IPINNED_BIT)
/* /*
* Per-lifetime flags need to be reset when re-using a reclaimable inode during * Per-lifetime flags need to be reset when re-using a reclaimable inode during
...@@ -391,6 +385,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip) ...@@ -391,6 +385,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
XFS_IFILESTREAM); XFS_IFILESTREAM);
/*
* Synchronize processes attempting to flush the in-core inode back to disk.
*/
extern void __xfs_iflock(struct xfs_inode *ip);
/*
 * Try to take the inode flush lock without sleeping.
 *
 * Returns 1 if XFS_IFLOCK was clear and has now been set by this caller,
 * 0 if the flag was already set.
 */
static inline int xfs_iflock_nowait(struct xfs_inode *ip)
{
	return xfs_iflags_test_and_set(ip, XFS_IFLOCK) == 0;
}
/*
 * Take the inode flush lock.
 *
 * The uncontended case is handled inline through xfs_iflock_nowait(); only
 * when that fails is the out-of-line __xfs_iflock() called.
 * NOTE(review): __xfs_iflock() is defined elsewhere; presumably it sleeps
 * until the lock can be taken - confirm.
 */
static inline void xfs_iflock(struct xfs_inode *ip)
{
	if (xfs_iflock_nowait(ip))
		return;

	__xfs_iflock(ip);
}
/*
 * Drop the inode flush lock: clear XFS_IFLOCK first, then wake anyone
 * waiting on the __XFS_IFLOCK_BIT key of ip->i_flags.
 */
static inline void xfs_ifunlock(struct xfs_inode *ip)
{
xfs_iflags_clear(ip, XFS_IFLOCK);
wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
}
/*
 * Report whether the inode flush lock is currently held, i.e. whether
 * XFS_IFLOCK is set according to xfs_iflags_test().  The result is only a
 * snapshot; nothing here keeps the state from changing afterwards.
 */
static inline int xfs_isiflocked(struct xfs_inode *ip)
{
return xfs_iflags_test(ip, XFS_IFLOCK);
}
/* /*
* Flags for inode locking. * Flags for inode locking.
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield) * Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
...@@ -491,8 +513,6 @@ int xfs_ifree(struct xfs_trans *, xfs_inode_t *, ...@@ -491,8 +513,6 @@ int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
struct xfs_bmap_free *); struct xfs_bmap_free *);
int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *, int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
int, xfs_fsize_t); int, xfs_fsize_t);
int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *,
xfs_fsize_t);
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int); void xfs_iext_realloc(xfs_inode_t *, int, int);
......
...@@ -79,8 +79,6 @@ xfs_inode_item_size( ...@@ -79,8 +79,6 @@ xfs_inode_item_size(
break; break;
case XFS_DINODE_FMT_BTREE: case XFS_DINODE_FMT_BTREE:
ASSERT(ip->i_df.if_ext_max ==
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
iip->ili_format.ilf_fields &= iip->ili_format.ilf_fields &=
~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | ~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
XFS_ILOG_DEV | XFS_ILOG_UUID); XFS_ILOG_DEV | XFS_ILOG_UUID);
...@@ -557,7 +555,7 @@ xfs_inode_item_unpin( ...@@ -557,7 +555,7 @@ xfs_inode_item_unpin(
trace_xfs_inode_unpin(ip, _RET_IP_); trace_xfs_inode_unpin(ip, _RET_IP_);
ASSERT(atomic_read(&ip->i_pincount) > 0); ASSERT(atomic_read(&ip->i_pincount) > 0);
if (atomic_dec_and_test(&ip->i_pincount)) if (atomic_dec_and_test(&ip->i_pincount))
wake_up(&ip->i_ipin_wait); wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
} }
/* /*
...@@ -719,7 +717,7 @@ xfs_inode_item_pushbuf( ...@@ -719,7 +717,7 @@ xfs_inode_item_pushbuf(
* If a flush is not in progress anymore, chances are that the * If a flush is not in progress anymore, chances are that the
* inode was taken off the AIL. So, just get out. * inode was taken off the AIL. So, just get out.
*/ */
if (completion_done(&ip->i_flush) || if (!xfs_isiflocked(ip) ||
!(lip->li_flags & XFS_LI_IN_AIL)) { !(lip->li_flags & XFS_LI_IN_AIL)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_iunlock(ip, XFS_ILOCK_SHARED);
return true; return true;
...@@ -752,7 +750,7 @@ xfs_inode_item_push( ...@@ -752,7 +750,7 @@ xfs_inode_item_push(
struct xfs_inode *ip = iip->ili_inode; struct xfs_inode *ip = iip->ili_inode;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush)); ASSERT(xfs_isiflocked(ip));
/* /*
* Since we were able to lock the inode's flush lock and * Since we were able to lock the inode's flush lock and
......
...@@ -57,26 +57,26 @@ xfs_iomap_eof_align_last_fsb( ...@@ -57,26 +57,26 @@ xfs_iomap_eof_align_last_fsb(
xfs_fileoff_t *last_fsb) xfs_fileoff_t *last_fsb)
{ {
xfs_fileoff_t new_last_fsb = 0; xfs_fileoff_t new_last_fsb = 0;
xfs_extlen_t align; xfs_extlen_t align = 0;
int eof, error; int eof, error;
if (XFS_IS_REALTIME_INODE(ip)) if (!XFS_IS_REALTIME_INODE(ip)) {
; /*
/* * Round up the allocation request to a stripe unit
* If mounted with the "-o swalloc" option, roundup the allocation * (m_dalign) boundary if the file size is >= stripe unit
* request to a stripe width boundary if the file size is >= * size, and we are allocating past the allocation eof.
* stripe width and we are allocating past the allocation eof. *
*/ * If mounted with the "-o swalloc" option the alignment is
else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) && * increased from the stripe unit size to the stripe width.
(ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth))) */
new_last_fsb = roundup_64(*last_fsb, mp->m_swidth); if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
/* align = mp->m_swidth;
* Roundup the allocation request to a stripe unit (m_dalign) boundary else if (mp->m_dalign)
* if the file size is >= stripe unit size, and we are allocating past align = mp->m_dalign;
* the allocation eof.
*/ if (align && XFS_ISIZE(ip) >= XFS_FSB_TO_B(mp, align))
else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign))) new_last_fsb = roundup_64(*last_fsb, align);
new_last_fsb = roundup_64(*last_fsb, mp->m_dalign); }
/* /*
* Always round up the allocation request to an extent boundary * Always round up the allocation request to an extent boundary
...@@ -154,7 +154,7 @@ xfs_iomap_write_direct( ...@@ -154,7 +154,7 @@ xfs_iomap_write_direct(
offset_fsb = XFS_B_TO_FSBT(mp, offset); offset_fsb = XFS_B_TO_FSBT(mp, offset);
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
if ((offset + count) > ip->i_size) { if ((offset + count) > XFS_ISIZE(ip)) {
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
if (error) if (error)
goto error_out; goto error_out;
...@@ -211,7 +211,7 @@ xfs_iomap_write_direct( ...@@ -211,7 +211,7 @@ xfs_iomap_write_direct(
xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, ip, 0);
bmapi_flag = 0; bmapi_flag = 0;
if (offset < ip->i_size || extsz) if (offset < XFS_ISIZE(ip) || extsz)
bmapi_flag |= XFS_BMAPI_PREALLOC; bmapi_flag |= XFS_BMAPI_PREALLOC;
/* /*
...@@ -286,7 +286,7 @@ xfs_iomap_eof_want_preallocate( ...@@ -286,7 +286,7 @@ xfs_iomap_eof_want_preallocate(
int found_delalloc = 0; int found_delalloc = 0;
*prealloc = 0; *prealloc = 0;
if ((offset + count) <= ip->i_size) if (offset + count <= XFS_ISIZE(ip))
return 0; return 0;
/* /*
...@@ -340,7 +340,7 @@ xfs_iomap_prealloc_size( ...@@ -340,7 +340,7 @@ xfs_iomap_prealloc_size(
* if we pass in alloc_blocks = 0. Hence the "+ 1" to * if we pass in alloc_blocks = 0. Hence the "+ 1" to
* ensure we always pass in a non-zero value. * ensure we always pass in a non-zero value.
*/ */
alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1; alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN, alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
rounddown_pow_of_two(alloc_blocks)); rounddown_pow_of_two(alloc_blocks));
...@@ -564,7 +564,7 @@ xfs_iomap_write_allocate( ...@@ -564,7 +564,7 @@ xfs_iomap_write_allocate(
* back.... * back....
*/ */
nimaps = 1; nimaps = 1;
end_fsb = XFS_B_TO_FSB(mp, ip->i_size); end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
error = xfs_bmap_last_offset(NULL, ip, &last_block, error = xfs_bmap_last_offset(NULL, ip, &last_block,
XFS_DATA_FORK); XFS_DATA_FORK);
if (error) if (error)
......
...@@ -750,6 +750,7 @@ xfs_setattr_size( ...@@ -750,6 +750,7 @@ xfs_setattr_size(
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
struct inode *inode = VFS_I(ip); struct inode *inode = VFS_I(ip);
int mask = iattr->ia_valid; int mask = iattr->ia_valid;
xfs_off_t oldsize, newsize;
struct xfs_trans *tp; struct xfs_trans *tp;
int error; int error;
uint lock_flags; uint lock_flags;
...@@ -777,11 +778,13 @@ xfs_setattr_size( ...@@ -777,11 +778,13 @@ xfs_setattr_size(
lock_flags |= XFS_IOLOCK_EXCL; lock_flags |= XFS_IOLOCK_EXCL;
xfs_ilock(ip, lock_flags); xfs_ilock(ip, lock_flags);
oldsize = inode->i_size;
newsize = iattr->ia_size;
/* /*
* Short circuit the truncate case for zero length files. * Short circuit the truncate case for zero length files.
*/ */
if (iattr->ia_size == 0 && if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
ip->i_size == 0 && ip->i_d.di_nextents == 0) {
if (!(mask & (ATTR_CTIME|ATTR_MTIME))) if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
goto out_unlock; goto out_unlock;
...@@ -807,14 +810,14 @@ xfs_setattr_size( ...@@ -807,14 +810,14 @@ xfs_setattr_size(
* the inode to the transaction, because the inode cannot be unlocked * the inode to the transaction, because the inode cannot be unlocked
* once it is a part of the transaction. * once it is a part of the transaction.
*/ */
if (iattr->ia_size > ip->i_size) { if (newsize > oldsize) {
/* /*
* Do the first part of growing a file: zero any data in the * Do the first part of growing a file: zero any data in the
* last block that is beyond the old EOF. We need to do this * last block that is beyond the old EOF. We need to do this
* before the inode is joined to the transaction to modify * before the inode is joined to the transaction to modify
* i_size. * i_size.
*/ */
error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); error = xfs_zero_eof(ip, newsize, oldsize);
if (error) if (error)
goto out_unlock; goto out_unlock;
} }
...@@ -833,8 +836,8 @@ xfs_setattr_size( ...@@ -833,8 +836,8 @@ xfs_setattr_size(
* here and prevents waiting for other data not within the range we * here and prevents waiting for other data not within the range we
* care about here. * care about here.
*/ */
if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, 0, error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0,
FI_NONE); FI_NONE);
if (error) if (error)
goto out_unlock; goto out_unlock;
...@@ -845,8 +848,7 @@ xfs_setattr_size( ...@@ -845,8 +848,7 @@ xfs_setattr_size(
*/ */
inode_dio_wait(inode); inode_dio_wait(inode);
error = -block_truncate_page(inode->i_mapping, iattr->ia_size, error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
xfs_get_blocks);
if (error) if (error)
goto out_unlock; goto out_unlock;
...@@ -857,7 +859,7 @@ xfs_setattr_size( ...@@ -857,7 +859,7 @@ xfs_setattr_size(
if (error) if (error)
goto out_trans_cancel; goto out_trans_cancel;
truncate_setsize(inode, iattr->ia_size); truncate_setsize(inode, newsize);
commit_flags = XFS_TRANS_RELEASE_LOG_RES; commit_flags = XFS_TRANS_RELEASE_LOG_RES;
lock_flags |= XFS_ILOCK_EXCL; lock_flags |= XFS_ILOCK_EXCL;
...@@ -876,19 +878,29 @@ xfs_setattr_size( ...@@ -876,19 +878,29 @@ xfs_setattr_size(
* these flags set. For all other operations the VFS set these flags * these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update. * explicitly if it wants a timestamp update.
*/ */
if (iattr->ia_size != ip->i_size && if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
(!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
iattr->ia_ctime = iattr->ia_mtime = iattr->ia_ctime = iattr->ia_mtime =
current_fs_time(inode->i_sb); current_fs_time(inode->i_sb);
mask |= ATTR_CTIME | ATTR_MTIME; mask |= ATTR_CTIME | ATTR_MTIME;
} }
if (iattr->ia_size > ip->i_size) { /*
ip->i_d.di_size = iattr->ia_size; * The first thing we do is set the size to new_size permanently on
ip->i_size = iattr->ia_size; * disk. This way we don't have to worry about anyone ever being able
} else if (iattr->ia_size <= ip->i_size || * to look at the data being freed even in the face of a crash.
(iattr->ia_size == 0 && ip->i_d.di_nextents)) { * What we're getting around here is the case where we free a block, it
error = xfs_itruncate_data(&tp, ip, iattr->ia_size); * is allocated to another file, it is written to, and then we crash.
* If the new data gets written to the file but the log buffers
* containing the free and reallocation don't, then we'd end up with
* garbage in the blocks being freed. As long as we make the new size
* permanent before actually freeing any blocks it doesn't matter if
* they get written to.
*/
ip->i_d.di_size = newsize;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (newsize <= oldsize) {
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
if (error) if (error)
goto out_trans_abort; goto out_trans_abort;
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "xfs_mount.h" #include "xfs_mount.h"
#include "xfs_bmap_btree.h" #include "xfs_bmap_btree.h"
#include "xfs_inode.h" #include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_itable.h" #include "xfs_itable.h"
#include "xfs_bmap.h" #include "xfs_bmap.h"
#include "xfs_rtalloc.h" #include "xfs_rtalloc.h"
...@@ -263,13 +264,18 @@ xfs_qm_scall_trunc_qfile( ...@@ -263,13 +264,18 @@ xfs_qm_scall_trunc_qfile(
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, ip, 0);
error = xfs_itruncate_data(&tp, ip, 0); ip->i_d.di_size = 0;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
XFS_TRANS_ABORT); XFS_TRANS_ABORT);
goto out_unlock; goto out_unlock;
} }
ASSERT(ip->i_d.di_nextents == 0);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
......
...@@ -828,14 +828,6 @@ xfs_fs_inode_init_once( ...@@ -828,14 +828,6 @@ xfs_fs_inode_init_once(
/* xfs inode */ /* xfs inode */
atomic_set(&ip->i_pincount, 0); atomic_set(&ip->i_pincount, 0);
spin_lock_init(&ip->i_flags_lock); spin_lock_init(&ip->i_flags_lock);
init_waitqueue_head(&ip->i_ipin_wait);
/*
* Because we want to use a counting completion, complete
* the flush completion once to allow a single access to
* the flush completion without blocking.
*/
init_completion(&ip->i_flush);
complete(&ip->i_flush);
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
"xfsino", ip->i_ino); "xfsino", ip->i_ino);
......
...@@ -707,14 +707,13 @@ xfs_reclaim_inode_grab( ...@@ -707,14 +707,13 @@ xfs_reclaim_inode_grab(
return 1; return 1;
/* /*
* do some unlocked checks first to avoid unnecessary lock traffic. * If we are asked for non-blocking operation, do unlocked checks to
* The first is a flush lock check, the second is a already in reclaim * see if the inode already is being flushed or in reclaim to avoid
* check. Only do these checks if we are not going to block on locks. * lock traffic.
*/ */
if ((flags & SYNC_TRYLOCK) && if ((flags & SYNC_TRYLOCK) &&
(!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { __xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
return 1; return 1;
}
/* /*
* The radix tree lock here protects a thread in xfs_iget from racing * The radix tree lock here protects a thread in xfs_iget from racing
......
...@@ -891,7 +891,6 @@ DECLARE_EVENT_CLASS(xfs_file_class, ...@@ -891,7 +891,6 @@ DECLARE_EVENT_CLASS(xfs_file_class,
__field(dev_t, dev) __field(dev_t, dev)
__field(xfs_ino_t, ino) __field(xfs_ino_t, ino)
__field(xfs_fsize_t, size) __field(xfs_fsize_t, size)
__field(xfs_fsize_t, new_size)
__field(loff_t, offset) __field(loff_t, offset)
__field(size_t, count) __field(size_t, count)
__field(int, flags) __field(int, flags)
...@@ -900,17 +899,15 @@ DECLARE_EVENT_CLASS(xfs_file_class, ...@@ -900,17 +899,15 @@ DECLARE_EVENT_CLASS(xfs_file_class,
__entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino; __entry->ino = ip->i_ino;
__entry->size = ip->i_d.di_size; __entry->size = ip->i_d.di_size;
__entry->new_size = ip->i_new_size;
__entry->offset = offset; __entry->offset = offset;
__entry->count = count; __entry->count = count;
__entry->flags = flags; __entry->flags = flags;
), ),
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " TP_printk("dev %d:%d ino 0x%llx size 0x%llx "
"offset 0x%llx count 0x%zx ioflags %s", "offset 0x%llx count 0x%zx ioflags %s",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino, __entry->ino,
__entry->size, __entry->size,
__entry->new_size,
__entry->offset, __entry->offset,
__entry->count, __entry->count,
__print_flags(__entry->flags, "|", XFS_IO_FLAGS)) __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
...@@ -978,7 +975,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class, ...@@ -978,7 +975,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
__field(dev_t, dev) __field(dev_t, dev)
__field(xfs_ino_t, ino) __field(xfs_ino_t, ino)
__field(loff_t, size) __field(loff_t, size)
__field(loff_t, new_size)
__field(loff_t, offset) __field(loff_t, offset)
__field(size_t, count) __field(size_t, count)
__field(int, type) __field(int, type)
...@@ -990,7 +986,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class, ...@@ -990,7 +986,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
__entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino; __entry->ino = ip->i_ino;
__entry->size = ip->i_d.di_size; __entry->size = ip->i_d.di_size;
__entry->new_size = ip->i_new_size;
__entry->offset = offset; __entry->offset = offset;
__entry->count = count; __entry->count = count;
__entry->type = type; __entry->type = type;
...@@ -998,13 +993,11 @@ DECLARE_EVENT_CLASS(xfs_imap_class, ...@@ -998,13 +993,11 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
__entry->startblock = irec ? irec->br_startblock : 0; __entry->startblock = irec ? irec->br_startblock : 0;
__entry->blockcount = irec ? irec->br_blockcount : 0; __entry->blockcount = irec ? irec->br_blockcount : 0;
), ),
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd "
"offset 0x%llx count %zd type %s " "type %s startoff 0x%llx startblock %lld blockcount 0x%llx",
"startoff 0x%llx startblock %lld blockcount 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino, __entry->ino,
__entry->size, __entry->size,
__entry->new_size,
__entry->offset, __entry->offset,
__entry->count, __entry->count,
__print_symbolic(__entry->type, XFS_IO_TYPES), __print_symbolic(__entry->type, XFS_IO_TYPES),
...@@ -1031,26 +1024,23 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class, ...@@ -1031,26 +1024,23 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
__field(xfs_ino_t, ino) __field(xfs_ino_t, ino)
__field(loff_t, isize) __field(loff_t, isize)
__field(loff_t, disize) __field(loff_t, disize)
__field(loff_t, new_size)
__field(loff_t, offset) __field(loff_t, offset)
__field(size_t, count) __field(size_t, count)
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev; __entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino; __entry->ino = ip->i_ino;
__entry->isize = ip->i_size; __entry->isize = VFS_I(ip)->i_size;
__entry->disize = ip->i_d.di_size; __entry->disize = ip->i_d.di_size;
__entry->new_size = ip->i_new_size;
__entry->offset = offset; __entry->offset = offset;
__entry->count = count; __entry->count = count;
), ),
TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx " TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx "
"offset 0x%llx count %zd", "offset 0x%llx count %zd",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino, __entry->ino,
__entry->isize, __entry->isize,
__entry->disize, __entry->disize,
__entry->new_size,
__entry->offset, __entry->offset,
__entry->count) __entry->count)
); );
...@@ -1090,8 +1080,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class, ...@@ -1090,8 +1080,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class,
DEFINE_EVENT(xfs_itrunc_class, name, \ DEFINE_EVENT(xfs_itrunc_class, name, \
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
TP_ARGS(ip, new_size)) TP_ARGS(ip, new_size))
DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_start);
DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_end);
TRACE_EVENT(xfs_pagecache_inval, TRACE_EVENT(xfs_pagecache_inval,
TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
...@@ -1568,7 +1558,6 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class, ...@@ -1568,7 +1558,6 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class,
__field(xfs_ino_t, ino) __field(xfs_ino_t, ino)
__field(int, format) __field(int, format)
__field(int, nex) __field(int, nex)
__field(int, max_nex)
__field(int, broot_size) __field(int, broot_size)
__field(int, fork_off) __field(int, fork_off)
), ),
...@@ -1578,18 +1567,16 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class, ...@@ -1578,18 +1567,16 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class,
__entry->ino = ip->i_ino; __entry->ino = ip->i_ino;
__entry->format = ip->i_d.di_format; __entry->format = ip->i_d.di_format;
__entry->nex = ip->i_d.di_nextents; __entry->nex = ip->i_d.di_nextents;
__entry->max_nex = ip->i_df.if_ext_max;
__entry->broot_size = ip->i_df.if_broot_bytes; __entry->broot_size = ip->i_df.if_broot_bytes;
__entry->fork_off = XFS_IFORK_BOFF(ip); __entry->fork_off = XFS_IFORK_BOFF(ip);
), ),
TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
"Max in-fork extents %d, broot size %d, fork offset %d", "broot size %d, fork offset %d",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino, __entry->ino,
__print_symbolic(__entry->which, XFS_SWAPEXT_INODES), __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
__print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
__entry->nex, __entry->nex,
__entry->max_nex,
__entry->broot_size, __entry->broot_size,
__entry->fork_off) __entry->fork_off)
) )
......
...@@ -175,7 +175,7 @@ xfs_free_eofblocks( ...@@ -175,7 +175,7 @@ xfs_free_eofblocks(
* Figure out if there are any blocks beyond the end * Figure out if there are any blocks beyond the end
* of the file. If not, then there is nothing to do. * of the file. If not, then there is nothing to do.
*/ */
end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size)); end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
if (last_fsb <= end_fsb) if (last_fsb <= end_fsb)
return 0; return 0;
...@@ -226,7 +226,14 @@ xfs_free_eofblocks( ...@@ -226,7 +226,14 @@ xfs_free_eofblocks(
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, ip, 0);
error = xfs_itruncate_data(&tp, ip, ip->i_size); /*
* Do not update the on-disk file size. If we update the
* on-disk file size and then the system crashes before the
* contents of the file are flushed to disk then the files
* may be full of holes (ie NULL files bug).
*/
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
XFS_ISIZE(ip));
if (error) { if (error) {
/* /*
* If we get an error at this point we simply don't * If we get an error at this point we simply don't
...@@ -540,8 +547,8 @@ xfs_release( ...@@ -540,8 +547,8 @@ xfs_release(
return 0; return 0;
if ((S_ISREG(ip->i_d.di_mode) && if ((S_ISREG(ip->i_d.di_mode) &&
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || (VFS_I(ip)->i_size > 0 ||
ip->i_delayed_blks > 0)) && (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS)) && (ip->i_df.if_flags & XFS_IFEXTENTS)) &&
(!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) { (!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
...@@ -618,7 +625,7 @@ xfs_inactive( ...@@ -618,7 +625,7 @@ xfs_inactive(
* only one with a reference to the inode. * only one with a reference to the inode.
*/ */
truncate = ((ip->i_d.di_nlink == 0) && truncate = ((ip->i_d.di_nlink == 0) &&
((ip->i_d.di_size != 0) || (ip->i_size != 0) || ((ip->i_d.di_size != 0) || XFS_ISIZE(ip) != 0 ||
(ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) && (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
S_ISREG(ip->i_d.di_mode)); S_ISREG(ip->i_d.di_mode));
...@@ -632,12 +639,12 @@ xfs_inactive( ...@@ -632,12 +639,12 @@ xfs_inactive(
if (ip->i_d.di_nlink != 0) { if (ip->i_d.di_nlink != 0) {
if ((S_ISREG(ip->i_d.di_mode) && if ((S_ISREG(ip->i_d.di_mode) &&
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || (VFS_I(ip)->i_size > 0 ||
ip->i_delayed_blks > 0)) && (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS) && (ip->i_df.if_flags & XFS_IFEXTENTS) &&
(!(ip->i_d.di_flags & (!(ip->i_d.di_flags &
(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) || (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
(ip->i_delayed_blks != 0)))) { ip->i_delayed_blks != 0))) {
error = xfs_free_eofblocks(mp, ip, 0); error = xfs_free_eofblocks(mp, ip, 0);
if (error) if (error)
return VN_INACTIVE_CACHE; return VN_INACTIVE_CACHE;
...@@ -670,13 +677,18 @@ xfs_inactive( ...@@ -670,13 +677,18 @@ xfs_inactive(
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, ip, 0);
error = xfs_itruncate_data(&tp, ip, 0); ip->i_d.di_size = 0;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, xfs_trans_cancel(tp,
XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
return VN_INACTIVE_CACHE; return VN_INACTIVE_CACHE;
} }
ASSERT(ip->i_d.di_nextents == 0);
} else if (S_ISLNK(ip->i_d.di_mode)) { } else if (S_ISLNK(ip->i_d.di_mode)) {
/* /*
...@@ -1961,11 +1973,11 @@ xfs_zero_remaining_bytes( ...@@ -1961,11 +1973,11 @@ xfs_zero_remaining_bytes(
* since nothing can read beyond eof. The space will * since nothing can read beyond eof. The space will
* be zeroed when the file is extended anyway. * be zeroed when the file is extended anyway.
*/ */
if (startoff >= ip->i_size) if (startoff >= XFS_ISIZE(ip))
return 0; return 0;
if (endoff > ip->i_size) if (endoff > XFS_ISIZE(ip))
endoff = ip->i_size; endoff = XFS_ISIZE(ip);
bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp, mp->m_rtdev_targp : mp->m_ddev_targp,
...@@ -2260,7 +2272,7 @@ xfs_change_file_space( ...@@ -2260,7 +2272,7 @@ xfs_change_file_space(
bf->l_start += offset; bf->l_start += offset;
break; break;
case 2: /*SEEK_END*/ case 2: /*SEEK_END*/
bf->l_start += ip->i_size; bf->l_start += XFS_ISIZE(ip);
break; break;
default: default:
return XFS_ERROR(EINVAL); return XFS_ERROR(EINVAL);
...@@ -2277,7 +2289,7 @@ xfs_change_file_space( ...@@ -2277,7 +2289,7 @@ xfs_change_file_space(
bf->l_whence = 0; bf->l_whence = 0;
startoffset = bf->l_start; startoffset = bf->l_start;
fsize = ip->i_size; fsize = XFS_ISIZE(ip);
/* /*
* XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve * XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment