Commit fe0142df authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'xfs-4.20-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Dave Chinner:
 "There's not a huge amount of change in this cycle - Darrick has been
  out of action for a couple of months (hence me sending the last few
  pull requests), so we decided a quiet cycle mainly focussed on bug
  fixes was a good idea. Darrick will take the helm again at the end of
  this merge window.

  FYI, I may be sending another update later in the cycle - there's a
  pending rework of the clone/dedupe_file_range code that fixes numerous
  bugs that is spread amongst the VFS, XFS and ocfs2 code. It has been
  reviewed and tested, Al and I just need to work out the details of the
  merge, so it may come from him rather than me.

  Summary:

   - only support filesystems with unwritten extents

   - add definition for statfs XFS magic number

   - remove unused parameters around reflink code

   - more debug for dangling delalloc extents

   - cancel COW extents on extent swap targets

   - fix quota stats output and clean up the code

   - refactor some of the attribute code in preparation for parent
     pointers

   - fix several buffer handling bugs"

* tag 'xfs-4.20-merge-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (21 commits)
  xfs: cancel COW blocks before swapext
  xfs: clear ail delwri queued bufs on unmount of shutdown fs
  xfs: use offsetof() in place of offset macros for __xfsstats
  xfs: Fix xqmstats offsets in /proc/fs/xfs/xqmstat
  xfs: fix use-after-free race in xfs_buf_rele
  xfs: Add attibute remove and helper functions
  xfs: Add attibute set and helper functions
  xfs: Add helper function xfs_attr_try_sf_addname
  xfs: Move fs/xfs/xfs_attr.h to fs/xfs/libxfs/xfs_attr.h
  xfs: issue log message on user force shutdown
  xfs: fix buffer state management in xrep_findroot_block
  xfs: always assign buffer verifiers when one is provided
  xfs: xrep_findroot_block should reject root blocks with siblings
  xfs: add a define for statfs magic to uapi
  xfs: print dangling delalloc extents
  xfs: fix fork selection in xfs_find_trim_cow_extent
  xfs: remove the unused trimmed argument from xfs_reflink_trim_around_shared
  xfs: remove the unused shared argument to xfs_reflink_reserve_cow
  xfs: handle zeroing in xfs_file_iomap_begin_delay
  xfs: remove suport for filesystems without unwritten extent flag
  ...
parents bfd93a87 96987eea
...@@ -191,6 +191,128 @@ xfs_attr_calc_size( ...@@ -191,6 +191,128 @@ xfs_attr_calc_size(
return nblks; return nblks;
} }
STATIC int
xfs_attr_try_sf_addname(
struct xfs_inode *dp,
struct xfs_da_args *args)
{
struct xfs_mount *mp = dp->i_mount;
int error, error2;
error = xfs_attr_shortform_addname(args);
if (error == -ENOSPC)
return error;
/*
* Commit the shortform mods, and we're done.
* NOTE: this is also the error path (EEXIST, etc).
*/
if (!error && (args->flags & ATTR_KERNOTIME) == 0)
xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG);
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(args->trans);
error2 = xfs_trans_commit(args->trans);
args->trans = NULL;
return error ? error : error2;
}
/*
* Set the attribute specified in @args.
*/
int
xfs_attr_set_args(
struct xfs_da_args *args,
struct xfs_buf **leaf_bp)
{
struct xfs_inode *dp = args->dp;
int error;
/*
* If the attribute list is non-existent or a shortform list,
* upgrade it to a single-leaf-block attribute list.
*/
if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
(dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
dp->i_d.di_anextents == 0)) {
/*
* Build initial attribute list (if required).
*/
if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
xfs_attr_shortform_create(args);
/*
* Try to add the attr to the attribute list in the inode.
*/
error = xfs_attr_try_sf_addname(dp, args);
if (error != -ENOSPC)
return error;
/*
* It won't fit in the shortform, transform to a leaf block.
* GROT: another possible req'mt for a double-split btree op.
*/
error = xfs_attr_shortform_to_leaf(args, leaf_bp);
if (error)
return error;
/*
* Prevent the leaf buffer from being unlocked so that a
* concurrent AIL push cannot grab the half-baked leaf
* buffer and run into problems with the write verifier.
*/
xfs_trans_bhold(args->trans, *leaf_bp);
error = xfs_defer_finish(&args->trans);
if (error)
return error;
/*
* Commit the leaf transformation. We'll need another
* (linked) transaction to add the new attribute to the
* leaf.
*/
error = xfs_trans_roll_inode(&args->trans, dp);
if (error)
return error;
xfs_trans_bjoin(args->trans, *leaf_bp);
*leaf_bp = NULL;
}
if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
error = xfs_attr_leaf_addname(args);
else
error = xfs_attr_node_addname(args);
return error;
}
/*
* Remove the attribute specified in @args.
*/
int
xfs_attr_remove_args(
struct xfs_da_args *args)
{
struct xfs_inode *dp = args->dp;
int error;
if (!xfs_inode_hasattr(dp)) {
error = -ENOATTR;
} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
error = xfs_attr_shortform_remove(args);
} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
error = xfs_attr_leaf_removename(args);
} else {
error = xfs_attr_node_removename(args);
}
return error;
}
int int
xfs_attr_set( xfs_attr_set(
struct xfs_inode *dp, struct xfs_inode *dp,
...@@ -204,7 +326,7 @@ xfs_attr_set( ...@@ -204,7 +326,7 @@ xfs_attr_set(
struct xfs_da_args args; struct xfs_da_args args;
struct xfs_trans_res tres; struct xfs_trans_res tres;
int rsvd = (flags & ATTR_ROOT) != 0; int rsvd = (flags & ATTR_ROOT) != 0;
int error, err2, local; int error, local;
XFS_STATS_INC(mp, xs_attr_set); XFS_STATS_INC(mp, xs_attr_set);
...@@ -255,94 +377,18 @@ xfs_attr_set( ...@@ -255,94 +377,18 @@ xfs_attr_set(
error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS); XFS_QMOPT_RES_REGBLKS);
if (error) {
xfs_iunlock(dp, XFS_ILOCK_EXCL);
xfs_trans_cancel(args.trans);
return error;
}
xfs_trans_ijoin(args.trans, dp, 0);
/*
* If the attribute list is non-existent or a shortform list,
* upgrade it to a single-leaf-block attribute list.
*/
if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL ||
(dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
dp->i_d.di_anextents == 0)) {
/*
* Build initial attribute list (if required).
*/
if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
xfs_attr_shortform_create(&args);
/*
* Try to add the attr to the attribute list in
* the inode.
*/
error = xfs_attr_shortform_addname(&args);
if (error != -ENOSPC) {
/*
* Commit the shortform mods, and we're done.
* NOTE: this is also the error path (EEXIST, etc).
*/
ASSERT(args.trans != NULL);
/*
* If this is a synchronous mount, make sure that
* the transaction goes to disk before returning
* to the user.
*/
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(args.trans);
if (!error && (flags & ATTR_KERNOTIME) == 0) {
xfs_trans_ichgtime(args.trans, dp,
XFS_ICHGTIME_CHG);
}
err2 = xfs_trans_commit(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error ? error : err2;
}
/*
* It won't fit in the shortform, transform to a leaf block.
* GROT: another possible req'mt for a double-split btree op.
*/
error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
if (error)
goto out;
/*
* Prevent the leaf buffer from being unlocked so that a
* concurrent AIL push cannot grab the half-baked leaf
* buffer and run into problems with the write verifier.
*/
xfs_trans_bhold(args.trans, leaf_bp);
error = xfs_defer_finish(&args.trans);
if (error) if (error)
goto out; goto out_trans_cancel;
/* xfs_trans_ijoin(args.trans, dp, 0);
* Commit the leaf transformation. We'll need another (linked) error = xfs_attr_set_args(&args, &leaf_bp);
* transaction to add the new attribute to the leaf, which
* means that we have to hold & join the leaf buffer here too.
*/
error = xfs_trans_roll_inode(&args.trans, dp);
if (error) if (error)
goto out; goto out_release_leaf;
xfs_trans_bjoin(args.trans, leaf_bp); if (!args.trans) {
leaf_bp = NULL; /* shortform attribute has already been committed */
goto out_unlock;
} }
if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
error = xfs_attr_leaf_addname(&args);
else
error = xfs_attr_node_addname(&args);
if (error)
goto out;
/* /*
* If this is a synchronous mount, make sure that the * If this is a synchronous mount, make sure that the
* transaction goes to disk before returning to the user. * transaction goes to disk before returning to the user.
...@@ -358,17 +404,17 @@ xfs_attr_set( ...@@ -358,17 +404,17 @@ xfs_attr_set(
*/ */
xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
error = xfs_trans_commit(args.trans); error = xfs_trans_commit(args.trans);
out_unlock:
xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_iunlock(dp, XFS_ILOCK_EXCL);
return error; return error;
out: out_release_leaf:
if (leaf_bp) if (leaf_bp)
xfs_trans_brelse(args.trans, leaf_bp); xfs_trans_brelse(args.trans, leaf_bp);
out_trans_cancel:
if (args.trans) if (args.trans)
xfs_trans_cancel(args.trans); xfs_trans_cancel(args.trans);
xfs_iunlock(dp, XFS_ILOCK_EXCL); goto out_unlock;
return error;
} }
/* /*
...@@ -423,17 +469,7 @@ xfs_attr_remove( ...@@ -423,17 +469,7 @@ xfs_attr_remove(
*/ */
xfs_trans_ijoin(args.trans, dp, 0); xfs_trans_ijoin(args.trans, dp, 0);
if (!xfs_inode_hasattr(dp)) { error = xfs_attr_remove_args(&args);
error = -ENOATTR;
} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
error = xfs_attr_shortform_remove(&args);
} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
error = xfs_attr_leaf_removename(&args);
} else {
error = xfs_attr_node_removename(&args);
}
if (error) if (error)
goto out; goto out;
......
...@@ -140,7 +140,9 @@ int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name, ...@@ -140,7 +140,9 @@ int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
unsigned char *value, int *valuelenp, int flags); unsigned char *value, int *valuelenp, int flags);
int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name, int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
unsigned char *value, int valuelen, int flags); unsigned char *value, int valuelen, int flags);
int xfs_attr_set_args(struct xfs_da_args *args, struct xfs_buf **leaf_bp);
int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags); int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
int xfs_attr_remove_args(struct xfs_da_args *args);
int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize, int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
int flags, struct attrlist_cursor_kern *cursor); int flags, struct attrlist_cursor_kern *cursor);
......
...@@ -1019,6 +1019,34 @@ xfs_bmap_add_attrfork_local( ...@@ -1019,6 +1019,34 @@ xfs_bmap_add_attrfork_local(
return -EFSCORRUPTED; return -EFSCORRUPTED;
} }
/* Set an inode attr fork off based on the format */
int
xfs_bmap_set_attrforkoff(
struct xfs_inode *ip,
int size,
int *version)
{
switch (ip->i_d.di_format) {
case XFS_DINODE_FMT_DEV:
ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
break;
case XFS_DINODE_FMT_LOCAL:
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
if (!ip->i_d.di_forkoff)
ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
else if ((ip->i_mount->m_flags & XFS_MOUNT_ATTR2) && version)
*version = 2;
break;
default:
ASSERT(0);
return -EINVAL;
}
return 0;
}
/* /*
* Convert inode from non-attributed to attributed. * Convert inode from non-attributed to attributed.
* Must not be in a transaction, ip must not be locked. * Must not be in a transaction, ip must not be locked.
...@@ -1070,26 +1098,9 @@ xfs_bmap_add_attrfork( ...@@ -1070,26 +1098,9 @@ xfs_bmap_add_attrfork(
xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, ip, 0);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_bmap_set_attrforkoff(ip, size, &version);
switch (ip->i_d.di_format) { if (error)
case XFS_DINODE_FMT_DEV:
ip->i_d.di_forkoff = roundup(sizeof(xfs_dev_t), 8) >> 3;
break;
case XFS_DINODE_FMT_LOCAL:
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
ip->i_d.di_forkoff = xfs_attr_shortform_bytesfit(ip, size);
if (!ip->i_d.di_forkoff)
ip->i_d.di_forkoff = xfs_default_attroffset(ip) >> 3;
else if (mp->m_flags & XFS_MOUNT_ATTR2)
version = 2;
break;
default:
ASSERT(0);
error = -EINVAL;
goto trans_cancel; goto trans_cancel;
}
ASSERT(ip->i_afp == NULL); ASSERT(ip->i_afp == NULL);
ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP); ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
ip->i_afp->if_flags = XFS_IFEXTENTS; ip->i_afp->if_flags = XFS_IFEXTENTS;
...@@ -4081,8 +4092,7 @@ xfs_bmapi_allocate( ...@@ -4081,8 +4092,7 @@ xfs_bmapi_allocate(
* extents to real extents when we're about to write the data. * extents to real extents when we're about to write the data.
*/ */
if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) && if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
(bma->flags & XFS_BMAPI_PREALLOC) && (bma->flags & XFS_BMAPI_PREALLOC))
xfs_sb_version_hasextflgbit(&mp->m_sb))
bma->got.br_state = XFS_EXT_UNWRITTEN; bma->got.br_state = XFS_EXT_UNWRITTEN;
if (bma->wasdel) if (bma->wasdel)
...@@ -5245,8 +5255,7 @@ __xfs_bunmapi( ...@@ -5245,8 +5255,7 @@ __xfs_bunmapi(
* unmapping part of it. But we can't really * unmapping part of it. But we can't really
* get rid of part of a realtime extent. * get rid of part of a realtime extent.
*/ */
if (del.br_state == XFS_EXT_UNWRITTEN || if (del.br_state == XFS_EXT_UNWRITTEN) {
!xfs_sb_version_hasextflgbit(&mp->m_sb)) {
/* /*
* This piece is unwritten, or we're not * This piece is unwritten, or we're not
* using unwritten extents. Skip over it. * using unwritten extents. Skip over it.
...@@ -5296,10 +5305,9 @@ __xfs_bunmapi( ...@@ -5296,10 +5305,9 @@ __xfs_bunmapi(
del.br_blockcount -= mod; del.br_blockcount -= mod;
del.br_startoff += mod; del.br_startoff += mod;
del.br_startblock += mod; del.br_startblock += mod;
} else if ((del.br_startoff == start && } else if (del.br_startoff == start &&
(del.br_state == XFS_EXT_UNWRITTEN || (del.br_state == XFS_EXT_UNWRITTEN ||
tp->t_blk_res == 0)) || tp->t_blk_res == 0)) {
!xfs_sb_version_hasextflgbit(&mp->m_sb)) {
/* /*
* Can't make it unwritten. There isn't * Can't make it unwritten. There isn't
* a full extent here so just skip it. * a full extent here so just skip it.
...@@ -6114,11 +6122,7 @@ xfs_bmap_validate_extent( ...@@ -6114,11 +6122,7 @@ xfs_bmap_validate_extent(
XFS_FSB_TO_AGNO(mp, endfsb)) XFS_FSB_TO_AGNO(mp, endfsb))
return __this_address; return __this_address;
} }
if (irec->br_state != XFS_EXT_NORM) { if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
if (whichfork != XFS_DATA_FORK)
return __this_address; return __this_address;
if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
return __this_address;
}
return NULL; return NULL;
} }
...@@ -183,6 +183,7 @@ void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, ...@@ -183,6 +183,7 @@ void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len); xfs_filblks_t len);
void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *); void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
int xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version);
void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno, void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, struct xfs_owner_info *oinfo, xfs_filblks_t len, struct xfs_owner_info *oinfo,
......
...@@ -287,6 +287,8 @@ static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp) ...@@ -287,6 +287,8 @@ static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp)
{ {
if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT))
return false; return false;
if (!(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT))
return false;
/* check for unknown features in the fs */ /* check for unknown features in the fs */
if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) ||
...@@ -357,12 +359,6 @@ static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp) ...@@ -357,12 +359,6 @@ static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp)
(sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT); (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);
} }
static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||
(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);
}
static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp) static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp)
{ {
return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
......
...@@ -1115,7 +1115,8 @@ xfs_fs_geometry( ...@@ -1115,7 +1115,8 @@ xfs_fs_geometry(
geo->version = XFS_FSOP_GEOM_VERSION; geo->version = XFS_FSOP_GEOM_VERSION;
geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
XFS_FSOP_GEOM_FLAGS_DIRV2; XFS_FSOP_GEOM_FLAGS_DIRV2 |
XFS_FSOP_GEOM_FLAGS_EXTFLG;
if (xfs_sb_version_hasattr(sbp)) if (xfs_sb_version_hasattr(sbp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR; geo->flags |= XFS_FSOP_GEOM_FLAGS_ATTR;
if (xfs_sb_version_hasquota(sbp)) if (xfs_sb_version_hasquota(sbp))
...@@ -1124,8 +1125,6 @@ xfs_fs_geometry( ...@@ -1124,8 +1125,6 @@ xfs_fs_geometry(
geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN; geo->flags |= XFS_FSOP_GEOM_FLAGS_IALIGN;
if (xfs_sb_version_hasdalign(sbp)) if (xfs_sb_version_hasdalign(sbp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN; geo->flags |= XFS_FSOP_GEOM_FLAGS_DALIGN;
if (xfs_sb_version_hasextflgbit(sbp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_EXTFLG;
if (xfs_sb_version_hassector(sbp)) if (xfs_sb_version_hassector(sbp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR; geo->flags |= XFS_FSOP_GEOM_FLAGS_SECTOR;
if (xfs_sb_version_hasasciici(sbp)) if (xfs_sb_version_hasasciici(sbp))
......
...@@ -29,6 +29,8 @@ ...@@ -29,6 +29,8 @@
#include "xfs_ag_resv.h" #include "xfs_ag_resv.h"
#include "xfs_trans_space.h" #include "xfs_trans_space.h"
#include "xfs_quota.h" #include "xfs_quota.h"
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "scrub/xfs_scrub.h" #include "scrub/xfs_scrub.h"
#include "scrub/scrub.h" #include "scrub/scrub.h"
#include "scrub/common.h" #include "scrub/common.h"
...@@ -692,13 +694,14 @@ xrep_findroot_block( ...@@ -692,13 +694,14 @@ xrep_findroot_block(
struct xrep_find_ag_btree *fab, struct xrep_find_ag_btree *fab,
uint64_t owner, uint64_t owner,
xfs_agblock_t agbno, xfs_agblock_t agbno,
bool *found_it) bool *done_with_block)
{ {
struct xfs_mount *mp = ri->sc->mp; struct xfs_mount *mp = ri->sc->mp;
struct xfs_buf *bp; struct xfs_buf *bp;
struct xfs_btree_block *btblock; struct xfs_btree_block *btblock;
xfs_daddr_t daddr; xfs_daddr_t daddr;
int error; int block_level;
int error = 0;
daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.agno, agbno); daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.agno, agbno);
...@@ -717,36 +720,111 @@ xrep_findroot_block( ...@@ -717,36 +720,111 @@ xrep_findroot_block(
return error; return error;
} }
/*
* Read the buffer into memory so that we can see if it's a match for
* our btree type. We have no clue if it is beforehand, and we want to
* avoid xfs_trans_read_buf's behavior of dumping the DONE state (which
* will cause needless disk reads in subsequent calls to this function)
* and logging metadata verifier failures.
*
* Therefore, pass in NULL buffer ops. If the buffer was already in
* memory from some other caller it will already have b_ops assigned.
* If it was in memory from a previous unsuccessful findroot_block
* call, the buffer won't have b_ops but it should be clean and ready
* for us to try to verify if the read call succeeds. The same applies
* if the buffer wasn't in memory at all.
*
* Note: If we never match a btree type with this buffer, it will be
* left in memory with NULL b_ops. This shouldn't be a problem unless
* the buffer gets written.
*/
error = xfs_trans_read_buf(mp, ri->sc->tp, mp->m_ddev_targp, daddr, error = xfs_trans_read_buf(mp, ri->sc->tp, mp->m_ddev_targp, daddr,
mp->m_bsize, 0, &bp, NULL); mp->m_bsize, 0, &bp, NULL);
if (error) if (error)
return error; return error;
/* /* Ensure the block magic matches the btree type we're looking for. */
* Does this look like a block matching our fs and higher than any
* other block we've found so far? If so, reattach buffer verifiers
* so the AIL won't complain if the buffer is also dirty.
*/
btblock = XFS_BUF_TO_BLOCK(bp); btblock = XFS_BUF_TO_BLOCK(bp);
if (be32_to_cpu(btblock->bb_magic) != fab->magic) if (be32_to_cpu(btblock->bb_magic) != fab->magic)
goto out; goto out;
if (xfs_sb_version_hascrc(&mp->m_sb) &&
!uuid_equal(&btblock->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) /*
* If the buffer already has ops applied and they're not the ones for
* this btree type, we know this block doesn't match the btree and we
* can bail out.
*
* If the buffer ops match ours, someone else has already validated
* the block for us, so we can move on to checking if this is a root
* block candidate.
*
* If the buffer does not have ops, nobody has successfully validated
* the contents and the buffer cannot be dirty. If the magic, uuid,
* and structure match this btree type then we'll move on to checking
* if it's a root block candidate. If there is no match, bail out.
*/
if (bp->b_ops) {
if (bp->b_ops != fab->buf_ops)
goto out;
} else {
ASSERT(!xfs_trans_buf_is_dirty(bp));
if (!uuid_equal(&btblock->bb_u.s.bb_uuid,
&mp->m_sb.sb_meta_uuid))
goto out;
fab->buf_ops->verify_read(bp);
if (bp->b_error) {
bp->b_error = 0;
goto out; goto out;
}
/*
* Some read verifiers will (re)set b_ops, so we must be
* careful not to blow away any such assignment.
*/
if (!bp->b_ops)
bp->b_ops = fab->buf_ops; bp->b_ops = fab->buf_ops;
}
/* Ignore this block if it's lower in the tree than we've seen. */ /*
if (fab->root != NULLAGBLOCK && * This block passes the magic/uuid and verifier tests for this btree
xfs_btree_get_level(btblock) < fab->height) * type. We don't need the caller to try the other tree types.
goto out; */
*done_with_block = true;
/* Make sure we pass the verifiers. */ /*
bp->b_ops->verify_read(bp); * Compare this btree block's level to the height of the current
if (bp->b_error) * candidate root block.
*
* If the level matches the root we found previously, throw away both
* blocks because there can't be two candidate roots.
*
* If level is lower in the tree than the root we found previously,
* ignore this block.
*/
block_level = xfs_btree_get_level(btblock);
if (block_level + 1 == fab->height) {
fab->root = NULLAGBLOCK;
goto out;
} else if (block_level < fab->height) {
goto out; goto out;
}
/*
* This is the highest block in the tree that we've found so far.
* Update the btree height to reflect what we've learned from this
* block.
*/
fab->height = block_level + 1;
/*
* If this block doesn't have sibling pointers, then it's the new root
* block candidate. Otherwise, the root will be found farther up the
* tree.
*/
if (btblock->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) &&
btblock->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
fab->root = agbno; fab->root = agbno;
fab->height = xfs_btree_get_level(btblock) + 1; else
*found_it = true; fab->root = NULLAGBLOCK;
trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno, trace_xrep_findroot_block(mp, ri->sc->sa.agno, agbno,
be32_to_cpu(btblock->bb_magic), fab->height - 1); be32_to_cpu(btblock->bb_magic), fab->height - 1);
...@@ -768,7 +846,7 @@ xrep_findroot_rmap( ...@@ -768,7 +846,7 @@ xrep_findroot_rmap(
struct xrep_findroot *ri = priv; struct xrep_findroot *ri = priv;
struct xrep_find_ag_btree *fab; struct xrep_find_ag_btree *fab;
xfs_agblock_t b; xfs_agblock_t b;
bool found_it; bool done;
int error = 0; int error = 0;
/* Ignore anything that isn't AG metadata. */ /* Ignore anything that isn't AG metadata. */
...@@ -777,16 +855,16 @@ xrep_findroot_rmap( ...@@ -777,16 +855,16 @@ xrep_findroot_rmap(
/* Otherwise scan each block + btree type. */ /* Otherwise scan each block + btree type. */
for (b = 0; b < rec->rm_blockcount; b++) { for (b = 0; b < rec->rm_blockcount; b++) {
found_it = false; done = false;
for (fab = ri->btree_info; fab->buf_ops; fab++) { for (fab = ri->btree_info; fab->buf_ops; fab++) {
if (rec->rm_owner != fab->rmap_owner) if (rec->rm_owner != fab->rmap_owner)
continue; continue;
error = xrep_findroot_block(ri, fab, error = xrep_findroot_block(ri, fab,
rec->rm_owner, rec->rm_startblock + b, rec->rm_owner, rec->rm_startblock + b,
&found_it); &done);
if (error) if (error)
return error; return error;
if (found_it) if (done)
break; break;
} }
} }
......
...@@ -412,19 +412,6 @@ xchk_validate_inputs( ...@@ -412,19 +412,6 @@ xchk_validate_inputs(
goto out; goto out;
} }
error = -EOPNOTSUPP;
/*
* We won't scrub any filesystem that doesn't have the ability
* to record unwritten extents. The option was made default in
* 2003, removed from mkfs in 2007, and cannot be disabled in
* v5, so if we find a filesystem without this flag it's either
* really old or totally unsupported. Avoid it either way.
* We also don't support v1-v3 filesystems, which aren't
* mountable.
*/
if (!xfs_sb_version_hasextflgbit(&mp->m_sb))
goto out;
/* /*
* We only want to repair read-write v5+ filesystems. Defer the check * We only want to repair read-write v5+ filesystems. Defer the check
* for ops->repair until after our scrub confirms that we need to * for ops->repair until after our scrub confirms that we need to
......
...@@ -917,7 +917,7 @@ xfs_vm_writepage( ...@@ -917,7 +917,7 @@ xfs_vm_writepage(
struct writeback_control *wbc) struct writeback_control *wbc)
{ {
struct xfs_writepage_ctx wpc = { struct xfs_writepage_ctx wpc = {
.io_type = XFS_IO_INVALID, .io_type = XFS_IO_HOLE,
}; };
int ret; int ret;
...@@ -933,7 +933,7 @@ xfs_vm_writepages( ...@@ -933,7 +933,7 @@ xfs_vm_writepages(
struct writeback_control *wbc) struct writeback_control *wbc)
{ {
struct xfs_writepage_ctx wpc = { struct xfs_writepage_ctx wpc = {
.io_type = XFS_IO_INVALID, .io_type = XFS_IO_HOLE,
}; };
int ret; int ret;
......
...@@ -12,21 +12,19 @@ extern struct bio_set xfs_ioend_bioset; ...@@ -12,21 +12,19 @@ extern struct bio_set xfs_ioend_bioset;
* Types of I/O for bmap clustering and I/O completion tracking. * Types of I/O for bmap clustering and I/O completion tracking.
*/ */
enum { enum {
XFS_IO_INVALID, /* initial state */ XFS_IO_HOLE, /* covers region without any block allocation */
XFS_IO_DELALLOC, /* covers delalloc region */ XFS_IO_DELALLOC, /* covers delalloc region */
XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */ XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */
XFS_IO_OVERWRITE, /* covers already allocated extent */ XFS_IO_OVERWRITE, /* covers already allocated extent */
XFS_IO_COW, /* covers copy-on-write extent */ XFS_IO_COW, /* covers copy-on-write extent */
XFS_IO_HOLE, /* covers region without any block allocation */
}; };
#define XFS_IO_TYPES \ #define XFS_IO_TYPES \
{ XFS_IO_INVALID, "invalid" }, \ { XFS_IO_HOLE, "hole" }, \
{ XFS_IO_DELALLOC, "delalloc" }, \ { XFS_IO_DELALLOC, "delalloc" }, \
{ XFS_IO_UNWRITTEN, "unwritten" }, \ { XFS_IO_UNWRITTEN, "unwritten" }, \
{ XFS_IO_OVERWRITE, "overwrite" }, \ { XFS_IO_OVERWRITE, "overwrite" }, \
{ XFS_IO_COW, "CoW" }, \ { XFS_IO_COW, "CoW" }
{ XFS_IO_HOLE, "hole" }
/* /*
* Structure for buffered I/O completions. * Structure for buffered I/O completions.
......
...@@ -406,10 +406,10 @@ xfs_getbmap_report_one( ...@@ -406,10 +406,10 @@ xfs_getbmap_report_one(
struct xfs_bmbt_irec *got) struct xfs_bmbt_irec *got)
{ {
struct kgetbmap *p = out + bmv->bmv_entries; struct kgetbmap *p = out + bmv->bmv_entries;
bool shared = false, trimmed = false; bool shared = false;
int error; int error;
error = xfs_reflink_trim_around_shared(ip, got, &shared, &trimmed); error = xfs_reflink_trim_around_shared(ip, got, &shared);
if (error) if (error)
return error; return error;
...@@ -1042,44 +1042,6 @@ xfs_unmap_extent( ...@@ -1042,44 +1042,6 @@ xfs_unmap_extent(
goto out_unlock; goto out_unlock;
} }
static int
xfs_adjust_extent_unmap_boundaries(
struct xfs_inode *ip,
xfs_fileoff_t *startoffset_fsb,
xfs_fileoff_t *endoffset_fsb)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_bmbt_irec imap;
int nimap, error;
xfs_extlen_t mod = 0;
nimap = 1;
error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0);
if (error)
return error;
if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
div_u64_rem(imap.br_startblock, mp->m_sb.sb_rextsize, &mod);
if (mod)
*startoffset_fsb += mp->m_sb.sb_rextsize - mod;
}
nimap = 1;
error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0);
if (error)
return error;
if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
mod++;
if (mod && mod != mp->m_sb.sb_rextsize)
*endoffset_fsb -= mod;
}
return 0;
}
static int static int
xfs_flush_unmap_range( xfs_flush_unmap_range(
struct xfs_inode *ip, struct xfs_inode *ip,
...@@ -1133,19 +1095,8 @@ xfs_free_file_space( ...@@ -1133,19 +1095,8 @@ xfs_free_file_space(
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
/* /*
* Need to zero the stuff we're not freeing, on disk. If it's a RT file * Need to zero the stuff we're not freeing, on disk.
* and we can't use unwritten extents then we actually need to ensure */
* to zero the whole extent, otherwise we just need to take of block
* boundaries, and xfs_bunmapi will handle the rest.
*/
if (XFS_IS_REALTIME_INODE(ip) &&
!xfs_sb_version_hasextflgbit(&mp->m_sb)) {
error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb,
&endoffset_fsb);
if (error)
return error;
}
if (endoffset_fsb > startoffset_fsb) { if (endoffset_fsb > startoffset_fsb) {
while (!done) { while (!done) {
error = xfs_unmap_extent(ip, startoffset_fsb, error = xfs_unmap_extent(ip, startoffset_fsb,
...@@ -1824,6 +1775,12 @@ xfs_swap_extents( ...@@ -1824,6 +1775,12 @@ xfs_swap_extents(
if (error) if (error)
goto out_unlock; goto out_unlock;
if (xfs_inode_has_cow_data(tip)) {
error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true);
if (error)
return error;
}
/* /*
* Extent "swapping" with rmap requires a permanent reservation and * Extent "swapping" with rmap requires a permanent reservation and
* a block reservation because it's really just a remap operation * a block reservation because it's really just a remap operation
......
...@@ -37,6 +37,32 @@ static kmem_zone_t *xfs_buf_zone; ...@@ -37,6 +37,32 @@ static kmem_zone_t *xfs_buf_zone;
#define xb_to_gfp(flags) \ #define xb_to_gfp(flags) \
((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN) ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN)
/*
* Locking orders
*
* xfs_buf_ioacct_inc:
* xfs_buf_ioacct_dec:
* b_sema (caller holds)
* b_lock
*
* xfs_buf_stale:
* b_sema (caller holds)
* b_lock
* lru_lock
*
* xfs_buf_rele:
* b_lock
* pag_buf_lock
* lru_lock
*
* xfs_buftarg_wait_rele
* lru_lock
* b_lock (trylock due to inversion)
*
* xfs_buftarg_isolate
* lru_lock
* b_lock (trylock due to inversion)
*/
static inline int static inline int
xfs_buf_is_vmapped( xfs_buf_is_vmapped(
...@@ -749,6 +775,30 @@ _xfs_buf_read( ...@@ -749,6 +775,30 @@ _xfs_buf_read(
return xfs_buf_submit(bp); return xfs_buf_submit(bp);
} }
/*
* If the caller passed in an ops structure and the buffer doesn't have ops
* assigned, set the ops and use them to verify the contents. If the contents
* cannot be verified, we'll clear XBF_DONE. We assume the buffer has no
* recorded errors and is already in XBF_DONE state.
*/
/* Returns 0 on success, or bp->b_error if read verification failed. */
int
xfs_buf_ensure_ops(
struct xfs_buf *bp,
const struct xfs_buf_ops *ops)
{
/* Caller must hand us a clean buffer already in XBF_DONE state. */
ASSERT(bp->b_flags & XBF_DONE);
ASSERT(bp->b_error == 0);
/* Nothing to do if no ops were supplied or ops are already assigned. */
if (!ops || bp->b_ops)
return 0;
bp->b_ops = ops;
/* Run the read verifier now; it sets bp->b_error on failure. */
bp->b_ops->verify_read(bp);
if (bp->b_error)
bp->b_flags &= ~XBF_DONE; /* contents unverified: clear DONE */
return bp->b_error;
}
xfs_buf_t * xfs_buf_t *
xfs_buf_read_map( xfs_buf_read_map(
struct xfs_buftarg *target, struct xfs_buftarg *target,
...@@ -762,26 +812,32 @@ xfs_buf_read_map( ...@@ -762,26 +812,32 @@ xfs_buf_read_map(
flags |= XBF_READ; flags |= XBF_READ;
bp = xfs_buf_get_map(target, map, nmaps, flags); bp = xfs_buf_get_map(target, map, nmaps, flags);
if (bp) { if (!bp)
return NULL;
trace_xfs_buf_read(bp, flags, _RET_IP_); trace_xfs_buf_read(bp, flags, _RET_IP_);
if (!(bp->b_flags & XBF_DONE)) { if (!(bp->b_flags & XBF_DONE)) {
XFS_STATS_INC(target->bt_mount, xb_get_read); XFS_STATS_INC(target->bt_mount, xb_get_read);
bp->b_ops = ops; bp->b_ops = ops;
_xfs_buf_read(bp, flags); _xfs_buf_read(bp, flags);
} else if (flags & XBF_ASYNC) { return bp;
}
xfs_buf_ensure_ops(bp, ops);
if (flags & XBF_ASYNC) {
/* /*
* Read ahead call which is already satisfied, * Read ahead call which is already satisfied,
* drop the buffer * drop the buffer
*/ */
xfs_buf_relse(bp); xfs_buf_relse(bp);
return NULL; return NULL;
} else {
/* We do not want read in the flags */
bp->b_flags &= ~XBF_READ;
}
} }
/* We do not want read in the flags */
bp->b_flags &= ~XBF_READ;
ASSERT(bp->b_ops != NULL || ops == NULL);
return bp; return bp;
} }
...@@ -1006,8 +1062,18 @@ xfs_buf_rele( ...@@ -1006,8 +1062,18 @@ xfs_buf_rele(
ASSERT(atomic_read(&bp->b_hold) > 0); ASSERT(atomic_read(&bp->b_hold) > 0);
release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock); /*
* We grab the b_lock here first to serialise racing xfs_buf_rele()
* calls. The pag_buf_lock being taken on the last reference only
* serialises against racing lookups in xfs_buf_find(). IOWs, the second
* to last reference we drop here is not serialised against the last
* reference until we take bp->b_lock. Hence if we don't grab b_lock
* first, the last "release" reference can win the race to the lock and
* free the buffer before the second-to-last reference is processed,
* leading to a use-after-free scenario.
*/
spin_lock(&bp->b_lock); spin_lock(&bp->b_lock);
release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
if (!release) { if (!release) {
/* /*
* Drop the in-flight state if the buffer is already on the LRU * Drop the in-flight state if the buffer is already on the LRU
...@@ -1989,6 +2055,13 @@ xfs_buf_delwri_submit_buffers( ...@@ -1989,6 +2055,13 @@ xfs_buf_delwri_submit_buffers(
* is only safely useable for callers that can track I/O completion by higher * is only safely useable for callers that can track I/O completion by higher
* level means, e.g. AIL pushing as the @buffer_list is consumed in this * level means, e.g. AIL pushing as the @buffer_list is consumed in this
* function. * function.
*
* Note: this function will skip buffers it would block on, and in doing so
* leaves them on @buffer_list so they can be retried on a later pass. As such,
* it is up to the caller to ensure that the buffer list is fully submitted or
* cancelled appropriately when they are finished with the list. Failure to
* cancel or resubmit the list until it is empty will result in leaked buffers
* at unmount time.
*/ */
int int
xfs_buf_delwri_submit_nowait( xfs_buf_delwri_submit_nowait(
......
...@@ -385,4 +385,6 @@ extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int); ...@@ -385,4 +385,6 @@ extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
int xfs_buf_ensure_ops(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
#endif /* __XFS_BUF_H__ */ #endif /* __XFS_BUF_H__ */
...@@ -470,20 +470,13 @@ xfs_fs_goingdown( ...@@ -470,20 +470,13 @@ xfs_fs_goingdown(
*/ */
void void
xfs_do_force_shutdown( xfs_do_force_shutdown(
xfs_mount_t *mp, struct xfs_mount *mp,
int flags, int flags,
char *fname, char *fname,
int lnnum) int lnnum)
{ {
int logerror; bool logerror = flags & SHUTDOWN_LOG_IO_ERROR;
logerror = flags & SHUTDOWN_LOG_IO_ERROR;
if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
xfs_notice(mp,
"%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT,
__func__, flags, lnnum, fname, __return_address);
}
/* /*
* No need to duplicate efforts. * No need to duplicate efforts.
*/ */
...@@ -499,13 +492,22 @@ xfs_do_force_shutdown( ...@@ -499,13 +492,22 @@ xfs_do_force_shutdown(
if (xfs_log_force_umount(mp, logerror)) if (xfs_log_force_umount(mp, logerror))
return; return;
if (flags & SHUTDOWN_FORCE_UMOUNT) {
xfs_alert(mp,
"User initiated shutdown received. Shutting down filesystem");
return;
}
xfs_notice(mp,
"%s(0x%x) called from line %d of file %s. Return address = "PTR_FMT,
__func__, flags, lnnum, fname, __return_address);
if (flags & SHUTDOWN_CORRUPT_INCORE) { if (flags & SHUTDOWN_CORRUPT_INCORE) {
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT, xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
"Corruption of in-memory data detected. Shutting down filesystem"); "Corruption of in-memory data detected. Shutting down filesystem");
if (XFS_ERRLEVEL_HIGH <= xfs_error_level) if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
xfs_stack_trace(); xfs_stack_trace();
} else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) { } else if (logerror) {
if (logerror) {
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR, xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
"Log I/O Error Detected. Shutting down filesystem"); "Log I/O Error Detected. Shutting down filesystem");
} else if (flags & SHUTDOWN_DEVICE_REQ) { } else if (flags & SHUTDOWN_DEVICE_REQ) {
...@@ -515,11 +517,9 @@ xfs_do_force_shutdown( ...@@ -515,11 +517,9 @@ xfs_do_force_shutdown(
xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR, xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
"I/O Error Detected. Shutting down filesystem"); "I/O Error Detected. Shutting down filesystem");
} }
}
if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
xfs_alert(mp, xfs_alert(mp,
"Please umount the filesystem and rectify the problem(s)"); "Please unmount the filesystem and rectify the problem(s)");
}
} }
/* /*
......
...@@ -604,14 +604,6 @@ xfs_ioc_space( ...@@ -604,14 +604,6 @@ xfs_ioc_space(
uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
int error; int error;
/*
* Only allow the sys admin to reserve space unless
* unwritten extents are enabled.
*/
if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
!capable(CAP_SYS_ADMIN))
return -EPERM;
if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
return -EPERM; return -EPERM;
......
...@@ -62,6 +62,21 @@ xfs_bmbt_to_iomap( ...@@ -62,6 +62,21 @@ xfs_bmbt_to_iomap(
iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip)); iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
} }
/*
 * Report the fsb range [offset_fsb, end_fsb) as a hole: fill @iomap with
 * an IOMAP_HOLE mapping (no disk address), converting the range to byte
 * units, and record the inode's block and DAX devices.
 */
static void
xfs_hole_to_iomap(
struct xfs_inode *ip,
struct iomap *iomap,
xfs_fileoff_t offset_fsb,
xfs_fileoff_t end_fsb)
{
iomap->addr = IOMAP_NULL_ADDR; /* holes have no disk address */
iomap->type = IOMAP_HOLE;
iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb);
iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb);
iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
}
xfs_extlen_t xfs_extlen_t
xfs_eof_alignment( xfs_eof_alignment(
struct xfs_inode *ip, struct xfs_inode *ip,
...@@ -502,6 +517,7 @@ xfs_file_iomap_begin_delay( ...@@ -502,6 +517,7 @@ xfs_file_iomap_begin_delay(
struct inode *inode, struct inode *inode,
loff_t offset, loff_t offset,
loff_t count, loff_t count,
unsigned flags,
struct iomap *iomap) struct iomap *iomap)
{ {
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
...@@ -538,15 +554,23 @@ xfs_file_iomap_begin_delay( ...@@ -538,15 +554,23 @@ xfs_file_iomap_begin_delay(
goto out_unlock; goto out_unlock;
} }
end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got); eof = !xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got);
if (!eof && got.br_startoff <= offset_fsb) { if (eof)
if (xfs_is_reflink_inode(ip)) { got.br_startoff = end_fsb; /* fake hole until the end */
bool shared;
end_fsb = min(XFS_B_TO_FSB(mp, offset + count), if (got.br_startoff <= offset_fsb) {
maxbytes_fsb); /*
* For reflink files we may need a delalloc reservation when
* overwriting shared extents. This includes zeroing of
* existing extents that contain data.
*/
if (xfs_is_reflink_inode(ip) &&
((flags & IOMAP_WRITE) ||
got.br_state != XFS_EXT_UNWRITTEN)) {
xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb); xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
error = xfs_reflink_reserve_cow(ip, &got, &shared); error = xfs_reflink_reserve_cow(ip, &got);
if (error) if (error)
goto out_unlock; goto out_unlock;
} }
...@@ -555,6 +579,11 @@ xfs_file_iomap_begin_delay( ...@@ -555,6 +579,11 @@ xfs_file_iomap_begin_delay(
goto done; goto done;
} }
if (flags & IOMAP_ZERO) {
xfs_hole_to_iomap(ip, iomap, offset_fsb, got.br_startoff);
goto out_unlock;
}
error = xfs_qm_dqattach_locked(ip, false); error = xfs_qm_dqattach_locked(ip, false);
if (error) if (error)
goto out_unlock; goto out_unlock;
...@@ -1003,16 +1032,17 @@ xfs_file_iomap_begin( ...@@ -1003,16 +1032,17 @@ xfs_file_iomap_begin(
struct xfs_bmbt_irec imap; struct xfs_bmbt_irec imap;
xfs_fileoff_t offset_fsb, end_fsb; xfs_fileoff_t offset_fsb, end_fsb;
int nimaps = 1, error = 0; int nimaps = 1, error = 0;
bool shared = false, trimmed = false; bool shared = false;
unsigned lockmode; unsigned lockmode;
if (XFS_FORCED_SHUTDOWN(mp)) if (XFS_FORCED_SHUTDOWN(mp))
return -EIO; return -EIO;
if (((flags & (IOMAP_WRITE | IOMAP_DIRECT)) == IOMAP_WRITE) && if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && !(flags & IOMAP_DIRECT) &&
!IS_DAX(inode) && !xfs_get_extsz_hint(ip)) { !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
/* Reserve delalloc blocks for regular writeback. */ /* Reserve delalloc blocks for regular writeback. */
return xfs_file_iomap_begin_delay(inode, offset, length, iomap); return xfs_file_iomap_begin_delay(inode, offset, length, flags,
iomap);
} }
/* /*
...@@ -1038,8 +1068,7 @@ xfs_file_iomap_begin( ...@@ -1038,8 +1068,7 @@ xfs_file_iomap_begin(
if (flags & IOMAP_REPORT) { if (flags & IOMAP_REPORT) {
/* Trim the mapping to the nearest shared extent boundary. */ /* Trim the mapping to the nearest shared extent boundary. */
error = xfs_reflink_trim_around_shared(ip, &imap, &shared, error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
&trimmed);
if (error) if (error)
goto out_unlock; goto out_unlock;
} }
...@@ -1065,7 +1094,7 @@ xfs_file_iomap_begin( ...@@ -1065,7 +1094,7 @@ xfs_file_iomap_begin(
if (error) if (error)
goto out_unlock; goto out_unlock;
} else { } else {
error = xfs_reflink_reserve_cow(ip, &imap, &shared); error = xfs_reflink_reserve_cow(ip, &imap);
if (error) if (error)
goto out_unlock; goto out_unlock;
} }
......
...@@ -182,8 +182,7 @@ int ...@@ -182,8 +182,7 @@ int
xfs_reflink_trim_around_shared( xfs_reflink_trim_around_shared(
struct xfs_inode *ip, struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, struct xfs_bmbt_irec *irec,
bool *shared, bool *shared)
bool *trimmed)
{ {
xfs_agnumber_t agno; xfs_agnumber_t agno;
xfs_agblock_t agbno; xfs_agblock_t agbno;
...@@ -209,7 +208,7 @@ xfs_reflink_trim_around_shared( ...@@ -209,7 +208,7 @@ xfs_reflink_trim_around_shared(
if (error) if (error)
return error; return error;
*shared = *trimmed = false; *shared = false;
if (fbno == NULLAGBLOCK) { if (fbno == NULLAGBLOCK) {
/* No shared blocks at all. */ /* No shared blocks at all. */
return 0; return 0;
...@@ -222,8 +221,6 @@ xfs_reflink_trim_around_shared( ...@@ -222,8 +221,6 @@ xfs_reflink_trim_around_shared(
*/ */
irec->br_blockcount = flen; irec->br_blockcount = flen;
*shared = true; *shared = true;
if (flen != aglen)
*trimmed = true;
return 0; return 0;
} else { } else {
/* /*
...@@ -233,7 +230,6 @@ xfs_reflink_trim_around_shared( ...@@ -233,7 +230,6 @@ xfs_reflink_trim_around_shared(
* start of the shared region. * start of the shared region.
*/ */
irec->br_blockcount = fbno - agbno; irec->br_blockcount = fbno - agbno;
*trimmed = true;
return 0; return 0;
} }
} }
...@@ -241,7 +237,7 @@ xfs_reflink_trim_around_shared( ...@@ -241,7 +237,7 @@ xfs_reflink_trim_around_shared(
/* /*
* Trim the passed in imap to the next shared/unshared extent boundary, and * Trim the passed in imap to the next shared/unshared extent boundary, and
* if imap->br_startoff points to a shared extent reserve space for it in the * if imap->br_startoff points to a shared extent reserve space for it in the
* COW fork. In this case *shared is set to true, else to false. * COW fork.
* *
* Note that imap will always contain the block numbers for the existing blocks * Note that imap will always contain the block numbers for the existing blocks
* in the data fork, as the upper layers need them for read-modify-write * in the data fork, as the upper layers need them for read-modify-write
...@@ -250,14 +246,14 @@ xfs_reflink_trim_around_shared( ...@@ -250,14 +246,14 @@ xfs_reflink_trim_around_shared(
int int
xfs_reflink_reserve_cow( xfs_reflink_reserve_cow(
struct xfs_inode *ip, struct xfs_inode *ip,
struct xfs_bmbt_irec *imap, struct xfs_bmbt_irec *imap)
bool *shared)
{ {
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got; struct xfs_bmbt_irec got;
int error = 0; int error = 0;
bool eof = false, trimmed; bool eof = false;
struct xfs_iext_cursor icur; struct xfs_iext_cursor icur;
bool shared;
/* /*
* Search the COW fork extent list first. This serves two purposes: * Search the COW fork extent list first. This serves two purposes:
...@@ -273,18 +269,16 @@ xfs_reflink_reserve_cow( ...@@ -273,18 +269,16 @@ xfs_reflink_reserve_cow(
if (!eof && got.br_startoff <= imap->br_startoff) { if (!eof && got.br_startoff <= imap->br_startoff) {
trace_xfs_reflink_cow_found(ip, imap); trace_xfs_reflink_cow_found(ip, imap);
xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
*shared = true;
return 0; return 0;
} }
/* Trim the mapping to the nearest shared extent boundary. */ /* Trim the mapping to the nearest shared extent boundary. */
error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); error = xfs_reflink_trim_around_shared(ip, imap, &shared);
if (error) if (error)
return error; return error;
/* Not shared? Just report the (potentially capped) extent. */ /* Not shared? Just report the (potentially capped) extent. */
if (!*shared) if (!shared)
return 0; return 0;
/* /*
...@@ -368,7 +362,6 @@ xfs_find_trim_cow_extent( ...@@ -368,7 +362,6 @@ xfs_find_trim_cow_extent(
xfs_filblks_t count_fsb = imap->br_blockcount; xfs_filblks_t count_fsb = imap->br_blockcount;
struct xfs_iext_cursor icur; struct xfs_iext_cursor icur;
struct xfs_bmbt_irec got; struct xfs_bmbt_irec got;
bool trimmed;
*found = false; *found = false;
...@@ -376,9 +369,13 @@ xfs_find_trim_cow_extent( ...@@ -376,9 +369,13 @@ xfs_find_trim_cow_extent(
* If we don't find an overlapping extent, trim the range we need to * If we don't find an overlapping extent, trim the range we need to
* allocate to fit the hole we found. * allocate to fit the hole we found.
*/ */
if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) || if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got))
got.br_startoff > offset_fsb) got.br_startoff = offset_fsb + count_fsb;
return xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); if (got.br_startoff > offset_fsb) {
xfs_trim_extent(imap, imap->br_startoff,
got.br_startoff - imap->br_startoff);
return xfs_reflink_trim_around_shared(ip, imap, shared);
}
*shared = true; *shared = true;
if (isnullstartblock(got.br_startblock)) { if (isnullstartblock(got.br_startblock)) {
......
...@@ -10,10 +10,10 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp, ...@@ -10,10 +10,10 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen,
xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal); xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal);
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); struct xfs_bmbt_irec *irec, bool *shared);
extern int xfs_reflink_reserve_cow(struct xfs_inode *ip, extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
struct xfs_bmbt_irec *imap, bool *shared); struct xfs_bmbt_irec *imap);
extern int xfs_reflink_allocate_cow(struct xfs_inode *ip, extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode); struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode);
extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset, extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
......
...@@ -29,30 +29,30 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf) ...@@ -29,30 +29,30 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
char *desc; char *desc;
int endpoint; int endpoint;
} xstats[] = { } xstats[] = {
{ "extent_alloc", XFSSTAT_END_EXTENT_ALLOC }, { "extent_alloc", xfsstats_offset(xs_abt_lookup) },
{ "abt", XFSSTAT_END_ALLOC_BTREE }, { "abt", xfsstats_offset(xs_blk_mapr) },
{ "blk_map", XFSSTAT_END_BLOCK_MAPPING }, { "blk_map", xfsstats_offset(xs_bmbt_lookup) },
{ "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE }, { "bmbt", xfsstats_offset(xs_dir_lookup) },
{ "dir", XFSSTAT_END_DIRECTORY_OPS }, { "dir", xfsstats_offset(xs_trans_sync) },
{ "trans", XFSSTAT_END_TRANSACTIONS }, { "trans", xfsstats_offset(xs_ig_attempts) },
{ "ig", XFSSTAT_END_INODE_OPS }, { "ig", xfsstats_offset(xs_log_writes) },
{ "log", XFSSTAT_END_LOG_OPS }, { "log", xfsstats_offset(xs_try_logspace)},
{ "push_ail", XFSSTAT_END_TAIL_PUSHING }, { "push_ail", xfsstats_offset(xs_xstrat_quick)},
{ "xstrat", XFSSTAT_END_WRITE_CONVERT }, { "xstrat", xfsstats_offset(xs_write_calls) },
{ "rw", XFSSTAT_END_READ_WRITE_OPS }, { "rw", xfsstats_offset(xs_attr_get) },
{ "attr", XFSSTAT_END_ATTRIBUTE_OPS }, { "attr", xfsstats_offset(xs_iflush_count)},
{ "icluster", XFSSTAT_END_INODE_CLUSTER }, { "icluster", xfsstats_offset(vn_active) },
{ "vnodes", XFSSTAT_END_VNODE_OPS }, { "vnodes", xfsstats_offset(xb_get) },
{ "buf", XFSSTAT_END_BUF }, { "buf", xfsstats_offset(xs_abtb_2) },
{ "abtb2", XFSSTAT_END_ABTB_V2 }, { "abtb2", xfsstats_offset(xs_abtc_2) },
{ "abtc2", XFSSTAT_END_ABTC_V2 }, { "abtc2", xfsstats_offset(xs_bmbt_2) },
{ "bmbt2", XFSSTAT_END_BMBT_V2 }, { "bmbt2", xfsstats_offset(xs_ibt_2) },
{ "ibt2", XFSSTAT_END_IBT_V2 }, { "ibt2", xfsstats_offset(xs_fibt_2) },
{ "fibt2", XFSSTAT_END_FIBT_V2 }, { "fibt2", xfsstats_offset(xs_rmap_2) },
{ "rmapbt", XFSSTAT_END_RMAP_V2 }, { "rmapbt", xfsstats_offset(xs_refcbt_2) },
{ "refcntbt", XFSSTAT_END_REFCOUNT }, { "refcntbt", xfsstats_offset(xs_qm_dqreclaims)},
/* we print both series of quota information together */ /* we print both series of quota information together */
{ "qm", XFSSTAT_END_QM }, { "qm", xfsstats_offset(xs_xstrat_bytes)},
}; };
/* Loop over all stats groups */ /* Loop over all stats groups */
...@@ -104,6 +104,10 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats) ...@@ -104,6 +104,10 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats)
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
/* legacy quota interfaces */ /* legacy quota interfaces */
#ifdef CONFIG_XFS_QUOTA #ifdef CONFIG_XFS_QUOTA
#define XFSSTAT_START_XQMSTAT xfsstats_offset(xs_qm_dqreclaims)
#define XFSSTAT_END_XQMSTAT xfsstats_offset(xs_qm_dquot)
static int xqm_proc_show(struct seq_file *m, void *v) static int xqm_proc_show(struct seq_file *m, void *v)
{ {
/* maximum; incore; ratio free to inuse; freelist */ /* maximum; incore; ratio free to inuse; freelist */
...@@ -119,7 +123,7 @@ static int xqmstat_proc_show(struct seq_file *m, void *v) ...@@ -119,7 +123,7 @@ static int xqmstat_proc_show(struct seq_file *m, void *v)
int j; int j;
seq_printf(m, "qm"); seq_printf(m, "qm");
for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++) for (j = XFSSTAT_START_XQMSTAT; j < XFSSTAT_END_XQMSTAT; j++)
seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j)); seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j));
seq_putc(m, '\n'); seq_putc(m, '\n');
return 0; return 0;
......
...@@ -41,17 +41,14 @@ enum { ...@@ -41,17 +41,14 @@ enum {
* XFS global statistics * XFS global statistics
*/ */
struct __xfsstats { struct __xfsstats {
# define XFSSTAT_END_EXTENT_ALLOC 4
uint32_t xs_allocx; uint32_t xs_allocx;
uint32_t xs_allocb; uint32_t xs_allocb;
uint32_t xs_freex; uint32_t xs_freex;
uint32_t xs_freeb; uint32_t xs_freeb;
# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4)
uint32_t xs_abt_lookup; uint32_t xs_abt_lookup;
uint32_t xs_abt_compare; uint32_t xs_abt_compare;
uint32_t xs_abt_insrec; uint32_t xs_abt_insrec;
uint32_t xs_abt_delrec; uint32_t xs_abt_delrec;
# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7)
uint32_t xs_blk_mapr; uint32_t xs_blk_mapr;
uint32_t xs_blk_mapw; uint32_t xs_blk_mapw;
uint32_t xs_blk_unmap; uint32_t xs_blk_unmap;
...@@ -59,21 +56,17 @@ struct __xfsstats { ...@@ -59,21 +56,17 @@ struct __xfsstats {
uint32_t xs_del_exlist; uint32_t xs_del_exlist;
uint32_t xs_look_exlist; uint32_t xs_look_exlist;
uint32_t xs_cmp_exlist; uint32_t xs_cmp_exlist;
# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4)
uint32_t xs_bmbt_lookup; uint32_t xs_bmbt_lookup;
uint32_t xs_bmbt_compare; uint32_t xs_bmbt_compare;
uint32_t xs_bmbt_insrec; uint32_t xs_bmbt_insrec;
uint32_t xs_bmbt_delrec; uint32_t xs_bmbt_delrec;
# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4)
uint32_t xs_dir_lookup; uint32_t xs_dir_lookup;
uint32_t xs_dir_create; uint32_t xs_dir_create;
uint32_t xs_dir_remove; uint32_t xs_dir_remove;
uint32_t xs_dir_getdents; uint32_t xs_dir_getdents;
# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3)
uint32_t xs_trans_sync; uint32_t xs_trans_sync;
uint32_t xs_trans_async; uint32_t xs_trans_async;
uint32_t xs_trans_empty; uint32_t xs_trans_empty;
# define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7)
uint32_t xs_ig_attempts; uint32_t xs_ig_attempts;
uint32_t xs_ig_found; uint32_t xs_ig_found;
uint32_t xs_ig_frecycle; uint32_t xs_ig_frecycle;
...@@ -81,13 +74,11 @@ struct __xfsstats { ...@@ -81,13 +74,11 @@ struct __xfsstats {
uint32_t xs_ig_dup; uint32_t xs_ig_dup;
uint32_t xs_ig_reclaims; uint32_t xs_ig_reclaims;
uint32_t xs_ig_attrchg; uint32_t xs_ig_attrchg;
# define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5)
uint32_t xs_log_writes; uint32_t xs_log_writes;
uint32_t xs_log_blocks; uint32_t xs_log_blocks;
uint32_t xs_log_noiclogs; uint32_t xs_log_noiclogs;
uint32_t xs_log_force; uint32_t xs_log_force;
uint32_t xs_log_force_sleep; uint32_t xs_log_force_sleep;
# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10)
uint32_t xs_try_logspace; uint32_t xs_try_logspace;
uint32_t xs_sleep_logspace; uint32_t xs_sleep_logspace;
uint32_t xs_push_ail; uint32_t xs_push_ail;
...@@ -98,22 +89,17 @@ struct __xfsstats { ...@@ -98,22 +89,17 @@ struct __xfsstats {
uint32_t xs_push_ail_flushing; uint32_t xs_push_ail_flushing;
uint32_t xs_push_ail_restarts; uint32_t xs_push_ail_restarts;
uint32_t xs_push_ail_flush; uint32_t xs_push_ail_flush;
# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2)
uint32_t xs_xstrat_quick; uint32_t xs_xstrat_quick;
uint32_t xs_xstrat_split; uint32_t xs_xstrat_split;
# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2)
uint32_t xs_write_calls; uint32_t xs_write_calls;
uint32_t xs_read_calls; uint32_t xs_read_calls;
# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4)
uint32_t xs_attr_get; uint32_t xs_attr_get;
uint32_t xs_attr_set; uint32_t xs_attr_set;
uint32_t xs_attr_remove; uint32_t xs_attr_remove;
uint32_t xs_attr_list; uint32_t xs_attr_list;
# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3)
uint32_t xs_iflush_count; uint32_t xs_iflush_count;
uint32_t xs_icluster_flushcnt; uint32_t xs_icluster_flushcnt;
uint32_t xs_icluster_flushinode; uint32_t xs_icluster_flushinode;
# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8)
uint32_t vn_active; /* # vnodes not on free lists */ uint32_t vn_active; /* # vnodes not on free lists */
uint32_t vn_alloc; /* # times vn_alloc called */ uint32_t vn_alloc; /* # times vn_alloc called */
uint32_t vn_get; /* # times vn_get called */ uint32_t vn_get; /* # times vn_get called */
...@@ -122,7 +108,6 @@ struct __xfsstats { ...@@ -122,7 +108,6 @@ struct __xfsstats {
uint32_t vn_reclaim; /* # times vn_reclaim called */ uint32_t vn_reclaim; /* # times vn_reclaim called */
uint32_t vn_remove; /* # times vn_remove called */ uint32_t vn_remove; /* # times vn_remove called */
uint32_t vn_free; /* # times vn_free called */ uint32_t vn_free; /* # times vn_free called */
#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9)
uint32_t xb_get; uint32_t xb_get;
uint32_t xb_create; uint32_t xb_create;
uint32_t xb_get_locked; uint32_t xb_get_locked;
...@@ -133,28 +118,19 @@ struct __xfsstats { ...@@ -133,28 +118,19 @@ struct __xfsstats {
uint32_t xb_page_found; uint32_t xb_page_found;
uint32_t xb_get_read; uint32_t xb_get_read;
/* Version 2 btree counters */ /* Version 2 btree counters */
#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF + __XBTS_MAX)
uint32_t xs_abtb_2[__XBTS_MAX]; uint32_t xs_abtb_2[__XBTS_MAX];
#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2 + __XBTS_MAX)
uint32_t xs_abtc_2[__XBTS_MAX]; uint32_t xs_abtc_2[__XBTS_MAX];
#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2 + __XBTS_MAX)
uint32_t xs_bmbt_2[__XBTS_MAX]; uint32_t xs_bmbt_2[__XBTS_MAX];
#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2 + __XBTS_MAX)
uint32_t xs_ibt_2[__XBTS_MAX]; uint32_t xs_ibt_2[__XBTS_MAX];
#define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2 + __XBTS_MAX)
uint32_t xs_fibt_2[__XBTS_MAX]; uint32_t xs_fibt_2[__XBTS_MAX];
#define XFSSTAT_END_RMAP_V2 (XFSSTAT_END_FIBT_V2 + __XBTS_MAX)
uint32_t xs_rmap_2[__XBTS_MAX]; uint32_t xs_rmap_2[__XBTS_MAX];
#define XFSSTAT_END_REFCOUNT (XFSSTAT_END_RMAP_V2 + __XBTS_MAX)
uint32_t xs_refcbt_2[__XBTS_MAX]; uint32_t xs_refcbt_2[__XBTS_MAX];
#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_REFCOUNT + 6)
uint32_t xs_qm_dqreclaims; uint32_t xs_qm_dqreclaims;
uint32_t xs_qm_dqreclaim_misses; uint32_t xs_qm_dqreclaim_misses;
uint32_t xs_qm_dquot_dups; uint32_t xs_qm_dquot_dups;
uint32_t xs_qm_dqcachemisses; uint32_t xs_qm_dqcachemisses;
uint32_t xs_qm_dqcachehits; uint32_t xs_qm_dqcachehits;
uint32_t xs_qm_dqwants; uint32_t xs_qm_dqwants;
#define XFSSTAT_END_QM (XFSSTAT_END_XQMSTAT+2)
uint32_t xs_qm_dquot; uint32_t xs_qm_dquot;
uint32_t xs_qm_dquot_unused; uint32_t xs_qm_dquot_unused;
/* Extra precision counters */ /* Extra precision counters */
...@@ -163,10 +139,12 @@ struct __xfsstats { ...@@ -163,10 +139,12 @@ struct __xfsstats {
uint64_t xs_read_bytes; uint64_t xs_read_bytes;
}; };
#define xfsstats_offset(f) (offsetof(struct __xfsstats, f)/sizeof(uint32_t))
struct xfsstats { struct xfsstats {
union { union {
struct __xfsstats s; struct __xfsstats s;
uint32_t a[XFSSTAT_END_XQMSTAT]; uint32_t a[xfsstats_offset(xs_qm_dquot)];
}; };
}; };
......
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include <linux/dax.h> #include <linux/dax.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/magic.h>
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/mempool.h> #include <linux/mempool.h>
#include <linux/writeback.h> #include <linux/writeback.h>
...@@ -933,6 +934,32 @@ xfs_fs_alloc_inode( ...@@ -933,6 +934,32 @@ xfs_fs_alloc_inode(
return NULL; return NULL;
} }
#ifdef DEBUG
/*
 * Debug-only: walk @whichfork's in-core extent list and warn about any
 * delalloc extents still present. Called from inode destruction to
 * diagnose leaked delayed-allocation reservations.
 */
static void
xfs_check_delalloc(
struct xfs_inode *ip,
int whichfork)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_bmbt_irec got;
struct xfs_iext_cursor icur;
/* No such fork, or fork has no extents: nothing to report. */
if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got))
return;
do {
/* a null startblock marks a delalloc extent */
if (isnullstartblock(got.br_startblock)) {
xfs_warn(ip->i_mount,
"ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]",
ip->i_ino,
whichfork == XFS_DATA_FORK ? "data" : "cow",
got.br_startoff, got.br_blockcount);
}
} while (xfs_iext_next_extent(ifp, &icur, &got));
}
#else
/* Non-debug builds: compiled out entirely. */
#define xfs_check_delalloc(ip, whichfork) do { } while (0)
#endif
/* /*
* Now that the generic code is guaranteed not to be accessing * Now that the generic code is guaranteed not to be accessing
* the linux inode, we can inactivate and reclaim the inode. * the linux inode, we can inactivate and reclaim the inode.
...@@ -951,7 +978,12 @@ xfs_fs_destroy_inode( ...@@ -951,7 +978,12 @@ xfs_fs_destroy_inode(
xfs_inactive(ip); xfs_inactive(ip);
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) {
xfs_check_delalloc(ip, XFS_DATA_FORK);
xfs_check_delalloc(ip, XFS_COW_FORK);
ASSERT(0);
}
XFS_STATS_INC(ip->i_mount, vn_reclaim); XFS_STATS_INC(ip->i_mount, vn_reclaim);
/* /*
...@@ -1097,7 +1129,7 @@ xfs_fs_statfs( ...@@ -1097,7 +1129,7 @@ xfs_fs_statfs(
xfs_extlen_t lsize; xfs_extlen_t lsize;
int64_t ffree; int64_t ffree;
statp->f_type = XFS_SB_MAGIC; statp->f_type = XFS_SUPER_MAGIC;
statp->f_namelen = MAXNAMELEN - 1; statp->f_namelen = MAXNAMELEN - 1;
id = huge_encode_dev(mp->m_ddev_targp->bt_dev); id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
...@@ -1650,7 +1682,7 @@ xfs_fs_fill_super( ...@@ -1650,7 +1682,7 @@ xfs_fs_fill_super(
* we must configure the block size in the superblock before we run the * we must configure the block size in the superblock before we run the
* full mount process as the mount process can lookup and cache inodes. * full mount process as the mount process can lookup and cache inodes.
*/ */
sb->s_magic = XFS_SB_MAGIC; sb->s_magic = XFS_SUPER_MAGIC;
sb->s_blocksize = mp->m_sb.sb_blocksize; sb->s_blocksize = mp->m_sb.sb_blocksize;
sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
......
...@@ -220,6 +220,7 @@ void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint); ...@@ -220,6 +220,7 @@ void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
void xfs_trans_log_buf(struct xfs_trans *, struct xfs_buf *, uint, void xfs_trans_log_buf(struct xfs_trans *, struct xfs_buf *, uint,
uint); uint);
void xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *); void xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *);
bool xfs_trans_buf_is_dirty(struct xfs_buf *bp);
void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
void xfs_extent_free_init_defer_op(void); void xfs_extent_free_init_defer_op(void);
......
...@@ -531,17 +531,33 @@ xfsaild( ...@@ -531,17 +531,33 @@ xfsaild(
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
/* /*
* Check kthread_should_stop() after we set the task state * Check kthread_should_stop() after we set the task state to
* to guarantee that we either see the stop bit and exit or * guarantee that we either see the stop bit and exit or the
* the task state is reset to runnable such that it's not * task state is reset to runnable such that it's not scheduled
* scheduled out indefinitely and detects the stop bit at * out indefinitely and detects the stop bit at next iteration.
* next iteration.
*
* A memory barrier is included in above task state set to * A memory barrier is included in above task state set to
* serialize again kthread_stop(). * serialize again kthread_stop().
*/ */
if (kthread_should_stop()) { if (kthread_should_stop()) {
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
/*
* The caller forces out the AIL before stopping the
* thread in the common case, which means the delwri
* queue is drained. In the shutdown case, the queue may
* still hold relogged buffers that haven't been
* submitted because they were pinned since added to the
* queue.
*
* Log I/O error processing stales the underlying buffer
* and clears the delwri state, expecting the buf to be
* removed on the next submission attempt. That won't
* happen if we're shutting down, so this is the last
* opportunity to release such buffers from the queue.
*/
ASSERT(list_empty(&ailp->ail_buf_list) ||
XFS_FORCED_SHUTDOWN(ailp->ail_mount));
xfs_buf_delwri_cancel(&ailp->ail_buf_list);
break; break;
} }
......
...@@ -264,11 +264,39 @@ xfs_trans_read_buf_map( ...@@ -264,11 +264,39 @@ xfs_trans_read_buf_map(
return -EIO; return -EIO;
} }
/*
* Check if the caller is trying to read a buffer that is
* already attached to the transaction yet has no buffer ops
* assigned. Ops are usually attached when the buffer is
* attached to the transaction, or by the read caller if
* special circumstances. That didn't happen, which is not
* how this is supposed to go.
*
* If the buffer passes verification we'll let this go, but if
* not we have to shut down. Let the transaction cleanup code
* release this buffer when it kills the tranaction.
*/
ASSERT(bp->b_ops != NULL);
error = xfs_buf_ensure_ops(bp, ops);
if (error) {
xfs_buf_ioerror_alert(bp, __func__);
if (tp->t_flags & XFS_TRANS_DIRTY)
xfs_force_shutdown(tp->t_mountp,
SHUTDOWN_META_IO_ERROR);
/* bad CRC means corrupted metadata */
if (error == -EFSBADCRC)
error = -EFSCORRUPTED;
return error;
}
bip = bp->b_log_item; bip = bp->b_log_item;
bip->bli_recur++; bip->bli_recur++;
ASSERT(atomic_read(&bip->bli_refcount) > 0); ASSERT(atomic_read(&bip->bli_refcount) > 0);
trace_xfs_trans_read_buf_recur(bip); trace_xfs_trans_read_buf_recur(bip);
ASSERT(bp->b_ops != NULL || ops == NULL);
*bpp = bp; *bpp = bp;
return 0; return 0;
} }
...@@ -316,11 +344,25 @@ xfs_trans_read_buf_map( ...@@ -316,11 +344,25 @@ xfs_trans_read_buf_map(
_xfs_trans_bjoin(tp, bp, 1); _xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_read_buf(bp->b_log_item); trace_xfs_trans_read_buf(bp->b_log_item);
} }
ASSERT(bp->b_ops != NULL || ops == NULL);
*bpp = bp; *bpp = bp;
return 0; return 0;
} }
/* Has this buffer been dirtied by anyone? */
bool
xfs_trans_buf_is_dirty(
struct xfs_buf *bp)
{
struct xfs_buf_log_item *bip = bp->b_log_item;
if (!bip)
return false;
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
return test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags);
}
/* /*
* Release a buffer previously joined to the transaction. If the buffer is * Release a buffer previously joined to the transaction. If the buffer is
* modified within this transaction, decrement the recursion count but do not * modified within this transaction, decrement the recursion count but do not
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#define HPFS_SUPER_MAGIC 0xf995e849 #define HPFS_SUPER_MAGIC 0xf995e849
#define ISOFS_SUPER_MAGIC 0x9660 #define ISOFS_SUPER_MAGIC 0x9660
#define JFFS2_SUPER_MAGIC 0x72b6 #define JFFS2_SUPER_MAGIC 0x72b6
#define XFS_SUPER_MAGIC 0x58465342 /* "XFSB" */
#define PSTOREFS_MAGIC 0x6165676C #define PSTOREFS_MAGIC 0x6165676C
#define EFIVARFS_MAGIC 0xde5e81e4 #define EFIVARFS_MAGIC 0xde5e81e4
#define HOSTFS_SUPER_MAGIC 0x00c0ffee #define HOSTFS_SUPER_MAGIC 0x00c0ffee
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment