Commit 825ec756 authored by Linus Torvalds

Merge tag 'xfs-6.12-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:

 - A few small typo fixes

 - A DEBUG-only fix for fstests xfs/538

 - Performance fix for blockgc on COW'ed files, by skipping trims on
   cowblock inodes currently opened for write (see the sketch after
   this list)

 - Prevent cowblocks from being freed under dirty pagecache during unshare

 - Update MAINTAINERS file to add the new maintainer
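
   A minimal sketch of that blockgc heuristic, condensed from the
   xfs_icache.c hunk later in this page (the helper below is simplified
   for illustration, not the verbatim kernel source):

	/*
	 * Background (non-sync) scans skip any inode currently open for
	 * write, since trimming its COW fork blocks tends to worsen
	 * fragmentation; sync scans still require idle pagecache and dio
	 * state via xfs_can_free_cowblocks().
	 */
	static bool prep_free_cowblocks(struct xfs_inode *ip, bool sync)
	{
		if (!sync && inode_is_open_for_write(VFS_I(ip)))
			return false;
		return xfs_can_free_cowblocks(ip);
	}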

* tag 'xfs-6.12-fixes-3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: fix a typo
  xfs: don't free cowblocks from under dirty pagecache on unshare
  xfs: skip background cowblock trims on inodes open for write
  xfs: support lowmode allocations in xfs_bmap_exact_minlen_extent_alloc
  xfs: call xfs_bmap_exact_minlen_extent_alloc from xfs_bmap_btalloc
  xfs: don't ifdef around the exact minlen allocations
  xfs: fold xfs_bmap_alloc_userdata into xfs_bmapi_allocate
  xfs: distinguish extra split from real ENOSPC from xfs_attr_node_try_addname
  xfs: distinguish extra split from real ENOSPC from xfs_attr3_leaf_split
  xfs: return bool from xfs_attr3_leaf_add
  xfs: merge xfs_attr_leaf_try_add into xfs_attr_leaf_addname
  xfs: Use try_cmpxchg() in xlog_cil_insert_pcp_aggregate()
  xfs: scrub: convert comma to semicolon
  xfs: Remove empty declartion in header file
  MAINTAINERS: add Carlos Maiolino as XFS release manager
parents d3d15566 77bfe1b1
@@ -25404,7 +25404,7 @@ F: include/xen/arm/swiotlb-xen.h
F: include/xen/swiotlb-xen.h
XFS FILESYSTEM
M: Chandan Babu R <chandan.babu@oracle.com>
M: Carlos Maiolino <cem@kernel.org>
R: Darrick J. Wong <djwong@kernel.org>
L: linux-xfs@vger.kernel.org
S: Supported
......
@@ -2766,7 +2766,6 @@ xfs_alloc_commit_autoreap(
xfs_defer_item_unpause(tp, aarp->dfp);
}
#ifdef DEBUG
/*
* Check if an AGF has a free extent record whose length is equal to
* args->minlen.
@@ -2806,7 +2805,6 @@ xfs_exact_minlen_extent_available(
return error;
}
#endif
/*
* Decide whether to use this allocation group for this allocation.
@@ -2880,15 +2878,14 @@ xfs_alloc_fix_freelist(
if (!xfs_alloc_space_available(args, need, alloc_flags))
goto out_agbp_relse;
#ifdef DEBUG
if (args->alloc_minlen_only) {
if (IS_ENABLED(CONFIG_XFS_DEBUG) && args->alloc_minlen_only) {
int stat;
error = xfs_exact_minlen_extent_available(args, agbp, &stat);
if (error || !stat)
goto out_agbp_relse;
}
#endif
/*
* Make the freelist shorter if it's too long.
*
......
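
The two xfs_alloc.c hunks above drop the #ifdef DEBUG / #endif guards in
favor of an IS_ENABLED(CONFIG_XFS_DEBUG) check, so the debug-only path is
compiled (and type-checked) in every build while the optimizer discards
it when the option is off. A standalone sketch of the pattern, using a
made-up structure and field:

	#include <linux/kconfig.h>

	struct myfs_args {			/* hypothetical */
		bool alloc_minlen_only;
	};

	static bool myfs_wants_exact_minlen(const struct myfs_args *args)
	{
		/*
		 * IS_ENABLED() folds to a constant 0 when CONFIG_XFS_DEBUG
		 * is not set, so this branch is dead-code-eliminated yet
		 * can never bit-rot the way an #ifdef block can.
		 */
		return IS_ENABLED(CONFIG_XFS_DEBUG) && args->alloc_minlen_only;
	}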
@@ -53,11 +53,9 @@ typedef struct xfs_alloc_arg {
int datatype; /* mask defining data type treatment */
char wasdel; /* set if allocation was prev delayed */
char wasfromfl; /* set if allocation is from freelist */
bool alloc_minlen_only; /* allocate exact minlen extent */
struct xfs_owner_info oinfo; /* owner of blocks being allocated */
enum xfs_ag_resv_type resv; /* block reservation to use */
#ifdef DEBUG
bool alloc_minlen_only; /* allocate exact minlen extent */
#endif
} xfs_alloc_arg_t;
/*
......
@@ -51,7 +51,6 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp);
STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args);
/*
* Internal routines when attribute list is more than one block.
@@ -437,6 +436,33 @@ xfs_attr_hashval(
return xfs_attr_hashname(name, namelen);
}
/* Save the current remote block info and clear the current pointers. */
static void
xfs_attr_save_rmt_blk(
struct xfs_da_args *args)
{
args->blkno2 = args->blkno;
args->index2 = args->index;
args->rmtblkno2 = args->rmtblkno;
args->rmtblkcnt2 = args->rmtblkcnt;
args->rmtvaluelen2 = args->rmtvaluelen;
args->rmtblkno = 0;
args->rmtblkcnt = 0;
args->rmtvaluelen = 0;
}
/* Set stored info about a remote block */
static void
xfs_attr_restore_rmt_blk(
struct xfs_da_args *args)
{
args->blkno = args->blkno2;
args->index = args->index2;
args->rmtblkno = args->rmtblkno2;
args->rmtblkcnt = args->rmtblkcnt2;
args->rmtvaluelen = args->rmtvaluelen2;
}
/*
* PPTR_REPLACE operations require the caller to set the old and new names and
* values explicitly. Update the canonical fields to the new name and value
@@ -482,48 +508,73 @@ xfs_attr_complete_op(
return replace_state;
}
/*
* Try to add an attribute to an inode in leaf form.
*/
static int
xfs_attr_leaf_addname(
struct xfs_attr_intent *attr)
{
struct xfs_da_args *args = attr->xattri_da_args;
struct xfs_buf *bp;
int error;
ASSERT(xfs_attr_is_leaf(args->dp));
error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp);
if (error)
return error;
/*
* Use the leaf buffer we may already hold locked as a result of
* a sf-to-leaf conversion.
* Look up the xattr name to set the insertion point for the new xattr.
*/
error = xfs_attr_leaf_try_add(args);
if (error == -ENOSPC) {
error = xfs_attr3_leaf_to_node(args);
if (error)
return error;
error = xfs_attr3_leaf_lookup_int(bp, args);
switch (error) {
case -ENOATTR:
if (args->op_flags & XFS_DA_OP_REPLACE)
goto out_brelse;
break;
case -EEXIST:
if (!(args->op_flags & XFS_DA_OP_REPLACE))
goto out_brelse;
trace_xfs_attr_leaf_replace(args);
/*
* We're not in leaf format anymore, so roll the transaction and
* retry the add to the newly allocated node block.
* Save the existing remote attr state so that the current
* values reflect the state of the new attribute we are about to
* add, not the attribute we just found and will remove later.
*/
attr->xattri_dela_state = XFS_DAS_NODE_ADD;
goto out;
xfs_attr_save_rmt_blk(args);
break;
case 0:
break;
default:
goto out_brelse;
}
if (error)
return error;
/*
* We need to commit and roll if we need to allocate remote xattr blocks
* or perform more xattr manipulations. Otherwise there is nothing more
* to do and we can return success.
*/
if (args->rmtblkno)
if (!xfs_attr3_leaf_add(bp, args)) {
error = xfs_attr3_leaf_to_node(args);
if (error)
return error;
attr->xattri_dela_state = XFS_DAS_NODE_ADD;
} else if (args->rmtblkno) {
attr->xattri_dela_state = XFS_DAS_LEAF_SET_RMT;
else
attr->xattri_dela_state = xfs_attr_complete_op(attr,
XFS_DAS_LEAF_REPLACE);
out:
} else {
attr->xattri_dela_state =
xfs_attr_complete_op(attr, XFS_DAS_LEAF_REPLACE);
}
trace_xfs_attr_leaf_addname_return(attr->xattri_dela_state, args->dp);
return 0;
out_brelse:
xfs_trans_brelse(args->trans, bp);
return error;
}
@@ -546,7 +597,7 @@ xfs_attr_node_addname(
return error;
error = xfs_attr_node_try_addname(attr);
if (error == -ENOSPC) {
if (error == 1) {
error = xfs_attr3_leaf_to_node(args);
if (error)
return error;
@@ -1170,88 +1221,6 @@ xfs_attr_shortform_addname(
* External routines when attribute list is one block
*========================================================================*/
/* Save the current remote block info and clear the current pointers. */
static void
xfs_attr_save_rmt_blk(
struct xfs_da_args *args)
{
args->blkno2 = args->blkno;
args->index2 = args->index;
args->rmtblkno2 = args->rmtblkno;
args->rmtblkcnt2 = args->rmtblkcnt;
args->rmtvaluelen2 = args->rmtvaluelen;
args->rmtblkno = 0;
args->rmtblkcnt = 0;
args->rmtvaluelen = 0;
}
/* Set stored info about a remote block */
static void
xfs_attr_restore_rmt_blk(
struct xfs_da_args *args)
{
args->blkno = args->blkno2;
args->index = args->index2;
args->rmtblkno = args->rmtblkno2;
args->rmtblkcnt = args->rmtblkcnt2;
args->rmtvaluelen = args->rmtvaluelen2;
}
/*
* Tries to add an attribute to an inode in leaf form
*
* This function is meant to execute as part of a delayed operation and leaves
* the transaction handling to the caller. On success the attribute is added
* and the inode and transaction are left dirty. If there is not enough space,
* the attr data is converted to node format and -ENOSPC is returned. Caller is
* responsible for handling the dirty inode and transaction or adding the attr
* in node format.
*/
STATIC int
xfs_attr_leaf_try_add(
struct xfs_da_args *args)
{
struct xfs_buf *bp;
int error;
error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp);
if (error)
return error;
/*
* Look up the xattr name to set the insertion point for the new xattr.
*/
error = xfs_attr3_leaf_lookup_int(bp, args);
switch (error) {
case -ENOATTR:
if (args->op_flags & XFS_DA_OP_REPLACE)
goto out_brelse;
break;
case -EEXIST:
if (!(args->op_flags & XFS_DA_OP_REPLACE))
goto out_brelse;
trace_xfs_attr_leaf_replace(args);
/*
* Save the existing remote attr state so that the current
* values reflect the state of the new attribute we are about to
* add, not the attribute we just found and will remove later.
*/
xfs_attr_save_rmt_blk(args);
break;
case 0:
break;
default:
goto out_brelse;
}
return xfs_attr3_leaf_add(bp, args);
out_brelse:
xfs_trans_brelse(args->trans, bp);
return error;
}
/*
* Return EEXIST if attr is found, or ENOATTR if not
*/
@@ -1417,9 +1386,12 @@ xfs_attr_node_addname_find_attr(
/*
* Add a name to a Btree-format attribute list.
*
* This will involve walking down the Btree, and may involve splitting
* leaf nodes and even splitting intermediate nodes up to and including
* the root node (a special case of an intermediate node).
* This will involve walking down the Btree, and may involve splitting leaf
* nodes and even splitting intermediate nodes up to and including the root
* node (a special case of an intermediate node).
*
* If the tree was still in single leaf format and needs to converted to
* real node format return 1 and let the caller handle that.
*/
static int
xfs_attr_node_try_addname(
@@ -1427,21 +1399,21 @@ xfs_attr_node_try_addname(
{
struct xfs_da_state *state = attr->xattri_da_state;
struct xfs_da_state_blk *blk;
int error;
int error = 0;
trace_xfs_attr_node_addname(state->args);
blk = &state->path.blk[state->path.active-1];
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
error = xfs_attr3_leaf_add(blk->bp, state->args);
if (error == -ENOSPC) {
if (!xfs_attr3_leaf_add(blk->bp, state->args)) {
if (state->path.active == 1) {
/*
* Its really a single leaf node, but it had
* out-of-line values so it looked like it *might*
* have been a b-tree. Let the caller deal with this.
*/
error = 1;
goto out;
}
......
@@ -47,7 +47,7 @@
*/
STATIC int xfs_attr3_leaf_create(struct xfs_da_args *args,
xfs_dablk_t which_block, struct xfs_buf **bpp);
STATIC int xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
STATIC void xfs_attr3_leaf_add_work(struct xfs_buf *leaf_buffer,
struct xfs_attr3_icleaf_hdr *ichdr,
struct xfs_da_args *args, int freemap_index);
STATIC void xfs_attr3_leaf_compact(struct xfs_da_args *args,
@@ -995,10 +995,8 @@ xfs_attr_shortform_to_leaf(
xfs_attr_sethash(&nargs);
error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
ASSERT(error == -ENOATTR);
error = xfs_attr3_leaf_add(bp, &nargs);
ASSERT(error != -ENOSPC);
if (error)
goto out;
if (!xfs_attr3_leaf_add(bp, &nargs))
ASSERT(0);
sfe = xfs_attr_sf_nextentry(sfe);
}
error = 0;
@@ -1333,6 +1331,9 @@ xfs_attr3_leaf_create(
/*
* Split the leaf node, rebalance, then add the new entry.
*
* Returns 0 if the entry was added, 1 if a further split is needed or a
* negative error number otherwise.
*/
int
xfs_attr3_leaf_split(
@@ -1340,8 +1341,9 @@ xfs_attr3_leaf_split(
struct xfs_da_state_blk *oldblk,
struct xfs_da_state_blk *newblk)
{
xfs_dablk_t blkno;
int error;
bool added;
xfs_dablk_t blkno;
int error;
trace_xfs_attr_leaf_split(state->args);
@@ -1376,10 +1378,10 @@
*/
if (state->inleaf) {
trace_xfs_attr_leaf_add_old(state->args);
error = xfs_attr3_leaf_add(oldblk->bp, state->args);
added = xfs_attr3_leaf_add(oldblk->bp, state->args);
} else {
trace_xfs_attr_leaf_add_new(state->args);
error = xfs_attr3_leaf_add(newblk->bp, state->args);
added = xfs_attr3_leaf_add(newblk->bp, state->args);
}
/*
@@ -1387,13 +1389,15 @@
*/
oldblk->hashval = xfs_attr_leaf_lasthash(oldblk->bp, NULL);
newblk->hashval = xfs_attr_leaf_lasthash(newblk->bp, NULL);
return error;
if (!added)
return 1;
return 0;
}
/*
* Add a name to the leaf attribute list structure.
*/
int
bool
xfs_attr3_leaf_add(
struct xfs_buf *bp,
struct xfs_da_args *args)
@@ -1402,6 +1406,7 @@ xfs_attr3_leaf_add(
struct xfs_attr3_icleaf_hdr ichdr;
int tablesize;
int entsize;
bool added = true;
int sum;
int tmp;
int i;
@@ -1430,7 +1435,7 @@ xfs_attr3_leaf_add(
if (ichdr.freemap[i].base < ichdr.firstused)
tmp += sizeof(xfs_attr_leaf_entry_t);
if (ichdr.freemap[i].size >= tmp) {
tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
xfs_attr3_leaf_add_work(bp, &ichdr, args, i);
goto out_log_hdr;
}
sum += ichdr.freemap[i].size;
@@ -1442,7 +1447,7 @@ xfs_attr3_leaf_add(
* no good and we should just give up.
*/
if (!ichdr.holes && sum < entsize)
return -ENOSPC;
return false;
/*
* Compact the entries to coalesce free space.
@@ -1455,24 +1460,24 @@ xfs_attr3_leaf_add(
* free region, in freemap[0]. If it is not big enough, give up.
*/
if (ichdr.freemap[0].size < (entsize + sizeof(xfs_attr_leaf_entry_t))) {
tmp = -ENOSPC;
added = false;
goto out_log_hdr;
}
tmp = xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
xfs_attr3_leaf_add_work(bp, &ichdr, args, 0);
out_log_hdr:
xfs_attr3_leaf_hdr_to_disk(args->geo, leaf, &ichdr);
xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, &leaf->hdr,
xfs_attr3_leaf_hdr_size(leaf)));
return tmp;
return added;
}
/*
* Add a name to a leaf attribute list structure.
*/
STATIC int
STATIC void
xfs_attr3_leaf_add_work(
struct xfs_buf *bp,
struct xfs_attr3_icleaf_hdr *ichdr,
@@ -1590,7 +1595,6 @@ xfs_attr3_leaf_add_work(
}
}
ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
return 0;
}
/*
......
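
After this change, xfs_attr3_leaf_add() reports "added or not" as a
bool, and xfs_attr3_leaf_split() returns 0 on success, 1 when the caller
must perform a further split or conversion, or a negative errno for a
real failure. A condensed sketch of the caller-side convention
(simplified from the xfs_da_btree.c hunk below, not a verbatim call
site):

	int ret = xfs_attr3_leaf_split(state, oldblk, newblk);

	if (ret == 1)
		ret = -ENOSPC;	/* needs another split; surface as ENOSPC */
	if (ret < 0)
		return ret;	/* genuine error from the split itself */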
@@ -76,7 +76,7 @@ int xfs_attr3_leaf_split(struct xfs_da_state *state,
int xfs_attr3_leaf_lookup_int(struct xfs_buf *leaf,
struct xfs_da_args *args);
int xfs_attr3_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
bool xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
......
@@ -3477,31 +3477,19 @@ xfs_bmap_process_allocated_extent(
xfs_bmap_alloc_account(ap);
}
#ifdef DEBUG
static int
xfs_bmap_exact_minlen_extent_alloc(
struct xfs_bmalloca *ap)
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args)
{
struct xfs_mount *mp = ap->ip->i_mount;
struct xfs_alloc_arg args = { .tp = ap->tp, .mp = mp };
xfs_fileoff_t orig_offset;
xfs_extlen_t orig_length;
int error;
ASSERT(ap->length);
if (ap->minlen != 1) {
ap->blkno = NULLFSBLOCK;
ap->length = 0;
args->fsbno = NULLFSBLOCK;
return 0;
}
orig_offset = ap->offset;
orig_length = ap->length;
args.alloc_minlen_only = 1;
xfs_bmap_compute_alignments(ap, &args);
args->alloc_minlen_only = 1;
args->minlen = args->maxlen = ap->minlen;
args->total = ap->total;
/*
* Unlike the longest extent available in an AG, we don't track
@@ -3511,39 +3499,16 @@ xfs_bmap_exact_minlen_extent_alloc(
* we need not be concerned about a drop in performance in
* "debug only" code paths.
*/
ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0);
ap->blkno = XFS_AGB_TO_FSB(ap->ip->i_mount, 0, 0);
args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
args.minlen = args.maxlen = ap->minlen;
args.total = ap->total;
args.alignment = 1;
args.minalignslop = 0;
args.minleft = ap->minleft;
args.wasdel = ap->wasdel;
args.resv = XFS_AG_RESV_NONE;
args.datatype = ap->datatype;
error = xfs_alloc_vextent_first_ag(&args, ap->blkno);
if (error)
return error;
if (args.fsbno != NULLFSBLOCK) {
xfs_bmap_process_allocated_extent(ap, &args, orig_offset,
orig_length);
} else {
ap->blkno = NULLFSBLOCK;
ap->length = 0;
}
return 0;
/*
* Call xfs_bmap_btalloc_low_space here as it first does a "normal" AG
* iteration and then drops args->total to args->minlen, which might be
* required to find an allocation for the transaction reservation when
* the file system is very full.
*/
return xfs_bmap_btalloc_low_space(ap, args);
}
#else
#define xfs_bmap_exact_minlen_extent_alloc(bma) (-EFSCORRUPTED)
#endif
/*
* If we are not low on available data blocks and we are allocating at
@@ -3801,8 +3766,11 @@ xfs_bmap_btalloc(
/* Trim the allocation back to the maximum an AG can fit. */
args.maxlen = min(ap->length, mp->m_ag_max_usable);
if ((ap->datatype & XFS_ALLOC_USERDATA) &&
xfs_inode_is_filestream(ap->ip))
if (unlikely(XFS_TEST_ERROR(false, mp,
XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
error = xfs_bmap_exact_minlen_extent_alloc(ap, &args);
else if ((ap->datatype & XFS_ALLOC_USERDATA) &&
xfs_inode_is_filestream(ap->ip))
error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align);
else
error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align);
@@ -4176,43 +4144,6 @@ xfs_bmapi_reserve_delalloc(
return error;
}
static int
xfs_bmap_alloc_userdata(
struct xfs_bmalloca *bma)
{
struct xfs_mount *mp = bma->ip->i_mount;
int whichfork = xfs_bmapi_whichfork(bma->flags);
int error;
/*
* Set the data type being allocated. For the data fork, the first data
* in the file is treated differently to all other allocations. For the
* attribute fork, we only need to ensure the allocated range is not on
* the busy list.
*/
bma->datatype = XFS_ALLOC_NOBUSY;
if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
bma->datatype |= XFS_ALLOC_USERDATA;
if (bma->offset == 0)
bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
if (mp->m_dalign && bma->length >= mp->m_dalign) {
error = xfs_bmap_isaeof(bma, whichfork);
if (error)
return error;
}
if (XFS_IS_REALTIME_INODE(bma->ip))
return xfs_bmap_rtalloc(bma);
}
if (unlikely(XFS_TEST_ERROR(false, mp,
XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
return xfs_bmap_exact_minlen_extent_alloc(bma);
return xfs_bmap_btalloc(bma);
}
static int
xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
@@ -4230,15 +4161,32 @@ xfs_bmapi_allocate(
else
bma->minlen = 1;
if (bma->flags & XFS_BMAPI_METADATA) {
if (unlikely(XFS_TEST_ERROR(false, mp,
XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT)))
error = xfs_bmap_exact_minlen_extent_alloc(bma);
else
error = xfs_bmap_btalloc(bma);
} else {
error = xfs_bmap_alloc_userdata(bma);
if (!(bma->flags & XFS_BMAPI_METADATA)) {
/*
* For the data and COW fork, the first data in the file is
* treated differently to all other allocations. For the
* attribute fork, we only need to ensure the allocated range
* is not on the busy list.
*/
bma->datatype = XFS_ALLOC_NOBUSY;
if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
bma->datatype |= XFS_ALLOC_USERDATA;
if (bma->offset == 0)
bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
if (mp->m_dalign && bma->length >= mp->m_dalign) {
error = xfs_bmap_isaeof(bma, whichfork);
if (error)
return error;
}
}
}
if ((bma->datatype & XFS_ALLOC_USERDATA) &&
XFS_IS_REALTIME_INODE(bma->ip))
error = xfs_bmap_rtalloc(bma);
else
error = xfs_bmap_btalloc(bma);
if (error)
return error;
if (bma->blkno == NULLFSBLOCK)
......
@@ -593,9 +593,8 @@ xfs_da3_split(
switch (oldblk->magic) {
case XFS_ATTR_LEAF_MAGIC:
error = xfs_attr3_leaf_split(state, oldblk, newblk);
if ((error != 0) && (error != -ENOSPC)) {
if (error < 0)
return error; /* GROT: attr is inconsistent */
}
if (!error) {
addblk = newblk;
break;
@@ -617,6 +616,8 @@
error = xfs_attr3_leaf_split(state, newblk,
&state->extrablk);
}
if (error == 1)
return -ENOSPC;
if (error)
return error; /* GROT: attr inconsistent */
addblk = newblk;
......
@@ -657,7 +657,7 @@ xrep_ibt_build_new_trees(
* Start by setting up the inobt staging cursor.
*/
fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
XFS_IBT_BLOCK(sc->mp)),
XFS_IBT_BLOCK(sc->mp));
xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno,
XFS_AG_RESV_NONE);
ri->new_inobt.bload.claim_block = xrep_ibt_claim_block;
@@ -678,7 +678,7 @@
resv = XFS_AG_RESV_NONE;
fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
XFS_FIBT_BLOCK(sc->mp)),
XFS_FIBT_BLOCK(sc->mp));
xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT,
fsbno, resv);
ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;
......
@@ -1280,14 +1280,17 @@ xfs_inode_clear_eofblocks_tag(
}
/*
* Set ourselves up to free CoW blocks from this file. If it's already clean
* then we can bail out quickly, but otherwise we must back off if the file
* is undergoing some kind of write.
* Prepare to free COW fork blocks from an inode.
*/
static bool
xfs_prep_free_cowblocks(
struct xfs_inode *ip)
struct xfs_inode *ip,
struct xfs_icwalk *icw)
{
bool sync;
sync = icw && (icw->icw_flags & XFS_ICWALK_FLAG_SYNC);
/*
* Just clear the tag if we have an empty cow fork or none at all. It's
* possible the inode was fully unshared since it was originally tagged.
@@ -1299,16 +1302,22 @@
}
/*
* If the mapping is dirty or under writeback we cannot touch the
* CoW fork. Leave it alone if we're in the midst of a directio.
* A cowblocks trim of an inode can have a significant effect on
* fragmentation even when a reasonable COW extent size hint is set.
* Therefore, we prefer to not process cowblocks unless they are clean
* and idle. We can never process a cowblocks inode that is dirty or has
* in-flight I/O under any circumstances, because outstanding writeback
* or dio expects targeted COW fork blocks exist through write
* completion where they can be remapped into the data fork.
*
* Therefore, the heuristic used here is to never process inodes
* currently opened for write from background (i.e. non-sync) scans. For
* sync scans, use the pagecache/dio state of the inode to ensure we
* never free COW fork blocks out from under pending I/O.
*/
if ((VFS_I(ip)->i_state & I_DIRTY_PAGES) ||
mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY) ||
mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
atomic_read(&VFS_I(ip)->i_dio_count))
if (!sync && inode_is_open_for_write(VFS_I(ip)))
return false;
return true;
return xfs_can_free_cowblocks(ip);
}
/*
@@ -1337,7 +1346,7 @@ xfs_inode_free_cowblocks(
if (!xfs_iflags_test(ip, XFS_ICOWBLOCKS))
return 0;
if (!xfs_prep_free_cowblocks(ip))
if (!xfs_prep_free_cowblocks(ip, icw))
return 0;
if (!xfs_icwalk_match(ip, icw))
@@ -1366,7 +1375,7 @@
* Check again, nobody else should be able to dirty blocks or change
* the reflink iflag now that we have the first two locks held.
*/
if (xfs_prep_free_cowblocks(ip))
if (xfs_prep_free_cowblocks(ip, icw))
ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false);
return ret;
}
......
@@ -158,6 +158,4 @@ bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
bool xlog_force_shutdown(struct xlog *log, uint32_t shutdown_flags);
int xfs_attr_use_log_assist(struct xfs_mount *mp);
#endif /* __XFS_LOG_H__ */
@@ -156,7 +156,6 @@ xlog_cil_insert_pcp_aggregate(
struct xfs_cil *cil,
struct xfs_cil_ctx *ctx)
{
struct xlog_cil_pcp *cilpcp;
int cpu;
int count = 0;
@@ -171,13 +170,11 @@
* structures that could have a nonzero space_used.
*/
for_each_cpu(cpu, &ctx->cil_pcpmask) {
int old, prev;
struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
int old = READ_ONCE(cilpcp->space_used);
cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
do {
old = cilpcp->space_used;
prev = cmpxchg(&cilpcp->space_used, old, 0);
} while (old != prev);
while (!try_cmpxchg(&cilpcp->space_used, &old, 0))
;
count += old;
}
atomic_add(count, &ctx->space_used);
......
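
The xlog_cil_insert_pcp_aggregate() change above relies on try_cmpxchg()
semantics: unlike plain cmpxchg(), it returns a success boolean and, on
failure, writes the value it actually observed back into the
expected-value variable, so the retry loop needs no explicit re-read. An
illustrative equivalence sketch, where 'p' stands in for
&cilpcp->space_used (assumption, not the kernel code):

	int old, prev;

	/* open-coded retry loop, roughly the pattern being removed */
	do {
		old = READ_ONCE(*p);
		prev = cmpxchg(p, old, 0);
	} while (prev != old);

	/*
	 * Equivalent with try_cmpxchg(): on failure it stores the
	 * observed value into 'old', so the loop body stays empty.
	 */
	old = READ_ONCE(*p);
	while (!try_cmpxchg(p, &old, 0))
		;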
@@ -1849,7 +1849,7 @@ xlog_find_item_ops(
* from the transaction. However, we can't do that until after we've
* replayed all the other items because they may be dependent on the
* cancelled buffer and replaying the cancelled buffer can remove it
* form the cancelled buffer table. Hence they have tobe done last.
* form the cancelled buffer table. Hence they have to be done last.
*
* 3. Inode allocation buffers must be replayed before inode items that
* read the buffer and replay changes into it. For filesystems using the
......
@@ -1595,6 +1595,9 @@ xfs_reflink_clear_inode_flag(
ASSERT(xfs_is_reflink_inode(ip));
if (!xfs_can_free_cowblocks(ip))
return 0;
error = xfs_reflink_inode_has_shared_extents(*tpp, ip, &needs_flag);
if (error || needs_flag)
return error;
......
@@ -6,6 +6,25 @@
#ifndef __XFS_REFLINK_H
#define __XFS_REFLINK_H 1
/*
* Check whether it is safe to free COW fork blocks from an inode. It is unsafe
* to do so when an inode has dirty cache or I/O in-flight, even if no shared
* extents exist in the data fork, because outstanding I/O may target blocks
* that were speculatively allocated to the COW fork.
*/
static inline bool
xfs_can_free_cowblocks(struct xfs_inode *ip)
{
struct inode *inode = VFS_I(ip);
if ((inode->i_state & I_DIRTY_PAGES) ||
mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK) ||
atomic_read(&inode->i_dio_count))
return false;
return true;
}
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared);
int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
......