Commit bb8e7e9f authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'xfs-6.5-merge-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull more xfs updates from Darrick Wong:

 - Fix some ordering problems with log items during log recovery

 - Don't deadlock the system by trying to flush busy freed extents while
   holding on to busy freed extents

 - Improve validation of log geometry parameters when reading the
   primary superblock

 - Validate the length field in the AGF header

 - Fix recordset filtering bugs when re-calling GETFSMAP to return more
   results when the resultset didn't previously fit in the caller's
   buffer

 - Fix integer overflows in GETFSMAP when working with rt volumes larger
   than 2^32 fsblocks

 - Fix GETFSMAP reporting the undefined space beyond the last rtextent

 - Fix filtering bugs in GETFSMAP's log device backend if the log ever
   becomes longer than 2^32 fsblocks

 - Improve validation of file offsets in the GETFSMAP range parameters

 - Fix an off by one bug in the pmem media failure notification
   computation

 - Validate the length field in the AGI header too

* tag 'xfs-6.5-merge-5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: Remove unneeded semicolon
  xfs: AGI length should be bounds checked
  xfs: fix the calculation for "end" and "length"
  xfs: fix xfs_btree_query_range callers to initialize btree rec fully
  xfs: validate fsmap offsets specified in the query keys
  xfs: fix logdev fsmap query result filtering
  xfs: clean up the rtbitmap fsmap backend
  xfs: fix getfsmap reporting past the last rt extent
  xfs: fix integer overflows in the fsmap rtbitmap and logdev backends
  xfs: fix interval filtering in multi-step fsmap queries
  xfs: fix bounds check in xfs_defer_agfl_block()
  xfs: AGF length has never been bounds checked
  xfs: journal geometry is not properly bounds checked
  xfs: don't block in busy flushing when freeing extents
  xfs: allow extent free intents to be retried
  xfs: pass alloc flags through to xfs_extent_busy_flush()
  xfs: use deferred frees for btree block freeing
  xfs: don't reverse order of items in bulk AIL insertion
  xfs: remove redundant initializations of pointers drop_leaf and save_leaf
parents ace1ba1c 34acceaa
...@@ -985,7 +985,7 @@ xfs_ag_shrink_space( ...@@ -985,7 +985,7 @@ xfs_ag_shrink_space(
goto resv_err; goto resv_err;
err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
true); XFS_AG_RESV_NONE, true);
if (err2) if (err2)
goto resv_err; goto resv_err;
......
This diff is collapsed.
...@@ -19,11 +19,12 @@ unsigned int xfs_agfl_size(struct xfs_mount *mp); ...@@ -19,11 +19,12 @@ unsigned int xfs_agfl_size(struct xfs_mount *mp);
/* /*
* Flags for xfs_alloc_fix_freelist. * Flags for xfs_alloc_fix_freelist.
*/ */
#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ #define XFS_ALLOC_FLAG_TRYLOCK (1U << 0) /* use trylock for buffer locking */
#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ #define XFS_ALLOC_FLAG_FREEING (1U << 1) /* indicate caller is freeing extents*/
#define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */ #define XFS_ALLOC_FLAG_NORMAP (1U << 2) /* don't modify the rmapbt */
#define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */ #define XFS_ALLOC_FLAG_NOSHRINK (1U << 3) /* don't shrink the freelist */
#define XFS_ALLOC_FLAG_CHECK 0x00000010 /* test only, don't modify args */ #define XFS_ALLOC_FLAG_CHECK (1U << 4) /* test only, don't modify args */
#define XFS_ALLOC_FLAG_TRYFLUSH (1U << 5) /* don't wait in busy extent flush */
/* /*
* Argument structure for xfs_alloc routines. * Argument structure for xfs_alloc routines.
...@@ -195,7 +196,7 @@ int xfs_alloc_read_agfl(struct xfs_perag *pag, struct xfs_trans *tp, ...@@ -195,7 +196,7 @@ int xfs_alloc_read_agfl(struct xfs_perag *pag, struct xfs_trans *tp,
struct xfs_buf **bpp); struct xfs_buf **bpp);
int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t, int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t,
struct xfs_buf *, struct xfs_owner_info *); struct xfs_buf *, struct xfs_owner_info *);
int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags); int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, uint32_t alloc_flags);
int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag, int xfs_free_extent_fix_freelist(struct xfs_trans *tp, struct xfs_perag *pag,
struct xfs_buf **agbp); struct xfs_buf **agbp);
...@@ -232,7 +233,7 @@ xfs_buf_to_agfl_bno( ...@@ -232,7 +233,7 @@ xfs_buf_to_agfl_bno(
int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo, xfs_filblks_t len, const struct xfs_owner_info *oinfo,
bool skip_discard); enum xfs_ag_resv_type type, bool skip_discard);
/* /*
* List of extents to be free "later". * List of extents to be free "later".
...@@ -245,6 +246,7 @@ struct xfs_extent_free_item { ...@@ -245,6 +246,7 @@ struct xfs_extent_free_item {
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
struct xfs_perag *xefi_pag; struct xfs_perag *xefi_pag;
unsigned int xefi_flags; unsigned int xefi_flags;
enum xfs_ag_resv_type xefi_agresv;
}; };
void xfs_extent_free_get_group(struct xfs_mount *mp, void xfs_extent_free_get_group(struct xfs_mount *mp,
...@@ -259,9 +261,10 @@ xfs_free_extent_later( ...@@ -259,9 +261,10 @@ xfs_free_extent_later(
struct xfs_trans *tp, struct xfs_trans *tp,
xfs_fsblock_t bno, xfs_fsblock_t bno,
xfs_filblks_t len, xfs_filblks_t len,
const struct xfs_owner_info *oinfo) const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type)
{ {
return __xfs_free_extent_later(tp, bno, len, oinfo, false); return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
} }
...@@ -270,4 +273,7 @@ extern struct kmem_cache *xfs_extfree_item_cache; ...@@ -270,4 +273,7 @@ extern struct kmem_cache *xfs_extfree_item_cache;
int __init xfs_extfree_intent_init_cache(void); int __init xfs_extfree_intent_init_cache(void);
void xfs_extfree_intent_destroy_cache(void); void xfs_extfree_intent_destroy_cache(void);
xfs_failaddr_t xfs_validate_ag_length(struct xfs_buf *bp, uint32_t seqno,
uint32_t length);
#endif /* __XFS_ALLOC_H__ */ #endif /* __XFS_ALLOC_H__ */
...@@ -2293,8 +2293,6 @@ xfs_attr3_leaf_unbalance( ...@@ -2293,8 +2293,6 @@ xfs_attr3_leaf_unbalance(
trace_xfs_attr_leaf_unbalance(state->args); trace_xfs_attr_leaf_unbalance(state->args);
drop_leaf = drop_blk->bp->b_addr;
save_leaf = save_blk->bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf); xfs_attr3_leaf_hdr_from_disk(state->args->geo, &drophdr, drop_leaf);
xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf); xfs_attr3_leaf_hdr_from_disk(state->args->geo, &savehdr, save_leaf);
entry = xfs_attr3_leaf_entryp(drop_leaf); entry = xfs_attr3_leaf_entryp(drop_leaf);
......
...@@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents( ...@@ -574,7 +574,8 @@ xfs_bmap_btree_to_extents(
return error; return error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
XFS_AG_RESV_NONE);
if (error) if (error)
return error; return error;
...@@ -5236,8 +5237,9 @@ xfs_bmap_del_extent_real( ...@@ -5236,8 +5237,9 @@ xfs_bmap_del_extent_real(
} else { } else {
error = __xfs_free_extent_later(tp, del->br_startblock, error = __xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL, del->br_blockcount, NULL,
(bflags & XFS_BMAPI_NODISCARD) || XFS_AG_RESV_NONE,
del->br_state == XFS_EXT_UNWRITTEN); ((bflags & XFS_BMAPI_NODISCARD) ||
del->br_state == XFS_EXT_UNWRITTEN));
if (error) if (error)
goto done; goto done;
} }
......
...@@ -271,7 +271,8 @@ xfs_bmbt_free_block( ...@@ -271,7 +271,8 @@ xfs_bmbt_free_block(
int error; int error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
XFS_AG_RESV_NONE);
if (error) if (error)
return error; return error;
......
...@@ -1853,8 +1853,8 @@ xfs_difree_inode_chunk( ...@@ -1853,8 +1853,8 @@ xfs_difree_inode_chunk(
/* not sparse, calculate extent info directly */ /* not sparse, calculate extent info directly */
return xfs_free_extent_later(tp, return xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, sagbno), XFS_AGB_TO_FSB(mp, agno, sagbno),
M_IGEO(mp)->ialloc_blks, M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
&XFS_RMAP_OINFO_INODES); XFS_AG_RESV_NONE);
} }
/* holemask is only 16-bits (fits in an unsigned long) */ /* holemask is only 16-bits (fits in an unsigned long) */
...@@ -1899,8 +1899,8 @@ xfs_difree_inode_chunk( ...@@ -1899,8 +1899,8 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
error = xfs_free_extent_later(tp, error = xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, agbno), XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
contigblk, &XFS_RMAP_OINFO_INODES); &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
if (error) if (error)
return error; return error;
...@@ -2490,6 +2490,9 @@ xfs_agi_verify( ...@@ -2490,6 +2490,9 @@ xfs_agi_verify(
{ {
struct xfs_mount *mp = bp->b_mount; struct xfs_mount *mp = bp->b_mount;
struct xfs_agi *agi = bp->b_addr; struct xfs_agi *agi = bp->b_addr;
xfs_failaddr_t fa;
uint32_t agi_seqno = be32_to_cpu(agi->agi_seqno);
uint32_t agi_length = be32_to_cpu(agi->agi_length);
int i; int i;
if (xfs_has_crc(mp)) { if (xfs_has_crc(mp)) {
...@@ -2507,6 +2510,10 @@ xfs_agi_verify( ...@@ -2507,6 +2510,10 @@ xfs_agi_verify(
if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum))) if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
return __this_address; return __this_address;
fa = xfs_validate_ag_length(bp, agi_seqno, agi_length);
if (fa)
return fa;
if (be32_to_cpu(agi->agi_level) < 1 || if (be32_to_cpu(agi->agi_level) < 1 ||
be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels) be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels)
return __this_address; return __this_address;
...@@ -2516,15 +2523,6 @@ xfs_agi_verify( ...@@ -2516,15 +2523,6 @@ xfs_agi_verify(
be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels)) be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels))
return __this_address; return __this_address;
/*
* during growfs operations, the perag is not fully initialised,
* so we can't use it for any useful checking. growfs ensures we can't
* use it by using uncached buffers that don't have the perag attached
* so we can detect and avoid this problem.
*/
if (bp->b_pag && be32_to_cpu(agi->agi_seqno) != bp->b_pag->pag_agno)
return __this_address;
for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) { for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO)) if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO))
continue; continue;
......
...@@ -160,8 +160,7 @@ __xfs_inobt_free_block( ...@@ -160,8 +160,7 @@ __xfs_inobt_free_block(
xfs_inobt_mod_blockcount(cur, -1); xfs_inobt_mod_blockcount(cur, -1);
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
return xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
&XFS_RMAP_OINFO_INOBT, resv); &XFS_RMAP_OINFO_INOBT, resv);
} }
......
...@@ -1152,7 +1152,8 @@ xfs_refcount_adjust_extents( ...@@ -1152,7 +1152,8 @@ xfs_refcount_adjust_extents(
cur->bc_ag.pag->pag_agno, cur->bc_ag.pag->pag_agno,
tmp.rc_startblock); tmp.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno, error = xfs_free_extent_later(cur->bc_tp, fsbno,
tmp.rc_blockcount, NULL); tmp.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
if (error) if (error)
goto out_error; goto out_error;
} }
...@@ -1213,7 +1214,8 @@ xfs_refcount_adjust_extents( ...@@ -1213,7 +1214,8 @@ xfs_refcount_adjust_extents(
cur->bc_ag.pag->pag_agno, cur->bc_ag.pag->pag_agno,
ext.rc_startblock); ext.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno, error = xfs_free_extent_later(cur->bc_tp, fsbno,
ext.rc_blockcount, NULL); ext.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
if (error) if (error)
goto out_error; goto out_error;
} }
...@@ -1919,8 +1921,13 @@ xfs_refcount_recover_cow_leftovers( ...@@ -1919,8 +1921,13 @@ xfs_refcount_recover_cow_leftovers(
struct xfs_buf *agbp; struct xfs_buf *agbp;
struct xfs_refcount_recovery *rr, *n; struct xfs_refcount_recovery *rr, *n;
struct list_head debris; struct list_head debris;
union xfs_btree_irec low; union xfs_btree_irec low = {
union xfs_btree_irec high; .rc.rc_domain = XFS_REFC_DOMAIN_COW,
};
union xfs_btree_irec high = {
.rc.rc_domain = XFS_REFC_DOMAIN_COW,
.rc.rc_startblock = -1U,
};
xfs_fsblock_t fsb; xfs_fsblock_t fsb;
int error; int error;
...@@ -1951,10 +1958,6 @@ xfs_refcount_recover_cow_leftovers( ...@@ -1951,10 +1958,6 @@ xfs_refcount_recover_cow_leftovers(
cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
/* Find all the leftover CoW staging extents. */ /* Find all the leftover CoW staging extents. */
memset(&low, 0, sizeof(low));
memset(&high, 0, sizeof(high));
low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW;
high.rc.rc_startblock = -1U;
error = xfs_btree_query_range(cur, &low, &high, error = xfs_btree_query_range(cur, &low, &high,
xfs_refcount_recover_extent, &debris); xfs_refcount_recover_extent, &debris);
xfs_btree_del_cursor(cur, error); xfs_btree_del_cursor(cur, error);
...@@ -1981,7 +1984,8 @@ xfs_refcount_recover_cow_leftovers( ...@@ -1981,7 +1984,8 @@ xfs_refcount_recover_cow_leftovers(
/* Free the block. */ /* Free the block. */
error = xfs_free_extent_later(tp, fsb, error = xfs_free_extent_later(tp, fsb,
rr->rr_rrec.rc_blockcount, NULL); rr->rr_rrec.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
if (error) if (error)
goto out_trans; goto out_trans;
......
...@@ -106,19 +106,13 @@ xfs_refcountbt_free_block( ...@@ -106,19 +106,13 @@ xfs_refcountbt_free_block(
struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr; struct xfs_agf *agf = agbp->b_addr;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
int error;
trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno,
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1);
be32_add_cpu(&agf->agf_refcount_blocks, -1); be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
error = xfs_free_extent(cur->bc_tp, cur->bc_ag.pag, return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1,
&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
if (error)
return error;
return error;
} }
STATIC int STATIC int
......
...@@ -2389,14 +2389,10 @@ xfs_rmap_query_range( ...@@ -2389,14 +2389,10 @@ xfs_rmap_query_range(
xfs_rmap_query_range_fn fn, xfs_rmap_query_range_fn fn,
void *priv) void *priv)
{ {
union xfs_btree_irec low_brec; union xfs_btree_irec low_brec = { .r = *low_rec };
union xfs_btree_irec high_brec; union xfs_btree_irec high_brec = { .r = *high_rec };
struct xfs_rmap_query_range_info query; struct xfs_rmap_query_range_info query = { .priv = priv, .fn = fn };
low_brec.r = *low_rec;
high_brec.r = *high_rec;
query.priv = priv;
query.fn = fn;
return xfs_btree_query_range(cur, &low_brec, &high_brec, return xfs_btree_query_range(cur, &low_brec, &high_brec,
xfs_rmap_query_range_helper, &query); xfs_rmap_query_range_helper, &query);
} }
......
...@@ -412,7 +412,6 @@ xfs_validate_sb_common( ...@@ -412,7 +412,6 @@ xfs_validate_sb_common(
sbp->sb_inodelog < XFS_DINODE_MIN_LOG || sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
sbp->sb_inodelog > XFS_DINODE_MAX_LOG || sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
sbp->sb_inodesize != (1 << sbp->sb_inodelog) || sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE ||
sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES || XFS_FSB_TO_B(mp, sbp->sb_agblocks) < XFS_MIN_AG_BYTES ||
XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES || XFS_FSB_TO_B(mp, sbp->sb_agblocks) > XFS_MAX_AG_BYTES ||
...@@ -430,6 +429,61 @@ xfs_validate_sb_common( ...@@ -430,6 +429,61 @@ xfs_validate_sb_common(
return -EFSCORRUPTED; return -EFSCORRUPTED;
} }
/*
* Logs that are too large are not supported at all. Reject them
* outright. Logs that are too small are tolerated on v4 filesystems,
* but we can only check that when mounting the log. Hence we skip
* those checks here.
*/
if (sbp->sb_logblocks > XFS_MAX_LOG_BLOCKS) {
xfs_notice(mp,
"Log size 0x%x blocks too large, maximum size is 0x%llx blocks",
sbp->sb_logblocks, XFS_MAX_LOG_BLOCKS);
return -EFSCORRUPTED;
}
if (XFS_FSB_TO_B(mp, sbp->sb_logblocks) > XFS_MAX_LOG_BYTES) {
xfs_warn(mp,
"log size 0x%llx bytes too large, maximum size is 0x%llx bytes",
XFS_FSB_TO_B(mp, sbp->sb_logblocks),
XFS_MAX_LOG_BYTES);
return -EFSCORRUPTED;
}
/*
* Do not allow filesystems with corrupted log sector or stripe units to
* be mounted. We cannot safely size the iclogs or write to the log if
* the log stripe unit is not valid.
*/
if (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT) {
if (sbp->sb_logsectsize != (1U << sbp->sb_logsectlog)) {
xfs_notice(mp,
"log sector size in bytes/log2 (0x%x/0x%x) must match",
sbp->sb_logsectsize, 1U << sbp->sb_logsectlog);
return -EFSCORRUPTED;
}
} else if (sbp->sb_logsectsize || sbp->sb_logsectlog) {
xfs_notice(mp,
"log sector size in bytes/log2 (0x%x/0x%x) are not zero",
sbp->sb_logsectsize, sbp->sb_logsectlog);
return -EFSCORRUPTED;
}
if (sbp->sb_logsunit > 1) {
if (sbp->sb_logsunit % sbp->sb_blocksize) {
xfs_notice(mp,
"log stripe unit 0x%x bytes must be a multiple of block size",
sbp->sb_logsunit);
return -EFSCORRUPTED;
}
if (sbp->sb_logsunit > XLOG_MAX_RECORD_BSIZE) {
xfs_notice(mp,
"log stripe unit 0x%x bytes over maximum size (0x%x bytes)",
sbp->sb_logsunit, XLOG_MAX_RECORD_BSIZE);
return -EFSCORRUPTED;
}
}
/* Validate the realtime geometry; stolen from xfs_repair */ /* Validate the realtime geometry; stolen from xfs_repair */
if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE || if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||
sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) { sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) {
......
...@@ -566,20 +566,45 @@ xfs_extent_busy_clear( ...@@ -566,20 +566,45 @@ xfs_extent_busy_clear(
/* /*
* Flush out all busy extents for this AG. * Flush out all busy extents for this AG.
*
* If the current transaction is holding busy extents, the caller may not want
* to wait for committed busy extents to resolve. If we are being told just to
* try a flush or progress has been made since we last skipped a busy extent,
* return immediately to allow the caller to try again.
*
* If we are freeing extents, we might actually be holding the only free extents
* in the transaction busy list and the log force won't resolve that situation.
* In this case, we must return -EAGAIN to avoid a deadlock by informing the
* caller it needs to commit the busy extents it holds before retrying the
* extent free operation.
*/ */
void int
xfs_extent_busy_flush( xfs_extent_busy_flush(
struct xfs_mount *mp, struct xfs_trans *tp,
struct xfs_perag *pag, struct xfs_perag *pag,
unsigned busy_gen) unsigned busy_gen,
uint32_t alloc_flags)
{ {
DEFINE_WAIT (wait); DEFINE_WAIT (wait);
int error; int error;
error = xfs_log_force(mp, XFS_LOG_SYNC); error = xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
if (error) if (error)
return; return error;
/* Avoid deadlocks on uncommitted busy extents. */
if (!list_empty(&tp->t_busy)) {
if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH)
return 0;
if (busy_gen != READ_ONCE(pag->pagb_gen))
return 0;
if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
return -EAGAIN;
}
/* Wait for committed busy extents to resolve. */
do { do {
prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE); prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
if (busy_gen != READ_ONCE(pag->pagb_gen)) if (busy_gen != READ_ONCE(pag->pagb_gen))
...@@ -588,6 +613,7 @@ xfs_extent_busy_flush( ...@@ -588,6 +613,7 @@ xfs_extent_busy_flush(
} while (1); } while (1);
finish_wait(&pag->pagb_wait, &wait); finish_wait(&pag->pagb_wait, &wait);
return 0;
} }
void void
......
...@@ -51,9 +51,9 @@ bool ...@@ -51,9 +51,9 @@ bool
xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno, xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno,
xfs_extlen_t *len, unsigned *busy_gen); xfs_extlen_t *len, unsigned *busy_gen);
void int
xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag, xfs_extent_busy_flush(struct xfs_trans *tp, struct xfs_perag *pag,
unsigned busy_gen); unsigned busy_gen, uint32_t alloc_flags);
void void
xfs_extent_busy_wait_all(struct xfs_mount *mp); xfs_extent_busy_wait_all(struct xfs_mount *mp);
......
...@@ -336,6 +336,34 @@ xfs_trans_get_efd( ...@@ -336,6 +336,34 @@ xfs_trans_get_efd(
return efdp; return efdp;
} }
/*
* Fill the EFD with all extents from the EFI when we need to roll the
* transaction and continue with a new EFI.
*
* This simply copies all the extents in the EFI to the EFD rather than make
* assumptions about which extents in the EFI have already been processed. We
* currently keep the xefi list in the same order as the EFI extent list, but
* that may not always be the case. Copying everything avoids leaving a landmine
where we fail to cancel all the extents in an EFI if the xefi list is
processed in a different order to the extents in the EFI.
*/
static void
xfs_efd_from_efi(
struct xfs_efd_log_item *efdp)
{
/* The EFI this EFD is cancelling; source of the extent list. */
struct xfs_efi_log_item *efip = efdp->efd_efip;
uint i;
/* An EFI with no extents, or an already fully-copied EFD, is a logic bug. */
ASSERT(efip->efi_format.efi_nextents > 0);
ASSERT(efdp->efd_next_extent < efip->efi_format.efi_nextents);
/* Copy every EFI extent into the EFD, not just unprocessed ones. */
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
efdp->efd_format.efd_extents[i] =
efip->efi_format.efi_extents[i];
}
/* Mark the EFD as fully populated so no further extents are appended. */
efdp->efd_next_extent = efip->efi_format.efi_nextents;
}
/* /*
* Free an extent and log it to the EFD. Note that the transaction is marked * Free an extent and log it to the EFD. Note that the transaction is marked
* dirty regardless of whether the extent free succeeds or fails to support the * dirty regardless of whether the extent free succeeds or fails to support the
...@@ -365,7 +393,7 @@ xfs_trans_free_extent( ...@@ -365,7 +393,7 @@ xfs_trans_free_extent(
agbno, xefi->xefi_blockcount); agbno, xefi->xefi_blockcount);
error = __xfs_free_extent(tp, xefi->xefi_pag, agbno, error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,
xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
/* /*
...@@ -378,6 +406,17 @@ xfs_trans_free_extent( ...@@ -378,6 +406,17 @@ xfs_trans_free_extent(
tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE; tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE;
set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
/*
* If we need a new transaction to make progress, the caller will log a
* new EFI with the current contents. It will also log an EFD to cancel
* the existing EFI, and so we need to copy all the unprocessed extents
* in this EFI to the EFD so this works correctly.
*/
if (error == -EAGAIN) {
xfs_efd_from_efi(efdp);
return error;
}
next_extent = efdp->efd_next_extent; next_extent = efdp->efd_next_extent;
ASSERT(next_extent < efdp->efd_format.efd_nextents); ASSERT(next_extent < efdp->efd_format.efd_nextents);
extp = &(efdp->efd_format.efd_extents[next_extent]); extp = &(efdp->efd_format.efd_extents[next_extent]);
...@@ -495,6 +534,13 @@ xfs_extent_free_finish_item( ...@@ -495,6 +534,13 @@ xfs_extent_free_finish_item(
error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi); error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi);
/*
* Don't free the XEFI if we need a new transaction to complete
* processing of it.
*/
if (error == -EAGAIN)
return error;
xfs_extent_free_put_group(xefi); xfs_extent_free_put_group(xefi);
kmem_cache_free(xfs_extfree_item_cache, xefi); kmem_cache_free(xfs_extfree_item_cache, xefi);
return error; return error;
...@@ -620,6 +666,7 @@ xfs_efi_item_recover( ...@@ -620,6 +666,7 @@ xfs_efi_item_recover(
struct xfs_trans *tp; struct xfs_trans *tp;
int i; int i;
int error = 0; int error = 0;
bool requeue_only = false;
/* /*
* First check the validity of the extents described by the * First check the validity of the extents described by the
...@@ -644,6 +691,7 @@ xfs_efi_item_recover( ...@@ -644,6 +691,7 @@ xfs_efi_item_recover(
for (i = 0; i < efip->efi_format.efi_nextents; i++) { for (i = 0; i < efip->efi_format.efi_nextents; i++) {
struct xfs_extent_free_item fake = { struct xfs_extent_free_item fake = {
.xefi_owner = XFS_RMAP_OWN_UNKNOWN, .xefi_owner = XFS_RMAP_OWN_UNKNOWN,
.xefi_agresv = XFS_AG_RESV_NONE,
}; };
struct xfs_extent *extp; struct xfs_extent *extp;
...@@ -652,9 +700,28 @@ xfs_efi_item_recover( ...@@ -652,9 +700,28 @@ xfs_efi_item_recover(
fake.xefi_startblock = extp->ext_start; fake.xefi_startblock = extp->ext_start;
fake.xefi_blockcount = extp->ext_len; fake.xefi_blockcount = extp->ext_len;
if (!requeue_only) {
xfs_extent_free_get_group(mp, &fake); xfs_extent_free_get_group(mp, &fake);
error = xfs_trans_free_extent(tp, efdp, &fake); error = xfs_trans_free_extent(tp, efdp, &fake);
xfs_extent_free_put_group(&fake); xfs_extent_free_put_group(&fake);
}
/*
* If we can't free the extent without potentially deadlocking,
 * requeue the rest of the extents to a new transaction so that they
 * get run again later with a new transaction context.
*/
if (error == -EAGAIN || requeue_only) {
error = xfs_free_extent_later(tp, fake.xefi_startblock,
fake.xefi_blockcount,
&XFS_RMAP_OINFO_ANY_OWNER,
fake.xefi_agresv);
if (!error) {
requeue_only = true;
continue;
}
}
if (error == -EFSCORRUPTED) if (error == -EFSCORRUPTED)
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
extp, sizeof(*extp)); extp, sizeof(*extp));
......
This diff is collapsed.
...@@ -639,7 +639,6 @@ xfs_log_mount( ...@@ -639,7 +639,6 @@ xfs_log_mount(
int num_bblks) int num_bblks)
{ {
struct xlog *log; struct xlog *log;
bool fatal = xfs_has_crc(mp);
int error = 0; int error = 0;
int min_logfsbs; int min_logfsbs;
...@@ -663,53 +662,37 @@ xfs_log_mount( ...@@ -663,53 +662,37 @@ xfs_log_mount(
mp->m_log = log; mp->m_log = log;
/* /*
* Validate the given log space and drop a critical message via syslog * Now that we have set up the log and it's internal geometry
* if the log size is too small that would lead to some unexpected * parameters, we can validate the given log space and drop a critical
* situations in transaction log space reservation stage. * message via syslog if the log size is too small. A log that is too
* small can lead to unexpected situations in transaction log space
* reservation stage. The superblock verifier has already validated all
* the other log geometry constraints, so we don't have to check those
* here.
* *
* Note: we can't just reject the mount if the validation fails. This * Note: For v4 filesystems, we can't just reject the mount if the
* would mean that people would have to downgrade their kernel just to * validation fails. This would mean that people would have to
* remedy the situation as there is no way to grow the log (short of * downgrade their kernel just to remedy the situation as there is no
* black magic surgery with xfs_db). * way to grow the log (short of black magic surgery with xfs_db).
* *
* We can, however, reject mounts for CRC format filesystems, as the * We can, however, reject mounts for V5 format filesystems, as the
* mkfs binary being used to make the filesystem should never create a * mkfs binary being used to make the filesystem should never create a
* filesystem with a log that is too small. * filesystem with a log that is too small.
*/ */
min_logfsbs = xfs_log_calc_minimum_size(mp); min_logfsbs = xfs_log_calc_minimum_size(mp);
if (mp->m_sb.sb_logblocks < min_logfsbs) { if (mp->m_sb.sb_logblocks < min_logfsbs) {
xfs_warn(mp, xfs_warn(mp,
"Log size %d blocks too small, minimum size is %d blocks", "Log size %d blocks too small, minimum size is %d blocks",
mp->m_sb.sb_logblocks, min_logfsbs); mp->m_sb.sb_logblocks, min_logfsbs);
error = -EINVAL;
} else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
xfs_warn(mp,
"Log size %d blocks too large, maximum size is %lld blocks",
mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
error = -EINVAL;
} else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
xfs_warn(mp,
"log size %lld bytes too large, maximum size is %lld bytes",
XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
XFS_MAX_LOG_BYTES);
error = -EINVAL;
} else if (mp->m_sb.sb_logsunit > 1 &&
mp->m_sb.sb_logsunit % mp->m_sb.sb_blocksize) {
xfs_warn(mp,
"log stripe unit %u bytes must be a multiple of block size",
mp->m_sb.sb_logsunit);
error = -EINVAL;
fatal = true;
}
if (error) {
/* /*
* Log check errors are always fatal on v5; or whenever bad * Log check errors are always fatal on v5; or whenever bad
* metadata leads to a crash. * metadata leads to a crash.
*/ */
if (fatal) { if (xfs_has_crc(mp)) {
xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!"); xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
ASSERT(0); ASSERT(0);
error = -EINVAL;
goto out_free_log; goto out_free_log;
} }
xfs_crit(mp, "Log size out of supported range."); xfs_crit(mp, "Log size out of supported range.");
......
...@@ -114,7 +114,8 @@ xfs_dax_notify_ddev_failure( ...@@ -114,7 +114,8 @@ xfs_dax_notify_ddev_failure(
int error = 0; int error = 0;
xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, daddr); xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, daddr);
xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno); xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno);
xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp, daddr + bblen); xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp,
daddr + bblen - 1);
xfs_agnumber_t end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno); xfs_agnumber_t end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);
error = xfs_trans_alloc_empty(mp, &tp); error = xfs_trans_alloc_empty(mp, &tp);
...@@ -210,7 +211,7 @@ xfs_dax_notify_failure( ...@@ -210,7 +211,7 @@ xfs_dax_notify_failure(
ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1; ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;
/* Ignore the range out of filesystem area */ /* Ignore the range out of filesystem area */
if (offset + len < ddev_start) if (offset + len - 1 < ddev_start)
return -ENXIO; return -ENXIO;
if (offset > ddev_end) if (offset > ddev_end)
return -ENXIO; return -ENXIO;
...@@ -222,8 +223,8 @@ xfs_dax_notify_failure( ...@@ -222,8 +223,8 @@ xfs_dax_notify_failure(
len -= ddev_start - offset; len -= ddev_start - offset;
offset = 0; offset = 0;
} }
if (offset + len > ddev_end) if (offset + len - 1 > ddev_end)
len -= ddev_end - offset; len = ddev_end - offset + 1;
return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len), return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len),
mf_flags); mf_flags);
......
...@@ -617,7 +617,8 @@ xfs_reflink_cancel_cow_blocks( ...@@ -617,7 +617,8 @@ xfs_reflink_cancel_cow_blocks(
del.br_blockcount); del.br_blockcount);
error = xfs_free_extent_later(*tpp, del.br_startblock, error = xfs_free_extent_later(*tpp, del.br_startblock,
del.br_blockcount, NULL); del.br_blockcount, NULL,
XFS_AG_RESV_NONE);
if (error) if (error)
break; break;
......
...@@ -3623,6 +3623,31 @@ DEFINE_FSMAP_EVENT(xfs_fsmap_low_key); ...@@ -3623,6 +3623,31 @@ DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
DEFINE_FSMAP_EVENT(xfs_fsmap_high_key); DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
DEFINE_FSMAP_EVENT(xfs_fsmap_mapping); DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
DECLARE_EVENT_CLASS(xfs_fsmap_linear_class,
TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno),
TP_ARGS(mp, keydev, bno),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(dev_t, keydev)
__field(xfs_fsblock_t, bno)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->keydev = new_decode_dev(keydev);
__entry->bno = bno;
),
TP_printk("dev %d:%d keydev %d:%d bno 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
MAJOR(__entry->keydev), MINOR(__entry->keydev),
__entry->bno)
)
#define DEFINE_FSMAP_LINEAR_EVENT(name) \
DEFINE_EVENT(xfs_fsmap_linear_class, name, \
TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), \
TP_ARGS(mp, keydev, bno))
DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_low_key_linear);
DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_high_key_linear);
DECLARE_EVENT_CLASS(xfs_getfsmap_class, DECLARE_EVENT_CLASS(xfs_getfsmap_class,
TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap),
TP_ARGS(mp, fsmap), TP_ARGS(mp, fsmap),
......
...@@ -823,7 +823,7 @@ xfs_trans_ail_update_bulk( ...@@ -823,7 +823,7 @@ xfs_trans_ail_update_bulk(
trace_xfs_ail_insert(lip, 0, lsn); trace_xfs_ail_insert(lip, 0, lsn);
} }
lip->li_lsn = lsn; lip->li_lsn = lsn;
list_add(&lip->li_ail, &tmp); list_add_tail(&lip->li_ail, &tmp);
} }
if (!list_empty(&tmp)) if (!list_empty(&tmp))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment