Commit b8f1fa24 authored by Linus Torvalds

Merge tag 'xfs-6.7-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Chandan Babu:

 - Fix deadlock arising due to intent items in AIL not being cleared
   when log recovery fails

 - Fix stale data exposure bug when remapping COW fork extents to data
   fork

 - Fix deadlock when data device flush fails

 - Fix AGFL minimum size calculation

 - Select DEBUG_FS instead of XFS_DEBUG when XFS_ONLINE_SCRUB_STATS is
   selected

 - Fix corruption of log inode's extent count field when NREXT64 feature
   is enabled

* tag 'xfs-6.7-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: recovery should not clear di_flushiter unconditionally
  xfs: inode recovery does not validate the recovered inode
  xfs: fix again select in kconfig XFS_ONLINE_SCRUB_STATS
  xfs: fix internal error from AGFL exhaustion
  xfs: up(ic_sema) if flushing data device fails
  xfs: only remap the written blocks in xfs_reflink_end_cow_extent
  XFS: Update MAINTAINERS to catch all XFS documentation
  xfs: abort intent items when recovery intents fail
  xfs: factor out xfs_defer_pending_abort
parents bb28378a 7930d9e1
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -23882,8 +23882,7 @@ T:    git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git
 P:    Documentation/filesystems/xfs-maintainer-entry-profile.rst
 F:    Documentation/ABI/testing/sysfs-fs-xfs
 F:    Documentation/admin-guide/xfs.rst
-F:    Documentation/filesystems/xfs-delayed-logging-design.rst
-F:    Documentation/filesystems/xfs-self-describing-metadata.rst
+F:    Documentation/filesystems/xfs-*
 F:    fs/xfs/
 F:    include/uapi/linux/dqblk_xfs.h
 F:    include/uapi/linux/fsmap.h
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -147,7 +147,7 @@ config XFS_ONLINE_SCRUB_STATS
     bool "XFS online metadata check usage data collection"
     default y
     depends on XFS_ONLINE_SCRUB
-    select XFS_DEBUG
+    select DEBUG_FS
     help
       If you say Y here, the kernel will gather usage data about
       the online metadata check subsystem. This includes the number
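The fix matters because the scrub statistics are exported through debugfs, so the Kconfig symbol must pull in DEBUG_FS; the previous "select XFS_DEBUG" instead switched on XFS's debug-only checking code. As a rough, hypothetical illustration of the dependency (module and file names invented; this is not the actual fs/xfs/scrub/stats.c code), a module exporting a counter via debugfs needs nothing beyond the debugfs API:

// Hypothetical sketch: a counter exposed through debugfs, the
// facility XFS_ONLINE_SCRUB_STATS actually depends on.
#include <linux/debugfs.h>
#include <linux/module.h>

static struct dentry *demo_dir;
static u64 demo_checks_run;

static int __init demo_init(void)
{
    /* Creates /sys/kernel/debug/demo_scrub/checks_run */
    demo_dir = debugfs_create_dir("demo_scrub", NULL);
    debugfs_create_u64("checks_run", 0444, demo_dir, &demo_checks_run);
    return 0;
}

static void __exit demo_exit(void)
{
    debugfs_remove_recursive(demo_dir);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");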
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2275,16 +2275,37 @@ xfs_alloc_min_freelist(
     ASSERT(mp->m_alloc_maxlevels > 0);
 
+    /*
+     * For a btree shorter than the maximum height, the worst case is that
+     * every level gets split and a new level is added, then while inserting
+     * another entry to refill the AGFL, every level under the old root gets
+     * split again. This is:
+     *
+     *   (full height split reservation) + (AGFL refill split height)
+     * = (current height + 1) + (current height - 1)
+     * = (new height) + (new height - 2)
+     * = 2 * new height - 2
+     *
+     * For a btree of maximum height, the worst case is that every level
+     * under the root gets split, then while inserting another entry to
+     * refill the AGFL, every level under the root gets split again. This is
+     * also:
+     *
+     *   2 * (current height - 1)
+     * = 2 * (new height - 1)
+     * = 2 * new height - 2
+     */
+
     /* space needed by-bno freespace btree */
     min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
-            mp->m_alloc_maxlevels);
+            mp->m_alloc_maxlevels) * 2 - 2;
     /* space needed by-size freespace btree */
     min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
-            mp->m_alloc_maxlevels);
+            mp->m_alloc_maxlevels) * 2 - 2;
     /* space needed reverse mapping used space btree */
     if (xfs_has_rmapbt(mp))
         min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
-                mp->m_rmap_maxlevels);
+                mp->m_rmap_maxlevels) * 2 - 2;
 
     return min_free;
 }
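The worst-case arithmetic in the new comment is easy to sanity-check numerically. A small user-space sketch of the revised per-btree reservation, with m_alloc_maxlevels = 5 as an assumed example value (the kernel derives the real limit from AG geometry):

// Worked example of the new AGFL reservation formula:
// min(current height + 1, maxlevels) * 2 - 2 blocks per btree.
// User-space sketch; the kernel operates on xfs_mount geometry.
#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
    return a < b ? a : b;
}

/*
 * Worst-case free-list blocks one btree can consume: a full-height
 * split (current height + 1) plus a second split of every level
 * below the new root (new height - 2), i.e. 2 * new height - 2.
 */
static unsigned int btree_min_free(unsigned int cur_height,
                                   unsigned int maxlevels)
{
    return min_u(cur_height + 1, maxlevels) * 2 - 2;
}

int main(void)
{
    unsigned int maxlevels = 5;    /* assumed example geometry */
    unsigned int h;

    for (h = 1; h <= maxlevels; h++)
        printf("height %u -> reserve %u blocks\n",
               h, btree_min_free(h, maxlevels));
    return 0;
}

For a btree currently at height 2 this yields min(3, 5) * 2 - 2 = 4 blocks, where the old code reserved only min(3, 5) = 3 and could exhaust the AGFL mid-split.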
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -245,21 +245,18 @@ xfs_defer_create_intents(
     return ret;
 }
 
-/* Abort all the intents that were committed. */
 STATIC void
-xfs_defer_trans_abort(
-    struct xfs_trans        *tp,
-    struct list_head        *dop_pending)
+xfs_defer_pending_abort(
+    struct xfs_mount        *mp,
+    struct list_head        *dop_list)
 {
     struct xfs_defer_pending    *dfp;
     const struct xfs_defer_op_type    *ops;
 
-    trace_xfs_defer_trans_abort(tp, _RET_IP_);
-
     /* Abort intent items that don't have a done item. */
-    list_for_each_entry(dfp, dop_pending, dfp_list) {
+    list_for_each_entry(dfp, dop_list, dfp_list) {
         ops = defer_op_types[dfp->dfp_type];
-        trace_xfs_defer_pending_abort(tp->t_mountp, dfp);
+        trace_xfs_defer_pending_abort(mp, dfp);
         if (dfp->dfp_intent && !dfp->dfp_done) {
             ops->abort_intent(dfp->dfp_intent);
             dfp->dfp_intent = NULL;
@@ -267,6 +264,16 @@ xfs_defer_trans_abort(
     }
 }
 
+/* Abort all the intents that were committed. */
+STATIC void
+xfs_defer_trans_abort(
+    struct xfs_trans        *tp,
+    struct list_head        *dop_pending)
+{
+    trace_xfs_defer_trans_abort(tp, _RET_IP_);
+    xfs_defer_pending_abort(tp->t_mountp, dop_pending);
+}
+
 /*
  * Capture resources that the caller said not to release ("held") when the
  * transaction commits. Caller is responsible for zero-initializing @dres.
@@ -756,12 +763,13 @@ xfs_defer_ops_capture(
 
 /* Release all resources that we used to capture deferred ops. */
 void
-xfs_defer_ops_capture_free(
+xfs_defer_ops_capture_abort(
     struct xfs_mount        *mp,
     struct xfs_defer_capture    *dfc)
 {
     unsigned short            i;
 
+    xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
     xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
 
     for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
@@ -802,7 +810,7 @@ xfs_defer_ops_capture_and_commit(
     /* Commit the transaction and add the capture structure to the list. */
     error = xfs_trans_commit(tp);
     if (error) {
-        xfs_defer_ops_capture_free(mp, dfc);
+        xfs_defer_ops_capture_abort(mp, dfc);
         return error;
     }
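The point of the factoring is that the recovery failure path can now abort the logged intent items, releasing their AIL pins, before the pending-work list is freed; previously xfs_defer_ops_capture_free() only cancelled the list, leaving the intents in the AIL to pin the log tail and deadlock. A much-simplified user-space sketch of that ordering (stand-in types and callbacks, not the real xfs_defer machinery):

// Simplified sketch of the abort-then-cancel ordering this refactor
// enables on the recovery failure path.
#include <stdbool.h>
#include <stdio.h>

struct pending {
    const char *name;
    bool has_intent;    /* intent item logged */
    bool has_done;      /* matching done item logged */
};

static void abort_intent(struct pending *p)
{
    /* Drops the intent's hold on the AIL so the log tail can move. */
    printf("abort intent: %s\n", p->name);
    p->has_intent = false;
}

/* Mirrors xfs_defer_pending_abort(): only unfinished intents pin the AIL. */
static void pending_abort(struct pending *list, int n)
{
    for (int i = 0; i < n; i++)
        if (list[i].has_intent && !list[i].has_done)
            abort_intent(&list[i]);
}

static void cancel_list(struct pending *list, int n)
{
    for (int i = 0; i < n; i++)
        printf("free pending work: %s\n", list[i].name);
}

/* Mirrors xfs_defer_ops_capture_abort(): abort first, then free. */
static void capture_abort(struct pending *list, int n)
{
    pending_abort(list, n);
    cancel_list(list, n);
}

int main(void)
{
    struct pending work[] = {
        { "EFI (extent free)", true, false },
        { "RUI (rmap update)", true, true },
    };

    capture_abort(work, 2);
    return 0;
}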
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -121,7 +121,7 @@ int xfs_defer_ops_capture_and_commit(struct xfs_trans *tp,
         struct list_head *capture_list);
 void xfs_defer_ops_continue(struct xfs_defer_capture *d, struct xfs_trans *tp,
         struct xfs_defer_resources *dres);
-void xfs_defer_ops_capture_free(struct xfs_mount *mp,
+void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
         struct xfs_defer_capture *d);
 void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -510,6 +510,9 @@ xfs_dinode_verify(
     if (mode && nextents + naextents > nblocks)
         return __this_address;
 
+    if (nextents + naextents == 0 && nblocks != 0)
+        return __this_address;
+
     if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
         return __this_address;
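The added test is the converse of the one just above it: the existing check rejects more extents than blocks, the new one rejects blocks with no extents at all, which is the shape the NREXT64 extent-count corruption takes after bad recovery. The paired invariant in isolation (plain integers, simplified):

// The two complementary consistency checks, stand-alone: extents may
// not outnumber blocks, and nonzero blocks require at least one extent.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool dinode_extents_ok(uint64_t nextents, uint64_t naextents,
                              uint64_t nblocks, bool mode_set)
{
    if (mode_set && nextents + naextents > nblocks)
        return false;    /* existing check */
    if (nextents + naextents == 0 && nblocks != 0)
        return false;    /* new check: catches a zeroed extent count */
    return true;
}

int main(void)
{
    /* An inode claiming 100 blocks but zero extents is corrupt. */
    printf("%d\n", dinode_extents_ok(0, 0, 100, true));    /* 0 */
    printf("%d\n", dinode_extents_ok(3, 1, 100, true));    /* 1 */
    return 0;
}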
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -286,6 +286,7 @@ xlog_recover_inode_commit_pass2(
     struct xfs_log_dinode    *ldip;
     uint            isize;
     int            need_free = 0;
+    xfs_failaddr_t        fa;
 
     if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
         in_f = item->ri_buf[0].i_addr;
@@ -369,8 +370,8 @@ xlog_recover_inode_commit_pass2(
      * superblock flag to determine whether we need to look at di_flushiter
      * to skip replay when the on disk inode is newer than the log one
      */
-    if (!xfs_has_v3inodes(mp) &&
-        ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+    if (!xfs_has_v3inodes(mp)) {
+        if (ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
         /*
          * Deal with the wrap case, DI_MAX_FLUSH is less
          * than smaller numbers
@@ -387,6 +388,8 @@ xlog_recover_inode_commit_pass2(
 
         /* Take the opportunity to reset the flush iteration count */
         ldip->di_flushiter = 0;
+    }
+
     if (unlikely(S_ISREG(ldip->di_mode))) {
         if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
@@ -528,8 +531,19 @@ xlog_recover_inode_commit_pass2(
         (dip->di_mode != 0))
         error = xfs_recover_inode_owner_change(mp, dip, in_f,
                     buffer_list);
-    /* re-generate the checksum. */
+    /* re-generate the checksum and validate the recovered inode. */
     xfs_dinode_calc_crc(log->l_mp, dip);
+    fa = xfs_dinode_verify(log->l_mp, in_f->ilf_ino, dip);
+    if (fa) {
+        XFS_CORRUPTION_ERROR(
+            "Bad dinode after recovery",
+                XFS_ERRLEVEL_LOW, mp, dip, sizeof(*dip));
+        xfs_alert(mp,
+            "Metadata corruption detected at %pS, inode 0x%llx",
+            fa, in_f->ilf_ino);
+        error = -EFSCORRUPTED;
+        goto out_release;
+    }
 
     ASSERT(bp->b_mount == mp);
     bp->b_flags |= _XBF_LOGRECOVERY;
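xfs_dinode_verify() reports failures as an xfs_failaddr_t, the code address of the check that tripped, which the new xfs_alert() resolves to a symbol with %pS. A user-space approximation of that idiom using GCC's labels-as-values extension (the kernel's __this_address additionally inserts a barrier() so distinct checks keep distinct addresses; this sketch omits it):

// Approximation of the failaddr idiom: each failing check returns
// the address of a local label marking where verification tripped.
#include <stdio.h>

typedef void *failaddr_t;

/* Simplified __this_address: the address of a local label. */
#define this_address() ({ __label__ here; here: &&here; })

static failaddr_t verify_counts(unsigned int nextents,
                                unsigned int nblocks)
{
    if (nextents > nblocks)
        return this_address();
    if (nextents == 0 && nblocks != 0)
        return this_address();
    return NULL;    /* verification passed */
}

int main(void)
{
    failaddr_t fa = verify_counts(0, 100);

    /* The kernel prints this with %pS to name the failing check. */
    if (fa)
        printf("corruption detected at %p\n", fa);
    return 0;
}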
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1893,9 +1893,7 @@ xlog_write_iclog(
          * the buffer manually, the code needs to be kept in sync
          * with the I/O completion path.
          */
-        xlog_state_done_syncing(iclog);
-        up(&iclog->ic_sema);
-        return;
+        goto sync;
     }
 
     /*
@@ -1925,20 +1923,17 @@ xlog_write_iclog(
          * avoid shutdown re-entering this path and erroring out again.
          */
         if (log->l_targ != log->l_mp->m_ddev_targp &&
-            blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
-            xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
-            return;
-        }
+            blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev))
+            goto shutdown;
     }
     if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
         iclog->ic_bio.bi_opf |= REQ_FUA;
 
     iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
 
-    if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) {
-        xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
-        return;
-    }
+    if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count))
+        goto shutdown;
+
     if (is_vmalloc_addr(iclog->ic_data))
         flush_kernel_vmap_range(iclog->ic_data, count);
 
@@ -1959,6 +1954,12 @@ xlog_write_iclog(
     }
 
     submit_bio(&iclog->ic_bio);
+    return;
+shutdown:
+    xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
+sync:
+    xlog_state_done_syncing(iclog);
+    up(&iclog->ic_sema);
 }
 
 /*
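The restructuring is the standard kernel goto-unwind pattern: every early exit now funnels through the shutdown/sync labels, so no failure path can return without calling xlog_state_done_syncing() and releasing ic_sema, which is what previously left waiters deadlocked when the data device flush failed. In miniature, with a POSIX semaphore standing in for ic_sema (user-space sketch; compile with -pthread):

// Miniature of the goto-unwind fix: all error paths share one exit
// that signals completion and releases the semaphore.
#include <semaphore.h>
#include <stdbool.h>
#include <stdio.h>

static bool flush_device(void)   { return false; }  /* pretend the flush fails */
static bool map_data(void)       { return true; }
static void force_shutdown(void) { puts("log shut down"); }
static void done_syncing(void)   { puts("iclog state advanced"); }

static void write_iclog(sem_t *ic_sema)
{
    if (!flush_device())
        goto shutdown;    /* was: shutdown + bare return, leaking the sema */
    if (!map_data())
        goto shutdown;

    puts("bio submitted");    /* success: I/O completion does the release */
    return;

shutdown:
    force_shutdown();
    done_syncing();
    sem_post(ic_sema);    /* the up(&iclog->ic_sema) the bug skipped */
}

int main(void)
{
    sem_t ic_sema;

    sem_init(&ic_sema, 0, 0);
    write_iclog(&ic_sema);
    sem_wait(&ic_sema);    /* deadlocked here before the fix */
    puts("waiter released");
    sem_destroy(&ic_sema);
    return 0;
}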
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2511,7 +2511,7 @@ xlog_abort_defer_ops(
 
     list_for_each_entry_safe(dfc, next, capture_list, dfc_list) {
         list_del_init(&dfc->dfc_list);
-        xfs_defer_ops_capture_free(mp, dfc);
+        xfs_defer_ops_capture_abort(mp, dfc);
     }
 }
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -784,6 +784,7 @@ xfs_reflink_end_cow_extent(
         }
     }
     del = got;
+    xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb);
 
     /* Grab the corresponding mapping in the data fork. */
     nmaps = 1;
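Here got covers the whole COW-fork extent, but the caller may have written back only part of it; remapping the full extent would splice never-written blocks into the data fork and expose stale data. xfs_trim_extent() clamps del to the written range first. A plain-integer sketch of the helper's contract (assumes the ranges overlap, as the caller guarantees):

// Worked example of the trim step: clamp a COW-fork mapping to the
// sub-range that was actually written before remapping it.
#include <stdint.h>
#include <stdio.h>

struct irec {
    uint64_t startoff;      /* file offset, in blocks */
    uint64_t startblock;    /* disk block */
    uint64_t blockcount;
};

/* Same contract as xfs_trim_extent(): shrink *irec to [bno, bno + len). */
static void trim_extent(struct irec *irec, uint64_t bno, uint64_t len)
{
    uint64_t end = irec->startoff + irec->blockcount;

    if (bno > irec->startoff) {
        irec->startblock += bno - irec->startoff;
        irec->blockcount -= bno - irec->startoff;
        irec->startoff = bno;
    }
    if (end > bno + len)
        irec->blockcount -= end - (bno + len);
}

int main(void)
{
    /* COW fork allocated 36 blocks, but only 4 were written back. */
    struct irec del = { .startoff = 100, .startblock = 5000, .blockcount = 36 };
    uint64_t offset_fsb = 100, end_fsb = 104;

    trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
    printf("remap file [%llu, %llu) from disk block %llu\n",
           (unsigned long long)del.startoff,
           (unsigned long long)(del.startoff + del.blockcount),
           (unsigned long long)del.startblock);
    return 0;
}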