Commit 849a4f09 authored by Linus Torvalds

Merge tag 'xfs-6.4-rc1-fixes' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs bug fixes from Dave Chinner:
 "Largely minor bug fixes and cleanups, the most important of which are
  probably the fixes for regressions in the extent allocation code:

   - fixes for inode garbage collection shutdown racing with work queue
     updates

   - ensure inodegc workers run on the CPU they are supposed to

   - disable counter scrubbing until we can exclusively freeze the
     filesystem from the kernel

   - regression fixes for new allocation related bugs

   - a couple of minor cleanups"

* tag 'xfs-6.4-rc1-fixes' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: fix xfs_inodegc_stop racing with mod_delayed_work
  xfs: disable reaping in fscounters scrub
  xfs: check that per-cpu inodegc workers actually run on that cpu
  xfs: explicitly specify cpu when forcing inodegc delayed work to run immediately
  xfs: fix negative array access in xfs_getbmap
  xfs: don't allocate into the data fork for an unshare request
  xfs: flush dirty data and drain directios before scrubbing cow fork
  xfs: set bnobt/cntbt numrecs correctly when formatting new AGs
  xfs: don't unconditionally null args->pag in xfs_bmap_btalloc_at_eof
parents 105131df 2254a739
...@@ -495,10 +495,12 @@ xfs_freesp_init_recs( ...@@ -495,10 +495,12 @@ xfs_freesp_init_recs(
ASSERT(start >= mp->m_ag_prealloc_blocks); ASSERT(start >= mp->m_ag_prealloc_blocks);
if (start != mp->m_ag_prealloc_blocks) { if (start != mp->m_ag_prealloc_blocks) {
/* /*
* Modify first record to pad stripe align of log * Modify first record to pad stripe align of log and
* bump the record count.
*/ */
arec->ar_blockcount = cpu_to_be32(start - arec->ar_blockcount = cpu_to_be32(start -
mp->m_ag_prealloc_blocks); mp->m_ag_prealloc_blocks);
be16_add_cpu(&block->bb_numrecs, 1);
nrec = arec + 1; nrec = arec + 1;
/* /*
...@@ -509,7 +511,6 @@ xfs_freesp_init_recs( ...@@ -509,7 +511,6 @@ xfs_freesp_init_recs(
be32_to_cpu(arec->ar_startblock) + be32_to_cpu(arec->ar_startblock) +
be32_to_cpu(arec->ar_blockcount)); be32_to_cpu(arec->ar_blockcount));
arec = nrec; arec = nrec;
be16_add_cpu(&block->bb_numrecs, 1);
} }
/* /*
* Change record start to after the internal log * Change record start to after the internal log
...@@ -518,15 +519,13 @@ xfs_freesp_init_recs( ...@@ -518,15 +519,13 @@ xfs_freesp_init_recs(
} }
/* /*
* Calculate the record block count and check for the case where * Calculate the block count of this record; if it is nonzero,
* the log might have consumed all available space in the AG. If * increment the record count.
* so, reset the record count to 0 to avoid exposure of an invalid
* record start block.
*/ */
arec->ar_blockcount = cpu_to_be32(id->agsize - arec->ar_blockcount = cpu_to_be32(id->agsize -
be32_to_cpu(arec->ar_startblock)); be32_to_cpu(arec->ar_startblock));
if (!arec->ar_blockcount) if (arec->ar_blockcount)
block->bb_numrecs = 0; be16_add_cpu(&block->bb_numrecs, 1);
} }
/* /*
...@@ -538,7 +537,7 @@ xfs_bnoroot_init( ...@@ -538,7 +537,7 @@ xfs_bnoroot_init(
struct xfs_buf *bp, struct xfs_buf *bp,
struct aghdr_init_data *id) struct aghdr_init_data *id)
{ {
xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno); xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno);
xfs_freesp_init_recs(mp, bp, id); xfs_freesp_init_recs(mp, bp, id);
} }
...@@ -548,7 +547,7 @@ xfs_cntroot_init( ...@@ -548,7 +547,7 @@ xfs_cntroot_init(
struct xfs_buf *bp, struct xfs_buf *bp,
struct aghdr_init_data *id) struct aghdr_init_data *id)
{ {
xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno); xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno);
xfs_freesp_init_recs(mp, bp, id); xfs_freesp_init_recs(mp, bp, id);
} }
......
...@@ -3494,8 +3494,10 @@ xfs_bmap_btalloc_at_eof( ...@@ -3494,8 +3494,10 @@ xfs_bmap_btalloc_at_eof(
if (!caller_pag) if (!caller_pag)
args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno)); args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno));
error = xfs_alloc_vextent_exact_bno(args, ap->blkno); error = xfs_alloc_vextent_exact_bno(args, ap->blkno);
if (!caller_pag) if (!caller_pag) {
xfs_perag_put(args->pag); xfs_perag_put(args->pag);
args->pag = NULL;
}
if (error) if (error)
return error; return error;
...@@ -3505,7 +3507,6 @@ xfs_bmap_btalloc_at_eof( ...@@ -3505,7 +3507,6 @@ xfs_bmap_btalloc_at_eof(
* Exact allocation failed. Reset to try an aligned allocation * Exact allocation failed. Reset to try an aligned allocation
* according to the original allocation specification. * according to the original allocation specification.
*/ */
args->pag = NULL;
args->alignment = stripe_align; args->alignment = stripe_align;
args->minlen = nextminlen; args->minlen = nextminlen;
args->minalignslop = 0; args->minalignslop = 0;
......
...@@ -42,12 +42,12 @@ xchk_setup_inode_bmap( ...@@ -42,12 +42,12 @@ xchk_setup_inode_bmap(
xfs_ilock(sc->ip, XFS_IOLOCK_EXCL); xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
/* /*
* We don't want any ephemeral data fork updates sitting around * We don't want any ephemeral data/cow fork updates sitting around
* while we inspect block mappings, so wait for directio to finish * while we inspect block mappings, so wait for directio to finish
* and flush dirty data if we have delalloc reservations. * and flush dirty data if we have delalloc reservations.
*/ */
if (S_ISREG(VFS_I(sc->ip)->i_mode) && if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) { sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
struct address_space *mapping = VFS_I(sc->ip)->i_mapping; struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
sc->ilock_flags |= XFS_MMAPLOCK_EXCL; sc->ilock_flags |= XFS_MMAPLOCK_EXCL;
......
...@@ -1164,32 +1164,6 @@ xchk_metadata_inode_forks( ...@@ -1164,32 +1164,6 @@ xchk_metadata_inode_forks(
return 0; return 0;
} }
/* Pause background reaping of resources. */
void
xchk_stop_reaping(
struct xfs_scrub *sc)
{
sc->flags |= XCHK_REAPING_DISABLED;
xfs_blockgc_stop(sc->mp);
xfs_inodegc_stop(sc->mp);
}
/* Restart background reaping of resources. */
void
xchk_start_reaping(
struct xfs_scrub *sc)
{
/*
* Readonly filesystems do not perform inactivation or speculative
* preallocation, so there's no need to restart the workers.
*/
if (!xfs_is_readonly(sc->mp)) {
xfs_inodegc_start(sc->mp);
xfs_blockgc_start(sc->mp);
}
sc->flags &= ~XCHK_REAPING_DISABLED;
}
/* /*
* Enable filesystem hooks (i.e. runtime code patching) before starting a scrub * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
* operation. Callers must not hold any locks that intersect with the CPU * operation. Callers must not hold any locks that intersect with the CPU
......
...@@ -156,8 +156,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm) ...@@ -156,8 +156,6 @@ static inline bool xchk_skip_xref(struct xfs_scrub_metadata *sm)
} }
int xchk_metadata_inode_forks(struct xfs_scrub *sc); int xchk_metadata_inode_forks(struct xfs_scrub *sc);
void xchk_stop_reaping(struct xfs_scrub *sc);
void xchk_start_reaping(struct xfs_scrub *sc);
/* /*
* Setting up a hook to wait for intents to drain is costly -- we have to take * Setting up a hook to wait for intents to drain is costly -- we have to take
......
...@@ -150,13 +150,6 @@ xchk_setup_fscounters( ...@@ -150,13 +150,6 @@ xchk_setup_fscounters(
if (error) if (error)
return error; return error;
/*
* Pause background reclaim while we're scrubbing to reduce the
* likelihood of background perturbations to the counters throwing off
* our calculations.
*/
xchk_stop_reaping(sc);
return xchk_trans_alloc(sc, 0); return xchk_trans_alloc(sc, 0);
} }
...@@ -453,6 +446,12 @@ xchk_fscounters( ...@@ -453,6 +446,12 @@ xchk_fscounters(
if (frextents > mp->m_sb.sb_rextents) if (frextents > mp->m_sb.sb_rextents)
xchk_set_corrupt(sc); xchk_set_corrupt(sc);
/*
* XXX: We can't quiesce percpu counter updates, so exit early.
* This can be re-enabled when we gain exclusive freeze functionality.
*/
return 0;
/* /*
* If ifree exceeds icount by more than the minimum variance then * If ifree exceeds icount by more than the minimum variance then
* something's probably wrong with the counters. * something's probably wrong with the counters.
......
...@@ -186,8 +186,6 @@ xchk_teardown( ...@@ -186,8 +186,6 @@ xchk_teardown(
} }
if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
mnt_drop_write_file(sc->file); mnt_drop_write_file(sc->file);
if (sc->flags & XCHK_REAPING_DISABLED)
xchk_start_reaping(sc);
if (sc->buf) { if (sc->buf) {
if (sc->buf_cleanup) if (sc->buf_cleanup)
sc->buf_cleanup(sc->buf); sc->buf_cleanup(sc->buf);
......
...@@ -106,7 +106,6 @@ struct xfs_scrub { ...@@ -106,7 +106,6 @@ struct xfs_scrub {
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */ /* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */ #define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
#define XCHK_REAPING_DISABLED (1 << 1) /* background block reaping paused */
#define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */ #define XCHK_FSGATES_DRAIN (1 << 2) /* defer ops draining enabled */
#define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */ #define XCHK_NEED_DRAIN (1 << 3) /* scrub needs to drain defer ops */
#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */ #define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
......
...@@ -98,7 +98,6 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS); ...@@ -98,7 +98,6 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
#define XFS_SCRUB_STATE_STRINGS \ #define XFS_SCRUB_STATE_STRINGS \
{ XCHK_TRY_HARDER, "try_harder" }, \ { XCHK_TRY_HARDER, "try_harder" }, \
{ XCHK_REAPING_DISABLED, "reaping_disabled" }, \
{ XCHK_FSGATES_DRAIN, "fsgates_drain" }, \ { XCHK_FSGATES_DRAIN, "fsgates_drain" }, \
{ XCHK_NEED_DRAIN, "need_drain" }, \ { XCHK_NEED_DRAIN, "need_drain" }, \
{ XREP_ALREADY_FIXED, "already_fixed" } { XREP_ALREADY_FIXED, "already_fixed" }
......
...@@ -558,7 +558,9 @@ xfs_getbmap( ...@@ -558,7 +558,9 @@ xfs_getbmap(
if (!xfs_iext_next_extent(ifp, &icur, &got)) { if (!xfs_iext_next_extent(ifp, &icur, &got)) {
xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); xfs_fileoff_t end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST; if (bmv->bmv_entries > 0)
out[bmv->bmv_entries - 1].bmv_oflags |=
BMV_OF_LAST;
if (whichfork != XFS_ATTR_FORK && bno < end && if (whichfork != XFS_ATTR_FORK && bno < end &&
!xfs_getbmap_full(bmv)) { !xfs_getbmap_full(bmv)) {
......
...@@ -435,18 +435,23 @@ xfs_iget_check_free_state( ...@@ -435,18 +435,23 @@ xfs_iget_check_free_state(
} }
/* Make all pending inactivation work start immediately. */ /* Make all pending inactivation work start immediately. */
static void static bool
xfs_inodegc_queue_all( xfs_inodegc_queue_all(
struct xfs_mount *mp) struct xfs_mount *mp)
{ {
struct xfs_inodegc *gc; struct xfs_inodegc *gc;
int cpu; int cpu;
bool ret = false;
for_each_online_cpu(cpu) { for_each_online_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu); gc = per_cpu_ptr(mp->m_inodegc, cpu);
if (!llist_empty(&gc->list)) if (!llist_empty(&gc->list)) {
mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0); mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
ret = true;
}
} }
return ret;
} }
/* /*
...@@ -1856,6 +1861,8 @@ xfs_inodegc_worker( ...@@ -1856,6 +1861,8 @@ xfs_inodegc_worker(
struct xfs_inode *ip, *n; struct xfs_inode *ip, *n;
unsigned int nofs_flag; unsigned int nofs_flag;
ASSERT(gc->cpu == smp_processor_id());
WRITE_ONCE(gc->items, 0); WRITE_ONCE(gc->items, 0);
if (!node) if (!node)
...@@ -1909,24 +1916,41 @@ xfs_inodegc_flush( ...@@ -1909,24 +1916,41 @@ xfs_inodegc_flush(
/* /*
* Flush all the pending work and then disable the inode inactivation background * Flush all the pending work and then disable the inode inactivation background
* workers and wait for them to stop. * workers and wait for them to stop. Caller must hold sb->s_umount to
* coordinate changes in the inodegc_enabled state.
*/ */
void void
xfs_inodegc_stop( xfs_inodegc_stop(
struct xfs_mount *mp) struct xfs_mount *mp)
{ {
bool rerun;
if (!xfs_clear_inodegc_enabled(mp)) if (!xfs_clear_inodegc_enabled(mp))
return; return;
/*
* Drain all pending inodegc work, including inodes that could be
* queued by racing xfs_inodegc_queue or xfs_inodegc_shrinker_scan
* threads that sample the inodegc state just prior to us clearing it.
* The inodegc flag state prevents new threads from queuing more
* inodes, so we queue pending work items and flush the workqueue until
* all inodegc lists are empty. IOWs, we cannot use drain_workqueue
* here because it does not allow other unserialized mechanisms to
* reschedule inodegc work while this draining is in progress.
*/
xfs_inodegc_queue_all(mp); xfs_inodegc_queue_all(mp);
drain_workqueue(mp->m_inodegc_wq); do {
flush_workqueue(mp->m_inodegc_wq);
rerun = xfs_inodegc_queue_all(mp);
} while (rerun);
trace_xfs_inodegc_stop(mp, __return_address); trace_xfs_inodegc_stop(mp, __return_address);
} }
/* /*
* Enable the inode inactivation background workers and schedule deferred inode * Enable the inode inactivation background workers and schedule deferred inode
* inactivation work if there is any. * inactivation work if there is any. Caller must hold sb->s_umount to
* coordinate changes in the inodegc_enabled state.
*/ */
void void
xfs_inodegc_start( xfs_inodegc_start(
...@@ -2069,7 +2093,8 @@ xfs_inodegc_queue( ...@@ -2069,7 +2093,8 @@ xfs_inodegc_queue(
queue_delay = 0; queue_delay = 0;
trace_xfs_inodegc_queue(mp, __return_address); trace_xfs_inodegc_queue(mp, __return_address);
mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay); mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
queue_delay);
put_cpu_ptr(gc); put_cpu_ptr(gc);
if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) { if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
...@@ -2113,7 +2138,8 @@ xfs_inodegc_cpu_dead( ...@@ -2113,7 +2138,8 @@ xfs_inodegc_cpu_dead(
if (xfs_is_inodegc_enabled(mp)) { if (xfs_is_inodegc_enabled(mp)) {
trace_xfs_inodegc_queue(mp, __return_address); trace_xfs_inodegc_queue(mp, __return_address);
mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0); mod_delayed_work_on(current_cpu(), mp->m_inodegc_wq, &gc->work,
0);
} }
put_cpu_ptr(gc); put_cpu_ptr(gc);
} }
......
...@@ -1006,8 +1006,9 @@ xfs_buffered_write_iomap_begin( ...@@ -1006,8 +1006,9 @@ xfs_buffered_write_iomap_begin(
if (eof) if (eof)
imap.br_startoff = end_fsb; /* fake hole until the end */ imap.br_startoff = end_fsb; /* fake hole until the end */
/* We never need to allocate blocks for zeroing a hole. */ /* We never need to allocate blocks for zeroing or unsharing a hole. */
if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) { if ((flags & (IOMAP_UNSHARE | IOMAP_ZERO)) &&
imap.br_startoff > offset_fsb) {
xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff); xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
goto out_unlock; goto out_unlock;
} }
......
...@@ -66,6 +66,9 @@ struct xfs_inodegc { ...@@ -66,6 +66,9 @@ struct xfs_inodegc {
/* approximate count of inodes in the list */ /* approximate count of inodes in the list */
unsigned int items; unsigned int items;
unsigned int shrinker_hits; unsigned int shrinker_hits;
#if defined(DEBUG) || defined(XFS_WARN)
unsigned int cpu;
#endif
}; };
/* /*
......
...@@ -1095,6 +1095,9 @@ xfs_inodegc_init_percpu( ...@@ -1095,6 +1095,9 @@ xfs_inodegc_init_percpu(
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu); gc = per_cpu_ptr(mp->m_inodegc, cpu);
#if defined(DEBUG) || defined(XFS_WARN)
gc->cpu = cpu;
#endif
init_llist_head(&gc->list); init_llist_head(&gc->list);
gc->items = 0; gc->items = 0;
INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker); INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment