Commit 1e791234 authored by Dave Chinner, committed by Dave Chinner

Merge tag 'scrub-iget-fixes-6.4_2023-04-12' of...

Merge tag 'scrub-iget-fixes-6.4_2023-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into guilt/xfs-for-next

xfs: fix iget/irele usage in online fsck [v24.5]

This patchset fixes a handful of problems relating to how we get and
release incore inodes in the online scrub code.  The first patch fixes
how we handle DONTCACHE -- our reasons for setting (or clearing) it
depend entirely on the runtime environment at irele time.  Hence we can
refactor iget and irele to use our own wrappers that set that context
appropriately.

The second patch fixes a race between the iget call in the inode core
scrubber and other writer threads that are allocating or freeing inodes
in the same AG by changing the behavior of xchk_iget (and the inode core
scrub setup function) to return either an incore inode or the AGI buffer
so that we can be sure that the inode cannot disappear on us.

The final patch elides MMAPLOCK from scrub paths when possible.  It did
not fit anywhere else.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
parents a4466722 1fc7a059
...@@ -34,12 +34,12 @@ xchk_setup_inode_bmap( ...@@ -34,12 +34,12 @@ xchk_setup_inode_bmap(
if (xchk_need_intent_drain(sc)) if (xchk_need_intent_drain(sc))
xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
error = xchk_get_inode(sc); error = xchk_iget_for_scrubbing(sc);
if (error) if (error)
goto out; goto out;
sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; sc->ilock_flags = XFS_IOLOCK_EXCL;
xfs_ilock(sc->ip, sc->ilock_flags); xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);
/* /*
* We don't want any ephemeral data fork updates sitting around * We don't want any ephemeral data fork updates sitting around
...@@ -50,6 +50,9 @@ xchk_setup_inode_bmap( ...@@ -50,6 +50,9 @@ xchk_setup_inode_bmap(
sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) { sc->sm->sm_type == XFS_SCRUB_TYPE_BMBTD) {
struct address_space *mapping = VFS_I(sc->ip)->i_mapping; struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
sc->ilock_flags |= XFS_MMAPLOCK_EXCL;
xfs_ilock(sc->ip, XFS_MMAPLOCK_EXCL);
inode_dio_wait(VFS_I(sc->ip)); inode_dio_wait(VFS_I(sc->ip));
/* /*
......
...@@ -643,6 +643,14 @@ xchk_ag_init( ...@@ -643,6 +643,14 @@ xchk_ag_init(
/* Per-scrubber setup functions */ /* Per-scrubber setup functions */
/*
 * Cancel the scrub transaction and clear sc->tp so that the scrub context
 * is not left pointing at a transaction that has already been cancelled.
 */
void
xchk_trans_cancel(
	struct xfs_scrub	*sc)
{
	struct xfs_trans	*scrub_tp = sc->tp;

	xfs_trans_cancel(scrub_tp);
	sc->tp = NULL;
}
/* /*
* Grab an empty transaction so that we can re-grab locked buffers if * Grab an empty transaction so that we can re-grab locked buffers if
* one of our btrees turns out to be cyclic. * one of our btrees turns out to be cyclic.
...@@ -718,80 +726,273 @@ xchk_checkpoint_log( ...@@ -718,80 +726,273 @@ xchk_checkpoint_log(
return 0; return 0;
} }
/*
 * Look up an inode by number on behalf of the scrubber and hand back the
 * incore inode through @ipp.  The lookup is made with XFS_IGET_UNTRUSTED
 * so that the allocation status of @inum is checked rather than assumed,
 * and it runs in the context of whatever transaction (possibly none) is
 * attached to @sc.  Returns 0 or the negative errno from xfs_iget().
 */
int
xchk_iget(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_inode	**ipp)
{
	const unsigned int	iget_flags = XFS_IGET_UNTRUSTED;

	return xfs_iget(sc->mp, sc->tp, inum, iget_flags, 0, ipp);
}
/*
 * Grab an incore inode while holding the AGI, so that the lookup cannot race
 * with physical inode allocation or freeing in the same AG.  The caller must
 * already have a scrub transaction attached to @sc.
 *
 * Outcomes:
 *
 *  - iget succeeds: returns 0, *ipp is the inode, and *agi_bpp is NULL (the
 *    AGI buffer has been released).
 *
 *  - iget fails: returns the error with *agi_bpp still pointing at the locked
 *    AGI buffer and *ipp NULL, so that the caller can single-step the inode
 *    loading process to find out what went wrong.  This covers -EINVAL and
 *    -ENOENT for invalid or no-longer-allocated inode numbers, as well as any
 *    other corruption or runtime error.
 *
 *  - the AGI read fails: returns the error, *agi_bpp NULL, *ipp NULL.
 *
 *  - a fatal signal is pending: returns -EINTR, *agi_bpp NULL, *ipp NULL.
 */
int
xchk_iget_agi(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_buf		**agi_bpp,
	struct xfs_inode	**ipp)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;
	struct xfs_perag	*pag;
	int			error;

	ASSERT(sc->tp != NULL);

	for (;;) {
		/* Every attempt starts with clean output parameters. */
		*agi_bpp = NULL;
		*ipp = NULL;
		error = 0;

		if (xchk_should_terminate(sc, &error))
			return error;

		/*
		 * Read the AGI through the scrub transaction so that the
		 * buffer is attached to it; this avoids deadlocks in the
		 * iget cache miss path.
		 */
		pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
		error = xfs_ialloc_read_agi(pag, tp, agi_bpp);
		xfs_perag_put(pag);
		if (error)
			return error;

		error = xfs_iget(mp, tp, inum,
				XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED, 0, ipp);
		if (error != -EAGAIN)
			break;

		/*
		 * -EAGAIN on a cache hit means the inode is in core but not
		 * currently available, e.g. because it is queued on the
		 * inactivation list.  The inactivation worker may have to
		 * take the inode off the unlinked list, and that needs the
		 * AGI.  Let go of the AGI so that inodegc can make progress
		 * and move the inode to IRECLAIMABLE state, where xfs_iget
		 * can return it again if it can lock the inode, then back
		 * off briefly and start over.
		 */
		xfs_trans_brelse(tp, *agi_bpp);
		delay(1);
	}

	/* Any other failure leaves the locked AGI with the caller. */
	if (error)
		return error;

	/* The inode is ours, so the AGI is no longer needed. */
	ASSERT(*ipp != NULL);
	xfs_trans_brelse(tp, *agi_bpp);
	*agi_bpp = NULL;
	return 0;
}
/*
 * Attach an inode that was opened by handle to the scrub context.  The
 * inode's generation must match the generation userspace asked for; if it
 * does not, the inode is released and -ENOENT is returned.  On success the
 * inode is stored in sc->ip and 0 is returned.
 */
int
xchk_install_handle_inode(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	if (VFS_I(ip)->i_generation == sc->sm->sm_gen) {
		sc->ip = ip;
		return 0;
	}

	/* Generation mismatch: this is not the file the caller meant. */
	xchk_irele(sc, ip);
	return -ENOENT;
}
/* /*
* Given an inode and the scrub control structure, grab either the * In preparation to scrub metadata structures that hang off of an inode,
* inode referenced in the control structure or the inode passed in. * grab either the inode referenced in the scrub control structure or the
* The inode is not locked. * inode passed in. If the inumber does not reference an allocated inode
* record, the function returns ENOENT to end the scrub early. The inode
* is not locked.
*/ */
int int
xchk_get_inode( xchk_iget_for_scrubbing(
struct xfs_scrub *sc) struct xfs_scrub *sc)
{ {
struct xfs_imap imap; struct xfs_imap imap;
struct xfs_mount *mp = sc->mp; struct xfs_mount *mp = sc->mp;
struct xfs_perag *pag; struct xfs_perag *pag;
struct xfs_buf *agi_bp;
struct xfs_inode *ip_in = XFS_I(file_inode(sc->file)); struct xfs_inode *ip_in = XFS_I(file_inode(sc->file));
struct xfs_inode *ip = NULL; struct xfs_inode *ip = NULL;
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
int error; int error;
ASSERT(sc->tp == NULL);
/* We want to scan the inode we already had opened. */ /* We want to scan the inode we already had opened. */
if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
sc->ip = ip_in; sc->ip = ip_in;
return 0; return 0;
} }
/* Look up the inode, see if the generation number matches. */ /* Reject internal metadata files and obviously bad inode numbers. */
if (xfs_internal_inum(mp, sc->sm->sm_ino)) if (xfs_internal_inum(mp, sc->sm->sm_ino))
return -ENOENT; return -ENOENT;
error = xfs_iget(mp, NULL, sc->sm->sm_ino, if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE, 0, &ip); return -ENOENT;
switch (error) {
case -ENOENT: /* Try a regular untrusted iget. */
/* Inode doesn't exist, just bail out. */ error = xchk_iget(sc, sc->sm->sm_ino, &ip);
if (!error)
return xchk_install_handle_inode(sc, ip);
if (error == -ENOENT)
return error; return error;
case 0: if (error != -EINVAL)
/* Got an inode, continue. */ goto out_error;
break;
case -EINVAL: /*
* EINVAL with IGET_UNTRUSTED probably means one of several things:
* userspace gave us an inode number that doesn't correspond to fs
* space; the inode btree lacks a record for this inode; or there is a
* record, and it says this inode is free.
*
* We want to look up this inode in the inobt to distinguish two
* scenarios: (1) the inobt says the inode is free, in which case
* there's nothing to do; and (2) the inobt says the inode is
* allocated, but loading it failed due to corruption.
*
* Allocate a transaction and grab the AGI to prevent inobt activity
* in this AG. Retry the iget in case someone allocated a new inode
* after the first iget failed.
*/
error = xchk_trans_alloc(sc, 0);
if (error)
goto out_error;
error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
if (error == 0) {
/* Actually got the inode, so install it. */
xchk_trans_cancel(sc);
return xchk_install_handle_inode(sc, ip);
}
if (error == -ENOENT)
goto out_gone;
if (error != -EINVAL)
goto out_cancel;
/* Ensure that we have protected against inode allocation/freeing. */
if (agi_bp == NULL) {
ASSERT(agi_bp != NULL);
error = -ECANCELED;
goto out_cancel;
}
/* /*
* -EINVAL with IGET_UNTRUSTED could mean one of several * Untrusted iget failed a second time. Let's try an inobt lookup.
* things: userspace gave us an inode number that doesn't * If the inobt thinks this the inode neither can exist inside the
* correspond to fs space, or doesn't have an inobt entry; * filesystem nor is allocated, return ENOENT to signal that the check
* or it could simply mean that the inode buffer failed the * can be skipped.
* read verifiers.
* *
* Try just the inode mapping lookup -- if it succeeds, then * If the lookup returns corruption, we'll mark this inode corrupt and
* the inode buffer verifier failed and something needs fixing. * exit to userspace. There's little chance of fixing anything until
* Otherwise, we really couldn't find it so tell userspace * the inobt is straightened out, but there's nothing we can do here.
* that it no longer exists. *
* If the lookup encounters any other error, exit to userspace.
*
* If the lookup succeeds, something else must be very wrong in the fs
* such that setting up the incore inode failed in some strange way.
* Treat those as corruptions.
*/ */
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino)); pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
if (pag) { if (!pag) {
error = -EFSCORRUPTED;
goto out_cancel;
}
error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap, error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE); XFS_IGET_UNTRUSTED);
xfs_perag_put(pag); xfs_perag_put(pag);
if (error) if (error == -EINVAL || error == -ENOENT)
return -ENOENT; goto out_gone;
} if (!error)
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
fallthrough;
default: out_cancel:
trace_xchk_op_error(sc, xchk_trans_cancel(sc);
XFS_INO_TO_AGNO(mp, sc->sm->sm_ino), out_error:
XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
error, __return_address); error, __return_address);
return error; return error;
} out_gone:
if (VFS_I(ip)->i_generation != sc->sm->sm_gen) { /* The file is gone, so there's nothing to check. */
xfs_irele(ip); xchk_trans_cancel(sc);
return -ENOENT; return -ENOENT;
}
/* Release an inode, possibly dropping it in the process. */
void
xchk_irele(
struct xfs_scrub *sc,
struct xfs_inode *ip)
{
if (current->journal_info != NULL) {
ASSERT(current->journal_info == sc->tp);
/*
* If we are in a transaction, we /cannot/ drop the inode
* ourselves, because the VFS will trigger writeback, which
* can require a transaction. Clear DONTCACHE to force the
* inode to the LRU, where someone else can take care of
* dropping it.
*
* Note that when we grabbed our reference to the inode, it
* could have had an active ref and DONTCACHE set if a sysadmin
* is trying to coerce a change in file access mode. icache
* hits do not clear DONTCACHE, so we must do it here.
*/
spin_lock(&VFS_I(ip)->i_lock);
VFS_I(ip)->i_state &= ~I_DONTCACHE;
spin_unlock(&VFS_I(ip)->i_lock);
} else if (atomic_read(&VFS_I(ip)->i_count) == 1) {
/*
* If this is the last reference to the inode and the caller
* permits it, set DONTCACHE to avoid thrashing.
*/
d_mark_dontcache(VFS_I(ip));
} }
sc->ip = ip; xfs_irele(ip);
return 0;
} }
/* Set us up to scrub a file's contents. */ /*
* Set us up to scrub metadata mapped by a file's fork. Callers must not use
* this to operate on user-accessible regular file data because the MMAPLOCK is
* not taken.
*/
int int
xchk_setup_inode_contents( xchk_setup_inode_contents(
struct xfs_scrub *sc, struct xfs_scrub *sc,
...@@ -799,13 +1000,14 @@ xchk_setup_inode_contents( ...@@ -799,13 +1000,14 @@ xchk_setup_inode_contents(
{ {
int error; int error;
error = xchk_get_inode(sc); error = xchk_iget_for_scrubbing(sc);
if (error) if (error)
return error; return error;
/* Got the inode, lock it and we're ready to go. */ /* Lock the inode so the VFS cannot touch this file. */
sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; sc->ilock_flags = XFS_IOLOCK_EXCL;
xfs_ilock(sc->ip, sc->ilock_flags); xfs_ilock(sc->ip, sc->ilock_flags);
error = xchk_trans_alloc(sc, resblks); error = xchk_trans_alloc(sc, resblks);
if (error) if (error)
goto out; goto out;
......
...@@ -32,6 +32,8 @@ xchk_should_terminate( ...@@ -32,6 +32,8 @@ xchk_should_terminate(
} }
int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks); int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks);
void xchk_trans_cancel(struct xfs_scrub *sc);
bool xchk_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno, bool xchk_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno,
xfs_agblock_t bno, int *error); xfs_agblock_t bno, int *error);
bool xchk_fblock_process_error(struct xfs_scrub *sc, int whichfork, bool xchk_fblock_process_error(struct xfs_scrub *sc, int whichfork,
...@@ -133,10 +135,16 @@ int xchk_count_rmap_ownedby_ag(struct xfs_scrub *sc, struct xfs_btree_cur *cur, ...@@ -133,10 +135,16 @@ int xchk_count_rmap_ownedby_ag(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
const struct xfs_owner_info *oinfo, xfs_filblks_t *blocks); const struct xfs_owner_info *oinfo, xfs_filblks_t *blocks);
int xchk_setup_ag_btree(struct xfs_scrub *sc, bool force_log); int xchk_setup_ag_btree(struct xfs_scrub *sc, bool force_log);
int xchk_get_inode(struct xfs_scrub *sc); int xchk_iget_for_scrubbing(struct xfs_scrub *sc);
int xchk_setup_inode_contents(struct xfs_scrub *sc, unsigned int resblks); int xchk_setup_inode_contents(struct xfs_scrub *sc, unsigned int resblks);
void xchk_buffer_recheck(struct xfs_scrub *sc, struct xfs_buf *bp); void xchk_buffer_recheck(struct xfs_scrub *sc, struct xfs_buf *bp);
int xchk_iget(struct xfs_scrub *sc, xfs_ino_t inum, struct xfs_inode **ipp);
int xchk_iget_agi(struct xfs_scrub *sc, xfs_ino_t inum,
struct xfs_buf **agi_bpp, struct xfs_inode **ipp);
void xchk_irele(struct xfs_scrub *sc, struct xfs_inode *ip);
int xchk_install_handle_inode(struct xfs_scrub *sc, struct xfs_inode *ip);
/* /*
* Don't bother cross-referencing if we already found corruption or cross * Don't bother cross-referencing if we already found corruption or cross
* referencing discrepancies. * referencing discrepancies.
......
...@@ -117,21 +117,15 @@ xchk_dir_actor( ...@@ -117,21 +117,15 @@ xchk_dir_actor(
} }
/* /*
* Grab the inode pointed to by the dirent. We release the * Grab the inode pointed to by the dirent. We release the inode
* inode before we cancel the scrub transaction. Since we're * before we cancel the scrub transaction.
* don't know a priori that releasing the inode won't trigger
* eofblocks cleanup (which allocates what would be a nested
* transaction), we can't use DONTCACHE here because DONTCACHE
* inodes can trigger immediate inactive cleanup of the inode.
* Use UNTRUSTED here to check the allocation status of the inode in
* the inode btrees.
* *
* If _iget returns -EINVAL or -ENOENT then the child inode number is * If _iget returns -EINVAL or -ENOENT then the child inode number is
* garbage and the directory is corrupt. If the _iget returns * garbage and the directory is corrupt. If the _iget returns
* -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a
* cross referencing error. Any other error is an operational error. * cross referencing error. Any other error is an operational error.
*/ */
error = xfs_iget(mp, sc->tp, ino, XFS_IGET_UNTRUSTED, 0, &ip); error = xchk_iget(sc, ino, &ip);
if (error == -EINVAL || error == -ENOENT) { if (error == -EINVAL || error == -ENOENT) {
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
...@@ -141,7 +135,7 @@ xchk_dir_actor( ...@@ -141,7 +135,7 @@ xchk_dir_actor(
goto out; goto out;
xchk_dir_check_ftype(sc, offset, ip, name->type); xchk_dir_check_ftype(sc, offset, ip, name->type);
xfs_irele(ip); xchk_irele(sc, ip);
out: out:
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return -ECANCELED; return -ECANCELED;
......
...@@ -11,8 +11,11 @@ ...@@ -11,8 +11,11 @@
#include "xfs_mount.h" #include "xfs_mount.h"
#include "xfs_btree.h" #include "xfs_btree.h"
#include "xfs_log_format.h" #include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_ag.h"
#include "xfs_inode.h" #include "xfs_inode.h"
#include "xfs_ialloc.h" #include "xfs_ialloc.h"
#include "xfs_icache.h"
#include "xfs_da_format.h" #include "xfs_da_format.h"
#include "xfs_reflink.h" #include "xfs_reflink.h"
#include "xfs_rmap.h" #include "xfs_rmap.h"
...@@ -20,48 +23,176 @@ ...@@ -20,48 +23,176 @@
#include "scrub/scrub.h" #include "scrub/scrub.h"
#include "scrub/common.h" #include "scrub/common.h"
#include "scrub/btree.h" #include "scrub/btree.h"
#include "scrub/trace.h"
/*
 * Get the inode attached to the scrub context ready for checking: take the
 * IOLOCK exclusively, allocate the scrub transaction with no block
 * reservation, then take the ILOCK exclusively.  Each lock taken is recorded
 * in sc->ilock_flags.  If the transaction allocation fails, the error is
 * returned with only the IOLOCK held (and recorded).
 */
static inline int
xchk_prepare_iscrub(
	struct xfs_scrub	*sc)
{
	int			error;

	sc->ilock_flags = XFS_IOLOCK_EXCL;
	xfs_ilock(sc->ip, XFS_IOLOCK_EXCL);

	error = xchk_trans_alloc(sc, 0);
	if (!error) {
		sc->ilock_flags |= XFS_ILOCK_EXCL;
		xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
	}

	return error;
}
/*
 * Attach a scrub-by-handle inode to the scrub context and, if that works,
 * lock it and allocate the scrub transaction.  Returns the first error
 * encountered, or 0 once the inode is installed and prepared.
 */
static inline int
xchk_install_handle_iscrub(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	int			error = xchk_install_handle_inode(sc, ip);

	if (!error)
		return xchk_prepare_iscrub(sc);
	return error;
}
/* /*
* Grab total control of the inode metadata. It doesn't matter here if * Grab total control of the inode metadata. In the best case, we grab the
* the file data is still changing; exclusive access to the metadata is * incore inode and take all locks on it. If the incore inode cannot be
* the goal. * constructed due to corruption problems, lock the AGI so that we can single
* step the loading process to fix everything that can go wrong.
*/ */
int int
xchk_setup_inode( xchk_setup_inode(
struct xfs_scrub *sc) struct xfs_scrub *sc)
{ {
struct xfs_imap imap;
struct xfs_inode *ip;
struct xfs_mount *mp = sc->mp;
struct xfs_inode *ip_in = XFS_I(file_inode(sc->file));
struct xfs_buf *agi_bp;
struct xfs_perag *pag;
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
int error; int error;
if (xchk_need_intent_drain(sc)) if (xchk_need_intent_drain(sc))
xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
/* We want to scan the opened inode, so lock it and exit. */
if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) {
sc->ip = ip_in;
return xchk_prepare_iscrub(sc);
}
/* Reject internal metadata files and obviously bad inode numbers. */
if (xfs_internal_inum(mp, sc->sm->sm_ino))
return -ENOENT;
if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
return -ENOENT;
/* Try a regular untrusted iget. */
error = xchk_iget(sc, sc->sm->sm_ino, &ip);
if (!error)
return xchk_install_handle_iscrub(sc, ip);
if (error == -ENOENT)
return error;
if (error != -EFSCORRUPTED && error != -EFSBADCRC && error != -EINVAL)
goto out_error;
/* /*
* Try to get the inode. If the verifiers fail, we try again * EINVAL with IGET_UNTRUSTED probably means one of several things:
* in raw mode. * userspace gave us an inode number that doesn't correspond to fs
* space; the inode btree lacks a record for this inode; or there is
* a record, and it says this inode is free.
*
* EFSCORRUPTED/EFSBADCRC could mean that the inode was mappable, but
* some other metadata corruption (e.g. inode forks) prevented
* instantiation of the incore inode. Or it could mean the inobt is
* corrupt.
*
* We want to look up this inode in the inobt directly to distinguish
* three different scenarios: (1) the inobt says the inode is free,
* in which case there's nothing to do; (2) the inobt is corrupt so we
* should flag the corruption and exit to userspace to let it fix the
* inobt; and (3) the inobt says the inode is allocated, but loading it
* failed due to corruption.
*
* Allocate a transaction and grab the AGI to prevent inobt activity in
* this AG. Retry the iget in case someone allocated a new inode after
* the first iget failed.
*/ */
error = xchk_get_inode(sc); error = xchk_trans_alloc(sc, 0);
switch (error) { if (error)
case 0: goto out_error;
break;
case -EFSCORRUPTED: error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
case -EFSBADCRC: if (error == 0) {
return xchk_trans_alloc(sc, 0); /* Actually got the incore inode, so install it and proceed. */
default: xchk_trans_cancel(sc);
return error; return xchk_install_handle_iscrub(sc, ip);
}
if (error == -ENOENT)
goto out_gone;
if (error != -EFSCORRUPTED && error != -EFSBADCRC && error != -EINVAL)
goto out_cancel;
/* Ensure that we have protected against inode allocation/freeing. */
if (agi_bp == NULL) {
ASSERT(agi_bp != NULL);
error = -ECANCELED;
goto out_cancel;
} }
/* Got the inode, lock it and we're ready to go. */ /*
sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; * Untrusted iget failed a second time. Let's try an inobt lookup.
xfs_ilock(sc->ip, sc->ilock_flags); * If the inobt doesn't think this is an allocated inode then we'll
error = xchk_trans_alloc(sc, 0); * return ENOENT to signal that the check can be skipped.
*
* If the lookup signals corruption, we'll mark this inode corrupt and
* exit to userspace. There's little chance of fixing anything until
* the inobt is straightened out, but there's nothing we can do here.
*
* If the lookup encounters a runtime error, exit to userspace.
*/
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
if (!pag) {
error = -EFSCORRUPTED;
goto out_cancel;
}
error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
XFS_IGET_UNTRUSTED);
xfs_perag_put(pag);
if (error == -EINVAL || error == -ENOENT)
goto out_gone;
if (error) if (error)
goto out; goto out_cancel;
sc->ilock_flags |= XFS_ILOCK_EXCL;
xfs_ilock(sc->ip, XFS_ILOCK_EXCL);
out: /*
/* scrub teardown will unlock and release the inode for us */ * The lookup succeeded. Chances are the ondisk inode is corrupt and
* preventing iget from reading it. Retain the scrub transaction and
* the AGI buffer to prevent anyone from allocating or freeing inodes.
* This ensures that we preserve the inconsistency between the inobt
* saying the inode is allocated and the icache being unable to load
* the inode until we can flag the corruption in xchk_inode. The
* scrub function has to note the corruption, since we're not really
* supposed to do that from the setup function.
*/
return 0;
out_cancel:
xchk_trans_cancel(sc);
out_error:
trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
error, __return_address);
return error; return error;
out_gone:
/* The file is gone, so there's nothing to check. */
xchk_trans_cancel(sc);
return -ENOENT;
} }
/* Inode core */ /* Inode core */
......
...@@ -127,20 +127,15 @@ xchk_parent_validate( ...@@ -127,20 +127,15 @@ xchk_parent_validate(
expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1;
/* /*
* Grab this parent inode. We release the inode before we * Grab the parent directory inode. This must be released before we
* cancel the scrub transaction. Since we're don't know a * cancel the scrub transaction.
* priori that releasing the inode won't trigger eofblocks
* cleanup (which allocates what would be a nested transaction)
* if the parent pointer erroneously points to a file, we
* can't use DONTCACHE here because DONTCACHE inodes can trigger
* immediate inactive cleanup of the inode.
* *
* If _iget returns -EINVAL or -ENOENT then the parent inode number is * If _iget returns -EINVAL or -ENOENT then the parent inode number is
* garbage and the directory is corrupt. If the _iget returns * garbage and the directory is corrupt. If the _iget returns
* -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt which is a * -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt which is a
* cross referencing error. Any other error is an operational error. * cross referencing error. Any other error is an operational error.
*/ */
error = xfs_iget(mp, sc->tp, parent_ino, XFS_IGET_UNTRUSTED, 0, &dp); error = xchk_iget(sc, parent_ino, &dp);
if (error == -EINVAL || error == -ENOENT) { if (error == -EINVAL || error == -ENOENT) {
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
...@@ -176,7 +171,7 @@ xchk_parent_validate( ...@@ -176,7 +171,7 @@ xchk_parent_validate(
out_unlock: out_unlock:
xfs_iunlock(dp, lock_mode); xfs_iunlock(dp, lock_mode);
out_rele: out_rele:
xfs_irele(dp); xchk_irele(sc, dp);
return error; return error;
} }
......
...@@ -181,7 +181,7 @@ xchk_teardown( ...@@ -181,7 +181,7 @@ xchk_teardown(
xfs_iunlock(sc->ip, sc->ilock_flags); xfs_iunlock(sc->ip, sc->ilock_flags);
if (sc->ip != ip_in && if (sc->ip != ip_in &&
!xfs_internal_inum(sc->mp, sc->ip->i_ino)) !xfs_internal_inum(sc->mp, sc->ip->i_ino))
xfs_irele(sc->ip); xchk_irele(sc, sc->ip);
sc->ip = NULL; sc->ip = NULL;
} }
if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
......
...@@ -767,7 +767,8 @@ xfs_iget( ...@@ -767,7 +767,8 @@ xfs_iget(
return 0; return 0;
out_error_or_again: out_error_or_again:
if (!(flags & XFS_IGET_INCORE) && error == -EAGAIN) { if (!(flags & (XFS_IGET_INCORE | XFS_IGET_NORETRY)) &&
error == -EAGAIN) {
delay(1); delay(1);
goto again; goto again;
} }
......
...@@ -34,10 +34,13 @@ struct xfs_icwalk { ...@@ -34,10 +34,13 @@ struct xfs_icwalk {
/* /*
* Flags for xfs_iget() * Flags for xfs_iget()
*/ */
#define XFS_IGET_CREATE 0x1 #define XFS_IGET_CREATE (1U << 0)
#define XFS_IGET_UNTRUSTED 0x2 #define XFS_IGET_UNTRUSTED (1U << 1)
#define XFS_IGET_DONTCACHE 0x4 #define XFS_IGET_DONTCACHE (1U << 2)
#define XFS_IGET_INCORE 0x8 /* don't read from disk or reinit */ /* don't read from disk or reinit */
#define XFS_IGET_INCORE (1U << 3)
/* Return -EAGAIN immediately if the inode is unavailable. */
#define XFS_IGET_NORETRY (1U << 4)
int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino, int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
uint flags, uint lock_flags, xfs_inode_t **ipp); uint flags, uint lock_flags, xfs_inode_t **ipp);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment