Commit 27c14b5d authored by Darrick J. Wong's avatar Darrick J. Wong

xfs: ensure inobt record walks always make forward progress

The aim of the inode btree record iterator function is to call a
callback on every record in the btree.  To avoid having to tear down and
recreate the inode btree cursor around every callback, it caches a
certain number of records in a memory buffer.  After each batch of
callback invocations, we have to perform a btree lookup to find the
next record after where we left off.

However, if the keys of the inode btree are corrupt, the lookup might
put us in the wrong part of the inode btree, causing the walk function
to loop forever.  Therefore, we add extra cursor tracking to make sure
that we never go backwards neither when performing the lookup nor when
jumping to the next inobt record.  This also fixes an off by one error
where upon resume the lookup should have been for the inode /after/ the
point at which we stopped.

Found by fuzzing xfs/460 with keys[2].startino = ones causing bulkstat
and quotacheck to hang.

Fixes: a211432c ("xfs: create simplified inode walk function")
Signed-off-by: default avatarDarrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: default avatarChandan Babu R <chandanrlinux@gmail.com>
parent ada49d64
...@@ -55,6 +55,9 @@ struct xfs_iwalk_ag { ...@@ -55,6 +55,9 @@ struct xfs_iwalk_ag {
/* Where do we start the traversal? */ /* Where do we start the traversal? */
xfs_ino_t startino; xfs_ino_t startino;
/* What was the last inode number we saw when iterating the inobt? */
xfs_ino_t lastino;
/* Array of inobt records we cache. */ /* Array of inobt records we cache. */
struct xfs_inobt_rec_incore *recs; struct xfs_inobt_rec_incore *recs;
...@@ -301,6 +304,9 @@ xfs_iwalk_ag_start( ...@@ -301,6 +304,9 @@ xfs_iwalk_ag_start(
if (XFS_IS_CORRUPT(mp, *has_more != 1)) if (XFS_IS_CORRUPT(mp, *has_more != 1))
return -EFSCORRUPTED; return -EFSCORRUPTED;
iwag->lastino = XFS_AGINO_TO_INO(mp, agno,
irec->ir_startino + XFS_INODES_PER_CHUNK - 1);
/* /*
* If the LE lookup yielded an inobt record before the cursor position, * If the LE lookup yielded an inobt record before the cursor position,
* skip it and see if there's another one after it. * skip it and see if there's another one after it.
...@@ -347,15 +353,17 @@ xfs_iwalk_run_callbacks( ...@@ -347,15 +353,17 @@ xfs_iwalk_run_callbacks(
struct xfs_mount *mp = iwag->mp; struct xfs_mount *mp = iwag->mp;
struct xfs_trans *tp = iwag->tp; struct xfs_trans *tp = iwag->tp;
struct xfs_inobt_rec_incore *irec; struct xfs_inobt_rec_incore *irec;
xfs_agino_t restart; xfs_agino_t next_agino;
int error; int error;
next_agino = XFS_INO_TO_AGINO(mp, iwag->lastino) + 1;
ASSERT(iwag->nr_recs > 0); ASSERT(iwag->nr_recs > 0);
/* Delete cursor but remember the last record we cached... */ /* Delete cursor but remember the last record we cached... */
xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
irec = &iwag->recs[iwag->nr_recs - 1]; irec = &iwag->recs[iwag->nr_recs - 1];
restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1; ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK);
error = xfs_iwalk_ag_recs(iwag); error = xfs_iwalk_ag_recs(iwag);
if (error) if (error)
...@@ -372,7 +380,7 @@ xfs_iwalk_run_callbacks( ...@@ -372,7 +380,7 @@ xfs_iwalk_run_callbacks(
if (error) if (error)
return error; return error;
return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more); return xfs_inobt_lookup(*curpp, next_agino, XFS_LOOKUP_GE, has_more);
} }
/* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */ /* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
...@@ -396,6 +404,7 @@ xfs_iwalk_ag( ...@@ -396,6 +404,7 @@ xfs_iwalk_ag(
while (!error && has_more) { while (!error && has_more) {
struct xfs_inobt_rec_incore *irec; struct xfs_inobt_rec_incore *irec;
xfs_ino_t rec_fsino;
cond_resched(); cond_resched();
if (xfs_pwork_want_abort(&iwag->pwork)) if (xfs_pwork_want_abort(&iwag->pwork))
...@@ -407,6 +416,15 @@ xfs_iwalk_ag( ...@@ -407,6 +416,15 @@ xfs_iwalk_ag(
if (error || !has_more) if (error || !has_more)
break; break;
/* Make sure that we always move forward. */
rec_fsino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino);
if (iwag->lastino != NULLFSINO &&
XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) {
error = -EFSCORRUPTED;
goto out;
}
iwag->lastino = rec_fsino + XFS_INODES_PER_CHUNK - 1;
/* No allocated inodes in this chunk; skip it. */ /* No allocated inodes in this chunk; skip it. */
if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) { if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
error = xfs_btree_increment(cur, 0, &has_more); error = xfs_btree_increment(cur, 0, &has_more);
...@@ -535,6 +553,7 @@ xfs_iwalk( ...@@ -535,6 +553,7 @@ xfs_iwalk(
.trim_start = 1, .trim_start = 1,
.skip_empty = 1, .skip_empty = 1,
.pwork = XFS_PWORK_SINGLE_THREADED, .pwork = XFS_PWORK_SINGLE_THREADED,
.lastino = NULLFSINO,
}; };
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
int error; int error;
...@@ -623,6 +642,7 @@ xfs_iwalk_threaded( ...@@ -623,6 +642,7 @@ xfs_iwalk_threaded(
iwag->data = data; iwag->data = data;
iwag->startino = startino; iwag->startino = startino;
iwag->sz_recs = xfs_iwalk_prefetch(inode_records); iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
iwag->lastino = NULLFSINO;
xfs_pwork_queue(&pctl, &iwag->pwork); xfs_pwork_queue(&pctl, &iwag->pwork);
startino = XFS_AGINO_TO_INO(mp, agno + 1, 0); startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
if (flags & XFS_INOBT_WALK_SAME_AG) if (flags & XFS_INOBT_WALK_SAME_AG)
...@@ -696,6 +716,7 @@ xfs_inobt_walk( ...@@ -696,6 +716,7 @@ xfs_inobt_walk(
.startino = startino, .startino = startino,
.sz_recs = xfs_inobt_walk_prefetch(inobt_records), .sz_recs = xfs_inobt_walk_prefetch(inobt_records),
.pwork = XFS_PWORK_SINGLE_THREADED, .pwork = XFS_PWORK_SINGLE_THREADED,
.lastino = NULLFSINO,
}; };
xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino); xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
int error; int error;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment