Commit f1bc5c56 authored by Darrick J. Wong

xfs: merge xfs_reclaim_inodes_ag into xfs_icwalk_ag

Merge these two inode walk loops, since they're pretty similar now.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
parent 9d5ee837
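
For orientation, here is a condensed view of the reclaim entry points after this change, stitched together from the hunks below. The function bodies come straight from the diff; the long return type of xfs_reclaim_inodes_nr is not visible in the hunks and is assumed here to match the shrinker interface it serves.

/* Unbounded reclaim, typically at unmount: loop until every AG is clean. */
void
xfs_reclaim_inodes(
	struct xfs_mount	*mp)
{
	while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
		xfs_ail_push_all_sync(mp->m_ail);
		xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
	}
}

/* Bounded reclaim for the shrinker: stop after roughly nr_to_scan inodes. */
long
xfs_reclaim_inodes_nr(
	struct xfs_mount	*mp,
	int			nr_to_scan)
{
	struct xfs_eofblocks	eofb = {
		.eof_flags	= XFS_ICWALK_FLAG_SCAN_LIMIT,
		.icw_scan_limit	= nr_to_scan,
	};

	/* kick background reclaimer and push the AIL */
	xfs_reclaim_work_queue(mp);
	xfs_ail_push_all(mp->m_ail);

	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &eofb);
	return 0;
}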
@@ -43,6 +43,7 @@ enum xfs_icwalk_goal {
 	/* Goals directly associated with tagged inodes. */
 	XFS_ICWALK_BLOCKGC	= XFS_ICI_BLOCKGC_TAG,
+	XFS_ICWALK_RECLAIM	= XFS_ICI_RECLAIM_TAG,
 };

 #define XFS_ICWALK_NULL_TAG	(-1U)
@@ -67,9 +68,13 @@ static int xfs_icwalk_ag(struct xfs_perag *pag,
 #define XFS_ICWALK_FLAG_DROP_GDQUOT	(1U << 30)
 #define XFS_ICWALK_FLAG_DROP_PDQUOT	(1U << 29)

+/* Stop scanning after icw_scan_limit inodes. */
+#define XFS_ICWALK_FLAG_SCAN_LIMIT	(1U << 28)
+
 #define XFS_ICWALK_PRIVATE_FLAGS	(XFS_ICWALK_FLAG_DROP_UDQUOT | \
 					 XFS_ICWALK_FLAG_DROP_GDQUOT | \
-					 XFS_ICWALK_FLAG_DROP_PDQUOT)
+					 XFS_ICWALK_FLAG_DROP_PDQUOT | \
+					 XFS_ICWALK_FLAG_SCAN_LIMIT)

 /*
  * Allocate and initialise an xfs_inode.
@@ -760,17 +765,6 @@ xfs_icache_inode_is_allocated(
 	return 0;
 }

-/*
- * The inode lookup is done in batches to keep the amount of lock traffic and
- * radix tree lookups to a minimum. The batch size is a trade off between
- * lookup reduction and stack usage. This is in the reclaim path, so we can't
- * be too greedy.
- *
- * XXX: This will be moved closer to xfs_icwalk* once we get rid of the
- * separate reclaim walk functions.
- */
-#define XFS_LOOKUP_BATCH	32
-
 #ifdef CONFIG_XFS_QUOTA
 /* Decide if we want to grab this inode to drop its dquots. */
 static bool
@@ -880,7 +874,7 @@ xfs_dqrele_all_inodes(
  * Return true if we grabbed it, false otherwise.
  */
 static bool
-xfs_reclaim_inode_grab(
+xfs_reclaim_igrab(
 	struct xfs_inode	*ip)
 {
 	ASSERT(rcu_read_lock_held());
@@ -990,108 +984,13 @@ xfs_reclaim_inode(
 	xfs_iflags_clear(ip, XFS_IRECLAIM);
 }

-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * then a shut down during filesystem unmount reclaim walk leak all the
- * unreclaimed inodes.
- *
- * Returns non-zero if any AGs or inodes were skipped in the reclaim pass
- * so that callers that want to block until all dirty inodes are written back
- * and reclaimed can sanely loop.
- */
-static void
-xfs_reclaim_inodes_ag(
-	struct xfs_mount	*mp,
-	int			*nr_to_scan)
-{
-	struct xfs_perag	*pag;
-	xfs_agnumber_t		ag = 0;
-
-	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
-		unsigned long	first_index = 0;
-		int		done = 0;
-		int		nr_found = 0;
-
-		ag = pag->pag_agno + 1;
-
-		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
-		do {
-			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
-			int	i;
-
-			rcu_read_lock();
-			nr_found = radix_tree_gang_lookup_tag(
-					&pag->pag_ici_root,
-					(void **)batch, first_index,
-					XFS_LOOKUP_BATCH,
-					XFS_ICI_RECLAIM_TAG);
-			if (!nr_found) {
-				done = 1;
-				rcu_read_unlock();
-				break;
-			}
-
-			/*
-			 * Grab the inodes before we drop the lock. if we found
-			 * nothing, nr == 0 and the loop will be skipped.
-			 */
-			for (i = 0; i < nr_found; i++) {
-				struct xfs_inode *ip = batch[i];
-
-				if (done || !xfs_reclaim_inode_grab(ip))
-					batch[i] = NULL;
-
-				/*
-				 * Update the index for the next lookup. Catch
-				 * overflows into the next AG range which can
-				 * occur if we have inodes in the last block of
-				 * the AG and we are currently pointing to the
-				 * last inode.
-				 *
-				 * Because we may see inodes that are from the
-				 * wrong AG due to RCU freeing and
-				 * reallocation, only update the index if it
-				 * lies in this AG. It was a race that lead us
-				 * to see this inode, so another lookup from
-				 * the same index will not find it again.
-				 */
-				if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
-								pag->pag_agno)
-					continue;
-				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-					done = 1;
-			}
-
-			/* unlock now we've grabbed the inodes. */
-			rcu_read_unlock();
-
-			for (i = 0; i < nr_found; i++) {
-				if (batch[i])
-					xfs_reclaim_inode(batch[i], pag);
-			}
-
-			*nr_to_scan -= XFS_LOOKUP_BATCH;
-			cond_resched();
-		} while (nr_found && !done && *nr_to_scan > 0);
-
-		if (done)
-			first_index = 0;
-		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
-		xfs_perag_put(pag);
-	}
-}
-
 void
 xfs_reclaim_inodes(
 	struct xfs_mount	*mp)
 {
-	int		nr_to_scan = INT_MAX;
-
 	while (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
 		xfs_ail_push_all_sync(mp->m_ail);
-		xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+		xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
 	}
 }

@@ -1107,11 +1006,16 @@ xfs_reclaim_inodes_nr(
 	struct xfs_mount	*mp,
 	int			nr_to_scan)
 {
+	struct xfs_eofblocks	eofb = {
+		.eof_flags	= XFS_ICWALK_FLAG_SCAN_LIMIT,
+		.icw_scan_limit	= nr_to_scan,
+	};
+
 	/* kick background reclaimer and push the AIL */
 	xfs_reclaim_work_queue(mp);
 	xfs_ail_push_all(mp->m_ail);

-	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &eofb);
 	return 0;
 }

@@ -1221,9 +1125,8 @@ xfs_reclaim_worker(
 {
 	struct xfs_mount *mp = container_of(to_delayed_work(work),
 					struct xfs_mount, m_reclaim_work);
-	int		nr_to_scan = INT_MAX;

-	xfs_reclaim_inodes_ag(mp, &nr_to_scan);
+	xfs_icwalk(mp, XFS_ICWALK_RECLAIM, NULL);
 	xfs_reclaim_work_queue(mp);
 }
@@ -1693,6 +1596,15 @@ xfs_blockgc_free_quota(

 /* XFS Inode Cache Walking Code */

+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH	32
+
 /*
  * Decide if we want to grab this inode in anticipation of doing work towards
  * the goal.
@@ -1707,6 +1619,8 @@ xfs_icwalk_igrab(
 		return xfs_dqrele_igrab(ip);
 	case XFS_ICWALK_BLOCKGC:
 		return xfs_blockgc_igrab(ip);
+	case XFS_ICWALK_RECLAIM:
+		return xfs_reclaim_igrab(ip);
 	default:
 		return false;
 	}
@@ -1720,6 +1634,7 @@ static inline int
 xfs_icwalk_process_inode(
 	enum xfs_icwalk_goal	goal,
 	struct xfs_inode	*ip,
+	struct xfs_perag	*pag,
 	struct xfs_eofblocks	*eofb)
 {
 	int			error = 0;
@@ -1731,6 +1646,9 @@ xfs_icwalk_process_inode(
 	case XFS_ICWALK_BLOCKGC:
 		error = xfs_blockgc_scan_inode(ip, eofb);
 		break;
+	case XFS_ICWALK_RECLAIM:
+		xfs_reclaim_inode(ip, pag);
+		break;
 	}
 	return error;
 }
@@ -1755,7 +1673,10 @@ xfs_icwalk_ag(
 restart:
 	done = false;
 	skipped = 0;
-	first_index = 0;
+	if (goal == XFS_ICWALK_RECLAIM)
+		first_index = READ_ONCE(pag->pag_ici_reclaim_cursor);
+	else
+		first_index = 0;
 	nr_found = 0;
 	do {
 		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
@@ -1776,6 +1697,7 @@ xfs_icwalk_ag(
 				XFS_LOOKUP_BATCH, tag);

 		if (!nr_found) {
+			done = true;
 			rcu_read_unlock();
 			break;
 		}
@@ -1815,7 +1737,8 @@ xfs_icwalk_ag(
 		for (i = 0; i < nr_found; i++) {
 			if (!batch[i])
 				continue;
-			error = xfs_icwalk_process_inode(goal, batch[i], eofb);
+			error = xfs_icwalk_process_inode(goal, batch[i], pag,
+					eofb);
 			if (error == -EAGAIN) {
 				skipped++;
 				continue;
@@ -1830,8 +1753,19 @@ xfs_icwalk_ag(
 		cond_resched();

+		if (eofb && (eofb->eof_flags & XFS_ICWALK_FLAG_SCAN_LIMIT)) {
+			eofb->icw_scan_limit -= XFS_LOOKUP_BATCH;
+			if (eofb->icw_scan_limit <= 0)
+				break;
+		}
 	} while (nr_found && !done);

+	if (goal == XFS_ICWALK_RECLAIM) {
+		if (done)
+			first_index = 0;
+		WRITE_ONCE(pag->pag_ici_reclaim_cursor, first_index);
+	}
+
 	if (skipped) {
 		delay(1);
 		goto restart;
......
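
One subtlety carried over from the deleted loop: only the reclaim goal uses the per-AG cursor, so a background reclaim pass resumes where the previous one stopped. A minimal restatement of that logic from the hunk above, using hypothetical helper names for clarity:

/*
 * Illustrative only: these helpers restate the cursor handling that is
 * now inlined in xfs_icwalk_ag(); the names are hypothetical.
 */
static unsigned long
reclaim_cursor_load(struct xfs_perag *pag, enum xfs_icwalk_goal goal)
{
	/* Resume a reclaim pass where the previous one stopped. */
	if (goal == XFS_ICWALK_RECLAIM)
		return READ_ONCE(pag->pag_ici_reclaim_cursor);
	return 0;
}

static void
reclaim_cursor_store(struct xfs_perag *pag, unsigned long first_index,
		bool done)
{
	/* A pass that reached the end of the AG rewinds to the start. */
	WRITE_ONCE(pag->pag_ici_reclaim_cursor, done ? 0 : first_index);
}

Blockgc and dqrele walks always start at inode zero, and the cursor store likewise happens only for the reclaim goal.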
@@ -15,6 +15,7 @@ struct xfs_eofblocks {
 	kgid_t		eof_gid;
 	prid_t		eof_prid;
 	__u64		eof_min_file_size;
+	int		icw_scan_limit;
 };

 /*
......
@@ -3898,6 +3898,7 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
 		__field(uint32_t, gid)
 		__field(prid_t, prid)
 		__field(__u64, min_file_size)
+		__field(int, scan_limit)
 		__field(unsigned long, caller_ip)
 	),
 	TP_fast_assign(
@@ -3909,15 +3910,17 @@ DECLARE_EVENT_CLASS(xfs_eofblocks_class,
 				eofb->eof_gid) : 0;
 		__entry->prid = eofb ? eofb->eof_prid : 0;
 		__entry->min_file_size = eofb ? eofb->eof_min_file_size : 0;
+		__entry->scan_limit = eofb ? eofb->icw_scan_limit : 0;
 		__entry->caller_ip = caller_ip;
 	),
-	TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu caller %pS",
+	TP_printk("dev %d:%d flags 0x%x uid %u gid %u prid %u minsize %llu scan_limit %d caller %pS",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->flags,
 		  __entry->uid,
 		  __entry->gid,
 		  __entry->prid,
 		  __entry->min_file_size,
+		  __entry->scan_limit,
 		  (char *)__entry->caller_ip)
 );
 #define DEFINE_EOFBLOCKS_EVENT(name) \
......
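
A note on the new scan limit's semantics: the walker charges a full XFS_LOOKUP_BATCH after every batch, regardless of how many inodes in the batch were actually grabbed and processed, so icw_scan_limit is a target rather than a hard cap. A sketch of the loop-tail check under that reading, with a hypothetical helper name:

/*
 * Illustrative only: restates the scan-limit check at the bottom of the
 * lookup loop in xfs_icwalk_ag(); the helper name is hypothetical.
 */
static bool
icw_scan_budget_spent(struct xfs_eofblocks *eofb)
{
	/* No eofb, or no SCAN_LIMIT flag: walk the AG to completion. */
	if (!eofb || !(eofb->eof_flags & XFS_ICWALK_FLAG_SCAN_LIMIT))
		return false;
	/* Charge a full batch even if fewer inodes were processed. */
	eofb->icw_scan_limit -= XFS_LOOKUP_BATCH;
	return eofb->icw_scan_limit <= 0;
}

For example, with nr_to_scan = 100 and XFS_LOOKUP_BATCH = 32, the limit decays 68, 36, 4, -28 across four batches, so the walk stops after the fourth batch and may have examined up to 4 * 32 = 128 inodes.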