Commit abf7c819 authored by Chandan Babu R

Merge tag 'fix-iunlink-list-6.6_2023-09-12' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.6-fixesA

xfs: reload entire iunlink lists

This is the second part of correcting XFS to reload the incore unlinked
inode list from the ondisk contents.  Whereas part one tackled failures
from regular filesystem calls, this part takes on the problem of needing
to reload the entire incore unlinked inode list on account of somebody
loading an inode that's in the /middle/ of an unlinked list.  This
happens during quotacheck, bulkstat, or even opening a file by handle.

In this case we don't know the length of the list that we're reloading,
so we don't want to create a new unbounded memory load while holding
resources locked.  Instead, we'll target UNTRUSTED iget calls to reload
the entire bucket, pulling in one inode at a time (a simplified sketch
of this walk follows the commit metadata below).

Note that this changes the definition of the incore unlinked inode list
slightly -- i_prev_unlinked == 0 now means "not on the incore list" (a
sketch of the sentinel semantics also follows below).

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'fix-iunlink-list-6.6_2023-09-12' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: make inode unlinked bucket recovery work with quotacheck
  xfs: reload entire unlinked bucket lists
  xfs: use i_prev_unlinked to distinguish inodes that are not on the unlinked list
parents fffcdcc3 49813a21
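
A minimal userspace simulation of the bucket walk described above, using
hypothetical stand-in names rather than the kernel API: the walk follows
the "ondisk" forward links from the bucket head, pulls each missing inode
in with one untrusted lookup per iteration, and stitches up the incore
backlinks as it goes, so memory use stays bounded regardless of list length.

/* Hypothetical userspace sketch; none of these names are kernel APIs. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint32_t agino_t;
#define NULLAGINO ((agino_t)-1)
#define NINODES 8

/* "Ondisk" forward links: next_link[i] stands in for di_next_unlinked. */
static agino_t next_link[NINODES];
/* Incore backlinks: 0 means "not on the incore list" (the new sentinel). */
static agino_t prev_link[NINODES];
static int incore[NINODES];	/* 1 if inode i is already in memory */

/* Stand-in for one untrusted iget: pull a single inode into memory. */
static void iget_untrusted(agino_t agino)
{
	incore[agino] = 1;
}

/*
 * Walk one bucket from its head, loading each inode one at a time and
 * setting its backlink; the head of the list gets NULLAGINO.
 */
static void reload_bucket(agino_t head)
{
	agino_t prev = NULLAGINO, agino = head;

	while (agino != NULLAGINO) {
		if (!incore[agino])
			iget_untrusted(agino);
		prev_link[agino] = prev;
		prev = agino;
		agino = next_link[agino];
	}
}

int main(void)
{
	/* Bucket: 3 -> 5 -> 1 -> end; only inode 5 starts out incore. */
	for (int i = 0; i < NINODES; i++)
		next_link[i] = NULLAGINO;
	next_link[3] = 5;
	next_link[5] = 1;
	incore[5] = 1;

	reload_bucket(3);

	assert(prev_link[3] == NULLAGINO);	/* head sentinel */
	assert(prev_link[5] == 3 && prev_link[1] == 5);
	assert(prev_link[0] == 0);		/* untouched: not on a list */
	puts("bucket reloaded");
	return 0;
}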
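
The i_prev_unlinked field now encodes three states.  A tiny sketch of the
sentinel semantics, with hypothetical type and struct names standing in
for the real xfs_inode:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint32_t xfs_agino_t;
#define NULLAGINO ((xfs_agino_t)-1)

struct xfs_inode_sketch {		/* hypothetical stand-in */
	xfs_agino_t i_prev_unlinked;
};

/* Mirrors the new helper: zero is the "not on any incore list" sentinel. */
static bool xfs_inode_on_unlinked_list(const struct xfs_inode_sketch *ip)
{
	return ip->i_prev_unlinked != 0;
}

int main(void)
{
	struct xfs_inode_sketch not_listed = { .i_prev_unlinked = 0 };
	struct xfs_inode_sketch list_head  = { .i_prev_unlinked = NULLAGINO };
	struct xfs_inode_sketch mid_list   = { .i_prev_unlinked = 42 };

	assert(!xfs_inode_on_unlinked_list(&not_listed));
	assert(xfs_inode_on_unlinked_list(&list_head));	/* first element */
	assert(xfs_inode_on_unlinked_list(&mid_list));	/* has a previous inode */
	return 0;
}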
@@ -333,7 +333,6 @@ xfs_attr_inactive(
 	int			error = 0;

 	mp = dp->i_mount;
-	ASSERT(! XFS_NOT_DQATTACHED(mp, dp));

 	xfs_ilock(dp, lock_mode);
 	if (!xfs_inode_has_attr_fork(dp))
......
@@ -146,6 +146,12 @@ xfs_nfs_get_inode(
 		return ERR_PTR(error);
 	}

+	error = xfs_inode_reload_unlinked(ip);
+	if (error) {
+		xfs_irele(ip);
+		return ERR_PTR(error);
+	}
+
 	if (VFS_I(ip)->i_generation != generation) {
 		xfs_irele(ip);
 		return ERR_PTR(-ESTALE);
......
@@ -113,7 +113,7 @@ xfs_inode_alloc(
 	INIT_LIST_HEAD(&ip->i_ioend_list);
 	spin_lock_init(&ip->i_ioend_lock);
 	ip->i_next_unlinked = NULLAGINO;
-	ip->i_prev_unlinked = NULLAGINO;
+	ip->i_prev_unlinked = 0;

 	return ip;
 }
......
@@ -1742,9 +1742,13 @@ xfs_inactive(
 	     ip->i_df.if_nextents > 0 || ip->i_delayed_blks > 0))
 		truncate = 1;

-	error = xfs_qm_dqattach(ip);
-	if (error)
-		goto out;
+	if (xfs_iflags_test(ip, XFS_IQUOTAUNCHECKED)) {
+		xfs_qm_dqdetach(ip);
+	} else {
+		error = xfs_qm_dqattach(ip);
+		if (error)
+			goto out;
+	}

 	if (S_ISLNK(VFS_I(ip)->i_mode))
 		error = xfs_inactive_symlink(ip);
@@ -1962,6 +1966,8 @@ xfs_iunlink_reload_next(
 	trace_xfs_iunlink_reload_next(next_ip);
 rele:
 	ASSERT(!(VFS_I(next_ip)->i_state & I_DONTCACHE));
+	if (xfs_is_quotacheck_running(mp) && next_ip)
+		xfs_iflags_set(next_ip, XFS_IQUOTAUNCHECKED);
 	xfs_irele(next_ip);
 	return error;
 }
@@ -2014,6 +2020,7 @@ xfs_iunlink_insert_inode(
 	}

 	/* Point the head of the list to point to this inode. */
+	ip->i_prev_unlinked = NULLAGINO;
 	return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino);
 }
@@ -2116,7 +2123,7 @@ xfs_iunlink_remove_inode(
 	}

 	ip->i_next_unlinked = NULLAGINO;
-	ip->i_prev_unlinked = NULLAGINO;
+	ip->i_prev_unlinked = 0;
 	return error;
 }
@@ -3605,3 +3612,103 @@ xfs_iunlock2_io_mmap(
 	if (ip1 != ip2)
 		inode_unlock(VFS_I(ip1));
 }
+
+/*
+ * Reload the incore inode list for this inode.  Caller should ensure that
+ * the link count cannot change, either by taking ILOCK_SHARED or otherwise
+ * preventing other threads from executing.
+ */
+int
+xfs_inode_reload_unlinked_bucket(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_buf		*agibp;
+	struct xfs_agi		*agi;
+	struct xfs_perag	*pag;
+	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+	xfs_agino_t		prev_agino, next_agino;
+	unsigned int		bucket;
+	bool			foundit = false;
+	int			error;
+
+	/* Grab the first inode in the list */
+	pag = xfs_perag_get(mp, agno);
+	error = xfs_ialloc_read_agi(pag, tp, &agibp);
+	xfs_perag_put(pag);
+	if (error)
+		return error;
+
+	bucket = agino % XFS_AGI_UNLINKED_BUCKETS;
+	agi = agibp->b_addr;
+
+	trace_xfs_inode_reload_unlinked_bucket(ip);
+	xfs_info_ratelimited(mp,
+ "Found unrecovered unlinked inode 0x%x in AG 0x%x.  Initiating list recovery.",
+			agino, agno);
+
+	prev_agino = NULLAGINO;
+	next_agino = be32_to_cpu(agi->agi_unlinked[bucket]);
+	while (next_agino != NULLAGINO) {
+		struct xfs_inode	*next_ip = NULL;
+
+		if (next_agino == agino) {
+			/* Found this inode, set its backlink. */
+			next_ip = ip;
+			next_ip->i_prev_unlinked = prev_agino;
+			foundit = true;
+		}
+		if (!next_ip) {
+			/* Inode already in memory. */
+			next_ip = xfs_iunlink_lookup(pag, next_agino);
+		}
+		if (!next_ip) {
+			/* Inode not in memory, reload. */
+			error = xfs_iunlink_reload_next(tp, agibp, prev_agino,
+					next_agino);
+			if (error)
+				break;
+			next_ip = xfs_iunlink_lookup(pag, next_agino);
+		}
+		if (!next_ip) {
+			/* No incore inode at all?  We reloaded it... */
+			ASSERT(next_ip != NULL);
+			error = -EFSCORRUPTED;
+			break;
+		}
+
+		prev_agino = next_agino;
+		next_agino = next_ip->i_next_unlinked;
+	}
+
+	xfs_trans_brelse(tp, agibp);
+
+	/* Should have found this inode somewhere in the iunlinked bucket. */
+	if (!error && !foundit)
+		error = -EFSCORRUPTED;
+	return error;
+}
+
+/* Decide if this inode is missing its unlinked list and reload it. */
+int
+xfs_inode_reload_unlinked(
+	struct xfs_inode	*ip)
+{
+	struct xfs_trans	*tp;
+	int			error;
+
+	error = xfs_trans_alloc_empty(ip->i_mount, &tp);
+	if (error)
+		return error;
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	if (xfs_inode_unlinked_incomplete(ip))
+		error = xfs_inode_reload_unlinked_bucket(tp, ip);
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+	xfs_trans_cancel(tp);
+
+	return error;
+}
@@ -68,8 +68,21 @@ typedef struct xfs_inode {
 	uint64_t		i_diflags2;	/* XFS_DIFLAG2_... */
 	struct timespec64	i_crtime;	/* time created */

-	/* unlinked list pointers */
+	/*
+	 * Unlinked list pointers.  These point to the next and previous
+	 * inodes in the AGI unlinked bucket list, respectively.  These
+	 * fields can only be updated with the AGI locked.
+	 *
+	 * i_next_unlinked caches di_next_unlinked.
+	 */
 	xfs_agino_t		i_next_unlinked;
+
+	/*
+	 * If the inode is not on an unlinked list, this field is zero.  If
+	 * the inode is the first element in an unlinked list, this field is
+	 * NULLAGINO.  Otherwise, i_prev_unlinked points to the previous
+	 * inode in the unlinked list.
+	 */
 	xfs_agino_t		i_prev_unlinked;

 	/* VFS inode */
@@ -81,6 +94,11 @@ typedef struct xfs_inode {
 	struct list_head	i_ioend_list;
 } xfs_inode_t;

+static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip)
+{
+	return ip->i_prev_unlinked != 0;
+}
+
 static inline bool xfs_inode_has_attr_fork(struct xfs_inode *ip)
 {
 	return ip->i_forkoff > 0;
@@ -326,6 +344,9 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
  */
 #define XFS_INACTIVATING	(1 << 13)

+/* Quotacheck is running but inode has not been added to quota counts. */
+#define XFS_IQUOTAUNCHECKED	(1 << 14)
+
 /* All inode state flags related to inode reclaim. */
 #define XFS_ALL_IRECLAIM_FLAGS	(XFS_IRECLAIMABLE | \
 				 XFS_IRECLAIM | \
@@ -340,7 +361,7 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
 #define XFS_IRECLAIM_RESET_FLAGS	\
 	(XFS_IRECLAIMABLE | XFS_IRECLAIM | \
 	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | XFS_NEED_INACTIVE | \
-	 XFS_INACTIVATING)
+	 XFS_INACTIVATING | XFS_IQUOTAUNCHECKED)

 /*
  * Flags for inode locking.
@@ -575,4 +596,13 @@ void xfs_end_io(struct work_struct *work);
 int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
 void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);

+static inline bool
+xfs_inode_unlinked_incomplete(
+	struct xfs_inode	*ip)
+{
+	return VFS_I(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip);
+}
+int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip);
+int xfs_inode_reload_unlinked(struct xfs_inode *ip);
+
 #endif	/* __XFS_INODE_H__ */
@@ -80,6 +80,15 @@ xfs_bulkstat_one_int(
 	if (error)
 		goto out;

+	if (xfs_inode_unlinked_incomplete(ip)) {
+		error = xfs_inode_reload_unlinked_bucket(tp, ip);
+		if (error) {
+			xfs_iunlock(ip, XFS_ILOCK_SHARED);
+			xfs_irele(ip);
+			return error;
+		}
+	}
+
 	ASSERT(ip != NULL);
 	ASSERT(ip->i_imap.im_blkno != 0);
 	inode = VFS_I(ip);
......
@@ -405,6 +405,8 @@ __XFS_HAS_FEAT(nouuid, NOUUID)
 #define XFS_OPSTATE_WARNED_SHRINK	8
 /* Kernel has logged a warning about logged xattr updates being used. */
 #define XFS_OPSTATE_WARNED_LARP	9
+/* Mount time quotacheck is running */
+#define XFS_OPSTATE_QUOTACHECK_RUNNING	10

 #define __XFS_IS_OPSTATE(name, NAME) \
 static inline bool xfs_is_ ## name (struct xfs_mount *mp) \
@@ -427,6 +429,11 @@ __XFS_IS_OPSTATE(inode32, INODE32)
 __XFS_IS_OPSTATE(readonly, READONLY)
 __XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED)
 __XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED)
+#ifdef CONFIG_XFS_QUOTA
+__XFS_IS_OPSTATE(quotacheck_running, QUOTACHECK_RUNNING)
+#else
+# define xfs_is_quotacheck_running(mp)	(false)
+#endif

 static inline bool
 xfs_should_warn(struct xfs_mount *mp, long nr)
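
The body of __XFS_IS_OPSTATE is elided in this excerpt.  Assuming it follows
the usual xfs_mount.h pattern of test_bit()/test_and_set_bit()/
test_and_clear_bit() wrappers, the single quotacheck_running line above also
generates the xfs_set_quotacheck_running() and xfs_clear_quotacheck_running()
helpers called from xfs_qm_quotacheck() below.  A userspace sketch of that
expansion, with the atomic bitops stubbed out as plain bit arithmetic:

/* Approximate, non-atomic userspace illustration; an assumption about the
 * elided macro body, not a copy of the kernel code.
 */
#include <assert.h>
#include <stdbool.h>

#define XFS_OPSTATE_QUOTACHECK_RUNNING	10

struct xfs_mount { unsigned long m_opstate; };

static inline bool xfs_is_quotacheck_running(struct xfs_mount *mp)
{
	return mp->m_opstate & (1UL << XFS_OPSTATE_QUOTACHECK_RUNNING);
}
static inline bool xfs_set_quotacheck_running(struct xfs_mount *mp)
{
	bool old = xfs_is_quotacheck_running(mp);
	mp->m_opstate |= 1UL << XFS_OPSTATE_QUOTACHECK_RUNNING;
	return old;	/* test_and_set_bit() returns the previous value */
}
static inline bool xfs_clear_quotacheck_running(struct xfs_mount *mp)
{
	bool old = xfs_is_quotacheck_running(mp);
	mp->m_opstate &= ~(1UL << XFS_OPSTATE_QUOTACHECK_RUNNING);
	return old;
}

int main(void)
{
	struct xfs_mount mp = { 0 };

	assert(!xfs_set_quotacheck_running(&mp));	/* was clear */
	assert(xfs_is_quotacheck_running(&mp));
	assert(xfs_clear_quotacheck_running(&mp));	/* was set */
	assert(!xfs_is_quotacheck_running(&mp));
	return 0;
}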
@@ -444,7 +451,8 @@ xfs_should_warn(struct xfs_mount *mp, long nr)
 	{ (1UL << XFS_OPSTATE_BLOCKGC_ENABLED),		"blockgc" }, \
 	{ (1UL << XFS_OPSTATE_WARNED_SCRUB),		"wscrub" }, \
 	{ (1UL << XFS_OPSTATE_WARNED_SHRINK),		"wshrink" }, \
-	{ (1UL << XFS_OPSTATE_WARNED_LARP),		"wlarp" }
+	{ (1UL << XFS_OPSTATE_WARNED_LARP),		"wlarp" }, \
+	{ (1UL << XFS_OPSTATE_QUOTACHECK_RUNNING),	"quotacheck" }

 /*
  * Max and min values for mount-option defined I/O
......
@@ -1160,6 +1160,10 @@ xfs_qm_dqusage_adjust(
 	if (error)
 		return error;

+	error = xfs_inode_reload_unlinked(ip);
+	if (error)
+		goto error0;
+
 	ASSERT(ip->i_delayed_blks == 0);

 	if (XFS_IS_REALTIME_INODE(ip)) {
@@ -1173,6 +1177,7 @@ xfs_qm_dqusage_adjust(
 	}

 	nblks = (xfs_qcnt_t)ip->i_nblocks - rtblks;
+	xfs_iflags_clear(ip, XFS_IQUOTAUNCHECKED);

 	/*
 	 * Add the (disk blocks and inode) resources occupied by this
@@ -1319,8 +1324,10 @@ xfs_qm_quotacheck(
 		flags |= XFS_PQUOTA_CHKD;
 	}

+	xfs_set_quotacheck_running(mp);
 	error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
 			NULL);
+	xfs_clear_quotacheck_running(mp);

 	/*
 	 * On error, the inode walk may have partially populated the dquot
......
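
Taken together, the hunks above form an interlock between quotacheck and
unlinked-list recovery.  A minimal simulation of that flow, assuming
stand-in names throughout (none of these are kernel functions): inodes
pulled in by the unlinked-bucket walk while quotacheck runs are flagged,
counted inodes have the flag cleared, and flagged inodes that get
inactivated first skip dquot attachment so stale counts never leak in.

#include <assert.h>
#include <stdbool.h>

static bool quotacheck_running;		/* XFS_OPSTATE_QUOTACHECK_RUNNING */

struct inode_sim {			/* hypothetical stand-in */
	bool quota_unchecked;		/* XFS_IQUOTAUNCHECKED */
	bool dquots_attached;
	bool counted;			/* added to the new quota totals */
};

/* Like xfs_iunlink_reload_next(): flag inodes loaded mid-quotacheck. */
static void reload_next(struct inode_sim *ip)
{
	if (quotacheck_running)
		ip->quota_unchecked = true;
}

/* Like xfs_inactive(): never attach dquots to an uncounted inode. */
static void inactive(struct inode_sim *ip)
{
	if (ip->quota_unchecked)
		ip->dquots_attached = false;	/* dqdetach */
	else
		ip->dquots_attached = true;	/* dqattach */
}

/* Like xfs_qm_dqusage_adjust(): count the inode, then clear the flag. */
static void dqusage_adjust(struct inode_sim *ip)
{
	ip->counted = true;
	ip->quota_unchecked = false;
}

int main(void)
{
	struct inode_sim ip = { 0 };
	struct inode_sim ip2 = { 0 };

	quotacheck_running = true;
	reload_next(&ip);		/* pulled off an unlinked bucket */
	inactive(&ip);			/* freed before the walk saw it */
	assert(!ip.dquots_attached);	/* stale counts never applied */

	reload_next(&ip2);
	dqusage_adjust(&ip2);		/* walk reached this one first */
	inactive(&ip2);
	assert(ip2.counted && ip2.dquots_attached);
	quotacheck_running = false;
	return 0;
}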
@@ -3849,6 +3849,26 @@ TRACE_EVENT(xfs_iunlink_reload_next,
 		  __entry->next_agino)
 );

+TRACE_EVENT(xfs_inode_reload_unlinked_bucket,
+	TP_PROTO(struct xfs_inode *ip),
+	TP_ARGS(ip),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agino_t, agino)
+	),
+	TP_fast_assign(
+		__entry->dev = ip->i_mount->m_super->s_dev;
+		__entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino);
+		__entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
+	),
+	TP_printk("dev %d:%d agno 0x%x agino 0x%x bucket %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agino,
+		  __entry->agino % XFS_AGI_UNLINKED_BUCKETS)
+);
+
 DECLARE_EVENT_CLASS(xfs_ag_inode_class,
 	TP_PROTO(struct xfs_inode *ip),
 	TP_ARGS(ip),
......