Commit 128d0fd1 authored by Chandan Babu R

Merge tag 'scrub-nlinks-6.9_2024-02-23' of...

Merge tag 'scrub-nlinks-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.9-merge

xfs: online repair of file link counts

Now that we've created the infrastructure to perform live scans of every
file in the filesystem and the necessary hook infrastructure to observe
live updates, use it to scan directories to compute the correct link
counts for files in the filesystem, and reset those link counts.

This patchset creates a tailored readdir implementation for scrub
because the regular version has to cycle ILOCKs to copy information to
userspace.  We can't cycle the ILOCK during the nlink scan and we don't
need all the other VFS support code (maintaining a readdir cursor and
translating XFS structures to VFS structures and back) so it was easier
to duplicate the code.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'scrub-nlinks-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: teach repair to fix file nlinks
  xfs: track directory entry updates during live nlinks fsck
  xfs: teach scrub to check file nlinks
  xfs: report health of inode link counts
parents aa03f524 6b631c60
......@@ -160,6 +160,7 @@ xfs-y += $(addprefix scrub/, \
ialloc.o \
inode.o \
iscan.o \
nlinks.o \
parent.o \
readdir.o \
refcount.o \
......@@ -193,6 +194,7 @@ xfs-y += $(addprefix scrub/, \
ialloc_repair.o \
inode_repair.o \
newbt.o \
nlinks_repair.o \
reap.o \
refcount_repair.o \
repair.o \
......
......@@ -196,6 +196,7 @@ struct xfs_fsop_geom {
#define XFS_FSOP_GEOM_SICK_RT_BITMAP (1 << 4) /* realtime bitmap */
#define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */
#define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */
#define XFS_FSOP_GEOM_SICK_NLINKS (1 << 7) /* inode link counts */
/* Output for XFS_FS_COUNTS */
typedef struct xfs_fsop_counts {
......@@ -711,9 +712,10 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_PQUOTA 23 /* project quotas */
#define XFS_SCRUB_TYPE_FSCOUNTERS 24 /* fs summary counters */
#define XFS_SCRUB_TYPE_QUOTACHECK 25 /* quota counters */
#define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */
/* Number of scrub subcommands. */
#define XFS_SCRUB_TYPE_NR 26
#define XFS_SCRUB_TYPE_NR 27
/* i: Repair this metadata. */
#define XFS_SCRUB_IFLAG_REPAIR (1u << 0)
......
......@@ -42,6 +42,7 @@ struct xfs_fsop_geom;
#define XFS_SICK_FS_GQUOTA (1 << 2) /* group quota */
#define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */
#define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */
#define XFS_SICK_FS_NLINKS (1 << 5) /* inode link counts */
/* Observable health issues for realtime volume metadata. */
#define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */
......@@ -79,7 +80,8 @@ struct xfs_fsop_geom;
XFS_SICK_FS_UQUOTA | \
XFS_SICK_FS_GQUOTA | \
XFS_SICK_FS_PQUOTA | \
XFS_SICK_FS_QUOTACHECK)
XFS_SICK_FS_QUOTACHECK | \
XFS_SICK_FS_NLINKS)
#define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \
XFS_SICK_RT_SUMMARY)
......
......@@ -1302,6 +1302,9 @@ xchk_fsgates_enable(
if (scrub_fsgates & XCHK_FSGATES_QUOTA)
xfs_dqtrx_hook_enable();
if (scrub_fsgates & XCHK_FSGATES_DIRENTS)
xfs_dir_hook_enable();
sc->flags |= scrub_fsgates;
}
......
......@@ -129,6 +129,7 @@ xchk_setup_quotacheck(struct xfs_scrub *sc)
}
#endif
int xchk_setup_fscounters(struct xfs_scrub *sc);
int xchk_setup_nlinks(struct xfs_scrub *sc);
void xchk_ag_free(struct xfs_scrub *sc, struct xchk_ag *sa);
int xchk_ag_init(struct xfs_scrub *sc, xfs_agnumber_t agno,
......
......@@ -106,6 +106,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_PQUOTA] = { XHG_FS, XFS_SICK_FS_PQUOTA },
[XFS_SCRUB_TYPE_FSCOUNTERS] = { XHG_FS, XFS_SICK_FS_COUNTERS },
[XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK },
[XFS_SCRUB_TYPE_NLINKS] = { XHG_FS, XFS_SICK_FS_NLINKS },
};
/* Return the health status mask for this scrub type. */
......
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_NLINKS_H__
#define __XFS_SCRUB_NLINKS_H__
/* Live link count control structure. */
struct xchk_nlink_ctrs {
	/* Scrub context that owns this live scan. */
	struct xfs_scrub	*sc;

	/* Shadow link count data and its mutex. */
	struct xfarray		*nlinks;
	struct mutex		lock;

	/*
	 * The collection step uses a separate iscan context from the compare
	 * step because the collection iscan coordinates live updates to the
	 * observation data while this scanner is running. The compare iscan
	 * is secondary and can be reinitialized as needed.
	 */
	struct xchk_iscan	collect_iscan;
	struct xchk_iscan	compare_iscan;

	/*
	 * Hook into directory updates so that we can receive live updates
	 * from other writer threads.
	 */
	struct xfs_dir_hook	dhook;
};
/*
* In-core link counts for a given inode in the filesystem.
*
* For an empty rootdir, the directory entries and the field to which they are
* accounted are as follows:
*
* Root directory:
*
* . points to self (root.child)
* .. points to self (root.parent)
* f1 points to a child file (f1.parent)
* d1 points to a child dir (d1.parent, root.child)
*
* Subdirectory d1:
*
* . points to self (d1.child)
* .. points to root dir (root.backref)
* f2 points to child file (f2.parent)
* f3 points to root.f1 (f1.parent)
*
* root.nlink == 3 (root.dot, root.dotdot, root.d1)
* d1.nlink == 2 (root.d1, d1.dot)
* f1.nlink == 2 (root.f1, d1.f3)
* f2.nlink == 1 (d1.f2)
*/
struct xchk_nlink {
	/* Count of forward links from parent directories to this file. */
	xfs_nlink_t		parents;

	/*
	 * Count of back links to this parent directory from child
	 * subdirectories.
	 */
	xfs_nlink_t		backrefs;

	/*
	 * Count of forward links from this directory to all child files and
	 * the number of dot entries. Should be zero for non-directories.
	 */
	xfs_nlink_t		children;

	/* Record state flags (XCHK_NLINK_* / XREP_NLINK_* bits below). */
	unsigned int		flags;
};
/*
* This incore link count has been written at least once. We never want to
* store an xchk_nlink that looks uninitialized.
*/
#define XCHK_NLINK_WRITTEN (1U << 0)
/* Already checked this link count record. */
#define XCHK_NLINK_COMPARE_SCANNED (1U << 1)
/* Already made a repair with this link count record. */
#define XREP_NLINK_DIRTY (1U << 2)
/* Compute total link count, using large enough variables to detect overflow. */
static inline uint64_t
xchk_nlink_total(struct xfs_inode *ip, const struct xchk_nlink *live)
{
	uint64_t		links = live->children;

	/*
	 * A linked directory's '.' entry contributes one extra link.  The
	 * i_nlink check skips unlinked-but-open directories, which no longer
	 * have a dot entry counted against them.
	 */
	if (ip && S_ISDIR(VFS_I(ip)->i_mode) && VFS_I(ip)->i_nlink)
		links++;

	/* 64-bit sum of 32-bit counters, so callers can detect overflow. */
	return links + live->parents;
}
#endif /* __XFS_SCRUB_NLINKS_H__ */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_bmap_util.h"
#include "xfs_iwalk.h"
#include "xfs_ialloc.h"
#include "xfs_sb.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/iscan.h"
#include "scrub/nlinks.h"
#include "scrub/trace.h"
/*
* Live Inode Link Count Repair
* ============================
*
* Use the live inode link count information that we collected to replace the
* nlink values of the incore inodes. A scrub->repair cycle should have left
* the live data and hooks active, so this is safe so long as we make sure the
* inode is locked.
*/
/*
* Correct the link count of the given inode. Because we have to grab locks
* and resources in a certain order, it's possible that this will be a no-op.
*/
STATIC int
xrep_nlinks_repair_inode(
	struct xchk_nlink_ctrs	*xnc)
{
	struct xchk_nlink	obs;
	struct xfs_scrub	*sc = xnc->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip = sc->ip;
	uint64_t		total_links;
	uint64_t		actual_nlink;
	bool			dirty = false;
	int			error;

	/*
	 * Take the IOLOCK before allocating the transaction, and the ILOCK
	 * only afterwards -- presumably to obey the usual XFS lock ordering
	 * (ILOCK must not be held across transaction reservation).
	 */
	xchk_ilock(sc, XFS_IOLOCK_EXCL);

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &sc->tp);
	if (error)
		return error;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(sc->tp, ip, 0);

	/* Serialize against live dirent updates feeding the shadow counts. */
	mutex_lock(&xnc->lock);

	/* Abandon the repair if the collection scan itself was invalidated. */
	if (xchk_iscan_aborted(&xnc->collect_iscan)) {
		error = -ECANCELED;
		goto out_scanlock;
	}

	/* Load the observed link counts for this inode from the xfarray. */
	error = xfarray_load_sparse(xnc->nlinks, ip->i_ino, &obs);
	if (error)
		goto out_scanlock;

	/*
	 * We're done accessing the shared scan data, so we can drop the lock.
	 * We still hold @ip's ILOCK, so its link count cannot change.
	 */
	mutex_unlock(&xnc->lock);

	total_links = xchk_nlink_total(ip, &obs);
	actual_nlink = VFS_I(ip)->i_nlink;

	/*
	 * Non-directories cannot have directories pointing up to them.
	 *
	 * We previously set error to zero, but set it again because one static
	 * checker author fears that programmers will fail to maintain this
	 * invariant and built their tool to flag this as a security risk.  A
	 * different tool author made their bot complain about the redundant
	 * store.  This is a never-ending and stupid battle; both tools missed
	 * *actual bugs* elsewhere; and I no longer care.
	 */
	if (!S_ISDIR(VFS_I(ip)->i_mode) && obs.children != 0) {
		trace_xrep_nlinks_unfixable_inode(mp, ip, &obs);
		error = 0;
		goto out_trans;
	}

	/*
	 * We did not find any links to this inode.  If the inode agrees, we
	 * have nothing further to do.  If not, the inode has a nonzero link
	 * count and we don't have anywhere to graft the child onto.  Dropping
	 * a live inode's link count to zero can cause unexpected shutdowns in
	 * inactivation, so leave it alone.
	 */
	if (total_links == 0) {
		if (actual_nlink != 0)
			trace_xrep_nlinks_unfixable_inode(mp, ip, &obs);
		goto out_trans;
	}

	/* Commit the new link count if it changed. */
	if (total_links != actual_nlink) {
		/* Can't fix a count that won't fit in the ondisk nlink. */
		if (total_links > XFS_MAXLINK) {
			trace_xrep_nlinks_unfixable_inode(mp, ip, &obs);
			goto out_trans;
		}

		trace_xrep_nlinks_update_inode(mp, ip, &obs);

		set_nlink(VFS_I(ip), total_links);
		dirty = true;
	}

	/* Nothing changed; cancel the (clean) transaction and bail out. */
	if (!dirty) {
		error = 0;
		goto out_trans;
	}

	xfs_trans_log_inode(sc->tp, ip, XFS_ILOG_CORE);

	error = xrep_trans_commit(sc);
	xchk_iunlock(sc, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	return error;

out_scanlock:
	mutex_unlock(&xnc->lock);
out_trans:
	xchk_trans_cancel(sc);
	xchk_iunlock(sc, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	return error;
}
/*
* Try to visit every inode in the filesystem for repairs. Move on if we can't
* grab an inode, since we're still making forward progress.
*/
static int
xrep_nlinks_iter(
	struct xchk_nlink_ctrs	*xnc,
	struct xfs_inode	**ipp)
{
	int			ret;

	/*
	 * -EBUSY from the iscan means "try again"; spin until we either get
	 * an inode (return 1), hit the end of the scan (return 0), or see a
	 * hard error.
	 */
	for (;;) {
		ret = xchk_iscan_iter(&xnc->compare_iscan, ipp);
		if (ret != -EBUSY)
			return ret;
	}
}
/* Commit the new inode link counters. */
int
xrep_nlinks(
	struct xfs_scrub	*sc)
{
	/* Scan state set up by the scrub phase lives in sc->buf. */
	struct xchk_nlink_ctrs	*xnc = sc->buf;
	int			error;

	/*
	 * We need ftype for an accurate count of the number of child
	 * subdirectory links.  Child subdirectories with a back link (dotdot
	 * entry) but no forward link are unfixable, so we cannot repair the
	 * link count of the parent directory based on the back link count
	 * alone.  Filesystems without ftype support are rare (old V4) so we
	 * just skip out here.
	 */
	if (!xfs_has_ftype(sc->mp))
		return -EOPNOTSUPP;

	/*
	 * Use the inobt to walk all allocated inodes to compare and fix the
	 * link counts.  Retry iget every tenth of a second for up to 30
	 * seconds -- even if repair misses a few inodes, we still try to fix
	 * as many of them as we can.
	 */
	xchk_iscan_start(sc, 30000, 100, &xnc->compare_iscan);
	ASSERT(sc->ip == NULL);

	/* xrep_nlinks_iter returns 1 while there are inodes left to visit. */
	while ((error = xrep_nlinks_iter(xnc, &sc->ip)) == 1) {
		/*
		 * Commit the scrub transaction so that we can create repair
		 * transactions with the correct reservations.
		 * NOTE(review): xchk_trans_cancel is used here; presumably
		 * the scrub transaction is empty at this point so cancelling
		 * is equivalent -- confirm against the scrub phase.
		 */
		xchk_trans_cancel(sc);
		error = xrep_nlinks_repair_inode(xnc);
		xchk_iscan_mark_visited(&xnc->compare_iscan, sc->ip);
		xchk_irele(sc, sc->ip);
		sc->ip = NULL;
		if (error)
			break;

		if (xchk_should_terminate(sc, &error))
			break;

		/*
		 * Create a new empty transaction so that we can advance the
		 * iscan cursor without deadlocking if the inobt has a cycle.
		 * We can only push the inactivation workqueues with an empty
		 * transaction.
		 */
		error = xchk_trans_alloc_empty(sc);
		if (error)
			break;
	}
	xchk_iscan_iter_finish(&xnc->compare_iscan);
	xchk_iscan_teardown(&xnc->compare_iscan);
	return error;
}
......@@ -116,6 +116,7 @@ int xrep_inode(struct xfs_scrub *sc);
int xrep_bmap_data(struct xfs_scrub *sc);
int xrep_bmap_attr(struct xfs_scrub *sc);
int xrep_bmap_cow(struct xfs_scrub *sc);
int xrep_nlinks(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xrep_rtbitmap(struct xfs_scrub *sc);
......@@ -196,6 +197,7 @@ xrep_setup_nothing(
#define xrep_rtbitmap xrep_notsupported
#define xrep_quota xrep_notsupported
#define xrep_quotacheck xrep_notsupported
#define xrep_nlinks xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
......
......@@ -160,6 +160,9 @@ xchk_fsgates_disable(
if (sc->flags & XCHK_FSGATES_QUOTA)
xfs_dqtrx_hook_disable();
if (sc->flags & XCHK_FSGATES_DIRENTS)
xfs_dir_hook_disable();
sc->flags &= ~XCHK_FSGATES_ALL;
}
......@@ -369,6 +372,12 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.scrub = xchk_quotacheck,
.repair = xrep_quotacheck,
},
[XFS_SCRUB_TYPE_NLINKS] = { /* inode link counts */
.type = ST_FS,
.setup = xchk_setup_nlinks,
.scrub = xchk_nlinks,
.repair = xrep_nlinks,
},
};
static int
......
......@@ -122,6 +122,7 @@ struct xfs_scrub {
#define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */
#define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */
#define XCHK_FSGATES_QUOTA (1U << 4) /* quota live update enabled */
#define XCHK_FSGATES_DIRENTS (1U << 5) /* directory live update enabled */
#define XREP_RESET_PERAG_RESV (1U << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */
......@@ -132,7 +133,8 @@ struct xfs_scrub {
* must be enabled during scrub setup and can only be torn down afterwards.
*/
#define XCHK_FSGATES_ALL (XCHK_FSGATES_DRAIN | \
XCHK_FSGATES_QUOTA)
XCHK_FSGATES_QUOTA | \
XCHK_FSGATES_DIRENTS)
/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
......@@ -183,6 +185,7 @@ xchk_quotacheck(struct xfs_scrub *sc)
}
#endif
int xchk_fscounters(struct xfs_scrub *sc);
int xchk_nlinks(struct xfs_scrub *sc);
/* cross-referencing helpers */
void xchk_xref_is_used_space(struct xfs_scrub *sc, xfs_agblock_t agbno,
......
......@@ -78,6 +78,7 @@ static const char *name_map[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_PQUOTA] = "prjquota",
[XFS_SCRUB_TYPE_FSCOUNTERS] = "fscounters",
[XFS_SCRUB_TYPE_QUOTACHECK] = "quotacheck",
[XFS_SCRUB_TYPE_NLINKS] = "nlinks",
};
/* Format the scrub stats into a text buffer, similar to pcp style. */
......
......@@ -17,11 +17,13 @@
#include "xfs_quota.h"
#include "xfs_quota_defs.h"
#include "xfs_da_format.h"
#include "xfs_dir2.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/quota.h"
#include "scrub/iscan.h"
#include "scrub/nlinks.h"
/* Figure out which block the btree cursor was pointing to. */
static inline xfs_fsblock_t
......
......@@ -23,6 +23,7 @@ struct xfarray;
struct xfarray_sortinfo;
struct xchk_dqiter;
struct xchk_iscan;
struct xchk_nlink;
/*
* ftrace's __print_symbolic requires that all enum values be wrapped in the
......@@ -67,6 +68,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_GQUOTA);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PQUOTA);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_QUOTACHECK);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_NLINKS);
#define XFS_SCRUB_TYPE_STRINGS \
{ XFS_SCRUB_TYPE_PROBE, "probe" }, \
......@@ -94,7 +96,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_QUOTACHECK);
{ XFS_SCRUB_TYPE_GQUOTA, "grpquota" }, \
{ XFS_SCRUB_TYPE_PQUOTA, "prjquota" }, \
{ XFS_SCRUB_TYPE_FSCOUNTERS, "fscounters" }, \
{ XFS_SCRUB_TYPE_QUOTACHECK, "quotacheck" }
{ XFS_SCRUB_TYPE_QUOTACHECK, "quotacheck" }, \
{ XFS_SCRUB_TYPE_NLINKS, "nlinks" }
#define XFS_SCRUB_FLAG_STRINGS \
{ XFS_SCRUB_IFLAG_REPAIR, "repair" }, \
......@@ -113,6 +116,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_QUOTACHECK);
{ XCHK_FSGATES_DRAIN, "fsgates_drain" }, \
{ XCHK_NEED_DRAIN, "need_drain" }, \
{ XCHK_FSGATES_QUOTA, "fsgates_quota" }, \
{ XCHK_FSGATES_DIRENTS, "fsgates_dirents" }, \
{ XREP_RESET_PERAG_RESV, "reset_perag_resv" }, \
{ XREP_ALREADY_FIXED, "already_fixed" }
......@@ -1318,6 +1322,180 @@ TRACE_EVENT(xchk_iscan_iget_retry_wait,
__entry->retry_delay)
);
/*
 * Trace a directory entry seen during the nlinks collection scan:
 * directory @dp contains an entry @name pointing at inode @ino.
 */
TRACE_EVENT(xchk_nlinks_collect_dirent,
	TP_PROTO(struct xfs_mount *mp, struct xfs_inode *dp,
		 xfs_ino_t ino, const struct xfs_name *name),
	TP_ARGS(mp, dp, ino, name),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, dir)
		__field(xfs_ino_t, ino)
		__field(unsigned int, namelen)
		__dynamic_array(char, name, name->len)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->dir = dp->i_ino;
		__entry->ino = ino;
		__entry->namelen = name->len;
		/* dirent names are not NUL-terminated; length is captured above */
		memcpy(__get_str(name), name->name, name->len);
	),
	TP_printk("dev %d:%d dir 0x%llx -> ino 0x%llx name '%.*s'",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->dir,
		  __entry->ino,
		  __entry->namelen,
		  __get_str(name))
);
/*
 * Trace a metadata file inode encountered by the nlinks collection scan.
 * (Presumably metadata files get special-cased link accounting since they
 * have no parent dirents -- confirm against the caller in nlinks.c.)
 */
TRACE_EVENT(xchk_nlinks_collect_metafile,
	TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino),
	TP_ARGS(mp, ino),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->ino = ino;
	),
	TP_printk("dev %d:%d ino 0x%llx",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino)
);
/*
 * Trace a live directory update fed to the nlinks scan by the dirent hook.
 * @dp may be NULL (logged as NULLFSINO); @delta is the link count change.
 * NOTE(review): @action is recorded in the ring buffer but never emitted by
 * TP_printk -- either print it or drop the field; confirm intent upstream.
 */
TRACE_EVENT(xchk_nlinks_live_update,
	TP_PROTO(struct xfs_mount *mp, const struct xfs_inode *dp,
		 int action, xfs_ino_t ino, int delta,
		 const char *name, unsigned int namelen),
	TP_ARGS(mp, dp, action, ino, delta, name, namelen),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, dir)
		__field(int, action)
		__field(xfs_ino_t, ino)
		__field(int, delta)
		__field(unsigned int, namelen)
		__dynamic_array(char, name, namelen)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->dir = dp ? dp->i_ino : NULLFSINO;
		__entry->action = action;
		__entry->ino = ino;
		__entry->delta = delta;
		__entry->namelen = namelen;
		memcpy(__get_str(name), name, namelen);
	),
	TP_printk("dev %d:%d dir 0x%llx ino 0x%llx nlink_delta %d name '%.*s'",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->dir,
		  __entry->ino,
		  __entry->delta,
		  __entry->namelen,
		  __get_str(name))
);
/*
 * Dump the shadow link counts observed for @ino.  (Presumably emitted when
 * the compare step examines an inode whose total should be zero -- confirm
 * against the caller in nlinks.c.)
 */
TRACE_EVENT(xchk_nlinks_check_zero,
	TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino,
		 const struct xchk_nlink *live),
	TP_ARGS(mp, ino, live),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_nlink_t, parents)
		__field(xfs_nlink_t, backrefs)
		__field(xfs_nlink_t, children)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->ino = ino;
		__entry->parents = live->parents;
		__entry->backrefs = live->backrefs;
		__entry->children = live->children;
	),
	TP_printk("dev %d:%d ino 0x%llx parents %u backrefs %u children %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->parents,
		  __entry->backrefs,
		  __entry->children)
);
/*
 * Trace an adjustment to the incore (shadow) link counts for @ino.  Each
 * *_delta argument is the signed change applied to the corresponding
 * xchk_nlink counter; the format prints them as "delta:new_value" pairs.
 */
TRACE_EVENT(xchk_nlinks_update_incore,
	TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino,
		 const struct xchk_nlink *live, int parents_delta,
		 int backrefs_delta, int children_delta),
	TP_ARGS(mp, ino, live, parents_delta, backrefs_delta, children_delta),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_nlink_t, parents)
		__field(xfs_nlink_t, backrefs)
		__field(xfs_nlink_t, children)
		__field(int, parents_delta)
		__field(int, backrefs_delta)
		__field(int, children_delta)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->ino = ino;
		__entry->parents = live->parents;
		__entry->backrefs = live->backrefs;
		__entry->children = live->children;
		__entry->parents_delta = parents_delta;
		__entry->backrefs_delta = backrefs_delta;
		__entry->children_delta = children_delta;
	),
	TP_printk("dev %d:%d ino 0x%llx parents %d:%u backrefs %d:%u children %d:%u",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->parents_delta,
		  __entry->parents,
		  __entry->backrefs_delta,
		  __entry->backrefs,
		  __entry->children_delta,
		  __entry->children)
);
/*
 * Event class comparing an inode's actual VFS nlink (and ftype) against the
 * shadow link counts collected by the scan.  Shared by the scrub compare
 * step and the repair update/unfixable tracepoints.
 */
DECLARE_EVENT_CLASS(xchk_nlinks_diff_class,
	TP_PROTO(struct xfs_mount *mp, struct xfs_inode *ip,
		 const struct xchk_nlink *live),
	TP_ARGS(mp, ip, live),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(uint8_t, ftype)
		__field(xfs_nlink_t, nlink)
		__field(xfs_nlink_t, parents)
		__field(xfs_nlink_t, backrefs)
		__field(xfs_nlink_t, children)
	),
	TP_fast_assign(
		__entry->dev = mp->m_super->s_dev;
		__entry->ino = ip->i_ino;
		__entry->ftype = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
		__entry->nlink = VFS_I(ip)->i_nlink;
		__entry->parents = live->parents;
		__entry->backrefs = live->backrefs;
		__entry->children = live->children;
	),
	TP_printk("dev %d:%d ino 0x%llx ftype %s nlink %u parents %u backrefs %u children %u",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __print_symbolic(__entry->ftype, XFS_DIR3_FTYPE_STR),
		  __entry->nlink,
		  __entry->parents,
		  __entry->backrefs,
		  __entry->children)
);
/* Stamp out an event instance of the diff class. */
#define DEFINE_SCRUB_NLINKS_DIFF_EVENT(name) \
DEFINE_EVENT(xchk_nlinks_diff_class, name, \
	TP_PROTO(struct xfs_mount *mp, struct xfs_inode *ip, \
		 const struct xchk_nlink *live), \
	TP_ARGS(mp, ip, live))
DEFINE_SCRUB_NLINKS_DIFF_EVENT(xchk_nlinks_compare_inode);
/* repair tracepoints */
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
......@@ -2007,6 +2185,9 @@ DEFINE_XREP_DQUOT_EVENT(xrep_dquot_item_fill_bmap_hole);
DEFINE_XREP_DQUOT_EVENT(xrep_quotacheck_dquot);
#endif /* CONFIG_XFS_QUOTA */
DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_update_inode);
DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_unfixable_inode);
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
......
......@@ -281,6 +281,7 @@ static const struct ioctl_sick_map fs_map[] = {
{ XFS_SICK_FS_GQUOTA, XFS_FSOP_GEOM_SICK_GQUOTA },
{ XFS_SICK_FS_PQUOTA, XFS_FSOP_GEOM_SICK_PQUOTA },
{ XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK },
{ XFS_SICK_FS_NLINKS, XFS_FSOP_GEOM_SICK_NLINKS },
{ 0, 0 },
};
......
......@@ -925,6 +925,81 @@ xfs_bumplink(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
#ifdef CONFIG_XFS_LIVE_HOOKS
/*
* Use a static key here to reduce the overhead of directory live update hooks.
* If the compiler supports jump labels, the static branch will be replaced by
* a nop sled when there are no hook users. Online fsck is currently the only
* caller, so this is a reasonable tradeoff.
*
* Note: Patching the kernel code requires taking the cpu hotplug lock. Other
* parts of the kernel allocate memory with that lock held, which means that
* XFS callers cannot hold any locks that might be used by memory reclaim or
* writeback when calling the static_branch_{inc,dec} functions.
*/
DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dir_hooks_switch);
/* Stop routing directory updates through the hooks (static branch off). */
void
xfs_dir_hook_disable(void)
{
	xfs_hooks_switch_off(&xfs_dir_hooks_switch);
}
/* Start routing directory updates through the hooks (static branch on). */
void
xfs_dir_hook_enable(void)
{
	xfs_hooks_switch_on(&xfs_dir_hooks_switch);
}
/* Call hooks for a directory update relating to a child dirent update. */
/*
 * Notify hook clients about a dirent update.  @dp is the parent directory,
 * @ip the child inode, @delta the signed link count change (callers pass
 * +1 for added entries, -1 for removed ones), and @name the entry name.
 * Costs only a nop sled when no hooks are registered.
 */
inline void
xfs_dir_update_hook(
	struct xfs_inode		*dp,
	struct xfs_inode		*ip,
	int				delta,
	const struct xfs_name		*name)
{
	if (xfs_hooks_switched_on(&xfs_dir_hooks_switch)) {
		struct xfs_dir_update_params	p = {
			.dp		= dp,
			.ip		= ip,
			.delta		= delta,
			.name		= name,
		};
		struct xfs_mount	*mp = ip->i_mount;

		xfs_hooks_call(&mp->m_dir_update_hooks, 0, &p);
	}
}
/* Call the specified function during a directory update. */
/* Register @hook to be called for directory updates on @mp. */
int
xfs_dir_hook_add(
	struct xfs_mount	*mp,
	struct xfs_dir_hook	*hook)
{
	return xfs_hooks_add(&mp->m_dir_update_hooks, &hook->dirent_hook);
}
/* Stop calling the specified function during a directory update. */
/* Unregister @hook from the directory update chain on @mp. */
void
xfs_dir_hook_del(
	struct xfs_mount	*mp,
	struct xfs_dir_hook	*hook)
{
	xfs_hooks_del(&mp->m_dir_update_hooks, &hook->dirent_hook);
}
/* Configure directory update hook functions. */
/* Initialize @hook so that @mod_fn is invoked on each directory update. */
void
xfs_dir_hook_setup(
	struct xfs_dir_hook	*hook,
	notifier_fn_t		mod_fn)
{
	xfs_hook_setup(&hook->dirent_hook, mod_fn);
}
#endif /* CONFIG_XFS_LIVE_HOOKS */
int
xfs_create(
struct mnt_idmap *idmap,
......@@ -1035,6 +1110,12 @@ xfs_create(
xfs_bumplink(tp, dp);
}
/*
* Create ip with a reference from dp, and add '.' and '..' references
* if it's a directory.
*/
xfs_dir_update_hook(dp, ip, 1, name);
/*
* If this is a synchronous mount, make sure that the
* create transaction goes to disk before returning to
......@@ -1249,6 +1330,7 @@ xfs_link(
xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
xfs_bumplink(tp, sip);
xfs_dir_update_hook(tdp, sip, 1, target_name);
/*
* If this is a synchronous mount, make sure that the
......@@ -2562,6 +2644,12 @@ xfs_remove(
goto out_trans_cancel;
}
/*
* Drop the link from dp to ip, and if ip was a directory, remove the
* '.' and '..' references since we freed the directory.
*/
xfs_dir_update_hook(dp, ip, -1, name);
/*
* If this is a synchronous mount, make sure that the
* remove transaction goes to disk before returning to
......@@ -2752,6 +2840,20 @@ xfs_cross_rename(
}
xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
/*
* Inform our hook clients that we've finished an exchange operation as
* follows: removed the source and target files from their directories;
* added the target to the source directory; and added the source to
* the target directory. All inodes are locked, so it's ok to model a
* rename this way so long as we say we deleted entries before we add
* new ones.
*/
xfs_dir_update_hook(dp1, ip1, -1, name1);
xfs_dir_update_hook(dp2, ip2, -1, name2);
xfs_dir_update_hook(dp1, ip2, 1, name1);
xfs_dir_update_hook(dp2, ip1, 1, name2);
return xfs_finish_rename(tp);
out_trans_abort:
......@@ -3135,6 +3237,21 @@ xfs_rename(
if (new_parent)
xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
/*
* Inform our hook clients that we've finished a rename operation as
* follows: removed the source and target files from their directories;
* that we've added the source to the target directory; and finally
* that we've added the whiteout, if there was one. All inodes are
* locked, so it's ok to model a rename this way so long as we say we
* deleted entries before we add new ones.
*/
if (target_ip)
xfs_dir_update_hook(target_dp, target_ip, -1, target_name);
xfs_dir_update_hook(src_dp, src_ip, -1, src_name);
xfs_dir_update_hook(target_dp, src_ip, 1, target_name);
if (wip)
xfs_dir_update_hook(src_dp, wip, 1, src_name);
error = xfs_finish_rename(tp);
if (wip)
xfs_irele(wip);
......
......@@ -171,6 +171,12 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
return &ip->i_vnode;
}
/* convert from const xfs inode to const vfs inode */
static inline const struct inode *VFS_IC(const struct xfs_inode *ip)
{
return &ip->i_vnode;
}
/*
* For regular files we only update the on-disk filesize when actually
* writing data back to disk. Until then only the copy in the VFS inode
......@@ -626,4 +632,29 @@ bool xfs_ifork_zapped(const struct xfs_inode *ip, int whichfork);
void xfs_inode_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_filblks_t *dblocks, xfs_filblks_t *rblocks);
struct xfs_dir_update_params {
const struct xfs_inode *dp;
const struct xfs_inode *ip;
const struct xfs_name *name;
int delta;
};
#ifdef CONFIG_XFS_LIVE_HOOKS
void xfs_dir_update_hook(struct xfs_inode *dp, struct xfs_inode *ip,
int delta, const struct xfs_name *name);
struct xfs_dir_hook {
struct xfs_hook dirent_hook;
};
void xfs_dir_hook_disable(void);
void xfs_dir_hook_enable(void);
int xfs_dir_hook_add(struct xfs_mount *mp, struct xfs_dir_hook *hook);
void xfs_dir_hook_del(struct xfs_mount *mp, struct xfs_dir_hook *hook);
void xfs_dir_hook_setup(struct xfs_dir_hook *hook, notifier_fn_t mod_fn);
#else
# define xfs_dir_update_hook(dp, ip, delta, name) ((void)0)
#endif /* CONFIG_XFS_LIVE_HOOKS */
#endif /* __XFS_INODE_H__ */
......@@ -252,6 +252,9 @@ typedef struct xfs_mount {
/* cpus that have inodes queued for inactivation */
struct cpumask m_inodegc_cpumask;
/* Hook to feed dirent updates to an active online repair. */
struct xfs_hooks m_dir_update_hooks;
} xfs_mount_t;
#define M_IGEO(mp) (&(mp)->m_ino_geo)
......
......@@ -2011,6 +2011,8 @@ static int xfs_init_fs_context(
mp->m_logbsize = -1;
mp->m_allocsize_log = 16; /* 64k */
xfs_hooks_init(&mp->m_dir_update_hooks);
fc->s_fs_info = mp;
fc->ops = &xfs_context_ops;
......
......@@ -322,6 +322,7 @@ xfs_symlink(
goto out_trans_cancel;
xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
xfs_dir_update_hook(dp, ip, 1, link_name);
/*
* If this is a synchronous mount, make sure that the
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment