Commit 4787fc80 authored by Darrick J. Wong

xfs: create a shadow rmap btree during rmap repair

Create an in-memory btree of rmap records instead of an array.  This
enables us to do live record collection instead of freezing the fs.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 32080a9b
......@@ -269,6 +269,16 @@ xfs_rmap_check_irec(
return NULL;
}
/*
 * Check an incore rmap record against the perag attached to a btree cursor.
 * In-memory rmap btree cursors carry their perag in bc_mem; every other
 * cursor passed here is expected to carry it in bc_ag.
 */
static inline xfs_failaddr_t
xfs_rmap_check_btrec(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*irec)
{
	struct xfs_perag		*pag;

	/* Pick the perag from whichever cursor private area is in use. */
	if (xfs_btree_is_mem_rmap(cur->bc_ops))
		pag = cur->bc_mem.pag;
	else
		pag = cur->bc_ag.pag;

	return xfs_rmap_check_irec(pag, irec);
}
static inline int
xfs_rmap_complain_bad_rec(
struct xfs_btree_cur *cur,
......@@ -277,9 +287,13 @@ xfs_rmap_complain_bad_rec(
{
struct xfs_mount *mp = cur->bc_mp;
xfs_warn(mp,
"Reverse Mapping BTree record corruption in AG %d detected at %pS!",
cur->bc_ag.pag->pag_agno, fa);
if (xfs_btree_is_mem_rmap(cur->bc_ops))
xfs_warn(mp,
"In-Memory Reverse Mapping BTree record corruption detected at %pS!", fa);
else
xfs_warn(mp,
"Reverse Mapping BTree record corruption in AG %d detected at %pS!",
cur->bc_ag.pag->pag_agno, fa);
xfs_warn(mp,
"Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
irec->rm_owner, irec->rm_flags, irec->rm_startblock,
......@@ -307,7 +321,7 @@ xfs_rmap_get_rec(
fa = xfs_rmap_btrec_to_irec(rec, irec);
if (!fa)
fa = xfs_rmap_check_irec(cur->bc_ag.pag, irec);
fa = xfs_rmap_check_btrec(cur, irec);
if (fa)
return xfs_rmap_complain_bad_rec(cur, fa, irec);
......@@ -2404,15 +2418,12 @@ xfs_rmap_map_raw(
{
struct xfs_owner_info oinfo;
oinfo.oi_owner = rmap->rm_owner;
oinfo.oi_offset = rmap->rm_offset;
oinfo.oi_flags = 0;
if (rmap->rm_flags & XFS_RMAP_ATTR_FORK)
oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK;
if (rmap->rm_flags & XFS_RMAP_BMBT_BLOCK)
oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK;
xfs_owner_info_pack(&oinfo, rmap->rm_owner, rmap->rm_offset,
rmap->rm_flags);
if (rmap->rm_flags || XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
if ((rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK |
XFS_RMAP_UNWRITTEN)) ||
XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner))
return xfs_rmap_map(cur, rmap->rm_startblock,
rmap->rm_blockcount,
rmap->rm_flags & XFS_RMAP_UNWRITTEN,
......@@ -2442,7 +2453,7 @@ xfs_rmap_query_range_helper(
fa = xfs_rmap_btrec_to_irec(rec, &irec);
if (!fa)
fa = xfs_rmap_check_irec(cur->bc_ag.pag, &irec);
fa = xfs_rmap_check_btrec(cur, &irec);
if (fa)
return xfs_rmap_complain_bad_rec(cur, fa, &irec);
......
......@@ -22,6 +22,8 @@
#include "xfs_extent_busy.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"
static struct kmem_cache *xfs_rmapbt_cur_cache;
......@@ -541,6 +543,151 @@ xfs_rmapbt_init_cursor(
return cur;
}
#ifdef CONFIG_XFS_BTREE_IN_MEM
/*
 * Compute how many entries fit in @blocklen bytes of an in-memory rmap btree
 * block.  A leaf entry is one rmap record; a node entry is two rmap keys plus
 * one 64-bit pointer.
 */
static inline unsigned int
xfs_rmapbt_mem_block_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	size_t			entry_len;

	if (leaf)
		entry_len = sizeof(struct xfs_rmap_rec);
	else
		entry_len = 2 * sizeof(struct xfs_rmap_key) + sizeof(__be64);

	return blocklen / entry_len;
}
/*
 * Structure verifier for one in-memory rmap btree block.  The ondisk rmapbt
 * feature does not need to be enabled for callers to build an in-memory
 * btree, so nothing here depends on it.
 *
 * Returns NULL-equivalent (no failure address) if the block passes, or the
 * address of the check that failed.
 */
static xfs_failaddr_t
xfs_rmapbt_mem_verify(
	struct xfs_buf		*bp)
{
	const unsigned int	blocklen = XFBNO_BLOCKSIZE -
					   XFS_BTREE_LBLOCK_CRC_LEN;
	struct xfs_btree_block	*hdr = XFS_BUF_TO_BLOCK(bp);
	xfs_failaddr_t		failaddr;
	unsigned int		lvl;

	/* Reject blocks whose magic number fails xfs_verify_magic. */
	if (!xfs_verify_magic(bp, hdr->bb_magic))
		return __this_address;

	/* Generic long-format v5 header checks. */
	failaddr = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
	if (failaddr)
		return failaddr;

	/* The level must be below the maximum height we size cursors for. */
	lvl = be16_to_cpu(hdr->bb_level);
	if (lvl >= xfs_rmapbt_maxlevels_ondisk())
		return __this_address;

	/* Level zero is a leaf block; everything above holds keys and ptrs. */
	return xfs_btree_memblock_verify(bp,
			xfs_rmapbt_mem_block_maxrecs(blocklen, lvl == 0));
}
/*
 * Read/write verifier for in-memory rmap btree blocks: run the structure
 * verifier and report -EFSCORRUPTED against the buffer if it fails.
 */
static void
xfs_rmapbt_mem_rw_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_rmapbt_mem_verify(bp);
	if (!fa)
		return;

	xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
/*
 * Buffer ops for in-memory rmap btree blocks.  Skip crc checks on in-memory
 * btrees to save time: the read and write verifiers are the same function,
 * which only runs the structure checks in xfs_rmapbt_mem_verify.
 */
static const struct xfs_buf_ops xfs_rmapbt_mem_buf_ops = {
.name = "xfs_rmapbt_mem",
/* Only the second magic slot is populated, with the CRC-format rmap magic. */
.magic = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) },
.verify_read = xfs_rmapbt_mem_rw_verify,
.verify_write = xfs_rmapbt_mem_rw_verify,
.verify_struct = xfs_rmapbt_mem_verify,
};
/*
 * Btree operations for the in-memory rmap btree.  Block management (cursor
 * duplication, root updates, block alloc/free, min/max record counts and
 * pointer initialisation) is delegated to the xfbtree_* helpers, while key
 * and record handling uses the xfs_rmapbt_* functions.
 */
const struct xfs_btree_ops xfs_rmapbt_mem_ops = {
.name = "mem_rmap",
.type = XFS_BTREE_TYPE_MEM,
.geom_flags = XFS_BTGEO_OVERLAPPING,
.rec_len = sizeof(struct xfs_rmap_rec),
/* Overlapping btree; 2 keys per pointer. */
.key_len = 2 * sizeof(struct xfs_rmap_key),
/* In-memory btree blocks use long-format pointers. */
.ptr_len = XFS_BTREE_LONG_PTR_LEN,
.lru_refs = XFS_RMAP_BTREE_REF,
/* Statistics are kept in their own xs_rmap_mem_2 counter group. */
.statoff = XFS_STATS_CALC_INDEX(xs_rmap_mem_2),
.dup_cursor = xfbtree_dup_cursor,
.set_root = xfbtree_set_root,
.alloc_block = xfbtree_alloc_block,
.free_block = xfbtree_free_block,
.get_minrecs = xfbtree_get_minrecs,
.get_maxrecs = xfbtree_get_maxrecs,
.init_key_from_rec = xfs_rmapbt_init_key_from_rec,
.init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec,
.init_rec_from_cur = xfs_rmapbt_init_rec_from_cur,
.init_ptr_from_cur = xfbtree_init_ptr_from_cur,
.key_diff = xfs_rmapbt_key_diff,
.buf_ops = &xfs_rmapbt_mem_buf_ops,
.diff_two_keys = xfs_rmapbt_diff_two_keys,
.keys_inorder = xfs_rmapbt_keys_inorder,
.recs_inorder = xfs_rmapbt_recs_inorder,
.keys_contiguous = xfs_rmapbt_keys_contiguous,
};
/*
 * Allocate a btree cursor for walking or updating an in-memory rmap btree.
 *
 * @pag:  perag to associate with the cursor; the cursor stores the reference
 *        returned by xfs_perag_hold().
 * @tp:   transaction passed through to the cursor allocation.
 * @xfbt: in-memory btree that the cursor operates on; its current height
 *        seeds the cursor's level count.
 */
struct xfs_btree_cur *
xfs_rmapbt_mem_cursor(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfbtree		*xfbt)
{
	struct xfs_btree_cur	*cur;

	cur = xfs_btree_alloc_cursor(pag->pag_mount, tp, &xfs_rmapbt_mem_ops,
			xfs_rmapbt_maxlevels_ondisk(), xfs_rmapbt_cur_cache);

	cur->bc_nlevels = xfbt->nlevels;

	/* In-memory cursor private state: the xfbtree and a held perag. */
	cur->bc_mem.xfbtree = xfbt;
	cur->bc_mem.pag = xfs_perag_hold(pag);

	return cur;
}
/*
 * Initialise @xfbt as an in-memory rmap btree backed by the buffer target
 * @btp.  The AG number is recorded as the btree owner before the generic
 * xfbtree setup runs.  Returns whatever xfbtree_init() returns.
 */
int
xfs_rmapbt_mem_init(
	struct xfs_mount	*mp,
	struct xfbtree		*xfbt,
	struct xfs_buftarg	*btp,
	xfs_agnumber_t		agno)
{
	int			error;

	xfbt->owner = agno;

	error = xfbtree_init(mp, xfbt, btp, &xfs_rmapbt_mem_ops);
	return error;
}
/*
 * Work out the tallest an in-memory reverse mapping btree can get, assuming
 * every block is only half full.
 */
static unsigned int
xfs_rmapbt_mem_maxlevels(void)
{
	const unsigned int	blocklen = XFBNO_BLOCKSIZE -
					   XFS_BTREE_LBLOCK_CRC_LEN;
	/* Slot 0 is the leaf minimum, slot 1 the node minimum. */
	unsigned int		minrecs[2] = {
		xfs_rmapbt_mem_block_maxrecs(blocklen, true) / 2,
		xfs_rmapbt_mem_block_maxrecs(blocklen, false) / 2,
	};

	/*
	 * Size the tree for the worst case: one rmap record for every
	 * record-sized chunk of the largest possible AG.
	 */
	return xfs_btree_compute_maxlevels(minrecs,
			XFS_MAX_AG_BYTES / sizeof(struct xfs_rmap_rec));
}
#else
# define xfs_rmapbt_mem_maxlevels() (0)
#endif /* CONFIG_XFS_BTREE_IN_MEM */
/*
* Install a new reverse mapping btree root. Caller is responsible for
* invalidating and freeing the old btree blocks.
......@@ -611,7 +758,8 @@ xfs_rmapbt_maxlevels_ondisk(void)
* like if it consumes almost all the blocks in the AG due to maximal
* sharing factor.
*/
return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS);
return max(xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS),
xfs_rmapbt_mem_maxlevels());
}
/* Compute the maximum height of an rmap btree. */
......
......@@ -10,6 +10,7 @@ struct xfs_buf;
struct xfs_btree_cur;
struct xfs_mount;
struct xbtree_afakeroot;
struct xfbtree;
/* rmaps only exist on crc enabled filesystems */
#define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN
......@@ -62,4 +63,9 @@ unsigned int xfs_rmapbt_maxlevels_ondisk(void);
int __init xfs_rmapbt_init_cur_cache(void);
void xfs_rmapbt_destroy_cur_cache(void);
struct xfs_btree_cur *xfs_rmapbt_mem_cursor(struct xfs_perag *pag,
struct xfs_trans *tp, struct xfbtree *xfbtree);
int xfs_rmapbt_mem_init(struct xfs_mount *mp, struct xfbtree *xfbtree,
struct xfs_buftarg *btp, xfs_agnumber_t agno);
#endif /* __XFS_RMAP_BTREE_H__ */
......@@ -51,6 +51,7 @@ extern const struct xfs_btree_ops xfs_finobt_ops;
extern const struct xfs_btree_ops xfs_bmbt_ops;
extern const struct xfs_btree_ops xfs_refcountbt_ops;
extern const struct xfs_btree_ops xfs_rmapbt_ops;
extern const struct xfs_btree_ops xfs_rmapbt_mem_ops;
static inline bool xfs_btree_is_bno(const struct xfs_btree_ops *ops)
{
......@@ -87,6 +88,15 @@ static inline bool xfs_btree_is_rmap(const struct xfs_btree_ops *ops)
return ops == &xfs_rmapbt_ops;
}
#ifdef CONFIG_XFS_BTREE_IN_MEM
/* Return true if @ops is the in-memory rmap btree ops structure. */
static inline bool xfs_btree_is_mem_rmap(const struct xfs_btree_ops *ops)
{
return ops == &xfs_rmapbt_mem_ops;
}
#else
/* Without CONFIG_XFS_BTREE_IN_MEM this always evaluates to false. */
# define xfs_btree_is_mem_rmap(...) (false)
#endif
/* log size calculation functions */
int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
int xfs_log_calc_minimum_size(struct xfs_mount *);
......
......@@ -31,12 +31,14 @@
#include "xfs_error.h"
#include "xfs_reflink.h"
#include "xfs_health.h"
#include "xfs_buf_mem.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/stats.h"
#include "scrub/xfile.h"
/*
* Attempt to repair some metadata, if the metadata is corrupt and userspace
......@@ -1147,3 +1149,19 @@ xrep_metadata_inode_forks(
return 0;
}
/*
 * Set up an in-memory buffer cache so that we can use the xfbtree.  Allocating
 * a shmem file might take locks, so we cannot be in transaction context.  Park
 * our resources in the scrub context and let the teardown function take care
 * of them at the right time.
 *
 * @sc:    scrub context; the new buffer target is stored in sc->xmbtp.
 * @descr: description string handed to xmbuf_alloc().
 *
 * Returns the result of xmbuf_alloc().
 */
int
xrep_setup_xfbtree(
struct xfs_scrub *sc,
const char *descr)
{
/* Callers must not have a transaction attached to the scrub context yet. */
ASSERT(sc->tp == NULL);
return xmbuf_alloc(sc->mp, descr, &sc->xmbtp);
}
......@@ -81,6 +81,8 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
# define xrep_ino_dqattach(sc) (0)
#endif /* CONFIG_XFS_QUOTA */
int xrep_setup_xfbtree(struct xfs_scrub *sc, const char *descr);
int xrep_ino_ensure_extent_count(struct xfs_scrub *sc, int whichfork,
xfs_extnum_t nextents);
int xrep_reset_perag_resv(struct xfs_scrub *sc);
......
This diff is collapsed.
......@@ -50,7 +50,8 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
{ "ibt2", xfsstats_offset(xs_fibt_2) },
{ "fibt2", xfsstats_offset(xs_rmap_2) },
{ "rmapbt", xfsstats_offset(xs_refcbt_2) },
{ "refcntbt", xfsstats_offset(xs_qm_dqreclaims)},
{ "refcntbt", xfsstats_offset(xs_rmap_mem_2) },
{ "rmapbt_mem", xfsstats_offset(xs_qm_dqreclaims)},
/* we print both series of quota information together */
{ "qm", xfsstats_offset(xs_xstrat_bytes)},
};
......
......@@ -125,6 +125,7 @@ struct __xfsstats {
uint32_t xs_fibt_2[__XBTS_MAX];
uint32_t xs_rmap_2[__XBTS_MAX];
uint32_t xs_refcbt_2[__XBTS_MAX];
uint32_t xs_rmap_mem_2[__XBTS_MAX];
uint32_t xs_qm_dqreclaims;
uint32_t xs_qm_dqreclaim_misses;
uint32_t xs_qm_dquot_dups;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment