Commit 5e60ca3f authored by Chandan Babu R

Merge tag 'repair-prep-for-bulk-loading-6.8_2023-12-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.8-mergeB

xfs: prepare repair for bulk loading

Before we start merging the online repair functions, let's improve the
bulk loading code a bit.  First, we need to fix a misinteraction between
the AIL and the btree bulkloader wherein the delwri list submission at
the end of the bulk load fails to queue a buffer for writeback if that
buffer happens to be on the AIL's delwri list.
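
The fix (the xfs_buf_delwri_queue_here hunk in xfs_buf.c below) makes the
queueing side wait out the other list.  A minimal sketch of that rule, with
a hypothetical function name:

	/* Sketch only; mirrors xfs_buf_delwri_queue_here() below. */
	static void
	queue_for_integrity_write(
		struct xfs_buf		*bp,		/* locked on entry */
		struct list_head	*buffer_list)	/* caller's delwri list */
	{
		/* Wait until no other delwri list (e.g. the AIL's) holds bp. */
		while (!list_empty(&bp->b_list)) {
			xfs_buf_unlock(bp);
			wait_var_event(&bp->b_list, list_empty(&bp->b_list));
			xfs_buf_lock(bp);
		}

		xfs_buf_delwri_queue(bp, buffer_list);
	}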

Second, we introduce a defer ops barrier object so that the process of
reaping blocks after a repair cannot queue more than two extents per EFI
log item.  This increases our exposure to leaking blocks if the system
goes down during a reap, but it should also prevent transaction
overflows, which themselves take the system down.
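
The barrier object itself lands elsewhere in the series; as a hedged
sketch of the batching idea only (every helper name below is hypothetical,
not the actual defer-ops API), the reap loop bounds how many extents each
transaction logs before rolling:

	#define REAP_MAX_EFI_EXTENTS	2	/* extents queued per EFI */

	static int
	reap_all_extents(struct reap_ctx *rc)	/* hypothetical context */
	{
		int	error;

		while (have_unreaped_extents(rc)) {
			unsigned int	queued = 0;

			/* Log a small, bounded batch of free-extent intents. */
			while (queued < REAP_MAX_EFI_EXTENTS &&
			       have_unreaped_extents(rc)) {
				queue_extent_free_intent(rc);
				queued++;
			}

			/* Commit the batch; a crash here leaks only this batch. */
			error = roll_transaction(rc);
			if (error)
				return error;
		}
		return 0;
	}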

Third, we change the bulkloader itself to copy multiple records into a
block if possible, and we add some debug knobs so that developers can
control the slack factors, just as they can with xfs_repair.
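
To make the slack tradeoff concrete, here is a hedged, illustrative helper
(not the kernel's actual geometry code) showing how a slack knob shapes the
per-block record count; -1 defers to the built-in default, which loads
blocks 75% full per the knob comments in the diff below:

	static unsigned int
	records_per_block(unsigned int maxrecs, int slack)
	{
		if (slack < 0)
			slack = maxrecs / 4;	/* default: load 75% full */
		if (slack >= (int)maxrecs)
			slack = maxrecs - 1;	/* always load something */
		return maxrecs - slack;
	}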
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'repair-prep-for-bulk-loading-6.8_2023-12-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: constrain dirty buffers while formatting a staged btree
  xfs: move btree bulkload record initialization to ->get_record implementations
  xfs: add debug knobs to control btree bulk load slack factors
  xfs: read leaf blocks when computing keys for bulkloading into node blocks
  xfs: set XBF_DONE on newly formatted btree block that are ready for writing
  xfs: force all buffers to be written during btree bulk load
parents 0573676f e069d549
@@ -1330,7 +1330,7 @@ xfs_btree_get_buf_block(
  * Read in the buffer at the given ptr and return the buffer and
  * the block pointer within the buffer.
  */
-STATIC int
+int
 xfs_btree_read_buf_block(
 	struct xfs_btree_cur		*cur,
 	const union xfs_btree_ptr	*ptr,
@@ -700,6 +700,9 @@ void xfs_btree_set_ptr_null(struct xfs_btree_cur *cur,
 int xfs_btree_get_buf_block(struct xfs_btree_cur *cur,
 		const union xfs_btree_ptr *ptr, struct xfs_btree_block **block,
 		struct xfs_buf **bpp);
+int xfs_btree_read_buf_block(struct xfs_btree_cur *cur,
+		const union xfs_btree_ptr *ptr, int flags,
+		struct xfs_btree_block **block, struct xfs_buf **bpp);
 void xfs_btree_set_sibling(struct xfs_btree_cur *cur,
 		struct xfs_btree_block *block, const union xfs_btree_ptr *ptr,
 		int lr);
@@ -333,20 +333,41 @@ xfs_btree_commit_ifakeroot(
 /*
  * Put a btree block that we're loading onto the ordered list and release it.
  * The btree blocks will be written to disk when bulk loading is finished.
+ * If we reach the dirty buffer threshold, flush them to disk before
+ * continuing.
  */
-static void
+static int
 xfs_btree_bload_drop_buf(
-	struct list_head	*buffers_list,
-	struct xfs_buf		**bpp)
+	struct xfs_btree_bload		*bbl,
+	struct list_head		*buffers_list,
+	struct xfs_buf			**bpp)
 {
-	if (*bpp == NULL)
-		return;
+	struct xfs_buf		*bp = *bpp;
+	int			error;
 
-	if (!xfs_buf_delwri_queue(*bpp, buffers_list))
-		ASSERT(0);
+	if (!bp)
+		return 0;
+
+	/*
+	 * Mark this buffer XBF_DONE (i.e. uptodate) so that a subsequent
+	 * xfs_buf_read will not pointlessly reread the contents from the disk.
+	 */
+	bp->b_flags |= XBF_DONE;
 
-	xfs_buf_relse(*bpp);
+	xfs_buf_delwri_queue_here(bp, buffers_list);
+	xfs_buf_relse(bp);
 	*bpp = NULL;
+
+	bbl->nr_dirty++;
+	if (!bbl->max_dirty || bbl->nr_dirty < bbl->max_dirty)
+		return 0;
+
+	error = xfs_buf_delwri_submit(buffers_list);
+	if (error)
+		return error;
+
+	bbl->nr_dirty = 0;
+	return 0;
 }
 
 /*
@@ -418,7 +439,10 @@ xfs_btree_bload_prep_block(
 	 */
 	if (*blockp)
 		xfs_btree_set_sibling(cur, *blockp, &new_ptr, XFS_BB_RIGHTSIB);
-	xfs_btree_bload_drop_buf(buffers_list, bpp);
+
+	ret = xfs_btree_bload_drop_buf(bbl, buffers_list, bpp);
+	if (ret)
+		return ret;
 
 	/* Initialize the new btree block. */
 	xfs_btree_init_block_cur(cur, new_bp, level, nr_this_block);
@@ -436,22 +460,19 @@ STATIC int
 xfs_btree_bload_leaf(
 	struct xfs_btree_cur		*cur,
 	unsigned int			recs_this_block,
-	xfs_btree_bload_get_record_fn	get_record,
+	xfs_btree_bload_get_records_fn	get_records,
 	struct xfs_btree_block		*block,
 	void				*priv)
 {
-	unsigned int			j;
+	unsigned int			j = 1;
 	int				ret;
 
 	/* Fill the leaf block with records. */
-	for (j = 1; j <= recs_this_block; j++) {
-		union xfs_btree_rec	*block_rec;
-
-		ret = get_record(cur, priv);
-		if (ret)
+	while (j <= recs_this_block) {
+		ret = get_records(cur, j, block, recs_this_block - j + 1, priv);
+		if (ret < 0)
 			return ret;
-		block_rec = xfs_btree_rec_addr(cur, j, block);
-		cur->bc_ops->init_rec_from_cur(cur, block_rec);
+		j += ret;
 	}
 
 	return 0;
@@ -485,7 +506,12 @@ xfs_btree_bload_node(
 		ASSERT(!xfs_btree_ptr_is_null(cur, child_ptr));
 
-		ret = xfs_btree_get_buf_block(cur, child_ptr, &child_block,
+		/*
+		 * Read the lower-level block in case the buffer for it has
+		 * been reclaimed.  LRU refs will be set on the block, which is
+		 * desirable if the new btree commits.
+		 */
+		ret = xfs_btree_read_buf_block(cur, child_ptr, 0, &child_block,
 				&child_bp);
 		if (ret)
 			return ret;
 
@@ -764,6 +790,7 @@ xfs_btree_bload(
 	cur->bc_nlevels = bbl->btree_height;
 	xfs_btree_set_ptr_null(cur, &child_ptr);
 	xfs_btree_set_ptr_null(cur, &ptr);
+	bbl->nr_dirty = 0;
 
 	xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level,
 			&avg_per_block, &blocks, &blocks_with_extra);
@@ -789,7 +816,7 @@ xfs_btree_bload(
 		trace_xfs_btree_bload_block(cur, level, i, blocks, &ptr,
 				nr_this_block);
 
-		ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_record,
+		ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_records,
 				block, priv);
 		if (ret)
 			goto out;
@@ -802,7 +829,10 @@ xfs_btree_bload(
 		xfs_btree_copy_ptrs(cur, &child_ptr, &ptr, 1);
 	}
 	total_blocks += blocks;
-	xfs_btree_bload_drop_buf(&buffers_list, &bp);
+
+	ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp);
+	if (ret)
+		goto out;
 
 	/* Populate the internal btree nodes. */
 	for (level = 1; level < cur->bc_nlevels; level++) {
@@ -844,7 +874,11 @@ xfs_btree_bload(
 			xfs_btree_copy_ptrs(cur, &first_ptr, &ptr, 1);
 		}
 		total_blocks += blocks;
-		xfs_btree_bload_drop_buf(&buffers_list, &bp);
+
+		ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp);
+		if (ret)
+			goto out;
+
 		xfs_btree_copy_ptrs(cur, &child_ptr, &first_ptr, 1);
 	}
 
@@ -47,7 +47,9 @@ void xfs_btree_commit_ifakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp,
 		int whichfork, const struct xfs_btree_ops *ops);
 
 /* Bulk loading of staged btrees. */
-typedef int (*xfs_btree_bload_get_record_fn)(struct xfs_btree_cur *cur, void *priv);
+typedef int (*xfs_btree_bload_get_records_fn)(struct xfs_btree_cur *cur,
+		unsigned int idx, struct xfs_btree_block *block,
+		unsigned int nr_wanted, void *priv);
 typedef int (*xfs_btree_bload_claim_block_fn)(struct xfs_btree_cur *cur,
 		union xfs_btree_ptr *ptr, void *priv);
 typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur,
@@ -55,11 +57,14 @@ typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur,
 struct xfs_btree_bload {
 	/*
-	 * This function will be called nr_records times to load records into
-	 * the btree.  The function does this by setting the cursor's bc_rec
-	 * field in in-core format.  Records must be returned in sort order.
+	 * This function will be called to load @nr_wanted records into the
+	 * btree.  The implementation does this by setting the cursor's bc_rec
+	 * field in in-core format and using init_rec_from_cur to set the
+	 * records in the btree block.  Records must be returned in sort order.
+	 * The function must return the number of records loaded or the usual
+	 * negative errno.
 	 */
-	xfs_btree_bload_get_record_fn	get_record;
+	xfs_btree_bload_get_records_fn	get_records;
 
 	/*
 	 * This function will be called nr_blocks times to obtain a pointer
@@ -107,6 +112,16 @@ struct xfs_btree_bload {
 	 * height of the new btree.
 	 */
 	unsigned int			btree_height;
+
+	/*
+	 * Flush the new btree block buffer list to disk after this many blocks
+	 * have been formatted.  Zero prohibits writing any buffers until all
+	 * blocks have been formatted.
+	 */
+	uint16_t			max_dirty;
+
+	/* Number of dirty buffers. */
+	uint16_t			nr_dirty;
 };
 
 int xfs_btree_bload_compute_geometry(struct xfs_btree_cur *cur,
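
For illustration, a hedged sketch of a ->get_records implementation
following the contract above, assuming a hypothetical @priv that carries
pre-sorted in-core records (real callers draw from their own staging
structures):

	STATIC int
	xfoo_get_records(
		struct xfs_btree_cur	*cur,
		unsigned int		idx,
		struct xfs_btree_block	*block,
		unsigned int		nr_wanted,
		void			*priv)
	{
		struct xfoo_staging	*stg = priv;	/* hypothetical */
		union xfs_btree_rec	*block_rec;
		unsigned int		loaded;

		for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
			if (stg->next >= stg->nr_records)
				break;

			/* Set the cursor's record in in-core format... */
			cur->bc_rec = stg->irecs[stg->next++];

			/* ...and copy it into the block in disk format. */
			block_rec = xfs_btree_rec_addr(cur, idx, block);
			cur->bc_ops->init_rec_from_cur(cur, block_rec);
		}

		return loaded;
	}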
@@ -32,6 +32,7 @@
  * btree bulk loading code calculates for us.  However, there are some
  * exceptions to this rule:
  *
+ * (0) If someone turned one of the debug knobs.
  * (1) If this is a per-AG btree and the AG has less than 10% space free.
  * (2) If this is an inode btree and the FS has less than 10% space free.
@@ -47,9 +48,13 @@ xrep_newbt_estimate_slack(
 	uint64_t		free;
 	uint64_t		sz;
 
-	/* Let the btree code compute the default slack values. */
-	bload->leaf_slack = -1;
-	bload->node_slack = -1;
+	/*
+	 * The xfs_globals values are set to -1 (i.e. take the bload defaults)
+	 * unless someone has set them otherwise, so we just pull the values
+	 * here.
+	 */
+	bload->leaf_slack = xfs_globals.bload_leaf_slack;
+	bload->node_slack = xfs_globals.bload_node_slack;
 
 	if (sc->ops->type == ST_PERAG) {
 		free = sc->sa.pag->pagf_freeblks;
@@ -89,6 +94,7 @@ xrep_newbt_init_ag(
 	xnr->alloc_hint = alloc_hint;
 	xnr->resv = resv;
 	INIT_LIST_HEAD(&xnr->resv_list);
+	xnr->bload.max_dirty = XFS_B_TO_FSBT(sc->mp, 256U << 10); /* 256K */
 	xrep_newbt_estimate_slack(xnr);
 }
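
For scale: XFS_B_TO_FSBT truncates a byte count to filesystem blocks, so
with the common 4KiB block size the 256K cap above flushes the delwri list
after every 64 newly formatted btree blocks (256KiB / 4KiB = 64).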
@@ -2049,6 +2049,14 @@ xfs_alloc_buftarg(
 	return NULL;
 }
 
+static inline void
+xfs_buf_list_del(
+	struct xfs_buf		*bp)
+{
+	list_del_init(&bp->b_list);
+	wake_up_var(&bp->b_list);
+}
+
 /*
  * Cancel a delayed write list.
 *
@@ -2066,7 +2074,7 @@ xfs_buf_delwri_cancel(
 		xfs_buf_lock(bp);
 		bp->b_flags &= ~_XBF_DELWRI_Q;
-		list_del_init(&bp->b_list);
+		xfs_buf_list_del(bp);
 		xfs_buf_relse(bp);
 	}
 }
@@ -2119,6 +2127,34 @@ xfs_buf_delwri_queue(
 	return true;
 }
 
+/*
+ * Queue a buffer to this delwri list as part of a data integrity operation.
+ * If the buffer is on any other delwri list, we'll wait for that to clear
+ * so that the caller can submit the buffer for IO and wait for the result.
+ * Callers must ensure the buffer is not already on the list.
+ */
+void
+xfs_buf_delwri_queue_here(
+	struct xfs_buf		*bp,
+	struct list_head	*buffer_list)
+{
+	/*
+	 * We need this buffer to end up on the /caller's/ delwri list, not any
+	 * old list.  This can happen if the buffer is marked stale (which
+	 * clears DELWRI_Q) after the AIL queues the buffer to its list but
+	 * before the AIL has a chance to submit the list.
+	 */
+	while (!list_empty(&bp->b_list)) {
+		xfs_buf_unlock(bp);
+		wait_var_event(&bp->b_list, list_empty(&bp->b_list));
+		xfs_buf_lock(bp);
+	}
+
+	ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
+
+	xfs_buf_delwri_queue(bp, buffer_list);
+}
+
 /*
  * Compare function is more complex than it needs to be because
  * the return value is only 32 bits and we are doing comparisons
@@ -2181,7 +2217,7 @@ xfs_buf_delwri_submit_buffers(
 		 * reference and remove it from the list here.
 		 */
 		if (!(bp->b_flags & _XBF_DELWRI_Q)) {
-			list_del_init(&bp->b_list);
+			xfs_buf_list_del(bp);
 			xfs_buf_relse(bp);
 			continue;
 		}
@@ -2201,7 +2237,7 @@ xfs_buf_delwri_submit_buffers(
 			list_move_tail(&bp->b_list, wait_list);
 		} else {
 			bp->b_flags |= XBF_ASYNC;
-			list_del_init(&bp->b_list);
+			xfs_buf_list_del(bp);
 		}
 		__xfs_buf_submit(bp, false);
 	}
@@ -2255,7 +2291,7 @@ xfs_buf_delwri_submit(
 	while (!list_empty(&wait_list)) {
 		bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
 
-		list_del_init(&bp->b_list);
+		xfs_buf_list_del(bp);
 
 		/*
 		 * Wait on the locked buffer, check for errors and unlock and
@@ -319,6 +319,7 @@ extern void xfs_buf_stale(struct xfs_buf *bp);
 /* Delayed Write Buffer Routines */
 extern void xfs_buf_delwri_cancel(struct list_head *);
 extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
+void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl);
 extern int xfs_buf_delwri_submit(struct list_head *);
 extern int xfs_buf_delwri_submit_nowait(struct list_head *);
 extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
@@ -44,4 +44,16 @@ struct xfs_globals xfs_globals = {
 	.pwork_threads	=	-1,	/* automatic thread detection */
 	.larp		=	false,	/* log attribute replay */
 #endif
+
+	/*
+	 * Leave this many record slots empty when bulk loading btrees.  By
+	 * default we load new btree leaf blocks 75% full.
+	 */
+	.bload_leaf_slack = -1,
+
+	/*
+	 * Leave this many key/ptr slots empty when bulk loading btrees.  By
+	 * default we load new btree node blocks 75% full.
+	 */
+	.bload_node_slack = -1,
 };
@@ -85,6 +85,8 @@ struct xfs_globals {
 	int	pwork_threads;		/* parallel workqueue threads */
 	bool	larp;			/* log attribute replay */
 #endif
+	int	bload_leaf_slack;	/* btree bulk load leaf slack */
+	int	bload_node_slack;	/* btree bulk load node slack */
 	int	log_recovery_delay;	/* log recovery delay (secs) */
 	int	mount_delay;		/* mount setup delay (secs) */
 	bool	bug_on_assert;		/* BUG() the kernel on assert failure */
@@ -262,6 +262,58 @@ larp_show(
 XFS_SYSFS_ATTR_RW(larp);
 #endif /* DEBUG */
 
+STATIC ssize_t
+bload_leaf_slack_store(
+	struct kobject		*kobject,
+	const char		*buf,
+	size_t			count)
+{
+	int			ret;
+	int			val;
+
+	ret = kstrtoint(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	xfs_globals.bload_leaf_slack = val;
+	return count;
+}
+
+STATIC ssize_t
+bload_leaf_slack_show(
+	struct kobject		*kobject,
+	char			*buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bload_leaf_slack);
+}
+XFS_SYSFS_ATTR_RW(bload_leaf_slack);
+
+STATIC ssize_t
+bload_node_slack_store(
+	struct kobject		*kobject,
+	const char		*buf,
+	size_t			count)
+{
+	int			ret;
+	int			val;
+
+	ret = kstrtoint(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	xfs_globals.bload_node_slack = val;
+	return count;
+}
+
+STATIC ssize_t
+bload_node_slack_show(
+	struct kobject		*kobject,
+	char			*buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.bload_node_slack);
+}
+XFS_SYSFS_ATTR_RW(bload_node_slack);
+
 static struct attribute *xfs_dbg_attrs[] = {
 	ATTR_LIST(bug_on_assert),
 	ATTR_LIST(log_recovery_delay),
@@ -271,6 +323,8 @@ static struct attribute *xfs_dbg_attrs[] = {
 	ATTR_LIST(pwork_threads),
 	ATTR_LIST(larp),
 #endif
+	ATTR_LIST(bload_leaf_slack),
+	ATTR_LIST(bload_node_slack),
 	NULL,
 };
 ATTRIBUTE_GROUPS(xfs_dbg);
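
On kernels built with XFS debugging enabled, these attributes should
surface alongside the other xfs_dbg knobs (i.e. as
/sys/fs/xfs/debug/bload_leaf_slack and /sys/fs/xfs/debug/bload_node_slack);
writing -1 restores the built-in 75%-full default.  The exact sysfs path is
inferred from the xfs_dbg attribute group, not stated in this diff.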