Commit 9fa8753a authored by Chandan Babu R

Merge tag 'rtalloc-speedups-6.7_2023-10-19' of...

Merge tag 'rtalloc-speedups-6.7_2023-10-19' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.7-mergeA

xfs: CPU usage optimizations for realtime allocator [v2.3]

This is version 2 of [Omar's] XFS realtime allocator optimization patch
series.

Changes since v1 [1]:

- Fixed potential overflow in patch 4.
- Changed deprecated typedefs to normal struct names
- Fixed broken indentation
- Used xfs_fileoff_t instead of xfs_fsblock_t where appropriate.
- Added calls to xfs_rtbuf_cache_relse anywhere that the cache is used
  instead of relying on the buffers being dirtied and thus attached to
  the transaction.
- Clarified comments and commit messages in a few places.
- Added Darrick's Reviewed-bys.

Cover letter from v1:

Our distributed storage system uses XFS's realtime device support as a
way to split an XFS filesystem between an SSD and an HDD -- we configure
the HDD as the realtime device so that metadata goes on the SSD and data
goes on the HDD.

We've been running this in production for a few years now, so we have
some fairly fragmented filesystems. This has exposed various CPU
inefficiencies in the realtime allocator. These became even worse when
we experimented with using XFS_XFLAG_EXTSIZE to force files to be
allocated contiguously.

This series adds several optimizations that don't change the realtime
allocator's decisions, but make them happen more efficiently, mainly by
avoiding redundant work. We've tested these in production and measured
~10% lower CPU utilization. Furthermore, it made it possible to use
XFS_XFLAG_EXTSIZE to force contiguous allocations -- without these
patches, our most fragmented systems would become unresponsive due to
high CPU usage in the realtime allocator, but with them, CPU utilization
is actually ~4-6% lower than before, and disk I/O utilization is 15-20%
lower.

Patches 2 and 3 are preparations for later optimizations; the remaining
patches are the optimizations themselves.

1: https://lore.kernel.org/linux-xfs/cover.1687296675.git.osandov@osandov.com/

v2.1: djwong rebased everything atop his own cleanups, added dave's rtalloc_args
v2.2: rebase with new apis and clean them up too
v2.3: move struct definition around for lolz

With a bit of luck, this should all go splendidly.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'rtalloc-speedups-6.7_2023-10-19' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: don't look for end of extent further than necessary in xfs_rtallocate_extent_near()
  xfs: don't try redundant allocations in xfs_rtallocate_extent_near()
  xfs: limit maxlen based on available space in xfs_rtallocate_extent_near()
  xfs: return maximum free size from xfs_rtany_summary()
  xfs: invert the realtime summary cache
  xfs: simplify rt bitmap/summary block accessor functions
  xfs: simplify xfs_rtbuf_get calling conventions
  xfs: cache last bitmap block in realtime allocator
  xfs: consolidate realtime allocation arguments
parents 830b4abf e0f7422f
This diff is collapsed.
......@@ -6,6 +6,17 @@
#ifndef __XFS_RTBITMAP_H__
#define __XFS_RTBITMAP_H__
/*
 * Argument bundle handed to the realtime bitmap and summary helpers in place
 * of separate mount/transaction/buffer parameters. It carries one bitmap
 * block buffer and one summary block buffer, each paired with a block number.
 */
struct xfs_rtalloc_args {
struct xfs_mount *mp; /* mount structure */
struct xfs_trans *tp; /* transaction */
struct xfs_buf *rbmbp; /* bitmap block buffer */
struct xfs_buf *sumbp; /* summary block buffer */
xfs_fileoff_t rbmoff; /* bitmap block number */
xfs_fileoff_t sumoff; /* summary block number */
};
static inline xfs_rtblock_t
xfs_rtx_to_rtb(
struct xfs_mount *mp,
......@@ -161,10 +172,10 @@ xfs_rbmblock_to_rtx(
/*
 * Return a pointer to the bitmap word at position @index within a rt bitmap
 * block. The words are addressed starting at the block buffer's b_addr.
 *
 * NOTE(review): this diff hunk lists the pre-change lines (taking @bp) and
 * the post-change lines (taking @args and reading args->rbmbp) together.
 */
static inline union xfs_rtword_raw *
xfs_rbmblock_wordptr(
struct xfs_buf *bp,
struct xfs_rtalloc_args *args,
unsigned int index)
{
union xfs_rtword_raw *words = bp->b_addr;
union xfs_rtword_raw *words = args->rbmbp->b_addr;
return words + index;
}
......@@ -172,10 +183,10 @@ xfs_rbmblock_wordptr(
/*
 * Convert an ondisk bitmap word to its incore representation: look up word
 * @index with xfs_rbmblock_wordptr() and return its ->old member.
 *
 * NOTE(review): this diff hunk lists the pre-change lines (taking @bp) and
 * the post-change lines (taking @args) together.
 */
static inline xfs_rtword_t
xfs_rtbitmap_getword(
struct xfs_buf *bp,
struct xfs_rtalloc_args *args,
unsigned int index)
{
union xfs_rtword_raw *word = xfs_rbmblock_wordptr(bp, index);
union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index);
return word->old;
}
......@@ -183,11 +194,11 @@ xfs_rtbitmap_getword(
/*
 * Set an ondisk bitmap word from an incore representation: look up word
 * @index with xfs_rbmblock_wordptr() and store @value in its ->old member.
 *
 * NOTE(review): this diff hunk lists the pre-change lines (taking @bp) and
 * the post-change lines (taking @args) together.
 */
static inline void
xfs_rtbitmap_setword(
struct xfs_buf *bp,
struct xfs_rtalloc_args *args,
unsigned int index,
xfs_rtword_t value)
{
union xfs_rtword_raw *word = xfs_rbmblock_wordptr(bp, index);
union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index);
word->old = value;
}
......@@ -234,10 +245,10 @@ xfs_rtsumoffs_to_infoword(
/*
 * Return a pointer to the summary info word at position @index within a rt
 * summary block. The words are addressed starting at the block buffer's
 * b_addr.
 *
 * NOTE(review): this diff hunk lists the pre-change lines (taking @bp) and
 * the post-change lines (taking @args and reading args->sumbp) together.
 */
static inline union xfs_suminfo_raw *
xfs_rsumblock_infoptr(
struct xfs_buf *bp,
struct xfs_rtalloc_args *args,
unsigned int index)
{
union xfs_suminfo_raw *info = bp->b_addr;
union xfs_suminfo_raw *info = args->sumbp->b_addr;
return info + index;
}
......@@ -245,10 +256,10 @@ xfs_rsumblock_infoptr(
/*
 * Get the current value of a summary counter: look up info word @index with
 * xfs_rsumblock_infoptr() and return its ->old member.
 *
 * NOTE(review): this diff hunk lists the pre-change lines (taking @bp) and
 * the post-change lines (taking @args) together.
 */
static inline xfs_suminfo_t
xfs_suminfo_get(
struct xfs_buf *bp,
struct xfs_rtalloc_args *args,
unsigned int index)
{
union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(bp, index);
union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index);
return info->old;
}
......@@ -256,11 +267,11 @@ xfs_suminfo_get(
/* Add to the current value of a summary counter and return the new value. */
static inline xfs_suminfo_t
xfs_suminfo_add(
struct xfs_buf *bp,
struct xfs_rtalloc_args *args,
unsigned int index,
int delta)
{
union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(bp, index);
union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index);
info->old += delta;
return info->old;
......@@ -281,29 +292,41 @@ typedef int (*xfs_rtalloc_query_range_fn)(
void *priv);
#ifdef CONFIG_XFS_RT
int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_fileoff_t block, int issum, struct xfs_buf **bpp);
int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_rtxnum_t start, xfs_rtxlen_t len, int val,
xfs_rtxnum_t *new, int *stat);
int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_rtxnum_t start, xfs_rtxnum_t limit,
xfs_rtxnum_t *rtblock);
int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_rtxnum_t start, xfs_rtxnum_t limit,
xfs_rtxnum_t *rtblock);
int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_rtxnum_t start, xfs_rtxlen_t len, int val);
int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
int log, xfs_fileoff_t bbno, int delta,
struct xfs_buf **rbpp, xfs_fileoff_t *rsb,
xfs_suminfo_t *sum);
int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
xfs_fileoff_t bbno, int delta, struct xfs_buf **rbpp,
xfs_fileoff_t *rsb);
int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_rtxnum_t start, xfs_rtxlen_t len,
struct xfs_buf **rbpp, xfs_fileoff_t *rsb);
void xfs_rtbuf_cache_relse(struct xfs_rtalloc_args *args);
int xfs_rtbuf_get(struct xfs_rtalloc_args *args, xfs_fileoff_t block,
int issum);
/*
 * Read rt bitmap block @block through xfs_rtbuf_get(), selecting the bitmap
 * (issum == 0) rather than the summary. Returns whatever xfs_rtbuf_get()
 * returns.
 */
static inline int
xfs_rtbitmap_read_buf(
	struct xfs_rtalloc_args	*args,
	xfs_fileoff_t		block)
{
	const int		issum = 0;	/* bitmap, not summary */

	return xfs_rtbuf_get(args, block, issum);
}
/*
 * Read rt summary block @block through xfs_rtbuf_get(), selecting the summary
 * (issum == 1) rather than the bitmap. Returns whatever xfs_rtbuf_get()
 * returns.
 */
static inline int
xfs_rtsummary_read_buf(
	struct xfs_rtalloc_args	*args,
	xfs_fileoff_t		block)
{
	const int		issum = 1;	/* summary, not bitmap */

	return xfs_rtbuf_get(args, block, issum);
}
int xfs_rtcheck_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
xfs_rtxlen_t len, int val, xfs_rtxnum_t *new, int *stat);
int xfs_rtfind_back(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock);
int xfs_rtfind_forw(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock);
int xfs_rtmodify_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
xfs_rtxlen_t len, int val);
int xfs_rtmodify_summary_int(struct xfs_rtalloc_args *args, int log,
xfs_fileoff_t bbno, int delta, xfs_suminfo_t *sum);
int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log,
xfs_fileoff_t bbno, int delta);
int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
xfs_rtxlen_t len);
int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp,
const struct xfs_rtalloc_rec *low_rec,
const struct xfs_rtalloc_rec *high_rec,
......@@ -342,7 +365,9 @@ unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp,
# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS)
# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS)
# define xfs_rtbuf_get(m,t,b,i,p) (-ENOSYS)
# define xfs_rtbitmap_read_buf(a,b) (-ENOSYS)
# define xfs_rtsummary_read_buf(a,b) (-ENOSYS)
# define xfs_rtbuf_cache_relse(a) (0)
# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
static inline xfs_filblks_t
xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents)
......
......@@ -188,8 +188,11 @@ STATIC int
xchk_rtsum_compare(
struct xfs_scrub *sc)
{
struct xfs_rtalloc_args args = {
.mp = sc->mp,
.tp = sc->tp,
};
struct xfs_mount *mp = sc->mp;
struct xfs_buf *bp;
struct xfs_bmbt_irec map;
xfs_fileoff_t off;
xchk_rtsumoff_t sumoff = 0;
......@@ -217,23 +220,23 @@ xchk_rtsum_compare(
}
/* Read a block's worth of ondisk rtsummary file. */
error = xfs_rtbuf_get(mp, sc->tp, off, 1, &bp);
error = xfs_rtsummary_read_buf(&args, off);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
return error;
/* Read a block's worth of computed rtsummary file. */
error = xfsum_copyout(sc, sumoff, sc->buf, mp->m_blockwsize);
if (error) {
xfs_trans_brelse(sc->tp, bp);
xfs_rtbuf_cache_relse(&args);
return error;
}
ondisk_info = xfs_rsumblock_infoptr(bp, 0);
ondisk_info = xfs_rsumblock_infoptr(&args, 0);
if (memcmp(ondisk_info, sc->buf,
mp->m_blockwsize << XFS_WORDLOG) != 0)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off);
xfs_trans_brelse(sc->tp, bp);
xfs_rtbuf_cache_relse(&args);
sumoff += mp->m_blockwsize;
}
......
......@@ -101,9 +101,9 @@ typedef struct xfs_mount {
/*
* Optional cache of rt summary level per bitmap block with the
* invariant that m_rsum_cache[bbno] <= the minimum i for which
* rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
* inode lock.
* invariant that m_rsum_cache[bbno] > the maximum i for which
* rsum[i][bbno] != 0, or 0 if rsum[i][bbno] == 0 for all i.
* Reads and writes are serialized by the rsumip inode lock.
*/
uint8_t *m_rsum_cache;
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment