Commit 610125ab authored by Dave Chinner, committed by Darrick J. Wong

xfs: speed up directory bestfree block scanning

When running a "create millions of inodes in a directory" test
recently, I noticed we were spending a huge amount of time
converting freespace block headers from disk format to in-memory
format:

 31.47%  [kernel]  [k] xfs_dir2_node_addname
 17.86%  [kernel]  [k] xfs_dir3_free_hdr_from_disk
  3.55%  [kernel]  [k] xfs_dir3_free_bests_p

We shouldn't be hitting the best free block scanning code so hard
when doing sequential directory creates, and it turns out there's
a highly suboptimal loop searching the best free array in
the freespace block - it decodes the block header before checking
each entry inside a loop, instead of decoding the header once before
running the entry search loop.

This makes a massive difference to create rates. Profile now looks
like this:

  13.15%  [kernel]  [k] xfs_dir2_node_addname
   3.52%  [kernel]  [k] xfs_dir3_leaf_check_int
   3.11%  [kernel]  [k] xfs_log_commit_cil

And the wall time/average file create rate differences are
just as stark:

		create time(sec) / rate (files/s)
File count	     vanilla		    patched
  10k		   0.41 / 24.3k		   0.42 / 23.8k
  20k		   0.74	/ 27.0k		   0.76 / 26.3k
 100k		   3.81	/ 26.4k		   3.47 / 28.8k
 200k		   8.58	/ 23.3k		   7.19 / 27.8k
   1M		  85.69	/ 11.7k		  48.53 / 20.6k
   2M		 280.31	/  7.1k		 130.14 / 15.3k

The larger the directory, the bigger the performance improvement.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
parent 0e822255
...@@ -1750,8 +1750,8 @@ xfs_dir2_node_find_freeblk( ...@@ -1750,8 +1750,8 @@ xfs_dir2_node_find_freeblk(
xfs_dir2_db_t dbno = -1; xfs_dir2_db_t dbno = -1;
xfs_dir2_db_t fbno = -1; xfs_dir2_db_t fbno = -1;
xfs_fileoff_t fo; xfs_fileoff_t fo;
__be16 *bests; __be16 *bests = NULL;
int findex; int findex = 0;
int error; int error;
/* /*
...@@ -1781,14 +1781,14 @@ xfs_dir2_node_find_freeblk( ...@@ -1781,14 +1781,14 @@ xfs_dir2_node_find_freeblk(
*/ */
ifbno = fblk->blkno; ifbno = fblk->blkno;
fbno = ifbno; fbno = ifbno;
xfs_trans_brelse(tp, fbp);
fbp = NULL;
fblk->bp = NULL;
} }
ASSERT(dbno == -1);
findex = 0;
/* /*
* If we don't have a data block yet, we're going to scan the freespace * If we don't have a data block yet, we're going to scan the freespace
* blocks looking for one. Figure out what the highest freespace block * data for a data block with enough free space in it.
* number is.
*/ */
error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK); error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK);
if (error) if (error)
...@@ -1799,70 +1799,41 @@ xfs_dir2_node_find_freeblk( ...@@ -1799,70 +1799,41 @@ xfs_dir2_node_find_freeblk(
if (fbno == -1) if (fbno == -1)
fbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET); fbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
/* for ( ; fbno < lastfbno; fbno++) {
* While we haven't identified a data block, search the freeblock /* If it's ifbno we already looked at it. */
* data for a good data block. If we find a null freeblock entry, if (fbno == ifbno)
* indicating a hole in the data blocks, remember that. continue;
*/
while (dbno == -1) {
/*
* If we don't have a freeblock in hand, get the next one.
*/
if (fbp == NULL) {
/*
* If it's ifbno we already looked at it.
*/
if (++fbno == ifbno)
fbno++;
/*
* If it's off the end we're done.
*/
if (fbno >= lastfbno)
break;
/*
* Read the block. There can be holes in the
* freespace blocks, so this might not succeed.
* This should be really rare, so there's no reason
* to avoid it.
*/
error = xfs_dir2_free_try_read(tp, dp,
xfs_dir2_db_to_da(args->geo, fbno),
&fbp);
if (error)
return error;
if (!fbp)
continue;
free = fbp->b_addr;
findex = 0;
}
/* /*
* Look at the current free entry. Is it good enough? * Read the block. There can be holes in the freespace blocks,
* * so this might not succeed. This should be really rare, so
* The bests initialisation should be where the bufer is read in * there's no reason to avoid it.
* the above branch. But gcc is too stupid to realise that bests
* and the freehdr are actually initialised if they are placed
* there, so we have to do it here to avoid warnings. Blech.
*/ */
error = xfs_dir2_free_try_read(tp, dp,
xfs_dir2_db_to_da(args->geo, fbno),
&fbp);
if (error)
return error;
if (!fbp)
continue;
free = fbp->b_addr;
bests = dp->d_ops->free_bests_p(free); bests = dp->d_ops->free_bests_p(free);
dp->d_ops->free_hdr_from_disk(&freehdr, free); dp->d_ops->free_hdr_from_disk(&freehdr, free);
if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
be16_to_cpu(bests[findex]) >= length) /* Scan the free entry array for a large enough free space. */
dbno = freehdr.firstdb + findex; for (findex = 0; findex < freehdr.nvalid; findex++) {
else { if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
/* be16_to_cpu(bests[findex]) >= length) {
* Are we done with the freeblock? dbno = freehdr.firstdb + findex;
*/ goto found_block;
if (++findex == freehdr.nvalid) {
/*
* Drop the block.
*/
xfs_trans_brelse(tp, fbp);
fbp = NULL;
if (fblk && fblk->bp)
fblk->bp = NULL;
} }
} }
/* Didn't find free space, go on to next free block */
xfs_trans_brelse(tp, fbp);
} }
found_block: found_block:
*dbnop = dbno; *dbnop = dbno;
*fbpp = fbp; *fbpp = fbp;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment