Commit 4198bf03 authored by Kent Overstreet

bcachefs: Fix lock thrashing in __bchfs_fallocate()

We've observed significant lock thrashing on fstests generic/083 in
fallocate, due to dropping and retaking btree locks when checking the
pagecache for data.

This adds a nonblocking mode to bch2_clamp_data_hole() that only uses
folio_trylock(), and can therefore be called safely while btree locks are
held; we now drop btree locks only as a fallback, on actual lock
contention.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent f6e6f42b
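
The shape of the change, as a minimal standalone sketch: try the
pagecache check nonblocking first while btree locks are held, and only
on -EAGAIN drop btree locks and redo the check in blocking mode. The
helpers below (check_pagecache(), drop_btree_locks(),
retake_btree_locks()) are hypothetical stand-ins for
bch2_clamp_data_hole() and the btree transaction helpers, not real
bcachefs API:

/* Hypothetical stand-in for bch2_clamp_data_hole(): in nonblocking
 * mode it only trylocks folios and returns -EAGAIN on contention. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static int check_pagecache(bool nonblock)
{
	static int contended = 1;	/* simulate one contended attempt */

	if (nonblock && contended-- > 0)
		return -EAGAIN;
	return 0;
}

static void drop_btree_locks(void)   { puts("dropped btree locks"); }
static void retake_btree_locks(void) { puts("retook btree locks"); }

int main(void)
{
	/* Fast path: nonblocking check, safe while (conceptually)
	 * holding btree locks. */
	int ret = check_pagecache(true);

	if (ret == -EAGAIN) {
		/* Slow path, taken only on actual folio lock
		 * contention: drop btree locks so we can block. */
		drop_btree_locks();
		ret = check_pagecache(false);
		retake_btree_locks();
	}
	printf("result: %d\n", ret);
	return ret ? 1 : 0;
}
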
@@ -35,7 +35,7 @@
 
 #include <trace/events/writeback.h>
 
-static void bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned);
+static int bch2_clamp_data_hole(struct inode *, u64 *, u64 *, unsigned, bool);
 
 struct folio_vec {
 	struct folio *fv_folio;
@@ -3407,11 +3407,19 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 		}
 
 		if (!(mode & FALLOC_FL_ZERO_RANGE)) {
-			ret = drop_locks_do(&trans,
-				(bch2_clamp_data_hole(&inode->v,
-						      &hole_start,
-						      &hole_end,
-						      opts.data_replicas), 0));
+			/*
+			 * Lock ordering - can't be holding btree locks while
+			 * blocking on a folio lock:
+			 */
+			if (bch2_clamp_data_hole(&inode->v,
+						 &hole_start,
+						 &hole_end,
+						 opts.data_replicas, true))
+				ret = drop_locks_do(&trans,
+					(bch2_clamp_data_hole(&inode->v,
+							      &hole_start,
+							      &hole_end,
+							      opts.data_replicas, false), 0));
 			bch2_btree_iter_set_pos(&iter, POS(iter.pos.inode, hole_start));
 
 			if (ret)
@@ -3711,7 +3719,8 @@ static int folio_data_offset(struct folio *folio, loff_t pos,
 static loff_t bch2_seek_pagecache_data(struct inode *vinode,
 				       loff_t start_offset,
 				       loff_t end_offset,
-				       unsigned min_replicas)
+				       unsigned min_replicas,
+				       bool nonblock)
 {
 	struct folio_batch fbatch;
 	pgoff_t start_index = start_offset >> PAGE_SHIFT;
@@ -3728,7 +3737,13 @@ static loff_t bch2_seek_pagecache_data(struct inode *vinode,
 		for (i = 0; i < folio_batch_count(&fbatch); i++) {
 			struct folio *folio = fbatch.folios[i];
 
-			folio_lock(folio);
+			if (!nonblock) {
+				folio_lock(folio);
+			} else if (!folio_trylock(folio)) {
+				folio_batch_release(&fbatch);
+				return -EAGAIN;
+			}
+
 			offset = folio_data_offset(folio,
 					max(folio_pos(folio), start_offset),
 					min_replicas);
@@ -3793,7 +3808,7 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
 
 	if (next_data > offset)
 		next_data = bch2_seek_pagecache_data(&inode->v,
-						     offset, next_data, 0);
+						     offset, next_data, 0, false);
 
 	if (next_data >= isize)
 		return -ENXIO;
@@ -3801,15 +3816,18 @@ static loff_t bch2_seek_data(struct file *file, u64 offset)
 	return vfs_setpos(file, next_data, MAX_LFS_FILESIZE);
 }
 
-static bool folio_hole_offset(struct address_space *mapping, loff_t *offset,
-			      unsigned min_replicas)
+static int folio_hole_offset(struct address_space *mapping, loff_t *offset,
+			     unsigned min_replicas, bool nonblock)
 {
 	struct folio *folio;
 	struct bch_folio *s;
 	unsigned i, sectors;
 	bool ret = true;
 
-	folio = filemap_lock_folio(mapping, *offset >> PAGE_SHIFT);
+	folio = __filemap_get_folio(mapping, *offset >> PAGE_SHIFT,
+				    FGP_LOCK|(nonblock ? FGP_NOWAIT : 0), 0);
+	if (folio == ERR_PTR(-EAGAIN))
+		return -EAGAIN;
 	if (IS_ERR_OR_NULL(folio))
 		return true;
 
@@ -3837,31 +3855,44 @@ static bool folio_hole_offset(struct address_space *mapping, loff_t *offset,
 static loff_t bch2_seek_pagecache_hole(struct inode *vinode,
 				       loff_t start_offset,
 				       loff_t end_offset,
-				       unsigned min_replicas)
+				       unsigned min_replicas,
+				       bool nonblock)
 {
 	struct address_space *mapping = vinode->i_mapping;
 	loff_t offset = start_offset;
 
 	while (offset < end_offset &&
-	       !folio_hole_offset(mapping, &offset, min_replicas))
+	       !folio_hole_offset(mapping, &offset, min_replicas, nonblock))
 		;
 
 	return min(offset, end_offset);
 }
 
-static void bch2_clamp_data_hole(struct inode *inode,
-				 u64 *hole_start,
-				 u64 *hole_end,
-				 unsigned min_replicas)
+static int bch2_clamp_data_hole(struct inode *inode,
+				u64 *hole_start,
+				u64 *hole_end,
+				unsigned min_replicas,
+				bool nonblock)
 {
-	*hole_start = bch2_seek_pagecache_hole(inode,
-		*hole_start << 9, *hole_end << 9, min_replicas) >> 9;
+	loff_t ret;
+
+	ret = bch2_seek_pagecache_hole(inode,
+		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
+	if (ret < 0)
+		return ret;
+
+	*hole_start = ret;
 
 	if (*hole_start == *hole_end)
-		return;
+		return 0;
 
-	*hole_end = bch2_seek_pagecache_data(inode,
-		*hole_start << 9, *hole_end << 9, min_replicas) >> 9;
+	ret = bch2_seek_pagecache_data(inode,
+		*hole_start << 9, *hole_end << 9, min_replicas, nonblock) >> 9;
+	if (ret < 0)
+		return ret;
+
+	*hole_end = ret;
+	return 0;
 }
 
 static loff_t bch2_seek_hole(struct file *file, u64 offset)
@@ -3893,12 +3924,12 @@ static loff_t bch2_seek_hole(struct file *file, u64 offset)
 			BTREE_ITER_SLOTS, k, ret) {
 		if (k.k->p.inode != inode->v.i_ino) {
 			next_hole = bch2_seek_pagecache_hole(&inode->v,
-					offset, MAX_LFS_FILESIZE, 0);
+					offset, MAX_LFS_FILESIZE, 0, false);
 			break;
 		} else if (!bkey_extent_is_data(k.k)) {
 			next_hole = bch2_seek_pagecache_hole(&inode->v,
 					max(offset, bkey_start_offset(k.k) << 9),
-					k.k->p.offset << 9, 0);
+					k.k->p.offset << 9, 0, false);
 			if (next_hole < k.k->p.offset << 9)
 				break;