Commit c0960603 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Shutdown path improvements

We're seeing occasional firings of the assertion in the key cache
shutdown code that nr_dirty == 0, which means we must sometimes be doing
transaction commits after we've gone read only.

Cleanups & changes:
 - BCH_FS_ALLOC_CLEAN renamed to BCH_FS_CLEAN_SHUTDOWN
 - new helper bch2_btree_interior_updates_flush(), which returns true if
   it had to wait
 - bch2_btree_flush_all_writes() now also returns true if there were
   btree writes in flight
 - __bch2_fs_read_only now checks if btree writes were in flight in the
   shutdown loop: btree write completion does a transaction update, to
   update the pointer in the parent node
 - assert that !BCH_FS_CLEAN_SHUTDOWN in __bch2_trans_commit
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
parent d8f31407
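
The core of the change is how __bch2_fs_read_only() decides it is done: every flush step now reports whether it actually found work, and the loop only exits after two consecutive passes in which nothing was flushed and the journal sequence number did not move. Below is a minimal, standalone sketch of that quiescence pattern; flush_fn, flush_until_quiescent() and the flusher array are hypothetical names used only for illustration, not bcachefs APIs.

/*
 * Hypothetical sketch of the shutdown loop's quiescence pattern: each
 * flusher reports whether it had to do anything, and we only stop once
 * two passes in a row were clean.  Not bcachefs code.
 */
#include <stdbool.h>

typedef bool (*flush_fn)(void);        /* returns true if it found/flushed work */

static void flush_until_quiescent(flush_fn *flushers, unsigned nr)
{
        unsigned i, clean_passes = 0;

        do {
                bool did_work = false;

                clean_passes++;

                for (i = 0; i < nr; i++)
                        did_work |= flushers[i]();

                /*
                 * Work done by one flusher can generate new work for another
                 * (e.g. a btree write completion commits a transaction), so
                 * any progress resets the counter and we go around again.
                 */
                if (did_work)
                        clean_passes = 0;
        } while (clean_passes < 2);
}

The real loop in the super.c hunk below additionally treats a change in the journal sequence number as "work happened", which catches transaction commits that slip in between the explicit flushes.
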
@@ -494,7 +494,7 @@ struct bch_dev {
 enum {
         /* startup: */
-        BCH_FS_ALLOC_CLEAN,
+        BCH_FS_CLEAN_SHUTDOWN,
         BCH_FS_INITIAL_GC_DONE,
         BCH_FS_INITIAL_GC_UNFIXED,
         BCH_FS_TOPOLOGY_REPAIR_DONE,
@@ -1751,9 +1751,7 @@ int bch2_gc(struct bch_fs *c, bool initial, bool metadata_only)
         down_write(&c->gc_lock);

-        /* flush interior btree updates: */
-        closure_wait_event(&c->btree_interior_update_wait,
-                           !bch2_btree_interior_updates_nr_pending(c));
+        bch2_btree_interior_updates_flush(c);

         ret   = bch2_gc_start(c, metadata_only) ?:
                 bch2_gc_alloc_start(c, metadata_only) ?:
@@ -2099,29 +2099,33 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
         }
 }

-static void __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
+static bool __bch2_btree_flush_all(struct bch_fs *c, unsigned flag)
 {
         struct bucket_table *tbl;
         struct rhash_head *pos;
         struct btree *b;
         unsigned i;
+        bool ret = false;
 restart:
         rcu_read_lock();
         for_each_cached_btree(b, c, tbl, i, pos)
                 if (test_bit(flag, &b->flags)) {
                         rcu_read_unlock();
                         wait_on_bit_io(&b->flags, flag, TASK_UNINTERRUPTIBLE);
+                        ret = true;
                         goto restart;
                 }
         rcu_read_unlock();
+
+        return ret;
 }

-void bch2_btree_flush_all_reads(struct bch_fs *c)
+bool bch2_btree_flush_all_reads(struct bch_fs *c)
 {
-        __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight);
+        return __bch2_btree_flush_all(c, BTREE_NODE_read_in_flight);
 }

-void bch2_btree_flush_all_writes(struct bch_fs *c)
+bool bch2_btree_flush_all_writes(struct bch_fs *c)
 {
-        __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
+        return __bch2_btree_flush_all(c, BTREE_NODE_write_in_flight);
 }
@@ -152,8 +152,8 @@ static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
         bch2_btree_node_write(c, b, lock_held, BTREE_WRITE_ONLY_IF_NEED);
 }

-void bch2_btree_flush_all_reads(struct bch_fs *);
-void bch2_btree_flush_all_writes(struct bch_fs *);
+bool bch2_btree_flush_all_reads(struct bch_fs *);
+bool bch2_btree_flush_all_writes(struct bch_fs *);

 static inline void compat_bformat(unsigned level, enum btree_id btree_id,
                                   unsigned version, unsigned big_endian,
@@ -2175,19 +2175,27 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
         mutex_unlock(&c->btree_interior_update_lock);
 }

-size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *c)
+static bool bch2_btree_interior_updates_pending(struct bch_fs *c)
 {
-        size_t ret = 0;
-        struct list_head *i;
+        bool ret;

         mutex_lock(&c->btree_interior_update_lock);
-        list_for_each(i, &c->btree_interior_update_list)
-                ret++;
+        ret = !list_empty(&c->btree_interior_update_list);
         mutex_unlock(&c->btree_interior_update_lock);

         return ret;
 }

+bool bch2_btree_interior_updates_flush(struct bch_fs *c)
+{
+        bool ret = bch2_btree_interior_updates_pending(c);
+
+        if (ret)
+                closure_wait_event(&c->btree_interior_update_wait,
+                                   !bch2_btree_interior_updates_pending(c));
+
+        return ret;
+}
+
 void bch2_journal_entries_to_btree_roots(struct bch_fs *c, struct jset *jset)
 {
         struct btree_root *r;
@@ -309,7 +309,7 @@ static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
 void bch2_btree_updates_to_text(struct printbuf *, struct bch_fs *);

-size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);
+bool bch2_btree_interior_updates_flush(struct bch_fs *);

 void bch2_journal_entries_to_btree_roots(struct bch_fs *, struct jset *);
 struct jset_entry *bch2_btree_roots_to_journal_entries(struct bch_fs *,
@@ -1117,6 +1117,8 @@ int __bch2_trans_commit(struct btree_trans *trans)
                 goto out_reset;
         }

+        EBUG_ON(test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags));
+
         memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));

         trans->journal_u64s = trans->extra_journal_entries.nr;
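
The new check in __bch2_trans_commit() uses EBUG_ON rather than a hard BUG_ON. Assuming the usual bcachefs definition (worth confirming in bcachefs.h), the macro only expands to an assertion in CONFIG_BCACHEFS_DEBUG builds and compiles away otherwise, so the read-only check costs nothing on the commit fast path in production kernels. Roughly:

/* Assumed shape of EBUG_ON() -- a debug-build-only BUG_ON(); see
 * bcachefs.h for the authoritative definition. */
#ifdef CONFIG_BCACHEFS_DEBUG
#define EBUG_ON(cond)        BUG_ON(cond)
#else
#define EBUG_ON(cond)
#endif
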
@@ -175,10 +175,7 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
                 goto err;
         }

-        /* flush relevant btree updates */
-        closure_wait_event(&c->btree_interior_update_wait,
-                           !bch2_btree_interior_updates_nr_pending(c));
+        bch2_btree_interior_updates_flush(c);

         ret = 0;
 err:
         bch2_trans_exit(&trans);
@@ -942,9 +942,7 @@ static int bch2_move_btree(struct bch_fs *c,
         if (ret)
                 bch_err(c, "error %i in bch2_move_btree", ret);

-        /* flush relevant btree updates */
-        closure_wait_event(&c->btree_interior_update_wait,
-                           !bch2_btree_interior_updates_nr_pending(c));
+        bch2_btree_interior_updates_flush(c);

         progress_list_del(c, stats);
         return ret;
@@ -195,57 +195,33 @@ static void __bch2_fs_read_only(struct bch_fs *c)
 {
         struct bch_dev *ca;
         unsigned i, clean_passes = 0;
+        u64 seq = 0;

         bch2_rebalance_stop(c);
         bch2_copygc_stop(c);
         bch2_gc_thread_stop(c);

-        /*
-         * Flush journal before stopping allocators, because flushing journal
-         * blacklist entries involves allocating new btree nodes:
-         */
-        bch2_journal_flush_all_pins(&c->journal);
-
         bch_verbose(c, "flushing journal and stopping allocators");

+        bch2_journal_flush_all_pins(&c->journal);
+
         do {
                 clean_passes++;

-                if (bch2_journal_flush_all_pins(&c->journal))
-                        clean_passes = 0;
-
-                /*
-                 * In flight interior btree updates will generate more journal
-                 * updates and btree updates (alloc btree):
-                 */
-                if (bch2_btree_interior_updates_nr_pending(c)) {
-                        closure_wait_event(&c->btree_interior_update_wait,
-                                           !bch2_btree_interior_updates_nr_pending(c));
-                        clean_passes = 0;
-                }
-                flush_work(&c->btree_interior_update_work);
-
-                if (bch2_journal_flush_all_pins(&c->journal))
-                        clean_passes = 0;
+                if (bch2_btree_interior_updates_flush(c) ||
+                    bch2_journal_flush_all_pins(&c->journal) ||
+                    bch2_btree_flush_all_writes(c) ||
+                    seq != atomic64_read(&c->journal.seq)) {
+                        seq = atomic64_read(&c->journal.seq);
+                        clean_passes = 0;
+                }
         } while (clean_passes < 2);
-        bch_verbose(c, "flushing journal and stopping allocators complete");

-        set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
+        bch_verbose(c, "flushing journal and stopping allocators complete");

-        closure_wait_event(&c->btree_interior_update_wait,
-                           !bch2_btree_interior_updates_nr_pending(c));
-        flush_work(&c->btree_interior_update_work);
+        if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
+            !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
+                set_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);

         bch2_fs_journal_stop(&c->journal);

-        /*
-         * the journal kicks off btree writes via reclaim - wait for in flight
-         * writes after stopping journal:
-         */
-        bch2_btree_flush_all_writes(c);
-
         /*
          * After stopping journal:
          */
@@ -304,7 +280,7 @@ void bch2_fs_read_only(struct bch_fs *c)
             !test_bit(BCH_FS_ERROR, &c->flags) &&
             !test_bit(BCH_FS_EMERGENCY_RO, &c->flags) &&
             test_bit(BCH_FS_STARTED, &c->flags) &&
-            test_bit(BCH_FS_ALLOC_CLEAN, &c->flags) &&
+            test_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags) &&
             !c->opts.norecovery) {
                 bch_verbose(c, "marking filesystem clean");
                 bch2_fs_mark_clean(c);
@@ -395,7 +371,7 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
         if (ret)
                 goto err;

-        clear_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
+        clear_bit(BCH_FS_CLEAN_SHUTDOWN, &c->flags);

         for_each_rw_member(ca, c, i)
                 bch2_dev_allocator_add(c, ca);