Commit 039fc4c5 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Fixes for going RO

Now that interior btree updates are fully transactional, we don't need
to write out alloc info in a loop. However, interior btree updates do
put more things in the journal, so we still need a loop in the RO
sequence.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent baeed3c3
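
Note on the convergence loop this patch introduces: flushing journal pins can complete interior btree updates, which in turn generate more journal entries and alloc-btree updates, so a single flush is not enough. The RO path now loops until it gets two consecutive passes that do no work. The standalone sketch below illustrates only that pattern; flush_all_pins(), interior_updates_nr_pending() and pending_interior_updates are hypothetical stand-ins for bch2_journal_flush_all_pins() and bch2_btree_interior_updates_nr_pending(), not the actual kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in state: number of in-flight interior updates. */
static int pending_interior_updates = 3;

/* Stand-in for bch2_journal_flush_all_pins(): returns true if it did work. */
static bool flush_all_pins(void)
{
        if (pending_interior_updates > 0) {
                /* Flushing a pin completes one interior update. */
                pending_interior_updates--;
                return true;
        }
        return false;
}

/* Stand-in for bch2_btree_interior_updates_nr_pending(). */
static int interior_updates_nr_pending(void)
{
        return pending_interior_updates;
}

int main(void)
{
        unsigned clean_passes = 0;

        /*
         * Keep iterating until two consecutive passes do no work: a single
         * clean pass is not enough, because work finished on one pass can
         * queue more journal entries that only show up on the next pass.
         */
        do {
                clean_passes++;

                if (flush_all_pins())
                        clean_passes = 0;

                if (interior_updates_nr_pending())
                        clean_passes = 0;

                if (flush_all_pins())
                        clean_passes = 0;
        } while (clean_passes < 2);

        printf("quiesced after two consecutive clean passes\n");
        return 0;
}

The key point, mirrored in __bch2_fs_read_only() in the diff below, is that any pass that does work resets the clean-pass counter, so shutdown only proceeds once the journal and interior btree updates have genuinely quiesced.
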
@@ -869,6 +869,15 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
if (!invalidating_cached_data)
goto out;
/*
* If the read-only path is trying to shut down, we can't be generating
* new btree updates:
*/
if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) {
ret = 1;
goto out;
}
BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b));
@@ -956,7 +965,7 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans,
percpu_up_read(&c->mark_lock);
}
return ret;
return ret < 0 ? ret : 0;
}
static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
......
@@ -482,6 +482,7 @@ enum {
BCH_FS_ALLOC_CLEAN,
BCH_FS_ALLOCATOR_STARTED,
BCH_FS_ALLOCATOR_RUNNING,
BCH_FS_ALLOCATOR_STOPPING,
BCH_FS_INITIAL_GC_DONE,
BCH_FS_FSCK_DONE,
BCH_FS_STARTED,
......
@@ -413,10 +413,12 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq)
return ret;
}
static void journal_flush_pins(struct journal *j, u64 seq_to_flush,
/* returns true if we did work */
static bool journal_flush_pins(struct journal *j, u64 seq_to_flush,
unsigned min_nr)
{
struct journal_entry_pin *pin;
bool ret = false;
u64 seq;
lockdep_assert_held(&j->reclaim_lock);
@@ -431,7 +433,10 @@ static void journal_flush_pins(struct journal *j, u64 seq_to_flush,
BUG_ON(j->flush_in_progress != pin);
j->flush_in_progress = NULL;
wake_up(&j->pin_flush_wait);
ret = true;
}
return ret;
}
/**
@@ -523,7 +528,8 @@ void bch2_journal_reclaim_work(struct work_struct *work)
mutex_unlock(&j->reclaim_lock);
}
static int journal_flush_done(struct journal *j, u64 seq_to_flush)
static int journal_flush_done(struct journal *j, u64 seq_to_flush,
bool *did_work)
{
int ret;
@@ -533,7 +539,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
mutex_lock(&j->reclaim_lock);
journal_flush_pins(j, seq_to_flush, 0);
*did_work = journal_flush_pins(j, seq_to_flush, 0);
spin_lock(&j->lock);
/*
@@ -551,12 +557,17 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush)
return ret;
}
void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
{
bool did_work = false;
if (!test_bit(JOURNAL_STARTED, &j->flags))
return;
return false;
closure_wait_event(&j->async_wait,
journal_flush_done(j, seq_to_flush, &did_work));
closure_wait_event(&j->async_wait, journal_flush_done(j, seq_to_flush));
return did_work;
}
int bch2_journal_flush_device_pins(struct journal *j, int dev_idx)
......
@@ -53,11 +53,11 @@ void bch2_journal_do_discards(struct journal *);
void bch2_journal_reclaim(struct journal *);
void bch2_journal_reclaim_work(struct work_struct *);
void bch2_journal_flush_pins(struct journal *, u64);
bool bch2_journal_flush_pins(struct journal *, u64);
static inline void bch2_journal_flush_all_pins(struct journal *j)
static inline bool bch2_journal_flush_all_pins(struct journal *j)
{
bch2_journal_flush_pins(j, U64_MAX);
return bch2_journal_flush_pins(j, U64_MAX);
}
int bch2_journal_flush_device_pins(struct journal *, int);
......
@@ -175,7 +175,7 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
static void __bch2_fs_read_only(struct bch_fs *c)
{
struct bch_dev *ca;
bool wrote;
bool wrote = false;
unsigned i, clean_passes = 0;
int ret;
@@ -200,12 +200,12 @@ static void __bch2_fs_read_only(struct bch_fs *c)
goto nowrote_alloc;
bch_verbose(c, "writing alloc info");
do {
wrote = false;
/*
* This should normally just be writing the bucket read/write clocks:
*/
ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?:
bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote);
bch_verbose(c, "writing alloc info complete");
if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags))
bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
@@ -213,26 +213,33 @@ static void __bch2_fs_read_only(struct bch_fs *c)
if (ret)
goto nowrote_alloc;
for_each_member_device(ca, c, i)
bch2_dev_allocator_quiesce(c, ca);
bch_verbose(c, "flushing journal and stopping allocators");
bch2_journal_flush_all_pins(&c->journal);
set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
do {
clean_passes++;
if (bch2_journal_flush_all_pins(&c->journal))
clean_passes = 0;
/*
* We need to explicitly wait on btree interior updates to complete
* before stopping the journal, flushing all journal pins isn't
* sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
* interior updates have to drop their journal pin before they're
* fully complete:
* In flight interior btree updates will generate more journal
* updates and btree updates (alloc btree):
*/
if (bch2_btree_interior_updates_nr_pending(c)) {
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
clean_passes = 0;
}
flush_work(&c->btree_interior_update_work);
clean_passes = wrote ? 0 : clean_passes + 1;
if (bch2_journal_flush_all_pins(&c->journal))
clean_passes = 0;
} while (clean_passes < 2);
bch_verbose(c, "flushing journal and stopping allocators complete");
bch_verbose(c, "writing alloc info complete");
set_bit(BCH_FS_ALLOC_CLEAN, &c->flags);
nowrote_alloc:
closure_wait_event(&c->btree_interior_update_wait,
@@ -243,11 +250,10 @@ static void __bch2_fs_read_only(struct bch_fs *c)
bch2_dev_allocator_stop(ca);
clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags);
bch2_fs_journal_stop(&c->journal);
/* XXX: mark super that alloc info is persistent */
/*
* the journal kicks off btree writes via reclaim - wait for in flight
* writes after stopping journal:
......