Commit 430735cd authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Persist alloc info on clean shutdown

 - Does not persist alloc info for stripes yet
 - Also does not yet include filesystem block/sector counts from struct fs_usage
 - Not made use of just yet
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 5e5d9bdb
@@ -250,6 +250,9 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
                 bch2_alloc_read_key(c, bkey_i_to_s_c(k));
         }
 
+        for_each_member_device(ca, c, i)
+                bch2_dev_usage_from_buckets(c, ca);
+
         mutex_lock(&c->bucket_clock[READ].lock);
         for_each_member_device(ca, c, i) {
                 down_read(&ca->bucket_lock);
@@ -281,35 +284,51 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 #endif
         struct bkey_i_alloc *a = bkey_alloc_init(&alloc_key.k);
         struct bucket *g;
-        struct bucket_mark m;
+        struct bucket_mark m, new;
         int ret;
 
         BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
 
         a->k.p = POS(ca->dev_idx, b);
 
+        bch2_btree_iter_set_pos(iter, a->k.p);
+
+        ret = bch2_btree_iter_traverse(iter);
+        if (ret)
+                return ret;
+
         percpu_down_read(&c->mark_lock);
         g = bucket(ca, b);
-        m = bucket_cmpxchg(g, m, m.dirty = false);
+        m = READ_ONCE(g->mark);
+
+        if (!m.dirty) {
+                percpu_up_read(&c->mark_lock);
+                return 0;
+        }
 
         __alloc_write_key(a, g, m);
         percpu_up_read(&c->mark_lock);
 
         bch2_btree_iter_cond_resched(iter);
 
-        bch2_btree_iter_set_pos(iter, a->k.p);
-
         ret = bch2_btree_insert_at(c, NULL, journal_seq,
+                                   BTREE_INSERT_NOCHECK_RW|
                                    BTREE_INSERT_NOFAIL|
                                    BTREE_INSERT_USE_RESERVE|
                                    BTREE_INSERT_USE_ALLOC_RESERVE|
                                    flags,
                                    BTREE_INSERT_ENTRY(iter, &a->k_i));
+        if (ret)
+                return ret;
 
-        if (!ret && ca->buckets_written)
+        new = m;
+        new.dirty = false;
+        atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);
+
+        if (ca->buckets_written)
                 set_bit(b, ca->buckets_written);
 
-        return ret;
+        return 0;
 }
 
 int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
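The rewritten __bch2_alloc_write_key() above is the core of the persistence scheme: it snapshots the bucket mark, skips buckets whose dirty flag is already clear, writes the bucket's alloc key into the btree, and only then clears the dirty flag with a compare-and-swap, so a concurrent update that re-dirtied the bucket while the key was in flight is not lost. A minimal user-space sketch of that handshake, assuming a hypothetical 64-bit mark with the dirty flag in the top bit (C11 atomics, not bcachefs code):

/*
 * Sketch of the dirty-flag write-out protocol (hypothetical names):
 * updaters always set the dirty bit; the flusher snapshots the mark,
 * persists it, then clears dirty only if the mark is unchanged.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MARK_DIRTY (1ull << 63)         /* top bit = dirty, low bits = payload */

static _Atomic uint64_t mark;

static void writer_update(uint64_t payload)
{
        /* every in-memory modification also marks the bucket dirty */
        atomic_store(&mark, payload | MARK_DIRTY);
}

static bool flush_one(void)
{
        uint64_t old = atomic_load(&mark);
        uint64_t clean;

        if (!(old & MARK_DIRTY))
                return false;           /* already persisted, nothing to do */

        printf("persisting payload %llu\n",
               (unsigned long long) (old & ~MARK_DIRTY));

        /*
         * Clear dirty only if nothing changed while we were writing;
         * on a lost race the bucket stays dirty for the next pass.
         */
        clean = old & ~MARK_DIRTY;
        atomic_compare_exchange_strong(&mark, &old, clean);
        return true;
}

int main(void)
{
        writer_update(42);
        while (flush_one())
                ;
        return 0;
}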
@@ -899,10 +918,19 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
         for (i = 0; i < RESERVE_NR; i++)
                 if (fifo_push(&ca->free[i], bucket)) {
                         fifo_pop(&ca->free_inc, bucket);
+
                         closure_wake_up(&c->freelist_wait);
+                        ca->allocator_blocked_full = false;
+
                         spin_unlock(&c->freelist_lock);
                         goto out;
                 }
 
+        if (!ca->allocator_blocked_full) {
+                ca->allocator_blocked_full = true;
+                closure_wake_up(&c->freelist_wait);
+        }
+
         spin_unlock(&c->freelist_lock);
 
         if ((current->flags & PF_KTHREAD) &&
@@ -1227,6 +1255,11 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
                 set_bit(ca->dev_idx, c->rw_devs[i].d);
 }
 
+void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca)
+{
+        closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full);
+}
+
 /* stop allocator thread: */
 void bch2_dev_allocator_stop(struct bch_dev *ca)
 {
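The new allocator_blocked_full flag and bch2_dev_allocator_quiesce() give shutdown a way to wait until the allocator thread has filled every reserve fifo and parked itself, i.e. until it is no longer invalidating buckets and producing fresh dirty alloc info. A rough user-space analogue of that handshake, assuming hypothetical names and plain pthreads in place of the kernel's closure_wait_event():

/*
 * User-space analogue of the quiesce handshake (hypothetical names):
 * the worker sets ->blocked_full under the lock when it can make no
 * further progress and signals; quiesce() waits for that state.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct worker_state {
        pthread_mutex_t lock;
        pthread_cond_t  idle;
        bool            blocked_full;
};

static void worker_report_blocked(struct worker_state *w)
{
        pthread_mutex_lock(&w->lock);
        if (!w->blocked_full) {
                w->blocked_full = true;
                pthread_cond_broadcast(&w->idle);
        }
        pthread_mutex_unlock(&w->lock);
}

static void quiesce(struct worker_state *w)
{
        pthread_mutex_lock(&w->lock);
        while (!w->blocked_full)
                pthread_cond_wait(&w->idle, &w->lock);
        pthread_mutex_unlock(&w->lock);
}

static void *worker(void *arg)
{
        usleep(1000);                   /* pretend to fill the freelists */
        worker_report_blocked(arg);
        return NULL;
}

int main(void)
{
        struct worker_state w = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .idle = PTHREAD_COND_INITIALIZER,
        };
        pthread_t t;

        pthread_create(&t, NULL, worker, &w);
        quiesce(&w);                    /* returns once the worker is parked */
        printf("worker quiesced\n");
        pthread_join(t, NULL);
        return 0;
}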
......
@@ -52,6 +52,7 @@ void bch2_recalc_capacity(struct bch_fs *);
 void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
+void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_stop(struct bch_dev *);
 int bch2_dev_allocator_start(struct bch_dev *);
......
@@ -431,7 +431,13 @@ struct bch_dev {
         size_t inc_gen_needs_gc;
         size_t inc_gen_really_needs_gc;
 
+        /*
+         * XXX: this should be an enum for allocator state, so as to include
+         * error state
+         */
         bool allocator_blocked;
+        bool allocator_blocked_full;
 
         alloc_heap alloc_heap;
......
@@ -78,6 +78,7 @@ enum {
         __BTREE_INSERT_ATOMIC,
         __BTREE_INSERT_NOUNLOCK,
         __BTREE_INSERT_NOFAIL,
+        __BTREE_INSERT_NOCHECK_RW,
         __BTREE_INSERT_USE_RESERVE,
         __BTREE_INSERT_USE_ALLOC_RESERVE,
         __BTREE_INSERT_JOURNAL_REPLAY,
@@ -101,6 +102,8 @@ enum {
 /* Don't check for -ENOSPC: */
 #define BTREE_INSERT_NOFAIL (1 << __BTREE_INSERT_NOFAIL)
 
+#define BTREE_INSERT_NOCHECK_RW (1 << __BTREE_INSERT_NOCHECK_RW)
+
 /* for copygc, or when merging btree nodes */
 #define BTREE_INSERT_USE_RESERVE (1 << __BTREE_INSERT_USE_RESERVE)
 #define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
......
@@ -1172,6 +1172,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
         mutex_lock(&c->btree_interior_update_lock);
         percpu_down_read(&c->mark_lock);
+        preempt_disable();
         fs_usage = bch2_fs_usage_get_scratch(c);
 
         bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
@@ -1194,6 +1195,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
         bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
                             gc_pos_btree_node(b));
 
+        preempt_enable();
         percpu_up_read(&c->mark_lock);
         mutex_unlock(&c->btree_interior_update_lock);
......
@@ -629,7 +629,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
         trans_for_each_entry(trans, i)
                 btree_insert_entry_checks(c, i);
 
-        if (unlikely(!percpu_ref_tryget(&c->writes)))
+        if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
+                     !percpu_ref_tryget(&c->writes)))
                 return -EROFS;
 retry:
         trans_for_each_iter(trans, i) {
@@ -659,7 +660,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
         trans_for_each_iter(trans, i)
                 bch2_btree_iter_downgrade(i->iter);
 out:
-        percpu_ref_put(&c->writes);
+        if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
+                percpu_ref_put(&c->writes);
 
         /* make sure we didn't drop or screw up locks: */
         trans_for_each_iter(trans, i) {
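__bch2_btree_insert_at() normally refuses new updates with -EROFS once it can no longer take a reference on c->writes, which is exactly the state the filesystem is in while going read-only; the new BTREE_INSERT_NOCHECK_RW flag lets the alloc-info write-out bypass that gate and symmetrically skip the matching put. A toy sketch of the same shape, assuming hypothetical names rather than the kernel's percpu_ref API (and a deliberately simplified, not race-free, tryget):

/*
 * Sketch of a refcount-gated write path with an explicit bypass flag
 * (hypothetical, user-space): writes normally tryget a refcount that
 * is killed when going read-only; a NOCHECK_RW-style flag lets
 * shutdown-path writes go through anyway.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define WRITE_NOCHECK_RW (1u << 0)

static _Atomic long writes_ref = 1;     /* "alive" bias */
static atomic_bool writes_killed;

static bool writes_tryget(void)
{
        if (atomic_load(&writes_killed))
                return false;
        atomic_fetch_add(&writes_ref, 1);
        return true;
}

static void writes_put(void)
{
        atomic_fetch_sub(&writes_ref, 1);
}

static int do_write(unsigned flags)
{
        if (!(flags & WRITE_NOCHECK_RW) && !writes_tryget())
                return -1;              /* -EROFS in the real code */

        printf("write done (flags %#x)\n", flags);

        if (!(flags & WRITE_NOCHECK_RW))
                writes_put();
        return 0;
}

int main(void)
{
        atomic_store(&writes_killed, true);     /* filesystem going read-only */

        printf("normal write: %d\n", do_write(0));                /* refused */
        printf("alloc write:  %d\n", do_write(WRITE_NOCHECK_RW)); /* allowed */
        return 0;
}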
......
@@ -387,7 +387,8 @@ static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
         *old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
                 BUG_ON(!is_available_bucket(new));
 
-                new.owned_by_allocator = 1;
+                new.owned_by_allocator = true;
+                new.dirty = true;
                 new.data_type = 0;
                 new.cached_sectors = 0;
                 new.dirty_sectors = 0;
@@ -460,6 +461,7 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
                type != BCH_DATA_JOURNAL);
 
         bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+                new.dirty = true;
                 new.data_type = type;
                 checked_add(new.dirty_sectors, sectors);
         }));
@@ -487,13 +489,14 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
                        true);
         } else {
                 struct bucket *g;
-                struct bucket_mark old, new;
+                struct bucket_mark new;
 
                 rcu_read_lock();
 
                 g = bucket(ca, b);
-                old = bucket_cmpxchg(g, new, ({
-                        new.data_type = type;
+                bucket_cmpxchg(g, new, ({
+                        new.dirty = true;
+                        new.data_type = type;
                         checked_add(new.dirty_sectors, sectors);
                 }));
@@ -546,6 +549,8 @@ static void bch2_mark_pointer(struct bch_fs *c,
         do {
                 new.v.counter = old.v.counter = v;
 
+                new.dirty = true;
+
                 /*
                  * Check this after reading bucket mark to guard against
                  * the allocator invalidating a bucket after we've already
@@ -709,6 +714,7 @@ static void bucket_set_stripe(struct bch_fs *c,
                 BUG_ON(ptr_stale(ca, ptr));
 
                 old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+                        new.dirty = true;
                         new.stripe = enabled;
                         if (journal_seq) {
                                 new.journal_seq_valid = 1;
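The buckets.c changes all follow one pattern: every transition of a bucket mark now also sets new.dirty = true inside the same atomic update, so the in-memory mark records that it differs from what is stored in the alloc btree and __bch2_alloc_write_key() knows which buckets still need writing. The bucket_cmpxchg()-style update itself is a plain read-modify-compare-and-swap loop on a mark packed into one machine word; a minimal sketch with a hypothetical mark layout (not the real struct bucket_mark):

/*
 * Sketch of the read-modify-cmpxchg loop behind bucket_cmpxchg(): the
 * whole mark fits in one 64-bit word, so an update builds a modified
 * copy and retries until the compare-and-swap lands, setting dirty as
 * part of the same atomic update.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

union mark {
        uint64_t v;
        struct {
                uint32_t dirty_sectors;
                uint8_t  data_type;
                bool     dirty;
        };
};

static _Atomic uint64_t bucket_mark;

static union mark mark_update(uint8_t type, uint32_t sectors)
{
        union mark old, new;
        uint64_t v = atomic_load(&bucket_mark);

        do {
                old.v = new.v = v;

                new.dirty          = true;      /* needs to be written out */
                new.data_type      = type;
                new.dirty_sectors += sectors;
        } while (!atomic_compare_exchange_weak(&bucket_mark, &v, new.v));

        return old;     /* caller can compute usage deltas from old vs new */
}

int main(void)
{
        mark_update(1, 128);

        union mark m = { .v = atomic_load(&bucket_mark) };
        printf("dirty=%d data_type=%u dirty_sectors=%u\n",
               m.dirty, (unsigned) m.data_type, (unsigned) m.dirty_sectors);
        return 0;
}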
......
@@ -182,6 +182,8 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
 
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
 
+void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *);
+
 static inline u64 __dev_buckets_available(struct bch_dev *ca,
                                           struct bch_dev_usage stats)
 {
......
@@ -174,7 +174,9 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
 static void __bch2_fs_read_only(struct bch_fs *c)
 {
         struct bch_dev *ca;
+        bool wrote;
         unsigned i;
+        int ret;
 
         bch2_rebalance_stop(c);
@@ -189,23 +191,36 @@ static void __bch2_fs_read_only(struct bch_fs *c)
          */
         bch2_journal_flush_all_pins(&c->journal);
 
-        for_each_member_device(ca, c, i)
-                bch2_dev_allocator_stop(ca);
+        do {
+                ret = bch2_alloc_write(c, false, &wrote);
+                if (ret) {
+                        bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
+                        break;
+                }
 
-        bch2_journal_flush_all_pins(&c->journal);
+                for_each_member_device(ca, c, i)
+                        bch2_dev_allocator_quiesce(c, ca);
 
-        /*
-         * We need to explicitly wait on btree interior updates to complete
-         * before stopping the journal, flushing all journal pins isn't
-         * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
-         * interior updates have to drop their journal pin before they're
-         * fully complete:
-         */
-        closure_wait_event(&c->btree_interior_update_wait,
-                           !bch2_btree_interior_updates_nr_pending(c));
+                bch2_journal_flush_all_pins(&c->journal);
+
+                /*
+                 * We need to explicitly wait on btree interior updates to complete
+                 * before stopping the journal, flushing all journal pins isn't
+                 * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
+                 * interior updates have to drop their journal pin before they're
+                 * fully complete:
+                 */
+                closure_wait_event(&c->btree_interior_update_wait,
+                                   !bch2_btree_interior_updates_nr_pending(c));
+        } while (wrote);
+
+        for_each_member_device(ca, c, i)
+                bch2_dev_allocator_stop(ca);
 
         bch2_fs_journal_stop(&c->journal);
 
+        /* XXX: mark super that alloc info is persistent */
+
         /*
          * the journal kicks off btree writes via reclaim - wait for in flight
          * writes after stopping journal:
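Putting it together, __bch2_fs_read_only() now loops: write out all dirty alloc info, quiesce the per-device allocator threads, flush journal pins and wait for interior updates, then repeat while the previous pass wrote anything, since writing alloc keys can itself dirty more buckets (btree node allocation, journal writes); only when a pass writes nothing are the allocator threads stopped and the journal shut down. A bare-bones sketch of that fixpoint loop, with hypothetical stand-ins for the real helpers:

/*
 * Sketch of the write-until-clean fixpoint loop (hypothetical helpers,
 * not the real bcachefs API): flushing dirty state can create more
 * dirty state, so keep iterating until a pass writes nothing.
 */
#include <stdbool.h>
#include <stdio.h>

static int dirty_buckets = 3;

/* pretend each pass persists what is dirty, but may dirty one more thing */
static int write_alloc_info(bool *wrote)
{
        *wrote = dirty_buckets > 0;

        if (*wrote) {
                printf("wrote %d dirty buckets\n", dirty_buckets);
                /* writing can allocate btree nodes and re-dirty a bucket: */
                dirty_buckets = dirty_buckets > 1 ? 1 : 0;
        }
        return 0;
}

int main(void)
{
        bool wrote;
        int ret;

        do {
                ret = write_alloc_info(&wrote);
                if (ret) {
                        fprintf(stderr, "error writing alloc info: %d\n", ret);
                        break;
                }
                /* quiesce allocators, flush journal pins here */
        } while (wrote);

        printf("alloc info clean, safe to stop journal\n");
        return 0;
}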
......