Commit c3de9b57 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'bcachefs-2024-06-22' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "Lots of (mostly boring) fixes for syzbot bugs and rare(r) CI bugs.

  The LRU_TIME_BITS fix was slightly more involved; we only have 48 bits
  for the LRU position (we would prefer 64), so wraparound is possible
  for the cached data LRUs on a filesystem that has done sufficient
  (petabytes) reads; this is now handled.

  One notable user reported bugfix, where we were forgetting to
  correctly set the bucket data type, which should have been
  BCH_DATA_need_gc_gens instead of BCH_DATA_free; this was causing us to
  go emergency read-only on a filesystem that had seen heavy enough use
  to see bucket gen wraparoud.

  We're now starting to fix simple (safe) errors without requiring user
  intervention - i.e. a small incremental step towards full self
  healing.

  This is currently limited to just certain allocation information
  counters, and the error is still logged in the superblock; see that
  patch for more information. ("bcachefs: Fix safe errors by default")"

* tag 'bcachefs-2024-06-22' of https://evilpiepirate.org/git/bcachefs: (22 commits)
  bcachefs: Move the ei_flags setting to after initialization
  bcachefs: Fix a UAF after write_super()
  bcachefs: Use bch2_print_string_as_lines for long err
  bcachefs: Fix I_NEW warning in race path in bch2_inode_insert()
  bcachefs: Replace bare EEXIST with private error codes
  bcachefs: Fix missing alloc_data_type_set()
  closures: Change BUG_ON() to WARN_ON()
  bcachefs: fix alignment of VMA for memory mapped files on THP
  bcachefs: Fix safe errors by default
  bcachefs: Fix bch2_trans_put()
  bcachefs: set_worker_desc() for delete_dead_snapshots
  bcachefs: Fix bch2_sb_downgrade_update()
  bcachefs: Handle cached data LRU wraparound
  bcachefs: Guard against overflowing LRU_TIME_BITS
  bcachefs: delete_dead_snapshots() doesn't need to go RW
  bcachefs: Fix early init error path in journal code
  bcachefs: Check for invalid btree IDs
  bcachefs: Fix btree ID bitmasks
  bcachefs: Fix shift overflow in read_one_super()
  bcachefs: Fix a locking bug in the do_discard_fast() path
  ...
parents da3b6ef1 bd4da046
......@@ -259,6 +259,14 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
"invalid data type (got %u should be %u)",
a.v->data_type, alloc_data_type(*a.v, a.v->data_type));
for (unsigned i = 0; i < 2; i++)
bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX,
c, err,
alloc_key_io_time_bad,
"invalid io_time[%s]: %llu, max %llu",
i == READ ? "read" : "write",
a.v->io_time[i], LRU_TIME_MAX);
switch (a.v->data_type) {
case BCH_DATA_free:
case BCH_DATA_need_gc_gens:
......@@ -757,8 +765,8 @@ int bch2_trigger_alloc(struct btree_trans *trans,
alloc_data_type_set(new_a, new_a->data_type);
if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) {
new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
new_a->io_time[READ] = bch2_current_io_time(c, READ);
new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE);
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
}
......@@ -768,6 +776,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
new_a->gen++;
SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
alloc_data_type_set(new_a, new_a->data_type);
}
if (old_a->data_type != new_a->data_type ||
......@@ -781,7 +790,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
if (new_a->data_type == BCH_DATA_cached &&
!new_a->io_time[READ])
new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
new_a->io_time[READ] = bch2_current_io_time(c, READ);
u64 old_lru = alloc_lru_idx_read(*old_a);
u64 new_lru = alloc_lru_idx_read(*new_a);
......@@ -882,7 +891,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
closure_wake_up(&c->freelist_wait);
if (statechange(a->data_type == BCH_DATA_need_discard) &&
!bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
!bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
bucket_flushed(new_a))
bch2_discard_one_bucket_fast(c, new.k->p);
......@@ -1579,7 +1588,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
if (ret)
goto err;
a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
ret = bch2_trans_update(trans, alloc_iter,
&a_mut->k_i, BTREE_TRIGGER_norun);
if (ret)
......@@ -1634,7 +1643,7 @@ static int discard_in_flight_add(struct bch_fs *c, struct bpos bucket)
mutex_lock(&c->discard_buckets_in_flight_lock);
darray_for_each(c->discard_buckets_in_flight, i)
if (bkey_eq(*i, bucket)) {
ret = -EEXIST;
ret = -BCH_ERR_EEXIST_discard_in_flight_add;
goto out;
}
......@@ -1788,8 +1797,9 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
}
SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
alloc_data_type_set(&a->v, a->v.data_type);
write:
alloc_data_type_set(&a->v, a->v.data_type);
ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
bch2_trans_commit(trans, NULL, NULL,
BCH_WATERMARK_btree|
......@@ -1975,8 +1985,8 @@ static int invalidate_one_bucket(struct btree_trans *trans,
a->v.data_type = 0;
a->v.dirty_sectors = 0;
a->v.cached_sectors = 0;
a->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
a->v.io_time[WRITE] = atomic64_read(&c->io_clock[WRITE].now);
a->v.io_time[READ] = bch2_current_io_time(c, READ);
a->v.io_time[WRITE] = bch2_current_io_time(c, WRITE);
ret = bch2_trans_commit(trans, NULL, NULL,
BCH_WATERMARK_btree|
......@@ -2011,6 +2021,21 @@ static int invalidate_one_bucket(struct btree_trans *trans,
goto out;
}
static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter,
struct bch_dev *ca, bool *wrapped)
{
struct bkey_s_c k;
again:
k = bch2_btree_iter_peek_upto(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
if (!k.k && !*wrapped) {
bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0));
*wrapped = true;
goto again;
}
return k;
}
static void bch2_do_invalidates_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, invalidate_work);
......@@ -2024,12 +2049,33 @@ static void bch2_do_invalidates_work(struct work_struct *work)
for_each_member_device(c, ca) {
s64 nr_to_invalidate =
should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
struct btree_iter iter;
bool wrapped = false;
bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
lru_pos(ca->dev_idx, 0,
((bch2_current_io_time(c, READ) + U32_MAX) &
LRU_TIME_MAX)), 0);
ret = for_each_btree_key_upto(trans, iter, BTREE_ID_lru,
lru_pos(ca->dev_idx, 0, 0),
lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
BTREE_ITER_intent, k,
invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
while (true) {
bch2_trans_begin(trans);
struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
if (!k.k)
break;
ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
if (ret)
break;
bch2_btree_iter_advance(&iter);
}
bch2_trans_iter_exit(trans, &iter);
if (ret < 0) {
bch2_dev_put(ca);
......@@ -2204,7 +2250,7 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
if (ret)
return ret;
now = atomic64_read(&c->io_clock[rw].now);
now = bch2_current_io_time(c, rw);
if (a->v.io_time[rw] == now)
goto out;
......
......@@ -141,7 +141,13 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
!bch2_bucket_sectors_fragmented(ca, a))
return 0;
u64 d = bch2_bucket_sectors_dirty(a);
/*
* avoid overflowing LRU_TIME_BITS on a corrupted fs, when
* bucket_sectors_dirty is (much) bigger than bucket_size
*/
u64 d = min(bch2_bucket_sectors_dirty(a),
ca->mi.bucket_size);
return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
}
......
......@@ -1214,6 +1214,11 @@ static inline s64 bch2_current_time(const struct bch_fs *c)
return timespec_to_bch2_time(c, now);
}
static inline u64 bch2_current_io_time(const struct bch_fs *c, int rw)
{
return max(1ULL, (u64) atomic64_read(&c->io_clock[rw].now) & LRU_TIME_MAX);
}
static inline struct stdio_redirect *bch2_fs_stdio_redirect(struct bch_fs *c)
{
struct stdio_redirect *stdio = c->stdio;
......
......@@ -476,6 +476,9 @@ struct bch_lru {
#define LRU_ID_STRIPES (1U << 16)
#define LRU_TIME_BITS 48
#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
/* Optional/variable size superblock sections: */
struct bch_sb_field {
......@@ -987,8 +990,9 @@ enum bch_version_upgrade_opts {
#define BCH_ERROR_ACTIONS() \
x(continue, 0) \
x(ro, 1) \
x(panic, 2)
x(fix_safe, 1) \
x(panic, 2) \
x(ro, 3)
enum bch_error_actions {
#define x(t, n) BCH_ON_ERROR_##t = n,
......@@ -1382,9 +1386,10 @@ enum btree_id {
/*
* Maximum number of btrees that we will _ever_ have under the current scheme,
* where we refer to them with bitfields
* where we refer to them with 64 bit bitfields - and we also need a bit for
* the interior btree node type:
*/
#define BTREE_ID_NR_MAX 64
#define BTREE_ID_NR_MAX 63
static inline bool btree_id_is_alloc(enum btree_id id)
{
......
......@@ -1064,7 +1064,7 @@ void bch2_bkey_swab_key(const struct bkey_format *_f, struct bkey_packed *k)
{
const struct bkey_format *f = bkey_packed(k) ? _f : &bch2_bkey_format_current;
u8 *l = k->key_start;
u8 *h = (u8 *) (k->_data + f->key_u64s) - 1;
u8 *h = (u8 *) ((u64 *) k->_data + f->key_u64s) - 1;
while (l < h) {
swap(*l, *h);
......
......@@ -398,8 +398,12 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
for (i = 0; i < nr_compat; i++)
switch (!write ? i : nr_compat - 1 - i) {
case 0:
if (big_endian != CPU_BIG_ENDIAN)
if (big_endian != CPU_BIG_ENDIAN) {
bch2_bkey_swab_key(f, k);
} else if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
bch2_bkey_swab_key(f, k);
bch2_bkey_swab_key(f, k);
}
break;
case 1:
if (version < bcachefs_metadata_version_bkey_renumber)
......
......@@ -129,7 +129,8 @@ static inline void bch2_bkey_compat(unsigned level, enum btree_id btree_id,
struct bkey_packed *k)
{
if (version < bcachefs_metadata_version_current ||
big_endian != CPU_BIG_ENDIAN)
big_endian != CPU_BIG_ENDIAN ||
IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
__bch2_bkey_compat(level, btree_id, version,
big_endian, write, f, k);
......
......@@ -3161,6 +3161,7 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
list_add_done:
seqmutex_unlock(&c->btree_trans_lock);
got_trans:
trans->ref.closure_get_happened = false;
trans->c = c;
trans->last_begin_time = local_clock();
trans->fn_idx = fn_idx;
......@@ -3235,7 +3236,6 @@ void bch2_trans_put(struct btree_trans *trans)
trans_for_each_update(trans, i)
__btree_path_put(trans->paths + i->path, true);
trans->nr_updates = 0;
trans->locking_wait.task = NULL;
check_btree_paths_leaked(trans);
......@@ -3256,6 +3256,13 @@ void bch2_trans_put(struct btree_trans *trans)
if (unlikely(trans->journal_replay_not_finished))
bch2_journal_keys_put(c);
/*
* trans->ref protects trans->locking_wait.task, btree_paths arary; used
* by cycle detector
*/
closure_sync(&trans->ref);
trans->locking_wait.task = NULL;
unsigned long *paths_allocated = trans->paths_allocated;
trans->paths_allocated = NULL;
trans->paths = NULL;
......@@ -3273,8 +3280,6 @@ void bch2_trans_put(struct btree_trans *trans)
trans = this_cpu_xchg(c->btree_trans_bufs->trans, trans);
if (trans) {
closure_sync(&trans->ref);
seqmutex_lock(&c->btree_trans_lock);
list_del(&trans->list);
seqmutex_unlock(&c->btree_trans_lock);
......
......@@ -761,13 +761,13 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type)
static inline bool btree_node_type_is_extents(enum btree_node_type type)
{
const unsigned mask = 0
const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_EXTENTS)) << (nr + 1))
BCH_BTREE_IDS()
#undef x
;
return (1U << type) & mask;
return BIT_ULL(type) & mask;
}
static inline bool btree_id_is_extents(enum btree_id btree)
......@@ -777,35 +777,35 @@ static inline bool btree_id_is_extents(enum btree_id btree)
static inline bool btree_type_has_snapshots(enum btree_id id)
{
const unsigned mask = 0
const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_SNAPSHOTS)) << nr)
BCH_BTREE_IDS()
#undef x
;
return (1U << id) & mask;
return BIT_ULL(id) & mask;
}
static inline bool btree_type_has_snapshot_field(enum btree_id id)
{
const unsigned mask = 0
const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & (BTREE_ID_SNAPSHOT_FIELD|BTREE_ID_SNAPSHOTS))) << nr)
BCH_BTREE_IDS()
#undef x
;
return (1U << id) & mask;
return BIT_ULL(id) & mask;
}
static inline bool btree_type_has_ptrs(enum btree_id id)
{
const unsigned mask = 0
const u64 mask = 0
#define x(name, nr, flags, ...) |((!!((flags) & BTREE_ID_DATA)) << nr)
BCH_BTREE_IDS()
#undef x
;
return (1U << id) & mask;
return BIT_ULL(id) & mask;
}
struct btree_root {
......
......@@ -116,6 +116,9 @@
x(ENOENT, ENOENT_dev_idx_not_found) \
x(ENOTEMPTY, ENOTEMPTY_dir_not_empty) \
x(ENOTEMPTY, ENOTEMPTY_subvol_not_empty) \
x(EEXIST, EEXIST_str_hash_set) \
x(EEXIST, EEXIST_discard_in_flight_add) \
x(EEXIST, EEXIST_subvolume_create) \
x(0, open_buckets_empty) \
x(0, freelist_empty) \
x(BCH_ERR_freelist_empty, no_buckets_found) \
......
......@@ -15,6 +15,7 @@ bool bch2_inconsistent_error(struct bch_fs *c)
switch (c->opts.errors) {
case BCH_ON_ERROR_continue:
return false;
case BCH_ON_ERROR_fix_safe:
case BCH_ON_ERROR_ro:
if (bch2_fs_emergency_read_only(c))
bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
......@@ -191,6 +192,12 @@ static void prt_actioning(struct printbuf *out, const char *action)
prt_str(out, "ing");
}
static const u8 fsck_flags_extra[] = {
#define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags,
BCH_SB_ERRS()
#undef x
};
int bch2_fsck_err(struct bch_fs *c,
enum bch_fsck_flags flags,
enum bch_sb_error_id err,
......@@ -203,6 +210,9 @@ int bch2_fsck_err(struct bch_fs *c,
int ret = -BCH_ERR_fsck_ignore;
const char *action_orig = "fix?", *action = action_orig;
if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra)))
flags |= fsck_flags_extra[err];
if ((flags & FSCK_CAN_FIX) &&
test_bit(err, c->sb.errors_silent))
return -BCH_ERR_fsck_fix;
......@@ -265,7 +275,14 @@ int bch2_fsck_err(struct bch_fs *c,
prt_printf(out, bch2_log_msg(c, ""));
#endif
if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
if ((flags & FSCK_CAN_FIX) &&
(flags & FSCK_AUTOFIX) &&
(c->opts.errors == BCH_ON_ERROR_continue ||
c->opts.errors == BCH_ON_ERROR_fix_safe)) {
prt_str(out, ", ");
prt_actioning(out, action);
ret = -BCH_ERR_fsck_fix;
} else if (!test_bit(BCH_FS_fsck_running, &c->flags)) {
if (c->opts.errors != BCH_ON_ERROR_continue ||
!(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) {
prt_str(out, ", shutting down");
......
......@@ -108,13 +108,6 @@ struct fsck_err_state {
char *last_msg;
};
enum bch_fsck_flags {
FSCK_CAN_FIX = 1 << 0,
FSCK_CAN_IGNORE = 1 << 1,
FSCK_NEED_FSCK = 1 << 2,
FSCK_NO_RATELIMIT = 1 << 3,
};
#define fsck_err_count(_c, _err) bch2_sb_err_count(_c, BCH_FSCK_ERR_##_err)
__printf(4, 5) __cold
......
......@@ -373,7 +373,7 @@ static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp,
}
if (dst_dentry->d_inode) {
error = -EEXIST;
error = -BCH_ERR_EEXIST_subvolume_create;
goto err3;
}
......
......@@ -188,6 +188,12 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
BUG_ON(!old);
if (unlikely(old != inode)) {
/*
* bcachefs doesn't use I_NEW; we have no use for it since we
* only insert fully created inodes in the inode hash table. But
* discard_new_inode() expects it to be set...
*/
inode->v.i_flags |= I_NEW;
discard_new_inode(&inode->v);
inode = old;
} else {
......@@ -195,8 +201,10 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino
list_add(&inode->ei_vfs_inode_list, &c->vfs_inodes_list);
mutex_unlock(&c->vfs_inodes_lock);
/*
* we really don't want insert_inode_locked2() to be setting
* I_NEW...
* Again, I_NEW makes no sense for bcachefs. This is only needed
* for clearing I_NEW, but since the inode was already fully
* created and initialized we didn't actually want
* inode_insert5() to set it for us.
*/
unlock_new_inode(&inode->v);
}
......@@ -1157,6 +1165,7 @@ static const struct file_operations bch_file_operations = {
.read_iter = bch2_read_iter,
.write_iter = bch2_write_iter,
.mmap = bch2_mmap,
.get_unmapped_area = thp_get_unmapped_area,
.fsync = bch2_fsync,
.splice_read = filemap_splice_read,
.splice_write = iter_file_splice_write,
......@@ -1488,11 +1497,6 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
bch2_iget5_set(&inode->v, &inum);
bch2_inode_update_after_write(trans, inode, bi, ~0);
if (BCH_SUBVOLUME_SNAP(subvol))
set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
else
clear_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
inode->v.i_blocks = bi->bi_sectors;
inode->v.i_ino = bi->bi_inum;
inode->v.i_rdev = bi->bi_dev;
......@@ -1504,6 +1508,9 @@ static void bch2_vfs_inode_init(struct btree_trans *trans, subvol_inum inum,
inode->ei_qid = bch_qid(bi);
inode->ei_subvol = inum.subvol;
if (BCH_SUBVOLUME_SNAP(subvol))
set_bit(EI_INODE_SNAPSHOT, &inode->ei_flags);
inode->v.i_mapping->a_ops = &bch_address_space_operations;
switch (inode->v.i_mode & S_IFMT) {
......
......@@ -1167,6 +1167,9 @@ void bch2_dev_journal_stop(struct journal *j, struct bch_dev *ca)
void bch2_fs_journal_stop(struct journal *j)
{
if (!test_bit(JOURNAL_running, &j->flags))
return;
bch2_journal_reclaim_stop(j);
bch2_journal_flush_all_pins(j);
......
......@@ -1967,7 +1967,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
struct journal *j = container_of(w, struct journal, buf[w->idx]);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_replicas_padded replicas;
struct printbuf journal_debug_buf = PRINTBUF;
unsigned nr_rw_members = 0;
int ret;
......@@ -2011,11 +2010,15 @@ CLOSURE_CALLBACK(bch2_journal_write)
}
if (ret) {
__bch2_journal_debug_to_text(&journal_debug_buf, j);
struct printbuf buf = PRINTBUF;
buf.atomic++;
prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write: %s"),
bch2_err_str(ret));
__bch2_journal_debug_to_text(&buf, j);
spin_unlock(&j->lock);
bch_err(c, "Unable to allocate journal write:\n%s",
journal_debug_buf.buf);
printbuf_exit(&journal_debug_buf);
bch2_print_string_as_lines(KERN_ERR, buf.buf);
printbuf_exit(&buf);
goto err;
}
......
......@@ -2,9 +2,6 @@
#ifndef _BCACHEFS_LRU_H
#define _BCACHEFS_LRU_H
#define LRU_TIME_BITS 48
#define LRU_TIME_MAX ((1ULL << LRU_TIME_BITS) - 1)
static inline u64 lru_pos_id(struct bpos pos)
{
return pos.inode >> LRU_TIME_BITS;
......
......@@ -137,7 +137,7 @@ enum fsck_err_opts {
x(errors, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
OPT_STR(bch2_error_actions), \
BCH_SB_ERROR_ACTION, BCH_ON_ERROR_ro, \
BCH_SB_ERROR_ACTION, BCH_ON_ERROR_fix_safe, \
NULL, "Action to take on filesystem error") \
x(metadata_replicas, u8, \
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
......
......@@ -326,6 +326,12 @@ static int journal_replay_entry_early(struct bch_fs *c,
case BCH_JSET_ENTRY_btree_root: {
struct btree_root *r;
if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX,
c, invalid_btree_id,
"invalid btree id %u (max %u)",
entry->btree_id, BTREE_ID_NR_MAX))
return 0;
while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) {
ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL });
if (ret)
......@@ -415,7 +421,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time));
}
}
fsck_err:
return ret;
}
......@@ -658,10 +664,10 @@ int bch2_fs_recovery(struct bch_fs *c)
if (check_version_upgrade(c))
write_sb = true;
c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
if (write_sb)
bch2_write_super(c);
c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
mutex_unlock(&c->sb_lock);
if (c->opts.fsck && IS_ENABLED(CONFIG_BCACHEFS_DEBUG))
......
......@@ -228,7 +228,7 @@ int bch2_sb_downgrade_update(struct bch_fs *c)
dst = (void *) &darray_top(table);
dst->version = cpu_to_le16(src->version);
dst->recovery_passes[0] = cpu_to_le64(src->recovery_passes);
dst->recovery_passes[0] = cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes));
dst->recovery_passes[1] = 0;
dst->nr_errors = cpu_to_le16(src->nr_errors);
for (unsigned i = 0; i < src->nr_errors; i++)
......
This diff is collapsed.
......@@ -1565,13 +1565,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
return 0;
if (!test_bit(BCH_FS_started, &c->flags)) {
ret = bch2_fs_read_write_early(c);
bch_err_msg(c, ret, "deleting dead snapshots: error going rw");
if (ret)
return ret;
}
trans = bch2_trans_get(c);
/*
......@@ -1687,6 +1680,8 @@ void bch2_delete_dead_snapshots_work(struct work_struct *work)
{
struct bch_fs *c = container_of(work, struct bch_fs, snapshot_delete_work);
set_worker_desc("bcachefs-delete-dead-snapshots/%s", c->name);
bch2_delete_dead_snapshots(c);
bch2_write_ref_put(c, BCH_WRITE_REF_delete_dead_snapshots);
}
......
......@@ -300,7 +300,7 @@ int bch2_hash_set_in_snapshot(struct btree_trans *trans,
if (!found && (flags & STR_HASH_must_replace)) {
ret = -BCH_ERR_ENOENT_str_hash_set_must_replace;
} else if (found && (flags & STR_HASH_must_create)) {
ret = -EEXIST;
ret = -BCH_ERR_EEXIST_str_hash_set;
} else {
if (!found && slot.path)
swap(iter, slot);
......
......@@ -649,9 +649,10 @@ static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf
bytes = vstruct_bytes(sb->sb);
if (bytes > 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits)) {
prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %lu)",
bytes, 512UL << sb->sb->layout.sb_max_size_bits);
u64 sb_size = 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits);
if (bytes > sb_size) {
prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %llu)",
bytes, sb_size);
return -BCH_ERR_invalid_sb_too_big;
}
......
......@@ -912,9 +912,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
bch2_io_clock_init(&c->io_clock[WRITE]) ?:
bch2_fs_journal_init(&c->journal) ?:
bch2_fs_replicas_init(c) ?:
bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_cache_init(c) ?:
bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
bch2_fs_btree_iter_init(c) ?:
bch2_fs_btree_interior_update_init(c) ?:
bch2_fs_buckets_waiting_for_journal_init(c) ?:
bch2_fs_btree_write_buffer_init(c) ?:
......@@ -931,12 +931,13 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
if (ret)
goto err;
for (i = 0; i < c->sb.nr_devices; i++)
if (bch2_member_exists(c->disk_sb.sb, i) &&
bch2_dev_alloc(c, i)) {
ret = -EEXIST;
for (i = 0; i < c->sb.nr_devices; i++) {
if (!bch2_member_exists(c->disk_sb.sb, i))
continue;
ret = bch2_dev_alloc(c, i);
if (ret)
goto err;
}
}
bch2_journal_entry_res_resize(&c->journal,
&c->btree_root_journal_res,
......
......@@ -17,12 +17,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags)
{
int r = flags & CLOSURE_REMAINING_MASK;
BUG_ON(flags & CLOSURE_GUARD_MASK);
BUG_ON(!r && (flags & ~CLOSURE_DESTRUCTOR));
if (WARN(flags & CLOSURE_GUARD_MASK,
"closure has guard bits set: %x (%u)",
flags & CLOSURE_GUARD_MASK, (unsigned) __fls(r)))
r &= ~CLOSURE_GUARD_MASK;
if (!r) {
smp_acquire__after_ctrl_dep();
WARN(flags & ~CLOSURE_DESTRUCTOR,
"closure ref hit 0 with incorrect flags set: %x (%u)",
flags & ~CLOSURE_DESTRUCTOR, (unsigned) __fls(flags));
cl->closure_get_happened = false;
if (cl->fn && !(flags & CLOSURE_DESTRUCTOR)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment