Commit ff9bce3d authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'bcachefs-2024-05-30' of https://evilpiepirate.org/git/bcachefs

Pull bcachefs fixes from Kent Overstreet:
 "Assorted odds and ends...

   - two downgrade fixes

   - a couple snapshot deletion and repair fixes, thanks to noradtux for
     finding these and providing the image to debug them

   - a couple assert fixes

   - convert to folio helper, from Matthew

   - some improved error messages

   - bit of code reorganization (just moving things around); doing this
     while things are quiet so I'm not rebasing fixes past reorgs

   - don't return -EROFS on inconsistency error in recovery, this
     confuses util-linux and has it retry the mount

   - fix failure to return error on misaligned dio write; reported as an
     issue with coreutils shred"

* tag 'bcachefs-2024-05-30' of https://evilpiepirate.org/git/bcachefs: (21 commits)
  bcachefs: Fix failure to return error on misaligned dio write
  bcachefs: Don't return -EROFS from mount on inconsistency error
  bcachefs: Fix uninitialized var warning
  bcachefs: Split out sb-errors_format.h
  bcachefs: Split out journal_seq_blacklist_format.h
  bcachefs: Split out replicas_format.h
  bcachefs: Split out disk_groups_format.h
  bcachefs: split out sb-downgrade_format.h
  bcachefs: split out sb-members_format.h
  bcachefs: Better fsck error message for key version
  bcachefs: btree_gc can now handle unknown btrees
  bcachefs: add missing MODULE_DESCRIPTION()
  bcachefs: Fix setting of downgrade recovery passes/errors
  bcachefs: Run check_key_has_snapshot in snapshot_delete_keys()
  bcachefs: Refactor delete_dead_snapshots()
  bcachefs: Fix locking assert
  bcachefs: Fix lookup_first_inode() when inode_generations are present
  bcachefs: Plumb bkey into __btree_err()
  bcachefs: Use copy_folio_from_iter_atomic()
  bcachefs: Fix sb-downgrade validation
  ...
parents d8ec1985 7b038b56
...@@ -690,7 +690,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans, ...@@ -690,7 +690,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
ptrs = bch2_bkey_ptrs_c(k); ptrs = bch2_bkey_ptrs_c(k);
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
struct bpos bucket_pos; struct bpos bucket_pos = POS_MIN;
struct bch_backpointer bp; struct bch_backpointer bp;
if (p.ptr.cached) if (p.ptr.cached)
......
...@@ -457,6 +457,7 @@ enum bch_time_stats { ...@@ -457,6 +457,7 @@ enum bch_time_stats {
}; };
#include "alloc_types.h" #include "alloc_types.h"
#include "btree_gc_types.h"
#include "btree_types.h" #include "btree_types.h"
#include "btree_node_scan_types.h" #include "btree_node_scan_types.h"
#include "btree_write_buffer_types.h" #include "btree_write_buffer_types.h"
...@@ -488,49 +489,6 @@ enum bch_time_stats { ...@@ -488,49 +489,6 @@ enum bch_time_stats {
struct btree; struct btree;
/*
 * Phases of a gc run. gc_pos_cmp() compares positions by phase first, so the
 * enumerator order below is the order positions sort in. There is one
 * GC_PHASE_BTREE_<name> enumerator per btree; btree_id_to_gc_phase() maps a
 * btree id to the enumerator of the same name.
 */
enum gc_phase {
/* gc position is set to this once gc is no longer in progress */
GC_PHASE_NOT_RUNNING,
/* set by bch2_check_allocations() before superblocks are marked */
GC_PHASE_START,
/* set by bch2_mark_superblocks() */
GC_PHASE_SB,
/* per-btree phases; note that stripes is listed ahead of all other btrees */
GC_PHASE_BTREE_stripes,
GC_PHASE_BTREE_extents,
GC_PHASE_BTREE_inodes,
GC_PHASE_BTREE_dirents,
GC_PHASE_BTREE_xattrs,
GC_PHASE_BTREE_alloc,
GC_PHASE_BTREE_quotas,
GC_PHASE_BTREE_reflink,
GC_PHASE_BTREE_subvolumes,
GC_PHASE_BTREE_snapshots,
GC_PHASE_BTREE_lru,
GC_PHASE_BTREE_freespace,
GC_PHASE_BTREE_need_discard,
GC_PHASE_BTREE_backpointers,
GC_PHASE_BTREE_bucket_gens,
GC_PHASE_BTREE_snapshot_trees,
GC_PHASE_BTREE_deleted_inodes,
GC_PHASE_BTREE_logged_ops,
GC_PHASE_BTREE_rebalance_work,
GC_PHASE_BTREE_subvolume_children,
GC_PHASE_PENDING_DELETE,
};
struct gc_pos {
enum gc_phase phase;
u16 level;
struct bpos pos;
};
struct reflink_gc {
u64 offset;
u32 size;
u32 refcount;
};
typedef GENRADIX(struct reflink_gc) reflink_gc_table;
struct io_count { struct io_count {
u64 sectors[2][BCH_DATA_NR]; u64 sectors[2][BCH_DATA_NR];
}; };
......
...@@ -503,16 +503,22 @@ struct bch_sb_field { ...@@ -503,16 +503,22 @@ struct bch_sb_field {
#include "alloc_background_format.h" #include "alloc_background_format.h"
#include "extents_format.h" #include "extents_format.h"
#include "reflink_format.h"
#include "ec_format.h" #include "ec_format.h"
#include "inode_format.h"
#include "dirent_format.h" #include "dirent_format.h"
#include "xattr_format.h" #include "disk_groups_format.h"
#include "quota_format.h" #include "inode_format.h"
#include "journal_seq_blacklist_format.h"
#include "logged_ops_format.h" #include "logged_ops_format.h"
#include "quota_format.h"
#include "reflink_format.h"
#include "replicas_format.h"
#include "snapshot_format.h" #include "snapshot_format.h"
#include "subvolume_format.h" #include "subvolume_format.h"
#include "sb-counters_format.h" #include "sb-counters_format.h"
#include "sb-downgrade_format.h"
#include "sb-errors_format.h"
#include "sb-members_format.h"
#include "xattr_format.h"
enum bch_sb_field_type { enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr, #define x(f, nr) BCH_SB_FIELD_##f = nr,
...@@ -545,107 +551,6 @@ struct bch_sb_field_journal_v2 { ...@@ -545,107 +551,6 @@ struct bch_sb_field_journal_v2 {
} d[]; } d[];
}; };
/* BCH_SB_FIELD_members_v1: */
#define BCH_MIN_NR_NBUCKETS (1 << 6)
#define BCH_IOPS_MEASUREMENTS() \
x(seqread, 0) \
x(seqwrite, 1) \
x(randread, 2) \
x(randwrite, 3)
enum bch_iops_measurement {
#define x(t, n) BCH_IOPS_##t = n,
BCH_IOPS_MEASUREMENTS()
#undef x
BCH_IOPS_NR
};
#define BCH_MEMBER_ERROR_TYPES() \
x(read, 0) \
x(write, 1) \
x(checksum, 2)
enum bch_member_error_type {
#define x(t, n) BCH_MEMBER_ERROR_##t = n,
BCH_MEMBER_ERROR_TYPES()
#undef x
BCH_MEMBER_ERROR_NR
};
/*
 * Per-device entry of the members superblock fields: both
 * bch_sb_field_members_v1 and bch_sb_field_members_v2 carry an array of these.
 * Multi-byte fields use the little-endian __le* types.
 */
struct bch_member {
__uuid_t uuid;
__le64 nbuckets; /* device size */
__le16 first_bucket; /* index of first bucket used */
__le16 bucket_size; /* sectors */
__u8 btree_bitmap_shift;
__u8 pad[3];
__le64 last_mount; /* time_t */
/* packed bitfields, accessed through the BCH_MEMBER_* LE64_BITMASK accessors */
__le64 flags;
/* presumably one slot per BCH_IOPS_* measurement (four are defined) - TODO confirm */
__le32 iops[4];
/* sized by BCH_MEMBER_ERROR_NR: one counter per enum bch_member_error_type */
__le64 errors[BCH_MEMBER_ERROR_NR];
__le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
__le64 errors_reset_time;
__le64 seq;
__le64 btree_allocated_bitmap;
/*
* On recovery from a clean shutdown we don't normally read the journal,
* but we still want to resume writing from where we left off so we
* don't overwrite more than is necessary, for list journal debugging:
*/
__le32 last_journal_bucket;
__le32 last_journal_bucket_offset;
};
/*
* This limit comes from the bucket_gens array - it's a single allocation, and
* kernel allocations are limited to INT_MAX
*/
#define BCH_MEMBER_NBUCKETS_MAX (INT_MAX - 64)
#define BCH_MEMBER_V1_BYTES 56
LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4)
/* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */
LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags, 14, 15)
LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags, 15, 20)
LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28)
LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30)
LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED,
struct bch_member, flags, 30, 31)
#if 0
LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
#endif
#define BCH_MEMBER_STATES() \
x(rw, 0) \
x(ro, 1) \
x(failed, 2) \
x(spare, 3)
enum bch_member_state {
#define x(t, n) BCH_MEMBER_STATE_##t = n,
BCH_MEMBER_STATES()
#undef x
BCH_MEMBER_STATE_NR
};
struct bch_sb_field_members_v1 {
struct bch_sb_field field;
struct bch_member _members[]; //Members are now variable size
};
struct bch_sb_field_members_v2 {
struct bch_sb_field field;
__le16 member_bytes; //size of single member entry
u8 pad[6];
struct bch_member _members[];
};
/* BCH_SB_FIELD_crypt: */ /* BCH_SB_FIELD_crypt: */
struct nonce { struct nonce {
...@@ -694,8 +599,6 @@ LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16); ...@@ -694,8 +599,6 @@ LE64_BITMASK(BCH_KDF_SCRYPT_N, struct bch_sb_field_crypt, kdf_flags, 0, 16);
LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32); LE64_BITMASK(BCH_KDF_SCRYPT_R, struct bch_sb_field_crypt, kdf_flags, 16, 32);
LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48); LE64_BITMASK(BCH_KDF_SCRYPT_P, struct bch_sb_field_crypt, kdf_flags, 32, 48);
/* BCH_SB_FIELD_replicas: */
#define BCH_DATA_TYPES() \ #define BCH_DATA_TYPES() \
x(free, 0) \ x(free, 0) \
x(sb, 1) \ x(sb, 1) \
...@@ -738,50 +641,6 @@ static inline bool data_type_is_hidden(enum bch_data_type type) ...@@ -738,50 +641,6 @@ static inline bool data_type_is_hidden(enum bch_data_type type)
} }
} }
struct bch_replicas_entry_v0 {
__u8 data_type;
__u8 nr_devs;
__u8 devs[];
} __packed;
struct bch_sb_field_replicas_v0 {
struct bch_sb_field field;
struct bch_replicas_entry_v0 entries[];
} __packed __aligned(8);
struct bch_replicas_entry_v1 {
__u8 data_type;
__u8 nr_devs;
__u8 nr_required;
__u8 devs[];
} __packed;
#define replicas_entry_bytes(_i) \
(offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
struct bch_sb_field_replicas {
struct bch_sb_field field;
struct bch_replicas_entry_v1 entries[];
} __packed __aligned(8);
/* BCH_SB_FIELD_disk_groups: */
#define BCH_SB_LABEL_SIZE 32
struct bch_disk_group {
__u8 label[BCH_SB_LABEL_SIZE];
__le64 flags[2];
} __packed __aligned(8);
LE64_BITMASK(BCH_GROUP_DELETED, struct bch_disk_group, flags[0], 0, 1)
LE64_BITMASK(BCH_GROUP_DATA_ALLOWED, struct bch_disk_group, flags[0], 1, 6)
LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24)
struct bch_sb_field_disk_groups {
struct bch_sb_field field;
struct bch_disk_group entries[];
} __packed __aligned(8);
/* /*
* On clean shutdown, store btree roots and current journal sequence number in * On clean shutdown, store btree roots and current journal sequence number in
* the superblock: * the superblock:
...@@ -809,27 +668,6 @@ struct bch_sb_field_clean { ...@@ -809,27 +668,6 @@ struct bch_sb_field_clean {
__u64 _data[]; __u64 _data[];
}; };
struct journal_seq_blacklist_entry {
__le64 start;
__le64 end;
};
struct bch_sb_field_journal_seq_blacklist {
struct bch_sb_field field;
struct journal_seq_blacklist_entry start[];
};
struct bch_sb_field_errors {
struct bch_sb_field field;
struct bch_sb_field_error_entry {
__le64 v;
__le64 last_error_time;
} entries[];
};
LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);
LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
struct bch_sb_field_ext { struct bch_sb_field_ext {
struct bch_sb_field field; struct bch_sb_field field;
__le64 recovery_passes_required[2]; __le64 recovery_passes_required[2];
...@@ -837,18 +675,6 @@ struct bch_sb_field_ext { ...@@ -837,18 +675,6 @@ struct bch_sb_field_ext {
__le64 btrees_lost_data; __le64 btrees_lost_data;
}; };
struct bch_sb_field_downgrade_entry {
__le16 version;
__le64 recovery_passes[2];
__le16 nr_errors;
__le16 errors[] __counted_by(nr_errors);
} __packed __aligned(2);
struct bch_sb_field_downgrade {
struct bch_sb_field field;
struct bch_sb_field_downgrade_entry entries[];
};
/* Superblock: */ /* Superblock: */
/* /*
...@@ -909,7 +735,6 @@ unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_re ...@@ -909,7 +735,6 @@ unsigned bcachefs_metadata_required_upgrade_below = bcachefs_metadata_version_re
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
#define BCH_SB_SECTOR 8 #define BCH_SB_SECTOR 8
#define BCH_SB_MEMBERS_MAX 64 /* XXX kill */
#define BCH_SB_LAYOUT_SIZE_BITS_MAX 16 /* 32 MB */ #define BCH_SB_LAYOUT_SIZE_BITS_MAX 16 /* 32 MB */
......
...@@ -585,16 +585,17 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id, ...@@ -585,16 +585,17 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c, if (fsck_err_on(k.k->version.lo > atomic64_read(&c->key_version), c,
bkey_version_in_future, bkey_version_in_future,
"key version number higher than recorded: %llu > %llu", "key version number higher than recorded %llu\n %s",
k.k->version.lo, atomic64_read(&c->key_version),
atomic64_read(&c->key_version))) (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
atomic64_set(&c->key_version, k.k->version.lo); atomic64_set(&c->key_version, k.k->version.lo);
} }
if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k), if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, k),
c, btree_bitmap_not_marked, c, btree_bitmap_not_marked,
"btree ptr not marked in member info btree allocated bitmap\n %s", "btree ptr not marked in member info btree allocated bitmap\n %s",
(bch2_bkey_val_to_text(&buf, c, k), (printbuf_reset(&buf),
bch2_bkey_val_to_text(&buf, c, k),
buf.buf))) { buf.buf))) {
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
bch2_dev_btree_bitmap_mark(c, k); bch2_dev_btree_bitmap_mark(c, k);
...@@ -673,8 +674,7 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in ...@@ -673,8 +674,7 @@ static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree, bool in
static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r) static inline int btree_id_gc_phase_cmp(enum btree_id l, enum btree_id r)
{ {
return (int) btree_id_to_gc_phase(l) - return cmp_int(gc_btree_order(l), gc_btree_order(r));
(int) btree_id_to_gc_phase(r);
} }
static int bch2_gc_btrees(struct bch_fs *c) static int bch2_gc_btrees(struct bch_fs *c)
...@@ -711,7 +711,7 @@ static int bch2_gc_btrees(struct bch_fs *c) ...@@ -711,7 +711,7 @@ static int bch2_gc_btrees(struct bch_fs *c)
static int bch2_mark_superblocks(struct bch_fs *c) static int bch2_mark_superblocks(struct bch_fs *c)
{ {
mutex_lock(&c->sb_lock); mutex_lock(&c->sb_lock);
gc_pos_set(c, gc_phase(GC_PHASE_SB)); gc_pos_set(c, gc_phase(GC_PHASE_sb));
int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc);
mutex_unlock(&c->sb_lock); mutex_unlock(&c->sb_lock);
...@@ -1209,7 +1209,7 @@ int bch2_check_allocations(struct bch_fs *c) ...@@ -1209,7 +1209,7 @@ int bch2_check_allocations(struct bch_fs *c)
if (ret) if (ret)
goto out; goto out;
gc_pos_set(c, gc_phase(GC_PHASE_START)); gc_pos_set(c, gc_phase(GC_PHASE_start));
ret = bch2_mark_superblocks(c); ret = bch2_mark_superblocks(c);
BUG_ON(ret); BUG_ON(ret);
...@@ -1231,7 +1231,7 @@ int bch2_check_allocations(struct bch_fs *c) ...@@ -1231,7 +1231,7 @@ int bch2_check_allocations(struct bch_fs *c)
percpu_down_write(&c->mark_lock); percpu_down_write(&c->mark_lock);
/* Indicates that gc is no longer in progress: */ /* Indicates that gc is no longer in progress: */
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING)); __gc_pos_set(c, gc_phase(GC_PHASE_not_running));
bch2_gc_free(c); bch2_gc_free(c);
percpu_up_write(&c->mark_lock); percpu_up_write(&c->mark_lock);
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#define _BCACHEFS_BTREE_GC_H #define _BCACHEFS_BTREE_GC_H
#include "bkey.h" #include "bkey.h"
#include "btree_gc_types.h"
#include "btree_types.h" #include "btree_types.h"
int bch2_check_topology(struct bch_fs *); int bch2_check_topology(struct bch_fs *);
...@@ -32,36 +33,15 @@ int bch2_check_allocations(struct bch_fs *); ...@@ -32,36 +33,15 @@ int bch2_check_allocations(struct bch_fs *);
/* Position of (the start of) a gc phase: */ /* Position of (the start of) a gc phase: */
static inline struct gc_pos gc_phase(enum gc_phase phase) static inline struct gc_pos gc_phase(enum gc_phase phase)
{ {
return (struct gc_pos) { return (struct gc_pos) { .phase = phase, };
.phase = phase,
.level = 0,
.pos = POS_MIN,
};
}
/*
 * Three-way comparison of two gc positions.
 *
 * Keys are compared in order, stopping at the first that differs:
 *   1. phase, ascending;
 *   2. level, with the sign flipped so a higher level sorts first;
 *   3. pos, via bpos_cmp().
 *
 * Returns the first nonzero comparison result, or bpos_cmp()'s result when
 * phase and level are equal.
 */
static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
{
	int ret;

	ret = cmp_int(l.phase, r.phase);
	if (ret)
		return ret;

	ret = -cmp_int(l.level, r.level);
	if (ret)
		return ret;

	return bpos_cmp(l.pos, r.pos);
}
/*
 * Map a btree id to its gc phase. The x-macro expands BCH_BTREE_IDS() into one
 * case per btree, each returning the GC_PHASE_BTREE_<name> enumerator with the
 * same name as the BTREE_ID_<name> being matched.
 */
static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id)
{
switch (id) {
#define x(name, v, ...) case BTREE_ID_##name: return GC_PHASE_BTREE_##name;
BCH_BTREE_IDS()
#undef x
default:
/* an id not listed in BCH_BTREE_IDS() has no phase: treated as a bug */
BUG();
}
}
static inline struct gc_pos gc_pos_btree(enum btree_id btree, unsigned level, static inline struct gc_pos gc_pos_btree(enum btree_id btree, unsigned level,
struct bpos pos) struct bpos pos)
{ {
return (struct gc_pos) { return (struct gc_pos) {
.phase = btree_id_to_gc_phase(btree), .phase = GC_PHASE_btree,
.btree = btree,
.level = level, .level = level,
.pos = pos, .pos = pos,
}; };
...@@ -76,6 +56,22 @@ static inline struct gc_pos gc_pos_btree_node(struct btree *b) ...@@ -76,6 +56,22 @@ static inline struct gc_pos gc_pos_btree_node(struct btree *b)
return gc_pos_btree(b->c.btree_id, b->c.level, b->key.k.p); return gc_pos_btree(b->c.btree_id, b->c.level, b->key.k.p);
} }
/*
 * Sort key used to order btrees for gc (see btree_id_gc_phase_cmp() and
 * gc_pos_cmp()).
 *
 * BTREE_ID_stripes maps to -1; every other btree maps to its own id. Assuming
 * btree ids are non-negative, stripes therefore sorts ahead of all other
 * btrees.
 */
static inline int gc_btree_order(enum btree_id btree)
{
	int order = btree;

	if (btree == BTREE_ID_stripes)
		order = -1;

	return order;
}
/*
 * Three-way comparison of two gc positions.
 *
 * Keys are compared in order, stopping at the first that differs:
 *   1. phase, ascending;
 *   2. btree, by gc_btree_order() rather than by raw btree id;
 *   3. level, with the sign flipped so a higher level sorts first;
 *   4. pos, via bpos_cmp().
 *
 * Returns the first nonzero comparison result, or bpos_cmp()'s result when all
 * earlier keys are equal.
 */
static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r)
{
	int ret;

	ret = cmp_int(l.phase, r.phase);
	if (ret)
		return ret;

	ret = cmp_int(gc_btree_order(l.btree),
		      gc_btree_order(r.btree));
	if (ret)
		return ret;

	ret = -cmp_int(l.level, r.level);
	if (ret)
		return ret;

	return bpos_cmp(l.pos, r.pos);
}
static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos) static inline bool gc_visited(struct bch_fs *c, struct gc_pos pos)
{ {
unsigned seq; unsigned seq;
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_BTREE_GC_TYPES_H
#define _BCACHEFS_BTREE_GC_TYPES_H
#include <linux/generic-radix-tree.h>
/*
 * Coarse phases of a gc run. gc_pos_cmp() compares positions by phase first,
 * so the enumerator order below is the order gc positions sort in.
 */
enum gc_phase {
/* gc position is set to this once gc is no longer in progress */
GC_PHASE_not_running,
/* set by bch2_check_allocations() before superblocks are marked */
GC_PHASE_start,
/* set by bch2_mark_superblocks() */
GC_PHASE_sb,
/* any position within a btree; gc_pos_btree() fills in btree, level and pos */
GC_PHASE_btree,
};
/*
 * A position within a gc run. gc_pos_cmp() orders positions by phase, then by
 * gc_btree_order(btree), then by level (higher level first), then by pos.
 */
struct gc_pos {
/* phase and btree are stored as 8-bit bitfields */
enum gc_phase phase:8;
/* set by gc_pos_btree(); gc_phase() only sets .phase, leaving this zero */
enum btree_id btree:8;
/* btree level, as passed to gc_pos_btree() */
u16 level;
/* position, as passed to gc_pos_btree() */
struct bpos pos;
};
/*
 * One entry of the reflink gc table.
 * NOTE(review): presumably tracks the reference count gc computed for a range
 * of the reflink btree, identified by offset and size - confirm against the
 * users of reflink_gc_table.
 */
struct reflink_gc {
u64 offset;
u32 size;
u32 refcount;
};
/* generic radix tree (linux/generic-radix-tree.h) of struct reflink_gc entries */
typedef GENRADIX(struct reflink_gc) reflink_gc_table;
#endif /* _BCACHEFS_BTREE_GC_TYPES_H */
...@@ -519,7 +519,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b) ...@@ -519,7 +519,7 @@ void bch2_btree_init_next(struct btree_trans *trans, struct btree *b)
static void btree_err_msg(struct printbuf *out, struct bch_fs *c, static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
struct bch_dev *ca, struct bch_dev *ca,
struct btree *b, struct bset *i, struct btree *b, struct bset *i, struct bkey_packed *k,
unsigned offset, int write) unsigned offset, int write)
{ {
prt_printf(out, bch2_log_msg(c, "%s"), prt_printf(out, bch2_log_msg(c, "%s"),
...@@ -537,15 +537,20 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c, ...@@ -537,15 +537,20 @@ static void btree_err_msg(struct printbuf *out, struct bch_fs *c,
b->written, btree_ptr_sectors_written(&b->key)); b->written, btree_ptr_sectors_written(&b->key));
if (i) if (i)
prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s)); prt_printf(out, " bset u64s %u", le16_to_cpu(i->u64s));
if (k)
prt_printf(out, " bset byte offset %lu",
(unsigned long)(void *)k -
((unsigned long)(void *)i & ~511UL));
prt_str(out, ": "); prt_str(out, ": ");
} }
__printf(9, 10) __printf(10, 11)
static int __btree_err(int ret, static int __btree_err(int ret,
struct bch_fs *c, struct bch_fs *c,
struct bch_dev *ca, struct bch_dev *ca,
struct btree *b, struct btree *b,
struct bset *i, struct bset *i,
struct bkey_packed *k,
int write, int write,
bool have_retry, bool have_retry,
enum bch_sb_error_id err_type, enum bch_sb_error_id err_type,
...@@ -555,7 +560,7 @@ static int __btree_err(int ret, ...@@ -555,7 +560,7 @@ static int __btree_err(int ret,
bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes; bool silent = c->curr_recovery_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes;
va_list args; va_list args;
btree_err_msg(&out, c, ca, b, i, b->written, write); btree_err_msg(&out, c, ca, b, i, k, b->written, write);
va_start(args, fmt); va_start(args, fmt);
prt_vprintf(&out, fmt, args); prt_vprintf(&out, fmt, args);
...@@ -611,9 +616,9 @@ static int __btree_err(int ret, ...@@ -611,9 +616,9 @@ static int __btree_err(int ret,
return ret; return ret;
} }
#define btree_err(type, c, ca, b, i, _err_type, msg, ...) \ #define btree_err(type, c, ca, b, i, k, _err_type, msg, ...) \
({ \ ({ \
int _ret = __btree_err(type, c, ca, b, i, write, have_retry, \ int _ret = __btree_err(type, c, ca, b, i, k, write, have_retry, \
BCH_FSCK_ERR_##_err_type, \ BCH_FSCK_ERR_##_err_type, \
msg, ##__VA_ARGS__); \ msg, ##__VA_ARGS__); \
\ \
...@@ -690,7 +695,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, ...@@ -690,7 +695,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(!bch2_version_compatible(version), btree_err_on(!bch2_version_compatible(version),
-BCH_ERR_btree_node_read_err_incompatible, -BCH_ERR_btree_node_read_err_incompatible,
c, ca, b, i, c, ca, b, i, NULL,
btree_node_unsupported_version, btree_node_unsupported_version,
"unsupported bset version %u.%u", "unsupported bset version %u.%u",
BCH_VERSION_MAJOR(version), BCH_VERSION_MAJOR(version),
...@@ -698,7 +703,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, ...@@ -698,7 +703,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
if (btree_err_on(version < c->sb.version_min, if (btree_err_on(version < c->sb.version_min,
-BCH_ERR_btree_node_read_err_fixable, -BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i, c, NULL, b, i, NULL,
btree_node_bset_older_than_sb_min, btree_node_bset_older_than_sb_min,
"bset version %u older than superblock version_min %u", "bset version %u older than superblock version_min %u",
version, c->sb.version_min)) { version, c->sb.version_min)) {
...@@ -711,7 +716,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, ...@@ -711,7 +716,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
if (btree_err_on(BCH_VERSION_MAJOR(version) > if (btree_err_on(BCH_VERSION_MAJOR(version) >
BCH_VERSION_MAJOR(c->sb.version), BCH_VERSION_MAJOR(c->sb.version),
-BCH_ERR_btree_node_read_err_fixable, -BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i, c, NULL, b, i, NULL,
btree_node_bset_newer_than_sb, btree_node_bset_newer_than_sb,
"bset version %u newer than superblock version %u", "bset version %u newer than superblock version %u",
version, c->sb.version)) { version, c->sb.version)) {
...@@ -723,13 +728,13 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, ...@@ -723,13 +728,13 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(BSET_SEPARATE_WHITEOUTS(i), btree_err_on(BSET_SEPARATE_WHITEOUTS(i),
-BCH_ERR_btree_node_read_err_incompatible, -BCH_ERR_btree_node_read_err_incompatible,
c, ca, b, i, c, ca, b, i, NULL,
btree_node_unsupported_version, btree_node_unsupported_version,
"BSET_SEPARATE_WHITEOUTS no longer supported"); "BSET_SEPARATE_WHITEOUTS no longer supported");
if (btree_err_on(offset + sectors > btree_sectors(c), if (btree_err_on(offset + sectors > btree_sectors(c),
-BCH_ERR_btree_node_read_err_fixable, -BCH_ERR_btree_node_read_err_fixable,
c, ca, b, i, c, ca, b, i, NULL,
bset_past_end_of_btree_node, bset_past_end_of_btree_node,
"bset past end of btree node")) { "bset past end of btree node")) {
i->u64s = 0; i->u64s = 0;
...@@ -739,13 +744,13 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, ...@@ -739,13 +744,13 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(offset && !i->u64s, btree_err_on(offset && !i->u64s,
-BCH_ERR_btree_node_read_err_fixable, -BCH_ERR_btree_node_read_err_fixable,
c, ca, b, i, c, ca, b, i, NULL,
bset_empty, bset_empty,
"empty bset"); "empty bset");
btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset, btree_err_on(BSET_OFFSET(i) && BSET_OFFSET(i) != offset,
-BCH_ERR_btree_node_read_err_want_retry, -BCH_ERR_btree_node_read_err_want_retry,
c, ca, b, i, c, ca, b, i, NULL,
bset_wrong_sector_offset, bset_wrong_sector_offset,
"bset at wrong sector offset"); "bset at wrong sector offset");
...@@ -761,20 +766,20 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, ...@@ -761,20 +766,20 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
/* XXX endianness */ /* XXX endianness */
btree_err_on(bp->seq != bn->keys.seq, btree_err_on(bp->seq != bn->keys.seq,
-BCH_ERR_btree_node_read_err_must_retry, -BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, c, ca, b, NULL, NULL,
bset_bad_seq, bset_bad_seq,
"incorrect sequence number (wrong btree node)"); "incorrect sequence number (wrong btree node)");
} }
btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id, btree_err_on(BTREE_NODE_ID(bn) != b->c.btree_id,
-BCH_ERR_btree_node_read_err_must_retry, -BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, i, c, ca, b, i, NULL,
btree_node_bad_btree, btree_node_bad_btree,
"incorrect btree id"); "incorrect btree id");
btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level, btree_err_on(BTREE_NODE_LEVEL(bn) != b->c.level,
-BCH_ERR_btree_node_read_err_must_retry, -BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, i, c, ca, b, i, NULL,
btree_node_bad_level, btree_node_bad_level,
"incorrect level"); "incorrect level");
...@@ -793,7 +798,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, ...@@ -793,7 +798,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(!bpos_eq(b->data->min_key, bp->min_key), btree_err_on(!bpos_eq(b->data->min_key, bp->min_key),
-BCH_ERR_btree_node_read_err_must_retry, -BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, c, ca, b, NULL, NULL,
btree_node_bad_min_key, btree_node_bad_min_key,
"incorrect min_key: got %s should be %s", "incorrect min_key: got %s should be %s",
(printbuf_reset(&buf1), (printbuf_reset(&buf1),
...@@ -804,7 +809,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, ...@@ -804,7 +809,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(!bpos_eq(bn->max_key, b->key.k.p), btree_err_on(!bpos_eq(bn->max_key, b->key.k.p),
-BCH_ERR_btree_node_read_err_must_retry, -BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, i, c, ca, b, i, NULL,
btree_node_bad_max_key, btree_node_bad_max_key,
"incorrect max key %s", "incorrect max key %s",
(printbuf_reset(&buf1), (printbuf_reset(&buf1),
...@@ -816,7 +821,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, ...@@ -816,7 +821,7 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1), btree_err_on(bch2_bkey_format_invalid(c, &bn->format, write, &buf1),
-BCH_ERR_btree_node_read_err_bad_node, -BCH_ERR_btree_node_read_err_bad_node,
c, ca, b, i, c, ca, b, i, NULL,
btree_node_bad_format, btree_node_bad_format,
"invalid bkey format: %s\n %s", buf1.buf, "invalid bkey format: %s\n %s", buf1.buf,
(printbuf_reset(&buf2), (printbuf_reset(&buf2),
...@@ -883,7 +888,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, ...@@ -883,7 +888,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
if (btree_err_on(bkey_p_next(k) > vstruct_last(i), if (btree_err_on(bkey_p_next(k) > vstruct_last(i),
-BCH_ERR_btree_node_read_err_fixable, -BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i, c, NULL, b, i, k,
btree_node_bkey_past_bset_end, btree_node_bkey_past_bset_end,
"key extends past end of bset")) { "key extends past end of bset")) {
i->u64s = cpu_to_le16((u64 *) k - i->_data); i->u64s = cpu_to_le16((u64 *) k - i->_data);
...@@ -892,14 +897,14 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, ...@@ -892,14 +897,14 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
if (btree_err_on(k->format > KEY_FORMAT_CURRENT, if (btree_err_on(k->format > KEY_FORMAT_CURRENT,
-BCH_ERR_btree_node_read_err_fixable, -BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i, c, NULL, b, i, k,
btree_node_bkey_bad_format, btree_node_bkey_bad_format,
"invalid bkey format %u", k->format)) "invalid bkey format %u", k->format))
goto drop_this_key; goto drop_this_key;
if (btree_err_on(!bkeyp_u64s_valid(&b->format, k), if (btree_err_on(!bkeyp_u64s_valid(&b->format, k),
-BCH_ERR_btree_node_read_err_fixable, -BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i, c, NULL, b, i, k,
btree_node_bkey_bad_u64s, btree_node_bkey_bad_u64s,
"bad k->u64s %u (min %u max %zu)", k->u64s, "bad k->u64s %u (min %u max %zu)", k->u64s,
bkeyp_key_u64s(&b->format, k), bkeyp_key_u64s(&b->format, k),
...@@ -921,7 +926,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, ...@@ -921,7 +926,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
bch2_bkey_val_to_text(&buf, c, u.s_c); bch2_bkey_val_to_text(&buf, c, u.s_c);
btree_err(-BCH_ERR_btree_node_read_err_fixable, btree_err(-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i, c, NULL, b, i, k,
btree_node_bad_bkey, btree_node_bad_bkey,
"invalid bkey: %s", buf.buf); "invalid bkey: %s", buf.buf);
goto drop_this_key; goto drop_this_key;
...@@ -942,7 +947,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, ...@@ -942,7 +947,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
bch2_bkey_to_text(&buf, u.k); bch2_bkey_to_text(&buf, u.k);
if (btree_err(-BCH_ERR_btree_node_read_err_fixable, if (btree_err(-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i, c, NULL, b, i, k,
btree_node_bkey_out_of_order, btree_node_bkey_out_of_order,
"%s", buf.buf)) "%s", buf.buf))
goto drop_this_key; goto drop_this_key;
...@@ -1011,13 +1016,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1011,13 +1016,13 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
if (bch2_meta_read_fault("btree")) if (bch2_meta_read_fault("btree"))
btree_err(-BCH_ERR_btree_node_read_err_must_retry, btree_err(-BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, c, ca, b, NULL, NULL,
btree_node_fault_injected, btree_node_fault_injected,
"dynamic fault"); "dynamic fault");
btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c), btree_err_on(le64_to_cpu(b->data->magic) != bset_magic(c),
-BCH_ERR_btree_node_read_err_must_retry, -BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, c, ca, b, NULL, NULL,
btree_node_bad_magic, btree_node_bad_magic,
"bad magic: want %llx, got %llx", "bad magic: want %llx, got %llx",
bset_magic(c), le64_to_cpu(b->data->magic)); bset_magic(c), le64_to_cpu(b->data->magic));
...@@ -1032,7 +1037,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1032,7 +1037,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(b->data->keys.seq != bp->seq, btree_err_on(b->data->keys.seq != bp->seq,
-BCH_ERR_btree_node_read_err_must_retry, -BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, c, ca, b, NULL, NULL,
btree_node_bad_seq, btree_node_bad_seq,
"got wrong btree node: got\n%s", "got wrong btree node: got\n%s",
(printbuf_reset(&buf), (printbuf_reset(&buf),
...@@ -1041,7 +1046,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1041,7 +1046,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
} else { } else {
btree_err_on(!b->data->keys.seq, btree_err_on(!b->data->keys.seq,
-BCH_ERR_btree_node_read_err_must_retry, -BCH_ERR_btree_node_read_err_must_retry,
c, ca, b, NULL, c, ca, b, NULL, NULL,
btree_node_bad_seq, btree_node_bad_seq,
"bad btree header: seq 0\n%s", "bad btree header: seq 0\n%s",
(printbuf_reset(&buf), (printbuf_reset(&buf),
...@@ -1060,7 +1065,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1060,7 +1065,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
-BCH_ERR_btree_node_read_err_want_retry, -BCH_ERR_btree_node_read_err_want_retry,
c, ca, b, i, c, ca, b, i, NULL,
bset_unknown_csum, bset_unknown_csum,
"unknown checksum type %llu", BSET_CSUM_TYPE(i)); "unknown checksum type %llu", BSET_CSUM_TYPE(i));
...@@ -1073,7 +1078,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1073,7 +1078,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(csum_bad, btree_err_on(csum_bad,
-BCH_ERR_btree_node_read_err_want_retry, -BCH_ERR_btree_node_read_err_want_retry,
c, ca, b, i, c, ca, b, i, NULL,
bset_bad_csum, bset_bad_csum,
"%s", "%s",
(printbuf_reset(&buf), (printbuf_reset(&buf),
...@@ -1088,7 +1093,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1088,7 +1093,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(btree_node_type_is_extents(btree_node_type(b)) && btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
!BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data), !BTREE_NODE_NEW_EXTENT_OVERWRITE(b->data),
-BCH_ERR_btree_node_read_err_incompatible, -BCH_ERR_btree_node_read_err_incompatible,
c, NULL, b, NULL, c, NULL, b, NULL, NULL,
btree_node_unsupported_version, btree_node_unsupported_version,
"btree node does not have NEW_EXTENT_OVERWRITE set"); "btree node does not have NEW_EXTENT_OVERWRITE set");
...@@ -1102,7 +1107,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1102,7 +1107,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)), btree_err_on(!bch2_checksum_type_valid(c, BSET_CSUM_TYPE(i)),
-BCH_ERR_btree_node_read_err_want_retry, -BCH_ERR_btree_node_read_err_want_retry,
c, ca, b, i, c, ca, b, i, NULL,
bset_unknown_csum, bset_unknown_csum,
"unknown checksum type %llu", BSET_CSUM_TYPE(i)); "unknown checksum type %llu", BSET_CSUM_TYPE(i));
...@@ -1114,7 +1119,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1114,7 +1119,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(csum_bad, btree_err_on(csum_bad,
-BCH_ERR_btree_node_read_err_want_retry, -BCH_ERR_btree_node_read_err_want_retry,
c, ca, b, i, c, ca, b, i, NULL,
bset_bad_csum, bset_bad_csum,
"%s", "%s",
(printbuf_reset(&buf), (printbuf_reset(&buf),
...@@ -1152,14 +1157,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1152,14 +1157,14 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
btree_err_on(blacklisted && first, btree_err_on(blacklisted && first,
-BCH_ERR_btree_node_read_err_fixable, -BCH_ERR_btree_node_read_err_fixable,
c, ca, b, i, c, ca, b, i, NULL,
bset_blacklisted_journal_seq, bset_blacklisted_journal_seq,
"first btree node bset has blacklisted journal seq (%llu)", "first btree node bset has blacklisted journal seq (%llu)",
le64_to_cpu(i->journal_seq)); le64_to_cpu(i->journal_seq));
btree_err_on(blacklisted && ptr_written, btree_err_on(blacklisted && ptr_written,
-BCH_ERR_btree_node_read_err_fixable, -BCH_ERR_btree_node_read_err_fixable,
c, ca, b, i, c, ca, b, i, NULL,
first_bset_blacklisted_journal_seq, first_bset_blacklisted_journal_seq,
"found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u", "found blacklisted bset (journal seq %llu) in btree node at offset %u-%u/%u",
le64_to_cpu(i->journal_seq), le64_to_cpu(i->journal_seq),
...@@ -1178,7 +1183,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1178,7 +1183,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
if (ptr_written) { if (ptr_written) {
btree_err_on(b->written < ptr_written, btree_err_on(b->written < ptr_written,
-BCH_ERR_btree_node_read_err_want_retry, -BCH_ERR_btree_node_read_err_want_retry,
c, ca, b, NULL, c, ca, b, NULL, NULL,
btree_node_data_missing, btree_node_data_missing,
"btree node data missing: expected %u sectors, found %u", "btree node data missing: expected %u sectors, found %u",
ptr_written, b->written); ptr_written, b->written);
...@@ -1191,7 +1196,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1191,7 +1196,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
le64_to_cpu(bne->keys.journal_seq), le64_to_cpu(bne->keys.journal_seq),
true), true),
-BCH_ERR_btree_node_read_err_want_retry, -BCH_ERR_btree_node_read_err_want_retry,
c, ca, b, NULL, c, ca, b, NULL, NULL,
btree_node_bset_after_end, btree_node_bset_after_end,
"found bset signature after last bset"); "found bset signature after last bset");
} }
...@@ -1235,7 +1240,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, ...@@ -1235,7 +1240,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
bch2_bkey_val_to_text(&buf, c, u.s_c); bch2_bkey_val_to_text(&buf, c, u.s_c);
btree_err(-BCH_ERR_btree_node_read_err_fixable, btree_err(-BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, i, c, NULL, b, i, k,
btree_node_bad_bkey, btree_node_bad_bkey,
"%s", buf.buf); "%s", buf.buf);
...@@ -1471,18 +1476,18 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done) ...@@ -1471,18 +1476,18 @@ static CLOSURE_CALLBACK(btree_node_read_all_replicas_done)
written2 = btree_node_sectors_written(c, ra->buf[i]); written2 = btree_node_sectors_written(c, ra->buf[i]);
if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable, if (btree_err_on(written2 != written, -BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, NULL, c, NULL, b, NULL, NULL,
btree_node_replicas_sectors_written_mismatch, btree_node_replicas_sectors_written_mismatch,
"btree node sectors written mismatch: %u != %u", "btree node sectors written mismatch: %u != %u",
written, written2) || written, written2) ||
btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]), btree_err_on(btree_node_has_extra_bsets(c, written2, ra->buf[i]),
-BCH_ERR_btree_node_read_err_fixable, -BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, NULL, c, NULL, b, NULL, NULL,
btree_node_bset_after_end, btree_node_bset_after_end,
"found bset signature after last bset") || "found bset signature after last bset") ||
btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9), btree_err_on(memcmp(ra->buf[best], ra->buf[i], written << 9),
-BCH_ERR_btree_node_read_err_fixable, -BCH_ERR_btree_node_read_err_fixable,
c, NULL, b, NULL, c, NULL, b, NULL, NULL,
btree_node_replicas_data_mismatch, btree_node_replicas_data_mismatch,
"btree node replicas content mismatch")) "btree node replicas content mismatch"))
dump_bset_maps = true; dump_bset_maps = true;
......
...@@ -424,16 +424,16 @@ static int btree_key_cache_fill(struct btree_trans *trans, ...@@ -424,16 +424,16 @@ static int btree_key_cache_fill(struct btree_trans *trans,
goto err; goto err;
} }
if (!bch2_btree_node_relock(trans, ck_path, 0)) { ret = bch2_trans_relock(trans);
if (ret) {
kfree(new_k); kfree(new_k);
trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
goto err; goto err;
} }
ret = bch2_trans_relock(trans); if (!bch2_btree_node_relock(trans, ck_path, 0)) {
if (ret) {
kfree(new_k); kfree(new_k);
trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
goto err; goto err;
} }
} }
......
...@@ -1134,7 +1134,7 @@ static int __trigger_extent(struct btree_trans *trans, ...@@ -1134,7 +1134,7 @@ static int __trigger_extent(struct btree_trans *trans,
r.e.nr_required = 1; r.e.nr_required = 1;
bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
s64 disk_sectors; s64 disk_sectors = 0;
ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags); ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags);
if (ret < 0) if (ret < 0)
return ret; return ret;
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_DISK_GROUPS_FORMAT_H
#define _BCACHEFS_DISK_GROUPS_FORMAT_H
#define BCH_SB_LABEL_SIZE 32
/*
 * One disk group entry: a fixed-size label buffer of BCH_SB_LABEL_SIZE
 * bytes followed by two little-endian 64-bit flag words. The struct is
 * packed and 8-byte aligned, so its layout has no implicit padding.
 */
struct bch_disk_group {
__u8 label[BCH_SB_LABEL_SIZE];
__le64 flags[2];
} __packed __aligned(8);
/*
 * Bitfield accessors over flags[0] (flags[1] has no accessor here).
 * NOTE(review): the bit ranges below assume LE64_BITMASK takes a
 * half-open [start, end) bit range - confirm against its definition.
 *   BCH_GROUP_DELETED       bit  0
 *   BCH_GROUP_DATA_ALLOWED  bits 1-5
 *   BCH_GROUP_PARENT        bits 6-23
 */
LE64_BITMASK(BCH_GROUP_DELETED, struct bch_disk_group, flags[0], 0, 1)
LE64_BITMASK(BCH_GROUP_DATA_ALLOWED, struct bch_disk_group, flags[0], 1, 6)
LE64_BITMASK(BCH_GROUP_PARENT, struct bch_disk_group, flags[0], 6, 24)
/*
 * Superblock field wrapping a variable-length array of disk group entries.
 * The struct carries no explicit entry count; presumably the count is
 * derived from the size recorded in the bch_sb_field header - TODO confirm.
 */
struct bch_sb_field_disk_groups {
struct bch_sb_field field;
struct bch_disk_group entries[];
} __packed __aligned(8);
#endif /* _BCACHEFS_DISK_GROUPS_FORMAT_H */
...@@ -908,7 +908,7 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp) ...@@ -908,7 +908,7 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
if (!genradix_ptr_alloc(&c->stripes, idx, gfp)) if (!genradix_ptr_alloc(&c->stripes, idx, gfp))
return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc; return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING && if (c->gc_pos.phase != GC_PHASE_not_running &&
!genradix_ptr_alloc(&c->gc_stripes, idx, gfp)) !genradix_ptr_alloc(&c->gc_stripes, idx, gfp))
return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc; return -BCH_ERR_ENOMEM_ec_stripe_mem_alloc;
......
...@@ -437,8 +437,8 @@ static void bch2_writepage_io_done(struct bch_write_op *op) ...@@ -437,8 +437,8 @@ static void bch2_writepage_io_done(struct bch_write_op *op)
*/ */
/* /*
* PageWriteback is effectively our ref on the inode - fixup i_blocks * The writeback flag is effectively our ref on the inode -
* before calling end_page_writeback: * fixup i_blocks before calling folio_end_writeback:
*/ */
bch2_i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta); bch2_i_sectors_acct(c, io->inode, NULL, io->op.i_sectors_delta);
...@@ -898,7 +898,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode, ...@@ -898,7 +898,7 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
darray_for_each(fs, fi) { darray_for_each(fs, fi) {
f = *fi; f = *fi;
f_len = min(end, folio_end_pos(f)) - f_pos; f_len = min(end, folio_end_pos(f)) - f_pos;
f_copied = copy_page_from_iter_atomic(&f->page, f_offset, f_len, iter); f_copied = copy_folio_from_iter_atomic(f, f_offset, f_len, iter);
if (!f_copied) { if (!f_copied) {
folios_trunc(&fs, fi); folios_trunc(&fs, fi);
break; break;
......
...@@ -609,8 +609,10 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter) ...@@ -609,8 +609,10 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
if (unlikely(ret)) if (unlikely(ret))
goto err_put_write_ref; goto err_put_write_ref;
if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1))) {
ret = -EINVAL;
goto err_put_write_ref; goto err_put_write_ref;
}
inode_dio_begin(&inode->v); inode_dio_begin(&inode->v);
bch2_pagecache_block_get(inode); bch2_pagecache_block_get(inode);
......
...@@ -1939,8 +1939,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, ...@@ -1939,8 +1939,7 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
if (IS_ERR(sb)) { if (IS_ERR(sb)) {
ret = PTR_ERR(sb); ret = PTR_ERR(sb);
ret = bch2_err_class(ret); goto err;
return ERR_PTR(ret);
} }
c = sb->s_fs_info; c = sb->s_fs_info;
...@@ -2016,6 +2015,15 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type, ...@@ -2016,6 +2015,15 @@ static struct dentry *bch2_mount(struct file_system_type *fs_type,
err_put_super: err_put_super:
__bch2_fs_stop(c); __bch2_fs_stop(c);
deactivate_locked_super(sb); deactivate_locked_super(sb);
err:
/*
* On an inconsistency error in recovery we might see an -EROFS derived
* errorcode (from the journal), but we don't want to return that to
* userspace as that causes util-linux to retry the mount RO - which is
* confusing:
*/
if (bch2_err_matches(ret, EROFS) && ret != -EROFS)
ret = -EIO;
return ERR_PTR(bch2_err_class(ret)); return ERR_PTR(bch2_err_class(ret));
} }
......
...@@ -77,21 +77,17 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, ...@@ -77,21 +77,17 @@ static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
struct bkey_s_c k; struct bkey_s_c k;
int ret; int ret;
bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
POS(0, inode_nr), BTREE_ITER_all_snapshots, k, ret) {
BTREE_ITER_all_snapshots); if (k.k->p.offset != inode_nr)
k = bch2_btree_iter_peek(&iter); break;
ret = bkey_err(k); if (!bkey_is_inode(k.k))
if (ret) continue;
goto err; ret = bch2_inode_unpack(k, inode);
goto found;
if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) {
ret = -BCH_ERR_ENOENT_inode;
goto err;
} }
ret = -BCH_ERR_ENOENT_inode;
ret = bch2_inode_unpack(k, inode); found:
err:
bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr); bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
bch2_trans_iter_exit(trans, &iter); bch2_trans_iter_exit(trans, &iter);
return ret; return ret;
...@@ -770,25 +766,6 @@ static int get_visible_inodes(struct btree_trans *trans, ...@@ -770,25 +766,6 @@ static int get_visible_inodes(struct btree_trans *trans,
return ret; return ret;
} }
/*
 * fsck helper: flag (and delete) a key whose snapshot ID is not recognised.
 *
 * Raises the must-fix fsck error bkey_in_missing_snapshot when
 * bch2_snapshot_equiv() yields 0 for k.k->p.snapshot; if that check
 * evaluates true, the key at @iter is deleted.
 *
 * Returns 0 when nothing was deleted, 1 when the delete succeeded, or the
 * nonzero result of bch2_btree_delete_at() when it failed.
 * NOTE(review): mustfix_fsck_err_on() is defined elsewhere; it presumably
 * can also set ret and jump to the fsck_err label - confirm.
 */
static int check_key_has_snapshot(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
int ret = 0;
if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
bkey_in_missing_snapshot,
"key in missing snapshot: %s",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
/* "?: 1" turns a successful (0) delete into the positive "key deleted" result */
ret = bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node) ?: 1;
fsck_err:
/* single exit point: the printbuf is released on every path */
printbuf_exit(&buf);
return ret;
}
static int hash_redo_key(struct btree_trans *trans, static int hash_redo_key(struct btree_trans *trans,
const struct bch_hash_desc desc, const struct bch_hash_desc desc,
struct bch_hash_info *hash_info, struct bch_hash_info *hash_info,
...@@ -983,7 +960,7 @@ static int check_inode(struct btree_trans *trans, ...@@ -983,7 +960,7 @@ static int check_inode(struct btree_trans *trans,
bool do_update = false; bool do_update = false;
int ret; int ret;
ret = check_key_has_snapshot(trans, iter, k); ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret < 0) if (ret < 0)
goto err; goto err;
if (ret) if (ret)
...@@ -1487,7 +1464,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, ...@@ -1487,7 +1464,7 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
int ret = 0; int ret = 0;
ret = check_key_has_snapshot(trans, iter, k); ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret) { if (ret) {
ret = ret < 0 ? ret : 0; ret = ret < 0 ? ret : 0;
goto out; goto out;
...@@ -2010,7 +1987,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, ...@@ -2010,7 +1987,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
struct printbuf buf = PRINTBUF; struct printbuf buf = PRINTBUF;
int ret = 0; int ret = 0;
ret = check_key_has_snapshot(trans, iter, k); ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret) { if (ret) {
ret = ret < 0 ? ret : 0; ret = ret < 0 ? ret : 0;
goto out; goto out;
...@@ -2165,7 +2142,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, ...@@ -2165,7 +2142,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
struct inode_walker_entry *i; struct inode_walker_entry *i;
int ret; int ret;
ret = check_key_has_snapshot(trans, iter, k); ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret < 0) if (ret < 0)
return ret; return ret;
if (ret) if (ret)
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_FORMAT_H
#define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_FORMAT_H
/*
 * One blacklisted range, stored as two little-endian 64-bit values.
 * NOTE(review): whether 'end' is inclusive or exclusive is not visible
 * from this definition - confirm against the code that reads it.
 */
struct journal_seq_blacklist_entry {
__le64 start;
__le64 end;
};
/*
 * Superblock field wrapping a variable-length array of blacklist entries.
 * Note that the flexible array member is itself named 'start'; it is the
 * array of entries, not to be confused with the 'start' member inside
 * each struct journal_seq_blacklist_entry.
 */
struct bch_sb_field_journal_seq_blacklist {
struct bch_sb_field field;
struct journal_seq_blacklist_entry start[];
};
#endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_FORMAT_H */
...@@ -217,4 +217,5 @@ static struct kunit_suite mean_and_variance_test_suite = { ...@@ -217,4 +217,5 @@ static struct kunit_suite mean_and_variance_test_suite = {
kunit_test_suite(mean_and_variance_test_suite); kunit_test_suite(mean_and_variance_test_suite);
MODULE_AUTHOR("Daniel B. Hill"); MODULE_AUTHOR("Daniel B. Hill");
MODULE_DESCRIPTION("bcachefs filesystem mean and variance unit tests");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_REPLICAS_FORMAT_H
#define _BCACHEFS_REPLICAS_FORMAT_H
/*
 * Version 0 replicas entry: a data type byte, a device count, and a
 * trailing array of one-byte device slots. replicas_entry_bytes() sizes
 * an entry as the header plus nr_devs bytes, i.e. devs[] holds nr_devs
 * elements. Packed, so the header is exactly two bytes.
 */
struct bch_replicas_entry_v0 {
__u8 data_type;
__u8 nr_devs;
__u8 devs[];
} __packed;
/*
 * Superblock field holding version 0 replicas entries. Each entry ends in
 * a flexible array, so entries are variable-sized and entries[] cannot be
 * indexed like a normal array; presumably it is walked entry by entry
 * using replicas_entry_bytes() - TODO confirm against the iteration code.
 */
struct bch_sb_field_replicas_v0 {
struct bch_sb_field field;
struct bch_replicas_entry_v0 entries[];
} __packed __aligned(8);
/*
 * Version 1 replicas entry: same as version 0 with an added nr_required
 * byte between nr_devs and the trailing devs[] array. Packed, so the
 * header is exactly three bytes and devs[] holds nr_devs one-byte slots
 * (see replicas_entry_bytes()).
 */
struct bch_replicas_entry_v1 {
__u8 data_type;
__u8 nr_devs;
__u8 nr_required;
__u8 devs[];
} __packed;
/*
 * Superblock field holding version 1 replicas entries. Each entry ends in
 * a flexible array, so entries are variable-sized and entries[] cannot be
 * indexed like a normal array; presumably it is walked entry by entry
 * using replicas_entry_bytes() - TODO confirm against the iteration code.
 */
struct bch_sb_field_replicas {
struct bch_sb_field field;
struct bch_replicas_entry_v1 entries[];
} __packed __aligned(8);
/*
 * Size in bytes of the replicas entry pointed to by _i: the offset of its
 * devs[] member plus nr_devs. Because the offset is taken from the
 * pointed-to type via typeof, the same macro works for both the v0 and v1
 * entry layouts. _i appears twice in the expansion, but the operand of
 * typeof is not evaluated, so only the nr_devs load evaluates it.
 */
#define replicas_entry_bytes(_i) \
(offsetof(typeof(*(_i)), devs) + (_i)->nr_devs)
#endif /* _BCACHEFS_REPLICAS_FORMAT_H */
...@@ -146,10 +146,17 @@ static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f, ...@@ -146,10 +146,17 @@ static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
for (const struct bch_sb_field_downgrade_entry *i = e->entries; for (const struct bch_sb_field_downgrade_entry *i = e->entries;
(void *) i < vstruct_end(&e->field); (void *) i < vstruct_end(&e->field);
i = downgrade_entry_next_c(i)) { i = downgrade_entry_next_c(i)) {
/*
* Careful: sb_field_downgrade_entry is only 2 byte aligned, but
* section sizes are 8 byte aligned - an empty entry spanning
* the end of the section is allowed (and ignored):
*/
if ((void *) &i->errors[0] > vstruct_end(&e->field))
break;
if (flags & BCH_VALIDATE_write && if (flags & BCH_VALIDATE_write &&
((void *) &i->errors[0] > vstruct_end(&e->field) || (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field)) {
(void *) downgrade_entry_next_c(i) > vstruct_end(&e->field))) { prt_printf(err, "downgrade entry overruns end of superblock section");
prt_printf(err, "downgrade entry overruns end of superblock section)");
return -BCH_ERR_invalid_sb_downgrade; return -BCH_ERR_invalid_sb_downgrade;
} }
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_SB_DOWNGRADE_FORMAT_H
#define _BCACHEFS_SB_DOWNGRADE_FORMAT_H
/*
 * One downgrade table entry: a version number, two 64-bit words of
 * recovery pass bits, and a counted list of 16-bit error IDs. All
 * multi-byte members are little-endian.
 *
 * The struct is packed and only 2-byte aligned, so recovery_passes[] sits
 * at byte offset 2 and is not naturally aligned for a 64-bit access.
 *
 * NOTE(review): nr_errors is a __le16, while __counted_by() hands the raw
 * member to the compiler as the element count; on a big-endian host those
 * would disagree - confirm whether an endian-aware annotation is needed.
 */
struct bch_sb_field_downgrade_entry {
__le16 version;
__le64 recovery_passes[2];
__le16 nr_errors;
__le16 errors[] __counted_by(nr_errors);
} __packed __aligned(2);
/*
 * Superblock field holding the downgrade entries. Entries are
 * variable-sized (each ends in a flexible errors[] array), so entries[]
 * cannot be indexed directly; the validation code steps from one entry to
 * the next with downgrade_entry_next_c() until vstruct_end() of the field.
 */
struct bch_sb_field_downgrade {
struct bch_sb_field field;
struct bch_sb_field_downgrade_entry entries[];
};
#endif /* _BCACHEFS_SB_DOWNGRADE_FORMAT_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_SB_ERRORS_FORMAT_H
#define _BCACHEFS_SB_ERRORS_FORMAT_H
/*
 * X-macro table of fsck error types: each row is x(name, id).
 *
 * The macro expands to nothing useful on its own; the user defines
 * x(t, n) before expanding BCH_SB_ERRS() and undefines it afterwards.
 * IDs are written out explicitly rather than derived from position, and
 * new rows have been appended with the next free number.
 *
 * NOTE(review): these IDs presumably end up in the superblock (an entry's
 * ID field is 16 bits wide), so existing numbers should not be changed or
 * reused - confirm before editing any row.
 */
#define BCH_SB_ERRS() \
x(clean_but_journal_not_empty, 0) \
x(dirty_but_no_journal_entries, 1) \
x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
x(sb_clean_journal_seq_mismatch, 3) \
x(sb_clean_btree_root_mismatch, 4) \
x(sb_clean_missing, 5) \
x(jset_unsupported_version, 6) \
x(jset_unknown_csum, 7) \
x(jset_last_seq_newer_than_seq, 8) \
x(jset_past_bucket_end, 9) \
x(jset_seq_blacklisted, 10) \
x(journal_entries_missing, 11) \
x(journal_entry_replicas_not_marked, 12) \
x(journal_entry_past_jset_end, 13) \
x(journal_entry_replicas_data_mismatch, 14) \
x(journal_entry_bkey_u64s_0, 15) \
x(journal_entry_bkey_past_end, 16) \
x(journal_entry_bkey_bad_format, 17) \
x(journal_entry_bkey_invalid, 18) \
x(journal_entry_btree_root_bad_size, 19) \
x(journal_entry_blacklist_bad_size, 20) \
x(journal_entry_blacklist_v2_bad_size, 21) \
x(journal_entry_blacklist_v2_start_past_end, 22) \
x(journal_entry_usage_bad_size, 23) \
x(journal_entry_data_usage_bad_size, 24) \
x(journal_entry_clock_bad_size, 25) \
x(journal_entry_clock_bad_rw, 26) \
x(journal_entry_dev_usage_bad_size, 27) \
x(journal_entry_dev_usage_bad_dev, 28) \
x(journal_entry_dev_usage_bad_pad, 29) \
x(btree_node_unreadable, 30) \
x(btree_node_fault_injected, 31) \
x(btree_node_bad_magic, 32) \
x(btree_node_bad_seq, 33) \
x(btree_node_unsupported_version, 34) \
x(btree_node_bset_older_than_sb_min, 35) \
x(btree_node_bset_newer_than_sb, 36) \
x(btree_node_data_missing, 37) \
x(btree_node_bset_after_end, 38) \
x(btree_node_replicas_sectors_written_mismatch, 39) \
x(btree_node_replicas_data_mismatch, 40) \
x(bset_unknown_csum, 41) \
x(bset_bad_csum, 42) \
x(bset_past_end_of_btree_node, 43) \
x(bset_wrong_sector_offset, 44) \
x(bset_empty, 45) \
x(bset_bad_seq, 46) \
x(bset_blacklisted_journal_seq, 47) \
x(first_bset_blacklisted_journal_seq, 48) \
x(btree_node_bad_btree, 49) \
x(btree_node_bad_level, 50) \
x(btree_node_bad_min_key, 51) \
x(btree_node_bad_max_key, 52) \
x(btree_node_bad_format, 53) \
x(btree_node_bkey_past_bset_end, 54) \
x(btree_node_bkey_bad_format, 55) \
x(btree_node_bad_bkey, 56) \
x(btree_node_bkey_out_of_order, 57) \
x(btree_root_bkey_invalid, 58) \
x(btree_root_read_error, 59) \
x(btree_root_bad_min_key, 60) \
x(btree_root_bad_max_key, 61) \
x(btree_node_read_error, 62) \
x(btree_node_topology_bad_min_key, 63) \
x(btree_node_topology_bad_max_key, 64) \
x(btree_node_topology_overwritten_by_prev_node, 65) \
x(btree_node_topology_overwritten_by_next_node, 66) \
x(btree_node_topology_interior_node_empty, 67) \
x(fs_usage_hidden_wrong, 68) \
x(fs_usage_btree_wrong, 69) \
x(fs_usage_data_wrong, 70) \
x(fs_usage_cached_wrong, 71) \
x(fs_usage_reserved_wrong, 72) \
x(fs_usage_persistent_reserved_wrong, 73) \
x(fs_usage_nr_inodes_wrong, 74) \
x(fs_usage_replicas_wrong, 75) \
x(dev_usage_buckets_wrong, 76) \
x(dev_usage_sectors_wrong, 77) \
x(dev_usage_fragmented_wrong, 78) \
x(dev_usage_buckets_ec_wrong, 79) \
x(bkey_version_in_future, 80) \
x(bkey_u64s_too_small, 81) \
x(bkey_invalid_type_for_btree, 82) \
x(bkey_extent_size_zero, 83) \
x(bkey_extent_size_greater_than_offset, 84) \
x(bkey_size_nonzero, 85) \
x(bkey_snapshot_nonzero, 86) \
x(bkey_snapshot_zero, 87) \
x(bkey_at_pos_max, 88) \
x(bkey_before_start_of_btree_node, 89) \
x(bkey_after_end_of_btree_node, 90) \
x(bkey_val_size_nonzero, 91) \
x(bkey_val_size_too_small, 92) \
x(alloc_v1_val_size_bad, 93) \
x(alloc_v2_unpack_error, 94) \
x(alloc_v3_unpack_error, 95) \
x(alloc_v4_val_size_bad, 96) \
x(alloc_v4_backpointers_start_bad, 97) \
x(alloc_key_data_type_bad, 98) \
x(alloc_key_empty_but_have_data, 99) \
x(alloc_key_dirty_sectors_0, 100) \
x(alloc_key_data_type_inconsistency, 101) \
x(alloc_key_to_missing_dev_bucket, 102) \
x(alloc_key_cached_inconsistency, 103) \
x(alloc_key_cached_but_read_time_zero, 104) \
x(alloc_key_to_missing_lru_entry, 105) \
x(alloc_key_data_type_wrong, 106) \
x(alloc_key_gen_wrong, 107) \
x(alloc_key_dirty_sectors_wrong, 108) \
x(alloc_key_cached_sectors_wrong, 109) \
x(alloc_key_stripe_wrong, 110) \
x(alloc_key_stripe_redundancy_wrong, 111) \
x(bucket_sector_count_overflow, 112) \
x(bucket_metadata_type_mismatch, 113) \
x(need_discard_key_wrong, 114) \
x(freespace_key_wrong, 115) \
x(freespace_hole_missing, 116) \
x(bucket_gens_val_size_bad, 117) \
x(bucket_gens_key_wrong, 118) \
x(bucket_gens_hole_wrong, 119) \
x(bucket_gens_to_invalid_dev, 120) \
x(bucket_gens_to_invalid_buckets, 121) \
x(bucket_gens_nonzero_for_invalid_buckets, 122) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
x(need_discard_freespace_key_bad, 124) \
x(backpointer_bucket_offset_wrong, 125) \
x(backpointer_to_missing_device, 126) \
x(backpointer_to_missing_alloc, 127) \
x(backpointer_to_missing_ptr, 128) \
x(lru_entry_at_time_0, 129) \
x(lru_entry_to_invalid_bucket, 130) \
x(lru_entry_bad, 131) \
x(btree_ptr_val_too_big, 132) \
x(btree_ptr_v2_val_too_big, 133) \
x(btree_ptr_has_non_ptr, 134) \
x(extent_ptrs_invalid_entry, 135) \
x(extent_ptrs_no_ptrs, 136) \
x(extent_ptrs_too_many_ptrs, 137) \
x(extent_ptrs_redundant_crc, 138) \
x(extent_ptrs_redundant_stripe, 139) \
x(extent_ptrs_unwritten, 140) \
x(extent_ptrs_written_and_unwritten, 141) \
x(ptr_to_invalid_device, 142) \
x(ptr_to_duplicate_device, 143) \
x(ptr_after_last_bucket, 144) \
x(ptr_before_first_bucket, 145) \
x(ptr_spans_multiple_buckets, 146) \
x(ptr_to_missing_backpointer, 147) \
x(ptr_to_missing_alloc_key, 148) \
x(ptr_to_missing_replicas_entry, 149) \
x(ptr_to_missing_stripe, 150) \
x(ptr_to_incorrect_stripe, 151) \
x(ptr_gen_newer_than_bucket_gen, 152) \
x(ptr_too_stale, 153) \
x(stale_dirty_ptr, 154) \
x(ptr_bucket_data_type_mismatch, 155) \
x(ptr_cached_and_erasure_coded, 156) \
x(ptr_crc_uncompressed_size_too_small, 157) \
x(ptr_crc_csum_type_unknown, 158) \
x(ptr_crc_compression_type_unknown, 159) \
x(ptr_crc_redundant, 160) \
x(ptr_crc_uncompressed_size_too_big, 161) \
x(ptr_crc_nonce_mismatch, 162) \
x(ptr_stripe_redundant, 163) \
x(reservation_key_nr_replicas_invalid, 164) \
x(reflink_v_refcount_wrong, 165) \
x(reflink_p_to_missing_reflink_v, 166) \
x(stripe_pos_bad, 167) \
x(stripe_val_size_bad, 168) \
x(stripe_sector_count_wrong, 169) \
x(snapshot_tree_pos_bad, 170) \
x(snapshot_tree_to_missing_snapshot, 171) \
x(snapshot_tree_to_missing_subvol, 172) \
x(snapshot_tree_to_wrong_subvol, 173) \
x(snapshot_tree_to_snapshot_subvol, 174) \
x(snapshot_pos_bad, 175) \
x(snapshot_parent_bad, 176) \
x(snapshot_children_not_normalized, 177) \
x(snapshot_child_duplicate, 178) \
x(snapshot_child_bad, 179) \
x(snapshot_skiplist_not_normalized, 180) \
x(snapshot_skiplist_bad, 181) \
x(snapshot_should_not_have_subvol, 182) \
x(snapshot_to_bad_snapshot_tree, 183) \
x(snapshot_bad_depth, 184) \
x(snapshot_bad_skiplist, 185) \
x(subvol_pos_bad, 186) \
x(subvol_not_master_and_not_snapshot, 187) \
x(subvol_to_missing_root, 188) \
x(subvol_root_wrong_bi_subvol, 189) \
x(bkey_in_missing_snapshot, 190) \
x(inode_pos_inode_nonzero, 191) \
x(inode_pos_blockdev_range, 192) \
x(inode_unpack_error, 193) \
x(inode_str_hash_invalid, 194) \
x(inode_v3_fields_start_bad, 195) \
x(inode_snapshot_mismatch, 196) \
x(inode_unlinked_but_clean, 197) \
x(inode_unlinked_but_nlink_nonzero, 198) \
x(inode_checksum_type_invalid, 199) \
x(inode_compression_type_invalid, 200) \
x(inode_subvol_root_but_not_dir, 201) \
x(inode_i_size_dirty_but_clean, 202) \
x(inode_i_sectors_dirty_but_clean, 203) \
x(inode_i_sectors_wrong, 204) \
x(inode_dir_wrong_nlink, 205) \
x(inode_dir_multiple_links, 206) \
x(inode_multiple_links_but_nlink_0, 207) \
x(inode_wrong_backpointer, 208) \
x(inode_wrong_nlink, 209) \
x(inode_unreachable, 210) \
x(deleted_inode_but_clean, 211) \
x(deleted_inode_missing, 212) \
x(deleted_inode_is_dir, 213) \
x(deleted_inode_not_unlinked, 214) \
x(extent_overlapping, 215) \
x(extent_in_missing_inode, 216) \
x(extent_in_non_reg_inode, 217) \
x(extent_past_end_of_inode, 218) \
x(dirent_empty_name, 219) \
x(dirent_val_too_big, 220) \
x(dirent_name_too_long, 221) \
x(dirent_name_embedded_nul, 222) \
x(dirent_name_dot_or_dotdot, 223) \
x(dirent_name_has_slash, 224) \
x(dirent_d_type_wrong, 225) \
x(inode_bi_parent_wrong, 226) \
x(dirent_in_missing_dir_inode, 227) \
x(dirent_in_non_dir_inode, 228) \
x(dirent_to_missing_inode, 229) \
x(dirent_to_missing_subvol, 230) \
x(dirent_to_itself, 231) \
x(quota_type_invalid, 232) \
x(xattr_val_size_too_small, 233) \
x(xattr_val_size_too_big, 234) \
x(xattr_invalid_type, 235) \
x(xattr_name_invalid_chars, 236) \
x(xattr_in_missing_inode, 237) \
x(root_subvol_missing, 238) \
x(root_dir_missing, 239) \
x(root_inode_not_dir, 240) \
x(dir_loop, 241) \
x(hash_table_key_duplicate, 242) \
x(hash_table_key_wrong_offset, 243) \
x(unlinked_inode_not_on_deleted_list, 244) \
x(reflink_p_front_pad_bad, 245) \
x(journal_entry_dup_same_device, 246) \
x(inode_bi_subvol_missing, 247) \
x(inode_bi_subvol_wrong, 248) \
x(inode_points_to_missing_dirent, 249) \
x(inode_points_to_wrong_dirent, 250) \
x(inode_bi_parent_nonzero, 251) \
x(dirent_to_missing_parent_subvol, 252) \
x(dirent_not_visible_in_parent_subvol, 253) \
x(subvol_fs_path_parent_wrong, 254) \
x(subvol_root_fs_path_parent_nonzero, 255) \
x(subvol_children_not_set, 256) \
x(subvol_children_bad, 257) \
x(subvol_loop, 258) \
x(subvol_unreachable, 259) \
x(btree_node_bkey_bad_u64s, 260) \
x(btree_node_topology_empty_interior_node, 261) \
x(btree_ptr_v2_min_key_bad, 262) \
x(btree_root_unreadable_and_scan_found_nothing, 263) \
x(snapshot_node_missing, 264) \
x(dup_backpointer_to_bad_csum_extent, 265) \
x(btree_bitmap_not_marked, 266) \
x(sb_clean_entry_overrun, 267) \
x(btree_ptr_v2_written_0, 268) \
x(subvol_snapshot_bad, 269) \
x(subvol_inode_bad, 270)
/*
 * Error IDs generated from the BCH_SB_ERRS() table: each x(t, n) row
 * becomes the enumerator BCH_FSCK_ERR_<t> with the explicit value n.
 * BCH_SB_ERR_MAX has no initializer, so it takes the value of the last
 * row in the table plus one.
 */
enum bch_sb_error_id {
#define x(t, n) BCH_FSCK_ERR_##t = n,
BCH_SB_ERRS()
#undef x
BCH_SB_ERR_MAX
};
/*
 * Superblock field holding a variable-length array of error entries.
 * Each entry is two little-endian 64-bit words: 'v', which packs the two
 * bitfields below, and 'last_error_time'. Although declared inline,
 * struct bch_sb_field_error_entry is an ordinary file-scope tag in C and
 * is referred to by name in the accessors below.
 */
struct bch_sb_field_errors {
struct bch_sb_field field;
struct bch_sb_field_error_entry {
__le64 v;
__le64 last_error_time;
} entries[];
};
/*
 * Bitfield accessors over 'v'.
 * NOTE(review): the bit ranges below assume LE64_BITMASK takes a
 * half-open [start, end) bit range - confirm against its definition.
 *   BCH_SB_ERROR_ENTRY_ID  bits 0-15
 *   BCH_SB_ERROR_ENTRY_NR  bits 16-63
 */
LE64_BITMASK(BCH_SB_ERROR_ENTRY_ID, struct bch_sb_field_error_entry, v, 0, 16);
LE64_BITMASK(BCH_SB_ERROR_ENTRY_NR, struct bch_sb_field_error_entry, v, 16, 64);
#endif /* _BCACHEFS_SB_ERRORS_FORMAT_H */
...@@ -4,286 +4,6 @@ ...@@ -4,286 +4,6 @@
#include "darray.h" #include "darray.h"
/*
 * X-macro table of error identifiers: every x(name, value) line pairs an
 * error name with an explicit numeric ID. A user defines x() and then invokes
 * BCH_SB_ERRS() to expand the whole table (enum bch_sb_error_id does this to
 * generate the BCH_FSCK_ERR_* enumerators).
 *
 * That enum declares BCH_SB_ERR_MAX immediately after the expansion, so the
 * entry with the highest ID has to remain the last line of the table.
 *
 * NOTE(review): these IDs look like the values stored via
 * BCH_SB_ERROR_ENTRY_ID in the superblock; if so, existing values must never
 * be renumbered or reused - confirm.
 */
#define BCH_SB_ERRS() \
x(clean_but_journal_not_empty, 0) \
x(dirty_but_no_journal_entries, 1) \
x(dirty_but_no_journal_entries_post_drop_nonflushes, 2) \
x(sb_clean_journal_seq_mismatch, 3) \
x(sb_clean_btree_root_mismatch, 4) \
x(sb_clean_missing, 5) \
x(jset_unsupported_version, 6) \
x(jset_unknown_csum, 7) \
x(jset_last_seq_newer_than_seq, 8) \
x(jset_past_bucket_end, 9) \
x(jset_seq_blacklisted, 10) \
x(journal_entries_missing, 11) \
x(journal_entry_replicas_not_marked, 12) \
x(journal_entry_past_jset_end, 13) \
x(journal_entry_replicas_data_mismatch, 14) \
x(journal_entry_bkey_u64s_0, 15) \
x(journal_entry_bkey_past_end, 16) \
x(journal_entry_bkey_bad_format, 17) \
x(journal_entry_bkey_invalid, 18) \
x(journal_entry_btree_root_bad_size, 19) \
x(journal_entry_blacklist_bad_size, 20) \
x(journal_entry_blacklist_v2_bad_size, 21) \
x(journal_entry_blacklist_v2_start_past_end, 22) \
x(journal_entry_usage_bad_size, 23) \
x(journal_entry_data_usage_bad_size, 24) \
x(journal_entry_clock_bad_size, 25) \
x(journal_entry_clock_bad_rw, 26) \
x(journal_entry_dev_usage_bad_size, 27) \
x(journal_entry_dev_usage_bad_dev, 28) \
x(journal_entry_dev_usage_bad_pad, 29) \
x(btree_node_unreadable, 30) \
x(btree_node_fault_injected, 31) \
x(btree_node_bad_magic, 32) \
x(btree_node_bad_seq, 33) \
x(btree_node_unsupported_version, 34) \
x(btree_node_bset_older_than_sb_min, 35) \
x(btree_node_bset_newer_than_sb, 36) \
x(btree_node_data_missing, 37) \
x(btree_node_bset_after_end, 38) \
x(btree_node_replicas_sectors_written_mismatch, 39) \
x(btree_node_replicas_data_mismatch, 40) \
x(bset_unknown_csum, 41) \
x(bset_bad_csum, 42) \
x(bset_past_end_of_btree_node, 43) \
x(bset_wrong_sector_offset, 44) \
x(bset_empty, 45) \
x(bset_bad_seq, 46) \
x(bset_blacklisted_journal_seq, 47) \
x(first_bset_blacklisted_journal_seq, 48) \
x(btree_node_bad_btree, 49) \
x(btree_node_bad_level, 50) \
x(btree_node_bad_min_key, 51) \
x(btree_node_bad_max_key, 52) \
x(btree_node_bad_format, 53) \
x(btree_node_bkey_past_bset_end, 54) \
x(btree_node_bkey_bad_format, 55) \
x(btree_node_bad_bkey, 56) \
x(btree_node_bkey_out_of_order, 57) \
x(btree_root_bkey_invalid, 58) \
x(btree_root_read_error, 59) \
x(btree_root_bad_min_key, 60) \
x(btree_root_bad_max_key, 61) \
x(btree_node_read_error, 62) \
x(btree_node_topology_bad_min_key, 63) \
x(btree_node_topology_bad_max_key, 64) \
x(btree_node_topology_overwritten_by_prev_node, 65) \
x(btree_node_topology_overwritten_by_next_node, 66) \
x(btree_node_topology_interior_node_empty, 67) \
x(fs_usage_hidden_wrong, 68) \
x(fs_usage_btree_wrong, 69) \
x(fs_usage_data_wrong, 70) \
x(fs_usage_cached_wrong, 71) \
x(fs_usage_reserved_wrong, 72) \
x(fs_usage_persistent_reserved_wrong, 73) \
x(fs_usage_nr_inodes_wrong, 74) \
x(fs_usage_replicas_wrong, 75) \
x(dev_usage_buckets_wrong, 76) \
x(dev_usage_sectors_wrong, 77) \
x(dev_usage_fragmented_wrong, 78) \
x(dev_usage_buckets_ec_wrong, 79) \
x(bkey_version_in_future, 80) \
x(bkey_u64s_too_small, 81) \
x(bkey_invalid_type_for_btree, 82) \
x(bkey_extent_size_zero, 83) \
x(bkey_extent_size_greater_than_offset, 84) \
x(bkey_size_nonzero, 85) \
x(bkey_snapshot_nonzero, 86) \
x(bkey_snapshot_zero, 87) \
x(bkey_at_pos_max, 88) \
x(bkey_before_start_of_btree_node, 89) \
x(bkey_after_end_of_btree_node, 90) \
x(bkey_val_size_nonzero, 91) \
x(bkey_val_size_too_small, 92) \
x(alloc_v1_val_size_bad, 93) \
x(alloc_v2_unpack_error, 94) \
x(alloc_v3_unpack_error, 95) \
x(alloc_v4_val_size_bad, 96) \
x(alloc_v4_backpointers_start_bad, 97) \
x(alloc_key_data_type_bad, 98) \
x(alloc_key_empty_but_have_data, 99) \
x(alloc_key_dirty_sectors_0, 100) \
x(alloc_key_data_type_inconsistency, 101) \
x(alloc_key_to_missing_dev_bucket, 102) \
x(alloc_key_cached_inconsistency, 103) \
x(alloc_key_cached_but_read_time_zero, 104) \
x(alloc_key_to_missing_lru_entry, 105) \
x(alloc_key_data_type_wrong, 106) \
x(alloc_key_gen_wrong, 107) \
x(alloc_key_dirty_sectors_wrong, 108) \
x(alloc_key_cached_sectors_wrong, 109) \
x(alloc_key_stripe_wrong, 110) \
x(alloc_key_stripe_redundancy_wrong, 111) \
x(bucket_sector_count_overflow, 112) \
x(bucket_metadata_type_mismatch, 113) \
x(need_discard_key_wrong, 114) \
x(freespace_key_wrong, 115) \
x(freespace_hole_missing, 116) \
x(bucket_gens_val_size_bad, 117) \
x(bucket_gens_key_wrong, 118) \
x(bucket_gens_hole_wrong, 119) \
x(bucket_gens_to_invalid_dev, 120) \
x(bucket_gens_to_invalid_buckets, 121) \
x(bucket_gens_nonzero_for_invalid_buckets, 122) \
x(need_discard_freespace_key_to_invalid_dev_bucket, 123) \
x(need_discard_freespace_key_bad, 124) \
x(backpointer_bucket_offset_wrong, 125) \
x(backpointer_to_missing_device, 126) \
x(backpointer_to_missing_alloc, 127) \
x(backpointer_to_missing_ptr, 128) \
x(lru_entry_at_time_0, 129) \
x(lru_entry_to_invalid_bucket, 130) \
x(lru_entry_bad, 131) \
x(btree_ptr_val_too_big, 132) \
x(btree_ptr_v2_val_too_big, 133) \
x(btree_ptr_has_non_ptr, 134) \
x(extent_ptrs_invalid_entry, 135) \
x(extent_ptrs_no_ptrs, 136) \
x(extent_ptrs_too_many_ptrs, 137) \
x(extent_ptrs_redundant_crc, 138) \
x(extent_ptrs_redundant_stripe, 139) \
x(extent_ptrs_unwritten, 140) \
x(extent_ptrs_written_and_unwritten, 141) \
x(ptr_to_invalid_device, 142) \
x(ptr_to_duplicate_device, 143) \
x(ptr_after_last_bucket, 144) \
x(ptr_before_first_bucket, 145) \
x(ptr_spans_multiple_buckets, 146) \
x(ptr_to_missing_backpointer, 147) \
x(ptr_to_missing_alloc_key, 148) \
x(ptr_to_missing_replicas_entry, 149) \
x(ptr_to_missing_stripe, 150) \
x(ptr_to_incorrect_stripe, 151) \
x(ptr_gen_newer_than_bucket_gen, 152) \
x(ptr_too_stale, 153) \
x(stale_dirty_ptr, 154) \
x(ptr_bucket_data_type_mismatch, 155) \
x(ptr_cached_and_erasure_coded, 156) \
x(ptr_crc_uncompressed_size_too_small, 157) \
x(ptr_crc_csum_type_unknown, 158) \
x(ptr_crc_compression_type_unknown, 159) \
x(ptr_crc_redundant, 160) \
x(ptr_crc_uncompressed_size_too_big, 161) \
x(ptr_crc_nonce_mismatch, 162) \
x(ptr_stripe_redundant, 163) \
x(reservation_key_nr_replicas_invalid, 164) \
x(reflink_v_refcount_wrong, 165) \
x(reflink_p_to_missing_reflink_v, 166) \
x(stripe_pos_bad, 167) \
x(stripe_val_size_bad, 168) \
x(stripe_sector_count_wrong, 169) \
x(snapshot_tree_pos_bad, 170) \
x(snapshot_tree_to_missing_snapshot, 171) \
x(snapshot_tree_to_missing_subvol, 172) \
x(snapshot_tree_to_wrong_subvol, 173) \
x(snapshot_tree_to_snapshot_subvol, 174) \
x(snapshot_pos_bad, 175) \
x(snapshot_parent_bad, 176) \
x(snapshot_children_not_normalized, 177) \
x(snapshot_child_duplicate, 178) \
x(snapshot_child_bad, 179) \
x(snapshot_skiplist_not_normalized, 180) \
x(snapshot_skiplist_bad, 181) \
x(snapshot_should_not_have_subvol, 182) \
x(snapshot_to_bad_snapshot_tree, 183) \
x(snapshot_bad_depth, 184) \
x(snapshot_bad_skiplist, 185) \
x(subvol_pos_bad, 186) \
x(subvol_not_master_and_not_snapshot, 187) \
x(subvol_to_missing_root, 188) \
x(subvol_root_wrong_bi_subvol, 189) \
x(bkey_in_missing_snapshot, 190) \
x(inode_pos_inode_nonzero, 191) \
x(inode_pos_blockdev_range, 192) \
x(inode_unpack_error, 193) \
x(inode_str_hash_invalid, 194) \
x(inode_v3_fields_start_bad, 195) \
x(inode_snapshot_mismatch, 196) \
x(inode_unlinked_but_clean, 197) \
x(inode_unlinked_but_nlink_nonzero, 198) \
x(inode_checksum_type_invalid, 199) \
x(inode_compression_type_invalid, 200) \
x(inode_subvol_root_but_not_dir, 201) \
x(inode_i_size_dirty_but_clean, 202) \
x(inode_i_sectors_dirty_but_clean, 203) \
x(inode_i_sectors_wrong, 204) \
x(inode_dir_wrong_nlink, 205) \
x(inode_dir_multiple_links, 206) \
x(inode_multiple_links_but_nlink_0, 207) \
x(inode_wrong_backpointer, 208) \
x(inode_wrong_nlink, 209) \
x(inode_unreachable, 210) \
x(deleted_inode_but_clean, 211) \
x(deleted_inode_missing, 212) \
x(deleted_inode_is_dir, 213) \
x(deleted_inode_not_unlinked, 214) \
x(extent_overlapping, 215) \
x(extent_in_missing_inode, 216) \
x(extent_in_non_reg_inode, 217) \
x(extent_past_end_of_inode, 218) \
x(dirent_empty_name, 219) \
x(dirent_val_too_big, 220) \
x(dirent_name_too_long, 221) \
x(dirent_name_embedded_nul, 222) \
x(dirent_name_dot_or_dotdot, 223) \
x(dirent_name_has_slash, 224) \
x(dirent_d_type_wrong, 225) \
x(inode_bi_parent_wrong, 226) \
x(dirent_in_missing_dir_inode, 227) \
x(dirent_in_non_dir_inode, 228) \
x(dirent_to_missing_inode, 229) \
x(dirent_to_missing_subvol, 230) \
x(dirent_to_itself, 231) \
x(quota_type_invalid, 232) \
x(xattr_val_size_too_small, 233) \
x(xattr_val_size_too_big, 234) \
x(xattr_invalid_type, 235) \
x(xattr_name_invalid_chars, 236) \
x(xattr_in_missing_inode, 237) \
x(root_subvol_missing, 238) \
x(root_dir_missing, 239) \
x(root_inode_not_dir, 240) \
x(dir_loop, 241) \
x(hash_table_key_duplicate, 242) \
x(hash_table_key_wrong_offset, 243) \
x(unlinked_inode_not_on_deleted_list, 244) \
x(reflink_p_front_pad_bad, 245) \
x(journal_entry_dup_same_device, 246) \
x(inode_bi_subvol_missing, 247) \
x(inode_bi_subvol_wrong, 248) \
x(inode_points_to_missing_dirent, 249) \
x(inode_points_to_wrong_dirent, 250) \
x(inode_bi_parent_nonzero, 251) \
x(dirent_to_missing_parent_subvol, 252) \
x(dirent_not_visible_in_parent_subvol, 253) \
x(subvol_fs_path_parent_wrong, 254) \
x(subvol_root_fs_path_parent_nonzero, 255) \
x(subvol_children_not_set, 256) \
x(subvol_children_bad, 257) \
x(subvol_loop, 258) \
x(subvol_unreachable, 259) \
x(btree_node_bkey_bad_u64s, 260) \
x(btree_node_topology_empty_interior_node, 261) \
x(btree_ptr_v2_min_key_bad, 262) \
x(btree_root_unreadable_and_scan_found_nothing, 263) \
x(snapshot_node_missing, 264) \
x(dup_backpointer_to_bad_csum_extent, 265) \
x(btree_bitmap_not_marked, 266) \
x(sb_clean_entry_overrun, 267) \
x(btree_ptr_v2_written_0, 268) \
x(subvol_snapshot_bad, 269) \
x(subvol_inode_bad, 270)
/*
 * Numeric identifiers for every error listed in BCH_SB_ERRS(): each
 * x(name, value) entry of that table becomes an enumerator
 * BCH_FSCK_ERR_<name> carrying the explicit value from the table.
 *
 * BCH_SB_ERR_MAX is declared right after the table expansion, so its value is
 * one greater than that of the entry the table lists last.
 */
enum bch_sb_error_id {
#define x(_name, _id)	BCH_FSCK_ERR_##_name = (_id),
	BCH_SB_ERRS()
#undef x
	BCH_SB_ERR_MAX
};
struct bch_sb_error_entry_cpu { struct bch_sb_error_entry_cpu {
u64 id:16, u64 id:16,
nr:48; nr:48;
...@@ -293,4 +13,3 @@ struct bch_sb_error_entry_cpu { ...@@ -293,4 +13,3 @@ struct bch_sb_error_entry_cpu {
typedef DARRAY(struct bch_sb_error_entry_cpu) bch_sb_errors_cpu; typedef DARRAY(struct bch_sb_error_entry_cpu) bch_sb_errors_cpu;
#endif /* _BCACHEFS_SB_ERRORS_TYPES_H */ #endif /* _BCACHEFS_SB_ERRORS_TYPES_H */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_SB_MEMBERS_FORMAT_H
#define _BCACHEFS_SB_MEMBERS_FORMAT_H
/*
 * We refer to members with bitmasks in various places - but we need to get rid
 * of this limit:
 */
#define BCH_SB_MEMBERS_MAX 64
/* Evaluates to 64. NOTE(review): presumably the smallest nbuckets a member may have - confirm */
#define BCH_MIN_NR_NBUCKETS (1 << 6)
/*
 * IOPS measurement kinds, kept as an x-macro list of (name, value) pairs so
 * that other tables can be generated from the same list.
 */
#define BCH_IOPS_MEASUREMENTS()			\
	x(seqread,	0)			\
	x(seqwrite,	1)			\
	x(randread,	2)			\
	x(randwrite,	3)

/*
 * One BCH_IOPS_<name> enumerator per list entry. BCH_IOPS_NR follows the last
 * entry and therefore equals the number of entries (4).
 */
enum bch_iops_measurement {
#define x(_name, _value)	BCH_IOPS_##_name = (_value),
	BCH_IOPS_MEASUREMENTS()
#undef x
	BCH_IOPS_NR
};
/*
 * Kinds of per-member errors, kept as an x-macro list of (name, value)
 * pairs.
 */
#define BCH_MEMBER_ERROR_TYPES()		\
	x(read,		0)			\
	x(write,	1)			\
	x(checksum,	2)

/*
 * One BCH_MEMBER_ERROR_<name> enumerator per list entry.
 * BCH_MEMBER_ERROR_NR follows the last entry and therefore equals the number
 * of entries (3); struct bch_member sizes its error counter arrays with it.
 */
enum bch_member_error_type {
#define x(_name, _value)	BCH_MEMBER_ERROR_##_name = (_value),
	BCH_MEMBER_ERROR_TYPES()
#undef x
	BCH_MEMBER_ERROR_NR
};
/*
 * Description of a single member device, stored as an element of the
 * _members[] array in struct bch_sb_field_members_v1 / _v2.
 *
 * All multi-byte fields use the explicit little-endian __le* types.
 * NOTE(review): this looks like an on-disk layout, in which case fields may
 * only be appended, never reordered or resized - confirm.
 */
struct bch_member {
__uuid_t uuid;
__le64 nbuckets; /* device size */
__le16 first_bucket; /* index of first bucket used */
__le16 bucket_size; /* sectors */
__u8 btree_bitmap_shift;
__u8 pad[3];
__le64 last_mount; /* time_t */
/* Bitfields; accessed through the BCH_MEMBER_* LE64_BITMASK() helpers */
__le64 flags;
/* NOTE(review): presumably indexed by enum bch_iops_measurement (BCH_IOPS_NR == 4) - confirm */
__le32 iops[4];
/* One counter per enum bch_member_error_type value */
__le64 errors[BCH_MEMBER_ERROR_NR];
__le64 errors_at_reset[BCH_MEMBER_ERROR_NR];
__le64 errors_reset_time;
__le64 seq;
__le64 btree_allocated_bitmap;
/*
 * On recovery from a clean shutdown we don't normally read the journal,
 * but we still want to resume writing from where we left off so we
 * don't overwrite more than is necessary, for list journal debugging:
 */
__le32 last_journal_bucket;
__le32 last_journal_bucket_offset;
};
/*
 * This limit comes from the bucket_gens array - it's a single allocation, and
 * kernel allocations are limited to INT_MAX
 */
#define BCH_MEMBER_NBUCKETS_MAX (INT_MAX - 64)
/* NOTE(review): presumably the size in bytes of a member entry in the v1 members field - confirm */
#define BCH_MEMBER_V1_BYTES 56
/*
 * Accessors for the values packed into bch_member::flags.
 * NOTE(review): the last two arguments are presumably the [start, end) bit
 * range of each value within flags - confirm against LE64_BITMASK().
 */
LE64_BITMASK(BCH_MEMBER_STATE, struct bch_member, flags, 0, 4)
/* 4-14 unused, was TIER, HAS_(META)DATA, REPLACEMENT */
LE64_BITMASK(BCH_MEMBER_DISCARD, struct bch_member, flags, 14, 15)
LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED, struct bch_member, flags, 15, 20)
LE64_BITMASK(BCH_MEMBER_GROUP, struct bch_member, flags, 20, 28)
LE64_BITMASK(BCH_MEMBER_DURABILITY, struct bch_member, flags, 28, 30)
LE64_BITMASK(BCH_MEMBER_FREESPACE_INITIALIZED,
struct bch_member, flags, 30, 31)
/*
 * Compiled out: these refer to flags[1], but bch_member::flags is declared as
 * a single __le64, not an array.
 */
#if 0
LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS, struct bch_member, flags[1], 0, 20);
LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
#endif
/*
 * States a member device can be in, kept as an x-macro list of
 * (name, value) pairs.
 */
#define BCH_MEMBER_STATES()			\
	x(rw,		0)			\
	x(ro,		1)			\
	x(failed,	2)			\
	x(spare,	3)

/*
 * One BCH_MEMBER_STATE_<name> enumerator per list entry.
 * BCH_MEMBER_STATE_NR follows the last entry and therefore equals the number
 * of entries (4).
 */
enum bch_member_state {
#define x(_name, _value)	BCH_MEMBER_STATE_##_name = (_value),
	BCH_MEMBER_STATES()
#undef x
	BCH_MEMBER_STATE_NR
};
/*
 * Version 1 of the members superblock field: a struct bch_sb_field header
 * followed directly by the member entries. Unlike the v2 field it carries no
 * per-entry size.
 * NOTE(review): entries here are presumably BCH_MEMBER_V1_BYTES long rather
 * than sizeof(struct bch_member), so _members[] should not be indexed
 * directly - confirm.
 */
struct bch_sb_field_members_v1 {
struct bch_sb_field field;
struct bch_member _members[]; //Members are now variable size
};
/*
 * Version 2 of the members superblock field: a struct bch_sb_field header,
 * the size of one member entry, padding, then the member entries.
 * NOTE(review): since the entry size is stored in member_bytes, entries are
 * presumably stepped through using that value rather than
 * sizeof(struct bch_member) - confirm.
 */
struct bch_sb_field_members_v2 {
struct bch_sb_field field;
__le16 member_bytes; //size of single member entry
/* NOTE(review): plain u8 here, while struct bch_member uses __u8 for its byte fields */
u8 pad[6];
struct bch_member _members[];
};
#endif /* _BCACHEFS_SB_MEMBERS_FORMAT_H */
...@@ -1042,6 +1042,25 @@ int bch2_reconstruct_snapshots(struct bch_fs *c) ...@@ -1042,6 +1042,25 @@ int bch2_reconstruct_snapshots(struct bch_fs *c)
return ret; return ret;
} }
/*
 * Check that key @k belongs to a snapshot that is still known.
 *
 * When bch2_snapshot_equiv() returns 0 for the key's snapshot ID, the
 * bkey_in_missing_snapshot fsck error is raised (the message includes the key
 * as printed by bch2_bkey_val_to_text()); if fsck_err_on() then evaluates
 * true, the key at @iter is deleted.
 *
 * Returns 0 when the key was left alone, 1 when it was deleted successfully,
 * or the nonzero result of bch2_btree_delete_at() when the delete failed.
 * NOTE(review): fsck_err_on() presumably can also assign ret and jump to the
 * fsck_err label (nothing else in this function references that label) -
 * confirm against the macro definition.
 */
int bch2_check_key_has_snapshot(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
{
struct bch_fs *c = trans->c;
struct printbuf buf = PRINTBUF;
int ret = 0;
if (fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
bkey_in_missing_snapshot,
"key in missing snapshot %s, delete?",
(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
/* "?: 1" keeps a nonzero delete result and turns a successful delete (0) into 1 */
ret = bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node) ?: 1;
fsck_err:
/* Release the buffer used for the key text on every exit path */
printbuf_exit(&buf);
return ret;
}
/* /*
* Mark a snapshot as deleted, for future cleanup: * Mark a snapshot as deleted, for future cleanup:
*/ */
...@@ -1351,35 +1370,39 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent, ...@@ -1351,35 +1370,39 @@ int bch2_snapshot_node_create(struct btree_trans *trans, u32 parent,
* that key to snapshot leaf nodes, where we can mutate it * that key to snapshot leaf nodes, where we can mutate it
*/ */
static int snapshot_delete_key(struct btree_trans *trans, static int delete_dead_snapshots_process_key(struct btree_trans *trans,
struct btree_iter *iter, struct btree_iter *iter,
struct bkey_s_c k, struct bkey_s_c k,
snapshot_id_list *deleted, snapshot_id_list *deleted,
snapshot_id_list *equiv_seen, snapshot_id_list *equiv_seen,
struct bpos *last_pos) struct bpos *last_pos)
{ {
int ret = bch2_check_key_has_snapshot(trans, iter, k);
if (ret)
return ret < 0 ? ret : 0;
struct bch_fs *c = trans->c; struct bch_fs *c = trans->c;
u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);
if (!equiv) /* key for invalid snapshot node, but we chose not to delete */
return 0;
if (!bkey_eq(k.k->p, *last_pos)) if (!bkey_eq(k.k->p, *last_pos))
equiv_seen->nr = 0; equiv_seen->nr = 0;
*last_pos = k.k->p;
if (snapshot_list_has_id(deleted, k.k->p.snapshot) || if (snapshot_list_has_id(deleted, k.k->p.snapshot))
snapshot_list_has_id(equiv_seen, equiv)) {
return bch2_btree_delete_at(trans, iter, return bch2_btree_delete_at(trans, iter,
BTREE_UPDATE_internal_snapshot_node); BTREE_UPDATE_internal_snapshot_node);
} else {
return snapshot_list_add(c, equiv_seen, equiv);
}
}
static int move_key_to_correct_snapshot(struct btree_trans *trans, if (!bpos_eq(*last_pos, k.k->p) &&
struct btree_iter *iter, snapshot_list_has_id(equiv_seen, equiv))
struct bkey_s_c k) return bch2_btree_delete_at(trans, iter,
{ BTREE_UPDATE_internal_snapshot_node);
struct bch_fs *c = trans->c;
u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); *last_pos = k.k->p;
ret = snapshot_list_add_nodup(c, equiv_seen, equiv);
if (ret)
return ret;
/* /*
* When we have a linear chain of snapshot nodes, we consider * When we have a linear chain of snapshot nodes, we consider
...@@ -1389,21 +1412,20 @@ static int move_key_to_correct_snapshot(struct btree_trans *trans, ...@@ -1389,21 +1412,20 @@ static int move_key_to_correct_snapshot(struct btree_trans *trans,
* *
* If there are multiple keys in different snapshots at the same * If there are multiple keys in different snapshots at the same
* position, we're only going to keep the one in the newest * position, we're only going to keep the one in the newest
* snapshot - the rest have been overwritten and are redundant, * snapshot (we delete the others above) - the rest have been
* and for the key we're going to keep we need to move it to the * overwritten and are redundant, and for the key we're going to keep we
* equivalance class ID if it's not there already. * need to move it to the equivalance class ID if it's not there
* already.
*/ */
if (equiv != k.k->p.snapshot) { if (equiv != k.k->p.snapshot) {
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k);
struct btree_iter new_iter; int ret = PTR_ERR_OR_ZERO(new);
int ret;
ret = PTR_ERR_OR_ZERO(new);
if (ret) if (ret)
return ret; return ret;
new->k.p.snapshot = equiv; new->k.p.snapshot = equiv;
struct btree_iter new_iter;
bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p, bch2_trans_iter_init(trans, &new_iter, iter->btree_id, new->k.p,
BTREE_ITER_all_snapshots| BTREE_ITER_all_snapshots|
BTREE_ITER_cached| BTREE_ITER_cached|
...@@ -1538,7 +1560,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ...@@ -1538,7 +1560,6 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
struct btree_trans *trans; struct btree_trans *trans;
snapshot_id_list deleted = { 0 }; snapshot_id_list deleted = { 0 };
snapshot_id_list deleted_interior = { 0 }; snapshot_id_list deleted_interior = { 0 };
u32 id;
int ret = 0; int ret = 0;
if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags)) if (!test_and_clear_bit(BCH_FS_need_delete_dead_snapshots, &c->flags))
...@@ -1585,33 +1606,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c) ...@@ -1585,33 +1606,20 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
if (ret) if (ret)
goto err; goto err;
for (id = 0; id < BTREE_ID_NR; id++) { for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
struct bpos last_pos = POS_MIN; struct bpos last_pos = POS_MIN;
snapshot_id_list equiv_seen = { 0 }; snapshot_id_list equiv_seen = { 0 };
struct disk_reservation res = { 0 }; struct disk_reservation res = { 0 };
if (!btree_type_has_snapshots(id)) if (!btree_type_has_snapshots(btree))
continue;
/*
* deleted inodes btree is maintained by a trigger on the inodes
* btree - no work for us to do here, and it's not safe to scan
* it because we'll see out of date keys due to the btree write
* buffer:
*/
if (id == BTREE_ID_deleted_inodes)
continue; continue;
ret = for_each_btree_key_commit(trans, iter, ret = for_each_btree_key_commit(trans, iter,
id, POS_MIN, btree, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
&res, NULL, BCH_TRANS_COMMIT_no_enospc,
snapshot_delete_key(trans, &iter, k, &deleted, &equiv_seen, &last_pos)) ?:
for_each_btree_key_commit(trans, iter,
id, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
&res, NULL, BCH_TRANS_COMMIT_no_enospc, &res, NULL, BCH_TRANS_COMMIT_no_enospc,
move_key_to_correct_snapshot(trans, &iter, k)); delete_dead_snapshots_process_key(trans, &iter, k, &deleted,
&equiv_seen, &last_pos));
bch2_disk_reservation_put(c, &res); bch2_disk_reservation_put(c, &res);
darray_exit(&equiv_seen); darray_exit(&equiv_seen);
......
...@@ -242,6 +242,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32, ...@@ -242,6 +242,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
int bch2_check_snapshot_trees(struct bch_fs *); int bch2_check_snapshot_trees(struct bch_fs *);
int bch2_check_snapshots(struct bch_fs *); int bch2_check_snapshots(struct bch_fs *);
int bch2_reconstruct_snapshots(struct bch_fs *); int bch2_reconstruct_snapshots(struct bch_fs *);
int bch2_check_key_has_snapshot(struct btree_trans *, struct btree_iter *, struct bkey_s_c);
int bch2_snapshot_node_set_deleted(struct btree_trans *, u32); int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
void bch2_delete_dead_snapshots_work(struct work_struct *); void bch2_delete_dead_snapshots_work(struct work_struct *);
......
...@@ -1132,18 +1132,12 @@ bool bch2_check_version_downgrade(struct bch_fs *c) ...@@ -1132,18 +1132,12 @@ bool bch2_check_version_downgrade(struct bch_fs *c)
* c->sb will be checked before we write the superblock, so update it as * c->sb will be checked before we write the superblock, so update it as
* well: * well:
*/ */
if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current) { if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current)
SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current); SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
c->sb.version_upgrade_complete = bcachefs_metadata_version_current; if (c->sb.version > bcachefs_metadata_version_current)
}
if (c->sb.version > bcachefs_metadata_version_current) {
c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current); c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
c->sb.version = bcachefs_metadata_version_current; if (c->sb.version_min > bcachefs_metadata_version_current)
}
if (c->sb.version_min > bcachefs_metadata_version_current) {
c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current); c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
c->sb.version_min = bcachefs_metadata_version_current;
}
c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1); c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
return ret; return ret;
} }
......
...@@ -564,7 +564,7 @@ static void __bch2_fs_free(struct bch_fs *c) ...@@ -564,7 +564,7 @@ static void __bch2_fs_free(struct bch_fs *c)
BUG_ON(atomic_read(&c->journal_keys.ref)); BUG_ON(atomic_read(&c->journal_keys.ref));
bch2_fs_btree_write_buffer_exit(c); bch2_fs_btree_write_buffer_exit(c);
percpu_free_rwsem(&c->mark_lock); percpu_free_rwsem(&c->mark_lock);
EBUG_ON(percpu_u64_get(c->online_reserved)); EBUG_ON(c->online_reserved && percpu_u64_get(c->online_reserved));
free_percpu(c->online_reserved); free_percpu(c->online_reserved);
darray_exit(&c->btree_roots_extra); darray_exit(&c->btree_roots_extra);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment