Commit ed9d58a2 authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Run jset_validate in write path as well

This is because we had a bug where we were writing out journal entries
with garbage last_seq, and not catching it.

Also, completely ignore jset->last_seq when JSET_NO_FLUSH is true,
because of aforementioned bug, but change the write path to set last_seq
to 0 when JSET_NO_FLUSH is true.

Minor other cleanups and comments.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent ac958006
...@@ -1624,7 +1624,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, ...@@ -1624,7 +1624,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
validate_before_checksum = true; validate_before_checksum = true;
/* validate_bset will be modifying: */ /* validate_bset will be modifying: */
if (le16_to_cpu(i->version) < bcachefs_metadata_version_max) if (le16_to_cpu(i->version) <= bcachefs_metadata_version_inode_btree_change)
validate_before_checksum = true; validate_before_checksum = true;
/* if we're going to be encrypting, check metadata validity first: */ /* if we're going to be encrypting, check metadata validity first: */
......
...@@ -117,6 +117,9 @@ void __bch2_journal_buf_put(struct journal *j) ...@@ -117,6 +117,9 @@ void __bch2_journal_buf_put(struct journal *j)
/* /*
* Returns true if journal entry is now closed: * Returns true if journal entry is now closed:
*
* We don't close a journal_buf until the next journal_buf is finished writing,
* and can be opened again - this also initializes the next journal_buf:
*/ */
static bool __journal_entry_close(struct journal *j) static bool __journal_entry_close(struct journal *j)
{ {
...@@ -154,6 +157,7 @@ static bool __journal_entry_close(struct journal *j) ...@@ -154,6 +157,7 @@ static bool __journal_entry_close(struct journal *j)
} while ((v = atomic64_cmpxchg(&j->reservations.counter, } while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v); old.v, new.v)) != old.v);
/* Close out old buffer: */
buf->data->u64s = cpu_to_le32(old.cur_entry_offset); buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
sectors = vstruct_blocks_plus(buf->data, c->block_bits, sectors = vstruct_blocks_plus(buf->data, c->block_bits,
...@@ -184,6 +188,7 @@ static bool __journal_entry_close(struct journal *j) ...@@ -184,6 +188,7 @@ static bool __journal_entry_close(struct journal *j)
__bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq)); __bch2_journal_pin_put(j, le64_to_cpu(buf->data->seq));
/* Initialize new buffer: */
journal_pin_new_entry(j, 1); journal_pin_new_entry(j, 1);
bch2_journal_buf_init(j); bch2_journal_buf_init(j);
......
...@@ -469,7 +469,8 @@ static int jset_validate(struct bch_fs *c, ...@@ -469,7 +469,8 @@ static int jset_validate(struct bch_fs *c,
version < bcachefs_metadata_version_min) || version < bcachefs_metadata_version_min) ||
version >= bcachefs_metadata_version_max, c, version >= bcachefs_metadata_version_max, c,
"%s sector %llu seq %llu: unknown journal entry version %u", "%s sector %llu seq %llu: unknown journal entry version %u",
ca->name, sector, le64_to_cpu(jset->seq), ca ? ca->name : c->name,
sector, le64_to_cpu(jset->seq),
version)) { version)) {
/* don't try to continue: */ /* don't try to continue: */
return EINVAL; return EINVAL;
...@@ -481,32 +482,42 @@ static int jset_validate(struct bch_fs *c, ...@@ -481,32 +482,42 @@ static int jset_validate(struct bch_fs *c,
if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c, if (journal_entry_err_on(bytes > bucket_sectors_left << 9, c,
"%s sector %llu seq %llu: journal entry too big (%zu bytes)", "%s sector %llu seq %llu: journal entry too big (%zu bytes)",
ca->name, sector, le64_to_cpu(jset->seq), bytes)) { ca ? ca->name : c->name,
sector, le64_to_cpu(jset->seq), bytes)) {
ret = JOURNAL_ENTRY_BAD; ret = JOURNAL_ENTRY_BAD;
le32_add_cpu(&jset->u64s, le32_add_cpu(&jset->u64s,
-((bytes - (bucket_sectors_left << 9)) / 8)); -((bytes - (bucket_sectors_left << 9)) / 8));
} }
if (fsck_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c, if (journal_entry_err_on(!bch2_checksum_type_valid(c, JSET_CSUM_TYPE(jset)), c,
"%s sector %llu seq %llu: journal entry with unknown csum type %llu", "%s sector %llu seq %llu: journal entry with unknown csum type %llu",
ca->name, sector, le64_to_cpu(jset->seq), ca ? ca->name : c->name,
sector, le64_to_cpu(jset->seq),
JSET_CSUM_TYPE(jset))) { JSET_CSUM_TYPE(jset))) {
ret = JOURNAL_ENTRY_BAD; ret = JOURNAL_ENTRY_BAD;
goto bad_csum_type; goto csum_done;
} }
if (write)
goto csum_done;
csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset);
if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c, if (journal_entry_err_on(bch2_crc_cmp(csum, jset->csum), c,
"%s sector %llu seq %llu: journal checksum bad", "%s sector %llu seq %llu: journal checksum bad",
ca->name, sector, le64_to_cpu(jset->seq))) ca ? ca->name : c->name,
sector, le64_to_cpu(jset->seq)))
ret = JOURNAL_ENTRY_BAD; ret = JOURNAL_ENTRY_BAD;
bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
jset->encrypted_start, jset->encrypted_start,
vstruct_end(jset) - (void *) jset->encrypted_start); vstruct_end(jset) - (void *) jset->encrypted_start);
bad_csum_type: csum_done:
if (journal_entry_err_on(le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c, /* last_seq is ignored when JSET_NO_FLUSH is true */
"invalid journal entry: last_seq > seq")) { if (journal_entry_err_on(!JSET_NO_FLUSH(jset) &&
le64_to_cpu(jset->last_seq) > le64_to_cpu(jset->seq), c,
"invalid journal entry: last_seq > seq (%llu > %llu)",
le64_to_cpu(jset->last_seq),
le64_to_cpu(jset->seq))) {
jset->last_seq = jset->seq; jset->last_seq = jset->seq;
return JOURNAL_ENTRY_BAD; return JOURNAL_ENTRY_BAD;
} }
...@@ -514,6 +525,14 @@ static int jset_validate(struct bch_fs *c, ...@@ -514,6 +525,14 @@ static int jset_validate(struct bch_fs *c,
return ret; return ret;
} }
/*
 * Validate a journal entry we are about to write: first the jset header
 * itself (passing a NULL device since this entry is not tied to any one
 * device yet), then each entry it contains. Returns 0 on success, or the
 * first nonzero error from either validation pass.
 */
static int jset_validate_for_write(struct bch_fs *c, struct jset *jset)
{
	unsigned sectors = vstruct_sectors(jset, c->block_bits);
	int ret;

	ret = jset_validate(c, NULL, jset, 0, sectors, sectors, WRITE);
	if (!ret)
		ret = jset_validate_entries(c, jset, WRITE);
	return ret;
}
struct journal_read_buf { struct journal_read_buf {
void *data; void *data;
size_t size; size_t size;
...@@ -1081,9 +1100,7 @@ static void journal_write_done(struct closure *cl) ...@@ -1081,9 +1100,7 @@ static void journal_write_done(struct closure *cl)
bch2_bkey_devs(bkey_i_to_s_c(&w->key)); bch2_bkey_devs(bkey_i_to_s_c(&w->key));
struct bch_replicas_padded replicas; struct bch_replicas_padded replicas;
union journal_res_state old, new; union journal_res_state old, new;
u64 seq = le64_to_cpu(w->data->seq); u64 v, seq, last_seq;
u64 last_seq = le64_to_cpu(w->data->last_seq);
u64 v;
int err = 0; int err = 0;
bch2_time_stats_update(j->write_time, j->write_start_time); bch2_time_stats_update(j->write_time, j->write_start_time);
...@@ -1101,6 +1118,9 @@ static void journal_write_done(struct closure *cl) ...@@ -1101,6 +1118,9 @@ static void journal_write_done(struct closure *cl)
bch2_fatal_error(c); bch2_fatal_error(c);
spin_lock(&j->lock); spin_lock(&j->lock);
seq = le64_to_cpu(w->data->seq);
last_seq = le64_to_cpu(w->data->last_seq);
if (seq >= j->pin.front) if (seq >= j->pin.front)
journal_seq_pin(j, seq)->devs = devs; journal_seq_pin(j, seq)->devs = devs;
...@@ -1108,7 +1128,7 @@ static void journal_write_done(struct closure *cl) ...@@ -1108,7 +1128,7 @@ static void journal_write_done(struct closure *cl)
if (err && (!j->err_seq || seq < j->err_seq)) if (err && (!j->err_seq || seq < j->err_seq))
j->err_seq = seq; j->err_seq = seq;
if (!w->noflush) { if (!JSET_NO_FLUSH(w->data)) {
j->flushed_seq_ondisk = seq; j->flushed_seq_ondisk = seq;
j->last_seq_ondisk = last_seq; j->last_seq_ondisk = last_seq;
} }
...@@ -1196,7 +1216,7 @@ void bch2_journal_write(struct closure *cl) ...@@ -1196,7 +1216,7 @@ void bch2_journal_write(struct closure *cl)
test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)) { test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)) {
w->noflush = true; w->noflush = true;
SET_JSET_NO_FLUSH(jset, true); SET_JSET_NO_FLUSH(jset, true);
jset->last_seq = cpu_to_le64(j->last_seq_ondisk); jset->last_seq = 0;
j->nr_noflush_writes++; j->nr_noflush_writes++;
} else { } else {
...@@ -1248,11 +1268,11 @@ void bch2_journal_write(struct closure *cl) ...@@ -1248,11 +1268,11 @@ void bch2_journal_write(struct closure *cl)
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset))) if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
validate_before_checksum = true; validate_before_checksum = true;
if (le32_to_cpu(jset->version) < bcachefs_metadata_version_max) if (le32_to_cpu(jset->version) <= bcachefs_metadata_version_inode_btree_change)
validate_before_checksum = true; validate_before_checksum = true;
if (validate_before_checksum && if (validate_before_checksum &&
jset_validate_entries(c, jset, WRITE)) jset_validate_for_write(c, jset))
goto err; goto err;
bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), bch2_encrypt(c, JSET_CSUM_TYPE(jset), journal_nonce(jset),
...@@ -1263,7 +1283,7 @@ void bch2_journal_write(struct closure *cl) ...@@ -1263,7 +1283,7 @@ void bch2_journal_write(struct closure *cl)
journal_nonce(jset), jset); journal_nonce(jset), jset);
if (!validate_before_checksum && if (!validate_before_checksum &&
jset_validate_entries(c, jset, WRITE)) jset_validate_for_write(c, jset))
goto err; goto err;
sectors = vstruct_sectors(jset, c->block_bits); sectors = vstruct_sectors(jset, c->block_bits);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment