Commit d16b4a77, authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Assorted journal refactoring

Signed-off-by: default avatarKent Overstreet <kent.overstreet@linux.dev>
parent ecf37a4a
This diff is collapsed.
...@@ -179,6 +179,11 @@ static inline unsigned jset_u64s(unsigned u64s) ...@@ -179,6 +179,11 @@ static inline unsigned jset_u64s(unsigned u64s)
return u64s + sizeof(struct jset_entry) / sizeof(u64); return u64s + sizeof(struct jset_entry) / sizeof(u64);
} }
/*
 * Fixed per-entry overhead, in u64s: the jset header itself plus any space
 * reserved in the journal (j->entry_u64s_reserved) for entries added just
 * prior to write (see bch2_journal_super_entries_add_common).
 */
static inline int journal_entry_overhead(struct journal *j)
{
return sizeof(struct jset) / sizeof(u64) + j->entry_u64s_reserved;
}
static inline struct jset_entry * static inline struct jset_entry *
bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s) bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
{ {
...@@ -225,7 +230,7 @@ static inline void bch2_journal_add_keys(struct journal *j, struct journal_res * ...@@ -225,7 +230,7 @@ static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *
id, 0, k, k->k.u64s); id, 0, k, k->k.u64s);
} }
void bch2_journal_buf_put_slowpath(struct journal *, bool); void __bch2_journal_buf_put(struct journal *, bool);
static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
bool need_write_just_set) bool need_write_just_set)
...@@ -236,17 +241,10 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx, ...@@ -236,17 +241,10 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
.buf0_count = idx == 0, .buf0_count = idx == 0,
.buf1_count = idx == 1, .buf1_count = idx == 1,
}).v, &j->reservations.counter); }).v, &j->reservations.counter);
if (!journal_state_count(s, idx)) {
EBUG_ON(s.idx != idx && !s.prev_buf_unwritten); EBUG_ON(s.idx == idx || !s.prev_buf_unwritten);
__bch2_journal_buf_put(j, need_write_just_set);
/* }
* Do not initiate a journal write if the journal is in an error state
* (previous journal entry write may have failed)
*/
if (s.idx != idx &&
!journal_state_count(s, idx) &&
s.cur_entry_offset != JOURNAL_ENTRY_ERROR_VAL)
bch2_journal_buf_put_slowpath(j, need_write_just_set);
} }
/* /*
...@@ -333,6 +331,8 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re ...@@ -333,6 +331,8 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
return 0; return 0;
} }
/* journal_entry_res: */
void bch2_journal_entry_res_resize(struct journal *, void bch2_journal_entry_res_resize(struct journal *,
struct journal_entry_res *, struct journal_entry_res *,
unsigned); unsigned);
......
...@@ -902,13 +902,16 @@ static unsigned journal_dev_buckets_available(struct journal *j, ...@@ -902,13 +902,16 @@ static unsigned journal_dev_buckets_available(struct journal *j,
return available; return available;
} }
/* returns number of sectors available for next journal entry: */ int bch2_journal_space_available(struct journal *j)
int bch2_journal_entry_sectors(struct journal *j)
{ {
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca; struct bch_dev *ca;
unsigned sectors_available = UINT_MAX; unsigned sectors_next_entry = UINT_MAX;
unsigned i, nr_online = 0, nr_devs = 0; unsigned i, nr_online = 0, nr_devs = 0;
unsigned unwritten_sectors = j->reservations.prev_buf_unwritten
? journal_prev_buf(j)->sectors
: 0;
int ret = 0;
lockdep_assert_held(&j->lock); lockdep_assert_held(&j->lock);
...@@ -921,16 +924,16 @@ int bch2_journal_entry_sectors(struct journal *j) ...@@ -921,16 +924,16 @@ int bch2_journal_entry_sectors(struct journal *j)
if (!ja->nr) if (!ja->nr)
continue; continue;
nr_online++;
buckets_this_device = journal_dev_buckets_available(j, ja); buckets_this_device = journal_dev_buckets_available(j, ja);
sectors_this_device = ja->sectors_free; sectors_this_device = ja->sectors_free;
nr_online++;
/* /*
* Note that we don't allocate the space for a journal entry * Note that we don't allocate the space for a journal entry
* until we write it out - thus, account for it here: * until we write it out - thus, account for it here:
*/ */
if (j->prev_buf_sectors >= sectors_this_device) { if (unwritten_sectors >= sectors_this_device) {
if (!buckets_this_device) if (!buckets_this_device)
continue; continue;
...@@ -938,7 +941,7 @@ int bch2_journal_entry_sectors(struct journal *j) ...@@ -938,7 +941,7 @@ int bch2_journal_entry_sectors(struct journal *j)
sectors_this_device = ca->mi.bucket_size; sectors_this_device = ca->mi.bucket_size;
} }
sectors_this_device -= j->prev_buf_sectors; sectors_this_device -= unwritten_sectors;
if (buckets_this_device) if (buckets_this_device)
sectors_this_device = ca->mi.bucket_size; sectors_this_device = ca->mi.bucket_size;
...@@ -946,19 +949,26 @@ int bch2_journal_entry_sectors(struct journal *j) ...@@ -946,19 +949,26 @@ int bch2_journal_entry_sectors(struct journal *j)
if (!sectors_this_device) if (!sectors_this_device)
continue; continue;
sectors_available = min(sectors_available, sectors_next_entry = min(sectors_next_entry,
sectors_this_device); sectors_this_device);
nr_devs++; nr_devs++;
} }
rcu_read_unlock(); rcu_read_unlock();
if (nr_online < c->opts.metadata_replicas_required) if (nr_online < c->opts.metadata_replicas_required) {
return -EROFS; ret = -EROFS;
sectors_next_entry = 0;
} else if (!sectors_next_entry ||
nr_devs < min_t(unsigned, nr_online,
c->opts.metadata_replicas)) {
ret = -ENOSPC;
sectors_next_entry = 0;
}
if (nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas)) WRITE_ONCE(j->cur_entry_sectors, sectors_next_entry);
return 0;
return sectors_available; return ret;
} }
static void __journal_write_alloc(struct journal *j, static void __journal_write_alloc(struct journal *j,
...@@ -1059,9 +1069,6 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w, ...@@ -1059,9 +1069,6 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
__journal_write_alloc(j, w, &devs_sorted, __journal_write_alloc(j, w, &devs_sorted,
sectors, &replicas, replicas_want); sectors, &replicas, replicas_want);
done: done:
if (replicas >= replicas_want)
j->prev_buf_sectors = 0;
spin_unlock(&j->lock); spin_unlock(&j->lock);
rcu_read_unlock(); rcu_read_unlock();
...@@ -1117,17 +1124,17 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf) ...@@ -1117,17 +1124,17 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
unsigned new_size = READ_ONCE(j->buf_size_want); unsigned new_size = READ_ONCE(j->buf_size_want);
void *new_buf; void *new_buf;
if (buf->size >= new_size) if (buf->buf_size >= new_size)
return; return;
new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN); new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN);
if (!new_buf) if (!new_buf)
return; return;
memcpy(new_buf, buf->data, buf->size); memcpy(new_buf, buf->data, buf->buf_size);
kvpfree(buf->data, buf->size); kvpfree(buf->data, buf->buf_size);
buf->data = new_buf; buf->data = new_buf;
buf->size = new_size; buf->buf_size = new_size;
} }
static void journal_write_done(struct closure *cl) static void journal_write_done(struct closure *cl)
...@@ -1227,15 +1234,14 @@ void bch2_journal_write(struct closure *cl) ...@@ -1227,15 +1234,14 @@ void bch2_journal_write(struct closure *cl)
j->write_start_time = local_clock(); j->write_start_time = local_clock();
start = vstruct_last(w->data); start = vstruct_last(jset);
end = bch2_journal_super_entries_add_common(c, start, end = bch2_journal_super_entries_add_common(c, start,
le64_to_cpu(jset->seq)); le64_to_cpu(jset->seq));
u64s = (u64 *) end - (u64 *) start; u64s = (u64 *) end - (u64 *) start;
BUG_ON(u64s > j->entry_u64s_reserved); BUG_ON(u64s > j->entry_u64s_reserved);
le32_add_cpu(&w->data->u64s, u64s); le32_add_cpu(&jset->u64s, u64s);
BUG_ON(vstruct_sectors(jset, c->block_bits) > BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors);
w->disk_sectors);
journal_write_compact(jset); journal_write_compact(jset);
...@@ -1273,10 +1279,10 @@ void bch2_journal_write(struct closure *cl) ...@@ -1273,10 +1279,10 @@ void bch2_journal_write(struct closure *cl)
goto err; goto err;
sectors = vstruct_sectors(jset, c->block_bits); sectors = vstruct_sectors(jset, c->block_bits);
BUG_ON(sectors > j->prev_buf_sectors); BUG_ON(sectors > w->sectors);
bytes = vstruct_bytes(w->data); bytes = vstruct_bytes(jset);
memset((void *) w->data + bytes, 0, (sectors << 9) - bytes); memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
if (journal_write_alloc(j, w, sectors)) { if (journal_write_alloc(j, w, sectors)) {
bch2_journal_halt(j); bch2_journal_halt(j);
...@@ -1286,6 +1292,12 @@ void bch2_journal_write(struct closure *cl) ...@@ -1286,6 +1292,12 @@ void bch2_journal_write(struct closure *cl)
return; return;
} }
/*
* write is allocated, no longer need to account for it in
* bch2_journal_entry_sectors:
*/
w->sectors = 0;
/* /*
* XXX: we really should just disable the entire journal in nochanges * XXX: we really should just disable the entire journal in nochanges
* mode * mode
...@@ -1316,7 +1328,7 @@ void bch2_journal_write(struct closure *cl) ...@@ -1316,7 +1328,7 @@ void bch2_journal_write(struct closure *cl)
trace_journal_write(bio); trace_journal_write(bio);
closure_bio_submit(bio, cl); closure_bio_submit(bio, cl);
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq); ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(jset->seq);
} }
for_each_rw_member(ca, c, i) for_each_rw_member(ca, c, i)
......
...@@ -40,7 +40,7 @@ int bch2_journal_read(struct bch_fs *, struct list_head *); ...@@ -40,7 +40,7 @@ int bch2_journal_read(struct bch_fs *, struct list_head *);
void bch2_journal_entries_free(struct list_head *); void bch2_journal_entries_free(struct list_head *);
int bch2_journal_replay(struct bch_fs *, struct list_head *); int bch2_journal_replay(struct bch_fs *, struct list_head *);
int bch2_journal_entry_sectors(struct journal *); int bch2_journal_space_available(struct journal *);
void bch2_journal_write(struct closure *); void bch2_journal_write(struct closure *);
#endif /* _BCACHEFS_JOURNAL_IO_H */ #endif /* _BCACHEFS_JOURNAL_IO_H */
...@@ -22,8 +22,10 @@ struct journal_buf { ...@@ -22,8 +22,10 @@ struct journal_buf {
struct closure_waitlist wait; struct closure_waitlist wait;
unsigned size; unsigned buf_size; /* size in bytes of @data */
unsigned disk_sectors; unsigned sectors; /* maximum size for current entry */
unsigned disk_sectors; /* maximum size entry could have been, if
buf_size was bigger */
unsigned u64s_reserved; unsigned u64s_reserved;
/* bloom filter: */ /* bloom filter: */
unsigned long has_inode[1024 / sizeof(unsigned long)]; unsigned long has_inode[1024 / sizeof(unsigned long)];
...@@ -129,9 +131,14 @@ struct journal { ...@@ -129,9 +131,14 @@ struct journal {
unsigned long flags; unsigned long flags;
union journal_res_state reservations; union journal_res_state reservations;
/* Max size of current journal entry */
unsigned cur_entry_u64s; unsigned cur_entry_u64s;
unsigned prev_buf_sectors; unsigned cur_entry_sectors;
unsigned cur_buf_sectors;
/* Reserved space in journal entry to be used just prior to write */
unsigned entry_u64s_reserved;
unsigned buf_size_want; unsigned buf_size_want;
/* /*
...@@ -159,9 +166,6 @@ struct journal { ...@@ -159,9 +166,6 @@ struct journal {
u64 seq_ondisk; u64 seq_ondisk;
u64 last_seq_ondisk; u64 last_seq_ondisk;
/* Reserved space in journal entry to be used just prior to write */
unsigned entry_u64s_reserved;
/* /*
* FIFO of journal entries whose btree updates have not yet been * FIFO of journal entries whose btree updates have not yet been
* written out. * written out.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment