Commit d16b4a77, authored and committed by Kent Overstreet

bcachefs: Assorted journal refactoring

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent ecf37a4a
......@@ -17,23 +17,14 @@
#include "super-io.h"
#include "trace.h"
static bool journal_entry_is_open(struct journal *j)
static bool __journal_entry_is_open(union journal_res_state state)
{
return j->reservations.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
return state.cur_entry_offset < JOURNAL_ENTRY_CLOSED_VAL;
}
void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
static bool journal_entry_is_open(struct journal *j)
{
struct journal_buf *w = journal_prev_buf(j);
atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
if (!need_write_just_set &&
test_bit(JOURNAL_NEED_WRITE, &j->flags))
bch2_time_stats_update(j->delay_time,
j->need_write_time);
closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
return __journal_entry_is_open(j->reservations);
}
static void journal_pin_new_entry(struct journal *j, int count)
......@@ -77,39 +68,76 @@ static inline bool journal_entry_empty(struct jset *j)
return true;
}
static enum {
JOURNAL_ENTRY_ERROR,
JOURNAL_ENTRY_INUSE,
JOURNAL_ENTRY_CLOSED,
JOURNAL_UNLOCKED,
} journal_buf_switch(struct journal *j, bool need_write_just_set)
/*
 * bch2_journal_halt - put the journal reservation state into the error state
 *
 * Atomically sets cur_entry_offset in j->reservations to
 * JOURNAL_ENTRY_ERROR_VAL, then wakes anything waiting on the journal or on
 * either journal buffer.  Does nothing (and wakes nobody) if the state was
 * already JOURNAL_ENTRY_ERROR_VAL.
 */
void bch2_journal_halt(struct journal *j)
{
union journal_res_state old, new;
u64 v = atomic64_read(&j->reservations.counter);
/*
 * Lockless update: retry the cmpxchg until we either install the error
 * value or observe that it is already set.
 */
do {
old.v = new.v = v;
/* Already in the error state - nothing left to do: */
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
return;
new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);
/*
 * Only reached when this call performed the transition; wake waiters on
 * the journal and on both the current and previous buffers.
 */
journal_wake(j);
closure_wake_up(&journal_cur_buf(j)->wait);
closure_wake_up(&journal_prev_buf(j)->wait);
}
/* journal entry close/open: */
/*
 * __bch2_journal_buf_put - start the write of the previous journal buffer
 * @j:				journal
 * @need_write_just_set:	true if the caller itself just set
 *				JOURNAL_NEED_WRITE; in that case no delay time
 *				is recorded
 *
 * Called from bch2_journal_buf_put() when journal_state_count() for the
 * buffer has dropped to zero.
 */
void __bch2_journal_buf_put(struct journal *j, bool need_write_just_set)
{
struct journal_buf *w = journal_prev_buf(j);
/* Drop one count on the journal pin for this buffer's sequence number: */
atomic_dec_bug(&journal_seq_pin(j, le64_to_cpu(w->data->seq))->count);
/*
 * JOURNAL_NEED_WRITE was set earlier by someone else: account the time
 * elapsed since j->need_write_time in the j->delay_time stats.
 */
if (!need_write_just_set &&
test_bit(JOURNAL_NEED_WRITE, &j->flags))
bch2_time_stats_update(j->delay_time,
j->need_write_time);
/* The write is being started now, so the request flag is consumed: */
clear_bit(JOURNAL_NEED_WRITE, &j->flags);
/* Hand bch2_journal_write to the j->io closure on system_highpri_wq: */
closure_call(&j->io, bch2_journal_write, system_highpri_wq, NULL);
}
/*
* Returns true if journal entry is now closed:
*/
static bool __journal_entry_close(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *buf = journal_cur_buf(j);
union journal_res_state old, new;
u64 v = atomic64_read(&j->reservations.counter);
bool set_need_write = false;
unsigned sectors;
lockdep_assert_held(&j->lock);
do {
old.v = new.v = v;
if (old.cur_entry_offset == JOURNAL_ENTRY_CLOSED_VAL)
return JOURNAL_ENTRY_CLOSED;
return true;
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) {
/* this entry will never be written: */
closure_wake_up(&buf->wait);
return JOURNAL_ENTRY_ERROR;
return true;
}
if (new.prev_buf_unwritten)
return JOURNAL_ENTRY_INUSE;
if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) {
set_bit(JOURNAL_NEED_WRITE, &j->flags);
j->need_write_time = local_clock();
set_need_write = true;
}
/*
* avoid race between setting buf->data->u64s and
* journal_res_put starting write:
*/
journal_state_inc(&new);
if (new.prev_buf_unwritten)
return false;
new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL;
new.idx++;
......@@ -119,15 +147,12 @@ static enum {
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);
clear_bit(JOURNAL_NEED_WRITE, &j->flags);
buf->data->u64s = cpu_to_le32(old.cur_entry_offset);
j->prev_buf_sectors =
vstruct_blocks_plus(buf->data, c->block_bits,
buf->u64s_reserved) *
c->opts.block_size;
BUG_ON(j->prev_buf_sectors > j->cur_buf_sectors);
sectors = vstruct_blocks_plus(buf->data, c->block_bits,
buf->u64s_reserved) << c->block_bits;
BUG_ON(sectors > buf->sectors);
buf->sectors = sectors;
bkey_extent_init(&buf->key);
......@@ -163,32 +188,22 @@ static enum {
bch2_journal_buf_init(j);
cancel_delayed_work(&j->write_work);
spin_unlock(&j->lock);
/* ugh - might be called from __journal_res_get() under wait_event() */
__set_current_state(TASK_RUNNING);
bch2_journal_buf_put(j, old.idx, need_write_just_set);
return JOURNAL_UNLOCKED;
bch2_journal_buf_put(j, old.idx, set_need_write);
return true;
}
void bch2_journal_halt(struct journal *j)
static bool journal_entry_close(struct journal *j)
{
union journal_res_state old, new;
u64 v = atomic64_read(&j->reservations.counter);
do {
old.v = new.v = v;
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
return;
bool ret;
new.cur_entry_offset = JOURNAL_ENTRY_ERROR_VAL;
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);
spin_lock(&j->lock);
ret = __journal_entry_close(j);
spin_unlock(&j->lock);
journal_wake(j);
closure_wake_up(&journal_cur_buf(j)->wait);
closure_wake_up(&journal_prev_buf(j)->wait);
return ret;
}
/*
......@@ -196,17 +211,16 @@ void bch2_journal_halt(struct journal *j)
* journal reservation - journal entry is open means journal is dirty:
*
* returns:
* 1: success
* 0: journal currently full (must wait)
* -EROFS: insufficient rw devices
* -EIO: journal error
* 0: success
* -ENOSPC: journal currently full, must invoke reclaim
* -EAGAIN: journal blocked, must wait
* -EROFS: insufficient rw devices or journal error
*/
static int journal_entry_open(struct journal *j)
{
struct journal_buf *buf = journal_cur_buf(j);
union journal_res_state old, new;
ssize_t u64s;
int sectors;
int u64s, ret;
u64 v;
lockdep_assert_held(&j->lock);
......@@ -216,29 +230,22 @@ static int journal_entry_open(struct journal *j)
return -EAGAIN;
if (!fifo_free(&j->pin))
return 0;
return -ENOSPC;
sectors = bch2_journal_entry_sectors(j);
if (sectors <= 0)
return sectors;
ret = bch2_journal_space_available(j);
if (ret)
return ret;
buf->disk_sectors = sectors;
buf->u64s_reserved = j->entry_u64s_reserved;
buf->disk_sectors = j->cur_entry_sectors;
buf->sectors = min(buf->disk_sectors, buf->buf_size >> 9);
sectors = min_t(unsigned, sectors, buf->size >> 9);
j->cur_buf_sectors = sectors;
u64s = (sectors << 9) / sizeof(u64);
/* Subtract the journal header */
u64s -= sizeof(struct jset) / sizeof(u64);
u64s -= buf->u64s_reserved;
u64s = max_t(ssize_t, 0L, u64s);
BUG_ON(u64s >= JOURNAL_ENTRY_CLOSED_VAL);
u64s = (int) (buf->sectors << 9) / sizeof(u64) -
journal_entry_overhead(j);
u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
if (u64s <= le32_to_cpu(buf->data->u64s))
return 0;
return -ENOSPC;
/*
* Must be set before marking the journal entry as open:
......@@ -250,10 +257,11 @@ static int journal_entry_open(struct journal *j)
old.v = new.v = v;
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
return -EIO;
return -EROFS;
/* Handle any already added entries */
new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
journal_state_inc(&new);
} while ((v = atomic64_cmpxchg(&j->reservations.counter,
old.v, new.v)) != old.v);
......@@ -266,48 +274,16 @@ static int journal_entry_open(struct journal *j)
&j->write_work,
msecs_to_jiffies(j->write_delay_ms));
journal_wake(j);
return 1;
}
static bool __journal_entry_close(struct journal *j)
{
bool set_need_write;
if (!journal_entry_is_open(j)) {
spin_unlock(&j->lock);
return true;
}
set_need_write = !test_and_set_bit(JOURNAL_NEED_WRITE, &j->flags);
if (set_need_write)
j->need_write_time = local_clock();
switch (journal_buf_switch(j, set_need_write)) {
case JOURNAL_ENTRY_INUSE:
spin_unlock(&j->lock);
return false;
default:
spin_unlock(&j->lock);
fallthrough;
case JOURNAL_UNLOCKED:
return false;
}
}
static bool journal_entry_close(struct journal *j)
{
spin_lock(&j->lock);
return __journal_entry_close(j);
return 0;
}
static bool journal_quiesced(struct journal *j)
{
bool ret;
union journal_res_state state = READ_ONCE(j->reservations);
bool ret = !state.prev_buf_unwritten && !__journal_entry_is_open(state);
spin_lock(&j->lock);
ret = !j->reservations.prev_buf_unwritten &&
!journal_entry_is_open(j);
__journal_entry_close(j);
if (!ret)
journal_entry_close(j);
return ret;
}
......@@ -357,7 +333,11 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
if (journal_res_get_fast(j, res, flags))
return 0;
if (bch2_journal_error(j))
return -EROFS;
spin_lock(&j->lock);
/*
* Recheck after taking the lock, so we don't race with another thread
* that just did journal_entry_open() and call journal_entry_close()
......@@ -375,56 +355,42 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
*/
buf = journal_cur_buf(j);
if (journal_entry_is_open(j) &&
buf->size >> 9 < buf->disk_sectors &&
buf->size < JOURNAL_ENTRY_SIZE_MAX)
j->buf_size_want = max(j->buf_size_want, buf->size << 1);
buf->buf_size >> 9 < buf->disk_sectors &&
buf->buf_size < JOURNAL_ENTRY_SIZE_MAX)
j->buf_size_want = max(j->buf_size_want, buf->buf_size << 1);
/*
* Close the current journal entry if necessary, then try to start a new
* one:
*/
switch (journal_buf_switch(j, false)) {
case JOURNAL_ENTRY_ERROR:
spin_unlock(&j->lock);
return -EROFS;
case JOURNAL_ENTRY_INUSE:
if (journal_entry_is_open(j) &&
!__journal_entry_close(j)) {
/*
* The current journal entry is still open, but we failed to get
* a journal reservation because there's not enough space in it,
* and we can't close it and start another because we haven't
* finished writing out the previous entry:
* We failed to get a reservation on the current open journal
* entry because it's full, and we can't close it because
* there's still a previous one in flight:
*/
spin_unlock(&j->lock);
trace_journal_entry_full(c);
goto blocked;
case JOURNAL_ENTRY_CLOSED:
break;
case JOURNAL_UNLOCKED:
goto retry;
ret = -EAGAIN;
} else {
ret = journal_entry_open(j);
}
/* We now have a new, closed journal buf - see if we can open it: */
ret = journal_entry_open(j);
if ((ret == -EAGAIN || ret == -ENOSPC) &&
!j->res_get_blocked_start)
j->res_get_blocked_start = local_clock() ?: 1;
spin_unlock(&j->lock);
if (ret < 0)
return ret;
if (ret)
if (!ret)
goto retry;
if (ret == -ENOSPC) {
/*
* Journal is full - can't rely on reclaim from work item due to
* freezing:
*/
trace_journal_full(c);
bch2_journal_reclaim_work(&j->reclaim_work.work);
ret = -EAGAIN;
}
/* Journal's full, we have to wait */
/*
* Direct reclaim - can't rely on reclaim from work item
* due to freezing..
*/
bch2_journal_reclaim_work(&j->reclaim_work.work);
trace_journal_full(c);
blocked:
if (!j->res_get_blocked_start)
j->res_get_blocked_start = local_clock() ?: 1;
return -EAGAIN;
return ret;
}
/*
......@@ -461,7 +427,7 @@ void bch2_journal_entry_res_resize(struct journal *j,
j->entry_u64s_reserved += d;
if (d <= 0)
goto out_unlock;
goto out;
j->cur_entry_u64s -= d;
smp_mb();
......@@ -474,15 +440,12 @@ void bch2_journal_entry_res_resize(struct journal *j,
* Not enough room in current journal entry, have to flush it:
*/
__journal_entry_close(j);
goto out;
} else {
journal_cur_buf(j)->u64s_reserved += d;
}
journal_cur_buf(j)->u64s_reserved += d;
out_unlock:
spin_unlock(&j->lock);
out:
spin_unlock(&j->lock);
res->u64s += d;
return;
}
/* journal flushing: */
......@@ -512,47 +475,47 @@ int bch2_journal_open_seq_async(struct journal *j, u64 seq, struct closure *cl)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
int ret;
retry:
spin_lock(&j->lock);
if (seq < journal_cur_seq(j) ||
/*
* Can't try to open more than one sequence number ahead:
*/
BUG_ON(journal_cur_seq(j) < seq && !journal_entry_is_open(j));
if (journal_cur_seq(j) > seq ||
journal_entry_is_open(j)) {
spin_unlock(&j->lock);
return 0;
}
if (journal_cur_seq(j) < seq) {
switch (journal_buf_switch(j, false)) {
case JOURNAL_ENTRY_ERROR:
spin_unlock(&j->lock);
return -EROFS;
case JOURNAL_ENTRY_INUSE:
/* haven't finished writing out the previous one: */
trace_journal_entry_full(c);
goto blocked;
case JOURNAL_ENTRY_CLOSED:
break;
case JOURNAL_UNLOCKED:
goto retry;
}
}
BUG_ON(journal_cur_seq(j) < seq);
if (journal_cur_seq(j) < seq &&
!__journal_entry_close(j)) {
/* haven't finished writing out the previous one: */
trace_journal_entry_full(c);
ret = -EAGAIN;
} else {
BUG_ON(journal_cur_seq(j) != seq);
ret = journal_entry_open(j);
if (ret) {
spin_unlock(&j->lock);
return ret < 0 ? ret : 0;
ret = journal_entry_open(j);
}
blocked:
if (!j->res_get_blocked_start)
if ((ret == -EAGAIN || ret == -ENOSPC) &&
!j->res_get_blocked_start)
j->res_get_blocked_start = local_clock() ?: 1;
closure_wait(&j->async_wait, cl);
if (ret == -EAGAIN || ret == -ENOSPC)
closure_wait(&j->async_wait, cl);
spin_unlock(&j->lock);
bch2_journal_reclaim_work(&j->reclaim_work.work);
return -EAGAIN;
if (ret == -ENOSPC) {
trace_journal_full(c);
bch2_journal_reclaim_work(&j->reclaim_work.work);
ret = -EAGAIN;
}
return ret;
}
static int journal_seq_error(struct journal *j, u64 seq)
......@@ -635,8 +598,7 @@ void bch2_journal_flush_seq_async(struct journal *j, u64 seq,
if (seq == journal_cur_seq(j))
__journal_entry_close(j);
else
spin_unlock(&j->lock);
spin_unlock(&j->lock);
}
static int journal_seq_flushed(struct journal *j, u64 seq)
......@@ -648,8 +610,7 @@ static int journal_seq_flushed(struct journal *j, u64 seq)
if (seq == journal_cur_seq(j))
__journal_entry_close(j);
else
spin_unlock(&j->lock);
spin_unlock(&j->lock);
return ret;
}
......@@ -783,7 +744,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
goto err;
journal_buckets = bch2_sb_resize_journal(&ca->disk_sb,
nr + sizeof(*journal_buckets) / sizeof(u64));
nr + sizeof(*journal_buckets) / sizeof(u64));
if (!journal_buckets)
goto err;
......@@ -846,9 +807,9 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
ja->nr++;
bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
ca->mi.bucket_size,
gc_phase(GC_PHASE_SB),
0);
ca->mi.bucket_size,
gc_phase(GC_PHASE_SB),
0);
if (c) {
spin_unlock(&c->journal.lock);
......@@ -899,7 +860,7 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
*/
if (bch2_disk_reservation_get(c, &disk_res,
bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
bucket_to_sector(ca, nr - ja->nr), 1, 0)) {
mutex_unlock(&c->sb_lock);
return -ENOSPC;
}
......@@ -996,7 +957,7 @@ void bch2_fs_journal_start(struct journal *j)
journal_pin_new_entry(j, 0);
/*
* journal_buf_switch() only inits the next journal entry when it
* __journal_entry_close() only inits the next journal entry when it
* closes an open journal entry - the very first journal entry gets
* initialized here:
*/
......@@ -1063,8 +1024,8 @@ int bch2_dev_journal_init(struct bch_dev *ca, struct bch_sb *sb)
void bch2_fs_journal_exit(struct journal *j)
{
kvpfree(j->buf[1].data, j->buf[1].size);
kvpfree(j->buf[0].data, j->buf[0].size);
kvpfree(j->buf[1].data, j->buf[1].buf_size);
kvpfree(j->buf[0].data, j->buf[0].buf_size);
free_fifo(&j->pin);
}
......@@ -1088,8 +1049,8 @@ int bch2_fs_journal_init(struct journal *j)
lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
j->buf[0].size = JOURNAL_ENTRY_SIZE_MIN;
j->buf[1].size = JOURNAL_ENTRY_SIZE_MIN;
j->buf[0].buf_size = JOURNAL_ENTRY_SIZE_MIN;
j->buf[1].buf_size = JOURNAL_ENTRY_SIZE_MIN;
j->write_delay_ms = 1000;
j->reclaim_delay_ms = 100;
......@@ -1102,8 +1063,8 @@ int bch2_fs_journal_init(struct journal *j)
{ .cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL }).v);
if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) ||
!(j->buf[0].data = kvpmalloc(j->buf[0].size, GFP_KERNEL)) ||
!(j->buf[1].data = kvpmalloc(j->buf[1].size, GFP_KERNEL))) {
!(j->buf[0].data = kvpmalloc(j->buf[0].buf_size, GFP_KERNEL)) ||
!(j->buf[1].data = kvpmalloc(j->buf[1].buf_size, GFP_KERNEL))) {
ret = -ENOMEM;
goto out;
}
......
......@@ -179,6 +179,11 @@ static inline unsigned jset_u64s(unsigned u64s)
return u64s + sizeof(struct jset_entry) / sizeof(u64);
}
/*
 * journal_entry_overhead - u64s of a journal entry taken up by overhead
 * @j:	journal
 *
 * Returns the size of the struct jset header, in u64s, plus the space
 * currently reserved in j->entry_u64s_reserved.
 */
static inline int journal_entry_overhead(struct journal *j)
{
	/* Size of the on-disk entry header, expressed in u64s: */
	const size_t header_u64s = sizeof(struct jset) / sizeof(u64);

	return header_u64s + j->entry_u64s_reserved;
}
static inline struct jset_entry *
bch2_journal_add_entry_noreservation(struct journal_buf *buf, size_t u64s)
{
......@@ -225,7 +230,7 @@ static inline void bch2_journal_add_keys(struct journal *j, struct journal_res *
id, 0, k, k->k.u64s);
}
void bch2_journal_buf_put_slowpath(struct journal *, bool);
void __bch2_journal_buf_put(struct journal *, bool);
static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
bool need_write_just_set)
......@@ -236,17 +241,10 @@ static inline void bch2_journal_buf_put(struct journal *j, unsigned idx,
.buf0_count = idx == 0,
.buf1_count = idx == 1,
}).v, &j->reservations.counter);
EBUG_ON(s.idx != idx && !s.prev_buf_unwritten);
/*
* Do not initiate a journal write if the journal is in an error state
* (previous journal entry write may have failed)
*/
if (s.idx != idx &&
!journal_state_count(s, idx) &&
s.cur_entry_offset != JOURNAL_ENTRY_ERROR_VAL)
bch2_journal_buf_put_slowpath(j, need_write_just_set);
if (!journal_state_count(s, idx)) {
EBUG_ON(s.idx == idx || !s.prev_buf_unwritten);
__bch2_journal_buf_put(j, need_write_just_set);
}
}
/*
......@@ -333,6 +331,8 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
return 0;
}
/* journal_entry_res: */
void bch2_journal_entry_res_resize(struct journal *,
struct journal_entry_res *,
unsigned);
......
......@@ -902,13 +902,16 @@ static unsigned journal_dev_buckets_available(struct journal *j,
return available;
}
/* returns number of sectors available for next journal entry: */
int bch2_journal_entry_sectors(struct journal *j)
int bch2_journal_space_available(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_dev *ca;
unsigned sectors_available = UINT_MAX;
unsigned sectors_next_entry = UINT_MAX;
unsigned i, nr_online = 0, nr_devs = 0;
unsigned unwritten_sectors = j->reservations.prev_buf_unwritten
? journal_prev_buf(j)->sectors
: 0;
int ret = 0;
lockdep_assert_held(&j->lock);
......@@ -921,16 +924,16 @@ int bch2_journal_entry_sectors(struct journal *j)
if (!ja->nr)
continue;
nr_online++;
buckets_this_device = journal_dev_buckets_available(j, ja);
sectors_this_device = ja->sectors_free;
nr_online++;
/*
* Note that we don't allocate the space for a journal entry
* until we write it out - thus, account for it here:
*/
if (j->prev_buf_sectors >= sectors_this_device) {
if (unwritten_sectors >= sectors_this_device) {
if (!buckets_this_device)
continue;
......@@ -938,7 +941,7 @@ int bch2_journal_entry_sectors(struct journal *j)
sectors_this_device = ca->mi.bucket_size;
}
sectors_this_device -= j->prev_buf_sectors;
sectors_this_device -= unwritten_sectors;
if (buckets_this_device)
sectors_this_device = ca->mi.bucket_size;
......@@ -946,19 +949,26 @@ int bch2_journal_entry_sectors(struct journal *j)
if (!sectors_this_device)
continue;
sectors_available = min(sectors_available,
sectors_this_device);
sectors_next_entry = min(sectors_next_entry,
sectors_this_device);
nr_devs++;
}
rcu_read_unlock();
if (nr_online < c->opts.metadata_replicas_required)
return -EROFS;
if (nr_online < c->opts.metadata_replicas_required) {
ret = -EROFS;
sectors_next_entry = 0;
} else if (!sectors_next_entry ||
nr_devs < min_t(unsigned, nr_online,
c->opts.metadata_replicas)) {
ret = -ENOSPC;
sectors_next_entry = 0;
}
if (nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas))
return 0;
WRITE_ONCE(j->cur_entry_sectors, sectors_next_entry);
return sectors_available;
return ret;
}
static void __journal_write_alloc(struct journal *j,
......@@ -1059,9 +1069,6 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w,
__journal_write_alloc(j, w, &devs_sorted,
sectors, &replicas, replicas_want);
done:
if (replicas >= replicas_want)
j->prev_buf_sectors = 0;
spin_unlock(&j->lock);
rcu_read_unlock();
......@@ -1117,17 +1124,17 @@ static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
unsigned new_size = READ_ONCE(j->buf_size_want);
void *new_buf;
if (buf->size >= new_size)
if (buf->buf_size >= new_size)
return;
new_buf = kvpmalloc(new_size, GFP_NOIO|__GFP_NOWARN);
if (!new_buf)
return;
memcpy(new_buf, buf->data, buf->size);
kvpfree(buf->data, buf->size);
memcpy(new_buf, buf->data, buf->buf_size);
kvpfree(buf->data, buf->buf_size);
buf->data = new_buf;
buf->size = new_size;
buf->buf_size = new_size;
}
static void journal_write_done(struct closure *cl)
......@@ -1227,15 +1234,14 @@ void bch2_journal_write(struct closure *cl)
j->write_start_time = local_clock();
start = vstruct_last(w->data);
start = vstruct_last(jset);
end = bch2_journal_super_entries_add_common(c, start,
le64_to_cpu(jset->seq));
u64s = (u64 *) end - (u64 *) start;
BUG_ON(u64s > j->entry_u64s_reserved);
le32_add_cpu(&w->data->u64s, u64s);
BUG_ON(vstruct_sectors(jset, c->block_bits) >
w->disk_sectors);
le32_add_cpu(&jset->u64s, u64s);
BUG_ON(vstruct_sectors(jset, c->block_bits) > w->sectors);
journal_write_compact(jset);
......@@ -1273,10 +1279,10 @@ void bch2_journal_write(struct closure *cl)
goto err;
sectors = vstruct_sectors(jset, c->block_bits);
BUG_ON(sectors > j->prev_buf_sectors);
BUG_ON(sectors > w->sectors);
bytes = vstruct_bytes(w->data);
memset((void *) w->data + bytes, 0, (sectors << 9) - bytes);
bytes = vstruct_bytes(jset);
memset((void *) jset + bytes, 0, (sectors << 9) - bytes);
if (journal_write_alloc(j, w, sectors)) {
bch2_journal_halt(j);
......@@ -1286,6 +1292,12 @@ void bch2_journal_write(struct closure *cl)
return;
}
/*
* write is allocated, no longer need to account for it in
* bch2_journal_space_available:
*/
w->sectors = 0;
/*
* XXX: we really should just disable the entire journal in nochanges
* mode
......@@ -1316,7 +1328,7 @@ void bch2_journal_write(struct closure *cl)
trace_journal_write(bio);
closure_bio_submit(bio, cl);
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(w->data->seq);
ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(jset->seq);
}
for_each_rw_member(ca, c, i)
......
......@@ -40,7 +40,7 @@ int bch2_journal_read(struct bch_fs *, struct list_head *);
void bch2_journal_entries_free(struct list_head *);
int bch2_journal_replay(struct bch_fs *, struct list_head *);
int bch2_journal_entry_sectors(struct journal *);
int bch2_journal_space_available(struct journal *);
void bch2_journal_write(struct closure *);
#endif /* _BCACHEFS_JOURNAL_IO_H */
......@@ -22,8 +22,10 @@ struct journal_buf {
struct closure_waitlist wait;
unsigned size;
unsigned disk_sectors;
unsigned buf_size; /* size in bytes of @data */
unsigned sectors; /* maximum size for current entry */
unsigned disk_sectors; /* maximum size entry could have been, if
buf_size was bigger */
unsigned u64s_reserved;
/* bloom filter: */
unsigned long has_inode[1024 / sizeof(unsigned long)];
......@@ -129,9 +131,14 @@ struct journal {
unsigned long flags;
union journal_res_state reservations;
/* Max size of current journal entry */
unsigned cur_entry_u64s;
unsigned prev_buf_sectors;
unsigned cur_buf_sectors;
unsigned cur_entry_sectors;
/* Reserved space in journal entry to be used just prior to write */
unsigned entry_u64s_reserved;
unsigned buf_size_want;
/*
......@@ -159,9 +166,6 @@ struct journal {
u64 seq_ondisk;
u64 last_seq_ondisk;
/* Reserved space in journal entry to be used just prior to write */
unsigned entry_u64s_reserved;
/*
* FIFO of journal entries whose btree updates have not yet been
* written out.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment