Commit ed0e24c0, authored and committed by Kent Overstreet

bcachefs: Be more precise with journal error reporting

We were incorrectly detecting a journal deadlock - the journal filling
up - when only the journal pin fifo had filled up; if the journal pin
fifo is full that just means we need to wait on reclaim.

This plumbs through better error reporting so we can better discriminate
in the journal_res_get path what's going on.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent d8ebed7d
...@@ -234,7 +234,7 @@ static int journal_entry_open(struct journal *j) ...@@ -234,7 +234,7 @@ static int journal_entry_open(struct journal *j)
BUG_ON(journal_entry_is_open(j)); BUG_ON(journal_entry_is_open(j));
if (j->blocked) if (j->blocked)
return -EAGAIN; return cur_entry_blocked;
if (j->cur_entry_error) if (j->cur_entry_error)
return j->cur_entry_error; return j->cur_entry_error;
...@@ -250,7 +250,7 @@ static int journal_entry_open(struct journal *j) ...@@ -250,7 +250,7 @@ static int journal_entry_open(struct journal *j)
u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1); u64s = clamp_t(int, u64s, 0, JOURNAL_ENTRY_CLOSED_VAL - 1);
if (u64s <= le32_to_cpu(buf->data->u64s)) if (u64s <= le32_to_cpu(buf->data->u64s))
return -ENOSPC; return cur_entry_journal_full;
/* /*
* Must be set before marking the journal entry as open: * Must be set before marking the journal entry as open:
...@@ -262,7 +262,7 @@ static int journal_entry_open(struct journal *j) ...@@ -262,7 +262,7 @@ static int journal_entry_open(struct journal *j)
old.v = new.v = v; old.v = new.v = v;
if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL) if (old.cur_entry_offset == JOURNAL_ENTRY_ERROR_VAL)
return -EROFS; return cur_entry_insufficient_devices;
/* Handle any already added entries */ /* Handle any already added entries */
new.cur_entry_offset = le32_to_cpu(buf->data->u64s); new.cur_entry_offset = le32_to_cpu(buf->data->u64s);
...@@ -375,7 +375,7 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, ...@@ -375,7 +375,7 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
* Don't want to close current journal entry, just need to * Don't want to close current journal entry, just need to
* invoke reclaim: * invoke reclaim:
*/ */
ret = -ENOSPC; ret = cur_entry_journal_full;
goto unlock; goto unlock;
} }
...@@ -398,14 +398,16 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, ...@@ -398,14 +398,16 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
* there's still a previous one in flight: * there's still a previous one in flight:
*/ */
trace_journal_entry_full(c); trace_journal_entry_full(c);
ret = -EAGAIN; ret = cur_entry_blocked;
} else { } else {
ret = journal_entry_open(j); ret = journal_entry_open(j);
} }
unlock: unlock:
if ((ret == -EAGAIN || ret == -ENOSPC) && if ((ret && ret != cur_entry_insufficient_devices) &&
!j->res_get_blocked_start) !j->res_get_blocked_start) {
j->res_get_blocked_start = local_clock() ?: 1; j->res_get_blocked_start = local_clock() ?: 1;
trace_journal_full(c);
}
can_discard = j->can_discard; can_discard = j->can_discard;
spin_unlock(&j->lock); spin_unlock(&j->lock);
...@@ -413,41 +415,39 @@ static int __journal_res_get(struct journal *j, struct journal_res *res, ...@@ -413,41 +415,39 @@ static int __journal_res_get(struct journal *j, struct journal_res *res,
if (!ret) if (!ret)
goto retry; goto retry;
if (ret == -ENOSPC) { if (WARN_ONCE(ret == cur_entry_journal_full &&
if (WARN_ONCE(!can_discard && (flags & JOURNAL_RES_GET_RESERVED), !can_discard &&
"JOURNAL_RES_GET_RESERVED set but journal full")) { (flags & JOURNAL_RES_GET_RESERVED),
char *buf; "JOURNAL_RES_GET_RESERVED set but journal full")) {
char *buf;
buf = kmalloc(4096, GFP_NOFS);
if (buf) { buf = kmalloc(4096, GFP_NOFS);
bch2_journal_debug_to_text(&_PBUF(buf, 4096), j); if (buf) {
pr_err("\n%s", buf); bch2_journal_debug_to_text(&_PBUF(buf, 4096), j);
kfree(buf); pr_err("\n%s", buf);
} kfree(buf);
} }
}
/* /*
* Journal is full - can't rely on reclaim from work item due to * Journal is full - can't rely on reclaim from work item due to
* freezing: * freezing:
*/ */
trace_journal_full(c); if ((ret == cur_entry_journal_full ||
ret == cur_entry_journal_pin_full) &&
if (!(flags & JOURNAL_RES_GET_NONBLOCK)) { !(flags & JOURNAL_RES_GET_NONBLOCK)) {
if (can_discard) { if (can_discard) {
bch2_journal_do_discards(j); bch2_journal_do_discards(j);
goto retry; goto retry;
}
if (mutex_trylock(&j->reclaim_lock)) {
bch2_journal_reclaim(j);
mutex_unlock(&j->reclaim_lock);
}
} }
ret = -EAGAIN; if (mutex_trylock(&j->reclaim_lock)) {
bch2_journal_reclaim(j);
mutex_unlock(&j->reclaim_lock);
}
} }
return ret; return ret == cur_entry_insufficient_devices ? -EROFS : -EAGAIN;
} }
/* /*
...@@ -1072,6 +1072,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) ...@@ -1072,6 +1072,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
"last_seq_ondisk:\t%llu\n" "last_seq_ondisk:\t%llu\n"
"prereserved:\t\t%u/%u\n" "prereserved:\t\t%u/%u\n"
"current entry sectors:\t%u\n" "current entry sectors:\t%u\n"
"current entry error:\t%u\n"
"current entry:\t\t", "current entry:\t\t",
fifo_used(&j->pin), fifo_used(&j->pin),
journal_cur_seq(j), journal_cur_seq(j),
...@@ -1079,7 +1080,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) ...@@ -1079,7 +1080,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
j->last_seq_ondisk, j->last_seq_ondisk,
j->prereserved.reserved, j->prereserved.reserved,
j->prereserved.remaining, j->prereserved.remaining,
j->cur_entry_sectors); j->cur_entry_sectors,
j->cur_entry_error);
switch (s.cur_entry_offset) { switch (s.cur_entry_offset) {
case JOURNAL_ENTRY_ERROR_VAL: case JOURNAL_ENTRY_ERROR_VAL:
......
...@@ -164,12 +164,12 @@ void bch2_journal_space_available(struct journal *j) ...@@ -164,12 +164,12 @@ void bch2_journal_space_available(struct journal *j)
j->can_discard = can_discard; j->can_discard = can_discard;
if (nr_online < c->opts.metadata_replicas_required) { if (nr_online < c->opts.metadata_replicas_required) {
ret = -EROFS; ret = cur_entry_insufficient_devices;
goto out; goto out;
} }
if (!fifo_free(&j->pin)) { if (!fifo_free(&j->pin)) {
ret = -ENOSPC; ret = cur_entry_journal_pin_full;
goto out; goto out;
} }
...@@ -180,7 +180,7 @@ void bch2_journal_space_available(struct journal *j) ...@@ -180,7 +180,7 @@ void bch2_journal_space_available(struct journal *j)
clean = __journal_space_available(j, nr_devs_want, journal_space_clean); clean = __journal_space_available(j, nr_devs_want, journal_space_clean);
if (!discarded.next_entry) if (!discarded.next_entry)
ret = -ENOSPC; ret = cur_entry_journal_full;
overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) * overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) *
journal_entry_overhead(j); journal_entry_overhead(j);
......
...@@ -146,7 +146,13 @@ struct journal { ...@@ -146,7 +146,13 @@ struct journal {
* 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if * 0, or -ENOSPC if waiting on journal reclaim, or -EROFS if
* insufficient devices: * insufficient devices:
*/ */
int cur_entry_error; enum {
cur_entry_ok,
cur_entry_blocked,
cur_entry_journal_full,
cur_entry_journal_pin_full,
cur_entry_insufficient_devices,
} cur_entry_error;
union journal_preres_state prereserved; union journal_preres_state prereserved;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment