Commit b7a9bbfc, authored and committed by Kent Overstreet

bcachefs: Move journal reclaim to a kthread

This is to make tracing easier.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent d5425a3b
...@@ -1409,7 +1409,7 @@ int bch2_dev_allocator_start(struct bch_dev *ca) ...@@ -1409,7 +1409,7 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
return 0; return 0;
p = kthread_create(bch2_allocator_thread, ca, p = kthread_create(bch2_allocator_thread, ca,
"bch_alloc[%s]", ca->name); "bch-alloc/%s", ca->name);
if (IS_ERR(p)) if (IS_ERR(p))
return PTR_ERR(p); return PTR_ERR(p);
......
...@@ -650,7 +650,6 @@ struct bch_fs { ...@@ -650,7 +650,6 @@ struct bch_fs {
struct workqueue_struct *wq; struct workqueue_struct *wq;
/* copygc needs its own workqueue for index updates.. */ /* copygc needs its own workqueue for index updates.. */
struct workqueue_struct *copygc_wq; struct workqueue_struct *copygc_wq;
struct workqueue_struct *journal_reclaim_wq;
/* ALLOCATION */ /* ALLOCATION */
struct delayed_work pd_controllers_update; struct delayed_work pd_controllers_update;
......
...@@ -1427,7 +1427,7 @@ int bch2_gc_thread_start(struct bch_fs *c) ...@@ -1427,7 +1427,7 @@ int bch2_gc_thread_start(struct bch_fs *c)
BUG_ON(c->gc_thread); BUG_ON(c->gc_thread);
p = kthread_create(bch2_gc_thread, c, "bch_gc"); p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
if (IS_ERR(p)) if (IS_ERR(p))
return PTR_ERR(p); return PTR_ERR(p);
......
...@@ -497,7 +497,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans, ...@@ -497,7 +497,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
&ck->journal, btree_key_cache_journal_flush); &ck->journal, btree_key_cache_journal_flush);
if (kick_reclaim) if (kick_reclaim)
mod_delayed_work(c->journal_reclaim_wq, &c->journal.reclaim_work, 0); journal_reclaim_kick(&c->journal);
return true; return true;
} }
......
...@@ -341,7 +341,8 @@ static long bch2_ioctl_data(struct bch_fs *c, ...@@ -341,7 +341,8 @@ static long bch2_ioctl_data(struct bch_fs *c,
ctx->c = c; ctx->c = c;
ctx->arg = arg; ctx->arg = arg;
ctx->thread = kthread_create(bch2_data_thread, ctx, "[bcachefs]"); ctx->thread = kthread_create(bch2_data_thread, ctx,
"bch-data/%s", c->name);
if (IS_ERR(ctx->thread)) { if (IS_ERR(ctx->thread)) {
ret = PTR_ERR(ctx->thread); ret = PTR_ERR(ctx->thread);
goto err; goto err;
......
...@@ -225,11 +225,14 @@ static bool journal_entry_close(struct journal *j) ...@@ -225,11 +225,14 @@ static bool journal_entry_close(struct journal *j)
*/ */
static int journal_entry_open(struct journal *j) static int journal_entry_open(struct journal *j)
{ {
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *buf = journal_cur_buf(j); struct journal_buf *buf = journal_cur_buf(j);
union journal_res_state old, new; union journal_res_state old, new;
int u64s; int u64s;
u64 v; u64 v;
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
lockdep_assert_held(&j->lock); lockdep_assert_held(&j->lock);
BUG_ON(journal_entry_is_open(j)); BUG_ON(journal_entry_is_open(j));
...@@ -480,8 +483,10 @@ static bool journal_preres_available(struct journal *j, ...@@ -480,8 +483,10 @@ static bool journal_preres_available(struct journal *j,
{ {
bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags); bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags);
if (!ret) if (!ret && mutex_trylock(&j->reclaim_lock)) {
bch2_journal_reclaim_work(&j->reclaim_work.work); bch2_journal_reclaim(j);
mutex_unlock(&j->reclaim_lock);
}
return ret; return ret;
} }
...@@ -888,7 +893,7 @@ void bch2_fs_journal_stop(struct journal *j) ...@@ -888,7 +893,7 @@ void bch2_fs_journal_stop(struct journal *j)
j->last_empty_seq + 1 != journal_cur_seq(j))); j->last_empty_seq + 1 != journal_cur_seq(j)));
cancel_delayed_work_sync(&j->write_work); cancel_delayed_work_sync(&j->write_work);
cancel_delayed_work_sync(&j->reclaim_work); bch2_journal_reclaim_stop(j);
} }
int bch2_fs_journal_start(struct journal *j, u64 cur_seq, int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
...@@ -1019,7 +1024,6 @@ int bch2_fs_journal_init(struct journal *j) ...@@ -1019,7 +1024,6 @@ int bch2_fs_journal_init(struct journal *j)
spin_lock_init(&j->err_lock); spin_lock_init(&j->err_lock);
init_waitqueue_head(&j->wait); init_waitqueue_head(&j->wait);
INIT_DELAYED_WORK(&j->write_work, journal_write_work); INIT_DELAYED_WORK(&j->write_work, journal_write_work);
INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
init_waitqueue_head(&j->pin_flush_wait); init_waitqueue_head(&j->pin_flush_wait);
mutex_init(&j->reclaim_lock); mutex_init(&j->reclaim_lock);
mutex_init(&j->discard_lock); mutex_init(&j->discard_lock);
...@@ -1071,6 +1075,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) ...@@ -1071,6 +1075,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
"last_seq:\t\t%llu\n" "last_seq:\t\t%llu\n"
"last_seq_ondisk:\t%llu\n" "last_seq_ondisk:\t%llu\n"
"prereserved:\t\t%u/%u\n" "prereserved:\t\t%u/%u\n"
"nr direct reclaim:\t%llu\n"
"nr background reclaim:\t%llu\n"
"current entry sectors:\t%u\n" "current entry sectors:\t%u\n"
"current entry error:\t%u\n" "current entry error:\t%u\n"
"current entry:\t\t", "current entry:\t\t",
...@@ -1080,6 +1086,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j) ...@@ -1080,6 +1086,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
j->last_seq_ondisk, j->last_seq_ondisk,
j->prereserved.reserved, j->prereserved.reserved,
j->prereserved.remaining, j->prereserved.remaining,
j->nr_direct_reclaim,
j->nr_background_reclaim,
j->cur_entry_sectors, j->cur_entry_sectors,
j->cur_entry_error); j->cur_entry_error);
......
...@@ -993,7 +993,7 @@ static void journal_write_done(struct closure *cl) ...@@ -993,7 +993,7 @@ static void journal_write_done(struct closure *cl)
* Must come before signaling write completion, for * Must come before signaling write completion, for
* bch2_fs_journal_stop(): * bch2_fs_journal_stop():
*/ */
mod_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0); journal_reclaim_kick(&c->journal);
/* also must come before signalling write completion: */ /* also must come before signalling write completion: */
closure_debug_destroy(cl); closure_debug_destroy(cl);
...@@ -1044,6 +1044,8 @@ void bch2_journal_write(struct closure *cl) ...@@ -1044,6 +1044,8 @@ void bch2_journal_write(struct closure *cl)
unsigned i, sectors, bytes, u64s; unsigned i, sectors, bytes, u64s;
int ret; int ret;
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
bch2_journal_pin_put(j, le64_to_cpu(w->data->seq)); bch2_journal_pin_put(j, le64_to_cpu(w->data->seq));
journal_buf_realloc(j, w); journal_buf_realloc(j, w);
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "super.h" #include "super.h"
#include "trace.h" #include "trace.h"
#include <linux/kthread.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
/* Free space calculations: */ /* Free space calculations: */
...@@ -534,9 +535,10 @@ static u64 journal_seq_to_flush(struct journal *j) ...@@ -534,9 +535,10 @@ static u64 journal_seq_to_flush(struct journal *j)
* 512 journal entries or 25% of all journal buckets, then * 512 journal entries or 25% of all journal buckets, then
* journal_next_bucket() should not stall. * journal_next_bucket() should not stall.
*/ */
void bch2_journal_reclaim(struct journal *j) static void __bch2_journal_reclaim(struct journal *j, bool direct)
{ {
struct bch_fs *c = container_of(j, struct bch_fs, journal); struct bch_fs *c = container_of(j, struct bch_fs, journal);
bool kthread = (current->flags & PF_KTHREAD) != 0;
u64 seq_to_flush, nr_flushed = 0; u64 seq_to_flush, nr_flushed = 0;
size_t min_nr; size_t min_nr;
unsigned flags; unsigned flags;
...@@ -551,6 +553,9 @@ void bch2_journal_reclaim(struct journal *j) ...@@ -551,6 +553,9 @@ void bch2_journal_reclaim(struct journal *j)
flags = memalloc_noreclaim_save(); flags = memalloc_noreclaim_save();
do { do {
if (kthread && kthread_should_stop())
break;
bch2_journal_do_discards(j); bch2_journal_do_discards(j);
seq_to_flush = journal_seq_to_flush(j); seq_to_flush = journal_seq_to_flush(j);
...@@ -582,26 +587,83 @@ void bch2_journal_reclaim(struct journal *j) ...@@ -582,26 +587,83 @@ void bch2_journal_reclaim(struct journal *j)
c->btree_key_cache.nr_dirty, c->btree_key_cache.nr_dirty,
c->btree_key_cache.nr_keys); c->btree_key_cache.nr_keys);
nr_flushed += journal_flush_pins(j, seq_to_flush, min_nr); nr_flushed = journal_flush_pins(j, seq_to_flush, min_nr);
if (direct)
j->nr_direct_reclaim += nr_flushed;
else
j->nr_background_reclaim += nr_flushed;
trace_journal_reclaim_finish(c, nr_flushed);
} while (min_nr); } while (min_nr);
memalloc_noreclaim_restore(flags); memalloc_noreclaim_restore(flags);
}
void bch2_journal_reclaim(struct journal *j)
{
__bch2_journal_reclaim(j, true);
}
static int bch2_journal_reclaim_thread(void *arg)
{
struct journal *j = arg;
unsigned long next;
while (!kthread_should_stop()) {
j->reclaim_kicked = false;
mutex_lock(&j->reclaim_lock);
__bch2_journal_reclaim(j, false);
mutex_unlock(&j->reclaim_lock);
next = j->last_flushed + msecs_to_jiffies(j->reclaim_delay_ms);
trace_journal_reclaim_finish(c, nr_flushed); while (1) {
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop())
break;
if (j->reclaim_kicked)
break;
if (time_after_eq(jiffies, next))
break;
schedule_timeout(next - jiffies);
if (!bch2_journal_error(j)) }
queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, __set_current_state(TASK_RUNNING);
msecs_to_jiffies(j->reclaim_delay_ms)); }
return 0;
} }
void bch2_journal_reclaim_work(struct work_struct *work) void bch2_journal_reclaim_stop(struct journal *j)
{ {
struct journal *j = container_of(to_delayed_work(work), struct task_struct *p = j->reclaim_thread;
struct journal, reclaim_work);
mutex_lock(&j->reclaim_lock); j->reclaim_thread = NULL;
bch2_journal_reclaim(j);
mutex_unlock(&j->reclaim_lock); if (p) {
kthread_stop(p);
put_task_struct(p);
}
}
int bch2_journal_reclaim_start(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct task_struct *p;
if (j->reclaim_thread)
return 0;
p = kthread_create(bch2_journal_reclaim_thread, j,
"bch-reclaim/%s", c->name);
if (IS_ERR(p))
return PTR_ERR(p);
get_task_struct(p);
j->reclaim_thread = p;
wake_up_process(p);
return 0;
} }
static int journal_flush_done(struct journal *j, u64 seq_to_flush, static int journal_flush_done(struct journal *j, u64 seq_to_flush,
......
...@@ -10,6 +10,17 @@ enum journal_space_from { ...@@ -10,6 +10,17 @@ enum journal_space_from {
journal_space_clean, journal_space_clean,
}; };
static inline void journal_reclaim_kick(struct journal *j)
{
struct task_struct *p = READ_ONCE(j->reclaim_thread);
if (p && !j->reclaim_kicked) {
j->reclaim_kicked = true;
if (p)
wake_up_process(p);
}
}
unsigned bch2_journal_dev_buckets_available(struct journal *, unsigned bch2_journal_dev_buckets_available(struct journal *,
struct journal_device *, struct journal_device *,
enum journal_space_from); enum journal_space_from);
...@@ -55,7 +66,9 @@ void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *); ...@@ -55,7 +66,9 @@ void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *);
void bch2_journal_do_discards(struct journal *); void bch2_journal_do_discards(struct journal *);
void bch2_journal_reclaim(struct journal *); void bch2_journal_reclaim(struct journal *);
void bch2_journal_reclaim_work(struct work_struct *);
void bch2_journal_reclaim_stop(struct journal *);
int bch2_journal_reclaim_start(struct journal *);
bool bch2_journal_flush_pins(struct journal *, u64); bool bch2_journal_flush_pins(struct journal *, u64);
......
...@@ -216,8 +216,12 @@ struct journal { ...@@ -216,8 +216,12 @@ struct journal {
struct write_point wp; struct write_point wp;
spinlock_t err_lock; spinlock_t err_lock;
struct delayed_work reclaim_work;
struct mutex reclaim_lock; struct mutex reclaim_lock;
struct task_struct *reclaim_thread;
bool reclaim_kicked;
u64 nr_direct_reclaim;
u64 nr_background_reclaim;
unsigned long last_flushed; unsigned long last_flushed;
struct journal_entry_pin *flush_in_progress; struct journal_entry_pin *flush_in_progress;
wait_queue_head_t pin_flush_wait; wait_queue_head_t pin_flush_wait;
......
...@@ -345,7 +345,7 @@ int bch2_copygc_start(struct bch_fs *c) ...@@ -345,7 +345,7 @@ int bch2_copygc_start(struct bch_fs *c)
if (bch2_fs_init_fault("copygc_start")) if (bch2_fs_init_fault("copygc_start"))
return -ENOMEM; return -ENOMEM;
t = kthread_create(bch2_copygc_thread, c, "bch_copygc"); t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
if (IS_ERR(t)) if (IS_ERR(t))
return PTR_ERR(t); return PTR_ERR(t);
......
...@@ -314,7 +314,7 @@ int bch2_rebalance_start(struct bch_fs *c) ...@@ -314,7 +314,7 @@ int bch2_rebalance_start(struct bch_fs *c)
if (c->opts.nochanges) if (c->opts.nochanges)
return 0; return 0;
p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance"); p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
if (IS_ERR(p)) if (IS_ERR(p))
return PTR_ERR(p); return PTR_ERR(p);
......
...@@ -49,7 +49,6 @@ ...@@ -49,7 +49,6 @@
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/device.h> #include <linux/device.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/kthread.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/percpu.h> #include <linux/percpu.h>
#include <linux/random.h> #include <linux/random.h>
...@@ -266,7 +265,7 @@ static void bch2_writes_disabled(struct percpu_ref *writes) ...@@ -266,7 +265,7 @@ static void bch2_writes_disabled(struct percpu_ref *writes)
void bch2_fs_read_only(struct bch_fs *c) void bch2_fs_read_only(struct bch_fs *c)
{ {
if (!test_bit(BCH_FS_RW, &c->flags)) { if (!test_bit(BCH_FS_RW, &c->flags)) {
cancel_delayed_work_sync(&c->journal.reclaim_work); BUG_ON(c->journal.reclaim_thread);
return; return;
} }
...@@ -424,6 +423,12 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) ...@@ -424,6 +423,12 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
ret = bch2_journal_reclaim_start(&c->journal);
if (ret) {
bch_err(c, "error starting journal reclaim: %i", ret);
return ret;
}
if (!early) { if (!early) {
ret = bch2_fs_read_write_late(c); ret = bch2_fs_read_write_late(c);
if (ret) if (ret)
...@@ -432,9 +437,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early) ...@@ -432,9 +437,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
percpu_ref_reinit(&c->writes); percpu_ref_reinit(&c->writes);
set_bit(BCH_FS_RW, &c->flags); set_bit(BCH_FS_RW, &c->flags);
queue_delayed_work(c->journal_reclaim_wq,
&c->journal.reclaim_work, 0);
return 0; return 0;
err: err:
__bch2_fs_read_only(c); __bch2_fs_read_only(c);
...@@ -503,8 +505,6 @@ static void __bch2_fs_free(struct bch_fs *c) ...@@ -503,8 +505,6 @@ static void __bch2_fs_free(struct bch_fs *c)
kfree(c->unused_inode_hints); kfree(c->unused_inode_hints);
free_heap(&c->copygc_heap); free_heap(&c->copygc_heap);
if (c->journal_reclaim_wq)
destroy_workqueue(c->journal_reclaim_wq);
if (c->copygc_wq) if (c->copygc_wq)
destroy_workqueue(c->copygc_wq); destroy_workqueue(c->copygc_wq);
if (c->wq) if (c->wq)
...@@ -758,8 +758,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts) ...@@ -758,8 +758,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) || WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc", !(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) || WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->journal_reclaim_wq = alloc_workqueue("bcachefs_journal_reclaim",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
percpu_ref_init(&c->writes, bch2_writes_disabled, percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) || PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) || mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment