Commit b7a9bbfc authored by Kent Overstreet, committed by Kent Overstreet

bcachefs: Move journal reclaim to a kthread

This is to make tracing easier.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent d5425a3b
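In broad strokes, the hunks below drop the self-rearming reclaim_work delayed work and its dedicated journal_reclaim_wq workqueue, replacing them with a per-filesystem "bch-reclaim/%s" kthread plus a journal_reclaim_kick() helper that callers use instead of re-queueing work. What follows is a condensed sketch of that wake-up handshake, assembled from the hunks rather than copied from them: the struct journal_sketch, reclaim_kick() and reclaim_thread_fn() names are invented for the sketch, while the field names (reclaim_thread, reclaim_kicked, reclaim_delay_ms, last_flushed, reclaim_lock) come from the patch.

/*
 * Condensed sketch of the pattern introduced by this patch (not the
 * literal patched code):
 *  - producers set reclaim_kicked and wake the thread;
 *  - the thread runs one reclaim pass, then sleeps until it is kicked,
 *    told to stop, or reclaim_delay_ms elapses.
 */
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/jiffies.h>
#include <linux/mutex.h>

struct journal_sketch {
	struct task_struct	*reclaim_thread;
	bool			reclaim_kicked;
	unsigned		reclaim_delay_ms;
	unsigned long		last_flushed;
	struct mutex		reclaim_lock;
};

/* Producer side: set the flag, then wake the thread. */
static void reclaim_kick(struct journal_sketch *j)
{
	struct task_struct *p = READ_ONCE(j->reclaim_thread);

	if (p && !j->reclaim_kicked) {
		j->reclaim_kicked = true;
		wake_up_process(p);
	}
}

/* Thread side: one reclaim pass, then wait for a kick or the timeout. */
static int reclaim_thread_fn(void *arg)
{
	struct journal_sketch *j = arg;
	unsigned long next;

	while (!kthread_should_stop()) {
		j->reclaim_kicked = false;

		mutex_lock(&j->reclaim_lock);
		/* a background reclaim pass would run here */
		mutex_unlock(&j->reclaim_lock);

		next = j->last_flushed + msecs_to_jiffies(j->reclaim_delay_ms);

		while (1) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (kthread_should_stop() ||
			    j->reclaim_kicked ||
			    time_after_eq(jiffies, next))
				break;
			schedule_timeout(next - jiffies);
		}
		__set_current_state(TASK_RUNNING);
	}
	return 0;
}

reclaim_kicked is written before wake_up_process(), so a kick issued while the thread is finishing a pass is still observed on the next loop iteration; kthread_should_stop() and the reclaim_delay_ms timeout are the other two ways out of the wait loop.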
@@ -1409,7 +1409,7 @@ int bch2_dev_allocator_start(struct bch_dev *ca)
return 0;
p = kthread_create(bch2_allocator_thread, ca,
"bch_alloc[%s]", ca->name);
"bch-alloc/%s", ca->name);
if (IS_ERR(p))
return PTR_ERR(p);
@@ -650,7 +650,6 @@ struct bch_fs {
struct workqueue_struct *wq;
/* copygc needs its own workqueue for index updates.. */
struct workqueue_struct *copygc_wq;
struct workqueue_struct *journal_reclaim_wq;
/* ALLOCATION */
struct delayed_work pd_controllers_update;
@@ -1427,7 +1427,7 @@ int bch2_gc_thread_start(struct bch_fs *c)
BUG_ON(c->gc_thread);
p = kthread_create(bch2_gc_thread, c, "bch_gc");
p = kthread_create(bch2_gc_thread, c, "bch-gc/%s", c->name);
if (IS_ERR(p))
return PTR_ERR(p);
@@ -497,7 +497,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
&ck->journal, btree_key_cache_journal_flush);
if (kick_reclaim)
mod_delayed_work(c->journal_reclaim_wq, &c->journal.reclaim_work, 0);
journal_reclaim_kick(&c->journal);
return true;
}
@@ -341,7 +341,8 @@ static long bch2_ioctl_data(struct bch_fs *c,
ctx->c = c;
ctx->arg = arg;
ctx->thread = kthread_create(bch2_data_thread, ctx, "[bcachefs]");
ctx->thread = kthread_create(bch2_data_thread, ctx,
"bch-data/%s", c->name);
if (IS_ERR(ctx->thread)) {
ret = PTR_ERR(ctx->thread);
goto err;
@@ -225,11 +225,14 @@ static bool journal_entry_close(struct journal *j)
*/
static int journal_entry_open(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct journal_buf *buf = journal_cur_buf(j);
union journal_res_state old, new;
int u64s;
u64 v;
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
lockdep_assert_held(&j->lock);
BUG_ON(journal_entry_is_open(j));
@@ -480,8 +483,10 @@ static bool journal_preres_available(struct journal *j,
{
bool ret = bch2_journal_preres_get_fast(j, res, new_u64s, flags);
if (!ret)
bch2_journal_reclaim_work(&j->reclaim_work.work);
if (!ret && mutex_trylock(&j->reclaim_lock)) {
bch2_journal_reclaim(j);
mutex_unlock(&j->reclaim_lock);
}
return ret;
}
@@ -888,7 +893,7 @@ void bch2_fs_journal_stop(struct journal *j)
j->last_empty_seq + 1 != journal_cur_seq(j)));
cancel_delayed_work_sync(&j->write_work);
cancel_delayed_work_sync(&j->reclaim_work);
bch2_journal_reclaim_stop(j);
}
int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
@@ -1019,7 +1024,6 @@ int bch2_fs_journal_init(struct journal *j)
spin_lock_init(&j->err_lock);
init_waitqueue_head(&j->wait);
INIT_DELAYED_WORK(&j->write_work, journal_write_work);
INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
init_waitqueue_head(&j->pin_flush_wait);
mutex_init(&j->reclaim_lock);
mutex_init(&j->discard_lock);
@@ -1071,6 +1075,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
"last_seq:\t\t%llu\n"
"last_seq_ondisk:\t%llu\n"
"prereserved:\t\t%u/%u\n"
"nr direct reclaim:\t%llu\n"
"nr background reclaim:\t%llu\n"
"current entry sectors:\t%u\n"
"current entry error:\t%u\n"
"current entry:\t\t",
@@ -1080,6 +1086,8 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
j->last_seq_ondisk,
j->prereserved.reserved,
j->prereserved.remaining,
j->nr_direct_reclaim,
j->nr_background_reclaim,
j->cur_entry_sectors,
j->cur_entry_error);
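For reference, the two counter lines added by the two hunks above surface in the journal debug text alongside the existing prereserved figures; the sample values below are invented purely for illustration:

nr direct reclaim:	142
nr background reclaim:	3580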
@@ -993,7 +993,7 @@ static void journal_write_done(struct closure *cl)
* Must come before signaling write completion, for
* bch2_fs_journal_stop():
*/
mod_delayed_work(c->journal_reclaim_wq, &j->reclaim_work, 0);
journal_reclaim_kick(&c->journal);
/* also must come before signalling write completion: */
closure_debug_destroy(cl);
@@ -1044,6 +1044,8 @@ void bch2_journal_write(struct closure *cl)
unsigned i, sectors, bytes, u64s;
int ret;
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
bch2_journal_pin_put(j, le64_to_cpu(w->data->seq));
journal_buf_realloc(j, w);
@@ -9,6 +9,7 @@
#include "super.h"
#include "trace.h"
#include <linux/kthread.h>
#include <linux/sched/mm.h>
/* Free space calculations: */
@@ -534,9 +535,10 @@ static u64 journal_seq_to_flush(struct journal *j)
* 512 journal entries or 25% of all journal buckets, then
* journal_next_bucket() should not stall.
*/
void bch2_journal_reclaim(struct journal *j)
static void __bch2_journal_reclaim(struct journal *j, bool direct)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
bool kthread = (current->flags & PF_KTHREAD) != 0;
u64 seq_to_flush, nr_flushed = 0;
size_t min_nr;
unsigned flags;
@@ -551,6 +553,9 @@ void bch2_journal_reclaim(struct journal *j)
flags = memalloc_noreclaim_save();
do {
if (kthread && kthread_should_stop())
break;
bch2_journal_do_discards(j);
seq_to_flush = journal_seq_to_flush(j);
@@ -582,26 +587,83 @@
c->btree_key_cache.nr_dirty,
c->btree_key_cache.nr_keys);
nr_flushed += journal_flush_pins(j, seq_to_flush, min_nr);
nr_flushed = journal_flush_pins(j, seq_to_flush, min_nr);
if (direct)
j->nr_direct_reclaim += nr_flushed;
else
j->nr_background_reclaim += nr_flushed;
trace_journal_reclaim_finish(c, nr_flushed);
} while (min_nr);
memalloc_noreclaim_restore(flags);
}
trace_journal_reclaim_finish(c, nr_flushed);
if (!bch2_journal_error(j))
queue_delayed_work(c->journal_reclaim_wq, &j->reclaim_work,
msecs_to_jiffies(j->reclaim_delay_ms));
void bch2_journal_reclaim(struct journal *j)
{
__bch2_journal_reclaim(j, true);
}
void bch2_journal_reclaim_work(struct work_struct *work)
static int bch2_journal_reclaim_thread(void *arg)
{
struct journal *j = container_of(to_delayed_work(work),
struct journal, reclaim_work);
struct journal *j = arg;
unsigned long next;
while (!kthread_should_stop()) {
j->reclaim_kicked = false;
mutex_lock(&j->reclaim_lock);
bch2_journal_reclaim(j);
__bch2_journal_reclaim(j, false);
mutex_unlock(&j->reclaim_lock);
next = j->last_flushed + msecs_to_jiffies(j->reclaim_delay_ms);
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
if (kthread_should_stop())
break;
if (j->reclaim_kicked)
break;
if (time_after_eq(jiffies, next))
break;
schedule_timeout(next - jiffies);
}
__set_current_state(TASK_RUNNING);
}
return 0;
}
void bch2_journal_reclaim_stop(struct journal *j)
{
struct task_struct *p = j->reclaim_thread;
j->reclaim_thread = NULL;
if (p) {
kthread_stop(p);
put_task_struct(p);
}
}
int bch2_journal_reclaim_start(struct journal *j)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct task_struct *p;
if (j->reclaim_thread)
return 0;
p = kthread_create(bch2_journal_reclaim_thread, j,
"bch-reclaim/%s", c->name);
if (IS_ERR(p))
return PTR_ERR(p);
get_task_struct(p);
j->reclaim_thread = p;
wake_up_process(p);
return 0;
}
static int journal_flush_done(struct journal *j, u64 seq_to_flush,
@@ -10,6 +10,17 @@ enum journal_space_from {
journal_space_clean,
};
static inline void journal_reclaim_kick(struct journal *j)
{
struct task_struct *p = READ_ONCE(j->reclaim_thread);
if (p && !j->reclaim_kicked) {
j->reclaim_kicked = true;
if (p)
wake_up_process(p);
}
}
unsigned bch2_journal_dev_buckets_available(struct journal *,
struct journal_device *,
enum journal_space_from);
@@ -55,7 +66,9 @@ void bch2_journal_pin_flush(struct journal *, struct journal_entry_pin *);
void bch2_journal_do_discards(struct journal *);
void bch2_journal_reclaim(struct journal *);
void bch2_journal_reclaim_work(struct work_struct *);
void bch2_journal_reclaim_stop(struct journal *);
int bch2_journal_reclaim_start(struct journal *);
bool bch2_journal_flush_pins(struct journal *, u64);
@@ -216,8 +216,12 @@ struct journal {
struct write_point wp;
spinlock_t err_lock;
struct delayed_work reclaim_work;
struct mutex reclaim_lock;
struct task_struct *reclaim_thread;
bool reclaim_kicked;
u64 nr_direct_reclaim;
u64 nr_background_reclaim;
unsigned long last_flushed;
struct journal_entry_pin *flush_in_progress;
wait_queue_head_t pin_flush_wait;
@@ -345,7 +345,7 @@ int bch2_copygc_start(struct bch_fs *c)
if (bch2_fs_init_fault("copygc_start"))
return -ENOMEM;
t = kthread_create(bch2_copygc_thread, c, "bch_copygc");
t = kthread_create(bch2_copygc_thread, c, "bch-copygc/%s", c->name);
if (IS_ERR(t))
return PTR_ERR(t);
@@ -314,7 +314,7 @@ int bch2_rebalance_start(struct bch_fs *c)
if (c->opts.nochanges)
return 0;
p = kthread_create(bch2_rebalance_thread, c, "bch_rebalance");
p = kthread_create(bch2_rebalance_thread, c, "bch-rebalance/%s", c->name);
if (IS_ERR(p))
return PTR_ERR(p);
@@ -49,7 +49,6 @@
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/idr.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/random.h>
@@ -266,7 +265,7 @@ static void bch2_writes_disabled(struct percpu_ref *writes)
void bch2_fs_read_only(struct bch_fs *c)
{
if (!test_bit(BCH_FS_RW, &c->flags)) {
cancel_delayed_work_sync(&c->journal.reclaim_work);
BUG_ON(c->journal.reclaim_thread);
return;
}
@@ -424,6 +423,12 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
set_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags);
ret = bch2_journal_reclaim_start(&c->journal);
if (ret) {
bch_err(c, "error starting journal reclaim: %i", ret);
return ret;
}
if (!early) {
ret = bch2_fs_read_write_late(c);
if (ret)
@@ -432,9 +437,6 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
percpu_ref_reinit(&c->writes);
set_bit(BCH_FS_RW, &c->flags);
queue_delayed_work(c->journal_reclaim_wq,
&c->journal.reclaim_work, 0);
return 0;
err:
__bch2_fs_read_only(c);
@@ -503,8 +505,6 @@ static void __bch2_fs_free(struct bch_fs *c)
kfree(c->unused_inode_hints);
free_heap(&c->copygc_heap);
if (c->journal_reclaim_wq)
destroy_workqueue(c->journal_reclaim_wq);
if (c->copygc_wq)
destroy_workqueue(c->copygc_wq);
if (c->wq)
@@ -758,8 +758,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
WQ_FREEZABLE|WQ_MEM_RECLAIM, 1)) ||
!(c->copygc_wq = alloc_workqueue("bcachefs_copygc",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 1)) ||
!(c->journal_reclaim_wq = alloc_workqueue("bcachefs_journal_reclaim",
WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_HIGHPRI, 1)) ||
percpu_ref_init(&c->writes, bch2_writes_disabled,
PERCPU_REF_INIT_DEAD, GFP_KERNEL) ||
mempool_init_kmalloc_pool(&c->fill_iter, 1, iter_size) ||