Commit a5b696ee authored by Kent Overstreet

bcachefs: seqmutex; fix a lockdep splat

We can't be holding btree_trans_lock while copying to user space, which
might incur a page fault. To fix this, convert it to a seqmutex so we
can unlock/relock.
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
parent 6547ebab
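
In outline: the debugfs read paths below walked btree_trans_list while flushing their output buffer to userspace, and copy_to_user() can fault and sleep with the mutex held. The fix pins the current list element with a closure ref, drops the lock around the copy, then relocks, restarting the walk if the sequence number shows the list may have changed. A condensed sketch of the pattern (names as in the diff below):

	closure_get(&trans->ref);			/* keep trans alive across the unlock */
	seq = seqmutex_seq(&c->btree_trans_lock);
	seqmutex_unlock(&c->btree_trans_lock);		/* drop lock before faulting */

	ret = flush_buf(i);				/* copy_to_user() may fault */

	closure_put(&trans->ref);
	if (!seqmutex_relock(&c->btree_trans_lock, seq))
		goto restart;				/* list may have changed; rescan */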
@@ -208,6 +208,7 @@
 #include "fifo.h"
 #include "nocow_locking_types.h"
 #include "opts.h"
+#include "seqmutex.h"
 #include "util.h"
 
 #ifdef CONFIG_BCACHEFS_DEBUG
@@ -779,7 +780,7 @@ struct bch_fs {
 	} btree_write_stats[BTREE_WRITE_TYPE_NR];
 
 	/* btree_iter.c: */
-	struct mutex		btree_trans_lock;
+	struct seqmutex		btree_trans_lock;
 	struct list_head	btree_trans_list;
 	mempool_t		btree_paths_pool;
 	mempool_t		btree_trans_mem_pool;

...
@@ -2991,7 +2991,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
 	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
 		struct btree_trans *pos;
 
-		mutex_lock(&c->btree_trans_lock);
+		seqmutex_lock(&c->btree_trans_lock);
 		list_for_each_entry(pos, &c->btree_trans_list, list) {
 			/*
 			 * We'd much prefer to be stricter here and completely
@@ -3009,7 +3009,7 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c, unsigned fn_
 		}
 		list_add_tail(&trans->list, &c->btree_trans_list);
 list_add_done:
-		mutex_unlock(&c->btree_trans_lock);
+		seqmutex_unlock(&c->btree_trans_lock);
 	}
 }
@@ -3044,6 +3044,12 @@ void bch2_trans_exit(struct btree_trans *trans)
 
 	bch2_trans_unlock(trans);
 
+	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
+		seqmutex_lock(&c->btree_trans_lock);
+		list_del(&trans->list);
+		seqmutex_unlock(&c->btree_trans_lock);
+	}
+
 	closure_sync(&trans->ref);
 
 	if (s)
@@ -3055,12 +3061,6 @@ void bch2_trans_exit(struct btree_trans *trans)
 
 	check_btree_paths_leaked(trans);
 
-	if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG_TRANSACTIONS)) {
-		mutex_lock(&c->btree_trans_lock);
-		list_del(&trans->list);
-		mutex_unlock(&c->btree_trans_lock);
-	}
-
 	srcu_read_unlock(&c->btree_trans_barrier, trans->srcu_idx);
 
 	bch2_journal_preres_put(&c->journal, &trans->journal_preres);
@@ -3198,7 +3198,7 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
 	}
 
 	INIT_LIST_HEAD(&c->btree_trans_list);
-	mutex_init(&c->btree_trans_lock);
+	seqmutex_init(&c->btree_trans_lock);
 
 	ret = mempool_init_kmalloc_pool(&c->btree_paths_pool, 1,
 			sizeof(struct btree_path) * nr +

...
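
Note the reordering in bch2_trans_exit() above: the transaction is now unlinked from btree_trans_list before closure_sync() rather than after, because the debugfs readers (below) take a ref on any transaction they find on the list while they drop the lock. Unlinking first guarantees no new reader can find the transaction, and closure_sync() then waits out any reader still holding a ref. In sketch form:

	seqmutex_lock(&c->btree_trans_lock);
	list_del(&trans->list);			/* no new readers can find trans */
	seqmutex_unlock(&c->btree_trans_lock);

	closure_sync(&trans->ref);		/* wait for readers already holding a ref */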
@@ -627,19 +627,26 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
 	struct bch_fs *c = i->c;
 	struct btree_trans *trans;
 	ssize_t ret = 0;
+	u32 seq;
 
 	i->ubuf = buf;
 	i->size = size;
 	i->ret = 0;
-
-	mutex_lock(&c->btree_trans_lock);
+restart:
+	seqmutex_lock(&c->btree_trans_lock);
 	list_for_each_entry(trans, &c->btree_trans_list, list) {
 		if (trans->locking_wait.task->pid <= i->iter)
 			continue;
 
+		closure_get(&trans->ref);
+		seq = seqmutex_seq(&c->btree_trans_lock);
+		seqmutex_unlock(&c->btree_trans_lock);
+
 		ret = flush_buf(i);
-		if (ret)
-			break;
+		if (ret) {
+			closure_put(&trans->ref);
+			goto unlocked;
+		}
 
 		bch2_btree_trans_to_text(&i->buf, trans);
 
@@ -651,9 +658,14 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
 		prt_newline(&i->buf);
 
 		i->iter = trans->locking_wait.task->pid;
+
+		closure_put(&trans->ref);
+
+		if (!seqmutex_relock(&c->btree_trans_lock, seq))
+			goto restart;
 	}
-	mutex_unlock(&c->btree_trans_lock);
+	seqmutex_unlock(&c->btree_trans_lock);
+unlocked:
 
 	if (i->buf.allocation_failure)
 		ret = -ENOMEM;
@@ -815,6 +827,7 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
 	struct bch_fs *c = i->c;
 	struct btree_trans *trans;
 	ssize_t ret = 0;
+	u32 seq;
 
 	i->ubuf = buf;
 	i->size = size;
@@ -822,21 +835,32 @@ static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
 	if (i->iter)
 		goto out;
 
-	mutex_lock(&c->btree_trans_lock);
+restart:
+	seqmutex_lock(&c->btree_trans_lock);
 	list_for_each_entry(trans, &c->btree_trans_list, list) {
 		if (trans->locking_wait.task->pid <= i->iter)
 			continue;
 
+		closure_get(&trans->ref);
+		seq = seqmutex_seq(&c->btree_trans_lock);
+		seqmutex_unlock(&c->btree_trans_lock);
+
 		ret = flush_buf(i);
-		if (ret)
-			break;
+		if (ret) {
+			closure_put(&trans->ref);
+			goto out;
+		}
 
 		bch2_check_for_deadlock(trans, &i->buf);
 
 		i->iter = trans->locking_wait.task->pid;
+
+		closure_put(&trans->ref);
+
+		if (!seqmutex_relock(&c->btree_trans_lock, seq))
+			goto restart;
 	}
-	mutex_unlock(&c->btree_trans_lock);
+	seqmutex_unlock(&c->btree_trans_lock);
 out:
 	if (i->buf.allocation_failure)
 		ret = -ENOMEM;

...
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BCACHEFS_SEQMUTEX_H
#define _BCACHEFS_SEQMUTEX_H

#include <linux/mutex.h>

struct seqmutex {
	struct mutex	lock;
	u32		seq;
};

#define seqmutex_init(_lock)	mutex_init(&(_lock)->lock)

static inline bool seqmutex_trylock(struct seqmutex *lock)
{
	return mutex_trylock(&lock->lock);
}

static inline void seqmutex_lock(struct seqmutex *lock)
{
	mutex_lock(&lock->lock);
}

static inline void seqmutex_unlock(struct seqmutex *lock)
{
	lock->seq++;
	mutex_unlock(&lock->lock);
}

static inline u32 seqmutex_seq(struct seqmutex *lock)
{
	return lock->seq;
}

static inline bool seqmutex_relock(struct seqmutex *lock, u32 seq)
{
	if (lock->seq != seq || !mutex_trylock(&lock->lock))
		return false;

	if (lock->seq != seq) {
		mutex_unlock(&lock->lock);
		return false;
	}

	return true;
}

#endif /* _BCACHEFS_SEQMUTEX_H */
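
Two things are worth making concrete here. First, seqmutex_unlock() bumps seq on every unlock, so seqmutex_relock() succeeds only if no unlock happened since the snapshot; the double check of lock->seq (before and after mutex_trylock()) closes the race where another unlock slips in between the cheap check and the trylock. Second, the callers above sample seqmutex_seq() before calling seqmutex_unlock(), which itself increments seq, so as committed the relock always fails and the scan restarts from the top; a later bcachefs change reworked the API to return the new sequence number from seqmutex_unlock(). Below is a minimal userspace analogue on pthreads (an illustrative sketch, not part of the commit; note pthread_mutex_trylock() returns 0 on success, hence the inverted test):

	/* seqmutex_demo.c - build with: cc -pthread seqmutex_demo.c */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	struct seqmutex {
		pthread_mutex_t	lock;
		uint32_t	seq;
	};

	static void seqmutex_lock(struct seqmutex *m)
	{
		pthread_mutex_lock(&m->lock);
	}

	static void seqmutex_unlock(struct seqmutex *m)
	{
		m->seq++;			/* every unlock invalidates old snapshots */
		pthread_mutex_unlock(&m->lock);
	}

	static bool seqmutex_relock(struct seqmutex *m, uint32_t seq)
	{
		/* cheap check first; trylock so we never block on a contended lock */
		if (m->seq != seq || pthread_mutex_trylock(&m->lock))
			return false;
		/* recheck under the lock: an unlock may have raced in between */
		if (m->seq != seq) {
			pthread_mutex_unlock(&m->lock);
			return false;
		}
		return true;
	}

	int main(void)
	{
		struct seqmutex m = { PTHREAD_MUTEX_INITIALIZER, 0 };
		uint32_t seq;

		seqmutex_lock(&m);
		seq = m.seq;			/* snapshot taken while holding, as in debug.c */
		seqmutex_unlock(&m);		/* bumps seq past our snapshot */
		printf("relock with stale seq:   %s\n",
		       seqmutex_relock(&m, seq) ? "ok" : "fails -> restart scan");

		seq = m.seq;			/* fresh snapshot; no unlock since */
		printf("relock with current seq: %s\n",
		       seqmutex_relock(&m, seq) ? "ok" : "fails");
		seqmutex_unlock(&m);
		return 0;
	}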
@@ -379,7 +379,7 @@ static void bch2_btree_wakeup_all(struct bch_fs *c)
 {
 	struct btree_trans *trans;
 
-	mutex_lock(&c->btree_trans_lock);
+	seqmutex_lock(&c->btree_trans_lock);
 	list_for_each_entry(trans, &c->btree_trans_list, list) {
 		struct btree_bkey_cached_common *b = READ_ONCE(trans->locking);
 
@@ -387,7 +387,7 @@ static void bch2_btree_wakeup_all(struct bch_fs *c)
 			six_lock_wakeup_all(&b->lock);
 	}
 
-	mutex_unlock(&c->btree_trans_lock);
+	seqmutex_unlock(&c->btree_trans_lock);
 }
 
 SHOW(bch2_fs)

...