Commit 5bead06b authored by Eric Biggers, committed by Greg Kroah-Hartman

fuse: fix deadlock with aio poll and fuse_iqueue::waitq.lock

[ Upstream commit 76e43c8c ]

When IOCB_CMD_POLL is used on the FUSE device, aio_poll() disables IRQs
and takes kioctx::ctx_lock, then fuse_iqueue::waitq.lock.

This may have to wait for fuse_iqueue::waitq.lock to be released by one
of many places that take it with IRQs enabled.  Since the IRQ handler
may take kioctx::ctx_lock, lockdep reports that a deadlock is possible.

Fix it by protecting the state of struct fuse_iqueue with a separate
spinlock, and only accessing fuse_iqueue::waitq using the versions of
the waitqueue functions which do IRQ-safe locking internally.

Reproducer:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/mount.h>
	#include <sys/stat.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <linux/aio_abi.h>

	int main()
	{
		char opts[128];
		int fd = open("/dev/fuse", O_RDWR);
		aio_context_t ctx = 0;
		struct iocb cb = { .aio_lio_opcode = IOCB_CMD_POLL, .aio_fildes = fd };
		struct iocb *cbp = &cb;

		sprintf(opts, "fd=%d,rootmode=040000,user_id=0,group_id=0", fd);
		mkdir("mnt", 0700);
		mount("foo",  "mnt", "fuse", 0, opts);
		syscall(__NR_io_setup, 1, &ctx);
		syscall(__NR_io_submit, ctx, 1, &cbp);
	}

Beginning of lockdep output:

	=====================================================
	WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected
	5.3.0-rc5 #9 Not tainted
	-----------------------------------------------------
	syz_fuse/135 [HC0[0]:SC0[0]:HE0:SE1] is trying to acquire:
	000000003590ceda (&fiq->waitq){+.+.}, at: spin_lock include/linux/spinlock.h:338 [inline]
	000000003590ceda (&fiq->waitq){+.+.}, at: aio_poll fs/aio.c:1751 [inline]
	000000003590ceda (&fiq->waitq){+.+.}, at: __io_submit_one.constprop.0+0x203/0x5b0 fs/aio.c:1825

	and this task is already holding:
	0000000075037284 (&(&ctx->ctx_lock)->rlock){..-.}, at: spin_lock_irq include/linux/spinlock.h:363 [inline]
	0000000075037284 (&(&ctx->ctx_lock)->rlock){..-.}, at: aio_poll fs/aio.c:1749 [inline]
	0000000075037284 (&(&ctx->ctx_lock)->rlock){..-.}, at: __io_submit_one.constprop.0+0x1f4/0x5b0 fs/aio.c:1825
	which would create a new lock dependency:
	 (&(&ctx->ctx_lock)->rlock){..-.} -> (&fiq->waitq){+.+.}

	but this new dependency connects a SOFTIRQ-irq-safe lock:
	 (&(&ctx->ctx_lock)->rlock){..-.}

	[...]

Reported-by: syzbot+af05535bb79520f95431@syzkaller.appspotmail.com
Reported-by: syzbot+d86c4426a01f60feddc7@syzkaller.appspotmail.com
Fixes: bfe4037e ("aio: implement IOCB_CMD_POLL")
Cc: <stable@vger.kernel.org> # v4.19+
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent bbe3e205
...@@ -331,7 +331,7 @@ static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req) ...@@ -331,7 +331,7 @@ static void queue_request(struct fuse_iqueue *fiq, struct fuse_req *req)
req->in.h.len = sizeof(struct fuse_in_header) + req->in.h.len = sizeof(struct fuse_in_header) +
len_args(req->in.numargs, (struct fuse_arg *) req->in.args); len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
list_add_tail(&req->list, &fiq->pending); list_add_tail(&req->list, &fiq->pending);
wake_up_locked(&fiq->waitq); wake_up(&fiq->waitq);
kill_fasync(&fiq->fasync, SIGIO, POLL_IN); kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
} }
...@@ -343,16 +343,16 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, ...@@ -343,16 +343,16 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget,
forget->forget_one.nodeid = nodeid; forget->forget_one.nodeid = nodeid;
forget->forget_one.nlookup = nlookup; forget->forget_one.nlookup = nlookup;
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->lock);
if (fiq->connected) { if (fiq->connected) {
fiq->forget_list_tail->next = forget; fiq->forget_list_tail->next = forget;
fiq->forget_list_tail = forget; fiq->forget_list_tail = forget;
wake_up_locked(&fiq->waitq); wake_up(&fiq->waitq);
kill_fasync(&fiq->fasync, SIGIO, POLL_IN); kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
} else { } else {
kfree(forget); kfree(forget);
} }
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
} }
static void flush_bg_queue(struct fuse_conn *fc) static void flush_bg_queue(struct fuse_conn *fc)
...@@ -365,10 +365,10 @@ static void flush_bg_queue(struct fuse_conn *fc) ...@@ -365,10 +365,10 @@ static void flush_bg_queue(struct fuse_conn *fc)
req = list_entry(fc->bg_queue.next, struct fuse_req, list); req = list_entry(fc->bg_queue.next, struct fuse_req, list);
list_del(&req->list); list_del(&req->list);
fc->active_background++; fc->active_background++;
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->lock);
req->in.h.unique = fuse_get_unique(fiq); req->in.h.unique = fuse_get_unique(fiq);
queue_request(fiq, req); queue_request(fiq, req);
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
} }
} }
...@@ -387,9 +387,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) ...@@ -387,9 +387,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
if (test_and_set_bit(FR_FINISHED, &req->flags)) if (test_and_set_bit(FR_FINISHED, &req->flags))
goto put_request; goto put_request;
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->lock);
list_del_init(&req->intr_entry); list_del_init(&req->intr_entry);
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
WARN_ON(test_bit(FR_PENDING, &req->flags)); WARN_ON(test_bit(FR_PENDING, &req->flags));
WARN_ON(test_bit(FR_SENT, &req->flags)); WARN_ON(test_bit(FR_SENT, &req->flags));
if (test_bit(FR_BACKGROUND, &req->flags)) { if (test_bit(FR_BACKGROUND, &req->flags)) {
...@@ -427,16 +427,16 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req) ...@@ -427,16 +427,16 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req) static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
{ {
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->lock);
if (test_bit(FR_FINISHED, &req->flags)) { if (test_bit(FR_FINISHED, &req->flags)) {
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
return; return;
} }
if (list_empty(&req->intr_entry)) { if (list_empty(&req->intr_entry)) {
list_add_tail(&req->intr_entry, &fiq->interrupts); list_add_tail(&req->intr_entry, &fiq->interrupts);
wake_up_locked(&fiq->waitq); wake_up(&fiq->waitq);
} }
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
kill_fasync(&fiq->fasync, SIGIO, POLL_IN); kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
} }
...@@ -466,16 +466,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req) ...@@ -466,16 +466,16 @@ static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
if (!err) if (!err)
return; return;
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->lock);
/* Request is not yet in userspace, bail out */ /* Request is not yet in userspace, bail out */
if (test_bit(FR_PENDING, &req->flags)) { if (test_bit(FR_PENDING, &req->flags)) {
list_del(&req->list); list_del(&req->list);
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
__fuse_put_request(req); __fuse_put_request(req);
req->out.h.error = -EINTR; req->out.h.error = -EINTR;
return; return;
} }
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
} }
/* /*
...@@ -490,9 +490,9 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) ...@@ -490,9 +490,9 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
struct fuse_iqueue *fiq = &fc->iq; struct fuse_iqueue *fiq = &fc->iq;
BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); BUG_ON(test_bit(FR_BACKGROUND, &req->flags));
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->lock);
if (!fiq->connected) { if (!fiq->connected) {
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
req->out.h.error = -ENOTCONN; req->out.h.error = -ENOTCONN;
} else { } else {
req->in.h.unique = fuse_get_unique(fiq); req->in.h.unique = fuse_get_unique(fiq);
...@@ -500,7 +500,7 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req) ...@@ -500,7 +500,7 @@ static void __fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
/* acquire extra reference, since request is still needed /* acquire extra reference, since request is still needed
after request_end() */ after request_end() */
__fuse_get_request(req); __fuse_get_request(req);
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
request_wait_answer(fc, req); request_wait_answer(fc, req);
/* Pairs with smp_wmb() in request_end() */ /* Pairs with smp_wmb() in request_end() */
...@@ -633,12 +633,12 @@ static int fuse_request_send_notify_reply(struct fuse_conn *fc, ...@@ -633,12 +633,12 @@ static int fuse_request_send_notify_reply(struct fuse_conn *fc,
__clear_bit(FR_ISREPLY, &req->flags); __clear_bit(FR_ISREPLY, &req->flags);
req->in.h.unique = unique; req->in.h.unique = unique;
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->lock);
if (fiq->connected) { if (fiq->connected) {
queue_request(fiq, req); queue_request(fiq, req);
err = 0; err = 0;
} }
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
return err; return err;
} }
...@@ -1082,12 +1082,12 @@ static int request_pending(struct fuse_iqueue *fiq) ...@@ -1082,12 +1082,12 @@ static int request_pending(struct fuse_iqueue *fiq)
* Unlike other requests this is assembled on demand, without a need * Unlike other requests this is assembled on demand, without a need
* to allocate a separate fuse_req structure. * to allocate a separate fuse_req structure.
* *
* Called with fiq->waitq.lock held, releases it * Called with fiq->lock held, releases it
*/ */
static int fuse_read_interrupt(struct fuse_iqueue *fiq, static int fuse_read_interrupt(struct fuse_iqueue *fiq,
struct fuse_copy_state *cs, struct fuse_copy_state *cs,
size_t nbytes, struct fuse_req *req) size_t nbytes, struct fuse_req *req)
__releases(fiq->waitq.lock) __releases(fiq->lock)
{ {
struct fuse_in_header ih; struct fuse_in_header ih;
struct fuse_interrupt_in arg; struct fuse_interrupt_in arg;
...@@ -1103,7 +1103,7 @@ __releases(fiq->waitq.lock) ...@@ -1103,7 +1103,7 @@ __releases(fiq->waitq.lock)
ih.unique = req->intr_unique; ih.unique = req->intr_unique;
arg.unique = req->in.h.unique; arg.unique = req->in.h.unique;
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
if (nbytes < reqsize) if (nbytes < reqsize)
return -EINVAL; return -EINVAL;
...@@ -1140,7 +1140,7 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq, ...@@ -1140,7 +1140,7 @@ static struct fuse_forget_link *dequeue_forget(struct fuse_iqueue *fiq,
static int fuse_read_single_forget(struct fuse_iqueue *fiq, static int fuse_read_single_forget(struct fuse_iqueue *fiq,
struct fuse_copy_state *cs, struct fuse_copy_state *cs,
size_t nbytes) size_t nbytes)
__releases(fiq->waitq.lock) __releases(fiq->lock)
{ {
int err; int err;
struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL); struct fuse_forget_link *forget = dequeue_forget(fiq, 1, NULL);
...@@ -1154,7 +1154,7 @@ __releases(fiq->waitq.lock) ...@@ -1154,7 +1154,7 @@ __releases(fiq->waitq.lock)
.len = sizeof(ih) + sizeof(arg), .len = sizeof(ih) + sizeof(arg),
}; };
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
kfree(forget); kfree(forget);
if (nbytes < ih.len) if (nbytes < ih.len)
return -EINVAL; return -EINVAL;
...@@ -1172,7 +1172,7 @@ __releases(fiq->waitq.lock) ...@@ -1172,7 +1172,7 @@ __releases(fiq->waitq.lock)
static int fuse_read_batch_forget(struct fuse_iqueue *fiq, static int fuse_read_batch_forget(struct fuse_iqueue *fiq,
struct fuse_copy_state *cs, size_t nbytes) struct fuse_copy_state *cs, size_t nbytes)
__releases(fiq->waitq.lock) __releases(fiq->lock)
{ {
int err; int err;
unsigned max_forgets; unsigned max_forgets;
...@@ -1186,13 +1186,13 @@ __releases(fiq->waitq.lock) ...@@ -1186,13 +1186,13 @@ __releases(fiq->waitq.lock)
}; };
if (nbytes < ih.len) { if (nbytes < ih.len) {
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
return -EINVAL; return -EINVAL;
} }
max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one);
head = dequeue_forget(fiq, max_forgets, &count); head = dequeue_forget(fiq, max_forgets, &count);
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
arg.count = count; arg.count = count;
ih.len += count * sizeof(struct fuse_forget_one); ih.len += count * sizeof(struct fuse_forget_one);
...@@ -1222,7 +1222,7 @@ __releases(fiq->waitq.lock) ...@@ -1222,7 +1222,7 @@ __releases(fiq->waitq.lock)
static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq, static int fuse_read_forget(struct fuse_conn *fc, struct fuse_iqueue *fiq,
struct fuse_copy_state *cs, struct fuse_copy_state *cs,
size_t nbytes) size_t nbytes)
__releases(fiq->waitq.lock) __releases(fiq->lock)
{ {
if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL) if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL)
return fuse_read_single_forget(fiq, cs, nbytes); return fuse_read_single_forget(fiq, cs, nbytes);
...@@ -1251,16 +1251,19 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, ...@@ -1251,16 +1251,19 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
unsigned reqsize; unsigned reqsize;
restart: restart:
spin_lock(&fiq->waitq.lock); for (;;) {
err = -EAGAIN; spin_lock(&fiq->lock);
if ((file->f_flags & O_NONBLOCK) && fiq->connected && if (!fiq->connected || request_pending(fiq))
!request_pending(fiq)) break;
goto err_unlock; spin_unlock(&fiq->lock);
err = wait_event_interruptible_exclusive_locked(fiq->waitq, if (file->f_flags & O_NONBLOCK)
return -EAGAIN;
err = wait_event_interruptible_exclusive(fiq->waitq,
!fiq->connected || request_pending(fiq)); !fiq->connected || request_pending(fiq));
if (err) if (err)
goto err_unlock; return err;
}
if (!fiq->connected) { if (!fiq->connected) {
err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV; err = (fc->aborted && fc->abort_err) ? -ECONNABORTED : -ENODEV;
...@@ -1284,7 +1287,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, ...@@ -1284,7 +1287,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
req = list_entry(fiq->pending.next, struct fuse_req, list); req = list_entry(fiq->pending.next, struct fuse_req, list);
clear_bit(FR_PENDING, &req->flags); clear_bit(FR_PENDING, &req->flags);
list_del_init(&req->list); list_del_init(&req->list);
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
in = &req->in; in = &req->in;
reqsize = in->h.len; reqsize = in->h.len;
...@@ -1341,7 +1344,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, ...@@ -1341,7 +1344,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
return err; return err;
err_unlock: err_unlock:
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
return err; return err;
} }
...@@ -2054,12 +2057,12 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait) ...@@ -2054,12 +2057,12 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait)
fiq = &fud->fc->iq; fiq = &fud->fc->iq;
poll_wait(file, &fiq->waitq, wait); poll_wait(file, &fiq->waitq, wait);
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->lock);
if (!fiq->connected) if (!fiq->connected)
mask = EPOLLERR; mask = EPOLLERR;
else if (request_pending(fiq)) else if (request_pending(fiq))
mask |= EPOLLIN | EPOLLRDNORM; mask |= EPOLLIN | EPOLLRDNORM;
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
return mask; return mask;
} }
...@@ -2150,15 +2153,15 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort) ...@@ -2150,15 +2153,15 @@ void fuse_abort_conn(struct fuse_conn *fc, bool is_abort)
fc->max_background = UINT_MAX; fc->max_background = UINT_MAX;
flush_bg_queue(fc); flush_bg_queue(fc);
spin_lock(&fiq->waitq.lock); spin_lock(&fiq->lock);
fiq->connected = 0; fiq->connected = 0;
list_for_each_entry(req, &fiq->pending, list) list_for_each_entry(req, &fiq->pending, list)
clear_bit(FR_PENDING, &req->flags); clear_bit(FR_PENDING, &req->flags);
list_splice_tail_init(&fiq->pending, &to_end); list_splice_tail_init(&fiq->pending, &to_end);
while (forget_pending(fiq)) while (forget_pending(fiq))
kfree(dequeue_forget(fiq, 1, NULL)); kfree(dequeue_forget(fiq, 1, NULL));
wake_up_all_locked(&fiq->waitq); wake_up_all(&fiq->waitq);
spin_unlock(&fiq->waitq.lock); spin_unlock(&fiq->lock);
kill_fasync(&fiq->fasync, SIGIO, POLL_IN); kill_fasync(&fiq->fasync, SIGIO, POLL_IN);
end_polls(fc); end_polls(fc);
wake_up_all(&fc->blocked_waitq); wake_up_all(&fc->blocked_waitq);
......
...@@ -388,6 +388,9 @@ struct fuse_iqueue { ...@@ -388,6 +388,9 @@ struct fuse_iqueue {
/** Connection established */ /** Connection established */
unsigned connected; unsigned connected;
/** Lock protecting accesses to members of this structure */
spinlock_t lock;
/** Readers of the connection are waiting on this */ /** Readers of the connection are waiting on this */
wait_queue_head_t waitq; wait_queue_head_t waitq;
......
...@@ -585,6 +585,7 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) ...@@ -585,6 +585,7 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root)
static void fuse_iqueue_init(struct fuse_iqueue *fiq) static void fuse_iqueue_init(struct fuse_iqueue *fiq)
{ {
memset(fiq, 0, sizeof(struct fuse_iqueue)); memset(fiq, 0, sizeof(struct fuse_iqueue));
spin_lock_init(&fiq->lock);
init_waitqueue_head(&fiq->waitq); init_waitqueue_head(&fiq->waitq);
INIT_LIST_HEAD(&fiq->pending); INIT_LIST_HEAD(&fiq->pending);
INIT_LIST_HEAD(&fiq->interrupts); INIT_LIST_HEAD(&fiq->interrupts);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment