Commit 0ee818c3 authored by Linus Torvalds

Merge tag 'io_uring-5.14-2021-07-24' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

 - Fix a memory leak due to a race condition in io_init_wq_offload
   (Yang)

 - Poll error handling fixes (Pavel)

 - Fix early fdput() regression (me)

 - Don't reissue iopoll requests off release path (me)

 - Add a safety check for io-wq queue off wrong path (me)

* tag 'io_uring-5.14-2021-07-24' of git://git.kernel.dk/linux-block:
  io_uring: explicitly catch any illegal async queue attempt
  io_uring: never attempt iopoll reissue from release path
  io_uring: fix early fdput() of file
  io_uring: fix memleak in io_init_wq_offload()
  io_uring: remove double poll entry on arm failure
  io_uring: explicitly count entries for poll reqs
parents 4d4a60ce 991468dc
fs/io-wq.c
@@ -731,7 +731,12 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
         int work_flags;
         unsigned long flags;
 
-        if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) {
+        /*
+         * If io-wq is exiting for this task, or if the request has explicitly
+         * been marked as one that should not get executed, cancel it here.
+         */
+        if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
+            (work->flags & IO_WQ_WORK_CANCEL)) {
                 io_run_cancel(work, wqe);
                 return;
         }
fs/io_uring.c
@@ -1294,6 +1294,17 @@ static void io_queue_async_work(struct io_kiocb *req)
         /* init ->work of the whole link before punting */
         io_prep_async_link(req);
+
+        /*
+         * Not expected to happen, but if we do have a bug where this _can_
+         * happen, catch it here and ensure the request is marked as
+         * canceled. That will make io-wq go through the usual work cancel
+         * procedure rather than attempt to run this request (or create a new
+         * worker for it).
+         */
+        if (WARN_ON_ONCE(!same_thread_group(req->task, current)))
+                req->work.flags |= IO_WQ_WORK_CANCEL;
+
         trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
                                         &req->work, req->flags);
         io_wq_enqueue(tctx->io_wq, &req->work);
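
The two hunks above work together: io_queue_async_work() flags any request that is somehow queued from outside the owning thread group, and io_wqe_enqueue() routes flagged (or late) work to the cancel path instead of running it or spawning a worker for it. Below is a minimal userspace sketch of that pattern, not kernel code; the types, flag names and helpers are invented stand-ins for the io-wq machinery.

#include <stdbool.h>
#include <stdio.h>

#define WORK_CANCEL  (1u << 0)   /* stands in for IO_WQ_WORK_CANCEL */
#define WQ_EXITING   (1u << 1)   /* stands in for IO_WQ_BIT_EXIT    */

struct work {
        unsigned int flags;
        const char *name;
};

static unsigned int wq_state;

static void run_cancel(struct work *w) { printf("cancel: %s\n", w->name); }
static void run_work(struct work *w)   { printf("run:    %s\n", w->name); }

/* Like io_wqe_enqueue(): cancel if the queue is exiting or the work was
 * explicitly marked as not-to-be-executed. */
static void enqueue(struct work *w)
{
        if ((wq_state & WQ_EXITING) || (w->flags & WORK_CANCEL)) {
                run_cancel(w);
                return;
        }
        run_work(w);
}

/* Like io_queue_async_work(): if queueing from an unexpected context,
 * mark the work cancelled so enqueue() takes the cancel path. */
static void queue_async(struct work *w, bool same_group)
{
        if (!same_group)        /* WARN_ON_ONCE() in the real code */
                w->flags |= WORK_CANCEL;
        enqueue(w);
}

int main(void)
{
        struct work ok  = { .flags = 0, .name = "ok" };
        struct work bad = { .flags = 0, .name = "wrong-task" };

        queue_async(&ok, true);     /* runs */
        queue_async(&bad, false);   /* cancelled, never executed */
        return 0;
}
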
@@ -2205,7 +2216,7 @@ static inline bool io_run_task_work(void)
  * Find and free completed poll iocbs
  */
 static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
-                               struct list_head *done)
+                               struct list_head *done, bool resubmit)
 {
         struct req_batch rb;
         struct io_kiocb *req;
@@ -2220,7 +2231,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
                 req = list_first_entry(done, struct io_kiocb, inflight_entry);
                 list_del(&req->inflight_entry);
 
-                if (READ_ONCE(req->result) == -EAGAIN &&
+                if (READ_ONCE(req->result) == -EAGAIN && resubmit &&
                     !(req->flags & REQ_F_DONT_REISSUE)) {
                         req->iopoll_completed = 0;
                         req_ref_get(req);
@@ -2244,7 +2255,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
 }
 
 static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
-                        long min)
+                        long min, bool resubmit)
 {
         struct io_kiocb *req, *tmp;
         LIST_HEAD(done);
@@ -2287,7 +2298,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
         }
 
         if (!list_empty(&done))
-                io_iopoll_complete(ctx, nr_events, &done);
+                io_iopoll_complete(ctx, nr_events, &done, resubmit);
 
         return ret;
 }
@@ -2305,7 +2316,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx)
         while (!list_empty(&ctx->iopoll_list)) {
                 unsigned int nr_events = 0;
 
-                io_do_iopoll(ctx, &nr_events, 0);
+                io_do_iopoll(ctx, &nr_events, 0, false);
 
                 /* let it sleep and repeat later if can't complete a request */
                 if (nr_events == 0)
@@ -2367,7 +2378,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
                             list_empty(&ctx->iopoll_list))
                                 break;
                 }
-                ret = io_do_iopoll(ctx, &nr_events, min);
+                ret = io_do_iopoll(ctx, &nr_events, min, true);
         } while (!ret && nr_events < min && !need_resched());
 out:
         mutex_unlock(&ctx->uring_lock);
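
The resubmit flag threaded through io_do_iopoll() and io_iopoll_complete() lets the normal polling paths reissue -EAGAIN completions, while the release-time reaping path (io_iopoll_try_reap_events(), which passes false) never does, since the ring is being torn down and there is nothing to resubmit to. A small standalone sketch of that gating follows; it is not kernel code and the names are illustrative.

#include <stdbool.h>
#include <stdio.h>

#define EAGAIN_RES (-11)   /* stand-in for -EAGAIN */

struct req {
        int result;
        bool done;
};

/* Like the reshaped io_iopoll_complete(): a request that came back with
 * -EAGAIN is reissued only when the caller allows it. */
static void complete_reqs(struct req *reqs, int n, bool resubmit)
{
        for (int i = 0; i < n; i++) {
                if (reqs[i].result == EAGAIN_RES && resubmit) {
                        printf("req %d: reissue\n", i);
                        continue;
                }
                reqs[i].done = true;
                printf("req %d: complete (%d)\n", i, reqs[i].result);
        }
}

int main(void)
{
        struct req reqs[2] = { { .result = 0 }, { .result = EAGAIN_RES } };

        /* Normal iopoll paths (io_iopoll_check, sqpoll thread): may reissue. */
        complete_reqs(reqs, 2, true);
        /* Release/reap path (io_iopoll_try_reap_events): never reissue. */
        complete_reqs(reqs, 2, false);
        return 0;
}
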
@@ -4802,6 +4813,7 @@ IO_NETOP_FN(recv);
 
 struct io_poll_table {
         struct poll_table_struct pt;
         struct io_kiocb *req;
+        int nr_entries;
         int error;
 };
@@ -4995,11 +5007,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
         struct io_kiocb *req = pt->req;
 
         /*
-         * If poll->head is already set, it's because the file being polled
-         * uses multiple waitqueues for poll handling (eg one for read, one
-         * for write). Setup a separate io_poll_iocb if this happens.
+         * The file being polled uses multiple waitqueues for poll handling
+         * (e.g. one for read, one for write). Setup a separate io_poll_iocb
+         * if this happens.
          */
-        if (unlikely(poll->head)) {
+        if (unlikely(pt->nr_entries)) {
                 struct io_poll_iocb *poll_one = poll;
 
                 /* already have a 2nd entry, fail a third attempt */
@@ -5027,7 +5039,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
                 *poll_ptr = poll;
         }
 
-        pt->error = 0;
+        pt->nr_entries++;
         poll->head = head;
 
         if (poll->events & EPOLLEXCLUSIVE)
@@ -5104,11 +5116,16 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
         ipt->pt._key = mask;
         ipt->req = req;
-        ipt->error = -EINVAL;
+        ipt->error = 0;
+        ipt->nr_entries = 0;
 
         mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+        if (unlikely(!ipt->nr_entries) && !ipt->error)
+                ipt->error = -EINVAL;
 
         spin_lock_irq(&ctx->completion_lock);
+        if (ipt->error)
+                io_poll_remove_double(req);
         if (likely(poll->head)) {
                 spin_lock(&poll->head->lock);
                 if (unlikely(list_empty(&poll->wait.entry))) {
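
The poll changes replace the old poll->head test with an explicit nr_entries counter: __io_queue_proc() counts every waitqueue entry it adds, and __io_arm_poll_handler() now starts from error = 0, treats "no entries and no recorded error" after vfs_poll() as -EINVAL, and removes the second (double) poll entry whenever arming failed. A rough userspace sketch of that counting logic is below; the helper names and error constants are invented for illustration.

#include <stdio.h>

#define MAX_POLL_ENTRIES 2   /* primary entry plus one "double" entry */
#define ERR_INVAL (-22)      /* stand-in for -EINVAL */

struct poll_table {
        int nr_entries;
        int error;
};

/* Like __io_queue_proc(): called once per waitqueue the file adds. The
 * first call uses the primary entry, the second the "double" entry, and
 * a third attempt is an error. */
static void queue_proc(struct poll_table *pt)
{
        if (pt->nr_entries >= MAX_POLL_ENTRIES) {
                pt->error = ERR_INVAL;
                return;
        }
        pt->nr_entries++;
}

/* Like __io_arm_poll_handler(): let the file's poll routine add entries,
 * then decide. Zero entries with no recorded error means the file never
 * hooked a waitqueue: fail and drop any partially armed double entry. */
static int arm_poll(int waitqueues)
{
        struct poll_table pt = { .nr_entries = 0, .error = 0 };

        for (int i = 0; i < waitqueues; i++)   /* stands in for vfs_poll() */
                queue_proc(&pt);

        if (pt.nr_entries == 0 && pt.error == 0)
                pt.error = ERR_INVAL;
        if (pt.error)
                printf("arm failed (%d), removing double entry\n", pt.error);
        else
                printf("armed, nr_entries=%d\n", pt.nr_entries);
        return pt.error;
}

int main(void)
{
        arm_poll(1);   /* typical single waitqueue */
        arm_poll(2);   /* read + write waitqueues */
        arm_poll(0);   /* nothing added: now caught as an error */
        arm_poll(3);   /* third entry attempt: error plus cleanup */
        return 0;
}
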
@@ -6792,7 +6809,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
                 mutex_lock(&ctx->uring_lock);
                 if (!list_empty(&ctx->iopoll_list))
-                        io_do_iopoll(ctx, &nr_events, 0);
+                        io_do_iopoll(ctx, &nr_events, 0, true);
 
                 /*
                  * Don't submit if refs are dying, good for io_uring_register(),
@@ -7899,15 +7916,19 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
         struct io_wq_data data;
         unsigned int concurrency;
 
+        mutex_lock(&ctx->uring_lock);
         hash = ctx->hash_map;
         if (!hash) {
                 hash = kzalloc(sizeof(*hash), GFP_KERNEL);
-                if (!hash)
+                if (!hash) {
+                        mutex_unlock(&ctx->uring_lock);
                         return ERR_PTR(-ENOMEM);
+                }
                 refcount_set(&hash->refs, 1);
                 init_waitqueue_head(&hash->wait);
                 ctx->hash_map = hash;
         }
+        mutex_unlock(&ctx->uring_lock);
 
         data.hash = hash;
         data.task = task;
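
The memleak fix takes ctx->uring_lock around the lazy creation of ctx->hash_map, so two tasks sharing a ring cannot both observe NULL, both allocate a hash map, and leak one of the allocations. Here is a minimal pthread sketch of the same check-then-allocate-under-a-lock pattern; the names are illustrative and this is not the io_uring API.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct hash_map { int refs; };

static struct hash_map *ctx_hash_map;
static pthread_mutex_t uring_lock = PTHREAD_MUTEX_INITIALIZER;

/* Like the fixed io_init_wq_offload(): the check and the allocation both
 * happen under the lock. Without it, two threads can both see NULL, both
 * allocate, and the loser's allocation is overwritten and leaked. */
static struct hash_map *get_hash_map(void)
{
        struct hash_map *hash;

        pthread_mutex_lock(&uring_lock);
        hash = ctx_hash_map;
        if (!hash) {
                hash = calloc(1, sizeof(*hash));
                if (!hash) {
                        pthread_mutex_unlock(&uring_lock);
                        return NULL;
                }
                hash->refs = 1;
                ctx_hash_map = hash;
        }
        pthread_mutex_unlock(&uring_lock);
        return hash;
}

static void *worker(void *arg)
{
        (void)arg;
        printf("hash map: %p\n", (void *)get_hash_map());
        return NULL;
}

int main(void)
{
        pthread_t t[2];

        for (int i = 0; i < 2; i++)
                pthread_create(&t[i], NULL, worker, NULL);
        for (int i = 0; i < 2; i++)
                pthread_join(t[i], NULL);
        free(ctx_hash_map);
        return 0;
}
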
@@ -7981,9 +8002,11 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
                 f = fdget(p->wq_fd);
                 if (!f.file)
                         return -ENXIO;
-                fdput(f);
-                if (f.file->f_op != &io_uring_fops)
+                if (f.file->f_op != &io_uring_fops) {
+                        fdput(f);
                         return -EINVAL;
+                }
+                fdput(f);
         }
         if (ctx->flags & IORING_SETUP_SQPOLL) {
                 struct task_struct *tsk;
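
The fdput() reorder matters because fdput(f) drops the reference fdget() took, after which f.file may already be freed; the old code then dereferenced f.file->f_op. The fix does the f_op check first and releases the reference on both paths. A loose userspace analogy, with an arbitrary path and invented helper name, is to finish every check that needs the descriptor before closing it:

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

/* Do all checks that need the open descriptor *before* releasing it.
 * The bug pattern was the kernel equivalent of close(fd) followed by
 * fstat(fd). */
static int check_is_regular(const char *path)
{
        struct stat st;
        int fd = open(path, O_RDONLY);

        if (fd < 0)
                return -1;              /* like returning -ENXIO */
        if (fstat(fd, &st) < 0 || !S_ISREG(st.st_mode)) {
                close(fd);              /* release only after the check */
                return -1;              /* like returning -EINVAL */
        }
        close(fd);
        return 0;
}

int main(void)
{
        printf("/etc/hostname: %s\n",
               check_is_regular("/etc/hostname") ? "rejected" : "ok");
        return 0;
}
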