Commit c605c396 authored by Linus Torvalds

Merge tag 'io_uring-5.15-2021-09-11' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

 - Fix an off-by-one in a BUILD_BUG_ON() check. Not a real issue right
   now as we have plenty of flags left, but could become one. (Hao)

 - Fix lockdep issue introduced in this merge window (me)

 - Fix a few issues with the worker creation (me, Pavel, Qiang)

 - Fix regression with wq_has_sleeper() for IOPOLL (Pavel)

 - Timeout link error propagation fix (Pavel)

* tag 'io_uring-5.15-2021-09-11' of git://git.kernel.dk/linux-block:
  io_uring: fix off-by-one in BUILD_BUG_ON check of __REQ_F_LAST_BIT
  io_uring: fail links of cancelled timeouts
  io-wq: fix memory leak in create_io_worker()
  io-wq: fix silly logic error in io_task_work_match()
  io_uring: drop ctx->uring_lock before acquiring sqd->lock
  io_uring: fix missing mb() before waitqueue_active
  io-wq: fix cancellation on create-worker failure
parents c0f7e49f 32c2d33e
...@@ -709,6 +709,7 @@ static void create_worker_cont(struct callback_head *cb) ...@@ -709,6 +709,7 @@ static void create_worker_cont(struct callback_head *cb)
} }
raw_spin_unlock(&wqe->lock); raw_spin_unlock(&wqe->lock);
io_worker_ref_put(wqe->wq); io_worker_ref_put(wqe->wq);
kfree(worker);
return; return;
} }
...@@ -725,6 +726,7 @@ static void io_workqueue_create(struct work_struct *work) ...@@ -725,6 +726,7 @@ static void io_workqueue_create(struct work_struct *work)
if (!io_queue_worker_create(worker, acct, create_worker_cont)) { if (!io_queue_worker_create(worker, acct, create_worker_cont)) {
clear_bit_unlock(0, &worker->create_state); clear_bit_unlock(0, &worker->create_state);
io_worker_release(worker); io_worker_release(worker);
kfree(worker);
} }
} }
...@@ -759,6 +761,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) ...@@ -759,6 +761,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
if (!IS_ERR(tsk)) { if (!IS_ERR(tsk)) {
io_init_new_worker(wqe, worker, tsk); io_init_new_worker(wqe, worker, tsk);
} else if (!io_should_retry_thread(PTR_ERR(tsk))) { } else if (!io_should_retry_thread(PTR_ERR(tsk))) {
kfree(worker);
goto fail; goto fail;
} else { } else {
INIT_WORK(&worker->work, io_workqueue_create); INIT_WORK(&worker->work, io_workqueue_create);
...@@ -832,6 +835,11 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work) ...@@ -832,6 +835,11 @@ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
wq_list_add_after(&work->list, &tail->list, &acct->work_list); wq_list_add_after(&work->list, &tail->list, &acct->work_list);
} }
static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
{
return work == data;
}
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{ {
struct io_wqe_acct *acct = io_work_get_acct(wqe, work); struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
...@@ -844,7 +852,6 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) ...@@ -844,7 +852,6 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
*/ */
if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
(work->flags & IO_WQ_WORK_CANCEL)) { (work->flags & IO_WQ_WORK_CANCEL)) {
run_cancel:
io_run_cancel(work, wqe); io_run_cancel(work, wqe);
return; return;
} }
...@@ -864,16 +871,23 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) ...@@ -864,16 +871,23 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
bool did_create; bool did_create;
did_create = io_wqe_create_worker(wqe, acct); did_create = io_wqe_create_worker(wqe, acct);
if (unlikely(!did_create)) { if (likely(did_create))
return;
raw_spin_lock(&wqe->lock); raw_spin_lock(&wqe->lock);
/* fatal condition, failed to create the first worker */ /* fatal condition, failed to create the first worker */
if (!acct->nr_workers) { if (!acct->nr_workers) {
raw_spin_unlock(&wqe->lock); struct io_cb_cancel_data match = {
goto run_cancel; .fn = io_wq_work_match_item,
.data = work,
.cancel_all = false,
};
if (io_acct_cancel_pending_work(wqe, acct, &match))
raw_spin_lock(&wqe->lock);
} }
raw_spin_unlock(&wqe->lock); raw_spin_unlock(&wqe->lock);
} }
}
} }
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
...@@ -1122,7 +1136,7 @@ static bool io_task_work_match(struct callback_head *cb, void *data) ...@@ -1122,7 +1136,7 @@ static bool io_task_work_match(struct callback_head *cb, void *data)
{ {
struct io_worker *worker; struct io_worker *worker;
if (cb->func != create_worker_cb || cb->func != create_worker_cont) if (cb->func != create_worker_cb && cb->func != create_worker_cont)
return false; return false;
worker = container_of(cb, struct io_worker, create_work); worker = container_of(cb, struct io_worker, create_work);
return worker->wqe->wq == data; return worker->wqe->wq == data;
...@@ -1143,9 +1157,14 @@ static void io_wq_exit_workers(struct io_wq *wq) ...@@ -1143,9 +1157,14 @@ static void io_wq_exit_workers(struct io_wq *wq)
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
struct io_worker *worker; struct io_worker *worker;
struct io_wqe_acct *acct;
worker = container_of(cb, struct io_worker, create_work); worker = container_of(cb, struct io_worker, create_work);
atomic_dec(&worker->wqe->acct[worker->create_index].nr_running); acct = io_wqe_get_acct(worker);
atomic_dec(&acct->nr_running);
raw_spin_lock(&worker->wqe->lock);
acct->nr_workers--;
raw_spin_unlock(&worker->wqe->lock);
io_worker_ref_put(wq); io_worker_ref_put(wq);
clear_bit_unlock(0, &worker->create_state); clear_bit_unlock(0, &worker->create_state);
io_worker_release(worker); io_worker_release(worker);
......
...@@ -1482,6 +1482,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status) ...@@ -1482,6 +1482,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
struct io_timeout_data *io = req->async_data; struct io_timeout_data *io = req->async_data;
if (hrtimer_try_to_cancel(&io->timer) != -1) { if (hrtimer_try_to_cancel(&io->timer) != -1) {
if (status)
req_set_fail(req);
atomic_set(&req->ctx->cq_timeouts, atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1); atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list); list_del_init(&req->timeout.list);
...@@ -1619,8 +1621,11 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) ...@@ -1619,8 +1621,11 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
{ {
/* see waitqueue_active() comment */
smp_mb();
if (ctx->flags & IORING_SETUP_SQPOLL) { if (ctx->flags & IORING_SETUP_SQPOLL) {
if (wq_has_sleeper(&ctx->cq_wait)) if (waitqueue_active(&ctx->cq_wait))
wake_up_all(&ctx->cq_wait); wake_up_all(&ctx->cq_wait);
} }
if (io_should_trigger_evfd(ctx)) if (io_should_trigger_evfd(ctx))
...@@ -10550,7 +10555,14 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx, ...@@ -10550,7 +10555,14 @@ static int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
if (ctx->flags & IORING_SETUP_SQPOLL) { if (ctx->flags & IORING_SETUP_SQPOLL) {
sqd = ctx->sq_data; sqd = ctx->sq_data;
if (sqd) { if (sqd) {
/*
* Observe the correct sqd->lock -> ctx->uring_lock
* ordering. Fine to drop uring_lock here, we hold
* a ref to the ctx.
*/
mutex_unlock(&ctx->uring_lock);
mutex_lock(&sqd->lock); mutex_lock(&sqd->lock);
mutex_lock(&ctx->uring_lock);
tctx = sqd->thread->io_uring; tctx = sqd->thread->io_uring;
} }
} else { } else {
...@@ -10853,7 +10865,7 @@ static int __init io_uring_init(void) ...@@ -10853,7 +10865,7 @@ static int __init io_uring_init(void)
BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int)); BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT); SLAB_ACCOUNT);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment