Commit f331c5de authored by Linus Torvalds

Merge tag 'io_uring-6.3-2023-03-09' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Stop setting PF_NO_SETAFFINITY on io-wq workers.

   This has been reported in the past as it confuses some applications:
   attempts to set the CPU affinity of some of their threads fail with
   -1/EINVAL. The most recent report was on cpusets, where enabling
   cpusets while io-wq workers are active will fail.

   Just deal with the mask changing by checking when a worker times out,
   and then exit if we have no work pending.

 - Fix an issue with passthrough support where we don't properly check
   if the file type has pollable uring_cmd support.

 - Fix a reported W=1 warning on a variable being set and unused. Add a
   special helper for iterating these lists that doesn't save the
   previous list element, if that iterator never ends up using it.

* tag 'io_uring-6.3-2023-03-09' of git://git.kernel.dk/linux:
  io_uring: silence variable ‘prev’ set but not used warning
  io_uring/uring_cmd: ensure that device supports IOPOLL
  io_uring/io-wq: stop setting PF_NO_SETAFFINITY on io-wq workers
parents 49be4fb2 fa780334
...@@ -616,7 +616,7 @@ static int io_wqe_worker(void *data) ...@@ -616,7 +616,7 @@ static int io_wqe_worker(void *data)
struct io_wqe_acct *acct = io_wqe_get_acct(worker); struct io_wqe_acct *acct = io_wqe_get_acct(worker);
struct io_wqe *wqe = worker->wqe; struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq; struct io_wq *wq = wqe->wq;
bool last_timeout = false; bool exit_mask = false, last_timeout = false;
char buf[TASK_COMM_LEN]; char buf[TASK_COMM_LEN];
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
...@@ -632,8 +632,11 @@ static int io_wqe_worker(void *data) ...@@ -632,8 +632,11 @@ static int io_wqe_worker(void *data)
io_worker_handle_work(worker); io_worker_handle_work(worker);
raw_spin_lock(&wqe->lock); raw_spin_lock(&wqe->lock);
/* timed out, exit unless we're the last worker */ /*
if (last_timeout && acct->nr_workers > 1) { * Last sleep timed out. Exit if we're not the last worker,
* or if someone modified our affinity.
*/
if (last_timeout && (exit_mask || acct->nr_workers > 1)) {
acct->nr_workers--; acct->nr_workers--;
raw_spin_unlock(&wqe->lock); raw_spin_unlock(&wqe->lock);
__set_current_state(TASK_RUNNING); __set_current_state(TASK_RUNNING);
...@@ -652,7 +655,11 @@ static int io_wqe_worker(void *data) ...@@ -652,7 +655,11 @@ static int io_wqe_worker(void *data)
continue; continue;
break; break;
} }
last_timeout = !ret; if (!ret) {
last_timeout = true;
exit_mask = !cpumask_test_cpu(raw_smp_processor_id(),
wqe->cpu_mask);
}
} }
if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) if (test_bit(IO_WQ_BIT_EXIT, &wq->state))
...@@ -704,7 +711,6 @@ static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker, ...@@ -704,7 +711,6 @@ static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker,
tsk->worker_private = worker; tsk->worker_private = worker;
worker->task = tsk; worker->task = tsk;
set_cpus_allowed_ptr(tsk, wqe->cpu_mask); set_cpus_allowed_ptr(tsk, wqe->cpu_mask);
tsk->flags |= PF_NO_SETAFFINITY;
raw_spin_lock(&wqe->lock); raw_spin_lock(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
......
...@@ -1499,14 +1499,14 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node) ...@@ -1499,14 +1499,14 @@ void io_free_batch_list(struct io_ring_ctx *ctx, struct io_wq_work_node *node)
static void __io_submit_flush_completions(struct io_ring_ctx *ctx) static void __io_submit_flush_completions(struct io_ring_ctx *ctx)
__must_hold(&ctx->uring_lock) __must_hold(&ctx->uring_lock)
{ {
struct io_wq_work_node *node, *prev;
struct io_submit_state *state = &ctx->submit_state; struct io_submit_state *state = &ctx->submit_state;
struct io_wq_work_node *node;
__io_cq_lock(ctx); __io_cq_lock(ctx);
/* must come first to preserve CQE ordering in failure cases */ /* must come first to preserve CQE ordering in failure cases */
if (state->cqes_count) if (state->cqes_count)
__io_flush_post_cqes(ctx); __io_flush_post_cqes(ctx);
wq_list_for_each(node, prev, &state->compl_reqs) { __wq_list_for_each(node, &state->compl_reqs) {
struct io_kiocb *req = container_of(node, struct io_kiocb, struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list); comp_list);
......
...@@ -3,6 +3,9 @@ ...@@ -3,6 +3,9 @@
#include <linux/io_uring_types.h> #include <linux/io_uring_types.h>
#define __wq_list_for_each(pos, head) \
for (pos = (head)->first; pos; pos = (pos)->next)
#define wq_list_for_each(pos, prv, head) \ #define wq_list_for_each(pos, prv, head) \
for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next) for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
......
...@@ -108,7 +108,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) ...@@ -108,7 +108,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
struct file *file = req->file; struct file *file = req->file;
int ret; int ret;
if (!req->file->f_op->uring_cmd) if (!file->f_op->uring_cmd)
return -EOPNOTSUPP; return -EOPNOTSUPP;
ret = security_uring_cmd(ioucmd); ret = security_uring_cmd(ioucmd);
...@@ -120,6 +120,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) ...@@ -120,6 +120,8 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
if (ctx->flags & IORING_SETUP_CQE32) if (ctx->flags & IORING_SETUP_CQE32)
issue_flags |= IO_URING_F_CQE32; issue_flags |= IO_URING_F_CQE32;
if (ctx->flags & IORING_SETUP_IOPOLL) { if (ctx->flags & IORING_SETUP_IOPOLL) {
if (!file->f_op->uring_cmd_iopoll)
return -EOPNOTSUPP;
issue_flags |= IO_URING_F_IOPOLL; issue_flags |= IO_URING_F_IOPOLL;
req->iopoll_completed = 0; req->iopoll_completed = 0;
WRITE_ONCE(ioucmd->cookie, NULL); WRITE_ONCE(ioucmd->cookie, NULL);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment