Commit 5bd831a4 authored by Linus Torvalds

Merge tag 'io_uring-5.5-20191212' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

 - A tweak to IOSQE_IO_LINK (also marked for stable) to allow links that
   don't sever if the result is < 0.

   This is mostly for linked timeouts, where a pure timeout always
   completes with -ETIME. That made links useless for that case, so the
   new IOSQE_IO_HARDLINK flag keeps the chain intact even on an error
   result (see the sketch after the commit details below).

 - Five minor optimizations to fix and improve cases that regressed
   since v5.4.

 - An SQTHREAD locking fix.

 - A sendmsg/recvmsg iov assignment fix.

 - Net fix where read_iter/write_iter don't honor IOCB_NOWAIT, and
   subsequently ensuring that works for io_uring.

 - Fix for a case where an invalid opcode could return -EBADF instead of
   -EINVAL, if the ->fd of that sqe was set to an invalid fd value.

* tag 'io_uring-5.5-20191212' of git://git.kernel.dk/linux-block:
  io_uring: ensure we return -EINVAL on unknown opcode
  io_uring: add sockets to list of files that support non-blocking issue
  net: make socket read/write_iter() honor IOCB_NOWAIT
  io_uring: only hash regular files for async work execution
  io_uring: run next sqe inline if possible
  io_uring: don't dynamically allocate poll data
  io_uring: deferred send/recvmsg should assign iov
  io_uring: sqthread should grab ctx->uring_lock for submissions
  io-wq: briefly spin for new work after finishing work
  io-wq: remove worker->wait waitqueue
  io_uring: allow unbreakable links
parents 15da849c 9e3aa61a
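As referenced in the first bullet above, the IOSQE_IO_HARDLINK flag (added by "io_uring: allow unbreakable links" and visible in the uapi diff below) keeps a submission chain intact even when a link completes with a negative result. A minimal userspace sketch of that behavior follows; liburing and a 5.5+ kernel are assumptions, not part of this merge:

	#include <liburing.h>
	#include <stdio.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		struct __kernel_timespec ts = { .tv_sec = 1 };

		if (io_uring_queue_init(8, &ring, 0) < 0)
			return 1;

		/* pure timeout: always completes with -ETIME when it fires */
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_timeout(sqe, &ts, 0, 0);
		/* HARDLINK: the chain survives the -ETIME "failure" */
		sqe->flags |= IOSQE_IO_HARDLINK;

		/* with plain IOSQE_IO_LINK this would complete with -ECANCELED */
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_nop(sqe);

		io_uring_submit(&ring);
		for (int i = 0; i < 2; i++) {
			io_uring_wait_cqe(&ring, &cqe);
			printf("res=%d\n", cqe->res);
			io_uring_cqe_seen(&ring, cqe);
		}
		io_uring_queue_exit(&ring);
		return 0;
	}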
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -49,7 +49,6 @@ struct io_worker {
 	struct hlist_nulls_node nulls_node;
 	struct list_head all_list;
 	struct task_struct *task;
-	wait_queue_head_t wait;
 	struct io_wqe *wqe;
 
 	struct io_wq_work *cur_work;
@@ -258,7 +257,7 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
 
 	worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
 	if (io_worker_get(worker)) {
-		wake_up(&worker->wait);
+		wake_up_process(worker->task);
 		io_worker_release(worker);
 		return true;
 	}
@@ -492,28 +491,46 @@ static void io_worker_handle_work(struct io_worker *worker)
 	} while (1);
 }
 
+static inline void io_worker_spin_for_work(struct io_wqe *wqe)
+{
+	int i = 0;
+
+	while (++i < 1000) {
+		if (io_wqe_run_queue(wqe))
+			break;
+		if (need_resched())
+			break;
+		cpu_relax();
+	}
+}
+
 static int io_wqe_worker(void *data)
 {
 	struct io_worker *worker = data;
 	struct io_wqe *wqe = worker->wqe;
 	struct io_wq *wq = wqe->wq;
-	DEFINE_WAIT(wait);
+	bool did_work;
 
 	io_worker_start(wqe, worker);
 
+	did_work = false;
 	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
-		prepare_to_wait(&worker->wait, &wait, TASK_INTERRUPTIBLE);
-
+		set_current_state(TASK_INTERRUPTIBLE);
+loop:
+		if (did_work)
+			io_worker_spin_for_work(wqe);
 		spin_lock_irq(&wqe->lock);
 		if (io_wqe_run_queue(wqe)) {
 			__set_current_state(TASK_RUNNING);
 			io_worker_handle_work(worker);
-			continue;
+			did_work = true;
+			goto loop;
 		}
+		did_work = false;
 		/* drops the lock on success, retry */
 		if (__io_worker_idle(wqe, worker)) {
 			__release(&wqe->lock);
-			continue;
+			goto loop;
 		}
 		spin_unlock_irq(&wqe->lock);
 		if (signal_pending(current))
@@ -526,8 +543,6 @@ static int io_wqe_worker(void *data)
 			break;
 	}
 
-	finish_wait(&worker->wait, &wait);
-
 	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
 		spin_lock_irq(&wqe->lock);
 		if (!wq_list_empty(&wqe->work_list))
@@ -589,7 +604,6 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 
 	refcount_set(&worker->ref, 1);
 	worker->nulls_node.pprev = NULL;
-	init_waitqueue_head(&worker->wait);
 	worker->wqe = wqe;
 	spin_lock_init(&worker->lock);
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -35,7 +35,8 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
 					struct io_wq_work_list *list)
 {
 	if (!list->first) {
-		list->first = list->last = node;
+		list->last = node;
+		WRITE_ONCE(list->first, node);
 	} else {
 		list->last->next = node;
 		list->last = node;
@@ -47,7 +48,7 @@ static inline void wq_node_del(struct io_wq_work_list *list,
 				struct io_wq_work_node *prev)
 {
 	if (node == list->first)
-		list->first = node->next;
+		WRITE_ONCE(list->first, node->next);
 	if (node == list->last)
 		list->last = prev;
 	if (prev)
@@ -58,7 +59,7 @@ static inline void wq_node_del(struct io_wq_work_list *list,
 #define wq_list_for_each(pos, prv, head)			\
 	for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
 
-#define wq_list_empty(list)	((list)->first == NULL)
+#define wq_list_empty(list)	(READ_ONCE((list)->first) == NULL)
 #define INIT_WQ_LIST(list) do {				\
 	(list)->first = NULL;				\
 	(list)->last = NULL;				\
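The WRITE_ONCE()/READ_ONCE() pairing above exists because the new io_worker_spin_for_work() path polls the work list without holding wqe->lock. Below is a sketch of that pattern, using userspace stand-ins for the <linux/compiler.h> macros; it illustrates the idea and is not the kernel code itself:

	#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))
	#define READ_ONCE(x)		(*(volatile typeof(x) *)&(x))

	struct node { struct node *next; };
	struct list { struct node *first, *last; };

	/* writer side: called with the queue lock held */
	static void add_tail(struct list *l, struct node *n)
	{
		if (!l->first) {
			l->last = n;
			WRITE_ONCE(l->first, n);	/* publish the head last */
		} else {
			l->last->next = n;
			l->last = n;
		}
	}

	/* reader side: a lockless hint, racy by design */
	static int empty_lockless(struct list *l)
	{
		return READ_ONCE(l->first) == NULL;
	}

The marked accesses keep the compiler from tearing or caching the head-pointer load/store; a stale answer is harmless because the spinning worker re-checks the queue under the lock before handling work.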
[Diff collapsed in this view: the remaining changes from this merge (fs/io_uring.c) are not shown.]
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -48,6 +48,7 @@ struct io_uring_sqe {
 #define IOSQE_FIXED_FILE	(1U << 0)	/* use fixed fileset */
 #define IOSQE_IO_DRAIN		(1U << 1)	/* issue after inflight IO */
 #define IOSQE_IO_LINK		(1U << 2)	/* links next sqe */
+#define IOSQE_IO_HARDLINK	(1U << 3)	/* like LINK, but stronger */
 
 /*
  * io_uring_setup() flags
@@ -57,23 +58,28 @@ struct io_uring_sqe {
 #define IORING_SETUP_SQ_AFF	(1U << 2)	/* sq_thread_cpu is valid */
 #define IORING_SETUP_CQSIZE	(1U << 3)	/* app defines CQ size */
 
-#define IORING_OP_NOP		0
-#define IORING_OP_READV		1
-#define IORING_OP_WRITEV	2
-#define IORING_OP_FSYNC		3
-#define IORING_OP_READ_FIXED	4
-#define IORING_OP_WRITE_FIXED	5
-#define IORING_OP_POLL_ADD	6
-#define IORING_OP_POLL_REMOVE	7
-#define IORING_OP_SYNC_FILE_RANGE	8
-#define IORING_OP_SENDMSG	9
-#define IORING_OP_RECVMSG	10
-#define IORING_OP_TIMEOUT	11
-#define IORING_OP_TIMEOUT_REMOVE	12
-#define IORING_OP_ACCEPT	13
-#define IORING_OP_ASYNC_CANCEL	14
-#define IORING_OP_LINK_TIMEOUT	15
-#define IORING_OP_CONNECT	16
+enum {
+	IORING_OP_NOP,
+	IORING_OP_READV,
+	IORING_OP_WRITEV,
+	IORING_OP_FSYNC,
+	IORING_OP_READ_FIXED,
+	IORING_OP_WRITE_FIXED,
+	IORING_OP_POLL_ADD,
+	IORING_OP_POLL_REMOVE,
+	IORING_OP_SYNC_FILE_RANGE,
+	IORING_OP_SENDMSG,
+	IORING_OP_RECVMSG,
+	IORING_OP_TIMEOUT,
+	IORING_OP_TIMEOUT_REMOVE,
+	IORING_OP_ACCEPT,
+	IORING_OP_ASYNC_CANCEL,
+	IORING_OP_LINK_TIMEOUT,
+	IORING_OP_CONNECT,
+
+	/* this goes last, obviously */
+	IORING_OP_LAST,
+};
 
 /*
  * sqe->fsync_flags
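The new IORING_OP_LAST sentinel gives the kernel a cheap upper bound for opcode validation; the actual check lives in the collapsed fs/io_uring.c diff above. A sketch of the pattern, with a hypothetical helper name:

	/* reject unknown opcodes before any fd lookup, so a bogus opcode
	 * combined with a bogus ->fd yields -EINVAL, not -EBADF */
	static int validate_opcode(const struct io_uring_sqe *sqe)
	{
		if (sqe->opcode >= IORING_OP_LAST)
			return -EINVAL;
		return 0;
	}

Converting the opcodes from #defines to an enum also means the compiler keeps IORING_OP_LAST correct automatically as new opcodes are appended.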
--- a/net/socket.c
+++ b/net/socket.c
@@ -957,7 +957,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
 						.msg_iocb = iocb};
 	ssize_t res;
 
-	if (file->f_flags & O_NONBLOCK)
+	if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
 		msg.msg_flags = MSG_DONTWAIT;
 
 	if (iocb->ki_pos != 0)
@@ -982,7 +982,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	if (iocb->ki_pos != 0)
 		return -ESPIPE;
 
-	if (file->f_flags & O_NONBLOCK)
+	if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
 		msg.msg_flags = MSG_DONTWAIT;
 
 	if (sock->type == SOCK_SEQPACKET)
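Previously sock_read_iter()/sock_write_iter() honored only O_NONBLOCK, so an IOCB_NOWAIT issuer could still block. With this fix, plus sockets being added to io_uring's non-blocking issue list, a queued read on an empty blocking socket no longer stalls the submitter: the inline attempt fails with -EAGAIN internally and is punted to async context. A minimal liburing sketch of the userspace side (liburing is an assumption here, not part of this merge):

	#include <liburing.h>

	static int queue_socket_read(struct io_uring *ring, int sockfd,
				     struct iovec *iov)
	{
		struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

		if (!sqe)
			return -EBUSY;
		/* fd may be a blocking socket; the kernel now makes the
		 * inline attempt with IOCB_NOWAIT and punts on -EAGAIN */
		io_uring_prep_readv(sqe, sockfd, iov, 1, 0);
		return io_uring_submit(ring);
	}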