Commit 5bd831a4 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'io_uring-5.5-20191212' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

 - A tweak to IOSQE_IO_LINK (also marked for stable) to allow links that
   don't sever if the result is < 0.

   This is mostly for linked timeouts, where if we ask for a pure
   timeout we always get -ETIME. This makes links useless for that case,
   hence allow a case where it works.

 - Five minor optimizations to fix and improve cases that regressed
   since v5.4.

 - An SQTHREAD locking fix.

 - A sendmsg/recvmsg iov assignment fix.

 - Net fix where read_iter/write_iter don't honor IOCB_NOWAIT, and
   subsequently ensuring that works for io_uring.

 - Fix a case where for an invalid opcode we might return -EBADF instead
   of -EINVAL, if the ->fd of that sqe was set to an invalid fd value.

* tag 'io_uring-5.5-20191212' of git://git.kernel.dk/linux-block:
  io_uring: ensure we return -EINVAL on unknown opcode
  io_uring: add sockets to list of files that support non-blocking issue
  net: make socket read/write_iter() honor IOCB_NOWAIT
  io_uring: only hash regular files for async work execution
  io_uring: run next sqe inline if possible
  io_uring: don't dynamically allocate poll data
  io_uring: deferred send/recvmsg should assign iov
  io_uring: sqthread should grab ctx->uring_lock for submissions
  io-wq: briefly spin for new work after finishing work
  io-wq: remove worker->wait waitqueue
  io_uring: allow unbreakable links
parents 15da849c 9e3aa61a
......@@ -49,7 +49,6 @@ struct io_worker {
struct hlist_nulls_node nulls_node;
struct list_head all_list;
struct task_struct *task;
wait_queue_head_t wait;
struct io_wqe *wqe;
struct io_wq_work *cur_work;
......@@ -258,7 +257,7 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
if (io_worker_get(worker)) {
wake_up(&worker->wait);
wake_up_process(worker->task);
io_worker_release(worker);
return true;
}
......@@ -492,28 +491,46 @@ static void io_worker_handle_work(struct io_worker *worker)
} while (1);
}
static inline void io_worker_spin_for_work(struct io_wqe *wqe)
{
int i = 0;
while (++i < 1000) {
if (io_wqe_run_queue(wqe))
break;
if (need_resched())
break;
cpu_relax();
}
}
static int io_wqe_worker(void *data)
{
struct io_worker *worker = data;
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
DEFINE_WAIT(wait);
bool did_work;
io_worker_start(wqe, worker);
did_work = false;
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
prepare_to_wait(&worker->wait, &wait, TASK_INTERRUPTIBLE);
set_current_state(TASK_INTERRUPTIBLE);
loop:
if (did_work)
io_worker_spin_for_work(wqe);
spin_lock_irq(&wqe->lock);
if (io_wqe_run_queue(wqe)) {
__set_current_state(TASK_RUNNING);
io_worker_handle_work(worker);
continue;
did_work = true;
goto loop;
}
did_work = false;
/* drops the lock on success, retry */
if (__io_worker_idle(wqe, worker)) {
__release(&wqe->lock);
continue;
goto loop;
}
spin_unlock_irq(&wqe->lock);
if (signal_pending(current))
......@@ -526,8 +543,6 @@ static int io_wqe_worker(void *data)
break;
}
finish_wait(&worker->wait, &wait);
if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
spin_lock_irq(&wqe->lock);
if (!wq_list_empty(&wqe->work_list))
......@@ -589,7 +604,6 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
refcount_set(&worker->ref, 1);
worker->nulls_node.pprev = NULL;
init_waitqueue_head(&worker->wait);
worker->wqe = wqe;
spin_lock_init(&worker->lock);
......
......@@ -35,7 +35,8 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
struct io_wq_work_list *list)
{
if (!list->first) {
list->first = list->last = node;
list->last = node;
WRITE_ONCE(list->first, node);
} else {
list->last->next = node;
list->last = node;
......@@ -47,7 +48,7 @@ static inline void wq_node_del(struct io_wq_work_list *list,
struct io_wq_work_node *prev)
{
if (node == list->first)
list->first = node->next;
WRITE_ONCE(list->first, node->next);
if (node == list->last)
list->last = prev;
if (prev)
......@@ -58,7 +59,7 @@ static inline void wq_node_del(struct io_wq_work_list *list,
#define wq_list_for_each(pos, prv, head) \
for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
#define wq_list_empty(list) ((list)->first == NULL)
#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL)
#define INIT_WQ_LIST(list) do { \
(list)->first = NULL; \
(list)->last = NULL; \
......
This diff is collapsed.
......@@ -48,6 +48,7 @@ struct io_uring_sqe {
#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */
#define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */
#define IOSQE_IO_LINK (1U << 2) /* links next sqe */
#define IOSQE_IO_HARDLINK (1U << 3) /* like LINK, but stronger */
/*
* io_uring_setup() flags
......@@ -57,23 +58,28 @@ struct io_uring_sqe {
#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */
#define IORING_OP_NOP 0
#define IORING_OP_READV 1
#define IORING_OP_WRITEV 2
#define IORING_OP_FSYNC 3
#define IORING_OP_READ_FIXED 4
#define IORING_OP_WRITE_FIXED 5
#define IORING_OP_POLL_ADD 6
#define IORING_OP_POLL_REMOVE 7
#define IORING_OP_SYNC_FILE_RANGE 8
#define IORING_OP_SENDMSG 9
#define IORING_OP_RECVMSG 10
#define IORING_OP_TIMEOUT 11
#define IORING_OP_TIMEOUT_REMOVE 12
#define IORING_OP_ACCEPT 13
#define IORING_OP_ASYNC_CANCEL 14
#define IORING_OP_LINK_TIMEOUT 15
#define IORING_OP_CONNECT 16
enum {
IORING_OP_NOP,
IORING_OP_READV,
IORING_OP_WRITEV,
IORING_OP_FSYNC,
IORING_OP_READ_FIXED,
IORING_OP_WRITE_FIXED,
IORING_OP_POLL_ADD,
IORING_OP_POLL_REMOVE,
IORING_OP_SYNC_FILE_RANGE,
IORING_OP_SENDMSG,
IORING_OP_RECVMSG,
IORING_OP_TIMEOUT,
IORING_OP_TIMEOUT_REMOVE,
IORING_OP_ACCEPT,
IORING_OP_ASYNC_CANCEL,
IORING_OP_LINK_TIMEOUT,
IORING_OP_CONNECT,
/* this goes last, obviously */
IORING_OP_LAST,
};
/*
* sqe->fsync_flags
......
......@@ -957,7 +957,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
.msg_iocb = iocb};
ssize_t res;
if (file->f_flags & O_NONBLOCK)
if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
msg.msg_flags = MSG_DONTWAIT;
if (iocb->ki_pos != 0)
......@@ -982,7 +982,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (iocb->ki_pos != 0)
return -ESPIPE;
if (file->f_flags & O_NONBLOCK)
if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
msg.msg_flags = MSG_DONTWAIT;
if (sock->type == SOCK_SEQPACKET)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment