Commit a194dfe6 authored by David Howells's avatar David Howells

pipe: Rearrange sequence in pipe_write() to preallocate slot

Rearrange the sequence in pipe_write() so that the allocation of the new
buffer, the allocation of a ring slot and the attachment to the ring is
done under the pipe wait spinlock and then the lock is dropped and the
buffer can be filled.

The data copy needs to be done with the spinlock unheld and irqs enabled,
so the lock needs to be dropped first.  However, the reader can't progress
as we're holding pipe->mutex.

We also need to drop the lock as that would impact others looking at the
pipe waitqueue, such as poll(), the consumer and a future kernel message
writer.

We just abandon the preallocated slot if we get a copy error.  Future
writes may continue it and a future read will eventually recycle it.
Signed-off-by: default avatarDavid Howells <dhowells@redhat.com>
parent 8446487f
...@@ -387,7 +387,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -387,7 +387,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
{ {
struct file *filp = iocb->ki_filp; struct file *filp = iocb->ki_filp;
struct pipe_inode_info *pipe = filp->private_data; struct pipe_inode_info *pipe = filp->private_data;
unsigned int head, tail, max_usage, mask; unsigned int head, max_usage, mask;
ssize_t ret = 0; ssize_t ret = 0;
int do_wakeup = 0; int do_wakeup = 0;
size_t total_len = iov_iter_count(from); size_t total_len = iov_iter_count(from);
...@@ -405,14 +405,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -405,14 +405,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
goto out; goto out;
} }
tail = pipe->tail;
head = pipe->head; head = pipe->head;
max_usage = pipe->max_usage; max_usage = pipe->max_usage;
mask = pipe->ring_size - 1; mask = pipe->ring_size - 1;
/* We try to merge small writes */ /* We try to merge small writes */
chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */ chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
if (!pipe_empty(head, tail) && chars != 0) { if (!pipe_empty(head, pipe->tail) && chars != 0) {
struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
int offset = buf->offset + buf->len; int offset = buf->offset + buf->len;
...@@ -441,8 +440,8 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -441,8 +440,8 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
break; break;
} }
tail = pipe->tail; head = pipe->head;
if (!pipe_full(head, tail, max_usage)) { if (!pipe_full(head, pipe->tail, max_usage)) {
struct pipe_buffer *buf = &pipe->bufs[head & mask]; struct pipe_buffer *buf = &pipe->bufs[head & mask];
struct page *page = pipe->tmp_page; struct page *page = pipe->tmp_page;
int copied; int copied;
...@@ -455,40 +454,56 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) ...@@ -455,40 +454,56 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
} }
pipe->tmp_page = page; pipe->tmp_page = page;
} }
/* Allocate a slot in the ring in advance and attach an
* empty buffer. If we fault or otherwise fail to use
* it, either the reader will consume it or it'll still
* be there for the next write.
*/
spin_lock_irq(&pipe->wait.lock);
head = pipe->head;
pipe->head = head + 1;
/* Always wake up, even if the copy fails. Otherwise /* Always wake up, even if the copy fails. Otherwise
* we lock up (O_NONBLOCK-)readers that sleep due to * we lock up (O_NONBLOCK-)readers that sleep due to
* syscall merging. * syscall merging.
* FIXME! Is this really true? * FIXME! Is this really true?
*/ */
do_wakeup = 1; wake_up_interruptible_sync_poll_locked(
copied = copy_page_from_iter(page, 0, PAGE_SIZE, from); &pipe->wait, EPOLLIN | EPOLLRDNORM);
if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
if (!ret) spin_unlock_irq(&pipe->wait.lock);
ret = -EFAULT; kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
break;
}
ret += copied;
/* Insert it into the buffer array */ /* Insert it into the buffer array */
buf = &pipe->bufs[head & mask];
buf->page = page; buf->page = page;
buf->ops = &anon_pipe_buf_ops; buf->ops = &anon_pipe_buf_ops;
buf->offset = 0; buf->offset = 0;
buf->len = copied; buf->len = 0;
buf->flags = 0; buf->flags = 0;
if (is_packetized(filp)) { if (is_packetized(filp)) {
buf->ops = &packet_pipe_buf_ops; buf->ops = &packet_pipe_buf_ops;
buf->flags = PIPE_BUF_FLAG_PACKET; buf->flags = PIPE_BUF_FLAG_PACKET;
} }
head++;
pipe->head = head;
pipe->tmp_page = NULL; pipe->tmp_page = NULL;
copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
if (!ret)
ret = -EFAULT;
break;
}
ret += copied;
buf->offset = 0;
buf->len = copied;
if (!iov_iter_count(from)) if (!iov_iter_count(from))
break; break;
} }
if (!pipe_full(head, tail, max_usage)) if (!pipe_full(head, pipe->tail, max_usage))
continue; continue;
/* Wait for buffer space to become available. */ /* Wait for buffer space to become available. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment