Commit e270bfd2 authored by Jens Axboe

io_uring/kbuf: vmap pinned buffer ring

This avoids needing to care about HIGHMEM, and it makes the buffer
indexing easier as both ring provided buffer methods are now virtually
mapped in a contiguous fashion.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 1943f96b
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/poll.h> #include <linux/poll.h>
#include <linux/vmalloc.h>
#include <linux/io_uring.h> #include <linux/io_uring.h>
#include <uapi/linux/io_uring.h> #include <uapi/linux/io_uring.h>
...@@ -146,15 +147,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, ...@@ -146,15 +147,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
req->flags |= REQ_F_BL_EMPTY; req->flags |= REQ_F_BL_EMPTY;
head &= bl->mask; head &= bl->mask;
/* mmaped buffers are always contig */
if (bl->is_mmap || head < IO_BUFFER_LIST_BUF_PER_PAGE) {
buf = &br->bufs[head]; buf = &br->bufs[head];
} else {
int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
buf = page_address(bl->buf_pages[index]);
buf += off;
}
if (*len == 0 || *len > buf->len) if (*len == 0 || *len > buf->len)
*len = buf->len; *len = buf->len;
req->flags |= REQ_F_BUFFER_RING; req->flags |= REQ_F_BUFFER_RING;
...@@ -241,6 +234,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, ...@@ -241,6 +234,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
for (j = 0; j < bl->buf_nr_pages; j++) for (j = 0; j < bl->buf_nr_pages; j++)
unpin_user_page(bl->buf_pages[j]); unpin_user_page(bl->buf_pages[j]);
kvfree(bl->buf_pages); kvfree(bl->buf_pages);
vunmap(bl->buf_ring);
bl->buf_pages = NULL; bl->buf_pages = NULL;
bl->buf_nr_pages = 0; bl->buf_nr_pages = 0;
} }
...@@ -498,9 +492,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) ...@@ -498,9 +492,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
struct io_buffer_list *bl) struct io_buffer_list *bl)
{ {
struct io_uring_buf_ring *br; struct io_uring_buf_ring *br = NULL;
int nr_pages, ret, i;
struct page **pages; struct page **pages;
int i, nr_pages;
pages = io_pin_pages(reg->ring_addr, pages = io_pin_pages(reg->ring_addr,
flex_array_size(br, bufs, reg->ring_entries), flex_array_size(br, bufs, reg->ring_entries),
...@@ -508,18 +502,12 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, ...@@ -508,18 +502,12 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
if (IS_ERR(pages)) if (IS_ERR(pages))
return PTR_ERR(pages); return PTR_ERR(pages);
/* br = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
* Apparently some 32-bit boxes (ARM) will return highmem pages, if (!br) {
* which then need to be mapped. We could support that, but it'd ret = -ENOMEM;
* complicate the code and slowdown the common cases quite a bit.
* So just error out, returning -EINVAL just like we did on kernels
* that didn't support mapped buffer rings.
*/
for (i = 0; i < nr_pages; i++)
if (PageHighMem(pages[i]))
goto error_unpin; goto error_unpin;
}
br = page_address(pages[0]);
#ifdef SHM_COLOUR #ifdef SHM_COLOUR
/* /*
* On platforms that have specific aliasing requirements, SHM_COLOUR * On platforms that have specific aliasing requirements, SHM_COLOUR
...@@ -530,8 +518,10 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, ...@@ -530,8 +518,10 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
* should use IOU_PBUF_RING_MMAP instead, and liburing will handle * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
* this transparently. * this transparently.
*/ */
if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) {
ret = -EINVAL;
goto error_unpin; goto error_unpin;
}
#endif #endif
bl->buf_pages = pages; bl->buf_pages = pages;
bl->buf_nr_pages = nr_pages; bl->buf_nr_pages = nr_pages;
...@@ -543,7 +533,8 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg, ...@@ -543,7 +533,8 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
for (i = 0; i < nr_pages; i++) for (i = 0; i < nr_pages; i++)
unpin_user_page(pages[i]); unpin_user_page(pages[i]);
kvfree(pages); kvfree(pages);
return -EINVAL; vunmap(br);
return ret;
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment