Commit bfd019d1 authored by Paolo Abeni's avatar Paolo Abeni

Merge branch 'crypto-splice-net-make-af_alg-handle-sendmsg-msg_splice_pages'

David Howells says:

====================
crypto, splice, net: Make AF_ALG handle sendmsg(MSG_SPLICE_PAGES)

Here are patches to make AF_ALG handle the MSG_SPLICE_PAGES internal
sendmsg flag.  MSG_SPLICE_PAGES is an internal hint that tells the protocol
that it should splice the pages supplied if it can.  The sendpage functions
are then turned into wrappers around that.

This set consists of the following parts:

 (1) Move netfs_extract_iter_to_sg() to somewhere more general and rename
     it to drop the "netfs" prefix.  We use this to extract directly from
     an iterator into a scatterlist.

 (2) Make AF_ALG use iov_iter_extract_pages().  This has the additional
     effect of pinning pages obtained from userspace rather than taking
     refs on them.  Pages from kernel-backed iterators would not be pinned,
     but AF_ALG isn't really meant for use by kernel services.

 (3) Change AF_ALG still further to use extract_iter_to_sg().

 (4) Make af_alg_sendmsg() support MSG_SPLICE_PAGES support and make
     af_alg_sendpage() just a wrapper around sendmsg().  This has to take
     refs on the pages pinned for the moment.

 (5) Make hash_sendmsg() support MSG_SPLICE_PAGES by simply ignoring it.
     hash_sendpage() is left untouched to be removed later, after the
     splice core has been changed to call sendmsg().
====================

Link: https://lore.kernel.org/r/20230606130856.1970660-1-dhowells@redhat.comSigned-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents b62d9e20 c662b043
......@@ -531,50 +531,25 @@ static const struct net_proto_family alg_family = {
.owner = THIS_MODULE,
};
int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len)
{
size_t off;
ssize_t n;
int npages, i;
n = iov_iter_get_pages2(iter, sgl->pages, len, ALG_MAX_PAGES, &off);
if (n < 0)
return n;
npages = DIV_ROUND_UP(off + n, PAGE_SIZE);
if (WARN_ON(npages == 0))
return -EINVAL;
/* Add one extra for linking */
sg_init_table(sgl->sg, npages + 1);
for (i = 0, len = n; i < npages; i++) {
int plen = min_t(int, len, PAGE_SIZE - off);
sg_set_page(sgl->sg + i, sgl->pages[i], plen, off);
off = 0;
len -= plen;
}
sg_mark_end(sgl->sg + npages - 1);
sgl->npages = npages;
return n;
}
EXPORT_SYMBOL_GPL(af_alg_make_sg);
static void af_alg_link_sg(struct af_alg_sgl *sgl_prev,
struct af_alg_sgl *sgl_new)
{
sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
sg_chain(sgl_prev->sg, sgl_prev->npages + 1, sgl_new->sg);
sg_unmark_end(sgl_prev->sgt.sgl + sgl_prev->sgt.nents - 1);
sg_chain(sgl_prev->sgt.sgl, sgl_prev->sgt.nents + 1, sgl_new->sgt.sgl);
}
void af_alg_free_sg(struct af_alg_sgl *sgl)
{
int i;
for (i = 0; i < sgl->npages; i++)
put_page(sgl->pages[i]);
if (sgl->sgt.sgl) {
if (sgl->need_unpin)
for (i = 0; i < sgl->sgt.nents; i++)
unpin_user_page(sg_page(&sgl->sgt.sgl[i]));
if (sgl->sgt.sgl != sgl->sgl)
kvfree(sgl->sgt.sgl);
sgl->sgt.sgl = NULL;
}
}
EXPORT_SYMBOL_GPL(af_alg_free_sg);
......@@ -1015,7 +990,7 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
while (size) {
struct scatterlist *sg;
size_t len = size;
size_t plen;
ssize_t plen;
/* use the existing memory in an allocated page */
if (ctx->merge) {
......@@ -1060,6 +1035,27 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (sgl->cur)
sg_unmark_end(sg + sgl->cur - 1);
if (msg->msg_flags & MSG_SPLICE_PAGES) {
struct sg_table sgtable = {
.sgl = sg,
.nents = sgl->cur,
.orig_nents = sgl->cur,
};
plen = extract_iter_to_sg(&msg->msg_iter, len, &sgtable,
MAX_SGL_ENTS, 0);
if (plen < 0) {
err = plen;
goto unlock;
}
for (; sgl->cur < sgtable.nents; sgl->cur++)
get_page(sg_page(&sg[sgl->cur]));
len -= plen;
ctx->used += plen;
copied += plen;
size -= plen;
} else {
do {
struct page *pg;
unsigned int i = sgl->cur;
......@@ -1074,7 +1070,8 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
sg_assign_page(sg + i, pg);
err = memcpy_from_msg(page_address(sg_page(sg + i)),
err = memcpy_from_msg(
page_address(sg_page(sg + i)),
msg, plen);
if (err) {
__free_page(sg_page(sg + i));
......@@ -1089,6 +1086,7 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
size -= plen;
sgl->cur++;
} while (len && sgl->cur < MAX_SGL_ENTS);
}
if (!size)
sg_mark_end(sg + sgl->cur - 1);
......@@ -1121,53 +1119,17 @@ EXPORT_SYMBOL_GPL(af_alg_sendmsg);
ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, int flags)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct af_alg_ctx *ctx = ask->private;
struct af_alg_tsgl *sgl;
int err = -EINVAL;
struct bio_vec bvec;
struct msghdr msg = {
.msg_flags = flags | MSG_SPLICE_PAGES,
};
if (flags & MSG_SENDPAGE_NOTLAST)
flags |= MSG_MORE;
lock_sock(sk);
if (!ctx->more && ctx->used)
goto unlock;
if (!size)
goto done;
if (!af_alg_writable(sk)) {
err = af_alg_wait_for_wmem(sk, flags);
if (err)
goto unlock;
}
err = af_alg_alloc_tsgl(sk);
if (err)
goto unlock;
ctx->merge = 0;
sgl = list_entry(ctx->tsgl_list.prev, struct af_alg_tsgl, list);
if (sgl->cur)
sg_unmark_end(sgl->sg + sgl->cur - 1);
sg_mark_end(sgl->sg + sgl->cur);
get_page(page);
sg_set_page(sgl->sg + sgl->cur, page, size, offset);
sgl->cur++;
ctx->used += size;
msg.msg_flags |= MSG_MORE;
done:
ctx->more = flags & MSG_MORE;
unlock:
af_alg_data_wakeup(sk);
release_sock(sk);
return err ?: size;
bvec_set_page(&bvec, page, size, offset);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
return sock_sendmsg(sock, &msg);
}
EXPORT_SYMBOL_GPL(af_alg_sendpage);
......@@ -1288,8 +1250,8 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
while (maxsize > len && msg_data_left(msg)) {
struct af_alg_rsgl *rsgl;
ssize_t err;
size_t seglen;
int err;
/* limit the amount of readable buffers */
if (!af_alg_readable(sk))
......@@ -1306,16 +1268,23 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
return -ENOMEM;
}
rsgl->sgl.npages = 0;
rsgl->sgl.sgt.sgl = rsgl->sgl.sgl;
rsgl->sgl.sgt.nents = 0;
rsgl->sgl.sgt.orig_nents = 0;
list_add_tail(&rsgl->list, &areq->rsgl_list);
/* make one iovec available as scatterlist */
err = af_alg_make_sg(&rsgl->sgl, &msg->msg_iter, seglen);
sg_init_table(rsgl->sgl.sgt.sgl, ALG_MAX_PAGES);
err = extract_iter_to_sg(&msg->msg_iter, seglen, &rsgl->sgl.sgt,
ALG_MAX_PAGES, 0);
if (err < 0) {
rsgl->sg_num_bytes = 0;
return err;
}
sg_mark_end(rsgl->sgl.sgt.sgl + rsgl->sgl.sgt.nents - 1);
rsgl->sgl.need_unpin =
iov_iter_extract_will_pin(&msg->msg_iter);
/* chain the new scatterlist with previous one */
if (areq->last_rsgl)
af_alg_link_sg(&areq->last_rsgl->sgl, &rsgl->sgl);
......
......@@ -9,8 +9,8 @@
* The following concept of the memory management is used:
*
* The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
* filled by user space with the data submitted via sendpage/sendmsg. Filling
* up the TX SGL does not cause a crypto operation -- the data will only be
* filled by user space with the data submitted via sendpage. Filling up
* the TX SGL does not cause a crypto operation -- the data will only be
* tracked by the kernel. Upon receipt of one recvmsg call, the caller must
* provide a buffer which is tracked with the RX SGL.
*
......@@ -113,19 +113,19 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
}
/*
* Data length provided by caller via sendmsg/sendpage that has not
* yet been processed.
* Data length provided by caller via sendmsg that has not yet been
* processed.
*/
used = ctx->used;
/*
* Make sure sufficient data is present -- note, the same check is
* also present in sendmsg/sendpage. The checks in sendpage/sendmsg
* shall provide an information to the data sender that something is
* wrong, but they are irrelevant to maintain the kernel integrity.
* We need this check here too in case user space decides to not honor
* the error message in sendmsg/sendpage and still call recvmsg. This
* check here protects the kernel integrity.
* Make sure sufficient data is present -- note, the same check is also
* present in sendmsg. The checks in sendmsg shall provide an
* information to the data sender that something is wrong, but they are
* irrelevant to maintain the kernel integrity. We need this check
* here too in case user space decides to not honor the error message
* in sendmsg and still call recvmsg. This check here protects the
* kernel integrity.
*/
if (!aead_sufficient_data(sk))
return -EINVAL;
......@@ -210,7 +210,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
*/
/* Use the RX SGL as source (and destination) for crypto op. */
rsgl_src = areq->first_rsgl.sgl.sg;
rsgl_src = areq->first_rsgl.sgl.sgt.sgl;
if (ctx->enc) {
/*
......@@ -224,7 +224,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
* RX SGL: AAD || PT || Tag
*/
err = crypto_aead_copy_sgl(null_tfm, tsgl_src,
areq->first_rsgl.sgl.sg, processed);
areq->first_rsgl.sgl.sgt.sgl,
processed);
if (err)
goto free;
af_alg_pull_tsgl(sk, processed, NULL, 0);
......@@ -242,7 +243,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
/* Copy AAD || CT to RX SGL buffer for in-place operation. */
err = crypto_aead_copy_sgl(null_tfm, tsgl_src,
areq->first_rsgl.sgl.sg, outlen);
areq->first_rsgl.sgl.sgt.sgl,
outlen);
if (err)
goto free;
......@@ -267,10 +269,10 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
if (usedpages) {
/* RX SGL present */
struct af_alg_sgl *sgl_prev = &areq->last_rsgl->sgl;
struct scatterlist *sg = sgl_prev->sgt.sgl;
sg_unmark_end(sgl_prev->sg + sgl_prev->npages - 1);
sg_chain(sgl_prev->sg, sgl_prev->npages + 1,
areq->tsgl);
sg_unmark_end(sg + sgl_prev->sgt.nents - 1);
sg_chain(sg, sgl_prev->sgt.nents + 1, areq->tsgl);
} else
/* no RX SGL present (e.g. authentication only) */
rsgl_src = areq->tsgl;
......@@ -278,7 +280,7 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg,
/* Initialize the crypto operation */
aead_request_set_crypt(&areq->cra_u.aead_req, rsgl_src,
areq->first_rsgl.sgl.sg, used, ctx->iv);
areq->first_rsgl.sgl.sgt.sgl, used, ctx->iv);
aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen);
aead_request_set_tfm(&areq->cra_u.aead_req, tfm);
......
......@@ -63,70 +63,102 @@ static void hash_free_result(struct sock *sk, struct hash_ctx *ctx)
static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
size_t ignored)
{
int limit = ALG_MAX_PAGES * PAGE_SIZE;
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct hash_ctx *ctx = ask->private;
long copied = 0;
ssize_t copied = 0;
size_t len, max_pages, npages;
bool continuing = ctx->more, need_init = false;
int err;
if (limit > sk->sk_sndbuf)
limit = sk->sk_sndbuf;
max_pages = min_t(size_t, ALG_MAX_PAGES,
DIV_ROUND_UP(sk->sk_sndbuf, PAGE_SIZE));
lock_sock(sk);
if (!ctx->more) {
if (!continuing) {
if ((msg->msg_flags & MSG_MORE))
hash_free_result(sk, ctx);
err = crypto_wait_req(crypto_ahash_init(&ctx->req), &ctx->wait);
if (err)
goto unlock;
need_init = true;
}
ctx->more = false;
while (msg_data_left(msg)) {
int len = msg_data_left(msg);
ctx->sgl.sgt.sgl = ctx->sgl.sgl;
ctx->sgl.sgt.nents = 0;
ctx->sgl.sgt.orig_nents = 0;
if (len > limit)
len = limit;
err = -EIO;
npages = iov_iter_npages(&msg->msg_iter, max_pages);
if (npages == 0)
goto unlock_free;
len = af_alg_make_sg(&ctx->sgl, &msg->msg_iter, len);
if (len < 0) {
err = copied ? 0 : len;
goto unlock;
if (npages > ARRAY_SIZE(ctx->sgl.sgl)) {
err = -ENOMEM;
ctx->sgl.sgt.sgl =
kvmalloc(array_size(npages,
sizeof(*ctx->sgl.sgt.sgl)),
GFP_KERNEL);
if (!ctx->sgl.sgt.sgl)
goto unlock_free;
}
sg_init_table(ctx->sgl.sgl, npages);
ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL, len);
ctx->sgl.need_unpin = iov_iter_extract_will_pin(&msg->msg_iter);
err = crypto_wait_req(crypto_ahash_update(&ctx->req),
&ctx->wait);
af_alg_free_sg(&ctx->sgl);
if (err) {
iov_iter_revert(&msg->msg_iter, len);
goto unlock;
err = extract_iter_to_sg(&msg->msg_iter, LONG_MAX,
&ctx->sgl.sgt, npages, 0);
if (err < 0)
goto unlock_free;
len = err;
sg_mark_end(ctx->sgl.sgt.sgl + ctx->sgl.sgt.nents - 1);
if (!msg_data_left(msg)) {
err = hash_alloc_result(sk, ctx);
if (err)
goto unlock_free;
}
copied += len;
ahash_request_set_crypt(&ctx->req, ctx->sgl.sgt.sgl,
ctx->result, len);
if (!msg_data_left(msg) && !continuing &&
!(msg->msg_flags & MSG_MORE)) {
err = crypto_ahash_digest(&ctx->req);
} else {
if (need_init) {
err = crypto_wait_req(
crypto_ahash_init(&ctx->req),
&ctx->wait);
if (err)
goto unlock_free;
need_init = false;
}
err = 0;
if (msg_data_left(msg) || (msg->msg_flags & MSG_MORE))
err = crypto_ahash_update(&ctx->req);
else
err = crypto_ahash_finup(&ctx->req);
continuing = true;
}
ctx->more = msg->msg_flags & MSG_MORE;
if (!ctx->more) {
err = hash_alloc_result(sk, ctx);
err = crypto_wait_req(err, &ctx->wait);
if (err)
goto unlock;
goto unlock_free;
ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0);
err = crypto_wait_req(crypto_ahash_final(&ctx->req),
&ctx->wait);
copied += len;
af_alg_free_sg(&ctx->sgl);
}
ctx->more = msg->msg_flags & MSG_MORE;
err = 0;
unlock:
release_sock(sk);
return copied ?: err;
return err ?: copied;
unlock_free:
af_alg_free_sg(&ctx->sgl);
goto unlock;
}
static ssize_t hash_sendpage(struct socket *sock, struct page *page,
......@@ -141,8 +173,8 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page,
flags |= MSG_MORE;
lock_sock(sk);
sg_init_table(ctx->sgl.sg, 1);
sg_set_page(ctx->sgl.sg, page, size, offset);
sg_init_table(ctx->sgl.sgl, 1);
sg_set_page(ctx->sgl.sgl, page, size, offset);
if (!(flags & MSG_MORE)) {
err = hash_alloc_result(sk, ctx);
......@@ -151,7 +183,7 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page,
} else if (!ctx->more)
hash_free_result(sk, ctx);
ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, ctx->result, size);
ahash_request_set_crypt(&ctx->req, ctx->sgl.sgl, ctx->result, size);
if (!(flags & MSG_MORE)) {
if (ctx->more)
......
......@@ -9,10 +9,10 @@
* The following concept of the memory management is used:
*
* The kernel maintains two SGLs, the TX SGL and the RX SGL. The TX SGL is
* filled by user space with the data submitted via sendpage/sendmsg. Filling
* up the TX SGL does not cause a crypto operation -- the data will only be
* tracked by the kernel. Upon receipt of one recvmsg call, the caller must
* provide a buffer which is tracked with the RX SGL.
* filled by user space with the data submitted via sendmsg. Filling up the TX
* SGL does not cause a crypto operation -- the data will only be tracked by
* the kernel. Upon receipt of one recvmsg call, the caller must provide a
* buffer which is tracked with the RX SGL.
*
* During the processing of the recvmsg operation, the cipher request is
* allocated and prepared. As part of the recvmsg operation, the processed
......@@ -105,7 +105,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg,
/* Initialize the crypto operation */
skcipher_request_set_tfm(&areq->cra_u.skcipher_req, tfm);
skcipher_request_set_crypt(&areq->cra_u.skcipher_req, areq->tsgl,
areq->first_rsgl.sgl.sg, len, ctx->iv);
areq->first_rsgl.sgl.sgt.sgl, len, ctx->iv);
if (msg->msg_iocb && !is_sync_kiocb(msg->msg_iocb)) {
/* AIO operation */
......
......@@ -101,269 +101,3 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
return npages;
}
EXPORT_SYMBOL_GPL(netfs_extract_user_iter);
/*
* Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
* iterators, and add them to the scatterlist.
*/
static ssize_t netfs_extract_user_to_sg(struct iov_iter *iter,
ssize_t maxsize,
struct sg_table *sgtable,
unsigned int sg_max,
iov_iter_extraction_t extraction_flags)
{
struct scatterlist *sg = sgtable->sgl + sgtable->nents;
struct page **pages;
unsigned int npages;
ssize_t ret = 0, res;
size_t len, off;
/* We decant the page list into the tail of the scatterlist */
pages = (void *)sgtable->sgl + array_size(sg_max, sizeof(struct scatterlist));
pages -= sg_max;
do {
res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
extraction_flags, &off);
if (res < 0)
goto failed;
len = res;
maxsize -= len;
ret += len;
npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
sg_max -= npages;
for (; npages > 0; npages--) {
struct page *page = *pages;
size_t seg = min_t(size_t, PAGE_SIZE - off, len);
*pages++ = NULL;
sg_set_page(sg, page, seg, off);
sgtable->nents++;
sg++;
len -= seg;
off = 0;
}
} while (maxsize > 0 && sg_max > 0);
return ret;
failed:
while (sgtable->nents > sgtable->orig_nents)
put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
return res;
}
/*
* Extract up to sg_max pages from a BVEC-type iterator and add them to the
* scatterlist. The pages are not pinned.
*/
static ssize_t netfs_extract_bvec_to_sg(struct iov_iter *iter,
ssize_t maxsize,
struct sg_table *sgtable,
unsigned int sg_max,
iov_iter_extraction_t extraction_flags)
{
const struct bio_vec *bv = iter->bvec;
struct scatterlist *sg = sgtable->sgl + sgtable->nents;
unsigned long start = iter->iov_offset;
unsigned int i;
ssize_t ret = 0;
for (i = 0; i < iter->nr_segs; i++) {
size_t off, len;
len = bv[i].bv_len;
if (start >= len) {
start -= len;
continue;
}
len = min_t(size_t, maxsize, len - start);
off = bv[i].bv_offset + start;
sg_set_page(sg, bv[i].bv_page, len, off);
sgtable->nents++;
sg++;
sg_max--;
ret += len;
maxsize -= len;
if (maxsize <= 0 || sg_max == 0)
break;
start = 0;
}
if (ret > 0)
iov_iter_advance(iter, ret);
return ret;
}
/*
* Extract up to sg_max pages from a KVEC-type iterator and add them to the
* scatterlist. This can deal with vmalloc'd buffers as well as kmalloc'd or
* static buffers. The pages are not pinned.
*/
static ssize_t netfs_extract_kvec_to_sg(struct iov_iter *iter,
ssize_t maxsize,
struct sg_table *sgtable,
unsigned int sg_max,
iov_iter_extraction_t extraction_flags)
{
const struct kvec *kv = iter->kvec;
struct scatterlist *sg = sgtable->sgl + sgtable->nents;
unsigned long start = iter->iov_offset;
unsigned int i;
ssize_t ret = 0;
for (i = 0; i < iter->nr_segs; i++) {
struct page *page;
unsigned long kaddr;
size_t off, len, seg;
len = kv[i].iov_len;
if (start >= len) {
start -= len;
continue;
}
kaddr = (unsigned long)kv[i].iov_base + start;
off = kaddr & ~PAGE_MASK;
len = min_t(size_t, maxsize, len - start);
kaddr &= PAGE_MASK;
maxsize -= len;
ret += len;
do {
seg = min_t(size_t, len, PAGE_SIZE - off);
if (is_vmalloc_or_module_addr((void *)kaddr))
page = vmalloc_to_page((void *)kaddr);
else
page = virt_to_page(kaddr);
sg_set_page(sg, page, len, off);
sgtable->nents++;
sg++;
sg_max--;
len -= seg;
kaddr += PAGE_SIZE;
off = 0;
} while (len > 0 && sg_max > 0);
if (maxsize <= 0 || sg_max == 0)
break;
start = 0;
}
if (ret > 0)
iov_iter_advance(iter, ret);
return ret;
}
/*
* Extract up to sg_max folios from an XARRAY-type iterator and add them to
* the scatterlist. The pages are not pinned.
*/
static ssize_t netfs_extract_xarray_to_sg(struct iov_iter *iter,
ssize_t maxsize,
struct sg_table *sgtable,
unsigned int sg_max,
iov_iter_extraction_t extraction_flags)
{
struct scatterlist *sg = sgtable->sgl + sgtable->nents;
struct xarray *xa = iter->xarray;
struct folio *folio;
loff_t start = iter->xarray_start + iter->iov_offset;
pgoff_t index = start / PAGE_SIZE;
ssize_t ret = 0;
size_t offset, len;
XA_STATE(xas, xa, index);
rcu_read_lock();
xas_for_each(&xas, folio, ULONG_MAX) {
if (xas_retry(&xas, folio))
continue;
if (WARN_ON(xa_is_value(folio)))
break;
if (WARN_ON(folio_test_hugetlb(folio)))
break;
offset = offset_in_folio(folio, start);
len = min_t(size_t, maxsize, folio_size(folio) - offset);
sg_set_page(sg, folio_page(folio, 0), len, offset);
sgtable->nents++;
sg++;
sg_max--;
maxsize -= len;
ret += len;
if (maxsize <= 0 || sg_max == 0)
break;
}
rcu_read_unlock();
if (ret > 0)
iov_iter_advance(iter, ret);
return ret;
}
/**
* netfs_extract_iter_to_sg - Extract pages from an iterator and add ot an sglist
* @iter: The iterator to extract from
* @maxsize: The amount of iterator to copy
* @sgtable: The scatterlist table to fill in
* @sg_max: Maximum number of elements in @sgtable that may be filled
* @extraction_flags: Flags to qualify the request
*
* Extract the page fragments from the given amount of the source iterator and
* add them to a scatterlist that refers to all of those bits, to a maximum
* addition of @sg_max elements.
*
* The pages referred to by UBUF- and IOVEC-type iterators are extracted and
* pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
* and DISCARD-type are not supported.
*
* No end mark is placed on the scatterlist; that's left to the caller.
*
* @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
* be allowed on the pages extracted.
*
* If successul, @sgtable->nents is updated to include the number of elements
* added and the number of bytes added is returned. @sgtable->orig_nents is
* left unaltered.
*
* The iov_iter_extract_mode() function should be used to query how cleanup
* should be performed.
*/
ssize_t netfs_extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
struct sg_table *sgtable, unsigned int sg_max,
iov_iter_extraction_t extraction_flags)
{
if (maxsize == 0)
return 0;
switch (iov_iter_type(iter)) {
case ITER_UBUF:
case ITER_IOVEC:
return netfs_extract_user_to_sg(iter, maxsize, sgtable, sg_max,
extraction_flags);
case ITER_BVEC:
return netfs_extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
extraction_flags);
case ITER_KVEC:
return netfs_extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
extraction_flags);
case ITER_XARRAY:
return netfs_extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
extraction_flags);
default:
pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
WARN_ON_ONCE(1);
return -EIO;
}
}
EXPORT_SYMBOL_GPL(netfs_extract_iter_to_sg);
......@@ -4333,7 +4333,7 @@ static void *smb2_get_aead_req(struct crypto_aead *tfm, struct smb_rqst *rqst,
}
sgtable.orig_nents = sgtable.nents;
rc = netfs_extract_iter_to_sg(iter, count, &sgtable,
rc = extract_iter_to_sg(iter, count, &sgtable,
num_sgs - sgtable.nents, 0);
iov_iter_revert(iter, rc);
sgtable.orig_nents = sgtable.nents;
......
......@@ -2227,7 +2227,7 @@ static int smbd_iter_to_mr(struct smbd_connection *info,
memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist));
ret = netfs_extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0);
ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0);
WARN_ON(ret < 0);
if (sgt->nents > 0)
sg_mark_end(&sgt->sgl[sgt->nents - 1]);
......
......@@ -56,9 +56,9 @@ struct af_alg_type {
};
struct af_alg_sgl {
struct scatterlist sg[ALG_MAX_PAGES + 1];
struct page *pages[ALG_MAX_PAGES];
unsigned int npages;
struct sg_table sgt;
struct scatterlist sgl[ALG_MAX_PAGES + 1];
bool need_unpin;
};
/* TX SGL entry */
......@@ -163,7 +163,6 @@ int af_alg_release(struct socket *sock);
void af_alg_release_parent(struct sock *sk);
int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern);
int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len);
void af_alg_free_sg(struct af_alg_sgl *sgl);
static inline struct alg_sock *alg_sk(struct sock *sk)
......
......@@ -300,10 +300,6 @@ void netfs_stats_show(struct seq_file *);
ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
struct iov_iter *new,
iov_iter_extraction_t extraction_flags);
struct sg_table;
ssize_t netfs_extract_iter_to_sg(struct iov_iter *iter, size_t len,
struct sg_table *sgtable, unsigned int sg_max,
iov_iter_extraction_t extraction_flags);
/**
* netfs_inode - Get the netfs inode context from the inode
......
......@@ -433,4 +433,9 @@ static inline bool iov_iter_extract_will_pin(const struct iov_iter *iter)
return user_backed_iter(iter);
}
struct sg_table;
ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t len,
struct sg_table *sgtable, unsigned int sg_max,
iov_iter_extraction_t extraction_flags);
#endif
......@@ -9,6 +9,8 @@
#include <linux/scatterlist.h>
#include <linux/highmem.h>
#include <linux/kmemleak.h>
#include <linux/bvec.h>
#include <linux/uio.h>
/**
* sg_next - return the next scatterlist entry in a list
......@@ -1095,3 +1097,270 @@ size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents,
return offset;
}
EXPORT_SYMBOL(sg_zero_buffer);
/*
* Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class
* iterators, and add them to the scatterlist.
*/
static ssize_t extract_user_to_sg(struct iov_iter *iter,
ssize_t maxsize,
struct sg_table *sgtable,
unsigned int sg_max,
iov_iter_extraction_t extraction_flags)
{
struct scatterlist *sg = sgtable->sgl + sgtable->nents;
struct page **pages;
unsigned int npages;
ssize_t ret = 0, res;
size_t len, off;
/* We decant the page list into the tail of the scatterlist */
pages = (void *)sgtable->sgl +
array_size(sg_max, sizeof(struct scatterlist));
pages -= sg_max;
do {
res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max,
extraction_flags, &off);
if (res < 0)
goto failed;
len = res;
maxsize -= len;
ret += len;
npages = DIV_ROUND_UP(off + len, PAGE_SIZE);
sg_max -= npages;
for (; npages > 0; npages--) {
struct page *page = *pages;
size_t seg = min_t(size_t, PAGE_SIZE - off, len);
*pages++ = NULL;
sg_set_page(sg, page, seg, off);
sgtable->nents++;
sg++;
len -= seg;
off = 0;
}
} while (maxsize > 0 && sg_max > 0);
return ret;
failed:
while (sgtable->nents > sgtable->orig_nents)
put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
return res;
}
/*
* Extract up to sg_max pages from a BVEC-type iterator and add them to the
* scatterlist. The pages are not pinned.
*/
static ssize_t extract_bvec_to_sg(struct iov_iter *iter,
ssize_t maxsize,
struct sg_table *sgtable,
unsigned int sg_max,
iov_iter_extraction_t extraction_flags)
{
const struct bio_vec *bv = iter->bvec;
struct scatterlist *sg = sgtable->sgl + sgtable->nents;
unsigned long start = iter->iov_offset;
unsigned int i;
ssize_t ret = 0;
for (i = 0; i < iter->nr_segs; i++) {
size_t off, len;
len = bv[i].bv_len;
if (start >= len) {
start -= len;
continue;
}
len = min_t(size_t, maxsize, len - start);
off = bv[i].bv_offset + start;
sg_set_page(sg, bv[i].bv_page, len, off);
sgtable->nents++;
sg++;
sg_max--;
ret += len;
maxsize -= len;
if (maxsize <= 0 || sg_max == 0)
break;
start = 0;
}
if (ret > 0)
iov_iter_advance(iter, ret);
return ret;
}
/*
* Extract up to sg_max pages from a KVEC-type iterator and add them to the
* scatterlist. This can deal with vmalloc'd buffers as well as kmalloc'd or
* static buffers. The pages are not pinned.
*/
static ssize_t extract_kvec_to_sg(struct iov_iter *iter,
ssize_t maxsize,
struct sg_table *sgtable,
unsigned int sg_max,
iov_iter_extraction_t extraction_flags)
{
const struct kvec *kv = iter->kvec;
struct scatterlist *sg = sgtable->sgl + sgtable->nents;
unsigned long start = iter->iov_offset;
unsigned int i;
ssize_t ret = 0;
for (i = 0; i < iter->nr_segs; i++) {
struct page *page;
unsigned long kaddr;
size_t off, len, seg;
len = kv[i].iov_len;
if (start >= len) {
start -= len;
continue;
}
kaddr = (unsigned long)kv[i].iov_base + start;
off = kaddr & ~PAGE_MASK;
len = min_t(size_t, maxsize, len - start);
kaddr &= PAGE_MASK;
maxsize -= len;
ret += len;
do {
seg = min_t(size_t, len, PAGE_SIZE - off);
if (is_vmalloc_or_module_addr((void *)kaddr))
page = vmalloc_to_page((void *)kaddr);
else
page = virt_to_page(kaddr);
sg_set_page(sg, page, len, off);
sgtable->nents++;
sg++;
sg_max--;
len -= seg;
kaddr += PAGE_SIZE;
off = 0;
} while (len > 0 && sg_max > 0);
if (maxsize <= 0 || sg_max == 0)
break;
start = 0;
}
if (ret > 0)
iov_iter_advance(iter, ret);
return ret;
}
/*
* Extract up to sg_max folios from an XARRAY-type iterator and add them to
* the scatterlist. The pages are not pinned.
*/
static ssize_t extract_xarray_to_sg(struct iov_iter *iter,
ssize_t maxsize,
struct sg_table *sgtable,
unsigned int sg_max,
iov_iter_extraction_t extraction_flags)
{
struct scatterlist *sg = sgtable->sgl + sgtable->nents;
struct xarray *xa = iter->xarray;
struct folio *folio;
loff_t start = iter->xarray_start + iter->iov_offset;
pgoff_t index = start / PAGE_SIZE;
ssize_t ret = 0;
size_t offset, len;
XA_STATE(xas, xa, index);
rcu_read_lock();
xas_for_each(&xas, folio, ULONG_MAX) {
if (xas_retry(&xas, folio))
continue;
if (WARN_ON(xa_is_value(folio)))
break;
if (WARN_ON(folio_test_hugetlb(folio)))
break;
offset = offset_in_folio(folio, start);
len = min_t(size_t, maxsize, folio_size(folio) - offset);
sg_set_page(sg, folio_page(folio, 0), len, offset);
sgtable->nents++;
sg++;
sg_max--;
maxsize -= len;
ret += len;
if (maxsize <= 0 || sg_max == 0)
break;
}
rcu_read_unlock();
if (ret > 0)
iov_iter_advance(iter, ret);
return ret;
}
/**
* extract_iter_to_sg - Extract pages from an iterator and add to an sglist
* @iter: The iterator to extract from
* @maxsize: The amount of iterator to copy
* @sgtable: The scatterlist table to fill in
* @sg_max: Maximum number of elements in @sgtable that may be filled
* @extraction_flags: Flags to qualify the request
*
* Extract the page fragments from the given amount of the source iterator and
* add them to a scatterlist that refers to all of those bits, to a maximum
* addition of @sg_max elements.
*
* The pages referred to by UBUF- and IOVEC-type iterators are extracted and
* pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE-
* and DISCARD-type are not supported.
*
* No end mark is placed on the scatterlist; that's left to the caller.
*
* @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA
* be allowed on the pages extracted.
*
* If successful, @sgtable->nents is updated to include the number of elements
* added and the number of bytes added is returned. @sgtable->orig_nents is
* left unaltered.
*
* The iov_iter_extract_mode() function should be used to query how cleanup
* should be performed.
*/
ssize_t extract_iter_to_sg(struct iov_iter *iter, size_t maxsize,
struct sg_table *sgtable, unsigned int sg_max,
iov_iter_extraction_t extraction_flags)
{
if (maxsize == 0)
return 0;
switch (iov_iter_type(iter)) {
case ITER_UBUF:
case ITER_IOVEC:
return extract_user_to_sg(iter, maxsize, sgtable, sg_max,
extraction_flags);
case ITER_BVEC:
return extract_bvec_to_sg(iter, maxsize, sgtable, sg_max,
extraction_flags);
case ITER_KVEC:
return extract_kvec_to_sg(iter, maxsize, sgtable, sg_max,
extraction_flags);
case ITER_XARRAY:
return extract_xarray_to_sg(iter, maxsize, sgtable, sg_max,
extraction_flags);
default:
pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter));
WARN_ON_ONCE(1);
return -EIO;
}
}
EXPORT_SYMBOL_GPL(extract_iter_to_sg);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment