Commit e5fbdde4 authored by David Howells's avatar David Howells Committed by Steve French

cifs: Add a function to build an RDMA SGE list from an iterator

Add a function to add elements onto an RDMA SGE list representing page
fragments extracted from a BVEC-, KVEC- or XARRAY-type iterator and DMA
mapped until the maximum number of elements is reached.

Nothing is done to make sure the pages remain present - that must be done
by the caller.
Signed-off-by: default avatarDavid Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: Rohith Surabattula <rohiths.msft@gmail.com>
cc: Tom Talpey <tom@talpey.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-rdma@vger.kernel.org

Link: https://lore.kernel.org/r/166697256704.61150.17388516338310645808.stgit@warthog.procyon.org.uk/ # rfc
Link: https://lore.kernel.org/r/166732028840.3186319.8512284239779728860.stgit@warthog.procyon.org.uk/ # rfc
Signed-off-by: default avatarSteve French <stfrench@microsoft.com>
parent 01858469
......@@ -44,6 +44,17 @@ static int smbd_post_send_page(struct smbd_connection *info,
static void destroy_mr_list(struct smbd_connection *info);
static int allocate_mr_list(struct smbd_connection *info);
struct smb_extract_to_rdma {
struct ib_sge *sge;
unsigned int nr_sge;
unsigned int max_sge;
struct ib_device *device;
u32 local_dma_lkey;
enum dma_data_direction direction;
};
static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
struct smb_extract_to_rdma *rdma);
/* SMBD version number */
#define SMBD_V1 0x0100
......@@ -2492,3 +2503,206 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
return rc;
}
static bool smb_set_sge(struct smb_extract_to_rdma *rdma,
struct page *lowest_page, size_t off, size_t len)
{
struct ib_sge *sge = &rdma->sge[rdma->nr_sge];
u64 addr;
addr = ib_dma_map_page(rdma->device, lowest_page,
off, len, rdma->direction);
if (ib_dma_mapping_error(rdma->device, addr))
return false;
sge->addr = addr;
sge->length = len;
sge->lkey = rdma->local_dma_lkey;
rdma->nr_sge++;
return true;
}
/*
* Extract page fragments from a BVEC-class iterator and add them to an RDMA
* element list. The pages are not pinned.
*/
static ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter,
struct smb_extract_to_rdma *rdma,
ssize_t maxsize)
{
const struct bio_vec *bv = iter->bvec;
unsigned long start = iter->iov_offset;
unsigned int i;
ssize_t ret = 0;
for (i = 0; i < iter->nr_segs; i++) {
size_t off, len;
len = bv[i].bv_len;
if (start >= len) {
start -= len;
continue;
}
len = min_t(size_t, maxsize, len - start);
off = bv[i].bv_offset + start;
if (!smb_set_sge(rdma, bv[i].bv_page, off, len))
return -EIO;
ret += len;
maxsize -= len;
if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
break;
start = 0;
}
return ret;
}
/*
* Extract fragments from a KVEC-class iterator and add them to an RDMA list.
* This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers.
* The pages are not pinned.
*/
static ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter,
struct smb_extract_to_rdma *rdma,
ssize_t maxsize)
{
const struct kvec *kv = iter->kvec;
unsigned long start = iter->iov_offset;
unsigned int i;
ssize_t ret = 0;
for (i = 0; i < iter->nr_segs; i++) {
struct page *page;
unsigned long kaddr;
size_t off, len, seg;
len = kv[i].iov_len;
if (start >= len) {
start -= len;
continue;
}
kaddr = (unsigned long)kv[i].iov_base + start;
off = kaddr & ~PAGE_MASK;
len = min_t(size_t, maxsize, len - start);
kaddr &= PAGE_MASK;
maxsize -= len;
do {
seg = min_t(size_t, len, PAGE_SIZE - off);
if (is_vmalloc_or_module_addr((void *)kaddr))
page = vmalloc_to_page((void *)kaddr);
else
page = virt_to_page(kaddr);
if (!smb_set_sge(rdma, page, off, seg))
return -EIO;
ret += seg;
len -= seg;
kaddr += PAGE_SIZE;
off = 0;
} while (len > 0 && rdma->nr_sge < rdma->max_sge);
if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
break;
start = 0;
}
return ret;
}
/*
* Extract folio fragments from an XARRAY-class iterator and add them to an
* RDMA list. The folios are not pinned.
*/
static ssize_t smb_extract_xarray_to_rdma(struct iov_iter *iter,
struct smb_extract_to_rdma *rdma,
ssize_t maxsize)
{
struct xarray *xa = iter->xarray;
struct folio *folio;
loff_t start = iter->xarray_start + iter->iov_offset;
pgoff_t index = start / PAGE_SIZE;
ssize_t ret = 0;
size_t off, len;
XA_STATE(xas, xa, index);
rcu_read_lock();
xas_for_each(&xas, folio, ULONG_MAX) {
if (xas_retry(&xas, folio))
continue;
if (WARN_ON(xa_is_value(folio)))
break;
if (WARN_ON(folio_test_hugetlb(folio)))
break;
off = offset_in_folio(folio, start);
len = min_t(size_t, maxsize, folio_size(folio) - off);
if (!smb_set_sge(rdma, folio_page(folio, 0), off, len)) {
rcu_read_unlock();
return -EIO;
}
maxsize -= len;
ret += len;
if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
break;
}
rcu_read_unlock();
return ret;
}
/*
* Extract page fragments from up to the given amount of the source iterator
* and build up an RDMA list that refers to all of those bits. The RDMA list
* is appended to, up to the maximum number of elements set in the parameter
* block.
*
* The extracted page fragments are not pinned or ref'd in any way; if an
* IOVEC/UBUF-type iterator is to be used, it should be converted to a
* BVEC-type iterator and the pages pinned, ref'd or otherwise held in some
* way.
*/
static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
struct smb_extract_to_rdma *rdma)
{
ssize_t ret;
int before = rdma->nr_sge;
switch (iov_iter_type(iter)) {
case ITER_BVEC:
ret = smb_extract_bvec_to_rdma(iter, rdma, len);
break;
case ITER_KVEC:
ret = smb_extract_kvec_to_rdma(iter, rdma, len);
break;
case ITER_XARRAY:
ret = smb_extract_xarray_to_rdma(iter, rdma, len);
break;
default:
WARN_ON_ONCE(1);
return -EIO;
}
if (ret > 0) {
iov_iter_advance(iter, ret);
} else if (ret < 0) {
while (rdma->nr_sge > before) {
struct ib_sge *sge = &rdma->sge[rdma->nr_sge--];
ib_dma_unmap_single(rdma->device, sge->addr, sge->length,
rdma->direction);
sge->addr = 0;
}
}
return ret;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment