Commit cc3ea893 authored by Trond Myklebust

Merge tag 'nfs-rdma-for-3.20' of git://git.linux-nfs.org/projects/anna/nfs-rdma

NFS: Client side changes for RDMA

These patches improve the scalability of the NFSoRDMA client and take large
variables off of the stack.  Additionally, the GFP_* flags are updated to
match what TCP uses.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

* tag 'nfs-rdma-for-3.20' of git://git.linux-nfs.org/projects/anna/nfs-rdma: (21 commits)
  xprtrdma: Update the GFP flags used in xprt_rdma_allocate()
  xprtrdma: Clean up after adding regbuf management
  xprtrdma: Allocate zero pad separately from rpcrdma_buffer
  xprtrdma: Allocate RPC/RDMA receive buffer separately from struct rpcrdma_rep
  xprtrdma: Allocate RPC/RDMA send buffer separately from struct rpcrdma_req
  xprtrdma: Allocate RPC send buffer separately from struct rpcrdma_req
  xprtrdma: Add struct rpcrdma_regbuf and helpers
  xprtrdma: Refactor rpcrdma_buffer_create() and rpcrdma_buffer_destroy()
  xprtrdma: Simplify synopsis of rpcrdma_buffer_create()
  xprtrdma: Take struct ib_qp_attr and ib_qp_init_attr off the stack
  xprtrdma: Take struct ib_device_attr off the stack
  xprtrdma: Free the pd if ib_query_qp() fails
  xprtrdma: Remove rpcrdma_ep::rep_func and ::rep_xprt
  xprtrdma: Move credit update to RPC reply handler
  xprtrdma: Remove rl_mr field, and the mr_chunk union
  xprtrdma: Remove rpcrdma_ep::rep_ia
  xprtrdma: Rename "xprt" and "rdma_connect" fields in struct rpcrdma_xprt
  xprtrdma: Clean up hdrlen
  xprtrdma: Display XIDs in host byte order
  xprtrdma: Modernize htonl and ntohl
  ...
parents c7c545d4 a0a1d50c
...@@ -42,6 +42,9 @@ ...@@ -42,6 +42,9 @@
#include <linux/types.h> #include <linux/types.h>
#define RPCRDMA_VERSION 1
#define rpcrdma_version cpu_to_be32(RPCRDMA_VERSION)
struct rpcrdma_segment { struct rpcrdma_segment {
__be32 rs_handle; /* Registered memory handle */ __be32 rs_handle; /* Registered memory handle */
__be32 rs_length; /* Length of the chunk in bytes */ __be32 rs_length; /* Length of the chunk in bytes */
...@@ -95,7 +98,10 @@ struct rpcrdma_msg { ...@@ -95,7 +98,10 @@ struct rpcrdma_msg {
} rm_body; } rm_body;
}; };
#define RPCRDMA_HDRLEN_MIN 28 /*
* Smallest RPC/RDMA header: rm_xid through rm_type, then rm_nochunks
*/
#define RPCRDMA_HDRLEN_MIN (sizeof(__be32) * 7)
enum rpcrdma_errcode { enum rpcrdma_errcode {
ERR_VERS = 1, ERR_VERS = 1,
...@@ -115,4 +121,10 @@ enum rpcrdma_proc { ...@@ -115,4 +121,10 @@ enum rpcrdma_proc {
RDMA_ERROR = 4 /* An RPC RDMA encoding error */ RDMA_ERROR = 4 /* An RPC RDMA encoding error */
}; };
#define rdma_msg cpu_to_be32(RDMA_MSG)
#define rdma_nomsg cpu_to_be32(RDMA_NOMSG)
#define rdma_msgp cpu_to_be32(RDMA_MSGP)
#define rdma_done cpu_to_be32(RDMA_DONE)
#define rdma_error cpu_to_be32(RDMA_ERROR)
#endif /* _LINUX_SUNRPC_RPC_RDMA_H */ #endif /* _LINUX_SUNRPC_RPC_RDMA_H */
...@@ -63,8 +63,6 @@ extern atomic_t rdma_stat_rq_prod; ...@@ -63,8 +63,6 @@ extern atomic_t rdma_stat_rq_prod;
extern atomic_t rdma_stat_sq_poll; extern atomic_t rdma_stat_sq_poll;
extern atomic_t rdma_stat_sq_prod; extern atomic_t rdma_stat_sq_prod;
#define RPCRDMA_VERSION 1
/* /*
* Contexts are built when an RDMA request is created and are a * Contexts are built when an RDMA request is created and are a
* record of the resources that can be recovered when the request * record of the resources that can be recovered when the request
......
This diff is collapsed.
...@@ -200,9 +200,9 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt) ...@@ -200,9 +200,9 @@ xprt_rdma_free_addresses(struct rpc_xprt *xprt)
static void static void
xprt_rdma_connect_worker(struct work_struct *work) xprt_rdma_connect_worker(struct work_struct *work)
{ {
struct rpcrdma_xprt *r_xprt = struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
container_of(work, struct rpcrdma_xprt, rdma_connect.work); rx_connect_worker.work);
struct rpc_xprt *xprt = &r_xprt->xprt; struct rpc_xprt *xprt = &r_xprt->rx_xprt;
int rc = 0; int rc = 0;
xprt_clear_connected(xprt); xprt_clear_connected(xprt);
...@@ -235,7 +235,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) ...@@ -235,7 +235,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
dprintk("RPC: %s: called\n", __func__); dprintk("RPC: %s: called\n", __func__);
cancel_delayed_work_sync(&r_xprt->rdma_connect); cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
xprt_clear_connected(xprt); xprt_clear_connected(xprt);
...@@ -364,8 +364,7 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -364,8 +364,7 @@ xprt_setup_rdma(struct xprt_create *args)
* any inline data. Also specify any padding which will be provided * any inline data. Also specify any padding which will be provided
* from a preregistered zero buffer. * from a preregistered zero buffer.
*/ */
rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia, rc = rpcrdma_buffer_create(new_xprt);
&new_xprt->rx_data);
if (rc) if (rc)
goto out3; goto out3;
...@@ -374,9 +373,8 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -374,9 +373,8 @@ xprt_setup_rdma(struct xprt_create *args)
* connection loss notification is async. We also catch connection loss * connection loss notification is async. We also catch connection loss
* when reaping receives. * when reaping receives.
*/ */
INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker); INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
new_ep->rep_func = rpcrdma_conn_func; xprt_rdma_connect_worker);
new_ep->rep_xprt = xprt;
xprt_rdma_format_addresses(xprt); xprt_rdma_format_addresses(xprt);
xprt->max_payload = rpcrdma_max_payload(new_xprt); xprt->max_payload = rpcrdma_max_payload(new_xprt);
...@@ -434,7 +432,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) ...@@ -434,7 +432,7 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
if (r_xprt->rx_ep.rep_connected != 0) { if (r_xprt->rx_ep.rep_connected != 0) {
/* Reconnect */ /* Reconnect */
schedule_delayed_work(&r_xprt->rdma_connect, schedule_delayed_work(&r_xprt->rx_connect_worker,
xprt->reestablish_timeout); xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1; xprt->reestablish_timeout <<= 1;
if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO) if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
...@@ -442,86 +440,93 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) ...@@ -442,86 +440,93 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
} else { } else {
schedule_delayed_work(&r_xprt->rdma_connect, 0); schedule_delayed_work(&r_xprt->rx_connect_worker, 0);
if (!RPC_IS_ASYNC(task)) if (!RPC_IS_ASYNC(task))
flush_delayed_work(&r_xprt->rdma_connect); flush_delayed_work(&r_xprt->rx_connect_worker);
} }
} }
/* /*
* The RDMA allocate/free functions need the task structure as a place * The RDMA allocate/free functions need the task structure as a place
* to hide the struct rpcrdma_req, which is necessary for the actual send/recv * to hide the struct rpcrdma_req, which is necessary for the actual send/recv
* sequence. For this reason, the recv buffers are attached to send * sequence.
* buffers for portions of the RPC. Note that the RPC layer allocates *
* both send and receive buffers in the same call. We may register * The RPC layer allocates both send and receive buffers in the same call
* the receive buffer portion when using reply chunks. * (rq_send_buf and rq_rcv_buf are both part of a single contiguous buffer).
* We may register rq_rcv_buf when using reply chunks.
*/ */
static void * static void *
xprt_rdma_allocate(struct rpc_task *task, size_t size) xprt_rdma_allocate(struct rpc_task *task, size_t size)
{ {
struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
struct rpcrdma_req *req, *nreq; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_regbuf *rb;
struct rpcrdma_req *req;
size_t min_size;
gfp_t flags;
req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); req = rpcrdma_buffer_get(&r_xprt->rx_buf);
if (req == NULL) if (req == NULL)
return NULL; return NULL;
if (size > req->rl_size) { flags = GFP_NOIO | __GFP_NOWARN;
dprintk("RPC: %s: size %zd too large for buffer[%zd]: " if (RPC_IS_SWAPPER(task))
"prog %d vers %d proc %d\n", flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
__func__, size, req->rl_size,
task->tk_client->cl_prog, task->tk_client->cl_vers, if (req->rl_rdmabuf == NULL)
task->tk_msg.rpc_proc->p_proc); goto out_rdmabuf;
/* if (req->rl_sendbuf == NULL)
* Outgoing length shortage. Our inline write max must have goto out_sendbuf;
* been configured to perform direct i/o. if (size > req->rl_sendbuf->rg_size)
goto out_sendbuf;
out:
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_sendbuf->rg_base;
out_rdmabuf:
min_size = RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, min_size, flags);
if (IS_ERR(rb))
goto out_fail;
req->rl_rdmabuf = rb;
out_sendbuf:
/* XDR encoding and RPC/RDMA marshaling of this request has not
* yet occurred. Thus a lower bound is needed to prevent buffer
* overrun during marshaling.
* *
* This is therefore a large metadata operation, and the * RPC/RDMA marshaling may choose to send payload bearing ops
* allocate call was made on the maximum possible message, * inline, if the result is smaller than the inline threshold.
* e.g. containing long filename(s) or symlink data. In * The value of the "size" argument accounts for header
* fact, while these metadata operations *might* carry * requirements but not for the payload in these cases.
* large outgoing payloads, they rarely *do*. However, we
* have to commit to the request here, so reallocate and
* register it now. The data path will never require this
* reallocation.
* *
* If the allocation or registration fails, the RPC framework * Likewise, allocate enough space to receive a reply up to the
* will (doggedly) retry. * size of the inline threshold.
*
* It's unlikely that both the send header and the received
* reply will be large, but slush is provided here to allow
* flexibility when marshaling.
*/ */
if (task->tk_flags & RPC_TASK_SWAPPER) min_size = RPCRDMA_INLINE_READ_THRESHOLD(task->tk_rqstp);
nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); min_size += RPCRDMA_INLINE_WRITE_THRESHOLD(task->tk_rqstp);
else if (size < min_size)
nreq = kmalloc(sizeof *req + size, GFP_NOFS); size = min_size;
if (nreq == NULL)
goto outfail; rb = rpcrdma_alloc_regbuf(&r_xprt->rx_ia, size, flags);
if (IS_ERR(rb))
if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia, goto out_fail;
nreq->rl_base, size + sizeof(struct rpcrdma_req) rb->rg_owner = req;
- offsetof(struct rpcrdma_req, rl_base),
&nreq->rl_handle, &nreq->rl_iov)) { r_xprt->rx_stats.hardway_register_count += size;
kfree(nreq); rpcrdma_free_regbuf(&r_xprt->rx_ia, req->rl_sendbuf);
goto outfail; req->rl_sendbuf = rb;
} goto out;
rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size;
nreq->rl_size = size; out_fail:
nreq->rl_niovs = 0;
nreq->rl_nchunks = 0;
nreq->rl_buffer = (struct rpcrdma_buffer *)req;
nreq->rl_reply = req->rl_reply;
memcpy(nreq->rl_segments,
req->rl_segments, sizeof nreq->rl_segments);
/* flag the swap with an unused field */
nreq->rl_iov.length = 0;
req->rl_reply = NULL;
req = nreq;
}
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_xdr_buf;
outfail:
rpcrdma_buffer_put(req); rpcrdma_buffer_put(req);
rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; r_xprt->rx_stats.failed_marshal_count++;
return NULL; return NULL;
} }
...@@ -533,47 +538,24 @@ xprt_rdma_free(void *buffer) ...@@ -533,47 +538,24 @@ xprt_rdma_free(void *buffer)
{ {
struct rpcrdma_req *req; struct rpcrdma_req *req;
struct rpcrdma_xprt *r_xprt; struct rpcrdma_xprt *r_xprt;
struct rpcrdma_rep *rep; struct rpcrdma_regbuf *rb;
int i; int i;
if (buffer == NULL) if (buffer == NULL)
return; return;
req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]); rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
if (req->rl_iov.length == 0) { /* see allocate above */ req = rb->rg_owner;
r_xprt = container_of(((struct rpcrdma_req *) req->rl_buffer)->rl_buffer,
struct rpcrdma_xprt, rx_buf);
} else
r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
rep = req->rl_reply;
dprintk("RPC: %s: called on 0x%p%s\n", dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
/*
* Finish the deregistration. The process is considered
* complete when the rr_func vector becomes NULL - this
* was put in place during rpcrdma_reply_handler() - the wait
* call below will not block if the dereg is "done". If
* interrupted, our framework will clean up.
*/
for (i = 0; req->rl_nchunks;) { for (i = 0; req->rl_nchunks;) {
--req->rl_nchunks; --req->rl_nchunks;
i += rpcrdma_deregister_external( i += rpcrdma_deregister_external(
&req->rl_segments[i], r_xprt); &req->rl_segments[i], r_xprt);
} }
if (req->rl_iov.length == 0) { /* see allocate above */
struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer;
oreq->rl_reply = req->rl_reply;
(void) rpcrdma_deregister_internal(&r_xprt->rx_ia,
req->rl_handle,
&req->rl_iov);
kfree(req);
req = oreq;
}
/* Put back request+reply buffers */
rpcrdma_buffer_put(req); rpcrdma_buffer_put(req);
} }
......
This diff is collapsed.
...@@ -70,6 +70,9 @@ struct rpcrdma_ia { ...@@ -70,6 +70,9 @@ struct rpcrdma_ia {
int ri_async_rc; int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy; enum rpcrdma_memreg ri_memreg_strategy;
unsigned int ri_max_frmr_depth; unsigned int ri_max_frmr_depth;
struct ib_device_attr ri_devattr;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
}; };
/* /*
...@@ -83,13 +86,9 @@ struct rpcrdma_ep { ...@@ -83,13 +86,9 @@ struct rpcrdma_ep {
atomic_t rep_cqcount; atomic_t rep_cqcount;
int rep_cqinit; int rep_cqinit;
int rep_connected; int rep_connected;
struct rpcrdma_ia *rep_ia;
struct ib_qp_init_attr rep_attr; struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait; wait_queue_head_t rep_connect_wait;
struct ib_sge rep_pad; /* holds zeroed pad */ struct rpcrdma_regbuf *rep_padbuf;
struct ib_mr *rep_pad_mr; /* holds zeroed pad */
void (*rep_func)(struct rpcrdma_ep *);
struct rpc_xprt *rep_xprt; /* for rep_func */
struct rdma_conn_param rep_remote_cma; struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr; struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker; struct delayed_work rep_connect_worker;
...@@ -106,6 +105,44 @@ struct rpcrdma_ep { ...@@ -106,6 +105,44 @@ struct rpcrdma_ep {
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
*
* The below structure appears at the front of a large region of kmalloc'd
* memory, which always starts on a good alignment boundary.
*/
struct rpcrdma_regbuf {
size_t rg_size;
struct rpcrdma_req *rg_owner;
struct ib_mr *rg_mr;
struct ib_sge rg_iov;
__be32 rg_base[0] __attribute__ ((aligned(256)));
};
static inline u64
rdmab_addr(struct rpcrdma_regbuf *rb)
{
return rb->rg_iov.addr;
}
static inline u32
rdmab_length(struct rpcrdma_regbuf *rb)
{
return rb->rg_iov.length;
}
static inline u32
rdmab_lkey(struct rpcrdma_regbuf *rb)
{
return rb->rg_iov.lkey;
}
static inline struct rpcrdma_msg *
rdmab_to_msg(struct rpcrdma_regbuf *rb)
{
return (struct rpcrdma_msg *)rb->rg_base;
}
enum rpcrdma_chunktype { enum rpcrdma_chunktype {
rpcrdma_noch = 0, rpcrdma_noch = 0,
rpcrdma_readch, rpcrdma_readch,
...@@ -134,22 +171,16 @@ enum rpcrdma_chunktype { ...@@ -134,22 +171,16 @@ enum rpcrdma_chunktype {
/* temporary static scatter/gather max */ /* temporary static scatter/gather max */
#define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */ #define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */
#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ #define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
#define MAX_RPCRDMAHDR (\
/* max supported RPC/RDMA header */ \
sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \
(sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32))
struct rpcrdma_buffer; struct rpcrdma_buffer;
struct rpcrdma_rep { struct rpcrdma_rep {
unsigned int rr_len; /* actual received reply length */ unsigned int rr_len;
struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ struct rpcrdma_buffer *rr_buffer;
struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ struct rpc_xprt *rr_xprt;
void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ void (*rr_func)(struct rpcrdma_rep *);
struct list_head rr_list; /* tasklet list */ struct list_head rr_list;
struct ib_sge rr_iov; /* for posting */ struct rpcrdma_regbuf *rr_rdmabuf;
struct ib_mr *rr_handle; /* handle for mem in rr_iov */
char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
}; };
/* /*
...@@ -211,10 +242,7 @@ struct rpcrdma_mw { ...@@ -211,10 +242,7 @@ struct rpcrdma_mw {
*/ */
struct rpcrdma_mr_seg { /* chunk descriptors */ struct rpcrdma_mr_seg { /* chunk descriptors */
union { /* chunk memory handles */ struct rpcrdma_mw *rl_mw; /* registered MR */
struct ib_mr *rl_mr; /* if registered directly */
struct rpcrdma_mw *rl_mw; /* if registered from region */
} mr_chunk;
u64 mr_base; /* registration result */ u64 mr_base; /* registration result */
u32 mr_rkey; /* registration result */ u32 mr_rkey; /* registration result */
u32 mr_len; /* length of chunk or segment */ u32 mr_len; /* length of chunk or segment */
...@@ -227,22 +255,26 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ ...@@ -227,22 +255,26 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
}; };
struct rpcrdma_req { struct rpcrdma_req {
size_t rl_size; /* actual length of buffer */
unsigned int rl_niovs; /* 0, 2 or 4 */ unsigned int rl_niovs; /* 0, 2 or 4 */
unsigned int rl_nchunks; /* non-zero if chunks */ unsigned int rl_nchunks; /* non-zero if chunks */
unsigned int rl_connect_cookie; /* retry detection */ unsigned int rl_connect_cookie; /* retry detection */
enum rpcrdma_chunktype rl_rtype, rl_wtype; enum rpcrdma_chunktype rl_rtype, rl_wtype;
struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
struct ib_sge rl_send_iov[4]; /* for active requests */ struct ib_sge rl_send_iov[4]; /* for active requests */
struct ib_sge rl_iov; /* for posting */ struct rpcrdma_regbuf *rl_rdmabuf;
struct ib_mr *rl_handle; /* handle for mem in rl_iov */ struct rpcrdma_regbuf *rl_sendbuf;
char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */ struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
__u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */
}; };
#define rpcr_to_rdmar(r) \
container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0]) static inline struct rpcrdma_req *
rpcr_to_rdmar(struct rpc_rqst *rqst)
{
struct rpcrdma_regbuf *rb = container_of(rqst->rq_buffer,
struct rpcrdma_regbuf,
rg_base[0]);
return rb->rg_owner;
}
/* /*
* struct rpcrdma_buffer -- holds list/queue of pre-registered memory for * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
...@@ -252,7 +284,6 @@ struct rpcrdma_req { ...@@ -252,7 +284,6 @@ struct rpcrdma_req {
*/ */
struct rpcrdma_buffer { struct rpcrdma_buffer {
spinlock_t rb_lock; /* protects indexes */ spinlock_t rb_lock; /* protects indexes */
atomic_t rb_credits; /* most recent server credits */
int rb_max_requests;/* client max requests */ int rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
struct list_head rb_all; struct list_head rb_all;
...@@ -318,16 +349,16 @@ struct rpcrdma_stats { ...@@ -318,16 +349,16 @@ struct rpcrdma_stats {
* during unmount. * during unmount.
*/ */
struct rpcrdma_xprt { struct rpcrdma_xprt {
struct rpc_xprt xprt; struct rpc_xprt rx_xprt;
struct rpcrdma_ia rx_ia; struct rpcrdma_ia rx_ia;
struct rpcrdma_ep rx_ep; struct rpcrdma_ep rx_ep;
struct rpcrdma_buffer rx_buf; struct rpcrdma_buffer rx_buf;
struct rpcrdma_create_data_internal rx_data; struct rpcrdma_create_data_internal rx_data;
struct delayed_work rdma_connect; struct delayed_work rx_connect_worker;
struct rpcrdma_stats rx_stats; struct rpcrdma_stats rx_stats;
}; };
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
/* Setting this to 0 ensures interoperability with early servers. /* Setting this to 0 ensures interoperability with early servers.
...@@ -358,9 +389,7 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, ...@@ -358,9 +389,7 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
/* /*
* Buffer calls - xprtrdma/verbs.c * Buffer calls - xprtrdma/verbs.c
*/ */
int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *, int rpcrdma_buffer_create(struct rpcrdma_xprt *);
struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
...@@ -368,16 +397,16 @@ void rpcrdma_buffer_put(struct rpcrdma_req *); ...@@ -368,16 +397,16 @@ void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *); void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int,
struct ib_mr **, struct ib_sge *);
int rpcrdma_deregister_internal(struct rpcrdma_ia *,
struct ib_mr *, struct ib_sge *);
int rpcrdma_register_external(struct rpcrdma_mr_seg *, int rpcrdma_register_external(struct rpcrdma_mr_seg *,
int, int, struct rpcrdma_xprt *); int, int, struct rpcrdma_xprt *);
int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
struct rpcrdma_xprt *); struct rpcrdma_xprt *);
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(struct rpcrdma_ia *,
size_t, gfp_t);
void rpcrdma_free_regbuf(struct rpcrdma_ia *,
struct rpcrdma_regbuf *);
/* /*
* RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c * RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
*/ */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment