Commit 8d0ed0ca authored by Trond Myklebust

Merge tag 'nfs-rdma-4.5' of git://git.linux-nfs.org/projects/anna/nfs-rdma

NFS: NFSoRDMA Client Side Changes

These patches mostly fix send queue ordering issues inside the NFSoRDMA
client, but there are also two patches from Dan Carpenter fixing up smatch
warnings.
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>

* tag 'nfs-rdma-4.5' of git://git.linux-nfs.org/projects/anna/nfs-rdma:
  xprtrdma: Revert commit e7104a2a ('xprtrdma: Cap req_cqinit').
  xprtrdma: Invalidate in the RPC reply handler
  xprtrdma: Add ro_unmap_sync method for all-physical registration
  xprtrdma: Add ro_unmap_sync method for FMR
  xprtrdma: Add ro_unmap_sync method for FRWR
  xprtrdma: Introduce ro_unmap_sync method
  xprtrdma: Move struct ib_send_wr off the stack
  xprtrdma: Disable RPC/RDMA backchannel debugging messages
  xprtrdma: xprt_rdma_free() must not release backchannel reqs
  xprtrdma: Fix additional uses of spin_lock_irqsave(rb_lock)
  xprtrdma: checking for NULL instead of IS_ERR()
  xprtrdma: clean up some curly braces
parents d1358917 26ae9d1c
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
# define RPCDBG_FACILITY RPCDBG_TRANS # define RPCDBG_FACILITY RPCDBG_TRANS
#endif #endif
#define RPCRDMA_BACKCHANNEL_DEBUG #undef RPCRDMA_BACKCHANNEL_DEBUG
static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt, static void rpcrdma_bc_free_rqst(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst) struct rpc_rqst *rqst)
...@@ -42,8 +42,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, ...@@ -42,8 +42,8 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
size_t size; size_t size;
req = rpcrdma_create_req(r_xprt); req = rpcrdma_create_req(r_xprt);
if (!req) if (IS_ERR(req))
return -ENOMEM; return PTR_ERR(req);
req->rl_backchannel = true; req->rl_backchannel = true;
size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst); size = RPCRDMA_INLINE_WRITE_THRESHOLD(rqst);
...@@ -84,9 +84,7 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, ...@@ -84,9 +84,7 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
unsigned int count) unsigned int count)
{ {
struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
struct rpcrdma_rep *rep; struct rpcrdma_rep *rep;
unsigned long flags;
int rc = 0; int rc = 0;
while (count--) { while (count--) {
...@@ -98,9 +96,7 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt, ...@@ -98,9 +96,7 @@ static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
break; break;
} }
spin_lock_irqsave(&buffers->rb_lock, flags); rpcrdma_recv_buffer_put(rep);
list_add(&rep->rr_list, &buffers->rb_recv_bufs);
spin_unlock_irqrestore(&buffers->rb_lock, flags);
} }
return rc; return rc;
...@@ -140,6 +136,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) ...@@ -140,6 +136,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
__func__); __func__);
goto out_free; goto out_free;
} }
dprintk("RPC: %s: new rqst %p\n", __func__, rqst);
rqst->rq_xprt = &r_xprt->rx_xprt; rqst->rq_xprt = &r_xprt->rx_xprt;
INIT_LIST_HEAD(&rqst->rq_list); INIT_LIST_HEAD(&rqst->rq_list);
...@@ -220,12 +217,14 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) ...@@ -220,12 +217,14 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
rpclen = rqst->rq_svec[0].iov_len; rpclen = rqst->rq_svec[0].iov_len;
#ifdef RPCRDMA_BACKCHANNEL_DEBUG
pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n", pr_info("RPC: %s: rpclen %zd headerp 0x%p lkey 0x%x\n",
__func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf)); __func__, rpclen, headerp, rdmab_lkey(req->rl_rdmabuf));
pr_info("RPC: %s: RPC/RDMA: %*ph\n", pr_info("RPC: %s: RPC/RDMA: %*ph\n",
__func__, (int)RPCRDMA_HDRLEN_MIN, headerp); __func__, (int)RPCRDMA_HDRLEN_MIN, headerp);
pr_info("RPC: %s: RPC: %*ph\n", pr_info("RPC: %s: RPC: %*ph\n",
__func__, (int)rpclen, rqst->rq_svec[0].iov_base); __func__, (int)rpclen, rqst->rq_svec[0].iov_base);
#endif
req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf); req->rl_send_iov[0].addr = rdmab_addr(req->rl_rdmabuf);
req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN; req->rl_send_iov[0].length = RPCRDMA_HDRLEN_MIN;
...@@ -269,6 +268,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) ...@@ -269,6 +268,9 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{ {
struct rpc_xprt *xprt = rqst->rq_xprt; struct rpc_xprt *xprt = rqst->rq_xprt;
dprintk("RPC: %s: freeing rqst %p (req %p)\n",
__func__, rqst, rpcr_to_rdmar(rqst));
smp_mb__before_atomic(); smp_mb__before_atomic();
WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)); WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
...@@ -333,9 +335,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, ...@@ -333,9 +335,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst, rq_bc_pa_list); struct rpc_rqst, rq_bc_pa_list);
list_del(&rqst->rq_bc_pa_list); list_del(&rqst->rq_bc_pa_list);
spin_unlock(&xprt->bc_pa_lock); spin_unlock(&xprt->bc_pa_lock);
#ifdef RPCRDMA_BACKCHANNEL_DEBUG dprintk("RPC: %s: using rqst %p\n", __func__, rqst);
pr_info("RPC: %s: using rqst %p\n", __func__, rqst);
#endif
/* Prepare rqst */ /* Prepare rqst */
rqst->rq_reply_bytes_recvd = 0; rqst->rq_reply_bytes_recvd = 0;
...@@ -355,10 +355,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, ...@@ -355,10 +355,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
* direction reply. * direction reply.
*/ */
req = rpcr_to_rdmar(rqst); req = rpcr_to_rdmar(rqst);
#ifdef RPCRDMA_BACKCHANNEL_DEBUG dprintk("RPC: %s: attaching rep %p to req %p\n",
pr_info("RPC: %s: attaching rep %p to req %p\n",
__func__, rep, req); __func__, rep, req);
#endif
req->rl_reply = rep; req->rl_reply = rep;
/* Defeat the retransmit detection logic in send_request */ /* Defeat the retransmit detection logic in send_request */
......
...@@ -179,6 +179,69 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -179,6 +179,69 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
return rc; return rc;
} }
/* DMA unmap the run of segments that begins at "seg" and hand the
 * MW that was attached to the first segment to rpcrdma_put_mw().
 *
 * The number of segments walked is the first segment's mr_nsegs.
 * seg->rl_mw is cleared before any segment is unmapped.
 */
static void
__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
	struct rpcrdma_mw *mw = seg->rl_mw;
	struct ib_device *device = r_xprt->rx_ia.ri_device;
	int remaining;

	seg->rl_mw = NULL;

	for (remaining = seg->mr_nsegs; remaining; remaining--) {
		rpcrdma_unmap_one(device, seg);
		seg++;
	}

	rpcrdma_put_mw(r_xprt, mw);
}
/* Invalidate all memory regions that were registered for "req".
*
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*/
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct rpcrdma_mr_seg *seg;
unsigned int i, nchunks;
struct rpcrdma_mw *mw;
LIST_HEAD(unmap_list);
int rc;
dprintk("RPC: %s: req %p\n", __func__, req);
/* ORDER: Invalidate all of the req's MRs first
*
* ib_unmap_fmr() is slow, so use a single call instead
* of one call per mapped MR.
*/
for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
seg = &req->rl_segments[i];
mw = seg->rl_mw;
list_add(&mw->r.fmr.fmr->list, &unmap_list);
i += seg->mr_nsegs;
}
rc = ib_unmap_fmr(&unmap_list);
if (rc)
pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);
/* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list.
*/
for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
seg = &req->rl_segments[i];
__fmr_dma_unmap(r_xprt, seg);
i += seg->mr_nsegs;
seg->mr_nsegs = 0;
}
req->rl_nchunks = 0;
}
/* Use the ib_unmap_fmr() verb to prevent further remote /* Use the ib_unmap_fmr() verb to prevent further remote
* access via RDMA READ or RDMA WRITE. * access via RDMA READ or RDMA WRITE.
*/ */
...@@ -231,6 +294,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf) ...@@ -231,6 +294,7 @@ fmr_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
.ro_map = fmr_op_map, .ro_map = fmr_op_map,
.ro_unmap_sync = fmr_op_unmap_sync,
.ro_unmap = fmr_op_unmap, .ro_unmap = fmr_op_unmap,
.ro_open = fmr_op_open, .ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages, .ro_maxpages = fmr_op_maxpages,
......
...@@ -245,12 +245,14 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) ...@@ -245,12 +245,14 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth); rpcrdma_max_segments(r_xprt) * ia->ri_max_frmr_depth);
} }
/* If FAST_REG or LOCAL_INV failed, indicate the frmr needs to be reset. */ /* If FAST_REG or LOCAL_INV failed, indicate the frmr needs
* to be reset.
*
* WARNING: Only wr_id and status are reliable at this point
*/
static void static void
frwr_sendcompletion(struct ib_wc *wc) __frwr_sendcompletion_flush(struct ib_wc *wc, struct rpcrdma_mw *r)
{ {
struct rpcrdma_mw *r;
if (likely(wc->status == IB_WC_SUCCESS)) if (likely(wc->status == IB_WC_SUCCESS))
return; return;
...@@ -261,9 +263,23 @@ frwr_sendcompletion(struct ib_wc *wc) ...@@ -261,9 +263,23 @@ frwr_sendcompletion(struct ib_wc *wc)
else else
pr_warn("RPC: %s: frmr %p error, status %s (%d)\n", pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
__func__, r, ib_wc_status_msg(wc->status), wc->status); __func__, r, ib_wc_status_msg(wc->status), wc->status);
r->r.frmr.fr_state = FRMR_IS_STALE; r->r.frmr.fr_state = FRMR_IS_STALE;
} }
/* Handle a completion whose wr_id carries a struct rpcrdma_mw pointer.
 *
 * A non-successful status is passed to __frwr_sendcompletion_flush().
 * If the MW's frmr has fr_waiter set, fr_linv_done is completed
 * regardless of the status.
 */
static void
frwr_sendcompletion(struct ib_wc *wc)
{
	struct rpcrdma_mw *mw = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;

	if (unlikely(wc->status != IB_WC_SUCCESS))
		__frwr_sendcompletion_flush(wc, mw);

	if (!mw->r.frmr.fr_waiter)
		return;

	complete(&mw->r.frmr.fr_linv_done);
}
static int static int
frwr_op_init(struct rpcrdma_xprt *r_xprt) frwr_op_init(struct rpcrdma_xprt *r_xprt)
{ {
...@@ -319,7 +335,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -319,7 +335,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
struct rpcrdma_mw *mw; struct rpcrdma_mw *mw;
struct rpcrdma_frmr *frmr; struct rpcrdma_frmr *frmr;
struct ib_mr *mr; struct ib_mr *mr;
struct ib_reg_wr reg_wr; struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr; struct ib_send_wr *bad_wr;
int rc, i, n, dma_nents; int rc, i, n, dma_nents;
u8 key; u8 key;
...@@ -335,7 +351,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -335,7 +351,9 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
} while (mw->r.frmr.fr_state != FRMR_IS_INVALID); } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
frmr = &mw->r.frmr; frmr = &mw->r.frmr;
frmr->fr_state = FRMR_IS_VALID; frmr->fr_state = FRMR_IS_VALID;
frmr->fr_waiter = false;
mr = frmr->fr_mr; mr = frmr->fr_mr;
reg_wr = &frmr->fr_regwr;
if (nsegs > ia->ri_max_frmr_depth) if (nsegs > ia->ri_max_frmr_depth)
nsegs = ia->ri_max_frmr_depth; nsegs = ia->ri_max_frmr_depth;
...@@ -381,19 +399,19 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -381,19 +399,19 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
key = (u8)(mr->rkey & 0x000000FF); key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key); ib_update_fast_reg_key(mr, ++key);
reg_wr.wr.next = NULL; reg_wr->wr.next = NULL;
reg_wr.wr.opcode = IB_WR_REG_MR; reg_wr->wr.opcode = IB_WR_REG_MR;
reg_wr.wr.wr_id = (uintptr_t)mw; reg_wr->wr.wr_id = (uintptr_t)mw;
reg_wr.wr.num_sge = 0; reg_wr->wr.num_sge = 0;
reg_wr.wr.send_flags = 0; reg_wr->wr.send_flags = 0;
reg_wr.mr = mr; reg_wr->mr = mr;
reg_wr.key = mr->rkey; reg_wr->key = mr->rkey;
reg_wr.access = writing ? reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ; IB_ACCESS_REMOTE_READ;
DECR_CQCOUNT(&r_xprt->rx_ep); DECR_CQCOUNT(&r_xprt->rx_ep);
rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr); rc = ib_post_send(ia->ri_id->qp, &reg_wr->wr, &bad_wr);
if (rc) if (rc)
goto out_senderr; goto out_senderr;
...@@ -413,6 +431,116 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -413,6 +431,116 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
return rc; return rc;
} }
static struct ib_send_wr *
__frwr_prepare_linv_wr(struct rpcrdma_mr_seg *seg)
{
struct rpcrdma_mw *mw = seg->rl_mw;
struct rpcrdma_frmr *f = &mw->r.frmr;
struct ib_send_wr *invalidate_wr;
f->fr_waiter = false;
f->fr_state = FRMR_IS_INVALID;
invalidate_wr = &f->fr_invwr;
memset(invalidate_wr, 0, sizeof(*invalidate_wr));
invalidate_wr->wr_id = (unsigned long)(void *)mw;
invalidate_wr->opcode = IB_WR_LOCAL_INV;
invalidate_wr->ex.invalidate_rkey = f->fr_mr->rkey;
return invalidate_wr;
}
/* DMA unmap the scatterlist of the MW attached to "seg" and dispose
 * of the MW.
 *
 * "rc" selects what happens to the MW afterwards: zero hands it to
 * rpcrdma_put_mw(), non-zero hands it to __frwr_queue_recovery().
 * seg->rl_mw is cleared in both cases.
 */
static void
__frwr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
		 int rc)
{
	struct rpcrdma_mw *mw = seg->rl_mw;
	struct rpcrdma_frmr *frmr = &mw->r.frmr;

	seg->rl_mw = NULL;

	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
			frmr->sg, frmr->sg_nents, seg->mr_dir);

	if (rc) {
		__frwr_queue_recovery(mw);
		return;
	}

	rpcrdma_put_mw(r_xprt, mw);
}
/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 *
 * One LOCAL_INV WR is prepared per chunk; the WRs are chained and
 * posted with a single ib_post_send(). Only the last WR is signaled,
 * and this function waits on that WR's completion. If the post fails
 * the wait is skipped, because the signaled WR may never have been
 * queued and waiting for it could sleep forever; the non-zero rc is
 * then passed to __frwr_dma_unmap() for every chunk.
 *
 * A req with no chunks is a no-op.
 */
static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *invalidate_wrs, *pos, *prev, *bad_wr;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_mr_seg *seg;
	unsigned int i, nchunks;
	struct rpcrdma_frmr *f;
	int rc;

	dprintk("RPC: %s: req %p\n", __func__, req);

	/* With no chunks there is no last segment to signal on;
	 * bail out rather than dereference a NULL seg below.
	 */
	if (!req->rl_nchunks)
		return;

	/* ORDER: Invalidate all of the req's MRs first
	 *
	 * Chain the LOCAL_INV Work Requests and post them with
	 * a single ib_post_send() call.
	 */
	invalidate_wrs = pos = prev = NULL;
	seg = NULL;
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];

		pos = __frwr_prepare_linv_wr(seg);

		if (!invalidate_wrs)
			invalidate_wrs = pos;
		else
			prev->next = pos;
		prev = pos;

		i += seg->mr_nsegs;
	}
	/* seg now refers to the last chunk in the chain. */
	f = &seg->rl_mw->r.frmr;

	/* Strong send queue ordering guarantees that when the
	 * last WR in the chain completes, all WRs in the chain
	 * are complete.
	 */
	f->fr_invwr.send_flags = IB_SEND_SIGNALED;
	f->fr_waiter = true;
	init_completion(&f->fr_linv_done);
	INIT_CQCOUNT(&r_xprt->rx_ep);

	/* Transport disconnect drains the receive CQ before it
	 * replaces the QP. The RPC reply handler won't call us
	 * unless ri_id->qp is a valid pointer.
	 */
	rc = ib_post_send(ia->ri_id->qp, invalidate_wrs, &bad_wr);
	if (rc)
		pr_warn("%s: ib_post_send failed %i\n", __func__, rc);
	else
		wait_for_completion(&f->fr_linv_done);

	/* ORDER: Now DMA unmap all of the req's MRs, and return
	 * them to the free MW list.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];

		__frwr_dma_unmap(r_xprt, seg, rc);

		i += seg->mr_nsegs;
		seg->mr_nsegs = 0;
	}

	req->rl_nchunks = 0;
}
/* Post a LOCAL_INV Work Request to prevent further remote access /* Post a LOCAL_INV Work Request to prevent further remote access
* via RDMA READ or RDMA WRITE. * via RDMA READ or RDMA WRITE.
*/ */
...@@ -423,23 +551,24 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) ...@@ -423,23 +551,24 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mw *mw = seg1->rl_mw; struct rpcrdma_mw *mw = seg1->rl_mw;
struct rpcrdma_frmr *frmr = &mw->r.frmr; struct rpcrdma_frmr *frmr = &mw->r.frmr;
struct ib_send_wr invalidate_wr, *bad_wr; struct ib_send_wr *invalidate_wr, *bad_wr;
int rc, nsegs = seg->mr_nsegs; int rc, nsegs = seg->mr_nsegs;
dprintk("RPC: %s: FRMR %p\n", __func__, mw); dprintk("RPC: %s: FRMR %p\n", __func__, mw);
seg1->rl_mw = NULL; seg1->rl_mw = NULL;
frmr->fr_state = FRMR_IS_INVALID; frmr->fr_state = FRMR_IS_INVALID;
invalidate_wr = &mw->r.frmr.fr_invwr;
memset(&invalidate_wr, 0, sizeof(invalidate_wr)); memset(invalidate_wr, 0, sizeof(*invalidate_wr));
invalidate_wr.wr_id = (unsigned long)(void *)mw; invalidate_wr->wr_id = (uintptr_t)mw;
invalidate_wr.opcode = IB_WR_LOCAL_INV; invalidate_wr->opcode = IB_WR_LOCAL_INV;
invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey; invalidate_wr->ex.invalidate_rkey = frmr->fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep); DECR_CQCOUNT(&r_xprt->rx_ep);
ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir); ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
read_lock(&ia->ri_qplock); read_lock(&ia->ri_qplock);
rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); rc = ib_post_send(ia->ri_id->qp, invalidate_wr, &bad_wr);
read_unlock(&ia->ri_qplock); read_unlock(&ia->ri_qplock);
if (rc) if (rc)
goto out_err; goto out_err;
...@@ -471,6 +600,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf) ...@@ -471,6 +600,7 @@ frwr_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map, .ro_map = frwr_op_map,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_unmap = frwr_op_unmap, .ro_unmap = frwr_op_unmap,
.ro_open = frwr_op_open, .ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages, .ro_maxpages = frwr_op_maxpages,
......
...@@ -83,6 +83,18 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg) ...@@ -83,6 +83,18 @@ physical_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
return 1; return 1;
} }
/* DMA unmap all memory regions that were mapped for "req".
*/
static void
physical_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
struct ib_device *device = r_xprt->rx_ia.ri_device;
unsigned int i;
for (i = 0; req->rl_nchunks; --req->rl_nchunks)
rpcrdma_unmap_one(device, &req->rl_segments[i++]);
}
static void static void
physical_op_destroy(struct rpcrdma_buffer *buf) physical_op_destroy(struct rpcrdma_buffer *buf)
{ {
...@@ -90,6 +102,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf) ...@@ -90,6 +102,7 @@ physical_op_destroy(struct rpcrdma_buffer *buf)
const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_physical_memreg_ops = {
.ro_map = physical_op_map, .ro_map = physical_op_map,
.ro_unmap_sync = physical_op_unmap_sync,
.ro_unmap = physical_op_unmap, .ro_unmap = physical_op_unmap,
.ro_open = physical_op_open, .ro_open = physical_op_open,
.ro_maxpages = physical_op_maxpages, .ro_maxpages = physical_op_maxpages,
......
...@@ -804,6 +804,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -804,6 +804,11 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
if (req->rl_reply) if (req->rl_reply)
goto out_duplicate; goto out_duplicate;
/* Sanity checking has passed. We are now committed
* to complete this transaction.
*/
list_del_init(&rqst->rq_list);
spin_unlock_bh(&xprt->transport_lock);
dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
" RPC request 0x%p xid 0x%08x\n", " RPC request 0x%p xid 0x%08x\n",
__func__, rep, req, rqst, __func__, rep, req, rqst,
...@@ -888,12 +893,23 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -888,12 +893,23 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
break; break;
} }
/* Invalidate and flush the data payloads before waking the
* waiting application. This guarantees the memory region is
* properly fenced from the server before the application
* accesses the data. It also ensures proper send flow
* control: waking the next RPC waits until this RPC has
* relinquished all its Send Queue entries.
*/
if (req->rl_nchunks)
r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, req);
credits = be32_to_cpu(headerp->rm_credit); credits = be32_to_cpu(headerp->rm_credit);
if (credits == 0) if (credits == 0)
credits = 1; /* don't deadlock */ credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_buf.rb_max_requests) else if (credits > r_xprt->rx_buf.rb_max_requests)
credits = r_xprt->rx_buf.rb_max_requests; credits = r_xprt->rx_buf.rb_max_requests;
spin_lock_bh(&xprt->transport_lock);
cwnd = xprt->cwnd; cwnd = xprt->cwnd;
xprt->cwnd = credits << RPC_CWNDSHIFT; xprt->cwnd = credits << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd) if (xprt->cwnd > cwnd)
......
...@@ -576,6 +576,9 @@ xprt_rdma_free(void *buffer) ...@@ -576,6 +576,9 @@ xprt_rdma_free(void *buffer)
rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]); rb = container_of(buffer, struct rpcrdma_regbuf, rg_base[0]);
req = rb->rg_owner; req = rb->rg_owner;
if (req->rl_backchannel)
return;
r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf);
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
......
...@@ -616,10 +616,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ...@@ -616,10 +616,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* set trigger for requesting send completion */ /* set trigger for requesting send completion */
ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS) if (ep->rep_cqinit <= 2)
ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS; ep->rep_cqinit = 0; /* always signal? */
else if (ep->rep_cqinit <= 2)
ep->rep_cqinit = 0;
INIT_CQCOUNT(ep); INIT_CQCOUNT(ep);
init_waitqueue_head(&ep->rep_connect_wait); init_waitqueue_head(&ep->rep_connect_wait);
INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
...@@ -852,12 +850,13 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ...@@ -852,12 +850,13 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
if (extras) { if (extras) {
rc = rpcrdma_ep_post_extra_recv(r_xprt, extras); rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
if (rc) if (rc) {
pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n", pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
__func__, rc); __func__, rc);
rc = 0; rc = 0;
} }
} }
}
out: out:
if (rc) if (rc)
...@@ -1337,15 +1336,14 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) ...@@ -1337,15 +1336,14 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_rep *rep; struct rpcrdma_rep *rep;
unsigned long flags;
int rc; int rc;
while (count--) { while (count--) {
spin_lock_irqsave(&buffers->rb_lock, flags); spin_lock(&buffers->rb_lock);
if (list_empty(&buffers->rb_recv_bufs)) if (list_empty(&buffers->rb_recv_bufs))
goto out_reqbuf; goto out_reqbuf;
rep = rpcrdma_buffer_get_rep_locked(buffers); rep = rpcrdma_buffer_get_rep_locked(buffers);
spin_unlock_irqrestore(&buffers->rb_lock, flags); spin_unlock(&buffers->rb_lock);
rc = rpcrdma_ep_post_recv(ia, ep, rep); rc = rpcrdma_ep_post_recv(ia, ep, rep);
if (rc) if (rc)
...@@ -1355,7 +1353,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) ...@@ -1355,7 +1353,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
return 0; return 0;
out_reqbuf: out_reqbuf:
spin_unlock_irqrestore(&buffers->rb_lock, flags); spin_unlock(&buffers->rb_lock);
pr_warn("%s: no extra receive buffers\n", __func__); pr_warn("%s: no extra receive buffers\n", __func__);
return -ENOMEM; return -ENOMEM;
......
...@@ -88,12 +88,6 @@ struct rpcrdma_ep { ...@@ -88,12 +88,6 @@ struct rpcrdma_ep {
struct delayed_work rep_connect_worker; struct delayed_work rep_connect_worker;
}; };
/*
* Force a signaled SEND Work Request every so often,
* in case the provider needs to do some housekeeping.
*/
#define RPCRDMA_MAX_UNSIGNALED_SENDS (32)
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
...@@ -207,6 +201,12 @@ struct rpcrdma_frmr { ...@@ -207,6 +201,12 @@ struct rpcrdma_frmr {
enum rpcrdma_frmr_state fr_state; enum rpcrdma_frmr_state fr_state;
struct work_struct fr_work; struct work_struct fr_work;
struct rpcrdma_xprt *fr_xprt; struct rpcrdma_xprt *fr_xprt;
bool fr_waiter;
struct completion fr_linv_done;;
union {
struct ib_reg_wr fr_regwr;
struct ib_send_wr fr_invwr;
};
}; };
struct rpcrdma_fmr { struct rpcrdma_fmr {
...@@ -364,6 +364,8 @@ struct rpcrdma_xprt; ...@@ -364,6 +364,8 @@ struct rpcrdma_xprt;
struct rpcrdma_memreg_ops { struct rpcrdma_memreg_ops {
int (*ro_map)(struct rpcrdma_xprt *, int (*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool); struct rpcrdma_mr_seg *, int, bool);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct rpcrdma_req *);
int (*ro_unmap)(struct rpcrdma_xprt *, int (*ro_unmap)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *); struct rpcrdma_mr_seg *);
int (*ro_open)(struct rpcrdma_ia *, int (*ro_open)(struct rpcrdma_ia *,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment