Commit 7a89f9c6 authored by Chuck Lever, committed by Anna Schumaker

xprtrdma: Honor ->send_request API contract

Commit c93c6223 ("xprtrdma: Disconnect on registration failure")
added a disconnect for some RPC marshaling failures. This is needed
only in a handful of cases, but it was triggering for simple stuff
like temporary resource shortages. Try to straighten this out.

Fix up the lower layers so they don't return -ENOMEM or other error
codes that the RPC client's FSM doesn't explicitly recognize.

Also fix up the places in the send_request path that do want a
disconnect. For example, when ib_post_send or ib_post_recv fail,
this is a sign that there is a send or receive queue resource
miscalculation. That should be rare, and is a sign of a software
bug. But xprtrdma can recover: disconnect to reset the transport and
start over.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
parent 3d4cf35b
...@@ -219,7 +219,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -219,7 +219,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
rpcrdma_defer_mr_recovery(mw); rpcrdma_defer_mr_recovery(mw);
mw = rpcrdma_get_mw(r_xprt); mw = rpcrdma_get_mw(r_xprt);
if (!mw) if (!mw)
return -ENOMEM; return -ENOBUFS;
pageoff = offset_in_page(seg1->mr_offset); pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */ seg1->mr_offset -= pageoff; /* start of page */
...@@ -269,14 +269,14 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -269,14 +269,14 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n", pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
mw->mw_sg, mw->mw_nents); mw->mw_sg, mw->mw_nents);
rpcrdma_defer_mr_recovery(mw); rpcrdma_defer_mr_recovery(mw);
return -ENOMEM; return -EIO;
out_maperr: out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0], len, (unsigned long long)dma_pages[0],
pageoff, mw->mw_nents, rc); pageoff, mw->mw_nents, rc);
rpcrdma_defer_mr_recovery(mw); rpcrdma_defer_mr_recovery(mw);
return rc; return -EIO;
} }
/* Invalidate all memory regions that were registered for "req". /* Invalidate all memory regions that were registered for "req".
......
...@@ -382,7 +382,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -382,7 +382,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
rpcrdma_defer_mr_recovery(mw); rpcrdma_defer_mr_recovery(mw);
mw = rpcrdma_get_mw(r_xprt); mw = rpcrdma_get_mw(r_xprt);
if (!mw) if (!mw)
return -ENOMEM; return -ENOBUFS;
} while (mw->frmr.fr_state != FRMR_IS_INVALID); } while (mw->frmr.fr_state != FRMR_IS_INVALID);
frmr = &mw->frmr; frmr = &mw->frmr;
frmr->fr_state = FRMR_IS_VALID; frmr->fr_state = FRMR_IS_VALID;
...@@ -456,18 +456,18 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -456,18 +456,18 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n", pr_err("rpcrdma: failed to dma map sg %p sg_nents %u\n",
mw->mw_sg, mw->mw_nents); mw->mw_sg, mw->mw_nents);
rpcrdma_defer_mr_recovery(mw); rpcrdma_defer_mr_recovery(mw);
return -ENOMEM; return -EIO;
out_mapmr_err: out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%u/%u)\n", pr_err("rpcrdma: failed to map mr %p (%u/%u)\n",
frmr->fr_mr, n, mw->mw_nents); frmr->fr_mr, n, mw->mw_nents);
rc = n < 0 ? n : -EIO;
rpcrdma_defer_mr_recovery(mw); rpcrdma_defer_mr_recovery(mw);
return rc; return -EIO;
out_senderr: out_senderr:
pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
rpcrdma_defer_mr_recovery(mw); rpcrdma_defer_mr_recovery(mw);
return rc; return -ENOTCONN;
} }
static struct ib_send_wr * static struct ib_send_wr *
...@@ -569,7 +569,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) ...@@ -569,7 +569,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
return; return;
reset_mrs: reset_mrs:
pr_warn("%s: ib_post_send failed %i\n", __func__, rc); pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
rdma_disconnect(ia->ri_id);
/* Find and reset the MRs in the LOCAL_INV WRs that did not /* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted. This is synchronous, and slow. * get posted. This is synchronous, and slow.
......
...@@ -251,7 +251,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, ...@@ -251,7 +251,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
/* alloc the pagelist for receiving buffer */ /* alloc the pagelist for receiving buffer */
ppages[p] = alloc_page(GFP_ATOMIC); ppages[p] = alloc_page(GFP_ATOMIC);
if (!ppages[p]) if (!ppages[p])
return -ENOMEM; return -EAGAIN;
} }
seg[n].mr_page = ppages[p]; seg[n].mr_page = ppages[p];
seg[n].mr_offset = (void *)(unsigned long) page_base; seg[n].mr_offset = (void *)(unsigned long) page_base;
......
...@@ -558,7 +558,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) ...@@ -558,7 +558,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
out_fail: out_fail:
rpcrdma_buffer_put(req); rpcrdma_buffer_put(req);
r_xprt->rx_stats.failed_marshal_count++;
return NULL; return NULL;
} }
...@@ -590,8 +589,19 @@ xprt_rdma_free(void *buffer) ...@@ -590,8 +589,19 @@ xprt_rdma_free(void *buffer)
rpcrdma_buffer_put(req); rpcrdma_buffer_put(req);
} }
/* /**
* xprt_rdma_send_request - marshal and send an RPC request
* @task: RPC task with an RPC message in rq_snd_buf
*
* Return values:
* 0: The request has been sent
* ENOTCONN: Caller needs to invoke connect logic then call again
* ENOBUFS: Call again later to send the request
* EIO: A permanent error occurred. The request was not sent,
* and don't try it again
*
* send_request invokes the meat of RPC RDMA. It must do the following: * send_request invokes the meat of RPC RDMA. It must do the following:
*
* 1. Marshal the RPC request into an RPC RDMA request, which means * 1. Marshal the RPC request into an RPC RDMA request, which means
* putting a header in front of data, and creating IOVs for RDMA * putting a header in front of data, and creating IOVs for RDMA
* from those in the request. * from those in the request.
...@@ -600,7 +610,6 @@ xprt_rdma_free(void *buffer) ...@@ -600,7 +610,6 @@ xprt_rdma_free(void *buffer)
* the request (rpcrdma_ep_post). * the request (rpcrdma_ep_post).
* 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
*/ */
static int static int
xprt_rdma_send_request(struct rpc_task *task) xprt_rdma_send_request(struct rpc_task *task)
{ {
...@@ -630,11 +639,12 @@ xprt_rdma_send_request(struct rpc_task *task) ...@@ -630,11 +639,12 @@ xprt_rdma_send_request(struct rpc_task *task)
return 0; return 0;
failed_marshal: failed_marshal:
r_xprt->rx_stats.failed_marshal_count++;
dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n", dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
__func__, rc); __func__, rc);
if (rc == -EIO) if (rc == -EIO)
return -EIO; r_xprt->rx_stats.failed_marshal_count++;
if (rc != -ENOTCONN)
return rc;
drop_connection: drop_connection:
xprt_disconnect_done(xprt); xprt_disconnect_done(xprt);
return -ENOTCONN; /* implies disconnect */ return -ENOTCONN; /* implies disconnect */
......
...@@ -1151,7 +1151,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, ...@@ -1151,7 +1151,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
if (rep) { if (rep) {
rc = rpcrdma_ep_post_recv(ia, ep, rep); rc = rpcrdma_ep_post_recv(ia, ep, rep);
if (rc) if (rc)
goto out; return rc;
req->rl_reply = NULL; req->rl_reply = NULL;
} }
...@@ -1176,10 +1176,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, ...@@ -1176,10 +1176,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail); rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail);
if (rc) if (rc)
dprintk("RPC: %s: ib_post_send returned %i\n", __func__, goto out_postsend_err;
rc); return 0;
out:
return rc; out_postsend_err:
pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
return -ENOTCONN;
} }
/* /*
...@@ -1204,11 +1206,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, ...@@ -1204,11 +1206,13 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
DMA_BIDIRECTIONAL); DMA_BIDIRECTIONAL);
rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
if (rc) if (rc)
dprintk("RPC: %s: ib_post_recv returned %i\n", __func__, goto out_postrecv;
rc); return 0;
return rc;
out_postrecv:
pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
return -ENOTCONN;
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment