Commit ea526413 authored by Trond Myklebust

Merge tag 'nfs-rdma-for-3.19' of git://git.linux-nfs.org/projects/anna/nfs-rdma into linux-next

Pull NFS client RDMA changes for 3.19 from Anna Schumaker:
 "NFS: Client side changes for RDMA

  These patches provide various bugfixes and cleanups for NFS over RDMA,
  including better error handling and performance improvements via pad
  optimization.

  Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>"

* tag 'nfs-rdma-for-3.19' of git://git.linux-nfs.org/projects/anna/nfs-rdma:
  xprtrdma: Display async errors
  xprtrdma: Enable pad optimization
  xprtrdma: Re-write rpcrdma_flush_cqs()
  xprtrdma: Refactor tasklet scheduling
  xprtrdma: unmap all FMRs during transport disconnect
  xprtrdma: Cap req_cqinit
  xprtrdma: Return an errno from rpcrdma_register_external()
parents 1702562d 7ff11de1
@@ -73,7 +73,7 @@ static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
 static unsigned int xprt_rdma_inline_write_padding;
 static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
-int xprt_rdma_pad_optimize = 0;
+int xprt_rdma_pad_optimize = 1;
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
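Note: this flips the default so pad optimization is on out of the box. The variable sits alongside the other sysctl-backed knobs at the top of transport.c, so (assuming the usual sunrpc sysctl table entry for it) the old behavior can still be restored at runtime without rebuilding the module.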
@@ -599,7 +599,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 
 	if (req->rl_niovs == 0)
 		rc = rpcrdma_marshal_req(rqst);
-	else if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+	else if (r_xprt->rx_ia.ri_memreg_strategy != RPCRDMA_ALLPHYSICAL)
		rc = rpcrdma_marshal_chunks(rqst, 0);
 	if (rc < 0)
 		goto failed_marshal;
...
@@ -62,6 +62,7 @@
 #endif
 
 static void rpcrdma_reset_frmrs(struct rpcrdma_ia *);
+static void rpcrdma_reset_fmrs(struct rpcrdma_ia *);
 
 /*
  * internal functions
@@ -105,13 +106,51 @@ rpcrdma_run_tasklet(unsigned long data)
 
 static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL);
 
+static const char * const async_event[] = {
+	"CQ error",
+	"QP fatal error",
+	"QP request error",
+	"QP access error",
+	"communication established",
+	"send queue drained",
+	"path migration successful",
+	"path mig error",
+	"device fatal error",
+	"port active",
+	"port error",
+	"LID change",
+	"P_key change",
+	"SM change",
+	"SRQ error",
+	"SRQ limit reached",
+	"last WQE reached",
+	"client reregister",
+	"GID change",
+};
+
+#define ASYNC_MSG(status)					\
+	((status) < ARRAY_SIZE(async_event) ?			\
+		async_event[(status)] : "unknown async error")
+
+static void
+rpcrdma_schedule_tasklet(struct list_head *sched_list)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
+	list_splice_tail(sched_list, &rpcrdma_tasklets_g);
+	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
+	tasklet_schedule(&rpcrdma_tasklet_g);
+}
+
 static void
 rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 
-	dprintk("RPC:       %s: QP error %X on device %s ep %p\n",
-		__func__, event->event, event->device->name, context);
+	pr_err("RPC:       %s: %s on device %s ep %p\n",
+	       __func__, ASYNC_MSG(event->event),
+	       event->device->name, context);
+
 	if (ep->rep_connected == 1) {
 		ep->rep_connected = -EIO;
 		ep->rep_func(ep);
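The async_event[] table above indexes directly by enum ib_event_type from <rdma/ib_verbs.h>, so its entries must stay in positional sync with that enum; the ARRAY_SIZE() bound in ASYNC_MSG() only guards against codes past the end of the table. A minimal userspace sketch of the same bounded-lookup pattern (the demo_* names below are illustrative, not part of the patch):

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char * const demo_event[] = {
	"CQ error",		/* index 0, like IB_EVENT_CQ_ERR */
	"QP fatal error",	/* index 1, like IB_EVENT_QP_FATAL */
};

#define DEMO_MSG(status) \
	((status) < ARRAY_SIZE(demo_event) ? \
		demo_event[(status)] : "unknown async error")

int main(void)
{
	printf("%s\n", DEMO_MSG(1u));	/* prints "QP fatal error" */
	printf("%s\n", DEMO_MSG(42u));	/* out of range: "unknown async error" */
	return 0;
}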
@@ -124,8 +163,9 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
 {
 	struct rpcrdma_ep *ep = context;
 
-	dprintk("RPC:       %s: CQ error %X on device %s ep %p\n",
-		__func__, event->event, event->device->name, context);
+	pr_err("RPC:       %s: %s on device %s ep %p\n",
+	       __func__, ASYNC_MSG(event->event),
+	       event->device->name, context);
 	if (ep->rep_connected == 1) {
 		ep->rep_connected = -EIO;
 		ep->rep_func(ep);
@@ -243,7 +283,6 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
 	struct list_head sched_list;
 	struct ib_wc *wcs;
 	int budget, count, rc;
-	unsigned long flags;
 
 	INIT_LIST_HEAD(&sched_list);
 	budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
@@ -261,10 +300,7 @@ rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
 	rc = 0;
 
 out_schedule:
-	spin_lock_irqsave(&rpcrdma_tk_lock_g, flags);
-	list_splice_tail(&sched_list, &rpcrdma_tasklets_g);
-	spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags);
-	tasklet_schedule(&rpcrdma_tasklet_g);
+	rpcrdma_schedule_tasklet(&sched_list);
 	return rc;
 }
@@ -309,8 +345,15 @@ rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
 static void
 rpcrdma_flush_cqs(struct rpcrdma_ep *ep)
 {
-	rpcrdma_recvcq_upcall(ep->rep_attr.recv_cq, ep);
-	rpcrdma_sendcq_upcall(ep->rep_attr.send_cq, ep);
+	struct ib_wc wc;
+	LIST_HEAD(sched_list);
+
+	while (ib_poll_cq(ep->rep_attr.recv_cq, 1, &wc) > 0)
+		rpcrdma_recvcq_process_wc(&wc, &sched_list);
+	if (!list_empty(&sched_list))
+		rpcrdma_schedule_tasklet(&sched_list);
+	while (ib_poll_cq(ep->rep_attr.send_cq, 1, &wc) > 0)
+		rpcrdma_sendcq_process_wc(&wc);
 }
 
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
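The old body reused the interrupt-path upcalls to flush the CQs; the rewrite polls each CQ directly until ib_poll_cq() reports no more work completions, so both queues are fully drained (and any pending receive WCs are still handed to the tasklet) before the transport tears the connection down. A hedged sketch of the general drain pattern, for illustration only (the patch keeps two explicit loops because send and receive completions take different handlers):

/* Sketch: drain a CQ by polling one WC at a time until empty.
 * Illustrative only; rpcrdma_sketch_drain_cq() is not part of
 * the patch.
 */
static void
rpcrdma_sketch_drain_cq(struct ib_cq *cq, void (*handler)(struct ib_wc *))
{
	struct ib_wc wc;

	while (ib_poll_cq(cq, 1, &wc) > 0)
		handler(&wc);
}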
@@ -733,7 +776,9 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 
 	/* set trigger for requesting send completion */
 	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
-	if (ep->rep_cqinit <= 2)
+	if (ep->rep_cqinit > RPCRDMA_MAX_UNSIGNALED_SENDS)
+		ep->rep_cqinit = RPCRDMA_MAX_UNSIGNALED_SENDS;
+	else if (ep->rep_cqinit <= 2)
 		ep->rep_cqinit = 0;
 	INIT_CQCOUNT(ep);
 	ep->rep_ia = ia;
@@ -866,8 +911,19 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
 		rpcrdma_ep_disconnect(ep, ia);
 		rpcrdma_flush_cqs(ep);
 
-		if (ia->ri_memreg_strategy == RPCRDMA_FRMR)
+		switch (ia->ri_memreg_strategy) {
+		case RPCRDMA_FRMR:
 			rpcrdma_reset_frmrs(ia);
+			break;
+		case RPCRDMA_MTHCAFMR:
+			rpcrdma_reset_fmrs(ia);
+			break;
+		case RPCRDMA_ALLPHYSICAL:
+			break;
+		default:
+			rc = -EIO;
+			goto out;
+		}
 
 		xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
 		id = rpcrdma_create_id(xprt, ia,
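With this switch, each memory registration strategy gets an explicit recovery path after a disconnect: FRMRs are reset, FMRs are unmapped, ALLPHYSICAL needs no per-MR recovery, and any unexpected strategy now fails the reconnect with -EIO instead of silently proceeding.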
@@ -1287,6 +1343,34 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
 	kfree(buf->rb_pool);
 }
 
+/* After a disconnect, unmap all FMRs.
+ *
+ * This is invoked only in the transport connect worker in order
+ * to serialize with rpcrdma_register_fmr_external().
+ */
+static void
+rpcrdma_reset_fmrs(struct rpcrdma_ia *ia)
+{
+	struct rpcrdma_xprt *r_xprt =
+				container_of(ia, struct rpcrdma_xprt, rx_ia);
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
+	struct list_head *pos;
+	struct rpcrdma_mw *r;
+	LIST_HEAD(l);
+	int rc;
+
+	list_for_each(pos, &buf->rb_all) {
+		r = list_entry(pos, struct rpcrdma_mw, mw_all);
+
+		INIT_LIST_HEAD(&l);
+		list_add(&r->r.fmr->list, &l);
+		rc = ib_unmap_fmr(&l);
+		if (rc)
+			dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
+				__func__, rc);
+	}
+}
+
 /* After a disconnect, a flushed FAST_REG_MR can leave an FRMR in
  * an unusable state. Find FRMRs in this state and dereg / reg
  * each. FRMRs that are VALID and attached to an rpcrdma_req are
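Note that ib_unmap_fmr() takes a list, so the new helper could batch every FMR into a single call; unmapping one at a time keeps each failure localized to a single unmap. For comparison, a hedged sketch of the batched variant (illustrative only, not part of the patch):

/* Sketch: batching every FMR into one ib_unmap_fmr() call.
 * Illustrative alternative to the per-MR loop above; per-call
 * error reporting is coarser with batching.
 */
static void
rpcrdma_sketch_reset_fmrs_batched(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt =
				container_of(ia, struct rpcrdma_xprt, rx_ia);
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_mw *r;
	LIST_HEAD(l);
	int rc;

	list_for_each_entry(r, &buf->rb_all, mw_all)
		list_add(&r->r.fmr->list, &l);

	rc = ib_unmap_fmr(&l);
	if (rc)
		dprintk("RPC:       %s: ib_unmap_fmr failed %i\n",
			__func__, rc);
}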
@@ -1918,10 +2002,10 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
 		break;
 
 	default:
-		return -1;
+		return -EIO;
 	}
 	if (rc)
-		return -1;
+		return rc;
 	return nsegs;
 }
...
@@ -97,6 +97,12 @@ struct rpcrdma_ep {
 	struct ib_wc		rep_recv_wcs[RPCRDMA_POLLSIZE];
 };
 
+/*
+ * Force a signaled SEND Work Request every so often,
+ * in case the provider needs to do some housekeeping.
+ */
+#define RPCRDMA_MAX_UNSIGNALED_SENDS	(32)
+
 #define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
 #define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount)
...
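For context on how rep_cqinit and these counters interact: each SEND normally decrements the budget and is posted unsignaled; when the budget runs out, the counter is reset and a completion is requested. A hedged sketch of that consumption pattern (the function name is illustrative; the real logic lives in the send-posting path in verbs.c and is not part of this diff):

/* Sketch: consuming the unsignaled-send budget per posted SEND WR. */
static void
rpcrdma_sketch_mark_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *wr)
{
	if (DECR_CQCOUNT(ep) > 0) {
		wr->send_flags = 0;			/* unsignaled */
	} else {
		INIT_CQCOUNT(ep);			/* refill budget */
		wr->send_flags = IB_SEND_SIGNALED;	/* request a CQE */
	}
}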