Commit 35a24421 authored by Trond Myklebust

Merge tag 'nfs-rdma-4.12-1' of git://git.linux-nfs.org/projects/anna/nfs-rdma

NFS: NFS over RDMA Client Side Changes

New Features:
- Break RDMA connections after a connection timeout
- Support for unloading the underlying device driver

Bugfixes and cleanups:
- Mark the receive workqueue as "read-mostly"
- Silence warnings caused by ENOBUFS
- Update a comment in xdr_init_decode_pages()
- Remove rpcrdma_buffer->rb_pool.
parents bb3393d5 2be1fce9
...@@ -807,7 +807,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) ...@@ -807,7 +807,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
EXPORT_SYMBOL_GPL(xdr_init_decode); EXPORT_SYMBOL_GPL(xdr_init_decode);
/** /**
* xdr_init_decode - Initialize an xdr_stream for decoding data. * xdr_init_decode_pages - Initialize an xdr_stream for decoding into pages
* @xdr: pointer to xdr_stream struct * @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer from which to decode data * @buf: pointer to XDR buffer from which to decode data
* @pages: list of pages to decode into * @pages: list of pages to decode into
......
...@@ -651,6 +651,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) ...@@ -651,6 +651,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
xprt_wake_pending_tasks(xprt, -EAGAIN); xprt_wake_pending_tasks(xprt, -EAGAIN);
spin_unlock_bh(&xprt->transport_lock); spin_unlock_bh(&xprt->transport_lock);
} }
EXPORT_SYMBOL_GPL(xprt_force_disconnect);
/** /**
* xprt_conditional_disconnect - force a transport to disconnect * xprt_conditional_disconnect - force a transport to disconnect
......
...@@ -494,7 +494,7 @@ rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, ...@@ -494,7 +494,7 @@ rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
} }
sge->length = len; sge->length = len;
ib_dma_sync_single_for_device(ia->ri_device, sge->addr, ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
sge->length, DMA_TO_DEVICE); sge->length, DMA_TO_DEVICE);
req->rl_send_wr.num_sge++; req->rl_send_wr.num_sge++;
return true; return true;
...@@ -523,7 +523,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, ...@@ -523,7 +523,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
sge[sge_no].addr = rdmab_addr(rb); sge[sge_no].addr = rdmab_addr(rb);
sge[sge_no].length = xdr->head[0].iov_len; sge[sge_no].length = xdr->head[0].iov_len;
sge[sge_no].lkey = rdmab_lkey(rb); sge[sge_no].lkey = rdmab_lkey(rb);
ib_dma_sync_single_for_device(device, sge[sge_no].addr, ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr,
sge[sge_no].length, DMA_TO_DEVICE); sge[sge_no].length, DMA_TO_DEVICE);
/* If there is a Read chunk, the page list is being handled /* If there is a Read chunk, the page list is being handled
...@@ -781,9 +781,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) ...@@ -781,9 +781,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
return 0; return 0;
out_err: out_err:
if (PTR_ERR(iptr) != -ENOBUFS) {
pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n", pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
PTR_ERR(iptr)); PTR_ERR(iptr));
r_xprt->rx_stats.failed_marshal_count++; r_xprt->rx_stats.failed_marshal_count++;
}
return PTR_ERR(iptr); return PTR_ERR(iptr);
} }
......
...@@ -66,8 +66,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; ...@@ -66,8 +66,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_inline_write_padding;
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
int xprt_rdma_pad_optimize = 0; int xprt_rdma_pad_optimize;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
...@@ -396,7 +396,7 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -396,7 +396,7 @@ xprt_setup_rdma(struct xprt_create *args)
new_xprt = rpcx_to_rdmax(xprt); new_xprt = rpcx_to_rdmax(xprt);
rc = rpcrdma_ia_open(new_xprt, sap, xprt_rdma_memreg_strategy); rc = rpcrdma_ia_open(new_xprt, sap);
if (rc) if (rc)
goto out1; goto out1;
...@@ -457,19 +457,33 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -457,19 +457,33 @@ xprt_setup_rdma(struct xprt_create *args)
return ERR_PTR(rc); return ERR_PTR(rc);
} }
/* /**
* Close a connection, during shutdown or timeout/reconnect * xprt_rdma_close - Close down RDMA connection
* @xprt: generic transport to be closed
*
* Called during transport shutdown, reconnect, or device
* removal. Caller holds the transport's write lock.
*/ */
static void static void
xprt_rdma_close(struct rpc_xprt *xprt) xprt_rdma_close(struct rpc_xprt *xprt)
{ {
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_ep *ep = &r_xprt->rx_ep;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
dprintk("RPC: %s: closing xprt %p\n", __func__, xprt);
dprintk("RPC: %s: closing\n", __func__); if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
if (r_xprt->rx_ep.rep_connected > 0) xprt_clear_connected(xprt);
rpcrdma_ia_remove(ia);
return;
}
if (ep->rep_connected == -ENODEV)
return;
if (ep->rep_connected > 0)
xprt->reestablish_timeout = 0; xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt); xprt_disconnect_done(xprt);
rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); rpcrdma_ep_disconnect(ep, ia);
} }
static void static void
...@@ -484,6 +498,27 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) ...@@ -484,6 +498,27 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
dprintk("RPC: %s: %u\n", __func__, port); dprintk("RPC: %s: %u\n", __func__, port);
} }
/**
* xprt_rdma_timer - invoked when an RPC times out
* @xprt: controlling RPC transport
* @task: RPC task that timed out
*
* Invoked when the transport is still connected, but an RPC
* retransmit timeout occurs.
*
* Since RDMA connections don't have a keep-alive, forcibly
* disconnect and retry to connect. This drives full
* detection of the network path, and retransmissions of
* all pending RPCs.
*/
static void
xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
xprt_force_disconnect(xprt);
}
static void static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{ {
...@@ -659,6 +694,8 @@ xprt_rdma_free(struct rpc_task *task) ...@@ -659,6 +694,8 @@ xprt_rdma_free(struct rpc_task *task)
* xprt_rdma_send_request - marshal and send an RPC request * xprt_rdma_send_request - marshal and send an RPC request
* @task: RPC task with an RPC message in rq_snd_buf * @task: RPC task with an RPC message in rq_snd_buf
* *
* Caller holds the transport's write lock.
*
* Return values: * Return values:
* 0: The request has been sent * 0: The request has been sent
* ENOTCONN: Caller needs to invoke connect logic then call again * ENOTCONN: Caller needs to invoke connect logic then call again
...@@ -685,6 +722,9 @@ xprt_rdma_send_request(struct rpc_task *task) ...@@ -685,6 +722,9 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc = 0; int rc = 0;
if (!xprt_connected(xprt))
goto drop_connection;
/* On retransmit, remove any previously registered chunks */ /* On retransmit, remove any previously registered chunks */
if (unlikely(!list_empty(&req->rl_registered))) if (unlikely(!list_empty(&req->rl_registered)))
r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
...@@ -776,6 +816,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = { ...@@ -776,6 +816,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
.alloc_slot = xprt_alloc_slot, .alloc_slot = xprt_alloc_slot,
.release_request = xprt_release_rqst_cong, /* ditto */ .release_request = xprt_release_rqst_cong, /* ditto */
.set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
.timer = xprt_rdma_timer,
.rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
.set_port = xprt_rdma_set_port, .set_port = xprt_rdma_set_port,
.connect = xprt_rdma_connect, .connect = xprt_rdma_connect,
......
This diff is collapsed.
...@@ -69,6 +69,7 @@ struct rpcrdma_ia { ...@@ -69,6 +69,7 @@ struct rpcrdma_ia {
struct rdma_cm_id *ri_id; struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd; struct ib_pd *ri_pd;
struct completion ri_done; struct completion ri_done;
struct completion ri_remove_done;
int ri_async_rc; int ri_async_rc;
unsigned int ri_max_segs; unsigned int ri_max_segs;
unsigned int ri_max_frmr_depth; unsigned int ri_max_frmr_depth;
...@@ -78,10 +79,15 @@ struct rpcrdma_ia { ...@@ -78,10 +79,15 @@ struct rpcrdma_ia {
bool ri_reminv_expected; bool ri_reminv_expected;
bool ri_implicit_roundup; bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype; enum ib_mr_type ri_mrtype;
unsigned long ri_flags;
struct ib_qp_attr ri_qp_attr; struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr; struct ib_qp_init_attr ri_qp_init_attr;
}; };
enum {
RPCRDMA_IAF_REMOVING = 0,
};
/* /*
* RDMA Endpoint -- one per transport instance * RDMA Endpoint -- one per transport instance
*/ */
...@@ -164,6 +170,12 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb) ...@@ -164,6 +170,12 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
return (struct rpcrdma_msg *)rb->rg_base; return (struct rpcrdma_msg *)rb->rg_base;
} }
static inline struct ib_device *
rdmab_device(struct rpcrdma_regbuf *rb)
{
return rb->rg_device;
}
#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) #define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
/* To ensure a transport can always make forward progress, /* To ensure a transport can always make forward progress,
...@@ -209,7 +221,6 @@ struct rpcrdma_rep { ...@@ -209,7 +221,6 @@ struct rpcrdma_rep {
unsigned int rr_len; unsigned int rr_len;
int rr_wc_flags; int rr_wc_flags;
u32 rr_inv_rkey; u32 rr_inv_rkey;
struct ib_device *rr_device;
struct rpcrdma_xprt *rr_rxprt; struct rpcrdma_xprt *rr_rxprt;
struct work_struct rr_work; struct work_struct rr_work;
struct list_head rr_list; struct list_head rr_list;
...@@ -380,7 +391,6 @@ struct rpcrdma_buffer { ...@@ -380,7 +391,6 @@ struct rpcrdma_buffer {
spinlock_t rb_mwlock; /* protect rb_mws list */ spinlock_t rb_mwlock; /* protect rb_mws list */
struct list_head rb_mws; struct list_head rb_mws;
struct list_head rb_all; struct list_head rb_all;
char *rb_pool;
spinlock_t rb_lock; /* protect buf lists */ spinlock_t rb_lock; /* protect buf lists */
int rb_send_count, rb_recv_count; int rb_send_count, rb_recv_count;
...@@ -497,10 +507,16 @@ struct rpcrdma_xprt { ...@@ -497,10 +507,16 @@ struct rpcrdma_xprt {
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
extern int xprt_rdma_pad_optimize; extern int xprt_rdma_pad_optimize;
/* This setting controls the hunt for a supported memory
* registration strategy.
*/
extern unsigned int xprt_rdma_memreg_strategy;
/* /*
* Interface Adapter calls - xprtrdma/verbs.c * Interface Adapter calls - xprtrdma/verbs.c
*/ */
int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int); int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *); void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *); bool frwr_is_supported(struct rpcrdma_ia *);
bool fmr_is_supported(struct rpcrdma_ia *); bool fmr_is_supported(struct rpcrdma_ia *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment