Commit 8f39fce8 authored by Trond Myklebust

Merge tag 'nfs-rdma-for-4.16-1' of git://git.linux-nfs.org/projects/anna/linux-nfs

NFS-over-RDMA client updates for Linux 4.16

New features:
- xprtrdma tracepoints

Bugfixes and cleanups:
- Fix memory leak if rpcrdma_buffer_create() fails
- Fix allocating extra rpcrdma_reps for the backchannel
- Remove various unused and redundant variables and lock cycles
- Fix IPv6 support in xprt_rdma_set_port()
- Fix memory leak by calling buf_free for callback replies
- Fix "bytes registered" accounting
- Fix kernel-doc comments
- SUNRPC tracepoint cleanups for consistent information
- Optimizations for __rpc_execute()
parents 0be283f6 21ead9ff
@@ -64,7 +64,7 @@ enum rpcrdma_memreg {
         RPCRDMA_MEMWINDOWS,
         RPCRDMA_MEMWINDOWS_ASYNC,
         RPCRDMA_MTHCAFMR,
-        RPCRDMA_FRMR,
+        RPCRDMA_FRWR,
         RPCRDMA_ALLPHYSICAL,
         RPCRDMA_LAST
 };
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2017 Oracle. All rights reserved.
*/
/*
* enum ib_event_type, from include/rdma/ib_verbs.h
*/
#define IB_EVENT_LIST \
ib_event(CQ_ERR) \
ib_event(QP_FATAL) \
ib_event(QP_REQ_ERR) \
ib_event(QP_ACCESS_ERR) \
ib_event(COMM_EST) \
ib_event(SQ_DRAINED) \
ib_event(PATH_MIG) \
ib_event(PATH_MIG_ERR) \
ib_event(DEVICE_FATAL) \
ib_event(PORT_ACTIVE) \
ib_event(PORT_ERR) \
ib_event(LID_CHANGE) \
ib_event(PKEY_CHANGE) \
ib_event(SM_CHANGE) \
ib_event(SRQ_ERR) \
ib_event(SRQ_LIMIT_REACHED) \
ib_event(QP_LAST_WQE_REACHED) \
ib_event(CLIENT_REREGISTER) \
ib_event(GID_CHANGE) \
ib_event_end(WQ_FATAL)
#undef ib_event
#undef ib_event_end
#define ib_event(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
#define ib_event_end(x) TRACE_DEFINE_ENUM(IB_EVENT_##x);
IB_EVENT_LIST
#undef ib_event
#undef ib_event_end
#define ib_event(x) { IB_EVENT_##x, #x },
#define ib_event_end(x) { IB_EVENT_##x, #x }
#define rdma_show_ib_event(x) \
__print_symbolic(x, IB_EVENT_LIST)
/*
* enum ib_wc_status type, from include/rdma/ib_verbs.h
*/
#define IB_WC_STATUS_LIST \
ib_wc_status(SUCCESS) \
ib_wc_status(LOC_LEN_ERR) \
ib_wc_status(LOC_QP_OP_ERR) \
ib_wc_status(LOC_EEC_OP_ERR) \
ib_wc_status(LOC_PROT_ERR) \
ib_wc_status(WR_FLUSH_ERR) \
ib_wc_status(MW_BIND_ERR) \
ib_wc_status(BAD_RESP_ERR) \
ib_wc_status(LOC_ACCESS_ERR) \
ib_wc_status(REM_INV_REQ_ERR) \
ib_wc_status(REM_ACCESS_ERR) \
ib_wc_status(REM_OP_ERR) \
ib_wc_status(RETRY_EXC_ERR) \
ib_wc_status(RNR_RETRY_EXC_ERR) \
ib_wc_status(LOC_RDD_VIOL_ERR) \
ib_wc_status(REM_INV_RD_REQ_ERR) \
ib_wc_status(REM_ABORT_ERR) \
ib_wc_status(INV_EECN_ERR) \
ib_wc_status(INV_EEC_STATE_ERR) \
ib_wc_status(FATAL_ERR) \
ib_wc_status(RESP_TIMEOUT_ERR) \
ib_wc_status_end(GENERAL_ERR)
#undef ib_wc_status
#undef ib_wc_status_end
#define ib_wc_status(x) TRACE_DEFINE_ENUM(IB_WC_##x);
#define ib_wc_status_end(x) TRACE_DEFINE_ENUM(IB_WC_##x);
IB_WC_STATUS_LIST
#undef ib_wc_status
#undef ib_wc_status_end
#define ib_wc_status(x) { IB_WC_##x, #x },
#define ib_wc_status_end(x) { IB_WC_##x, #x }
#define rdma_show_wc_status(x) \
__print_symbolic(x, IB_WC_STATUS_LIST)
/*
* enum rdma_cm_event_type, from include/rdma/rdma_cm.h
*/
#define RDMA_CM_EVENT_LIST \
rdma_cm_event(ADDR_RESOLVED) \
rdma_cm_event(ADDR_ERROR) \
rdma_cm_event(ROUTE_RESOLVED) \
rdma_cm_event(ROUTE_ERROR) \
rdma_cm_event(CONNECT_REQUEST) \
rdma_cm_event(CONNECT_RESPONSE) \
rdma_cm_event(CONNECT_ERROR) \
rdma_cm_event(UNREACHABLE) \
rdma_cm_event(REJECTED) \
rdma_cm_event(ESTABLISHED) \
rdma_cm_event(DISCONNECTED) \
rdma_cm_event(DEVICE_REMOVAL) \
rdma_cm_event(MULTICAST_JOIN) \
rdma_cm_event(MULTICAST_ERROR) \
rdma_cm_event(ADDR_CHANGE) \
rdma_cm_event_end(TIMEWAIT_EXIT)
#undef rdma_cm_event
#undef rdma_cm_event_end
#define rdma_cm_event(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
#define rdma_cm_event_end(x) TRACE_DEFINE_ENUM(RDMA_CM_EVENT_##x);
RDMA_CM_EVENT_LIST
#undef rdma_cm_event
#undef rdma_cm_event_end
#define rdma_cm_event(x) { RDMA_CM_EVENT_##x, #x },
#define rdma_cm_event_end(x) { RDMA_CM_EVENT_##x, #x }
#define rdma_show_cm_event(x) \
__print_symbolic(x, RDMA_CM_EVENT_LIST)
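
Both this header and the rpcrdma.h header that follows rely on an X-macro (list macro) idiom: each of IB_EVENT_LIST, IB_WC_STATUS_LIST and RDMA_CM_EVENT_LIST is expanded twice with different definitions of its per-item macro, once to emit TRACE_DEFINE_ENUM() so trace tooling can resolve the enum values, and once to emit { value, "name" } pairs for __print_symbolic(). The *_end() variant exists only so the last table entry has no trailing comma. A minimal, hypothetical user-space sketch of the same two-pass pattern (COLOR_LIST, color() and color_end() are made-up names for illustration, not kernel macros):

#include <stdio.h>

#define COLOR_LIST \
        color(RED) \
        color(GREEN) \
        color_end(BLUE)

/* First expansion: declare the enum members. */
#define color(x) COLOR_##x,
#define color_end(x) COLOR_##x
enum color { COLOR_LIST };
#undef color
#undef color_end

/* Second expansion: build a value-to-string table. */
#define color(x) { COLOR_##x, #x },
#define color_end(x) { COLOR_##x, #x }
static const struct { int value; const char *name; } color_names[] = {
        COLOR_LIST
};
#undef color
#undef color_end

int main(void)
{
        unsigned int i;

        for (i = 0; i < sizeof(color_names) / sizeof(color_names[0]); i++)
                printf("%d -> %s\n", color_names[i].value, color_names[i].name);
        return 0;
}
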
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2017 Oracle. All rights reserved.
*/
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rpcrdma
#if !defined(_TRACE_RPCRDMA_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_RPCRDMA_H
#include <linux/tracepoint.h>
#include <trace/events/rdma.h>
/**
** Event classes
**/
DECLARE_EVENT_CLASS(xprtrdma_reply_event,
TP_PROTO(
const struct rpcrdma_rep *rep
),
TP_ARGS(rep),
TP_STRUCT__entry(
__field(const void *, rep)
__field(const void *, r_xprt)
__field(u32, xid)
__field(u32, version)
__field(u32, proc)
),
TP_fast_assign(
__entry->rep = rep;
__entry->r_xprt = rep->rr_rxprt;
__entry->xid = be32_to_cpu(rep->rr_xid);
__entry->version = be32_to_cpu(rep->rr_vers);
__entry->proc = be32_to_cpu(rep->rr_proc);
),
TP_printk("rxprt %p xid=0x%08x rep=%p: version %u proc %u",
__entry->r_xprt, __entry->xid, __entry->rep,
__entry->version, __entry->proc
)
);
#define DEFINE_REPLY_EVENT(name) \
DEFINE_EVENT(xprtrdma_reply_event, name, \
TP_PROTO( \
const struct rpcrdma_rep *rep \
), \
TP_ARGS(rep))
DECLARE_EVENT_CLASS(xprtrdma_rxprt,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt
),
TP_ARGS(r_xprt),
TP_STRUCT__entry(
__field(const void *, r_xprt)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
__entry->r_xprt = r_xprt;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
TP_printk("peer=[%s]:%s r_xprt=%p",
__get_str(addr), __get_str(port), __entry->r_xprt
)
);
#define DEFINE_RXPRT_EVENT(name) \
DEFINE_EVENT(xprtrdma_rxprt, name, \
TP_PROTO( \
const struct rpcrdma_xprt *r_xprt \
), \
TP_ARGS(r_xprt))
DECLARE_EVENT_CLASS(xprtrdma_rdch_event,
TP_PROTO(
const struct rpc_task *task,
unsigned int pos,
struct rpcrdma_mr *mr,
int nsegs
),
TP_ARGS(task, pos, mr, nsegs),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, mr)
__field(unsigned int, pos)
__field(int, nents)
__field(u32, handle)
__field(u32, length)
__field(u64, offset)
__field(int, nsegs)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->mr = mr;
__entry->pos = pos;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
__entry->offset = mr->mr_offset;
__entry->nsegs = nsegs;
),
TP_printk("task:%u@%u mr=%p pos=%u %u@0x%016llx:0x%08x (%s)",
__entry->task_id, __entry->client_id, __entry->mr,
__entry->pos, __entry->length,
(unsigned long long)__entry->offset, __entry->handle,
__entry->nents < __entry->nsegs ? "more" : "last"
)
);
#define DEFINE_RDCH_EVENT(name) \
DEFINE_EVENT(xprtrdma_rdch_event, name, \
TP_PROTO( \
const struct rpc_task *task, \
unsigned int pos, \
struct rpcrdma_mr *mr, \
int nsegs \
), \
TP_ARGS(task, pos, mr, nsegs))
DECLARE_EVENT_CLASS(xprtrdma_wrch_event,
TP_PROTO(
const struct rpc_task *task,
struct rpcrdma_mr *mr,
int nsegs
),
TP_ARGS(task, mr, nsegs),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, mr)
__field(int, nents)
__field(u32, handle)
__field(u32, length)
__field(u64, offset)
__field(int, nsegs)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->mr = mr;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
__entry->offset = mr->mr_offset;
__entry->nsegs = nsegs;
),
TP_printk("task:%u@%u mr=%p %u@0x%016llx:0x%08x (%s)",
__entry->task_id, __entry->client_id, __entry->mr,
__entry->length, (unsigned long long)__entry->offset,
__entry->handle,
__entry->nents < __entry->nsegs ? "more" : "last"
)
);
#define DEFINE_WRCH_EVENT(name) \
DEFINE_EVENT(xprtrdma_wrch_event, name, \
TP_PROTO( \
const struct rpc_task *task, \
struct rpcrdma_mr *mr, \
int nsegs \
), \
TP_ARGS(task, mr, nsegs))
TRACE_DEFINE_ENUM(FRWR_IS_INVALID);
TRACE_DEFINE_ENUM(FRWR_IS_VALID);
TRACE_DEFINE_ENUM(FRWR_FLUSHED_FR);
TRACE_DEFINE_ENUM(FRWR_FLUSHED_LI);
#define xprtrdma_show_frwr_state(x) \
__print_symbolic(x, \
{ FRWR_IS_INVALID, "INVALID" }, \
{ FRWR_IS_VALID, "VALID" }, \
{ FRWR_FLUSHED_FR, "FLUSHED_FR" }, \
{ FRWR_FLUSHED_LI, "FLUSHED_LI" })
DECLARE_EVENT_CLASS(xprtrdma_frwr_done,
TP_PROTO(
const struct ib_wc *wc,
const struct rpcrdma_frwr *frwr
),
TP_ARGS(wc, frwr),
TP_STRUCT__entry(
__field(const void *, mr)
__field(unsigned int, state)
__field(unsigned int, status)
__field(unsigned int, vendor_err)
),
TP_fast_assign(
__entry->mr = container_of(frwr, struct rpcrdma_mr, frwr);
__entry->state = frwr->fr_state;
__entry->status = wc->status;
__entry->vendor_err = __entry->status ? wc->vendor_err : 0;
),
TP_printk(
"mr=%p state=%s: %s (%u/0x%x)",
__entry->mr, xprtrdma_show_frwr_state(__entry->state),
rdma_show_wc_status(__entry->status),
__entry->status, __entry->vendor_err
)
);
#define DEFINE_FRWR_DONE_EVENT(name) \
DEFINE_EVENT(xprtrdma_frwr_done, name, \
TP_PROTO( \
const struct ib_wc *wc, \
const struct rpcrdma_frwr *frwr \
), \
TP_ARGS(wc, frwr))
DECLARE_EVENT_CLASS(xprtrdma_mr,
TP_PROTO(
const struct rpcrdma_mr *mr
),
TP_ARGS(mr),
TP_STRUCT__entry(
__field(const void *, mr)
__field(u32, handle)
__field(u32, length)
__field(u64, offset)
),
TP_fast_assign(
__entry->mr = mr;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
__entry->offset = mr->mr_offset;
),
TP_printk("mr=%p %u@0x%016llx:0x%08x",
__entry->mr, __entry->length,
(unsigned long long)__entry->offset,
__entry->handle
)
);
#define DEFINE_MR_EVENT(name) \
DEFINE_EVENT(xprtrdma_mr, name, \
TP_PROTO( \
const struct rpcrdma_mr *mr \
), \
TP_ARGS(mr))
DECLARE_EVENT_CLASS(xprtrdma_cb_event,
TP_PROTO(
const struct rpc_rqst *rqst
),
TP_ARGS(rqst),
TP_STRUCT__entry(
__field(const void *, rqst)
__field(const void *, rep)
__field(const void *, req)
__field(u32, xid)
),
TP_fast_assign(
__entry->rqst = rqst;
__entry->req = rpcr_to_rdmar(rqst);
__entry->rep = rpcr_to_rdmar(rqst)->rl_reply;
__entry->xid = be32_to_cpu(rqst->rq_xid);
),
TP_printk("xid=0x%08x, rqst=%p req=%p rep=%p",
__entry->xid, __entry->rqst, __entry->req, __entry->rep
)
);
#define DEFINE_CB_EVENT(name) \
DEFINE_EVENT(xprtrdma_cb_event, name, \
TP_PROTO( \
const struct rpc_rqst *rqst \
), \
TP_ARGS(rqst))
/**
** Connection events
**/
TRACE_EVENT(xprtrdma_conn_upcall,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
struct rdma_cm_event *event
),
TP_ARGS(r_xprt, event),
TP_STRUCT__entry(
__field(const void *, r_xprt)
__field(unsigned int, event)
__field(int, status)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
__entry->r_xprt = r_xprt;
__entry->event = event->event;
__entry->status = event->status;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
TP_printk("peer=[%s]:%s r_xprt=%p: %s (%u/%d)",
__get_str(addr), __get_str(port),
__entry->r_xprt, rdma_show_cm_event(__entry->event),
__entry->event, __entry->status
)
);
TRACE_EVENT(xprtrdma_disconnect,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
int status
),
TP_ARGS(r_xprt, status),
TP_STRUCT__entry(
__field(const void *, r_xprt)
__field(int, status)
__field(int, connected)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
__entry->r_xprt = r_xprt;
__entry->status = status;
__entry->connected = r_xprt->rx_ep.rep_connected;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected",
__get_str(addr), __get_str(port),
__entry->r_xprt, __entry->status,
__entry->connected == 1 ? "still " : "dis"
)
);
DEFINE_RXPRT_EVENT(xprtrdma_conn_start);
DEFINE_RXPRT_EVENT(xprtrdma_conn_tout);
DEFINE_RXPRT_EVENT(xprtrdma_create);
DEFINE_RXPRT_EVENT(xprtrdma_destroy);
DEFINE_RXPRT_EVENT(xprtrdma_remove);
DEFINE_RXPRT_EVENT(xprtrdma_reinsert);
DEFINE_RXPRT_EVENT(xprtrdma_reconnect);
DEFINE_RXPRT_EVENT(xprtrdma_inject_dsc);
TRACE_EVENT(xprtrdma_qp_error,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
const struct ib_event *event
),
TP_ARGS(r_xprt, event),
TP_STRUCT__entry(
__field(const void *, r_xprt)
__field(unsigned int, event)
__string(name, event->device->name)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
__entry->r_xprt = r_xprt;
__entry->event = event->event;
__assign_str(name, event->device->name);
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
TP_printk("peer=[%s]:%s r_xprt=%p: dev %s: %s (%u)",
__get_str(addr), __get_str(port), __entry->r_xprt,
__get_str(name), rdma_show_ib_event(__entry->event),
__entry->event
)
);
/**
** Call events
**/
TRACE_EVENT(xprtrdma_createmrs,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
unsigned int count
),
TP_ARGS(r_xprt, count),
TP_STRUCT__entry(
__field(const void *, r_xprt)
__field(unsigned int, count)
),
TP_fast_assign(
__entry->r_xprt = r_xprt;
__entry->count = count;
),
TP_printk("r_xprt=%p: created %u MRs",
__entry->r_xprt, __entry->count
)
);
DEFINE_RXPRT_EVENT(xprtrdma_nomrs);
DEFINE_RDCH_EVENT(xprtrdma_read_chunk);
DEFINE_WRCH_EVENT(xprtrdma_write_chunk);
DEFINE_WRCH_EVENT(xprtrdma_reply_chunk);
TRACE_DEFINE_ENUM(rpcrdma_noch);
TRACE_DEFINE_ENUM(rpcrdma_readch);
TRACE_DEFINE_ENUM(rpcrdma_areadch);
TRACE_DEFINE_ENUM(rpcrdma_writech);
TRACE_DEFINE_ENUM(rpcrdma_replych);
#define xprtrdma_show_chunktype(x) \
__print_symbolic(x, \
{ rpcrdma_noch, "inline" }, \
{ rpcrdma_readch, "read list" }, \
{ rpcrdma_areadch, "*read list" }, \
{ rpcrdma_writech, "write list" }, \
{ rpcrdma_replych, "reply chunk" })
TRACE_EVENT(xprtrdma_marshal,
TP_PROTO(
const struct rpc_rqst *rqst,
unsigned int hdrlen,
unsigned int rtype,
unsigned int wtype
),
TP_ARGS(rqst, hdrlen, rtype, wtype),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(u32, xid)
__field(unsigned int, hdrlen)
__field(unsigned int, headlen)
__field(unsigned int, pagelen)
__field(unsigned int, taillen)
__field(unsigned int, rtype)
__field(unsigned int, wtype)
),
TP_fast_assign(
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->hdrlen = hdrlen;
__entry->headlen = rqst->rq_snd_buf.head[0].iov_len;
__entry->pagelen = rqst->rq_snd_buf.page_len;
__entry->taillen = rqst->rq_snd_buf.tail[0].iov_len;
__entry->rtype = rtype;
__entry->wtype = wtype;
),
TP_printk("task:%u@%u xid=0x%08x: hdr=%u xdr=%u/%u/%u %s/%s",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->hdrlen,
__entry->headlen, __entry->pagelen, __entry->taillen,
xprtrdma_show_chunktype(__entry->rtype),
xprtrdma_show_chunktype(__entry->wtype)
)
);
TRACE_EVENT(xprtrdma_post_send,
TP_PROTO(
const struct rpcrdma_req *req,
int status
),
TP_ARGS(req, status),
TP_STRUCT__entry(
__field(const void *, req)
__field(int, num_sge)
__field(bool, signaled)
__field(int, status)
),
TP_fast_assign(
__entry->req = req;
__entry->num_sge = req->rl_sendctx->sc_wr.num_sge;
__entry->signaled = req->rl_sendctx->sc_wr.send_flags &
IB_SEND_SIGNALED;
__entry->status = status;
),
TP_printk("req=%p, %d SGEs%s, status=%d",
__entry->req, __entry->num_sge,
(__entry->signaled ? ", signaled" : ""),
__entry->status
)
);
TRACE_EVENT(xprtrdma_post_recv,
TP_PROTO(
const struct rpcrdma_rep *rep,
int status
),
TP_ARGS(rep, status),
TP_STRUCT__entry(
__field(const void *, rep)
__field(int, status)
),
TP_fast_assign(
__entry->rep = rep;
__entry->status = status;
),
TP_printk("rep=%p status=%d",
__entry->rep, __entry->status
)
);
/**
** Completion events
**/
TRACE_EVENT(xprtrdma_wc_send,
TP_PROTO(
const struct rpcrdma_sendctx *sc,
const struct ib_wc *wc
),
TP_ARGS(sc, wc),
TP_STRUCT__entry(
__field(const void *, req)
__field(unsigned int, unmap_count)
__field(unsigned int, status)
__field(unsigned int, vendor_err)
),
TP_fast_assign(
__entry->req = sc->sc_req;
__entry->unmap_count = sc->sc_unmap_count;
__entry->status = wc->status;
__entry->vendor_err = __entry->status ? wc->vendor_err : 0;
),
TP_printk("req=%p, unmapped %u pages: %s (%u/0x%x)",
__entry->req, __entry->unmap_count,
rdma_show_wc_status(__entry->status),
__entry->status, __entry->vendor_err
)
);
TRACE_EVENT(xprtrdma_wc_receive,
TP_PROTO(
const struct rpcrdma_rep *rep,
const struct ib_wc *wc
),
TP_ARGS(rep, wc),
TP_STRUCT__entry(
__field(const void *, rep)
__field(unsigned int, byte_len)
__field(unsigned int, status)
__field(unsigned int, vendor_err)
),
TP_fast_assign(
__entry->rep = rep;
__entry->byte_len = wc->byte_len;
__entry->status = wc->status;
__entry->vendor_err = __entry->status ? wc->vendor_err : 0;
),
TP_printk("rep=%p, %u bytes: %s (%u/0x%x)",
__entry->rep, __entry->byte_len,
rdma_show_wc_status(__entry->status),
__entry->status, __entry->vendor_err
)
);
DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_fastreg);
DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li);
DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake);
DEFINE_MR_EVENT(xprtrdma_localinv);
DEFINE_MR_EVENT(xprtrdma_dma_unmap);
DEFINE_MR_EVENT(xprtrdma_remoteinv);
DEFINE_MR_EVENT(xprtrdma_recover_mr);
/**
** Reply events
**/
TRACE_EVENT(xprtrdma_reply,
TP_PROTO(
const struct rpc_task *task,
const struct rpcrdma_rep *rep,
const struct rpcrdma_req *req,
unsigned int credits
),
TP_ARGS(task, rep, req, credits),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, rep)
__field(const void *, req)
__field(u32, xid)
__field(unsigned int, credits)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->rep = rep;
__entry->req = req;
__entry->xid = be32_to_cpu(rep->rr_xid);
__entry->credits = credits;
),
TP_printk("task:%u@%u xid=0x%08x, %u credits, rep=%p -> req=%p",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->credits, __entry->rep, __entry->req
)
);
TRACE_EVENT(xprtrdma_defer_cmp,
TP_PROTO(
const struct rpcrdma_rep *rep
),
TP_ARGS(rep),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, rep)
__field(u32, xid)
),
TP_fast_assign(
__entry->task_id = rep->rr_rqst->rq_task->tk_pid;
__entry->client_id = rep->rr_rqst->rq_task->tk_client->cl_clid;
__entry->rep = rep;
__entry->xid = be32_to_cpu(rep->rr_xid);
),
TP_printk("task:%u@%u xid=0x%08x rep=%p",
__entry->task_id, __entry->client_id, __entry->xid,
__entry->rep
)
);
DEFINE_REPLY_EVENT(xprtrdma_reply_vers);
DEFINE_REPLY_EVENT(xprtrdma_reply_rqst);
DEFINE_REPLY_EVENT(xprtrdma_reply_short);
DEFINE_REPLY_EVENT(xprtrdma_reply_hdr);
TRACE_EVENT(xprtrdma_fixup,
TP_PROTO(
const struct rpc_rqst *rqst,
int len,
int hdrlen
),
TP_ARGS(rqst, len, hdrlen),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, base)
__field(int, len)
__field(int, hdrlen)
),
TP_fast_assign(
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->base = rqst->rq_rcv_buf.head[0].iov_base;
__entry->len = len;
__entry->hdrlen = hdrlen;
),
TP_printk("task:%u@%u base=%p len=%d hdrlen=%d",
__entry->task_id, __entry->client_id,
__entry->base, __entry->len, __entry->hdrlen
)
);
TRACE_EVENT(xprtrdma_fixup_pg,
TP_PROTO(
const struct rpc_rqst *rqst,
int pageno,
const void *pos,
int len,
int curlen
),
TP_ARGS(rqst, pageno, pos, len, curlen),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, pos)
__field(int, pageno)
__field(int, len)
__field(int, curlen)
),
TP_fast_assign(
__entry->task_id = rqst->rq_task->tk_pid;
__entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->pos = pos;
__entry->pageno = pageno;
__entry->len = len;
__entry->curlen = curlen;
),
TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d",
__entry->task_id, __entry->client_id,
__entry->pageno, __entry->pos, __entry->len, __entry->curlen
)
);
TRACE_EVENT(xprtrdma_decode_seg,
TP_PROTO(
u32 handle,
u32 length,
u64 offset
),
TP_ARGS(handle, length, offset),
TP_STRUCT__entry(
__field(u32, handle)
__field(u32, length)
__field(u64, offset)
),
TP_fast_assign(
__entry->handle = handle;
__entry->length = length;
__entry->offset = offset;
),
TP_printk("%u@0x%016llx:0x%08x",
__entry->length, (unsigned long long)__entry->offset,
__entry->handle
)
);
/**
** Allocation/release of rpcrdma_reqs and rpcrdma_reps
**/
TRACE_EVENT(xprtrdma_allocate,
TP_PROTO(
const struct rpc_task *task,
const struct rpcrdma_req *req
),
TP_ARGS(task, req),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, req)
__field(const void *, rep)
__field(size_t, callsize)
__field(size_t, rcvsize)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->req = req;
__entry->rep = req ? req->rl_reply : NULL;
__entry->callsize = task->tk_rqstp->rq_callsize;
__entry->rcvsize = task->tk_rqstp->rq_rcvsize;
),
TP_printk("task:%u@%u req=%p rep=%p (%zu, %zu)",
__entry->task_id, __entry->client_id,
__entry->req, __entry->rep,
__entry->callsize, __entry->rcvsize
)
);
TRACE_EVENT(xprtrdma_rpc_done,
TP_PROTO(
const struct rpc_task *task,
const struct rpcrdma_req *req
),
TP_ARGS(task, req),
TP_STRUCT__entry(
__field(unsigned int, task_id)
__field(unsigned int, client_id)
__field(const void *, req)
__field(const void *, rep)
),
TP_fast_assign(
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
__entry->req = req;
__entry->rep = req->rl_reply;
),
TP_printk("task:%u@%u req=%p rep=%p",
__entry->task_id, __entry->client_id,
__entry->req, __entry->rep
)
);
DEFINE_RXPRT_EVENT(xprtrdma_noreps);
/**
** Callback events
**/
TRACE_EVENT(xprtrdma_cb_setup,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
unsigned int reqs
),
TP_ARGS(r_xprt, reqs),
TP_STRUCT__entry(
__field(const void *, r_xprt)
__field(unsigned int, reqs)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
__entry->r_xprt = r_xprt;
__entry->reqs = reqs;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs",
__get_str(addr), __get_str(port),
__entry->r_xprt, __entry->reqs
)
);
DEFINE_CB_EVENT(xprtrdma_cb_call);
DEFINE_CB_EVENT(xprtrdma_cb_reply);
#endif /* _TRACE_RPCRDMA_H */
#include <trace/define_trace.h>
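
How a TRACE_EVENT header like this gets wired up is standard kernel tracepoint plumbing rather than anything specific to this merge: exactly one compilation unit defines CREATE_TRACE_POINTS before including the header, which makes <trace/define_trace.h> emit the event bodies, while every other file simply includes the header and calls the generated trace_*() helpers, which cost almost nothing until the events are enabled under events/rpcrdma/ in tracefs. A rough sketch; the file placement and the helper name example_report_recv() are assumptions for illustration, not part of this patch set:

/* In exactly one xprtrdma source file (assumed, e.g. module.c): */
#define CREATE_TRACE_POINTS
#include <trace/events/rpcrdma.h>

/* In any other xprtrdma source file (which already pulls in xprt_rdma.h
 * for struct rpcrdma_rep and friends):
 */
#include <trace/events/rpcrdma.h>

static void example_report_recv(struct rpcrdma_rep *rep, int rc)
{
        /* Fires the xprtrdma_post_recv event declared above; compiles
         * to a disabled static branch while the event is off.
         */
        trace_xprtrdma_post_recv(rep, rc);
}
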
@@ -32,7 +32,7 @@ DECLARE_EVENT_CLASS(rpc_task_status,
                 __entry->status = task->tk_status;
         ),

-        TP_printk("task:%u@%u, status %d",
+        TP_printk("task:%u@%u status=%d",
                 __entry->task_id, __entry->client_id,
                 __entry->status)
 );
@@ -66,7 +66,7 @@ TRACE_EVENT(rpc_connect_status,
                 __entry->status = status;
         ),

-        TP_printk("task:%u@%u, status %d",
+        TP_printk("task:%u@%u status=%d",
                 __entry->task_id, __entry->client_id,
                 __entry->status)
 );
@@ -390,6 +390,10 @@ DECLARE_EVENT_CLASS(rpc_xprt_event,
                 __entry->status)
 );

+DEFINE_EVENT(rpc_xprt_event, xprt_timer,
+        TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
+        TP_ARGS(xprt, xid, status));
+
 DEFINE_EVENT(rpc_xprt_event, xprt_lookup_rqst,
         TP_PROTO(struct rpc_xprt *xprt, __be32 xid, int status),
         TP_ARGS(xprt, xid, status));
@@ -755,22 +755,20 @@ static void __rpc_execute(struct rpc_task *task)
                 void (*do_action)(struct rpc_task *);

                 /*
-                 * Execute any pending callback first.
+                 * Perform the next FSM step or a pending callback.
+                 *
+                 * tk_action may be NULL if the task has been killed.
+                 * In particular, note that rpc_killall_tasks may
+                 * do this at any time, so beware when dereferencing.
                  */
-                do_action = task->tk_callback;
-                task->tk_callback = NULL;
-                if (do_action == NULL) {
-                        /*
-                         * Perform the next FSM step.
-                         * tk_action may be NULL if the task has been killed.
-                         * In particular, note that rpc_killall_tasks may
-                         * do this at any time, so beware when dereferencing.
-                         */
-                        do_action = task->tk_action;
-                        if (do_action == NULL)
-                                break;
+                do_action = task->tk_action;
+                if (task->tk_callback) {
+                        do_action = task->tk_callback;
+                        task->tk_callback = NULL;
                 }
-                trace_rpc_task_run_action(task->tk_client, task, task->tk_action);
+                if (!do_action)
+                        break;
+                trace_rpc_task_run_action(task->tk_client, task, do_action);
                 do_action(task);

                 /*
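
For readability, this is roughly how the affected part of __rpc_execute()'s dispatch loop reads after the change (reconstructed from the new side of the hunk; the surrounding loop code is omitted):

        void (*do_action)(struct rpc_task *);

        /* Perform the next FSM step or a pending callback;
         * tk_action may be NULL if the task has been killed,
         * for example by rpc_killall_tasks().
         */
        do_action = task->tk_action;
        if (task->tk_callback) {
                do_action = task->tk_callback;
                task->tk_callback = NULL;
        }
        if (!do_action)
                break;

        trace_rpc_task_run_action(task->tk_client, task, do_action);
        do_action(task);

The net effect: a pending tk_callback still runs before the next tk_action, the common no-callback case needs only one extra test, and the rpc_task_run_action tracepoint now reports the routine that is actually about to run instead of always reporting tk_action.
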
@@ -940,8 +940,8 @@ static void xprt_timer(struct rpc_task *task)
         if (task->tk_status != -ETIMEDOUT)
                 return;
-        dprintk("RPC: %5u xprt_timer\n", task->tk_pid);

+        trace_xprt_timer(xprt, req->rq_xid, task->tk_status);
         if (!req->rq_reply_bytes_recvd) {
                 if (xprt->ops->timer)
                         xprt->ops->timer(xprt, task);
@@ -43,7 +43,6 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
         req = rpcrdma_create_req(r_xprt);
         if (IS_ERR(req))
                 return PTR_ERR(req);
-        __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags);

         rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE,
                                   DMA_TO_DEVICE, GFP_KERNEL);
@@ -74,21 +73,13 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt,
 static int rpcrdma_bc_setup_reps(struct rpcrdma_xprt *r_xprt,
                                  unsigned int count)
 {
-        struct rpcrdma_rep *rep;
         int rc = 0;

         while (count--) {
-                rep = rpcrdma_create_rep(r_xprt);
-                if (IS_ERR(rep)) {
-                        pr_err("RPC: %s: reply buffer alloc failed\n",
-                                __func__);
-                        rc = PTR_ERR(rep);
+                rc = rpcrdma_create_rep(r_xprt);
+                if (rc)
                         break;
-                }
-
-                rpcrdma_recv_buffer_put(rep);
         }

         return rc;
 }
@@ -129,6 +120,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
                 rqst->rq_xprt = &r_xprt->rx_xprt;
                 INIT_LIST_HEAD(&rqst->rq_list);
                 INIT_LIST_HEAD(&rqst->rq_bc_list);
+                __set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
                 if (rpcrdma_bc_setup_rqst(r_xprt, rqst))
                         goto out_free;
@@ -148,7 +140,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
         buffer->rb_bc_srv_max_requests = reqs;
         request_module("svcrdma");
+        trace_xprtrdma_cb_setup(r_xprt, reqs);
         return 0;

 out_free:
@@ -196,13 +188,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt)
         return maxmsg - RPCRDMA_HDRLEN_MIN;
 }

-/**
- * rpcrdma_bc_marshal_reply - Send backwards direction reply
- * @rqst: buffer containing RPC reply data
- *
- * Returns zero on success.
- */
-int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
+static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
 {
         struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
         struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
@@ -226,7 +212,46 @@ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)
         if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN,
                                       &rqst->rq_snd_buf, rpcrdma_noch))
                 return -EIO;
+
+        trace_xprtrdma_cb_reply(rqst);
         return 0;
 }
+
+/**
+ * xprt_rdma_bc_send_reply - marshal and send a backchannel reply
+ * @rqst: RPC rqst with a backchannel RPC reply in rq_snd_buf
+ *
+ * Caller holds the transport's write lock.
+ *
+ * Returns:
+ *        %0 if the RPC message has been sent
+ *        %-ENOTCONN if the caller should reconnect and call again
+ *        %-EIO if a permanent error occurred and the request was not
+ *                sent. Do not try to send this message again.
+ */
+int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
+{
+        struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
+        struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+        int rc;
+
+        if (!xprt_connected(rqst->rq_xprt))
+                goto drop_connection;
+
+        rc = rpcrdma_bc_marshal_reply(rqst);
+        if (rc < 0)
+                goto failed_marshal;
+
+        if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
+                goto drop_connection;
+        return 0;
+
+failed_marshal:
+        if (rc != -ENOTCONN)
+                return rc;
+drop_connection:
+        xprt_disconnect_done(rqst->rq_xprt);
+        return -ENOTCONN;
+}

 /**
@@ -262,11 +287,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
         dprintk("RPC: %s: freeing rqst %p (req %p)\n",
                 __func__, rqst, rpcr_to_rdmar(rqst));

-        smp_mb__before_atomic();
-        WARN_ON_ONCE(!test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state));
-        clear_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
-        smp_mb__after_atomic();
-
         spin_lock_bh(&xprt->bc_pa_lock);
         list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
         spin_unlock_bh(&xprt->bc_pa_lock);
@@ -274,7 +294,7 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
 /**
  * rpcrdma_bc_receive_call - Handle a backward direction call
- * @xprt: transport receiving the call
+ * @r_xprt: transport receiving the call
  * @rep: receive buffer containing the call
  *
  * Operational assumptions:
@@ -313,7 +333,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
                                 struct rpc_rqst, rq_bc_pa_list);
         list_del(&rqst->rq_bc_pa_list);
         spin_unlock(&xprt->bc_pa_lock);
-        dprintk("RPC: %s: using rqst %p\n", __func__, rqst);

         /* Prepare rqst */
         rqst->rq_reply_bytes_recvd = 0;
@@ -321,7 +340,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
         rqst->rq_xid = *p;
         rqst->rq_private_buf.len = size;
-        set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);

         buf = &rqst->rq_rcv_buf;
         memset(buf, 0, sizeof(*buf));
@@ -335,12 +353,8 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt,
          * the Upper Layer is done decoding it.
          */
         req = rpcr_to_rdmar(rqst);
-        dprintk("RPC: %s: attaching rep %p to req %p\n",
-                __func__, rep, req);
         req->rl_reply = rep;
-
-        /* Defeat the retransmit detection logic in send_request */
-        req->rl_connect_cookie = 0;
+        trace_xprtrdma_cb_call(rqst);

         /* Queue rqst for ULP's callback service */
         bc_serv = xprt->bc_serv;
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* /*
* Copyright (c) 2015 Oracle. All rights reserved. * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/ */
...@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia) ...@@ -47,7 +47,7 @@ fmr_is_supported(struct rpcrdma_ia *ia)
} }
static int static int
fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{ {
static struct ib_fmr_attr fmr_attr = { static struct ib_fmr_attr fmr_attr = {
.max_pages = RPCRDMA_MAX_FMR_SGES, .max_pages = RPCRDMA_MAX_FMR_SGES,
...@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw) ...@@ -55,106 +55,108 @@ fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *mw)
.page_shift = PAGE_SHIFT .page_shift = PAGE_SHIFT
}; };
mw->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES, mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
sizeof(u64), GFP_KERNEL); sizeof(u64), GFP_KERNEL);
if (!mw->fmr.fm_physaddrs) if (!mr->fmr.fm_physaddrs)
goto out_free; goto out_free;
mw->mw_sg = kcalloc(RPCRDMA_MAX_FMR_SGES, mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
sizeof(*mw->mw_sg), GFP_KERNEL); sizeof(*mr->mr_sg), GFP_KERNEL);
if (!mw->mw_sg) if (!mr->mr_sg)
goto out_free; goto out_free;
sg_init_table(mw->mw_sg, RPCRDMA_MAX_FMR_SGES); sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
mw->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS, mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
&fmr_attr); &fmr_attr);
if (IS_ERR(mw->fmr.fm_mr)) if (IS_ERR(mr->fmr.fm_mr))
goto out_fmr_err; goto out_fmr_err;
return 0; return 0;
out_fmr_err: out_fmr_err:
dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__, dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
PTR_ERR(mw->fmr.fm_mr)); PTR_ERR(mr->fmr.fm_mr));
out_free: out_free:
kfree(mw->mw_sg); kfree(mr->mr_sg);
kfree(mw->fmr.fm_physaddrs); kfree(mr->fmr.fm_physaddrs);
return -ENOMEM; return -ENOMEM;
} }
static int static int
__fmr_unmap(struct rpcrdma_mw *mw) __fmr_unmap(struct rpcrdma_mr *mr)
{ {
LIST_HEAD(l); LIST_HEAD(l);
int rc; int rc;
list_add(&mw->fmr.fm_mr->list, &l); list_add(&mr->fmr.fm_mr->list, &l);
rc = ib_unmap_fmr(&l); rc = ib_unmap_fmr(&l);
list_del(&mw->fmr.fm_mr->list); list_del(&mr->fmr.fm_mr->list);
return rc; return rc;
} }
static void static void
fmr_op_release_mr(struct rpcrdma_mw *r) fmr_op_release_mr(struct rpcrdma_mr *mr)
{ {
LIST_HEAD(unmap_list); LIST_HEAD(unmap_list);
int rc; int rc;
/* Ensure MW is not on any rl_registered list */ /* Ensure MW is not on any rl_registered list */
if (!list_empty(&r->mw_list)) if (!list_empty(&mr->mr_list))
list_del(&r->mw_list); list_del(&mr->mr_list);
kfree(r->fmr.fm_physaddrs); kfree(mr->fmr.fm_physaddrs);
kfree(r->mw_sg); kfree(mr->mr_sg);
/* In case this one was left mapped, try to unmap it /* In case this one was left mapped, try to unmap it
* to prevent dealloc_fmr from failing with EBUSY * to prevent dealloc_fmr from failing with EBUSY
*/ */
rc = __fmr_unmap(r); rc = __fmr_unmap(mr);
if (rc) if (rc)
pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n", pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
r, rc); mr, rc);
rc = ib_dealloc_fmr(r->fmr.fm_mr); rc = ib_dealloc_fmr(mr->fmr.fm_mr);
if (rc) if (rc)
pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n", pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
r, rc); mr, rc);
kfree(r); kfree(mr);
} }
/* Reset of a single FMR. /* Reset of a single FMR.
*/ */
static void static void
fmr_op_recover_mr(struct rpcrdma_mw *mw) fmr_op_recover_mr(struct rpcrdma_mr *mr)
{ {
struct rpcrdma_xprt *r_xprt = mw->mw_xprt; struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
int rc; int rc;
/* ORDER: invalidate first */ /* ORDER: invalidate first */
rc = __fmr_unmap(mw); rc = __fmr_unmap(mr);
/* ORDER: then DMA unmap */
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
if (rc) if (rc)
goto out_release; goto out_release;
rpcrdma_put_mw(r_xprt, mw); /* ORDER: then DMA unmap */
rpcrdma_mr_unmap_and_put(mr);
r_xprt->rx_stats.mrs_recovered++; r_xprt->rx_stats.mrs_recovered++;
return; return;
out_release: out_release:
pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mw); pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++; r_xprt->rx_stats.mrs_orphaned++;
spin_lock(&r_xprt->rx_buf.rb_mwlock); trace_xprtrdma_dma_unmap(mr);
list_del(&mw->mw_all); ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
spin_unlock(&r_xprt->rx_buf.rb_mwlock); mr->mr_sg, mr->mr_nents, mr->mr_dir);
spin_lock(&r_xprt->rx_buf.rb_mrlock);
list_del(&mr->mr_all);
spin_unlock(&r_xprt->rx_buf.rb_mrlock);
fmr_op_release_mr(mw); fmr_op_release_mr(mr);
} }
static int static int
...@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) ...@@ -180,15 +182,15 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
*/ */
static struct rpcrdma_mr_seg * static struct rpcrdma_mr_seg *
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, struct rpcrdma_mw **out) int nsegs, bool writing, struct rpcrdma_mr **out)
{ {
struct rpcrdma_mr_seg *seg1 = seg; struct rpcrdma_mr_seg *seg1 = seg;
int len, pageoff, i, rc; int len, pageoff, i, rc;
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
u64 *dma_pages; u64 *dma_pages;
mw = rpcrdma_get_mw(r_xprt); mr = rpcrdma_mr_get(r_xprt);
if (!mw) if (!mr)
return ERR_PTR(-ENOBUFS); return ERR_PTR(-ENOBUFS);
pageoff = offset_in_page(seg1->mr_offset); pageoff = offset_in_page(seg1->mr_offset);
...@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -199,12 +201,12 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
nsegs = RPCRDMA_MAX_FMR_SGES; nsegs = RPCRDMA_MAX_FMR_SGES;
for (i = 0; i < nsegs;) { for (i = 0; i < nsegs;) {
if (seg->mr_page) if (seg->mr_page)
sg_set_page(&mw->mw_sg[i], sg_set_page(&mr->mr_sg[i],
seg->mr_page, seg->mr_page,
seg->mr_len, seg->mr_len,
offset_in_page(seg->mr_offset)); offset_in_page(seg->mr_offset));
else else
sg_set_buf(&mw->mw_sg[i], seg->mr_offset, sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len); seg->mr_len);
len += seg->mr_len; len += seg->mr_len;
++seg; ++seg;
...@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -214,40 +216,38 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break; break;
} }
mw->mw_dir = rpcrdma_data_dir(writing); mr->mr_dir = rpcrdma_data_dir(writing);
mw->mw_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device, mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, i, mw->mw_dir); mr->mr_sg, i, mr->mr_dir);
if (!mw->mw_nents) if (!mr->mr_nents)
goto out_dmamap_err; goto out_dmamap_err;
for (i = 0, dma_pages = mw->fmr.fm_physaddrs; i < mw->mw_nents; i++) for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
dma_pages[i] = sg_dma_address(&mw->mw_sg[i]); dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
rc = ib_map_phys_fmr(mw->fmr.fm_mr, dma_pages, mw->mw_nents, rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
dma_pages[0]); dma_pages[0]);
if (rc) if (rc)
goto out_maperr; goto out_maperr;
mw->mw_handle = mw->fmr.fm_mr->rkey; mr->mr_handle = mr->fmr.fm_mr->rkey;
mw->mw_length = len; mr->mr_length = len;
mw->mw_offset = dma_pages[0] + pageoff; mr->mr_offset = dma_pages[0] + pageoff;
*out = mw; *out = mr;
return seg; return seg;
out_dmamap_err: out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i); mr->mr_sg, i);
rpcrdma_put_mw(r_xprt, mw); rpcrdma_mr_put(mr);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
out_maperr: out_maperr:
pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
len, (unsigned long long)dma_pages[0], len, (unsigned long long)dma_pages[0],
pageoff, mw->mw_nents, rc); pageoff, mr->mr_nents, rc);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, rpcrdma_mr_unmap_and_put(mr);
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
} }
...@@ -256,13 +256,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -256,13 +256,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
* Sleeps until it is safe for the host CPU to access the * Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions. * previously mapped memory regions.
* *
* Caller ensures that @mws is not empty before the call. This * Caller ensures that @mrs is not empty before the call. This
* function empties the list. * function empties the list.
*/ */
static void static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{ {
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
LIST_HEAD(unmap_list); LIST_HEAD(unmap_list);
int rc; int rc;
...@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -271,10 +271,11 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* ib_unmap_fmr() is slow, so use a single call instead * ib_unmap_fmr() is slow, so use a single call instead
* of one call per mapped FMR. * of one call per mapped FMR.
*/ */
list_for_each_entry(mw, mws, mw_list) { list_for_each_entry(mr, mrs, mr_list) {
dprintk("RPC: %s: unmapping fmr %p\n", dprintk("RPC: %s: unmapping fmr %p\n",
__func__, &mw->fmr); __func__, &mr->fmr);
list_add_tail(&mw->fmr.fm_mr->list, &unmap_list); trace_xprtrdma_localinv(mr);
list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
} }
r_xprt->rx_stats.local_inv_needed++; r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list); rc = ib_unmap_fmr(&unmap_list);
...@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -284,14 +285,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
/* ORDER: Now DMA unmap all of the req's MRs, and return /* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list. * them to the free MW list.
*/ */
while (!list_empty(mws)) { while (!list_empty(mrs)) {
mw = rpcrdma_pop_mw(mws); mr = rpcrdma_mr_pop(mrs);
dprintk("RPC: %s: DMA unmapping fmr %p\n", list_del(&mr->fmr.fm_mr->list);
__func__, &mw->fmr); rpcrdma_mr_unmap_and_put(mr);
list_del(&mw->fmr.fm_mr->list);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
} }
return; return;
...@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -299,10 +296,10 @@ fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
out_reset: out_reset:
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc); pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
while (!list_empty(mws)) { while (!list_empty(mrs)) {
mw = rpcrdma_pop_mw(mws); mr = rpcrdma_mr_pop(mrs);
list_del(&mw->fmr.fm_mr->list); list_del(&mr->fmr.fm_mr->list);
fmr_op_recover_mr(mw); fmr_op_recover_mr(mr);
} }
} }
......
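
Several call sites above replace an explicit ib_dma_unmap_sg() plus rpcrdma_put_mw() pair with rpcrdma_mr_unmap_and_put(). That helper lives in verbs.c, which is not part of the hunks shown here; the following is only a sketch of what it is expected to do, inferred from the calls it replaces, and the body and local names (example_mr_unmap_and_put) are assumptions for illustration:

/* Sketch only: DMA-unmap the MR's scatterlist, emit the dma_unmap
 * tracepoint, and return the MR to the transport's free list.
 */
static void example_mr_unmap_and_put(struct rpcrdma_mr *mr)
{
        struct rpcrdma_xprt *r_xprt = mr->mr_xprt;

        trace_xprtrdma_dma_unmap(mr);
        ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
                        mr->mr_sg, mr->mr_nents, mr->mr_dir);
        rpcrdma_mr_put(mr);        /* back onto the rb_mrs free list */
}
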
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/* /*
* Copyright (c) 2015 Oracle. All rights reserved. * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/ */
/* Lightweight memory registration using Fast Registration Work /* Lightweight memory registration using Fast Registration Work
* Requests (FRWR). Also referred to sometimes as FRMR mode. * Requests (FRWR).
* *
* FRWR features ordered asynchronous registration and deregistration * FRWR features ordered asynchronous registration and deregistration
* of arbitrarily sized memory regions. This is the fastest and safest * of arbitrarily sized memory regions. This is the fastest and safest
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
/* Normal operation /* Normal operation
* *
* A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
* Work Request (frmr_op_map). When the RDMA operation is finished, this * Work Request (frwr_op_map). When the RDMA operation is finished, this
* Memory Region is invalidated using a LOCAL_INV Work Request * Memory Region is invalidated using a LOCAL_INV Work Request
* (frmr_op_unmap). * (frwr_op_unmap_sync).
* *
* Typically these Work Requests are not signaled, and neither are RDMA * Typically these Work Requests are not signaled, and neither are RDMA
* SEND Work Requests (with the exception of signaling occasionally to * SEND Work Requests (with the exception of signaling occasionally to
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
* *
* As an optimization, frwr_op_unmap marks MRs INVALID before the * As an optimization, frwr_op_unmap marks MRs INVALID before the
* LOCAL_INV WR is posted. If posting succeeds, the MR is placed on * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
* rb_mws immediately so that no work (like managing a linked list * rb_mrs immediately so that no work (like managing a linked list
* under a spinlock) is needed in the completion upcall. * under a spinlock) is needed in the completion upcall.
* *
* But this means that frwr_op_map() can occasionally encounter an MR * But this means that frwr_op_map() can occasionally encounter an MR
...@@ -60,7 +60,7 @@ ...@@ -60,7 +60,7 @@
* When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
* with ib_dereg_mr and then are re-initialized. Because MR recovery * with ib_dereg_mr and then are re-initialized. Because MR recovery
* allocates fresh resources, it is deferred to a workqueue, and the * allocates fresh resources, it is deferred to a workqueue, and the
* recovered MRs are placed back on the rb_mws list when recovery is * recovered MRs are placed back on the rb_mrs list when recovery is
* complete. frwr_op_map allocates another MR for the current RPC while * complete. frwr_op_map allocates another MR for the current RPC while
* the broken MR is reset. * the broken MR is reset.
* *
...@@ -96,26 +96,26 @@ frwr_is_supported(struct rpcrdma_ia *ia) ...@@ -96,26 +96,26 @@ frwr_is_supported(struct rpcrdma_ia *ia)
} }
static int static int
frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{ {
unsigned int depth = ia->ri_max_frmr_depth; unsigned int depth = ia->ri_max_frwr_depth;
struct rpcrdma_frmr *f = &r->frmr; struct rpcrdma_frwr *frwr = &mr->frwr;
int rc; int rc;
f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
if (IS_ERR(f->fr_mr)) if (IS_ERR(frwr->fr_mr))
goto out_mr_err; goto out_mr_err;
r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL); mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
if (!r->mw_sg) if (!mr->mr_sg)
goto out_list_err; goto out_list_err;
sg_init_table(r->mw_sg, depth); sg_init_table(mr->mr_sg, depth);
init_completion(&f->fr_linv_done); init_completion(&frwr->fr_linv_done);
return 0; return 0;
out_mr_err: out_mr_err:
rc = PTR_ERR(f->fr_mr); rc = PTR_ERR(frwr->fr_mr);
dprintk("RPC: %s: ib_alloc_mr status %i\n", dprintk("RPC: %s: ib_alloc_mr status %i\n",
__func__, rc); __func__, rc);
return rc; return rc;
...@@ -124,83 +124,85 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) ...@@ -124,83 +124,85 @@ frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
rc = -ENOMEM; rc = -ENOMEM;
dprintk("RPC: %s: sg allocation failure\n", dprintk("RPC: %s: sg allocation failure\n",
__func__); __func__);
ib_dereg_mr(f->fr_mr); ib_dereg_mr(frwr->fr_mr);
return rc; return rc;
} }
static void static void
frwr_op_release_mr(struct rpcrdma_mw *r) frwr_op_release_mr(struct rpcrdma_mr *mr)
{ {
int rc; int rc;
/* Ensure MW is not on any rl_registered list */ /* Ensure MR is not on any rl_registered list */
if (!list_empty(&r->mw_list)) if (!list_empty(&mr->mr_list))
list_del(&r->mw_list); list_del(&mr->mr_list);
rc = ib_dereg_mr(r->frmr.fr_mr); rc = ib_dereg_mr(mr->frwr.fr_mr);
if (rc) if (rc)
pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n", pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
r, rc); mr, rc);
kfree(r->mw_sg); kfree(mr->mr_sg);
kfree(r); kfree(mr);
} }
static int static int
__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r) __frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{ {
struct rpcrdma_frmr *f = &r->frmr; struct rpcrdma_frwr *frwr = &mr->frwr;
int rc; int rc;
rc = ib_dereg_mr(f->fr_mr); rc = ib_dereg_mr(frwr->fr_mr);
if (rc) { if (rc) {
pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n", pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
rc, r); rc, mr);
return rc; return rc;
} }
f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
ia->ri_max_frmr_depth); ia->ri_max_frwr_depth);
if (IS_ERR(f->fr_mr)) { if (IS_ERR(frwr->fr_mr)) {
pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n", pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
PTR_ERR(f->fr_mr), r); PTR_ERR(frwr->fr_mr), mr);
return PTR_ERR(f->fr_mr); return PTR_ERR(frwr->fr_mr);
} }
dprintk("RPC: %s: recovered FRMR %p\n", __func__, f); dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
f->fr_state = FRMR_IS_INVALID; frwr->fr_state = FRWR_IS_INVALID;
return 0; return 0;
} }
/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR. /* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
*/ */
static void static void
frwr_op_recover_mr(struct rpcrdma_mw *mw) frwr_op_recover_mr(struct rpcrdma_mr *mr)
{ {
enum rpcrdma_frmr_state state = mw->frmr.fr_state; enum rpcrdma_frwr_state state = mr->frwr.fr_state;
struct rpcrdma_xprt *r_xprt = mw->mw_xprt; struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc; int rc;
rc = __frwr_reset_mr(ia, mw); rc = __frwr_mr_reset(ia, mr);
if (state != FRMR_FLUSHED_LI) if (state != FRWR_FLUSHED_LI) {
trace_xprtrdma_dma_unmap(mr);
ib_dma_unmap_sg(ia->ri_device, ib_dma_unmap_sg(ia->ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir); mr->mr_sg, mr->mr_nents, mr->mr_dir);
}
if (rc) if (rc)
goto out_release; goto out_release;
rpcrdma_put_mw(r_xprt, mw); rpcrdma_mr_put(mr);
r_xprt->rx_stats.mrs_recovered++; r_xprt->rx_stats.mrs_recovered++;
return; return;
out_release: out_release:
pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw); pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++; r_xprt->rx_stats.mrs_orphaned++;
spin_lock(&r_xprt->rx_buf.rb_mwlock); spin_lock(&r_xprt->rx_buf.rb_mrlock);
list_del(&mw->mw_all); list_del(&mr->mr_all);
spin_unlock(&r_xprt->rx_buf.rb_mwlock); spin_unlock(&r_xprt->rx_buf.rb_mrlock);
frwr_op_release_mr(mw); frwr_op_release_mr(mr);
} }
static int static int
...@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, ...@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
ia->ri_max_frmr_depth = ia->ri_max_frwr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
attrs->max_fast_reg_page_list_len); attrs->max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n", dprintk("RPC: %s: device's max FR page list len = %u\n",
__func__, ia->ri_max_frmr_depth); __func__, ia->ri_max_frwr_depth);
/* Add room for frmr register and invalidate WRs. /* Add room for frwr register and invalidate WRs.
* 1. FRMR reg WR for head * 1. FRWR reg WR for head
* 2. FRMR invalidate WR for head * 2. FRWR invalidate WR for head
* 3. N FRMR reg WRs for pagelist * 3. N FRWR reg WRs for pagelist
* 4. N FRMR invalidate WRs for pagelist * 4. N FRWR invalidate WRs for pagelist
* 5. FRMR reg WR for tail * 5. FRWR reg WR for tail
* 6. FRMR invalidate WR for tail * 6. FRWR invalidate WR for tail
* 7. The RDMA_SEND WR * 7. The RDMA_SEND WR
*/ */
depth = 7; depth = 7;
/* Calculate N if the device max FRMR depth is smaller than /* Calculate N if the device max FRWR depth is smaller than
* RPCRDMA_MAX_DATA_SEGS. * RPCRDMA_MAX_DATA_SEGS.
*/ */
if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) { if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth; delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
do { do {
depth += 2; /* FRMR reg + invalidate */ depth += 2; /* FRWR reg + invalidate */
delta -= ia->ri_max_frmr_depth; delta -= ia->ri_max_frwr_depth;
} while (delta > 0); } while (delta > 0);
} }
...@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, ...@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
} }
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS / ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
ia->ri_max_frmr_depth); ia->ri_max_frwr_depth);
return 0; return 0;
} }
...@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt) ...@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS, return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth); RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
} }
static void static void
...@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr) ...@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
static void static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{ {
struct rpcrdma_frmr *frmr; struct ib_cqe *cqe = wc->wr_cqe;
struct ib_cqe *cqe; struct rpcrdma_frwr *frwr =
container_of(cqe, struct rpcrdma_frwr, fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */ /* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_SUCCESS) {
cqe = wc->wr_cqe; frwr->fr_state = FRWR_FLUSHED_FR;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
frmr->fr_state = FRMR_FLUSHED_FR;
__frwr_sendcompletion_flush(wc, "fastreg"); __frwr_sendcompletion_flush(wc, "fastreg");
} }
trace_xprtrdma_wc_fastreg(wc, frwr);
} }
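
Since the two-column rendering makes the reworked completion handlers hard to read, here is frwr_wc_fastreg() as it reads after this patch, taken from the right-hand side of the hunk above; frwr_wc_localinv() and frwr_wc_localinv_wake() follow the same shape:

static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
        struct ib_cqe *cqe = wc->wr_cqe;
        struct rpcrdma_frwr *frwr =
                        container_of(cqe, struct rpcrdma_frwr, fr_cqe);

        /* WARNING: Only wr_cqe and status are reliable at this point */
        if (wc->status != IB_WC_SUCCESS) {
                frwr->fr_state = FRWR_FLUSHED_FR;
                __frwr_sendcompletion_flush(wc, "fastreg");
        }
        trace_xprtrdma_wc_fastreg(wc, frwr);
}

The container_of() lookup is now done unconditionally so that the completion tracepoint can fire for successful completions as well as flushed ones.
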
/** /**
...@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) ...@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
static void static void
frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{ {
struct rpcrdma_frmr *frmr; struct ib_cqe *cqe = wc->wr_cqe;
struct ib_cqe *cqe; struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */ /* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_SUCCESS) {
cqe = wc->wr_cqe; frwr->fr_state = FRWR_FLUSHED_LI;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
frmr->fr_state = FRMR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv"); __frwr_sendcompletion_flush(wc, "localinv");
} }
trace_xprtrdma_wc_li(wc, frwr);
} }
/** /**
...@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) ...@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
static void static void
frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{ {
struct rpcrdma_frmr *frmr; struct ib_cqe *cqe = wc->wr_cqe;
struct ib_cqe *cqe; struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */ /* WARNING: Only wr_cqe and status are reliable at this point */
cqe = wc->wr_cqe;
frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
if (wc->status != IB_WC_SUCCESS) { if (wc->status != IB_WC_SUCCESS) {
frmr->fr_state = FRMR_FLUSHED_LI; frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv"); __frwr_sendcompletion_flush(wc, "localinv");
} }
complete(&frmr->fr_linv_done); complete(&frwr->fr_linv_done);
trace_xprtrdma_wc_li_wake(wc, frwr);
} }
/* Post a REG_MR Work Request to register a memory region /* Post a REG_MR Work Request to register a memory region
...@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) ...@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
*/ */
static struct rpcrdma_mr_seg * static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, struct rpcrdma_mw **out) int nsegs, bool writing, struct rpcrdma_mr **out)
{ {
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS; bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
struct rpcrdma_mw *mw; struct rpcrdma_frwr *frwr;
struct rpcrdma_frmr *frmr; struct rpcrdma_mr *mr;
struct ib_mr *mr; struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr; struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr; struct ib_send_wr *bad_wr;
int rc, i, n; int rc, i, n;
u8 key; u8 key;
mw = NULL; mr = NULL;
do { do {
if (mw) if (mr)
rpcrdma_defer_mr_recovery(mw); rpcrdma_mr_defer_recovery(mr);
mw = rpcrdma_get_mw(r_xprt); mr = rpcrdma_mr_get(r_xprt);
if (!mw) if (!mr)
return ERR_PTR(-ENOBUFS); return ERR_PTR(-ENOBUFS);
} while (mw->frmr.fr_state != FRMR_IS_INVALID); } while (mr->frwr.fr_state != FRWR_IS_INVALID);
frmr = &mw->frmr; frwr = &mr->frwr;
frmr->fr_state = FRMR_IS_VALID; frwr->fr_state = FRWR_IS_VALID;
mr = frmr->fr_mr;
reg_wr = &frmr->fr_regwr; if (nsegs > ia->ri_max_frwr_depth)
nsegs = ia->ri_max_frwr_depth;
if (nsegs > ia->ri_max_frmr_depth)
nsegs = ia->ri_max_frmr_depth;
for (i = 0; i < nsegs;) { for (i = 0; i < nsegs;) {
if (seg->mr_page) if (seg->mr_page)
sg_set_page(&mw->mw_sg[i], sg_set_page(&mr->mr_sg[i],
seg->mr_page, seg->mr_page,
seg->mr_len, seg->mr_len,
offset_in_page(seg->mr_offset)); offset_in_page(seg->mr_offset));
else else
sg_set_buf(&mw->mw_sg[i], seg->mr_offset, sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len); seg->mr_len);
++seg; ++seg;
...@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break; break;
} }
mw->mw_dir = rpcrdma_data_dir(writing); mr->mr_dir = rpcrdma_data_dir(writing);
mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir); mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
if (!mw->mw_nents) if (!mr->mr_nents)
goto out_dmamap_err; goto out_dmamap_err;
n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE); ibmr = frwr->fr_mr;
if (unlikely(n != mw->mw_nents)) n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
if (unlikely(n != mr->mr_nents))
goto out_mapmr_err; goto out_mapmr_err;
dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n", key = (u8)(ibmr->rkey & 0x000000FF);
__func__, frmr, mw->mw_nents, mr->length); ib_update_fast_reg_key(ibmr, ++key);
key = (u8)(mr->rkey & 0x000000FF);
ib_update_fast_reg_key(mr, ++key);
reg_wr = &frwr->fr_regwr;
reg_wr->wr.next = NULL; reg_wr->wr.next = NULL;
reg_wr->wr.opcode = IB_WR_REG_MR; reg_wr->wr.opcode = IB_WR_REG_MR;
frmr->fr_cqe.done = frwr_wc_fastreg; frwr->fr_cqe.done = frwr_wc_fastreg;
reg_wr->wr.wr_cqe = &frmr->fr_cqe; reg_wr->wr.wr_cqe = &frwr->fr_cqe;
reg_wr->wr.num_sge = 0; reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = 0; reg_wr->wr.send_flags = 0;
reg_wr->mr = mr; reg_wr->mr = ibmr;
reg_wr->key = mr->rkey; reg_wr->key = ibmr->rkey;
reg_wr->access = writing ? reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ; IB_ACCESS_REMOTE_READ;
...@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, ...@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
if (rc) if (rc)
goto out_senderr; goto out_senderr;
mw->mw_handle = mr->rkey; mr->mr_handle = ibmr->rkey;
mw->mw_length = mr->length; mr->mr_length = ibmr->length;
mw->mw_offset = mr->iova; mr->mr_offset = ibmr->iova;
*out = mw; *out = mr;
return seg; return seg;
out_dmamap_err: out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
mw->mw_sg, i); mr->mr_sg, i);
frmr->fr_state = FRMR_IS_INVALID; frwr->fr_state = FRWR_IS_INVALID;
rpcrdma_put_mw(r_xprt, mw); rpcrdma_mr_put(mr);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
out_mapmr_err: out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
frmr->fr_mr, n, mw->mw_nents); frwr->fr_mr, n, mr->mr_nents);
rpcrdma_defer_mr_recovery(mw); rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
out_senderr: out_senderr:
pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc); pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
rpcrdma_defer_mr_recovery(mw); rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-ENOTCONN); return ERR_PTR(-ENOTCONN);
} }
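frwr_op_map() bumps the low-order byte of the R_key before posting each REG_MR so that a previously advertised key no longer matches the re-registered MR. A small stand-alone sketch of that key rotation, assuming ib_update_fast_reg_key() simply replaces the variant byte:

#include <stdint.h>
#include <stdio.h>

/* Keep the 24-bit MR index, replace only the low "key" byte. */
static uint32_t update_fast_reg_key(uint32_t rkey, uint8_t newkey)
{
	return (rkey & 0xFFFFFF00u) | newkey;
}

int main(void)
{
	uint32_t rkey = 0x01020344;
	uint8_t key = (uint8_t)(rkey & 0x000000FFu);

	rkey = update_fast_reg_key(rkey, ++key);
	printf("new rkey 0x%08x\n", rkey);	/* prints 0x01020345 */
	return 0;
}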
/* Handle a remotely invalidated mr on the @mrs list
*/
static void
frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
{
struct rpcrdma_mr *mr;
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del(&mr->mr_list);
trace_xprtrdma_remoteinv(mr);
mr->frwr.fr_state = FRWR_IS_INVALID;
rpcrdma_mr_unmap_and_put(mr);
break; /* only one invalidated MR per RPC */
}
}
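frwr_op_reminv() looks for the single MR whose handle matches the R_key that the server invalidated remotely. A plain C sketch of that matching step, with illustrative types only:

#include <stdint.h>
#include <stdio.h>

struct mr {
	uint32_t mr_handle;	/* R_key advertised to the server */
	int invalid;		/* FRWR_IS_INVALID stand-in */
};

static struct mr *find_remotely_invalidated(struct mr *mrs, int count,
					    uint32_t inv_rkey)
{
	for (int i = 0; i < count; i++)
		if (mrs[i].mr_handle == inv_rkey)
			return &mrs[i];	/* only one invalidated MR per RPC */
	return NULL;
}

int main(void)
{
	struct mr mrs[2] = { { 0x1111, 0 }, { 0x2222, 0 } };
	struct mr *mr = find_remotely_invalidated(mrs, 2, 0x2222);

	if (mr)
		mr->invalid = 1;	/* no LOCAL_INV is needed for this MR */
	printf("mr[1].invalid = %d\n", mrs[1].invalid);
	return 0;
}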
/* Invalidate all memory regions that were registered for "req". /* Invalidate all memory regions that were registered for "req".
* *
* Sleeps until it is safe for the host CPU to access the * Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions. * previously mapped memory regions.
* *
* Caller ensures that @mws is not empty before the call. This * Caller ensures that @mrs is not empty before the call. This
* function empties the list. * function empties the list.
*/ */
static void static void
frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{ {
struct ib_send_wr *first, **prev, *last, *bad_wr; struct ib_send_wr *first, **prev, *last, *bad_wr;
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_frmr *f; struct rpcrdma_frwr *frwr;
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
int count, rc; int count, rc;
/* ORDER: Invalidate all of the MRs first /* ORDER: Invalidate all of the MRs first
...@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* Chain the LOCAL_INV Work Requests and post them with * Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call. * a single ib_post_send() call.
*/ */
f = NULL; frwr = NULL;
count = 0; count = 0;
prev = &first; prev = &first;
list_for_each_entry(mw, mws, mw_list) { list_for_each_entry(mr, mrs, mr_list) {
mw->frmr.fr_state = FRMR_IS_INVALID; mr->frwr.fr_state = FRWR_IS_INVALID;
if (mw->mw_flags & RPCRDMA_MW_F_RI) frwr = &mr->frwr;
continue; trace_xprtrdma_localinv(mr);
f = &mw->frmr; frwr->fr_cqe.done = frwr_wc_localinv;
dprintk("RPC: %s: invalidating frmr %p\n", last = &frwr->fr_invwr;
__func__, f);
f->fr_cqe.done = frwr_wc_localinv;
last = &f->fr_invwr;
memset(last, 0, sizeof(*last)); memset(last, 0, sizeof(*last));
last->wr_cqe = &f->fr_cqe; last->wr_cqe = &frwr->fr_cqe;
last->opcode = IB_WR_LOCAL_INV; last->opcode = IB_WR_LOCAL_INV;
last->ex.invalidate_rkey = mw->mw_handle; last->ex.invalidate_rkey = mr->mr_handle;
count++; count++;
*prev = last; *prev = last;
prev = &last->next; prev = &last->next;
} }
if (!f) if (!frwr)
goto unmap; goto unmap;
/* Strong send queue ordering guarantees that when the /* Strong send queue ordering guarantees that when the
...@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* are complete. * are complete.
*/ */
last->send_flags = IB_SEND_SIGNALED; last->send_flags = IB_SEND_SIGNALED;
f->fr_cqe.done = frwr_wc_localinv_wake; frwr->fr_cqe.done = frwr_wc_localinv_wake;
reinit_completion(&f->fr_linv_done); reinit_completion(&frwr->fr_linv_done);
/* Transport disconnect drains the receive CQ before it /* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us * replaces the QP. The RPC reply handler won't call us
...@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
bad_wr = NULL; bad_wr = NULL;
rc = ib_post_send(ia->ri_id->qp, first, &bad_wr); rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
if (bad_wr != first) if (bad_wr != first)
wait_for_completion(&f->fr_linv_done); wait_for_completion(&frwr->fr_linv_done);
if (rc) if (rc)
goto reset_mrs; goto reset_mrs;
/* ORDER: Now DMA unmap all of the MRs, and return /* ORDER: Now DMA unmap all of the MRs, and return
* them to the free MW list. * them to the free MR list.
*/ */
unmap: unmap:
while (!list_empty(mws)) { while (!list_empty(mrs)) {
mw = rpcrdma_pop_mw(mws); mr = rpcrdma_mr_pop(mrs);
dprintk("RPC: %s: DMA unmapping frmr %p\n", rpcrdma_mr_unmap_and_put(mr);
__func__, &mw->frmr);
ib_dma_unmap_sg(ia->ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
} }
return; return;
reset_mrs: reset_mrs:
pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc); pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
/* Find and reset the MRs in the LOCAL_INV WRs that did not /* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted. * get posted.
*/ */
while (bad_wr) { while (bad_wr) {
f = container_of(bad_wr, struct rpcrdma_frmr, frwr = container_of(bad_wr, struct rpcrdma_frwr,
fr_invwr); fr_invwr);
mw = container_of(f, struct rpcrdma_mw, frmr); mr = container_of(frwr, struct rpcrdma_mr, frwr);
__frwr_reset_mr(ia, mw); __frwr_mr_reset(ia, mr);
bad_wr = bad_wr->next; bad_wr = bad_wr->next;
} }
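frwr_op_unmap_sync() links one LOCAL_INV work request per MR into a single chain, marks only the last one signaled, and waits for that one completion. A simplified sketch of the chaining logic, in ordinary C rather than the verbs API:

#include <stdbool.h>
#include <stdio.h>

struct send_wr {
	struct send_wr *next;
	unsigned int invalidate_rkey;
	bool signaled;
};

int main(void)
{
	struct send_wr wrs[3];
	struct send_wr *first = NULL, **prev = &first, *last = NULL;
	unsigned int rkeys[3] = { 0x10, 0x20, 0x30 };

	for (int i = 0; i < 3; i++) {
		last = &wrs[i];
		last->next = NULL;
		last->invalidate_rkey = rkeys[i];
		last->signaled = false;

		*prev = last;		/* append to the chain */
		prev = &last->next;
	}
	if (last)
		last->signaled = true;	/* only the final WR generates a CQE */

	for (struct send_wr *wr = first; wr; wr = wr->next)
		printf("LOCAL_INV rkey 0x%x signaled=%d\n",
		       wr->invalidate_rkey, wr->signaled);
	return 0;
}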
...@@ -553,6 +561,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) ...@@ -553,6 +561,7 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map, .ro_map = frwr_op_map,
.ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync, .ro_unmap_sync = frwr_op_unmap_sync,
.ro_recover_mr = frwr_op_recover_mr, .ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open, .ro_open = frwr_op_open,
......
/* /*
* Copyright (c) 2015 Oracle. All rights reserved. * Copyright (c) 2015, 2017 Oracle. All rights reserved.
*/ */
/* rpcrdma.ko module initialization /* rpcrdma.ko module initialization
*/ */
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h> #include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #include <asm/swab.h>
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif #define CREATE_TRACE_POINTS
#include "xprt_rdma.h"
MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc."); MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
MODULE_DESCRIPTION("RPC/RDMA Transport"); MODULE_DESCRIPTION("RPC/RDMA Transport");
......
...@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr) ...@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr)
} }
static void static void
xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
{ {
*iptr++ = cpu_to_be32(mw->mw_handle); *iptr++ = cpu_to_be32(mr->mr_handle);
*iptr++ = cpu_to_be32(mw->mw_length); *iptr++ = cpu_to_be32(mr->mr_length);
xdr_encode_hyper(iptr, mw->mw_offset); xdr_encode_hyper(iptr, mr->mr_offset);
} }
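An RPC-over-RDMA segment is four XDR words on the wire: handle, length, and the 64-bit offset split high word first. A stand-alone sketch of the encoding that xdr_encode_rdma_segment() performs, using htonl() in place of cpu_to_be32()/xdr_encode_hyper():

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>	/* htonl(), ntohl() */

static void encode_rdma_segment(uint32_t *p, uint32_t handle,
				uint32_t length, uint64_t offset)
{
	*p++ = htonl(handle);
	*p++ = htonl(length);
	*p++ = htonl((uint32_t)(offset >> 32));		/* hyper: high word */
	*p   = htonl((uint32_t)(offset & 0xFFFFFFFFu));	/* then low word */
}

int main(void)
{
	uint32_t seg[4];

	encode_rdma_segment(seg, 0xdeadbeef, 4096, 0x0000000100000200ULL);
	for (int i = 0; i < 4; i++)
		printf("%08x\n", (unsigned int)ntohl(seg[i]));
	return 0;
}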
static int static int
encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{ {
__be32 *p; __be32 *p;
...@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) ...@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
if (unlikely(!p)) if (unlikely(!p))
return -EMSGSIZE; return -EMSGSIZE;
xdr_encode_rdma_segment(p, mw); xdr_encode_rdma_segment(p, mr);
return 0; return 0;
} }
static int static int
encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
u32 position) u32 position)
{ {
__be32 *p; __be32 *p;
...@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, ...@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
*p++ = xdr_one; /* Item present */ *p++ = xdr_one; /* Item present */
*p++ = cpu_to_be32(position); *p++ = cpu_to_be32(position);
xdr_encode_rdma_segment(p, mw); xdr_encode_rdma_segment(p, mr);
return 0; return 0;
} }
...@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{ {
struct xdr_stream *xdr = &req->rl_stream; struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg; struct rpcrdma_mr_seg *seg;
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
unsigned int pos; unsigned int pos;
int nsegs; int nsegs;
...@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
do { do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
false, &mw); false, &mr);
if (IS_ERR(seg)) if (IS_ERR(seg))
return PTR_ERR(seg); return PTR_ERR(seg);
rpcrdma_push_mw(mw, &req->rl_registered); rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_read_segment(xdr, mw, pos) < 0) if (encode_read_segment(xdr, mr, pos) < 0)
return -EMSGSIZE; return -EMSGSIZE;
dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
rqst->rq_task->tk_pid, __func__, pos,
mw->mw_length, (unsigned long long)mw->mw_offset,
mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.read_chunk_count++; r_xprt->rx_stats.read_chunk_count++;
nsegs -= mw->mw_nents; nsegs -= mr->mr_nents;
} while (nsegs); } while (nsegs);
return 0; return 0;
...@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{ {
struct xdr_stream *xdr = &req->rl_stream; struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg; struct rpcrdma_mr_seg *seg;
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
int nsegs, nchunks; int nsegs, nchunks;
__be32 *segcount; __be32 *segcount;
...@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0; nchunks = 0;
do { do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mw); true, &mr);
if (IS_ERR(seg)) if (IS_ERR(seg))
return PTR_ERR(seg); return PTR_ERR(seg);
rpcrdma_push_mw(mw, &req->rl_registered); rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mw) < 0) if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE; return -EMSGSIZE;
dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
rqst->rq_task->tk_pid, __func__,
mw->mw_length, (unsigned long long)mw->mw_offset,
mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.write_chunk_count++; r_xprt->rx_stats.write_chunk_count++;
r_xprt->rx_stats.total_rdma_request += seg->mr_len; r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++; nchunks++;
nsegs -= mw->mw_nents; nsegs -= mr->mr_nents;
} while (nsegs); } while (nsegs);
/* Update count of segments in this Write chunk */ /* Update count of segments in this Write chunk */
...@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{ {
struct xdr_stream *xdr = &req->rl_stream; struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg; struct rpcrdma_mr_seg *seg;
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
int nsegs, nchunks; int nsegs, nchunks;
__be32 *segcount; __be32 *segcount;
...@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0; nchunks = 0;
do { do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
true, &mw); true, &mr);
if (IS_ERR(seg)) if (IS_ERR(seg))
return PTR_ERR(seg); return PTR_ERR(seg);
rpcrdma_push_mw(mw, &req->rl_registered); rpcrdma_mr_push(mr, &req->rl_registered);
if (encode_rdma_segment(xdr, mw) < 0) if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE; return -EMSGSIZE;
dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
rqst->rq_task->tk_pid, __func__,
mw->mw_length, (unsigned long long)mw->mw_offset,
mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
r_xprt->rx_stats.reply_chunk_count++; r_xprt->rx_stats.reply_chunk_count++;
r_xprt->rx_stats.total_rdma_request += seg->mr_len; r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++; nchunks++;
nsegs -= mw->mw_nents; nsegs -= mr->mr_nents;
} while (nsegs); } while (nsegs);
/* Update count of segments in the Reply chunk */ /* Update count of segments in the Reply chunk */
...@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc) ...@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
struct ib_sge *sge; struct ib_sge *sge;
unsigned int count; unsigned int count;
dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
__func__, sc->sc_unmap_count, sc);
/* The first two SGEs contain the transport header and /* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so * the inline buffer. These are always left mapped so
* they can be cheaply re-used. * they can be cheaply re-used.
...@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) ...@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
__be32 *p; __be32 *p;
int ret; int ret;
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
return rpcrdma_bc_marshal_reply(rqst);
#endif
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf, xdr_init_encode(xdr, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base); req->rl_rdmabuf->rg_base);
...@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) ...@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
rtype = rpcrdma_areadch; rtype = rpcrdma_areadch;
} }
/* If this is a retransmit, discard previously registered
* chunks. Very likely the connection has been replaced,
* so these registrations are invalid and unusable.
*/
while (unlikely(!list_empty(&req->rl_registered))) {
struct rpcrdma_mr *mr;
mr = rpcrdma_mr_pop(&req->rl_registered);
rpcrdma_mr_defer_recovery(mr);
}
/* This implementation supports the following combinations /* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message: * of chunk lists in one RPC-over-RDMA Call message:
* *
...@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) ...@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
if (ret) if (ret)
goto out_err; goto out_err;
dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n", trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
rqst->rq_task->tk_pid, __func__,
transfertypes[rtype], transfertypes[wtype],
xdr_stream_pos(xdr));
ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
&rqst->rq_snd_buf, rtype); &rqst->rq_snd_buf, rtype);
...@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) ...@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
curlen = rqst->rq_rcv_buf.head[0].iov_len; curlen = rqst->rq_rcv_buf.head[0].iov_len;
if (curlen > copy_len) if (curlen > copy_len)
curlen = copy_len; curlen = copy_len;
dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", trace_xprtrdma_fixup(rqst, copy_len, curlen);
__func__, srcp, copy_len, curlen);
srcp += curlen; srcp += curlen;
copy_len -= curlen; copy_len -= curlen;
...@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) ...@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
if (curlen > pagelist_len) if (curlen > pagelist_len)
curlen = pagelist_len; curlen = pagelist_len;
dprintk("RPC: %s: page %d" trace_xprtrdma_fixup_pg(rqst, i, srcp,
" srcp 0x%p len %d curlen %d\n", copy_len, curlen);
__func__, i, srcp, copy_len, curlen);
destp = kmap_atomic(ppages[i]); destp = kmap_atomic(ppages[i]);
memcpy(destp + page_base, srcp, curlen); memcpy(destp + page_base, srcp, curlen);
flush_dcache_page(ppages[i]); flush_dcache_page(ppages[i]);
...@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) ...@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
return fixup_copy_count; return fixup_copy_count;
} }
/* Caller must guarantee @rep remains stable during this call.
*/
static void
rpcrdma_mark_remote_invalidation(struct list_head *mws,
struct rpcrdma_rep *rep)
{
struct rpcrdma_mw *mw;
if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
return;
list_for_each_entry(mw, mws, mw_list)
if (mw->mw_handle == rep->rr_inv_rkey) {
mw->mw_flags = RPCRDMA_MW_F_RI;
break; /* only one invalidated MR per RPC */
}
}
/* By convention, backchannel calls arrive via rdma_msg type /* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes * messages, and never populate the chunk lists. This makes
* the RPC/RDMA header small and fixed in size, so it is * the RPC/RDMA header small and fixed in size, so it is
...@@ -1058,26 +1026,19 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) ...@@ -1058,26 +1026,19 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
{ {
u32 handle;
u64 offset;
__be32 *p; __be32 *p;
p = xdr_inline_decode(xdr, 4 * sizeof(*p)); p = xdr_inline_decode(xdr, 4 * sizeof(*p));
if (unlikely(!p)) if (unlikely(!p))
return -EIO; return -EIO;
ifdebug(FACILITY) { handle = be32_to_cpup(p++);
u64 offset; *length = be32_to_cpup(p++);
u32 handle; xdr_decode_hyper(p, &offset);
handle = be32_to_cpup(p++);
*length = be32_to_cpup(p++);
xdr_decode_hyper(p, &offset);
dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n",
__func__, *length, (unsigned long long)offset,
handle);
} else {
*length = be32_to_cpup(p + 1);
}
trace_xprtrdma_decode_seg(handle, *length, offset);
return 0; return 0;
} }
...@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length) ...@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
*length += seglength; *length += seglength;
} }
dprintk("RPC: %s: segcount=%u, %u bytes\n",
__func__, be32_to_cpup(p), *length);
return 0; return 0;
} }
...@@ -1296,8 +1255,7 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep) ...@@ -1296,8 +1255,7 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
* being marshaled. * being marshaled.
*/ */
out_badheader: out_badheader:
dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", trace_xprtrdma_reply_hdr(rep);
rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
r_xprt->rx_stats.bad_reply_count++; r_xprt->rx_stats.bad_reply_count++;
status = -EIO; status = -EIO;
goto out; goto out;
...@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work) ...@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work)
struct rpcrdma_rep *rep = struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work); container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
rpcrdma_mark_remote_invalidation(&req->rl_registered, rep); trace_xprtrdma_defer_cmp(rep);
rpcrdma_release_rqst(rep->rr_rxprt, req); if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
rpcrdma_release_rqst(r_xprt, req);
rpcrdma_complete_rqst(rep); rpcrdma_complete_rqst(rep);
} }
...@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
u32 credits; u32 credits;
__be32 *p; __be32 *p;
dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
if (rep->rr_hdrbuf.head[0].iov_len == 0) if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus; goto out_badstatus;
...@@ -1405,8 +1364,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -1405,8 +1364,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rep->rr_rqst = rqst; rep->rr_rqst = rqst;
clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
__func__, rep, req, be32_to_cpu(rep->rr_xid));
queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work); queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
return; return;
...@@ -1420,8 +1378,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -1420,8 +1378,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
return; return;
out_badversion: out_badversion:
dprintk("RPC: %s: invalid version %d\n", trace_xprtrdma_reply_vers(rep);
__func__, be32_to_cpu(rep->rr_vers));
goto repost; goto repost;
/* The RPC transaction has already been terminated, or the header /* The RPC transaction has already been terminated, or the header
...@@ -1429,12 +1386,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -1429,12 +1386,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
*/ */
out_norqst: out_norqst:
spin_unlock(&xprt->recv_lock); spin_unlock(&xprt->recv_lock);
dprintk("RPC: %s: no match for incoming xid 0x%08x\n", trace_xprtrdma_reply_rqst(rep);
__func__, be32_to_cpu(rep->rr_xid));
goto repost; goto repost;
out_shortreply: out_shortreply:
dprintk("RPC: %s: short/invalid reply\n", __func__); trace_xprtrdma_reply_short(rep);
/* If no pending RPC transaction was matched, post a replacement /* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning. * receive buffer before returning.
......
...@@ -67,8 +67,7 @@ ...@@ -67,8 +67,7 @@
static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding; unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRWR;
unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
int xprt_rdma_pad_optimize; int xprt_rdma_pad_optimize;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
...@@ -81,6 +80,7 @@ static unsigned int zero; ...@@ -81,6 +80,7 @@ static unsigned int zero;
static unsigned int max_padding = PAGE_SIZE; static unsigned int max_padding = PAGE_SIZE;
static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS;
static unsigned int max_memreg = RPCRDMA_LAST - 1; static unsigned int max_memreg = RPCRDMA_LAST - 1;
static unsigned int dummy;
static struct ctl_table_header *sunrpc_table_header; static struct ctl_table_header *sunrpc_table_header;
...@@ -114,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = { ...@@ -114,7 +114,7 @@ static struct ctl_table xr_tunables_table[] = {
}, },
{ {
.procname = "rdma_inline_write_padding", .procname = "rdma_inline_write_padding",
.data = &xprt_rdma_inline_write_padding, .data = &dummy,
.maxlen = sizeof(unsigned int), .maxlen = sizeof(unsigned int),
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec_minmax, .proc_handler = proc_dointvec_minmax,
...@@ -259,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work) ...@@ -259,13 +259,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
xprt_clear_connected(xprt); xprt_clear_connected(xprt);
dprintk("RPC: %s: %sconnect\n", __func__,
r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
if (rc) if (rc)
xprt_wake_pending_tasks(xprt, rc); xprt_wake_pending_tasks(xprt, rc);
dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt); xprt_clear_connecting(xprt);
} }
...@@ -275,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) ...@@ -275,7 +272,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
rx_xprt); rx_xprt);
pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); trace_xprtrdma_inject_dsc(r_xprt);
rdma_disconnect(r_xprt->rx_ia.ri_id); rdma_disconnect(r_xprt->rx_ia.ri_id);
} }
...@@ -295,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) ...@@ -295,7 +292,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
{ {
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
dprintk("RPC: %s: called\n", __func__); trace_xprtrdma_destroy(r_xprt);
cancel_delayed_work_sync(&r_xprt->rx_connect_worker); cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
...@@ -306,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) ...@@ -306,11 +303,8 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
rpcrdma_ia_close(&r_xprt->rx_ia); rpcrdma_ia_close(&r_xprt->rx_ia);
xprt_rdma_free_addresses(xprt); xprt_rdma_free_addresses(xprt);
xprt_free(xprt); xprt_free(xprt);
dprintk("RPC: %s: returning\n", __func__);
module_put(THIS_MODULE); module_put(THIS_MODULE);
} }
...@@ -361,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -361,9 +355,7 @@ xprt_setup_rdma(struct xprt_create *args)
/* /*
* Set up RDMA-specific connect data. * Set up RDMA-specific connect data.
*/ */
sap = args->dstaddr;
sap = (struct sockaddr *)&cdata.addr;
memcpy(sap, args->dstaddr, args->addrlen);
/* Ensure xprt->addr holds valid server TCP (not RDMA) /* Ensure xprt->addr holds valid server TCP (not RDMA)
* address, for any side protocols which peek at it */ * address, for any side protocols which peek at it */
...@@ -373,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -373,6 +365,7 @@ xprt_setup_rdma(struct xprt_create *args)
if (rpc_get_port(sap)) if (rpc_get_port(sap))
xprt_set_bound(xprt); xprt_set_bound(xprt);
xprt_rdma_format_addresses(xprt, sap);
cdata.max_requests = xprt->max_reqs; cdata.max_requests = xprt->max_reqs;
...@@ -387,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -387,8 +380,6 @@ xprt_setup_rdma(struct xprt_create *args)
if (cdata.inline_rsize > cdata.rsize) if (cdata.inline_rsize > cdata.rsize)
cdata.inline_rsize = cdata.rsize; cdata.inline_rsize = cdata.rsize;
cdata.padding = xprt_rdma_inline_write_padding;
/* /*
* Create new transport instance, which includes initialized * Create new transport instance, which includes initialized
* o ia * o ia
...@@ -398,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -398,7 +389,7 @@ xprt_setup_rdma(struct xprt_create *args)
new_xprt = rpcx_to_rdmax(xprt); new_xprt = rpcx_to_rdmax(xprt);
rc = rpcrdma_ia_open(new_xprt, sap); rc = rpcrdma_ia_open(new_xprt);
if (rc) if (rc)
goto out1; goto out1;
...@@ -407,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -407,31 +398,19 @@ xprt_setup_rdma(struct xprt_create *args)
*/ */
new_xprt->rx_data = cdata; new_xprt->rx_data = cdata;
new_ep = &new_xprt->rx_ep; new_ep = &new_xprt->rx_ep;
new_ep->rep_remote_addr = cdata.addr;
rc = rpcrdma_ep_create(&new_xprt->rx_ep, rc = rpcrdma_ep_create(&new_xprt->rx_ep,
&new_xprt->rx_ia, &new_xprt->rx_data); &new_xprt->rx_ia, &new_xprt->rx_data);
if (rc) if (rc)
goto out2; goto out2;
/*
* Allocate pre-registered send and receive buffers for headers and
* any inline data. Also specify any padding which will be provided
* from a preregistered zero buffer.
*/
rc = rpcrdma_buffer_create(new_xprt); rc = rpcrdma_buffer_create(new_xprt);
if (rc) if (rc)
goto out3; goto out3;
/*
* Register a callback for connection events. This is necessary because
* connection loss notification is async. We also catch connection loss
* when reaping receives.
*/
INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, INIT_DELAYED_WORK(&new_xprt->rx_connect_worker,
xprt_rdma_connect_worker); xprt_rdma_connect_worker);
xprt_rdma_format_addresses(xprt, sap);
xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt); xprt->max_payload = new_xprt->rx_ia.ri_ops->ro_maxpages(new_xprt);
if (xprt->max_payload == 0) if (xprt->max_payload == 0)
goto out4; goto out4;
...@@ -445,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args) ...@@ -445,16 +424,19 @@ xprt_setup_rdma(struct xprt_create *args)
dprintk("RPC: %s: %s:%s\n", __func__, dprintk("RPC: %s: %s:%s\n", __func__,
xprt->address_strings[RPC_DISPLAY_ADDR], xprt->address_strings[RPC_DISPLAY_ADDR],
xprt->address_strings[RPC_DISPLAY_PORT]); xprt->address_strings[RPC_DISPLAY_PORT]);
trace_xprtrdma_create(new_xprt);
return xprt; return xprt;
out4: out4:
xprt_rdma_free_addresses(xprt); rpcrdma_buffer_destroy(&new_xprt->rx_buf);
rc = -EINVAL; rc = -ENODEV;
out3: out3:
rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2: out2:
rpcrdma_ia_close(&new_xprt->rx_ia); rpcrdma_ia_close(&new_xprt->rx_ia);
out1: out1:
trace_xprtrdma_destroy(new_xprt);
xprt_rdma_free_addresses(xprt);
xprt_free(xprt); xprt_free(xprt);
return ERR_PTR(rc); return ERR_PTR(rc);
} }
...@@ -488,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt) ...@@ -488,16 +470,34 @@ xprt_rdma_close(struct rpc_xprt *xprt)
rpcrdma_ep_disconnect(ep, ia); rpcrdma_ep_disconnect(ep, ia);
} }
/**
* xprt_rdma_set_port - update server port with rpcbind result
* @xprt: controlling RPC transport
* @port: new port value
*
* Transport connect status is unchanged.
*/
static void static void
xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
{ {
struct sockaddr_in *sap; struct sockaddr *sap = (struct sockaddr *)&xprt->addr;
char buf[8];
sap = (struct sockaddr_in *)&xprt->addr; dprintk("RPC: %s: setting port for xprt %p (%s:%s) to %u\n",
sap->sin_port = htons(port); __func__, xprt,
sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; xprt->address_strings[RPC_DISPLAY_ADDR],
sap->sin_port = htons(port); xprt->address_strings[RPC_DISPLAY_PORT],
dprintk("RPC: %s: %u\n", __func__, port); port);
rpc_set_port(sap, port);
kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
snprintf(buf, sizeof(buf), "%u", port);
xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL);
kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
snprintf(buf, sizeof(buf), "%4hx", port);
xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL);
} }
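The rewritten xprt_rdma_set_port() treats the stored address as a generic sockaddr and refreshes the printable port strings, where the old code assumed sockaddr_in. A user-space sketch of a family-agnostic port update; set_port() here is a stand-in for rpc_set_port(), and 20049 is used only as an example port:

#include <stdio.h>
#include <arpa/inet.h>

static void set_port(struct sockaddr *sap, unsigned short port)
{
	switch (sap->sa_family) {
	case AF_INET:
		((struct sockaddr_in *)sap)->sin_port = htons(port);
		break;
	case AF_INET6:
		((struct sockaddr_in6 *)sap)->sin6_port = htons(port);
		break;
	}
}

int main(void)
{
	struct sockaddr_in6 addr = { .sin6_family = AF_INET6 };
	char portbuf[8], hexbuf[8];

	set_port((struct sockaddr *)&addr, 20049);

	/* Regenerate the presentation strings, as the patched code does */
	snprintf(portbuf, sizeof(portbuf), "%u", 20049);
	snprintf(hexbuf, sizeof(hexbuf), "%4hx", (unsigned short)20049);
	printf("port %s (0x%s), sin6_port %u\n",
	       portbuf, hexbuf, (unsigned int)ntohs(addr.sin6_port));
	return 0;
}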
/** /**
...@@ -516,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) ...@@ -516,8 +516,6 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
static void static void
xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task) xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{ {
dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
xprt_force_disconnect(xprt); xprt_force_disconnect(xprt);
} }
...@@ -640,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task) ...@@ -640,7 +638,7 @@ xprt_rdma_allocate(struct rpc_task *task)
req = rpcrdma_buffer_get(&r_xprt->rx_buf); req = rpcrdma_buffer_get(&r_xprt->rx_buf);
if (req == NULL) if (req == NULL)
return -ENOMEM; goto out_get;
flags = RPCRDMA_DEF_GFP; flags = RPCRDMA_DEF_GFP;
if (RPC_IS_SWAPPER(task)) if (RPC_IS_SWAPPER(task))
...@@ -653,19 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task) ...@@ -653,19 +651,18 @@ xprt_rdma_allocate(struct rpc_task *task)
if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags)) if (!rpcrdma_get_recvbuf(r_xprt, req, rqst->rq_rcvsize, flags))
goto out_fail; goto out_fail;
dprintk("RPC: %5u %s: send size = %zd, recv size = %zd, req = %p\n",
task->tk_pid, __func__, rqst->rq_callsize,
rqst->rq_rcvsize, req);
req->rl_cpu = smp_processor_id(); req->rl_cpu = smp_processor_id();
req->rl_connect_cookie = 0; /* our reserved value */ req->rl_connect_cookie = 0; /* our reserved value */
rpcrdma_set_xprtdata(rqst, req); rpcrdma_set_xprtdata(rqst, req);
rqst->rq_buffer = req->rl_sendbuf->rg_base; rqst->rq_buffer = req->rl_sendbuf->rg_base;
rqst->rq_rbuffer = req->rl_recvbuf->rg_base; rqst->rq_rbuffer = req->rl_recvbuf->rg_base;
trace_xprtrdma_allocate(task, req);
return 0; return 0;
out_fail: out_fail:
rpcrdma_buffer_put(req); rpcrdma_buffer_put(req);
out_get:
trace_xprtrdma_allocate(task, NULL);
return -ENOMEM; return -ENOMEM;
} }
...@@ -682,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task) ...@@ -682,13 +679,9 @@ xprt_rdma_free(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst); struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags))
return;
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags))
rpcrdma_release_rqst(r_xprt, req); rpcrdma_release_rqst(r_xprt, req);
trace_xprtrdma_rpc_done(task, req);
rpcrdma_buffer_put(req); rpcrdma_buffer_put(req);
} }
...@@ -698,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task) ...@@ -698,22 +691,12 @@ xprt_rdma_free(struct rpc_task *task)
* *
* Caller holds the transport's write lock. * Caller holds the transport's write lock.
* *
* Return values: * Returns:
* 0: The request has been sent * %0 if the RPC message has been sent
* ENOTCONN: Caller needs to invoke connect logic then call again * %-ENOTCONN if the caller should reconnect and call again
* ENOBUFS: Call again later to send the request * %-ENOBUFS if the caller should call again later
* EIO: A permanent error occurred. The request was not sent, * %-EIO if a permanent error occurred and the request was not
* and don't try it again * sent. Do not try to send this message again.
*
* send_request invokes the meat of RPC RDMA. It must do the following:
*
* 1. Marshal the RPC request into an RPC RDMA request, which means
* putting a header in front of data, and creating IOVs for RDMA
* from those in the request.
* 2. In marshaling, detect opportunities for RDMA, and use them.
* 3. Post a recv message to set up asynch completion, then send
* the request (rpcrdma_ep_post).
* 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP).
*/ */
static int static int
xprt_rdma_send_request(struct rpc_task *task) xprt_rdma_send_request(struct rpc_task *task)
...@@ -724,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task) ...@@ -724,14 +707,14 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
int rc = 0; int rc = 0;
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
if (unlikely(!rqst->rq_buffer))
return xprt_rdma_bc_send_reply(rqst);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
if (!xprt_connected(xprt)) if (!xprt_connected(xprt))
goto drop_connection; goto drop_connection;
/* On retransmit, remove any previously registered chunks */
if (unlikely(!list_empty(&req->rl_registered)))
r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
&req->rl_registered);
rc = rpcrdma_marshal_req(r_xprt, rqst); rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0) if (rc < 0)
goto failed_marshal; goto failed_marshal;
...@@ -744,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task) ...@@ -744,7 +727,7 @@ xprt_rdma_send_request(struct rpc_task *task)
goto drop_connection; goto drop_connection;
req->rl_connect_cookie = xprt->connect_cookie; req->rl_connect_cookie = xprt->connect_cookie;
set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); __set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection; goto drop_connection;
...@@ -904,8 +887,7 @@ int xprt_rdma_init(void) ...@@ -904,8 +887,7 @@ int xprt_rdma_init(void)
"\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n",
xprt_rdma_slot_table_entries, xprt_rdma_slot_table_entries,
xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); xprt_rdma_max_inline_read, xprt_rdma_max_inline_write);
dprintk("\tPadding %d\n\tMemreg %d\n", dprintk("\tPadding 0\n\tMemreg %d\n", xprt_rdma_memreg_strategy);
xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy);
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
if (!sunrpc_table_header) if (!sunrpc_table_header)
......
...@@ -71,8 +71,8 @@ ...@@ -71,8 +71,8 @@
/* /*
* internal functions * internal functions
*/ */
static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); static void rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
struct workqueue_struct *rpcrdma_receive_wq __read_mostly; struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
...@@ -108,7 +108,10 @@ static void ...@@ -108,7 +108,10 @@ static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{ {
struct rpcrdma_ep *ep = context; struct rpcrdma_ep *ep = context;
struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
rx_ep);
trace_xprtrdma_qp_error(r_xprt, event);
pr_err("rpcrdma: %s on device %s ep %p\n", pr_err("rpcrdma: %s on device %s ep %p\n",
ib_event_msg(event->event), event->device->name, context); ib_event_msg(event->event), event->device->name, context);
...@@ -133,6 +136,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) ...@@ -133,6 +136,7 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
container_of(cqe, struct rpcrdma_sendctx, sc_cqe); container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */ /* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_send(sc, wc);
if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("rpcrdma: Send: %s (%u/0x%x)\n", pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status), ib_wc_status_msg(wc->status),
...@@ -155,13 +159,11 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) ...@@ -155,13 +159,11 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
rr_cqe); rr_cqe);
/* WARNING: Only wr_id and status are reliable at this point */ /* WARNING: Only wr_id and status are reliable at this point */
trace_xprtrdma_wc_receive(rep, wc);
if (wc->status != IB_WC_SUCCESS) if (wc->status != IB_WC_SUCCESS)
goto out_fail; goto out_fail;
/* status == SUCCESS means all fields in wc are trustworthy */ /* status == SUCCESS means all fields in wc are trustworthy */
dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
__func__, rep, wc->byte_len);
rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
rep->rr_wc_flags = wc->wc_flags; rep->rr_wc_flags = wc->wc_flags;
rep->rr_inv_rkey = wc->ex.invalidate_rkey; rep->rr_inv_rkey = wc->ex.invalidate_rkey;
...@@ -192,7 +194,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, ...@@ -192,7 +194,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
unsigned int rsize, wsize; unsigned int rsize, wsize;
/* Default settings for RPC-over-RDMA Version One */ /* Default settings for RPC-over-RDMA Version One */
r_xprt->rx_ia.ri_reminv_expected = false;
r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
rsize = RPCRDMA_V1_DEF_INLINE_SIZE; rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
wsize = RPCRDMA_V1_DEF_INLINE_SIZE; wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
...@@ -200,7 +201,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, ...@@ -200,7 +201,6 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
if (pmsg && if (pmsg &&
pmsg->cp_magic == rpcrdma_cmp_magic && pmsg->cp_magic == rpcrdma_cmp_magic &&
pmsg->cp_version == RPCRDMA_CMP_VERSION) { pmsg->cp_version == RPCRDMA_CMP_VERSION) {
r_xprt->rx_ia.ri_reminv_expected = true;
r_xprt->rx_ia.ri_implicit_roundup = true; r_xprt->rx_ia.ri_implicit_roundup = true;
rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
...@@ -221,11 +221,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) ...@@ -221,11 +221,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
struct rpcrdma_xprt *xprt = id->context; struct rpcrdma_xprt *xprt = id->context;
struct rpcrdma_ia *ia = &xprt->rx_ia; struct rpcrdma_ia *ia = &xprt->rx_ia;
struct rpcrdma_ep *ep = &xprt->rx_ep; struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
int connstate = 0; int connstate = 0;
trace_xprtrdma_conn_upcall(xprt, event);
switch (event->event) { switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED: case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED: case RDMA_CM_EVENT_ROUTE_RESOLVED:
...@@ -234,21 +232,17 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) ...@@ -234,21 +232,17 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
break; break;
case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ADDR_ERROR:
ia->ri_async_rc = -EHOSTUNREACH; ia->ri_async_rc = -EHOSTUNREACH;
dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
__func__, ep);
complete(&ia->ri_done); complete(&ia->ri_done);
break; break;
case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_ROUTE_ERROR:
ia->ri_async_rc = -ENETUNREACH; ia->ri_async_rc = -ENETUNREACH;
dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
__func__, ep);
complete(&ia->ri_done); complete(&ia->ri_done);
break; break;
case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
pr_info("rpcrdma: removing device %s for %pIS:%u\n", pr_info("rpcrdma: removing device %s for %s:%s\n",
ia->ri_device->name, ia->ri_device->name,
sap, rpc_get_port(sap)); rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
#endif #endif
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
ep->rep_connected = -ENODEV; ep->rep_connected = -ENODEV;
...@@ -271,8 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) ...@@ -271,8 +265,8 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -ENETDOWN; connstate = -ENETDOWN;
goto connected; goto connected;
case RDMA_CM_EVENT_REJECTED: case RDMA_CM_EVENT_REJECTED:
dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n", dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
sap, rpc_get_port(sap), rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
rdma_reject_msg(id, event->status)); rdma_reject_msg(id, event->status));
connstate = -ECONNREFUSED; connstate = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN) if (event->status == IB_CM_REJ_STALE_CONN)
...@@ -287,8 +281,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) ...@@ -287,8 +281,9 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
wake_up_all(&ep->rep_connect_wait); wake_up_all(&ep->rep_connect_wait);
/*FALLTHROUGH*/ /*FALLTHROUGH*/
default: default:
dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n", dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n",
__func__, sap, rpc_get_port(sap), __func__,
rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
ia->ri_device->name, ia->ri_ops->ro_displayname, ia->ri_device->name, ia->ri_ops->ro_displayname,
ep, rdma_event_msg(event->event)); ep, rdma_event_msg(event->event));
break; break;
...@@ -298,13 +293,14 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) ...@@ -298,13 +293,14 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
} }
static struct rdma_cm_id * static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt, rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia)
struct rpcrdma_ia *ia, struct sockaddr *addr)
{ {
unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
struct rdma_cm_id *id; struct rdma_cm_id *id;
int rc; int rc;
trace_xprtrdma_conn_start(xprt);
init_completion(&ia->ri_done); init_completion(&ia->ri_done);
init_completion(&ia->ri_remove_done); init_completion(&ia->ri_remove_done);
...@@ -318,7 +314,9 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, ...@@ -318,7 +314,9 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
} }
ia->ri_async_rc = -ETIMEDOUT; ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); rc = rdma_resolve_addr(id, NULL,
(struct sockaddr *)&xprt->rx_xprt.addr,
RDMA_RESOLVE_TIMEOUT);
if (rc) { if (rc) {
dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
__func__, rc); __func__, rc);
...@@ -326,8 +324,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, ...@@ -326,8 +324,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
} }
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) { if (rc < 0) {
dprintk("RPC: %s: wait() exited: %i\n", trace_xprtrdma_conn_tout(xprt);
__func__, rc);
goto out; goto out;
} }
...@@ -344,8 +341,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, ...@@ -344,8 +341,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
} }
rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
if (rc < 0) { if (rc < 0) {
dprintk("RPC: %s: wait() exited: %i\n", trace_xprtrdma_conn_tout(xprt);
__func__, rc);
goto out; goto out;
} }
rc = ia->ri_async_rc; rc = ia->ri_async_rc;
...@@ -365,19 +361,18 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, ...@@ -365,19 +361,18 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
/** /**
* rpcrdma_ia_open - Open and initialize an Interface Adapter. * rpcrdma_ia_open - Open and initialize an Interface Adapter.
* @xprt: controlling transport * @xprt: transport with IA to (re)initialize
* @addr: IP address of remote peer
* *
* Returns 0 on success, negative errno if an appropriate * Returns 0 on success, negative errno if an appropriate
* Interface Adapter could not be found and opened. * Interface Adapter could not be found and opened.
*/ */
int int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr) rpcrdma_ia_open(struct rpcrdma_xprt *xprt)
{ {
struct rpcrdma_ia *ia = &xprt->rx_ia; struct rpcrdma_ia *ia = &xprt->rx_ia;
int rc; int rc;
ia->ri_id = rpcrdma_create_id(xprt, ia, addr); ia->ri_id = rpcrdma_create_id(xprt, ia);
if (IS_ERR(ia->ri_id)) { if (IS_ERR(ia->ri_id)) {
rc = PTR_ERR(ia->ri_id); rc = PTR_ERR(ia->ri_id);
goto out_err; goto out_err;
...@@ -392,7 +387,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr) ...@@ -392,7 +387,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
} }
switch (xprt_rdma_memreg_strategy) { switch (xprt_rdma_memreg_strategy) {
case RPCRDMA_FRMR: case RPCRDMA_FRWR:
if (frwr_is_supported(ia)) { if (frwr_is_supported(ia)) {
ia->ri_ops = &rpcrdma_frwr_memreg_ops; ia->ri_ops = &rpcrdma_frwr_memreg_ops;
break; break;
...@@ -462,10 +457,12 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) ...@@ -462,10 +457,12 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
rpcrdma_dma_unmap_regbuf(req->rl_sendbuf); rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
rpcrdma_dma_unmap_regbuf(req->rl_recvbuf); rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
} }
rpcrdma_destroy_mrs(buf); rpcrdma_mrs_destroy(buf);
/* Allow waiters to continue */ /* Allow waiters to continue */
complete(&ia->ri_remove_done); complete(&ia->ri_remove_done);
trace_xprtrdma_remove(r_xprt);
} }
/** /**
...@@ -476,7 +473,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) ...@@ -476,7 +473,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia)
void void
rpcrdma_ia_close(struct rpcrdma_ia *ia) rpcrdma_ia_close(struct rpcrdma_ia *ia)
{ {
dprintk("RPC: %s: entering\n", __func__);
if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
if (ia->ri_id->qp) if (ia->ri_id->qp)
rdma_destroy_qp(ia->ri_id); rdma_destroy_qp(ia->ri_id);
...@@ -630,9 +626,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ...@@ -630,9 +626,6 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
void void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{ {
dprintk("RPC: %s: entering, connected is %d\n",
__func__, ep->rep_connected);
cancel_delayed_work_sync(&ep->rep_connect_worker); cancel_delayed_work_sync(&ep->rep_connect_worker);
if (ia->ri_id->qp) { if (ia->ri_id->qp) {
...@@ -653,13 +646,12 @@ static int ...@@ -653,13 +646,12 @@ static int
rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{ {
struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
int rc, err; int rc, err;
pr_info("%s: r_xprt = %p\n", __func__, r_xprt); trace_xprtrdma_reinsert(r_xprt);
rc = -EHOSTUNREACH; rc = -EHOSTUNREACH;
if (rpcrdma_ia_open(r_xprt, sap)) if (rpcrdma_ia_open(r_xprt))
goto out1; goto out1;
rc = -ENOMEM; rc = -ENOMEM;
...@@ -676,7 +668,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, ...@@ -676,7 +668,7 @@ rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
goto out3; goto out3;
} }
rpcrdma_create_mrs(r_xprt); rpcrdma_mrs_create(r_xprt);
return 0; return 0;
out3: out3:
...@@ -691,16 +683,15 @@ static int ...@@ -691,16 +683,15 @@ static int
rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep, rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
struct rpcrdma_ia *ia) struct rpcrdma_ia *ia)
{ {
struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
struct rdma_cm_id *id, *old; struct rdma_cm_id *id, *old;
int err, rc; int err, rc;
dprintk("RPC: %s: reconnecting...\n", __func__); trace_xprtrdma_reconnect(r_xprt);
rpcrdma_ep_disconnect(ep, ia); rpcrdma_ep_disconnect(ep, ia);
rc = -EHOSTUNREACH; rc = -EHOSTUNREACH;
id = rpcrdma_create_id(r_xprt, ia, sap); id = rpcrdma_create_id(r_xprt, ia);
if (IS_ERR(id)) if (IS_ERR(id))
goto out; goto out;
...@@ -817,16 +808,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ...@@ -817,16 +808,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
int rc; int rc;
rc = rdma_disconnect(ia->ri_id); rc = rdma_disconnect(ia->ri_id);
if (!rc) { if (!rc)
/* returns without wait if not connected */ /* returns without wait if not connected */
wait_event_interruptible(ep->rep_connect_wait, wait_event_interruptible(ep->rep_connect_wait,
ep->rep_connected != 1); ep->rep_connected != 1);
dprintk("RPC: %s: after wait, %sconnected\n", __func__, else
(ep->rep_connected == 1) ? "still " : "dis");
} else {
dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
ep->rep_connected = rc; ep->rep_connected = rc;
} trace_xprtrdma_disconnect(container_of(ep, struct rpcrdma_xprt,
rx_ep), rc);
ib_drain_qp(ia->ri_id->qp); ib_drain_qp(ia->ri_id->qp);
} }
...@@ -998,15 +987,15 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) ...@@ -998,15 +987,15 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
{ {
struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
rb_recovery_worker.work); rb_recovery_worker.work);
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
spin_lock(&buf->rb_recovery_lock); spin_lock(&buf->rb_recovery_lock);
while (!list_empty(&buf->rb_stale_mrs)) { while (!list_empty(&buf->rb_stale_mrs)) {
mw = rpcrdma_pop_mw(&buf->rb_stale_mrs); mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock); spin_unlock(&buf->rb_recovery_lock);
dprintk("RPC: %s: recovering MR %p\n", __func__, mw); trace_xprtrdma_recover_mr(mr);
mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw); mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
spin_lock(&buf->rb_recovery_lock); spin_lock(&buf->rb_recovery_lock);
} }
...@@ -1014,20 +1003,20 @@ rpcrdma_mr_recovery_worker(struct work_struct *work) ...@@ -1014,20 +1003,20 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
} }
void void
rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw) rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
{ {
struct rpcrdma_xprt *r_xprt = mw->mw_xprt; struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
spin_lock(&buf->rb_recovery_lock); spin_lock(&buf->rb_recovery_lock);
rpcrdma_push_mw(mw, &buf->rb_stale_mrs); rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock); spin_unlock(&buf->rb_recovery_lock);
schedule_delayed_work(&buf->rb_recovery_worker, 0); schedule_delayed_work(&buf->rb_recovery_worker, 0);
} }
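A minimal sketch of how a registration path is expected to use rpcrdma_mr_defer_recovery(): when an MR is left in an unknown state, the caller queues it on rb_stale_mrs through this helper rather than returning it to the free list, and the recovery worker above later invokes ro_recover_mr on it. The wrapper function name below is illustrative only, not part of this patch.

	static struct rpcrdma_mr_seg *
	example_frwr_map_error(struct rpcrdma_mr *mr)
	{
		/* Registration failed part-way: the MR may still be
		 * DMA-mapped or mid-FASTREG, so hand it to the recovery
		 * worker instead of calling rpcrdma_mr_put().
		 */
		rpcrdma_mr_defer_recovery(mr);
		return ERR_PTR(-EIO);
	}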
static void static void
rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{ {
struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
...@@ -1036,32 +1025,32 @@ rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt) ...@@ -1036,32 +1025,32 @@ rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
LIST_HEAD(all); LIST_HEAD(all);
for (count = 0; count < 32; count++) { for (count = 0; count < 32; count++) {
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
int rc; int rc;
mw = kzalloc(sizeof(*mw), GFP_KERNEL); mr = kzalloc(sizeof(*mr), GFP_KERNEL);
if (!mw) if (!mr)
break; break;
rc = ia->ri_ops->ro_init_mr(ia, mw); rc = ia->ri_ops->ro_init_mr(ia, mr);
if (rc) { if (rc) {
kfree(mw); kfree(mr);
break; break;
} }
mw->mw_xprt = r_xprt; mr->mr_xprt = r_xprt;
list_add(&mw->mw_list, &free); list_add(&mr->mr_list, &free);
list_add(&mw->mw_all, &all); list_add(&mr->mr_all, &all);
} }
spin_lock(&buf->rb_mwlock); spin_lock(&buf->rb_mrlock);
list_splice(&free, &buf->rb_mws); list_splice(&free, &buf->rb_mrs);
list_splice(&all, &buf->rb_all); list_splice(&all, &buf->rb_all);
r_xprt->rx_stats.mrs_allocated += count; r_xprt->rx_stats.mrs_allocated += count;
spin_unlock(&buf->rb_mwlock); spin_unlock(&buf->rb_mrlock);
dprintk("RPC: %s: created %u MRs\n", __func__, count); trace_xprtrdma_createmrs(r_xprt, count);
} }
static void static void
...@@ -1072,7 +1061,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work) ...@@ -1072,7 +1061,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work)
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
rx_buf); rx_buf);
rpcrdma_create_mrs(r_xprt); rpcrdma_mrs_create(r_xprt);
} }
struct rpcrdma_req * struct rpcrdma_req *
...@@ -1093,10 +1082,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) ...@@ -1093,10 +1082,17 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
return req; return req;
} }
struct rpcrdma_rep * /**
* rpcrdma_create_rep - Allocate an rpcrdma_rep object
* @r_xprt: controlling transport
*
* Returns 0 on success or a negative errno on failure.
*/
int
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{ {
struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep; struct rpcrdma_rep *rep;
int rc; int rc;
...@@ -1121,12 +1117,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) ...@@ -1121,12 +1117,18 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1; rep->rr_recv_wr.num_sge = 1;
return rep;
spin_lock(&buf->rb_lock);
list_add(&rep->rr_list, &buf->rb_recv_bufs);
spin_unlock(&buf->rb_lock);
return 0;
out_free: out_free:
kfree(rep); kfree(rep);
out: out:
return ERR_PTR(rc); dprintk("RPC: %s: reply buffer %d alloc failed\n",
__func__, rc);
return rc;
} }
int int
...@@ -1137,10 +1139,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) ...@@ -1137,10 +1139,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
buf->rb_max_requests = r_xprt->rx_data.max_requests; buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_bc_srv_max_requests = 0; buf->rb_bc_srv_max_requests = 0;
spin_lock_init(&buf->rb_mwlock); spin_lock_init(&buf->rb_mrlock);
spin_lock_init(&buf->rb_lock); spin_lock_init(&buf->rb_lock);
spin_lock_init(&buf->rb_recovery_lock); spin_lock_init(&buf->rb_recovery_lock);
INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_mrs);
INIT_LIST_HEAD(&buf->rb_all); INIT_LIST_HEAD(&buf->rb_all);
INIT_LIST_HEAD(&buf->rb_stale_mrs); INIT_LIST_HEAD(&buf->rb_stale_mrs);
INIT_DELAYED_WORK(&buf->rb_refresh_worker, INIT_DELAYED_WORK(&buf->rb_refresh_worker,
...@@ -1148,7 +1150,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) ...@@ -1148,7 +1150,7 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
INIT_DELAYED_WORK(&buf->rb_recovery_worker, INIT_DELAYED_WORK(&buf->rb_recovery_worker,
rpcrdma_mr_recovery_worker); rpcrdma_mr_recovery_worker);
rpcrdma_create_mrs(r_xprt); rpcrdma_mrs_create(r_xprt);
INIT_LIST_HEAD(&buf->rb_send_bufs); INIT_LIST_HEAD(&buf->rb_send_bufs);
INIT_LIST_HEAD(&buf->rb_allreqs); INIT_LIST_HEAD(&buf->rb_allreqs);
...@@ -1167,17 +1169,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) ...@@ -1167,17 +1169,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
} }
INIT_LIST_HEAD(&buf->rb_recv_bufs); INIT_LIST_HEAD(&buf->rb_recv_bufs);
for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) { for (i = 0; i <= buf->rb_max_requests; i++) {
struct rpcrdma_rep *rep; rc = rpcrdma_create_rep(r_xprt);
if (rc)
rep = rpcrdma_create_rep(r_xprt);
if (IS_ERR(rep)) {
dprintk("RPC: %s: reply buffer %d alloc failed\n",
__func__, i);
rc = PTR_ERR(rep);
goto out; goto out;
}
list_add(&rep->rr_list, &buf->rb_recv_bufs);
} }
rc = rpcrdma_sendctxs_create(r_xprt); rc = rpcrdma_sendctxs_create(r_xprt);
...@@ -1229,26 +1224,26 @@ rpcrdma_destroy_req(struct rpcrdma_req *req) ...@@ -1229,26 +1224,26 @@ rpcrdma_destroy_req(struct rpcrdma_req *req)
} }
static void static void
rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf) rpcrdma_mrs_destroy(struct rpcrdma_buffer *buf)
{ {
struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
rx_buf); rx_buf);
struct rpcrdma_ia *ia = rdmab_to_ia(buf); struct rpcrdma_ia *ia = rdmab_to_ia(buf);
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
unsigned int count; unsigned int count;
count = 0; count = 0;
spin_lock(&buf->rb_mwlock); spin_lock(&buf->rb_mrlock);
while (!list_empty(&buf->rb_all)) { while (!list_empty(&buf->rb_all)) {
mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all); mr = list_entry(buf->rb_all.next, struct rpcrdma_mr, mr_all);
list_del(&mw->mw_all); list_del(&mr->mr_all);
spin_unlock(&buf->rb_mwlock); spin_unlock(&buf->rb_mrlock);
ia->ri_ops->ro_release_mr(mw); ia->ri_ops->ro_release_mr(mr);
count++; count++;
spin_lock(&buf->rb_mwlock); spin_lock(&buf->rb_mrlock);
} }
spin_unlock(&buf->rb_mwlock); spin_unlock(&buf->rb_mrlock);
r_xprt->rx_stats.mrs_allocated = 0; r_xprt->rx_stats.mrs_allocated = 0;
dprintk("RPC: %s: released %u MRs\n", __func__, count); dprintk("RPC: %s: released %u MRs\n", __func__, count);
...@@ -1285,27 +1280,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) ...@@ -1285,27 +1280,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
spin_unlock(&buf->rb_reqslock); spin_unlock(&buf->rb_reqslock);
buf->rb_recv_count = 0; buf->rb_recv_count = 0;
rpcrdma_destroy_mrs(buf); rpcrdma_mrs_destroy(buf);
} }
struct rpcrdma_mw * /**
rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) * rpcrdma_mr_get - Allocate an rpcrdma_mr object
* @r_xprt: controlling transport
*
* Returns an initialized rpcrdma_mr or NULL if no free
* rpcrdma_mr objects are available.
*/
struct rpcrdma_mr *
rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
{ {
struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_mw *mw = NULL; struct rpcrdma_mr *mr = NULL;
spin_lock(&buf->rb_mwlock); spin_lock(&buf->rb_mrlock);
if (!list_empty(&buf->rb_mws)) if (!list_empty(&buf->rb_mrs))
mw = rpcrdma_pop_mw(&buf->rb_mws); mr = rpcrdma_mr_pop(&buf->rb_mrs);
spin_unlock(&buf->rb_mwlock); spin_unlock(&buf->rb_mrlock);
if (!mw) if (!mr)
goto out_nomws; goto out_nomrs;
mw->mw_flags = 0; return mr;
return mw;
out_nomws: out_nomrs:
dprintk("RPC: %s: no MWs available\n", __func__); trace_xprtrdma_nomrs(r_xprt);
if (r_xprt->rx_ep.rep_connected != -ENODEV) if (r_xprt->rx_ep.rep_connected != -ENODEV)
schedule_delayed_work(&buf->rb_refresh_worker, 0); schedule_delayed_work(&buf->rb_refresh_worker, 0);
...@@ -1315,14 +1316,39 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt) ...@@ -1315,14 +1316,39 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
return NULL; return NULL;
} }
static void
__rpcrdma_mr_put(struct rpcrdma_buffer *buf, struct rpcrdma_mr *mr)
{
spin_lock(&buf->rb_mrlock);
rpcrdma_mr_push(mr, &buf->rb_mrs);
spin_unlock(&buf->rb_mrlock);
}
/**
* rpcrdma_mr_put - Release an rpcrdma_mr object
* @mr: object to release
*
*/
void void
rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw) rpcrdma_mr_put(struct rpcrdma_mr *mr)
{ {
struct rpcrdma_buffer *buf = &r_xprt->rx_buf; __rpcrdma_mr_put(&mr->mr_xprt->rx_buf, mr);
}
/**
* rpcrdma_mr_unmap_and_put - DMA unmap an MR and release it
* @mr: object to release
*
*/
void
rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr)
{
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
spin_lock(&buf->rb_mwlock); trace_xprtrdma_dma_unmap(mr);
rpcrdma_push_mw(mw, &buf->rb_mws); ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
spin_unlock(&buf->rb_mwlock); mr->mr_sg, mr->mr_nents, mr->mr_dir);
__rpcrdma_mr_put(&r_xprt->rx_buf, mr);
} }
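A rough sketch of the renamed MR life cycle from a caller's point of view, assuming the usual ro_map/reply-handling flow (the function below is illustrative, not code from this series): rpcrdma_mr_get() pulls a free MR, the registration method fills in mr_handle, mr_length and mr_offset, and rpcrdma_mr_unmap_and_put() both DMA-unmaps the scatterlist and returns the MR to rb_mrs.

	static int example_mr_cycle(struct rpcrdma_xprt *r_xprt)
	{
		struct rpcrdma_mr *mr;

		mr = rpcrdma_mr_get(r_xprt);
		if (!mr)
			return -EAGAIN;	/* refresh worker already kicked */

		/* ... ro_map() maps mr->mr_sg and sets mr->mr_handle,
		 * mr->mr_length and mr->mr_offset for the chunk ...
		 */

		/* After the reply is handled and the MR is invalidated: */
		rpcrdma_mr_unmap_and_put(mr);
		return 0;
	}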
static struct rpcrdma_rep * static struct rpcrdma_rep *
...@@ -1359,11 +1385,11 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) ...@@ -1359,11 +1385,11 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
req = rpcrdma_buffer_get_req_locked(buffers); req = rpcrdma_buffer_get_req_locked(buffers);
req->rl_reply = rpcrdma_buffer_get_rep(buffers); req->rl_reply = rpcrdma_buffer_get_rep(buffers);
spin_unlock(&buffers->rb_lock); spin_unlock(&buffers->rb_lock);
return req; return req;
out_reqbuf: out_reqbuf:
spin_unlock(&buffers->rb_lock); spin_unlock(&buffers->rb_lock);
pr_warn("RPC: %s: out of request buffers\n", __func__);
return NULL; return NULL;
} }
...@@ -1519,9 +1545,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, ...@@ -1519,9 +1545,6 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
req->rl_reply = NULL; req->rl_reply = NULL;
} }
dprintk("RPC: %s: posting %d s/g entries\n",
__func__, send_wr->num_sge);
if (!ep->rep_send_count || if (!ep->rep_send_count ||
test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) {
send_wr->send_flags |= IB_SEND_SIGNALED; send_wr->send_flags |= IB_SEND_SIGNALED;
...@@ -1530,14 +1553,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, ...@@ -1530,14 +1553,12 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia,
send_wr->send_flags &= ~IB_SEND_SIGNALED; send_wr->send_flags &= ~IB_SEND_SIGNALED;
--ep->rep_send_count; --ep->rep_send_count;
} }
rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
trace_xprtrdma_post_send(req, rc);
if (rc) if (rc)
goto out_postsend_err; return -ENOTCONN;
return 0; return 0;
out_postsend_err:
pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
return -ENOTCONN;
} }
int int
...@@ -1550,23 +1571,20 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, ...@@ -1550,23 +1571,20 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf)) if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
goto out_map; goto out_map;
rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail); rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
trace_xprtrdma_post_recv(rep, rc);
if (rc) if (rc)
goto out_postrecv; return -ENOTCONN;
return 0; return 0;
out_map: out_map:
pr_err("rpcrdma: failed to DMA map the Receive buffer\n"); pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
return -EIO; return -EIO;
out_postrecv:
pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
return -ENOTCONN;
} }
/** /**
* rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
* @r_xprt: transport associated with these backchannel resources * @r_xprt: transport associated with these backchannel resources
* @min_reqs: minimum number of incoming requests expected * @count: minimum number of incoming requests expected
* *
* Returns zero if all requested buffers were posted, or a negative errno. * Returns zero if all requested buffers were posted, or a negative errno.
*/ */
...@@ -1594,7 +1612,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count) ...@@ -1594,7 +1612,7 @@ rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
out_reqbuf: out_reqbuf:
spin_unlock(&buffers->rb_lock); spin_unlock(&buffers->rb_lock);
pr_warn("%s: no extra receive buffers\n", __func__); trace_xprtrdma_noreps(r_xprt);
return -ENOMEM; return -ENOMEM;
out_rc: out_rc:
......
...@@ -73,11 +73,10 @@ struct rpcrdma_ia { ...@@ -73,11 +73,10 @@ struct rpcrdma_ia {
struct completion ri_remove_done; struct completion ri_remove_done;
int ri_async_rc; int ri_async_rc;
unsigned int ri_max_segs; unsigned int ri_max_segs;
unsigned int ri_max_frmr_depth; unsigned int ri_max_frwr_depth;
unsigned int ri_max_inline_write; unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read; unsigned int ri_max_inline_read;
unsigned int ri_max_send_sges; unsigned int ri_max_send_sges;
bool ri_reminv_expected;
bool ri_implicit_roundup; bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype; enum ib_mr_type ri_mrtype;
unsigned long ri_flags; unsigned long ri_flags;
...@@ -101,7 +100,6 @@ struct rpcrdma_ep { ...@@ -101,7 +100,6 @@ struct rpcrdma_ep {
wait_queue_head_t rep_connect_wait; wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private; struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma; struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker; struct delayed_work rep_connect_worker;
}; };
...@@ -232,29 +230,29 @@ enum { ...@@ -232,29 +230,29 @@ enum {
}; };
/* /*
* struct rpcrdma_mw - external memory region metadata * struct rpcrdma_mr - external memory region metadata
* *
* An external memory region is any buffer or page that is registered * An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered). * on the fly (ie, not pre-registered).
* *
* Each rpcrdma_buffer has a list of free MWs anchored in rb_mws. During * Each rpcrdma_buffer has a list of free MWs anchored in rb_mrs. During
* call_allocate, rpcrdma_buffer_get() assigns one to each segment in * call_allocate, rpcrdma_buffer_get() assigns one to each segment in
* an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep * an rpcrdma_req. Then rpcrdma_register_external() grabs these to keep
* track of registration metadata while each RPC is pending. * track of registration metadata while each RPC is pending.
* rpcrdma_deregister_external() uses this metadata to unmap and * rpcrdma_deregister_external() uses this metadata to unmap and
* release these resources when an RPC is complete. * release these resources when an RPC is complete.
*/ */
enum rpcrdma_frmr_state { enum rpcrdma_frwr_state {
FRMR_IS_INVALID, /* ready to be used */ FRWR_IS_INVALID, /* ready to be used */
FRMR_IS_VALID, /* in use */ FRWR_IS_VALID, /* in use */
FRMR_FLUSHED_FR, /* flushed FASTREG WR */ FRWR_FLUSHED_FR, /* flushed FASTREG WR */
FRMR_FLUSHED_LI, /* flushed LOCALINV WR */ FRWR_FLUSHED_LI, /* flushed LOCALINV WR */
}; };
struct rpcrdma_frmr { struct rpcrdma_frwr {
struct ib_mr *fr_mr; struct ib_mr *fr_mr;
struct ib_cqe fr_cqe; struct ib_cqe fr_cqe;
enum rpcrdma_frmr_state fr_state; enum rpcrdma_frwr_state fr_state;
struct completion fr_linv_done; struct completion fr_linv_done;
union { union {
struct ib_reg_wr fr_regwr; struct ib_reg_wr fr_regwr;
...@@ -267,26 +265,20 @@ struct rpcrdma_fmr { ...@@ -267,26 +265,20 @@ struct rpcrdma_fmr {
u64 *fm_physaddrs; u64 *fm_physaddrs;
}; };
struct rpcrdma_mw { struct rpcrdma_mr {
struct list_head mw_list; struct list_head mr_list;
struct scatterlist *mw_sg; struct scatterlist *mr_sg;
int mw_nents; int mr_nents;
enum dma_data_direction mw_dir; enum dma_data_direction mr_dir;
unsigned long mw_flags;
union { union {
struct rpcrdma_fmr fmr; struct rpcrdma_fmr fmr;
struct rpcrdma_frmr frmr; struct rpcrdma_frwr frwr;
}; };
struct rpcrdma_xprt *mw_xprt; struct rpcrdma_xprt *mr_xprt;
u32 mw_handle; u32 mr_handle;
u32 mw_length; u32 mr_length;
u64 mw_offset; u64 mr_offset;
struct list_head mw_all; struct list_head mr_all;
};
/* mw_flags */
enum {
RPCRDMA_MW_F_RI = 1,
}; };
/* /*
...@@ -362,8 +354,7 @@ struct rpcrdma_req { ...@@ -362,8 +354,7 @@ struct rpcrdma_req {
/* rl_flags */ /* rl_flags */
enum { enum {
RPCRDMA_REQ_F_BACKCHANNEL = 0, RPCRDMA_REQ_F_PENDING = 0,
RPCRDMA_REQ_F_PENDING,
RPCRDMA_REQ_F_TX_RESOURCES, RPCRDMA_REQ_F_TX_RESOURCES,
}; };
...@@ -374,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) ...@@ -374,25 +365,25 @@ rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req)
} }
static inline struct rpcrdma_req * static inline struct rpcrdma_req *
rpcr_to_rdmar(struct rpc_rqst *rqst) rpcr_to_rdmar(const struct rpc_rqst *rqst)
{ {
return rqst->rq_xprtdata; return rqst->rq_xprtdata;
} }
static inline void static inline void
rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list) rpcrdma_mr_push(struct rpcrdma_mr *mr, struct list_head *list)
{ {
list_add_tail(&mw->mw_list, list); list_add_tail(&mr->mr_list, list);
} }
static inline struct rpcrdma_mw * static inline struct rpcrdma_mr *
rpcrdma_pop_mw(struct list_head *list) rpcrdma_mr_pop(struct list_head *list)
{ {
struct rpcrdma_mw *mw; struct rpcrdma_mr *mr;
mw = list_first_entry(list, struct rpcrdma_mw, mw_list); mr = list_first_entry(list, struct rpcrdma_mr, mr_list);
list_del(&mw->mw_list); list_del(&mr->mr_list);
return mw; return mr;
} }
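Note that these helpers take no lock of their own: callers hold rb_mrlock (free list) or rb_recovery_lock (stale list), or operate on a caller-private list. A hedged sketch of the private-list case, with an illustrative function name:

	static void example_release_mrs(struct list_head *mrs)
	{
		struct rpcrdma_mr *mr;

		/* "mrs" is private to the caller, so no lock is needed
		 * while draining it.
		 */
		while (!list_empty(mrs)) {
			mr = rpcrdma_mr_pop(mrs);
			rpcrdma_mr_unmap_and_put(mr);
		}
	}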
/* /*
...@@ -402,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list) ...@@ -402,8 +393,8 @@ rpcrdma_pop_mw(struct list_head *list)
* One of these is associated with a transport instance * One of these is associated with a transport instance
*/ */
struct rpcrdma_buffer { struct rpcrdma_buffer {
spinlock_t rb_mwlock; /* protect rb_mws list */ spinlock_t rb_mrlock; /* protect rb_mrs list */
struct list_head rb_mws; struct list_head rb_mrs;
struct list_head rb_all; struct list_head rb_all;
unsigned long rb_sc_head; unsigned long rb_sc_head;
...@@ -438,13 +429,11 @@ struct rpcrdma_buffer { ...@@ -438,13 +429,11 @@ struct rpcrdma_buffer {
* This data should be set with mount options * This data should be set with mount options
*/ */
struct rpcrdma_create_data_internal { struct rpcrdma_create_data_internal {
struct sockaddr_storage addr; /* RDMA server address */
unsigned int max_requests; /* max requests (slots) in flight */ unsigned int max_requests; /* max requests (slots) in flight */
unsigned int rsize; /* mount rsize - max read hdr+data */ unsigned int rsize; /* mount rsize - max read hdr+data */
unsigned int wsize; /* mount wsize - max write hdr+data */ unsigned int wsize; /* mount wsize - max write hdr+data */
unsigned int inline_rsize; /* max non-rdma read data payload */ unsigned int inline_rsize; /* max non-rdma read data payload */
unsigned int inline_wsize; /* max non-rdma write data payload */ unsigned int inline_wsize; /* max non-rdma write data payload */
unsigned int padding; /* non-rdma write header padding */
}; };
/* /*
...@@ -484,17 +473,19 @@ struct rpcrdma_memreg_ops { ...@@ -484,17 +473,19 @@ struct rpcrdma_memreg_ops {
struct rpcrdma_mr_seg * struct rpcrdma_mr_seg *
(*ro_map)(struct rpcrdma_xprt *, (*ro_map)(struct rpcrdma_xprt *,
struct rpcrdma_mr_seg *, int, bool, struct rpcrdma_mr_seg *, int, bool,
struct rpcrdma_mw **); struct rpcrdma_mr **);
void (*ro_reminv)(struct rpcrdma_rep *rep,
struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *, void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *); struct list_head *);
void (*ro_recover_mr)(struct rpcrdma_mw *); void (*ro_recover_mr)(struct rpcrdma_mr *mr);
int (*ro_open)(struct rpcrdma_ia *, int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *, struct rpcrdma_ep *,
struct rpcrdma_create_data_internal *); struct rpcrdma_create_data_internal *);
size_t (*ro_maxpages)(struct rpcrdma_xprt *); size_t (*ro_maxpages)(struct rpcrdma_xprt *);
int (*ro_init_mr)(struct rpcrdma_ia *, int (*ro_init_mr)(struct rpcrdma_ia *,
struct rpcrdma_mw *); struct rpcrdma_mr *);
void (*ro_release_mr)(struct rpcrdma_mw *); void (*ro_release_mr)(struct rpcrdma_mr *mr);
const char *ro_displayname; const char *ro_displayname;
const int ro_send_w_inv_ok; const int ro_send_w_inv_ok;
}; };
...@@ -525,6 +516,18 @@ struct rpcrdma_xprt { ...@@ -525,6 +516,18 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt) #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, rx_xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
static inline const char *
rpcrdma_addrstr(const struct rpcrdma_xprt *r_xprt)
{
return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_ADDR];
}
static inline const char *
rpcrdma_portstr(const struct rpcrdma_xprt *r_xprt)
{
return r_xprt->rx_xprt.address_strings[RPC_DISPLAY_PORT];
}
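With the cached sockaddr fields gone from rpcrdma_create_data_internal and rpcrdma_ep, display code is expected to go through these helpers. A hedged example of the intended usage (the message text is illustrative):

	pr_info("rpcrdma: connection to %s:%s closed\n",
		rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));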
/* Setting this to 0 ensures interoperability with early servers. /* Setting this to 0 ensures interoperability with early servers.
* Setting this to 1 enhances certain unaligned read/write performance. * Setting this to 1 enhances certain unaligned read/write performance.
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
...@@ -538,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy; ...@@ -538,7 +541,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
/* /*
* Interface Adapter calls - xprtrdma/verbs.c * Interface Adapter calls - xprtrdma/verbs.c
*/ */
int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr); int rpcrdma_ia_open(struct rpcrdma_xprt *xprt);
void rpcrdma_ia_remove(struct rpcrdma_ia *ia); void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
void rpcrdma_ia_close(struct rpcrdma_ia *); void rpcrdma_ia_close(struct rpcrdma_ia *);
bool frwr_is_supported(struct rpcrdma_ia *); bool frwr_is_supported(struct rpcrdma_ia *);
...@@ -564,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *); ...@@ -564,22 +567,23 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_rep *);
* Buffer calls - xprtrdma/verbs.c * Buffer calls - xprtrdma/verbs.c
*/ */
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *); struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_req *); void rpcrdma_destroy_req(struct rpcrdma_req *);
int rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt);
int rpcrdma_buffer_create(struct rpcrdma_xprt *); int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf);
void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc);
struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); void rpcrdma_mr_put(struct rpcrdma_mr *mr);
void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *); void rpcrdma_buffer_put(struct rpcrdma_req *);
void rpcrdma_recv_buffer_get(struct rpcrdma_req *); void rpcrdma_recv_buffer_get(struct rpcrdma_req *);
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
void rpcrdma_defer_mr_recovery(struct rpcrdma_mw *);
struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction, struct rpcrdma_regbuf *rpcrdma_alloc_regbuf(size_t, enum dma_data_direction,
gfp_t); gfp_t);
bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *); bool __rpcrdma_dma_map_regbuf(struct rpcrdma_ia *, struct rpcrdma_regbuf *);
...@@ -663,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *); ...@@ -663,7 +667,7 @@ int xprt_rdma_bc_up(struct svc_serv *, struct net *);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int rpcrdma_bc_marshal_reply(struct rpc_rqst *); int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *); void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */ #endif /* CONFIG_SUNRPC_BACKCHANNEL */
...@@ -671,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int); ...@@ -671,3 +675,5 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
extern struct xprt_class xprt_rdma_bc; extern struct xprt_class xprt_rdma_bc;
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
#include <trace/events/rpcrdma.h>