Commit 5d53884b authored by Chuck Lever's avatar Chuck Lever Committed by Greg Kroah-Hartman

xprtrdma: Per-connection pad optimization

commit b5f0afbe upstream.

Pad optimization is changed by echoing into
/proc/sys/sunrpc/rdma_pad_optimize. This is a global setting,
affecting all RPC-over-RDMA connections to all servers.

The marshaling code picks up that value and uses it for decisions
about how to construct each RPC-over-RDMA frame. Having it change
suddenly in mid-operation can result in unexpected failures. And
some servers a client mounts might need chunk round-up, while
others don't.

So instead, copy the pad_optimize setting into each connection's
rpcrdma_ia when the transport is created, and use the copy, which
can't change during the life of the connection, instead.

This also removes a hack: rpcrdma_convert_iovs was using
the remote-invalidation-expected flag to predict when it could leave
out Write chunk padding. This is because the Linux server handles
implicit XDR padding on Write chunks correctly, and only Linux
servers can set the connection's remote-invalidation-expected flag.

It's more sensible to use the pad optimization setting instead.

Fixes: 677eb17e ("xprtrdma: Fix XDR tail buffer marshalling")
Signed-off-by: default avatarChuck Lever <chuck.lever@oracle.com>
Signed-off-by: default avatarAnna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 921fe03a
...@@ -186,9 +186,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n) ...@@ -186,9 +186,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
*/ */
static int static int
rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, unsigned int pos, enum rpcrdma_chunktype type,
bool reminv_expected) struct rpcrdma_mr_seg *seg)
{ {
int len, n, p, page_base; int len, n, p, page_base;
struct page **ppages; struct page **ppages;
...@@ -229,14 +229,15 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, ...@@ -229,14 +229,15 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
/* When encoding a Read chunk, the tail iovec contains an /* When encoding a Read chunk, the tail iovec contains an
* XDR pad and may be omitted. * XDR pad and may be omitted.
*/ */
if (type == rpcrdma_readch && xprt_rdma_pad_optimize) if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
return n; return n;
/* When encoding the Write list, some servers need to see an extra /* When encoding a Write chunk, some servers need to see an
* segment for odd-length Write chunks. The upper layer provides * extra segment for non-XDR-aligned Write chunks. The upper
* space in the tail iovec for this purpose. * layer provides space in the tail iovec that may be used
* for this purpose.
*/ */
if (type == rpcrdma_writech && reminv_expected) if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
return n; return n;
if (xdrbuf->tail[0].iov_len) { if (xdrbuf->tail[0].iov_len) {
...@@ -291,7 +292,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, ...@@ -291,7 +292,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
if (rtype == rpcrdma_areadch) if (rtype == rpcrdma_areadch)
pos = 0; pos = 0;
seg = req->rl_segments; seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false); nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
rtype, seg);
if (nsegs < 0) if (nsegs < 0)
return ERR_PTR(nsegs); return ERR_PTR(nsegs);
...@@ -353,10 +355,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, ...@@ -353,10 +355,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
} }
seg = req->rl_segments; seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
rqst->rq_rcv_buf.head[0].iov_len, rqst->rq_rcv_buf.head[0].iov_len,
wtype, seg, wtype, seg);
r_xprt->rx_ia.ri_reminv_expected);
if (nsegs < 0) if (nsegs < 0)
return ERR_PTR(nsegs); return ERR_PTR(nsegs);
...@@ -421,8 +422,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, ...@@ -421,8 +422,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
} }
seg = req->rl_segments; seg = req->rl_segments;
nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg, nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
r_xprt->rx_ia.ri_reminv_expected);
if (nsegs < 0) if (nsegs < 0)
return ERR_PTR(nsegs); return ERR_PTR(nsegs);
......
...@@ -208,6 +208,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt, ...@@ -208,6 +208,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
/* Default settings for RPC-over-RDMA Version One */ /* Default settings for RPC-over-RDMA Version One */
r_xprt->rx_ia.ri_reminv_expected = false; r_xprt->rx_ia.ri_reminv_expected = false;
r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
rsize = RPCRDMA_V1_DEF_INLINE_SIZE; rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
wsize = RPCRDMA_V1_DEF_INLINE_SIZE; wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
......
...@@ -75,6 +75,7 @@ struct rpcrdma_ia { ...@@ -75,6 +75,7 @@ struct rpcrdma_ia {
unsigned int ri_max_inline_write; unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read; unsigned int ri_max_inline_read;
bool ri_reminv_expected; bool ri_reminv_expected;
bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype; enum ib_mr_type ri_mrtype;
struct ib_qp_attr ri_qp_attr; struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr; struct ib_qp_init_attr ri_qp_init_attr;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment