Commit 2fcc213a, authored by Chuck Lever, committed by Anna Schumaker

xprtrdma: Fix large NFS SYMLINK calls

Repair how rpcrdma_marshal_req() chooses which RDMA message type
to use for large non-WRITE operations so that it picks RDMA_NOMSG
in the correct situations, and sets up the marshaling logic to
SEND only the RPC/RDMA header.

Large NFSv2 SYMLINK requests now use RDMA_NOMSG calls. The Linux NFS
server XDR decoder for NFSv2 SYMLINK does not handle having the
pathname argument arrive in a separate buffer. The decoder could be
fixed, but this is simpler and RDMA_NOMSG can be used in a variety
of other situations.

Ensure that the Linux client continues to use "RDMA_MSG + read
list" when sending large NFSv3 SYMLINK requests, which is more
efficient than using RDMA_NOMSG.

Large NFSv4 CREATE(NF4LNK) requests are changed to use "RDMA_MSG +
read list" just like NFSv3 (see Section 5 of RFC 5667). Before,
these did not work at all.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Devesh Sharma <devesh.sharma@avagotech.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
parent 677eb17e
...@@ -1103,6 +1103,7 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req, ...@@ -1103,6 +1103,7 @@ static void nfs3_xdr_enc_symlink3args(struct rpc_rqst *req,
{ {
encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen); encode_diropargs3(xdr, args->fromfh, args->fromname, args->fromlen);
encode_symlinkdata3(xdr, args); encode_symlinkdata3(xdr, args);
xdr->buf->flags |= XDRBUF_WRITE;
} }
/* /*
......
...@@ -1154,7 +1154,9 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg * ...@@ -1154,7 +1154,9 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
case NF4LNK: case NF4LNK:
p = reserve_space(xdr, 4); p = reserve_space(xdr, 4);
*p = cpu_to_be32(create->u.symlink.len); *p = cpu_to_be32(create->u.symlink.len);
xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len); xdr_write_pages(xdr, create->u.symlink.pages, 0,
create->u.symlink.len);
xdr->buf->flags |= XDRBUF_WRITE;
break; break;
case NF4BLK: case NF4CHR: case NF4BLK: case NF4CHR:
......
...@@ -475,21 +475,24 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) ...@@ -475,21 +475,24 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* *
* o If the total request is under the inline threshold, all ops * o If the total request is under the inline threshold, all ops
* are sent as inline. * are sent as inline.
* o Large non-write ops are sent with the entire message as a
* single read chunk (protocol 0-position special case).
* o Large write ops transmit data as read chunk(s), header as * o Large write ops transmit data as read chunk(s), header as
* inline. * inline.
* o Large non-write ops are sent with the entire message as a
* single read chunk (protocol 0-position special case).
* *
* Note: the NFS code sending down multiple argument segments * This assumes that the upper layer does not present a request
* implies the op is a write. * that both has a data payload, and whose non-data arguments
* TBD check NFSv4 setacl * by themselves are larger than the inline threshold.
*/ */
if (rpcrdma_args_inline(rqst)) if (rpcrdma_args_inline(rqst)) {
rtype = rpcrdma_noch; rtype = rpcrdma_noch;
else if (rqst->rq_snd_buf.page_len == 0) } else if (rqst->rq_snd_buf.flags & XDRBUF_WRITE) {
rtype = rpcrdma_areadch;
else
rtype = rpcrdma_readch; rtype = rpcrdma_readch;
} else {
headerp->rm_type = htonl(RDMA_NOMSG);
rtype = rpcrdma_areadch;
rpclen = 0;
}
/* The following simplification is not true forever */ /* The following simplification is not true forever */
if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
...@@ -546,6 +549,10 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) ...@@ -546,6 +549,10 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
req->rl_send_iov[0].length = hdrlen; req->rl_send_iov[0].length = hdrlen;
req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf); req->rl_send_iov[0].lkey = rdmab_lkey(req->rl_rdmabuf);
req->rl_niovs = 1;
if (rtype == rpcrdma_areadch)
return 0;
req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf); req->rl_send_iov[1].addr = rdmab_addr(req->rl_sendbuf);
req->rl_send_iov[1].length = rpclen; req->rl_send_iov[1].length = rpclen;
req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf); req->rl_send_iov[1].lkey = rdmab_lkey(req->rl_sendbuf);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment