Commit e6604ecb authored by Linus Torvalds

Merge tag 'nfs-for-4.4-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  New features:
   - RDMA client backchannel from Chuck
   - Support for NFSv4.2 file CLONE using the btrfs ioctl

  Bugfixes + cleanups:
   - Move socket data receive out of the bottom halves and into a
     workqueue
   - Refactor NFSv4 error handling so that synchronous and asynchronous
     RPC calls handle errors identically
   - Fix a panic when block or object layout reads return a bad data
     length
   - Fix nfsroot so it can handle a 1024-byte path
   - Fix bad usage of page offset in bl_read_pagelist
   - Various NFSv4 callback cleanups+fixes
   - Fix GETATTR bitmap verification
   - Support hexadecimal numbers in the sunrpc debug sysctl files"

* tag 'nfs-for-4.4-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (53 commits)
  Sunrpc: Supports hexadecimal number for sysctl files of sunrpc debug
  nfs: Fix GETATTR bitmap verification
  nfs: Remove unused xdr page offsets in getacl/setacl arguments
  fs/nfs: remove unnecessary new_valid_dev check
  SUNRPC: fix variable type
  NFS: Enable client side NFSv4.1 backchannel to use other transports
  pNFS/flexfiles: Add support for FF_FLAGS_NO_IO_THRU_MDS
  pNFS/flexfiles: When mirrored, retry failed reads by switching mirrors
  SUNRPC: Remove the TCP-only restriction in bc_svc_process()
  svcrdma: Add backward direction service for RPC/RDMA transport
  xprtrdma: Handle incoming backward direction RPC calls
  xprtrdma: Add support for sending backward direction RPC replies
  xprtrdma: Pre-allocate Work Requests for backchannel
  xprtrdma: Pre-allocate backward rpc_rqst and send/receive buffers
  SUNRPC: Abstract backchannel operations
  xprtrdma: Saving IRQs no longer needed for rb_lock
  xprtrdma: Remove reply tasklet
  xprtrdma: Use workqueue to process RPC/RDMA replies
  xprtrdma: Replace send and receive arrays
  xprtrdma: Refactor reply handler error handling
  ...
parents 9d74288c 941c3ff3
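
For readers of the CLONE portions below: the series wires up btrfs-compatible ioctls in include/uapi/linux/nfs.h (0x94 is the btrfs ioctl magic, and codes 9/13 match BTRFS_IOC_CLONE/BTRFS_IOC_CLONE_RANGE). What follows is a hypothetical userspace sketch, not part of the merge itself; the ioctl numbers and argument struct are copied from the uapi hunk further down. NFS_IOC_CLONE instead takes the source fd directly as the ioctl argument and clones the whole file.

/* Hypothetical example (not part of this merge): clone src into dst on
 * an NFSv4.2 mount via the new NFS_IOC_CLONE_RANGE ioctl. Definitions
 * are copied from the include/uapi/linux/nfs.h hunk later in this diff.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <linux/types.h>

#define NFS_IOC_CLONE       _IOW(0x94, 9, int)
#define NFS_IOC_CLONE_RANGE _IOW(0x94, 13, int)

struct nfs_ioctl_clone_range_args {
        __s64 src_fd;
        __u64 src_off, count;
        __u64 dst_off;
};

int main(int argc, char **argv)
{
        struct nfs_ioctl_clone_range_args args;
        struct stat st;
        int src, dst;

        if (argc != 3) {
                fprintf(stderr, "usage: %s <src> <dst>\n", argv[0]);
                return 1;
        }
        src = open(argv[1], O_RDONLY);
        dst = open(argv[2], O_WRONLY | O_CREAT, 0644);
        if (src < 0 || dst < 0 || fstat(src, &st) < 0) {
                perror("open/fstat");
                return 1;
        }

        /* Clone the whole file. Per the kernel-side checks in
         * fs/nfs/nfs4file.c below, src_off/dst_off must be aligned to
         * the server's clone_blksize, and count must be aligned too
         * unless the range ends exactly at the source file's size.
         */
        args.src_fd = src;
        args.src_off = 0;
        args.dst_off = 0;
        args.count = st.st_size;
        if (ioctl(dst, NFS_IOC_CLONE_RANGE, &args) < 0) {
                perror("NFS_IOC_CLONE_RANGE");
                return 1;
        }
        return 0;
}
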
......@@ -229,7 +229,7 @@ bl_read_pagelist(struct nfs_pgio_header *header)
struct parallel_io *par;
loff_t f_offset = header->args.offset;
size_t bytes_left = header->args.count;
unsigned int pg_offset, pg_len;
unsigned int pg_offset = header->args.pgbase, pg_len;
struct page **pages = header->args.pages;
int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
const bool is_dio = (header->dreq != NULL);
......@@ -262,7 +262,6 @@ bl_read_pagelist(struct nfs_pgio_header *header)
extent_length = be.be_length - (isect - be.be_f_offset);
}
pg_offset = f_offset & ~PAGE_CACHE_MASK;
if (is_dio) {
if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
pg_len = PAGE_CACHE_SIZE - pg_offset;
......@@ -273,9 +272,6 @@ bl_read_pagelist(struct nfs_pgio_header *header)
pg_len = PAGE_CACHE_SIZE;
}
isect += (pg_offset >> SECTOR_SHIFT);
extent_length -= (pg_offset >> SECTOR_SHIFT);
if (is_hole(&be)) {
bio = bl_submit_bio(READ, bio);
/* Fill hole w/ zeroes w/o accessing device */
......@@ -301,6 +297,7 @@ bl_read_pagelist(struct nfs_pgio_header *header)
extent_length -= (pg_len >> SECTOR_SHIFT);
f_offset += pg_len;
bytes_left -= pg_len;
pg_offset = 0;
}
if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
header->res.eof = 1;
......
......@@ -99,17 +99,6 @@ nfs4_callback_up(struct svc_serv *serv)
}
#if defined(CONFIG_NFS_V4_1)
static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net)
{
/*
* Create an svc_sock for the back channel service that shares the
* fore channel connection.
* Returns the input port (0) and sets the svc_serv bc_xprt on success
*/
return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0,
SVC_SOCK_ANONYMOUS);
}
/*
* The callback service for NFSv4.1 callbacks
*/
......@@ -184,11 +173,6 @@ static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
xprt->bc_serv = serv;
}
#else
static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net)
{
return 0;
}
static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv,
struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
{
......@@ -259,7 +243,8 @@ static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struc
svc_shutdown_net(serv, net);
}
static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net)
static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
struct net *net, struct rpc_xprt *xprt)
{
struct nfs_net *nn = net_generic(net, nfs_net_id);
int ret;
......@@ -275,20 +260,11 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct n
goto err_bind;
}
switch (minorversion) {
case 0:
ret = nfs4_callback_up_net(serv, net);
break;
case 1:
case 2:
ret = nfs41_callback_up_net(serv, net);
break;
default:
printk(KERN_ERR "NFS: unknown callback version: %d\n",
minorversion);
ret = -EINVAL;
break;
}
ret = -EPROTONOSUPPORT;
if (minorversion == 0)
ret = nfs4_callback_up_net(serv, net);
else if (xprt->ops->bc_up)
ret = xprt->ops->bc_up(serv, net);
if (ret < 0) {
printk(KERN_ERR "NFS: callback service start failed\n");
......@@ -364,7 +340,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
goto err_create;
}
ret = nfs_callback_up_net(minorversion, serv, net);
ret = nfs_callback_up_net(minorversion, serv, net, xprt);
if (ret < 0)
goto err_net;
......
......@@ -61,7 +61,6 @@ struct cb_compound_hdr_res {
};
struct cb_getattrargs {
struct sockaddr *addr;
struct nfs_fh fh;
uint32_t bitmap[2];
};
......@@ -76,7 +75,6 @@ struct cb_getattrres {
};
struct cb_recallargs {
struct sockaddr *addr;
struct nfs_fh fh;
nfs4_stateid stateid;
uint32_t truncate;
......@@ -119,9 +117,6 @@ extern __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
struct cb_sequenceres *res,
struct cb_process_state *cps);
extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
const nfs4_stateid *stateid);
#define RCA4_TYPE_MASK_RDATA_DLG 0
#define RCA4_TYPE_MASK_WDATA_DLG 1
#define RCA4_TYPE_MASK_DIR_DLG 2
......@@ -134,7 +129,6 @@ extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
#define RCA4_TYPE_MASK_ALL 0xf31f
struct cb_recallanyargs {
struct sockaddr *craa_addr;
uint32_t craa_objs_to_keep;
uint32_t craa_type_mask;
};
......@@ -144,7 +138,6 @@ extern __be32 nfs4_callback_recallany(struct cb_recallanyargs *args,
struct cb_process_state *cps);
struct cb_recallslotargs {
struct sockaddr *crsa_addr;
uint32_t crsa_target_highest_slotid;
};
extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
......@@ -152,7 +145,6 @@ extern __be32 nfs4_callback_recallslot(struct cb_recallslotargs *args,
struct cb_process_state *cps);
struct cb_layoutrecallargs {
struct sockaddr *cbl_addr;
uint32_t cbl_recall_type;
uint32_t cbl_layout_type;
uint32_t cbl_layoutchanged;
......@@ -196,9 +188,6 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
#if IS_ENABLED(CONFIG_NFS_V4)
extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
extern void nfs_callback_down(int minorversion, struct net *net);
extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
const nfs4_stateid *stateid);
extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
#endif /* CONFIG_NFS_V4 */
/*
* nfs41: Callbacks are expected to not cause substantial latency,
......@@ -209,6 +198,5 @@ extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
#define NFS41_BC_MAX_CALLBACKS 1
extern unsigned int nfs_callback_set_tcpport;
extern unsigned short nfs_callback_tcpport;
#endif /* __LINUX_FS_NFS_CALLBACK_H */
......@@ -17,9 +17,7 @@
#include "nfs4session.h"
#include "nfs4trace.h"
#ifdef NFS_DEBUG
#define NFSDBG_FACILITY NFSDBG_CALLBACK
#endif
__be32 nfs4_callback_getattr(struct cb_getattrargs *args,
struct cb_getattrres *res,
......
......@@ -18,19 +18,21 @@
#include "internal.h"
#include "nfs4session.h"
#define CB_OP_TAGLEN_MAXSZ (512)
#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ)
#define CB_OP_GETATTR_BITMAP_MAXSZ (4)
#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
CB_OP_GETATTR_BITMAP_MAXSZ + \
2 + 2 + 3 + 3)
#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#define CB_OP_TAGLEN_MAXSZ (512)
#define CB_OP_HDR_RES_MAXSZ (2 * 4) // opcode, status
#define CB_OP_GETATTR_BITMAP_MAXSZ (4 * 4) // bitmap length, 3 bitmaps
#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
CB_OP_GETATTR_BITMAP_MAXSZ + \
/* change, size, ctime, mtime */\
(2 + 2 + 3 + 3) * 4)
#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#if defined(CONFIG_NFS_V4_1)
#define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#define CB_OP_DEVICENOTIFY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \
4 + 1 + 3)
NFS4_MAX_SESSIONID_LEN + \
(1 + 3) * 4) // seqid, 3 slotids
#define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#define CB_OP_RECALLSLOT_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ)
#endif /* CONFIG_NFS_V4_1 */
......@@ -157,7 +159,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
if (unlikely(status != 0))
return status;
/* We do not like overly long tags! */
if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) {
if (hdr->taglen > CB_OP_TAGLEN_MAXSZ) {
printk("NFS: NFSv4 CALLBACK %s: client sent tag of length %u\n",
__func__, hdr->taglen);
return htonl(NFS4ERR_RESOURCE);
......@@ -198,7 +200,6 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr
status = decode_fh(xdr, &args->fh);
if (unlikely(status != 0))
goto out;
args->addr = svc_addr(rqstp);
status = decode_bitmap(xdr, args->bitmap);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
......@@ -210,7 +211,6 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr,
__be32 *p;
__be32 status;
args->addr = svc_addr(rqstp);
status = decode_stateid(xdr, &args->stateid);
if (unlikely(status != 0))
goto out;
......@@ -236,7 +236,6 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
__be32 status = 0;
uint32_t iomode;
args->cbl_addr = svc_addr(rqstp);
p = read_buf(xdr, 4 * sizeof(uint32_t));
if (unlikely(p == NULL)) {
status = htonl(NFS4ERR_BADXDR);
......@@ -383,13 +382,12 @@ static __be32 decode_sessionid(struct xdr_stream *xdr,
struct nfs4_sessionid *sid)
{
__be32 *p;
int len = NFS4_MAX_SESSIONID_LEN;
p = read_buf(xdr, len);
p = read_buf(xdr, NFS4_MAX_SESSIONID_LEN);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
memcpy(sid->data, p, len);
memcpy(sid->data, p, NFS4_MAX_SESSIONID_LEN);
return 0;
}
......@@ -500,7 +498,6 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
uint32_t bitmap[2];
__be32 *p, status;
args->craa_addr = svc_addr(rqstp);
p = read_buf(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
......@@ -519,7 +516,6 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp,
{
__be32 *p;
args->crsa_addr = svc_addr(rqstp);
p = read_buf(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
......@@ -684,13 +680,12 @@ static __be32 encode_sessionid(struct xdr_stream *xdr,
const struct nfs4_sessionid *sid)
{
__be32 *p;
int len = NFS4_MAX_SESSIONID_LEN;
p = xdr_reserve_space(xdr, len);
p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
memcpy(p, sid, len);
memcpy(p, sid, NFS4_MAX_SESSIONID_LEN);
return 0;
}
......@@ -704,7 +699,9 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
if (unlikely(status != 0))
goto out;
encode_sessionid(xdr, &res->csr_sessionid);
status = encode_sessionid(xdr, &res->csr_sessionid);
if (status)
goto out;
p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t));
if (unlikely(p == NULL))
......
......@@ -764,6 +764,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->time_delta = fsinfo->time_delta;
server->clone_blksize = fsinfo->clone_blksize;
/* We're airborne: set socket buffersize */
rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
}
......
......@@ -721,14 +721,12 @@ int nfs_async_inode_return_delegation(struct inode *inode,
struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
filemap_flush(inode->i_mapping);
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
if (delegation == NULL)
goto out_enoent;
if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid))
if (stateid != NULL &&
!clp->cl_mvops->match_stateid(&delegation->stateid, stateid))
goto out_enoent;
nfs_mark_return_delegation(server, delegation);
rcu_read_unlock();
......
......@@ -1714,9 +1714,6 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
dir->i_sb->s_id, dir->i_ino, dentry);
if (!new_valid_dev(rdev))
return -EINVAL;
attr.ia_mode = mode;
attr.ia_valid = ATTR_MODE;
......
......@@ -339,6 +339,19 @@ static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls)
}
}
static void ff_layout_mark_devices_valid(struct nfs4_ff_layout_segment *fls)
{
struct nfs4_deviceid_node *node;
int i;
if (!(fls->flags & FF_FLAGS_NO_IO_THRU_MDS))
return;
for (i = 0; i < fls->mirror_array_cnt; i++) {
node = &fls->mirror_array[i]->mirror_ds->id_node;
clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
}
}
static struct pnfs_layout_segment *
ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
struct nfs4_layoutget_res *lgr,
......@@ -499,6 +512,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
rc = ff_layout_check_layout(lgr);
if (rc)
goto out_err_free;
ff_layout_mark_devices_valid(fls);
ret = &fls->generic_hdr;
dprintk("<-- %s (success)\n", __func__);
......@@ -741,17 +755,17 @@ ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg,
}
static struct nfs4_pnfs_ds *
ff_layout_choose_best_ds_for_read(struct nfs_pageio_descriptor *pgio,
ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
int start_idx,
int *best_idx)
{
struct nfs4_ff_layout_segment *fls;
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_pnfs_ds *ds;
int idx;
fls = FF_LAYOUT_LSEG(pgio->pg_lseg);
/* mirrors are sorted by efficiency */
for (idx = 0; idx < fls->mirror_array_cnt; idx++) {
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, idx, false);
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
ds = nfs4_ff_layout_prepare_ds(lseg, idx, false);
if (ds) {
*best_idx = idx;
return ds;
......@@ -782,7 +796,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
if (pgio->pg_lseg == NULL)
goto out_mds;
ds = ff_layout_choose_best_ds_for_read(pgio, &ds_idx);
ds = ff_layout_choose_best_ds_for_read(pgio->pg_lseg, 0, &ds_idx);
if (!ds)
goto out_mds;
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx);
......@@ -1035,7 +1049,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
rpc_wake_up(&tbl->slot_tbl_waitq);
/* fall through */
default:
if (ff_layout_has_available_ds(lseg))
if (ff_layout_no_fallback_to_mds(lseg) ||
ff_layout_has_available_ds(lseg))
return -NFS4ERR_RESET_TO_PNFS;
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
......@@ -1153,7 +1168,6 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
}
/* NFS_PROTO call done callback routines */
static int ff_layout_read_done_cb(struct rpc_task *task,
struct nfs_pgio_header *hdr)
{
......@@ -1171,6 +1185,10 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
switch (err) {
case -NFS4ERR_RESET_TO_PNFS:
if (ff_layout_choose_best_ds_for_read(hdr->lseg,
hdr->pgio_mirror_idx + 1,
&hdr->pgio_mirror_idx))
goto out_eagain;
set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE,
&hdr->lseg->pls_layout->plh_flags);
pnfs_read_resend_pnfs(hdr);
......@@ -1179,11 +1197,13 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
ff_layout_reset_read(hdr);
return task->tk_status;
case -EAGAIN:
rpc_restart_call_prepare(task);
return -EAGAIN;
goto out_eagain;
}
return 0;
out_eagain:
rpc_restart_call_prepare(task);
return -EAGAIN;
}
static bool
......
......@@ -10,6 +10,7 @@
#define FS_NFS_NFS4FLEXFILELAYOUT_H
#define FF_FLAGS_NO_LAYOUTCOMMIT 1
#define FF_FLAGS_NO_IO_THRU_MDS 2
#include "../pnfs.h"
......@@ -145,6 +146,12 @@ FF_LAYOUT_MIRROR_COUNT(struct pnfs_layout_segment *lseg)
return FF_LAYOUT_LSEG(lseg)->mirror_array_cnt;
}
static inline bool
ff_layout_no_fallback_to_mds(struct pnfs_layout_segment *lseg)
{
return FF_LAYOUT_LSEG(lseg)->flags & FF_FLAGS_NO_IO_THRU_MDS;
}
static inline bool
ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node)
{
......
......@@ -16,9 +16,7 @@
#include <linux/nfs_fs.h>
#include "internal.h"
#ifdef NFS_DEBUG
# define NFSDBG_FACILITY NFSDBG_MOUNT
#endif
#define NFSDBG_FACILITY NFSDBG_MOUNT
/*
* Defined by RFC 1094, section A.3; and RFC 1813, section 5.1.4
......
......@@ -17,5 +17,6 @@ int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
int nfs42_proc_layoutstats_generic(struct nfs_server *,
struct nfs42_layoutstat_data *);
int nfs42_proc_clone(struct file *, struct file *, loff_t, loff_t, loff_t);
#endif /* __LINUX_FS_NFS_NFS4_2_H */
......@@ -271,3 +271,74 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
return PTR_ERR(task);
return 0;
}
static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
struct file *dst_f, loff_t src_offset,
loff_t dst_offset, loff_t count)
{
struct inode *src_inode = file_inode(src_f);
struct inode *dst_inode = file_inode(dst_f);
struct nfs_server *server = NFS_SERVER(dst_inode);
struct nfs42_clone_args args = {
.src_fh = NFS_FH(src_inode),
.dst_fh = NFS_FH(dst_inode),
.src_offset = src_offset,
.dst_offset = dst_offset,
.dst_bitmask = server->cache_consistency_bitmask,
};
struct nfs42_clone_res res = {
.server = server,
};
int status;
msg->rpc_argp = &args;
msg->rpc_resp = &res;
status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ);
if (status)
return status;
status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE);
if (status)
return status;
res.dst_fattr = nfs_alloc_fattr();
if (!res.dst_fattr)
return -ENOMEM;
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
if (status == 0)
status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
kfree(res.dst_fattr);
return status;
}
int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
loff_t src_offset, loff_t dst_offset, loff_t count)
{
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLONE],
};
struct inode *inode = file_inode(src_f);
struct nfs_server *server = NFS_SERVER(file_inode(src_f));
struct nfs4_exception exception = { };
int err;
if (!nfs_server_capable(inode, NFS_CAP_CLONE))
return -EOPNOTSUPP;
do {
err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset,
dst_offset, count);
if (err == -ENOTSUPP || err == -EOPNOTSUPP) {
NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE;
return -EOPNOTSUPP;
}
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
return err;
}
......@@ -34,6 +34,12 @@
1 /* opaque devaddr4 length */ + \
XDR_QUADLEN(PNFS_LAYOUTSTATS_MAXSIZE))
#define decode_layoutstats_maxsz (op_decode_hdr_maxsz)
#define encode_clone_maxsz (encode_stateid_maxsz + \
encode_stateid_maxsz + \
2 /* src offset */ + \
2 /* dst offset */ + \
2 /* count */)
#define decode_clone_maxsz (op_decode_hdr_maxsz)
#define NFS4_enc_allocate_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
......@@ -65,7 +71,20 @@
decode_sequence_maxsz + \
decode_putfh_maxsz + \
PNFS_LAYOUTSTATS_MAXDEV * decode_layoutstats_maxsz)
#define NFS4_enc_clone_sz (compound_encode_hdr_maxsz + \
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_savefh_maxsz + \
encode_putfh_maxsz + \
encode_clone_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_clone_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_savefh_maxsz + \
decode_putfh_maxsz + \
decode_clone_maxsz + \
decode_getattr_maxsz)
static void encode_fallocate(struct xdr_stream *xdr,
struct nfs42_falloc_args *args)
......@@ -128,6 +147,21 @@ static void encode_layoutstats(struct xdr_stream *xdr,
encode_uint32(xdr, 0);
}
static void encode_clone(struct xdr_stream *xdr,
struct nfs42_clone_args *args,
struct compound_hdr *hdr)
{
__be32 *p;
encode_op_hdr(xdr, OP_CLONE, decode_clone_maxsz, hdr);
encode_nfs4_stateid(xdr, &args->src_stateid);
encode_nfs4_stateid(xdr, &args->dst_stateid);
p = reserve_space(xdr, 3*8);
p = xdr_encode_hyper(p, args->src_offset);
p = xdr_encode_hyper(p, args->dst_offset);
xdr_encode_hyper(p, args->count);
}
/*
* Encode ALLOCATE request
*/
......@@ -206,6 +240,27 @@ static void nfs4_xdr_enc_layoutstats(struct rpc_rqst *req,
encode_nops(&hdr);
}
/*
* Encode CLONE request
*/
static void nfs4_xdr_enc_clone(struct rpc_rqst *req,
struct xdr_stream *xdr,
struct nfs42_clone_args *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->src_fh, &hdr);
encode_savefh(xdr, &hdr);
encode_putfh(xdr, args->dst_fh, &hdr);
encode_clone(xdr, args, &hdr);
encode_getfattr(xdr, args->dst_bitmask, &hdr);
encode_nops(&hdr);
}
static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_ALLOCATE);
......@@ -243,6 +298,11 @@ static int decode_layoutstats(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_LAYOUTSTATS);
}
static int decode_clone(struct xdr_stream *xdr)
{
return decode_op_hdr(xdr, OP_CLONE);
}
/*
* Decode ALLOCATE request
*/
......@@ -351,4 +411,39 @@ static int nfs4_xdr_dec_layoutstats(struct rpc_rqst *rqstp,
return status;
}
/*
* Decode CLONE request
*/
static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp,
struct xdr_stream *xdr,
struct nfs42_clone_res *res)
{
struct compound_hdr hdr;
int status;
status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
status = decode_sequence(xdr, &res->seq_res, rqstp);
if (status)
goto out;
status = decode_putfh(xdr);
if (status)
goto out;
status = decode_savefh(xdr);
if (status)
goto out;
status = decode_putfh(xdr);
if (status)
goto out;
status = decode_clone(xdr);
if (status)
goto out;
status = decode_getfattr(xdr, res->dst_fattr, res->server);
out:
res->rpc_status = status;
return status;
}
#endif /* __LINUX_FS_NFS_NFS4_2XDR_H */
......@@ -183,10 +183,12 @@ struct nfs4_state {
struct nfs4_exception {
long timeout;
int retry;
struct nfs4_state *state;
struct inode *inode;
long timeout;
unsigned char delay : 1,
recovering : 1,
retry : 1;
};
struct nfs4_state_recovery_ops {
......
......@@ -4,6 +4,7 @@
* Copyright (C) 1992 Rick Sladkey
*/
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/falloc.h>
#include <linux/nfs_fs.h>
#include "delegation.h"
......@@ -192,8 +193,138 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
return nfs42_proc_deallocate(filep, offset, len);
return nfs42_proc_allocate(filep, offset, len);
}
static noinline long
nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
u64 src_off, u64 dst_off, u64 count)
{
struct inode *dst_inode = file_inode(dst_file);
struct nfs_server *server = NFS_SERVER(dst_inode);
struct fd src_file;
struct inode *src_inode;
unsigned int bs = server->clone_blksize;
int ret;
/* dst file must be opened for writing */
if (!(dst_file->f_mode & FMODE_WRITE))
return -EINVAL;
ret = mnt_want_write_file(dst_file);
if (ret)
return ret;
src_file = fdget(srcfd);
if (!src_file.file) {
ret = -EBADF;
goto out_drop_write;
}
src_inode = file_inode(src_file.file);
/* src and dst must be different files */
ret = -EINVAL;
if (src_inode == dst_inode)
goto out_fput;
/* src file must be opened for reading */
if (!(src_file.file->f_mode & FMODE_READ))
goto out_fput;
/* src and dst must be regular files */
ret = -EISDIR;
if (!S_ISREG(src_inode->i_mode) || !S_ISREG(dst_inode->i_mode))
goto out_fput;
ret = -EXDEV;
if (src_file.file->f_path.mnt != dst_file->f_path.mnt ||
src_inode->i_sb != dst_inode->i_sb)
goto out_fput;
/* check alignment w.r.t. clone_blksize */
ret = -EINVAL;
if (bs) {
if (!IS_ALIGNED(src_off, bs) || !IS_ALIGNED(dst_off, bs))
goto out_fput;
if (!IS_ALIGNED(count, bs) && i_size_read(src_inode) != (src_off + count))
goto out_fput;
}
/* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */
if (dst_inode < src_inode) {
mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD);
} else {
mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_PARENT);
mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_CHILD);
}
/* flush all pending writes on both src and dst so that server
* has the latest data */
ret = nfs_sync_inode(src_inode);
if (ret)
goto out_unlock;
ret = nfs_sync_inode(dst_inode);
if (ret)
goto out_unlock;
ret = nfs42_proc_clone(src_file.file, dst_file, src_off, dst_off, count);
/* truncate inode page cache of the dst range so that future reads can fetch
* new data from server */
if (!ret)
truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1);
out_unlock:
if (dst_inode < src_inode) {
mutex_unlock(&src_inode->i_mutex);
mutex_unlock(&dst_inode->i_mutex);
} else {
mutex_unlock(&dst_inode->i_mutex);
mutex_unlock(&src_inode->i_mutex);
}
out_fput:
fdput(src_file);
out_drop_write:
mnt_drop_write_file(dst_file);
return ret;
}
static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
{
struct nfs_ioctl_clone_range_args args;
if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_off, args.dst_off, args.count);
}
#else
static long nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd,
u64 src_off, u64 dst_off, u64 count)
{
return -ENOTTY;
}
static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp)
{
return -ENOTTY;
}
#endif /* CONFIG_NFS_V4_2 */
long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
void __user *argp = (void __user *)arg;
switch (cmd) {
case NFS_IOC_CLONE:
return nfs42_ioctl_clone(file, arg, 0, 0, 0);
case NFS_IOC_CLONE_RANGE:
return nfs42_ioctl_clone_range(file, argp);
}
return -ENOTTY;
}
const struct file_operations nfs4_file_operations = {
#ifdef CONFIG_NFS_V4_2
.llseek = nfs4_file_llseek,
......@@ -216,4 +347,9 @@ const struct file_operations nfs4_file_operations = {
#endif /* CONFIG_NFS_V4_2 */
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
#ifdef CONFIG_COMPAT
.unlocked_ioctl = nfs4_ioctl,
#else
.compat_ioctl = nfs4_ioctl,
#endif /* CONFIG_COMPAT */
};
......@@ -78,7 +78,6 @@ struct nfs4_opendata;
static int _nfs4_proc_open(struct nfs4_opendata *data);
static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *, long *);
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
static int nfs4_proc_getattr(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, struct nfs4_label *label);
static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr, struct nfs4_label *label);
......@@ -239,6 +238,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
FATTR4_WORD1_TIME_DELTA
| FATTR4_WORD1_FS_LAYOUT_TYPES,
FATTR4_WORD2_LAYOUT_BLKSIZE
| FATTR4_WORD2_CLONE_BLKSIZE
};
const u32 nfs4_fs_locations_bitmap[3] = {
......@@ -344,13 +344,16 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
/* This is the error handling routine for processes that are allowed
* to sleep.
*/
int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
static int nfs4_do_handle_exception(struct nfs_server *server,
int errorcode, struct nfs4_exception *exception)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state *state = exception->state;
struct inode *inode = exception->inode;
int ret = errorcode;
exception->delay = 0;
exception->recovering = 0;
exception->retry = 0;
switch(errorcode) {
case 0:
......@@ -359,11 +362,9 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
if (inode && nfs4_have_delegation(inode, FMODE_READ)) {
nfs4_inode_return_delegation(inode);
exception->retry = 1;
return 0;
}
if (inode && nfs_async_inode_return_delegation(inode,
NULL) == 0)
goto wait_on_recovery;
if (state == NULL)
break;
ret = nfs4_schedule_stateid_recovery(server, state);
......@@ -409,11 +410,12 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
ret = -EBUSY;
break;
}
case -NFS4ERR_GRACE:
case -NFS4ERR_DELAY:
ret = nfs4_delay(server->client, &exception->timeout);
if (ret != 0)
break;
nfs_inc_server_stats(server, NFSIOS_DELAY);
case -NFS4ERR_GRACE:
exception->delay = 1;
return 0;
case -NFS4ERR_RETRY_UNCACHED_REP:
case -NFS4ERR_OLD_STATEID:
exception->retry = 1;
......@@ -434,14 +436,85 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
/* We failed to handle the error */
return nfs4_map_errors(ret);
wait_on_recovery:
ret = nfs4_wait_clnt_recover(clp);
exception->recovering = 1;
return 0;
}
/* This is the error handling routine for processes that are allowed
* to sleep.
*/
int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
{
struct nfs_client *clp = server->nfs_client;
int ret;
ret = nfs4_do_handle_exception(server, errorcode, exception);
if (exception->delay) {
ret = nfs4_delay(server->client, &exception->timeout);
goto out_retry;
}
if (exception->recovering) {
ret = nfs4_wait_clnt_recover(clp);
if (test_bit(NFS_MIG_FAILED, &server->mig_status))
return -EIO;
goto out_retry;
}
return ret;
out_retry:
if (ret == 0)
exception->retry = 1;
return ret;
}
static int
nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
int errorcode, struct nfs4_exception *exception)
{
struct nfs_client *clp = server->nfs_client;
int ret;
ret = nfs4_do_handle_exception(server, errorcode, exception);
if (exception->delay) {
rpc_delay(task, nfs4_update_delay(&exception->timeout));
goto out_retry;
}
if (exception->recovering) {
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
goto out_retry;
}
if (test_bit(NFS_MIG_FAILED, &server->mig_status))
return -EIO;
ret = -EIO;
return ret;
out_retry:
if (ret == 0)
exception->retry = 1;
return ret;
}
static int
nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server,
struct nfs4_state *state, long *timeout)
{
struct nfs4_exception exception = {
.state = state,
};
if (task->tk_status >= 0)
return 0;
if (timeout)
exception.timeout = *timeout;
task->tk_status = nfs4_async_handle_exception(task, server,
task->tk_status,
&exception);
if (exception.delay && timeout)
*timeout = exception.timeout;
if (exception.retry)
return -EAGAIN;
return 0;
}
/*
* Return 'true' if 'clp' is using an rpc_client that is integrity protected
* or 'false' otherwise.
......@@ -4530,7 +4603,7 @@ static inline int nfs4_server_supports_acls(struct nfs_server *server)
#define NFS4ACL_MAXPAGES DIV_ROUND_UP(XATTR_SIZE_MAX, PAGE_SIZE)
static int buf_to_pages_noslab(const void *buf, size_t buflen,
struct page **pages, unsigned int *pgbase)
struct page **pages)
{
struct page *newpage, **spages;
int rc = 0;
......@@ -4674,7 +4747,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
goto out_free;
args.acl_len = npages * PAGE_SIZE;
args.acl_pgbase = 0;
dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n",
__func__, buf, buflen, npages, args.acl_len);
......@@ -4766,7 +4838,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
return -EOPNOTSUPP;
if (npages > ARRAY_SIZE(pages))
return -ERANGE;
i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
i = buf_to_pages_noslab(buf, buflen, arg.acl_pages);
if (i < 0)
return i;
nfs4_inode_return_delegation(inode);
......@@ -4955,79 +5027,6 @@ nfs4_set_security_label(struct dentry *dentry, const void *buf, size_t buflen)
#endif /* CONFIG_NFS_V4_SECURITY_LABEL */
static int
nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
struct nfs4_state *state, long *timeout)
{
struct nfs_client *clp = server->nfs_client;
if (task->tk_status >= 0)
return 0;
switch(task->tk_status) {
case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
if (nfs4_schedule_stateid_recovery(server, state) < 0)
goto recovery_failed;
goto wait_on_recovery;
case -NFS4ERR_EXPIRED:
if (state != NULL) {
if (nfs4_schedule_stateid_recovery(server, state) < 0)
goto recovery_failed;
}
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_STALE_CLIENTID:
nfs4_schedule_lease_recovery(clp);
goto wait_on_recovery;
case -NFS4ERR_MOVED:
if (nfs4_schedule_migration_recovery(server) < 0)
goto recovery_failed;
goto wait_on_recovery;
case -NFS4ERR_LEASE_MOVED:
nfs4_schedule_lease_moved_recovery(clp);
goto wait_on_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_DEADSESSION:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_SEQ_FALSE_RETRY:
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
goto wait_on_recovery;
#endif /* CONFIG_NFS_V4_1 */
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
rpc_delay(task, nfs4_update_delay(timeout));
goto restart_call;
case -NFS4ERR_GRACE:
rpc_delay(task, NFS4_POLL_RETRY_MAX);
case -NFS4ERR_RETRY_UNCACHED_REP:
case -NFS4ERR_OLD_STATEID:
goto restart_call;
}
task->tk_status = nfs4_map_errors(task->tk_status);
return 0;
recovery_failed:
task->tk_status = -EIO;
return 0;
wait_on_recovery:
rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
if (test_bit(NFS_MIG_FAILED, &server->mig_status))
goto recovery_failed;
restart_call:
task->tk_status = 0;
return -EAGAIN;
}
static void nfs4_init_boot_verifier(const struct nfs_client *clp,
nfs4_verifier *bootverf)
{
......@@ -5522,7 +5521,7 @@ struct nfs4_unlockdata {
struct nfs4_lock_state *lsp;
struct nfs_open_context *ctx;
struct file_lock fl;
const struct nfs_server *server;
struct nfs_server *server;
unsigned long timestamp;
};
......@@ -8718,7 +8717,8 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_ALLOCATE
| NFS_CAP_DEALLOCATE
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS,
| NFS_CAP_LAYOUTSTATS
| NFS_CAP_CLONE,
.init_client = nfs41_init_client,
.shutdown_client = nfs41_shutdown_client,
.match_stateid = nfs41_match_stateid,
......
......@@ -1659,7 +1659,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
*p = cpu_to_be32(FATTR4_WORD0_ACL);
p = reserve_space(xdr, 4);
*p = cpu_to_be32(arg->acl_len);
xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
xdr_write_pages(xdr, arg->acl_pages, 0, arg->acl_len);
}
static void
......@@ -2491,7 +2491,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
args->acl_pages, args->acl_pgbase, args->acl_len);
args->acl_pages, 0, args->acl_len);
encode_nops(&hdr);
}
......@@ -4375,6 +4375,11 @@ static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
goto xdr_error;
if ((status = decode_attr_files_total(xdr, bitmap, &fsstat->tfiles)) != 0)
goto xdr_error;
status = -EIO;
if (unlikely(bitmap[0]))
goto xdr_error;
if ((status = decode_attr_space_avail(xdr, bitmap, &fsstat->abytes)) != 0)
goto xdr_error;
if ((status = decode_attr_space_free(xdr, bitmap, &fsstat->fbytes)) != 0)
......@@ -4574,6 +4579,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
status = -EIO;
if (unlikely(bitmap[0]))
goto xdr_error;
status = decode_attr_mode(xdr, bitmap, &fmode);
if (status < 0)
goto xdr_error;
......@@ -4627,6 +4636,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
status = -EIO;
if (unlikely(bitmap[1]))
goto xdr_error;
status = decode_attr_mdsthreshold(xdr, bitmap, fattr->mdsthreshold);
if (status < 0)
goto xdr_error;
......@@ -4764,6 +4777,28 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
return 0;
}
/*
* The granularity of a CLONE operation.
*/
static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
uint32_t *res)
{
__be32 *p;
dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
*res = 0;
if (bitmap[2] & FATTR4_WORD2_CLONE_BLKSIZE) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p)) {
print_overflow_msg(__func__, xdr);
return -EIO;
}
*res = be32_to_cpup(p);
bitmap[2] &= ~FATTR4_WORD2_CLONE_BLKSIZE;
}
return 0;
}
static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
{
unsigned int savep;
......@@ -4789,13 +4824,26 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0)
goto xdr_error;
fsinfo->wtpref = fsinfo->wtmax;
status = -EIO;
if (unlikely(bitmap[0]))
goto xdr_error;
status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta);
if (status != 0)
goto xdr_error;
status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype);
if (status != 0)
goto xdr_error;
status = -EIO;
if (unlikely(bitmap[1]))
goto xdr_error;
status = decode_attr_layout_blksize(xdr, bitmap, &fsinfo->blksize);
if (status)
goto xdr_error;
status = decode_attr_clone_blksize(xdr, bitmap, &fsinfo->clone_blksize);
if (status)
goto xdr_error;
......@@ -7465,6 +7513,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(ALLOCATE, enc_allocate, dec_allocate),
PROC(DEALLOCATE, enc_deallocate, dec_deallocate),
PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
PROC(CLONE, enc_clone, dec_clone),
#endif /* CONFIG_NFS_V4_2 */
};
......
......@@ -90,7 +90,7 @@
#define NFS_DEF_OPTIONS "vers=2,udp,rsize=4096,wsize=4096"
/* Parameters passed from the kernel command line */
static char nfs_root_parms[256] __initdata = "";
static char nfs_root_parms[NFS_MAXPATHLEN + 1] __initdata = "";
/* Text-based mount options passed to super.c */
static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
......
......@@ -1912,12 +1912,13 @@ static void pnfs_ld_handle_write_error(struct nfs_pgio_header *hdr)
*/
void pnfs_ld_write_done(struct nfs_pgio_header *hdr)
{
trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
if (!hdr->pnfs_error) {
if (likely(!hdr->pnfs_error)) {
pnfs_set_layoutcommit(hdr->inode, hdr->lseg,
hdr->mds_offset + hdr->res.count);
hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
} else
}
trace_nfs4_pnfs_write(hdr, hdr->pnfs_error);
if (unlikely(hdr->pnfs_error))
pnfs_ld_handle_write_error(hdr);
hdr->mds_ops->rpc_release(hdr);
}
......@@ -2028,11 +2029,12 @@ static void pnfs_ld_handle_read_error(struct nfs_pgio_header *hdr)
*/
void pnfs_ld_read_done(struct nfs_pgio_header *hdr)
{
trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
if (likely(!hdr->pnfs_error)) {
__nfs4_read_done_cb(hdr);
hdr->mds_ops->rpc_call_done(&hdr->task, hdr);
} else
}
trace_nfs4_pnfs_read(hdr, hdr->pnfs_error);
if (unlikely(hdr->pnfs_error))
pnfs_ld_handle_read_error(hdr);
hdr->mds_ops->rpc_release(hdr);
}
......
......@@ -246,6 +246,13 @@ static void nfs_readpage_retry(struct rpc_task *task,
nfs_set_pgio_error(hdr, -EIO, argp->offset);
return;
}
/* For non rpc-based layout drivers, retry-through-MDS */
if (!task->tk_ops) {
hdr->pnfs_error = -EAGAIN;
return;
}
/* Yes, so retry the read at the end of the hdr */
hdr->mds_offset += resp->count;
argp->offset += resp->count;
......@@ -268,7 +275,7 @@ static void nfs_readpage_result(struct rpc_task *task,
hdr->good_bytes = bound - hdr->io_start;
}
spin_unlock(&hdr->lock);
} else if (hdr->res.count != hdr->args.count)
} else if (hdr->res.count < hdr->args.count)
nfs_readpage_retry(task, hdr);
}
......
......@@ -2816,7 +2816,6 @@ static int nfs4_validate_mount_data(void *options,
* NFS client for backwards compatibility
*/
unsigned int nfs_callback_set_tcpport;
unsigned short nfs_callback_tcpport;
/* Default cache timeout is 10 minutes */
unsigned int nfs_idmap_cache_timeout = 600;
/* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */
......@@ -2827,7 +2826,6 @@ char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = "";
bool recover_lost_locks = false;
EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport);
EXPORT_SYMBOL_GPL(nfs_callback_tcpport);
EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout);
EXPORT_SYMBOL_GPL(nfs4_disable_idmapping);
EXPORT_SYMBOL_GPL(max_session_slots);
......
......@@ -1505,6 +1505,13 @@ static void nfs_writeback_result(struct rpc_task *task,
task->tk_status = -EIO;
return;
}
/* For non rpc-based layout drivers, retry-through-MDS */
if (!task->tk_ops) {
hdr->pnfs_error = -EAGAIN;
return;
}
/* Was this an NFSv2 write or an NFSv3 stable write? */
if (resp->verf->committed != NFS_UNSTABLE) {
/* Resend from where the server left off */
......
......@@ -130,6 +130,7 @@ enum nfs_opnum4 {
OP_READ_PLUS = 68,
OP_SEEK = 69,
OP_WRITE_SAME = 70,
OP_CLONE = 71,
OP_ILLEGAL = 10044,
};
......@@ -421,6 +422,7 @@ enum lock_type4 {
#define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0)
#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13)
#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16)
/* MDS threshold bitmap bits */
......@@ -501,6 +503,7 @@ enum {
NFSPROC4_CLNT_ALLOCATE,
NFSPROC4_CLNT_DEALLOCATE,
NFSPROC4_CLNT_LAYOUTSTATS,
NFSPROC4_CLNT_CLONE,
};
/* nfs41 types */
......
......@@ -147,6 +147,7 @@ struct nfs_server {
unsigned int acdirmax;
unsigned int namelen;
unsigned int options; /* extra options enabled by mount */
unsigned int clone_blksize; /* granularity of a CLONE operation */
#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
#define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */
......@@ -243,5 +244,6 @@ struct nfs_server {
#define NFS_CAP_ALLOCATE (1U << 20)
#define NFS_CAP_DEALLOCATE (1U << 21)
#define NFS_CAP_LAYOUTSTATS (1U << 22)
#define NFS_CAP_CLONE (1U << 23)
#endif
......@@ -141,6 +141,7 @@ struct nfs_fsinfo {
__u32 lease_time; /* in seconds */
__u32 layouttype; /* supported pnfs layout driver */
__u32 blksize; /* preferred pnfs io block size */
__u32 clone_blksize; /* granularity of a CLONE operation */
};
struct nfs_fsstat {
......@@ -359,6 +360,25 @@ struct nfs42_layoutstat_data {
struct nfs42_layoutstat_res res;
};
struct nfs42_clone_args {
struct nfs4_sequence_args seq_args;
struct nfs_fh *src_fh;
struct nfs_fh *dst_fh;
nfs4_stateid src_stateid;
nfs4_stateid dst_stateid;
__u64 src_offset;
__u64 dst_offset;
__u64 count;
const u32 *dst_bitmask;
};
struct nfs42_clone_res {
struct nfs4_sequence_res seq_res;
unsigned int rpc_status;
struct nfs_fattr *dst_fattr;
const struct nfs_server *server;
};
struct stateowner_id {
__u64 create_time;
__u32 uniquifier;
......@@ -528,7 +548,7 @@ struct nfs4_delegreturnargs {
struct nfs4_delegreturnres {
struct nfs4_sequence_res seq_res;
struct nfs_fattr * fattr;
const struct nfs_server *server;
struct nfs_server *server;
};
/*
......@@ -601,7 +621,7 @@ struct nfs_removeargs {
struct nfs_removeres {
struct nfs4_sequence_res seq_res;
const struct nfs_server *server;
struct nfs_server *server;
struct nfs_fattr *dir_attr;
struct nfs4_change_info cinfo;
};
......@@ -619,7 +639,7 @@ struct nfs_renameargs {
struct nfs_renameres {
struct nfs4_sequence_res seq_res;
const struct nfs_server *server;
struct nfs_server *server;
struct nfs4_change_info old_cinfo;
struct nfs_fattr *old_fattr;
struct nfs4_change_info new_cinfo;
......@@ -685,7 +705,6 @@ struct nfs_setaclargs {
struct nfs4_sequence_args seq_args;
struct nfs_fh * fh;
size_t acl_len;
unsigned int acl_pgbase;
struct page ** acl_pages;
};
......@@ -697,7 +716,6 @@ struct nfs_getaclargs {
struct nfs4_sequence_args seq_args;
struct nfs_fh * fh;
size_t acl_len;
unsigned int acl_pgbase;
struct page ** acl_pages;
};
......
......@@ -38,6 +38,11 @@ void xprt_free_bc_request(struct rpc_rqst *req);
int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs);
/* Socket backchannel transport methods */
int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs);
void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs);
void xprt_free_bc_rqst(struct rpc_rqst *req);
/*
* Determine if a shared backchannel is in use
*/
......
......@@ -226,9 +226,13 @@ extern void svc_rdma_put_frmr(struct svcxprt_rdma *,
struct svc_rdma_fastreg_mr *);
extern void svc_sq_reap(struct svcxprt_rdma *);
extern void svc_rq_reap(struct svcxprt_rdma *);
extern struct svc_xprt_class svc_rdma_class;
extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
extern struct svc_xprt_class svc_rdma_class;
#ifdef CONFIG_SUNRPC_BACKCHANNEL
extern struct svc_xprt_class svc_rdma_bc_class;
#endif
/* svc_rdma.c */
extern int svc_rdma_init(void);
extern void svc_rdma_cleanup(void);
......
......@@ -54,6 +54,8 @@ enum rpc_display_format_t {
struct rpc_task;
struct rpc_xprt;
struct seq_file;
struct svc_serv;
struct net;
/*
* This describes a complete RPC request
......@@ -136,6 +138,12 @@ struct rpc_xprt_ops {
int (*enable_swap)(struct rpc_xprt *xprt);
void (*disable_swap)(struct rpc_xprt *xprt);
void (*inject_disconnect)(struct rpc_xprt *xprt);
int (*bc_setup)(struct rpc_xprt *xprt,
unsigned int min_reqs);
int (*bc_up)(struct svc_serv *serv, struct net *net);
void (*bc_free_rqst)(struct rpc_rqst *rqst);
void (*bc_destroy)(struct rpc_xprt *xprt,
unsigned int max_reqs);
};
/*
......@@ -153,6 +161,7 @@ enum xprt_transports {
XPRT_TRANSPORT_TCP = IPPROTO_TCP,
XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC,
XPRT_TRANSPORT_RDMA = 256,
XPRT_TRANSPORT_BC_RDMA = XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC,
XPRT_TRANSPORT_LOCAL = 257,
};
......
......@@ -44,6 +44,8 @@ struct sock_xprt {
*/
unsigned long sock_state;
struct delayed_work connect_worker;
struct work_struct recv_worker;
struct mutex recv_mutex;
struct sockaddr_storage srcaddr;
unsigned short srcport;
......
......@@ -7,6 +7,8 @@
#ifndef _UAPI_LINUX_NFS_H
#define _UAPI_LINUX_NFS_H
#include <linux/types.h>
#define NFS_PROGRAM 100003
#define NFS_PORT 2049
#define NFS_MAXDATA 8192
......@@ -31,6 +33,17 @@
#define NFS_PIPE_DIRNAME "nfs"
/* NFS ioctls */
/* Let's follow btrfs lead on CLONE to avoid messing userspace */
#define NFS_IOC_CLONE _IOW(0x94, 9, int)
#define NFS_IOC_CLONE_RANGE _IOW(0x94, 13, int)
struct nfs_ioctl_clone_range_args {
__s64 src_fd;
__u64 src_off, count;
__u64 dst_off;
};
/*
* NFS stats. The good thing with these values is that NFSv3 errors are
* a superset of NFSv2 errors (with the exception of NFSERR_WFLUSH which
......
......@@ -137,6 +137,14 @@ struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
* callback requests can be up to 4096 bytes in size.
*/
int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
{
if (!xprt->ops->bc_setup)
return 0;
return xprt->ops->bc_setup(xprt, min_reqs);
}
EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
{
struct rpc_rqst *req;
struct list_head tmp_list;
......@@ -192,7 +200,6 @@ int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs)
dprintk("RPC: setup backchannel transport failed\n");
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
/**
* xprt_destroy_backchannel - Destroys the backchannel preallocated structures.
......@@ -204,6 +211,13 @@ EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
* of reqs specified by the caller.
*/
void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
{
if (xprt->ops->bc_destroy)
xprt->ops->bc_destroy(xprt, max_reqs);
}
EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs)
{
struct rpc_rqst *req = NULL, *tmp = NULL;
......@@ -227,7 +241,6 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
dprintk("RPC: backchannel list empty= %s\n",
list_empty(&xprt->bc_pa_list) ? "true" : "false");
}
EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
static struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt, __be32 xid)
{
......@@ -264,6 +277,13 @@ void xprt_free_bc_request(struct rpc_rqst *req)
{
struct rpc_xprt *xprt = req->rq_xprt;
xprt->ops->bc_free_rqst(req);
}
void xprt_free_bc_rqst(struct rpc_rqst *req)
{
struct rpc_xprt *xprt = req->rq_xprt;
dprintk("RPC: free backchannel req=%p\n", req);
req->rq_connect_cookie = xprt->connect_cookie - 1;
......
......@@ -1367,11 +1367,6 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
/* reset result send buffer "put" position */
resv->iov_len = 0;
if (rqstp->rq_prot != IPPROTO_TCP) {
printk(KERN_ERR "No support for Non-TCP transports!\n");
BUG();
}
/*
* Skip the next two words because they've already been
* processed in the transport
......
......@@ -76,7 +76,7 @@ static int
proc_dodebug(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
char tmpbuf[20], c, *s;
char tmpbuf[20], c, *s = NULL;
char __user *p;
unsigned int value;
size_t left, len;
......@@ -103,23 +103,24 @@ proc_dodebug(struct ctl_table *table, int write,
return -EFAULT;
tmpbuf[left] = '\0';
for (s = tmpbuf, value = 0; '0' <= *s && *s <= '9'; s++, left--)
value = 10 * value + (*s - '0');
if (*s && !isspace(*s))
return -EINVAL;
while (left && isspace(*s))
left--, s++;
value = simple_strtol(tmpbuf, &s, 0);
if (s) {
left -= (s - tmpbuf);
if (left && !isspace(*s))
return -EINVAL;
while (left && isspace(*s))
left--, s++;
} else
left = 0;
*(unsigned int *) table->data = value;
/* Display the RPC tasks on writing to rpc_debug */
if (strcmp(table->procname, "rpc_debug") == 0)
rpc_show_tasks(&init_net);
} else {
if (!access_ok(VERIFY_WRITE, buffer, left))
return -EFAULT;
len = sprintf(tmpbuf, "%d", *(unsigned int *) table->data);
len = sprintf(tmpbuf, "0x%04x", *(unsigned int *) table->data);
if (len > left)
len = left;
if (__copy_to_user(buffer, tmpbuf, len))
if (copy_to_user(buffer, tmpbuf, len))
return -EFAULT;
if ((left -= len) > 0) {
if (put_user('\n', (char __user *)buffer + len))
......
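
As a usage illustration for the sysctl change above: proc_dodebug() now parses writes with simple_strtol(..., 0), so hex, decimal, and octal forms are all accepted, and reads are formatted as "0x%04x". A hypothetical userspace sketch follows; the /proc/sys/sunrpc/rpc_debug path is inferred from the "rpc_debug" procname checked in the code above.

/* Hypothetical example (not part of this merge): set and read back the
 * sunrpc debug mask in hex, which the rewritten proc_dodebug() accepts.
 */
#include <stdio.h>

int main(void)
{
        const char *path = "/proc/sys/sunrpc/rpc_debug";
        char buf[32];
        FILE *f;

        /* Write the mask in hex; decimal "32767" would also parse,
         * since the kernel now uses base-0 simple_strtol(). */
        f = fopen(path, "w");
        if (!f) {
                perror(path);
                return 1;
        }
        fputs("0x7fff\n", f);
        fclose(f);

        /* Read it back; the kernel now prints 0x%04x instead of %d. */
        f = fopen(path, "r");
        if (!f) {
                perror(path);
                return 1;
        }
        if (fgets(buf, sizeof(buf), f))
                printf("rpc_debug = %s", buf);
        fclose(f);
        return 0;
}
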
......@@ -5,3 +5,4 @@ rpcrdma-y := transport.o rpc_rdma.o verbs.o \
svc_rdma.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
module.o
rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
......@@ -256,8 +256,11 @@ frwr_sendcompletion(struct ib_wc *wc)
/* WARNING: Only wr_id and status are reliable at this point */
r = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
pr_warn("RPC: %s: frmr %p flushed, status %s (%d)\n",
__func__, r, ib_wc_status_msg(wc->status), wc->status);
if (wc->status == IB_WC_WR_FLUSH_ERR)
dprintk("RPC: %s: frmr %p flushed\n", __func__, r);
else
pr_warn("RPC: %s: frmr %p error, status %s (%d)\n",
__func__, r, ib_wc_status_msg(wc->status), wc->status);
r->r.frmr.fr_state = FRMR_IS_STALE;
}
......
......@@ -441,6 +441,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
enum rpcrdma_chunktype rtype, wtype;
struct rpcrdma_msg *headerp;
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
return rpcrdma_bc_marshal_reply(rqst);
#endif
/*
* rpclen gets amount of data in first buffer, which is the
* pre-registered buffer.
......@@ -711,6 +716,37 @@ rpcrdma_connect_worker(struct work_struct *work)
spin_unlock_bh(&xprt->transport_lock);
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes
* the RPC/RDMA header small and fixed in size, so it is
* straightforward to check the RPC header's direction field.
*/
static bool
rpcrdma_is_bcall(struct rpcrdma_msg *headerp)
{
__be32 *p = (__be32 *)headerp;
if (headerp->rm_type != rdma_msg)
return false;
if (headerp->rm_body.rm_chunks[0] != xdr_zero)
return false;
if (headerp->rm_body.rm_chunks[1] != xdr_zero)
return false;
if (headerp->rm_body.rm_chunks[2] != xdr_zero)
return false;
/* sanity */
if (p[7] != headerp->rm_xid)
return false;
/* call direction */
if (p[8] != cpu_to_be32(RPC_CALL))
return false;
return true;
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/*
* This function is called when an async event is posted to
* the connection which changes the connection state. All it
......@@ -723,8 +759,8 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
schedule_delayed_work(&ep->rep_connect_worker, 0);
}
/*
* Called as a tasklet to do req/reply match and complete a request
/* Process received RPC/RDMA messages.
*
* Errors must result in the RPC task either being awakened, or
* allowed to timeout, to discover the errors at that time.
*/
......@@ -741,52 +777,32 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
unsigned long cwnd;
u32 credits;
/* Check status. If bad, signal disconnect and return rep to pool */
if (rep->rr_len == ~0U) {
rpcrdma_recv_buffer_put(rep);
if (r_xprt->rx_ep.rep_connected == 1) {
r_xprt->rx_ep.rep_connected = -EIO;
rpcrdma_conn_func(&r_xprt->rx_ep);
}
return;
}
if (rep->rr_len < RPCRDMA_HDRLEN_MIN) {
dprintk("RPC: %s: short/invalid reply\n", __func__);
goto repost;
}
dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
if (rep->rr_len == RPCRDMA_BAD_LEN)
goto out_badstatus;
if (rep->rr_len < RPCRDMA_HDRLEN_MIN)
goto out_shortreply;
headerp = rdmab_to_msg(rep->rr_rdmabuf);
if (headerp->rm_vers != rpcrdma_version) {
dprintk("RPC: %s: invalid version %d\n",
__func__, be32_to_cpu(headerp->rm_vers));
goto repost;
}
if (headerp->rm_vers != rpcrdma_version)
goto out_badversion;
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
if (rpcrdma_is_bcall(headerp))
goto out_bcall;
#endif
/* Get XID and try for a match. */
spin_lock(&xprt->transport_lock);
/* Match incoming rpcrdma_rep to an rpcrdma_req to
* get context for handling any incoming chunks.
*/
spin_lock_bh(&xprt->transport_lock);
rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
if (rqst == NULL) {
spin_unlock(&xprt->transport_lock);
dprintk("RPC: %s: reply 0x%p failed "
"to match any request xid 0x%08x len %d\n",
__func__, rep, be32_to_cpu(headerp->rm_xid),
rep->rr_len);
repost:
r_xprt->rx_stats.bad_reply_count++;
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
rpcrdma_recv_buffer_put(rep);
if (!rqst)
goto out_nomatch;
return;
}
/* get request object */
req = rpcr_to_rdmar(rqst);
if (req->rl_reply) {
spin_unlock(&xprt->transport_lock);
dprintk("RPC: %s: duplicate reply 0x%p to RPC "
"request 0x%p: xid 0x%08x\n", __func__, rep, req,
be32_to_cpu(headerp->rm_xid));
goto repost;
}
if (req->rl_reply)
goto out_duplicate;
dprintk("RPC: %s: reply 0x%p completes request 0x%p\n"
" RPC request 0x%p xid 0x%08x\n",
......@@ -883,8 +899,50 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
if (xprt->cwnd > cwnd)
xprt_release_rqst_cong(rqst->rq_task);
xprt_complete_rqst(rqst->rq_task, status);
spin_unlock_bh(&xprt->transport_lock);
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
__func__, xprt, rqst, status);
xprt_complete_rqst(rqst->rq_task, status);
spin_unlock(&xprt->transport_lock);
return;
out_badstatus:
rpcrdma_recv_buffer_put(rep);
if (r_xprt->rx_ep.rep_connected == 1) {
r_xprt->rx_ep.rep_connected = -EIO;
rpcrdma_conn_func(&r_xprt->rx_ep);
}
return;
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
out_bcall:
rpcrdma_bc_receive_call(r_xprt, rep);
return;
#endif
out_shortreply:
dprintk("RPC: %s: short/invalid reply\n", __func__);
goto repost;
out_badversion:
dprintk("RPC: %s: invalid version %d\n",
__func__, be32_to_cpu(headerp->rm_vers));
goto repost;
out_nomatch:
spin_unlock_bh(&xprt->transport_lock);
dprintk("RPC: %s: no match for incoming xid 0x%08x len %d\n",
__func__, be32_to_cpu(headerp->rm_xid),
rep->rr_len);
goto repost;
out_duplicate:
spin_unlock_bh(&xprt->transport_lock);
dprintk("RPC: %s: "
"duplicate reply %p to RPC request %p: xid 0x%08x\n",
__func__, rep, req, be32_to_cpu(headerp->rm_xid));
repost:
r_xprt->rx_stats.bad_reply_count++;
if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep))
rpcrdma_recv_buffer_put(rep);
}
......@@ -239,6 +239,9 @@ void svc_rdma_cleanup(void)
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
svc_unreg_xprt_class(&svc_rdma_bc_class);
#endif
svc_unreg_xprt_class(&svc_rdma_class);
kmem_cache_destroy(svc_rdma_map_cachep);
kmem_cache_destroy(svc_rdma_ctxt_cachep);
......@@ -286,6 +289,9 @@ int svc_rdma_init(void)
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
svc_reg_xprt_class(&svc_rdma_bc_class);
#endif
return 0;
err1:
kmem_cache_destroy(svc_rdma_map_cachep);
......
......@@ -56,6 +56,7 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *, int);
static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
......@@ -95,6 +96,63 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_ident = XPRT_TRANSPORT_RDMA,
};
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
struct sockaddr *, int, int);
static void svc_rdma_bc_detach(struct svc_xprt *);
static void svc_rdma_bc_free(struct svc_xprt *);
static struct svc_xprt_ops svc_rdma_bc_ops = {
.xpo_create = svc_rdma_bc_create,
.xpo_detach = svc_rdma_bc_detach,
.xpo_free = svc_rdma_bc_free,
.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
.xpo_secure_port = svc_rdma_secure_port,
};
struct svc_xprt_class svc_rdma_bc_class = {
.xcl_name = "rdma-bc",
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_bc_ops,
.xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN)
};
static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
int flags)
{
struct svcxprt_rdma *cma_xprt;
struct svc_xprt *xprt;
cma_xprt = rdma_create_xprt(serv, 0);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
xprt = &cma_xprt->sc_xprt;
svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
serv->sv_bc_xprt = xprt;
dprintk("svcrdma: %s(%p)\n", __func__, xprt);
return xprt;
}
static void svc_rdma_bc_detach(struct svc_xprt *xprt)
{
dprintk("svcrdma: %s(%p)\n", __func__, xprt);
}
static void svc_rdma_bc_free(struct svc_xprt *xprt)
{
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
dprintk("svcrdma: %s(%p)\n", __func__, xprt);
if (xprt)
kfree(rdma);
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt;
......
......@@ -676,7 +676,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
static int
xprt_rdma_enable_swap(struct rpc_xprt *xprt)
{
return -EINVAL;
return 0;
}
static void
......@@ -705,7 +705,13 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
.print_stats = xprt_rdma_print_stats,
.enable_swap = xprt_rdma_enable_swap,
.disable_swap = xprt_rdma_disable_swap,
.inject_disconnect = xprt_rdma_inject_disconnect
.inject_disconnect = xprt_rdma_inject_disconnect,
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
.bc_setup = xprt_rdma_bc_setup,
.bc_up = xprt_rdma_bc_up,
.bc_free_rqst = xprt_rdma_bc_free_rqst,
.bc_destroy = xprt_rdma_bc_destroy,
#endif
};
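These four methods are what "SUNRPC: Abstract backchannel operations" dispatches through, so the generic backchannel code no longer hard-codes the TCP implementation. The caller side reduces to roughly the following shape; this is a condensed, self-contained model of the expected xprt.c pattern, not the literal upstream body.

#include <stddef.h>

/* Minimal model: an ops table with an optional bc_setup method. */
struct xprt_ops {
	int (*bc_setup)(void *xprt, unsigned int reqs);
};

struct xprt {
	const struct xprt_ops *ops;
};

/* Generic caller: transports without a backchannel leave bc_setup
 * NULL, and the generic code simply reports "nothing to do" instead
 * of special-casing TCP. RDMA now fills the slot in alongside TCP. */
static int setup_backchannel(struct xprt *x, unsigned int min_reqs)
{
	if (!x->ops->bc_setup)
		return 0;
	return x->ops->bc_setup(x, min_reqs);
}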
static struct xprt_class xprt_rdma = {
......@@ -732,6 +738,7 @@ void xprt_rdma_cleanup(void)
dprintk("RPC: %s: xprt_unregister returned %i\n",
__func__, rc);
rpcrdma_destroy_wq();
frwr_destroy_recovery_wq();
}
......@@ -743,8 +750,15 @@ int xprt_rdma_init(void)
if (rc)
return rc;
rc = rpcrdma_alloc_wq();
if (rc) {
frwr_destroy_recovery_wq();
return rc;
}
rc = xprt_register_transport(&xprt_rdma);
if (rc) {
rpcrdma_destroy_wq();
frwr_destroy_recovery_wq();
return rc;
}
......
......@@ -77,9 +77,6 @@ struct rpcrdma_ia {
* RDMA Endpoint -- one per transport instance
*/
#define RPCRDMA_WC_BUDGET (128)
#define RPCRDMA_POLLSIZE (16)
struct rpcrdma_ep {
atomic_t rep_cqcount;
int rep_cqinit;
......@@ -89,8 +86,6 @@ struct rpcrdma_ep {
struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
struct delayed_work rep_connect_worker;
struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE];
struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE];
};
/*
......@@ -106,6 +101,16 @@ struct rpcrdma_ep {
*/
#define RPCRDMA_IGNORE_COMPLETION (0ULL)
/* Pre-allocate extra Work Requests for handling backward receives
* and sends. This is a fixed value because the Work Queues are
* allocated when the forward channel is set up.
*/
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
#define RPCRDMA_BACKWARD_WRS (8)
#else
#define RPCRDMA_BACKWARD_WRS (0)
#endif
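As the comment says, the reserve must be a compile-time constant because the QP's work queues are sized once, when the forward channel is created, before any backchannel has been requested. A hypothetical sketch of how the constant might fold into queue sizing at endpoint-create time; the helper name and the one-WR-per-credit assumption are illustrative, not taken from the patch:

#include <stdio.h>

#if defined(CONFIG_SUNRPC_BACKCHANNEL)
#define RPCRDMA_BACKWARD_WRS (8)
#else
#define RPCRDMA_BACKWARD_WRS (0)
#endif

/* Assumed sizing rule: one Work Request per forward-channel credit,
 * plus the fixed backchannel reserve, applied to both the send and
 * receive queues. */
static unsigned int wr_depth(unsigned int max_requests)
{
	return max_requests + RPCRDMA_BACKWARD_WRS;
}

int main(void)
{
	printf("WR depth for 32 credits: %u\n", wr_depth(32));
	return 0;
}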
/* Registered buffer -- registered kmalloc'd memory for RDMA SEND/RECV
*
* The below structure appears at the front of a large region of kmalloc'd
......@@ -169,10 +174,13 @@ struct rpcrdma_rep {
unsigned int rr_len;
struct ib_device *rr_device;
struct rpcrdma_xprt *rr_rxprt;
struct work_struct rr_work;
struct list_head rr_list;
struct rpcrdma_regbuf *rr_rdmabuf;
};
#define RPCRDMA_BAD_LEN (~0U)
/*
* struct rpcrdma_mw - external memory region metadata
*
......@@ -256,6 +264,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
#define RPCRDMA_MAX_IOVS (2)
struct rpcrdma_req {
struct list_head rl_free;
unsigned int rl_niovs;
unsigned int rl_nchunks;
unsigned int rl_connect_cookie;
......@@ -265,6 +274,9 @@ struct rpcrdma_req {
struct rpcrdma_regbuf *rl_rdmabuf;
struct rpcrdma_regbuf *rl_sendbuf;
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];
struct list_head rl_all;
bool rl_backchannel;
};
static inline struct rpcrdma_req *
......@@ -289,12 +301,14 @@ struct rpcrdma_buffer {
struct list_head rb_all;
char *rb_pool;
spinlock_t rb_lock; /* protect buf arrays */
spinlock_t rb_lock; /* protect buf lists */
struct list_head rb_send_bufs;
struct list_head rb_recv_bufs;
u32 rb_max_requests;
int rb_send_index;
int rb_recv_index;
struct rpcrdma_req **rb_send_bufs;
struct rpcrdma_rep **rb_recv_bufs;
u32 rb_bc_srv_max_requests;
spinlock_t rb_reqslock; /* protect rb_allreqs */
struct list_head rb_allreqs;
};
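The interleaved lines above show the conversion from fixed send/recv arrays indexed by rb_send_index/rb_recv_index to linked free lists under rb_lock. Lists let the pool grow after creation, which the pre-allocated backchannel requests tracked on rb_allreqs need. A user-space model of the resulting get/put discipline, with a pthread mutex standing in for the spinlock (illustrative only; the kernel uses list_head):

#include <stddef.h>
#include <pthread.h>

struct buf {
	struct buf *next;
};

struct pool {
	pthread_mutex_t lock;
	struct buf *free_list;
};

/* Take a buffer off the free list; NULL means the pool is exhausted,
 * which the caller must tolerate (unlike an array slot, a list entry
 * may have been stolen by the backchannel). */
static struct buf *pool_get(struct pool *p)
{
	struct buf *b;

	pthread_mutex_lock(&p->lock);
	b = p->free_list;
	if (b)
		p->free_list = b->next;
	pthread_mutex_unlock(&p->lock);
	return b;
}

/* Return a buffer; no index bookkeeping, so buffers created after
 * pool setup can be added the same way. */
static void pool_put(struct pool *p, struct buf *b)
{
	pthread_mutex_lock(&p->lock);
	b->next = p->free_list;
	p->free_list = b;
	pthread_mutex_unlock(&p->lock);
}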
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
......@@ -340,6 +354,7 @@ struct rpcrdma_stats {
unsigned long failed_marshal_count;
unsigned long bad_reply_count;
unsigned long nomsg_call_count;
unsigned long bcall_count;
};
/*
......@@ -415,6 +430,9 @@ int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *,
/*
* Buffer calls - xprtrdma/verbs.c
*/
struct rpcrdma_req *rpcrdma_create_req(struct rpcrdma_xprt *);
struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *);
void rpcrdma_destroy_req(struct rpcrdma_ia *, struct rpcrdma_req *);
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
......@@ -431,10 +449,14 @@ void rpcrdma_free_regbuf(struct rpcrdma_ia *,
struct rpcrdma_regbuf *);
unsigned int rpcrdma_max_segments(struct rpcrdma_xprt *);
int rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *, unsigned int);
int frwr_alloc_recovery_wq(void);
void frwr_destroy_recovery_wq(void);
int rpcrdma_alloc_wq(void);
void rpcrdma_destroy_wq(void);
/*
* Wrappers for chunk registration, shared by read/write chunk code.
*/
......@@ -495,6 +517,18 @@ int rpcrdma_marshal_req(struct rpc_rqst *);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
/* Backchannel calls - xprtrdma/backchannel.c
*/
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
int xprt_rdma_bc_up(struct svc_serv *, struct net *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
int rpcrdma_bc_marshal_reply(struct rpc_rqst *);
void xprt_rdma_bc_free_rqst(struct rpc_rqst *);
void xprt_rdma_bc_destroy(struct rpc_xprt *, unsigned int);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep;
/* WR context cache. Created in svc_rdma.c */
......