Commit e45428a4 authored by Linus Torvalds

Merge tag 'nfsd-4.21' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Thanks to Vasily Averin for fixing a use-after-free in the
  containerized NFSv4.2 client, and cleaning up some convoluted
  backchannel server code in the process.

  Otherwise, miscellaneous smaller bugfixes and cleanup"

* tag 'nfsd-4.21' of git://linux-nfs.org/~bfields/linux: (25 commits)
  nfs: fixed broken compilation in nfs_callback_up_net()
  nfs: minor typo in nfs4_callback_up_net()
  sunrpc: fix debug message in svc_create_xprt()
  sunrpc: make visible processing error in bc_svc_process()
  sunrpc: remove unused xpo_prep_reply_hdr callback
  sunrpc: remove svc_rdma_bc_class
  sunrpc: remove svc_tcp_bc_class
  sunrpc: remove unused bc_up operation from rpc_xprt_ops
  sunrpc: replace svc_serv->sv_bc_xprt by boolean flag
  sunrpc: use-after-free in svc_process_common()
  sunrpc: use SVC_NET() in svcauth_gss_* functions
  nfsd: drop useless LIST_HEAD
  lockd: Show pid of lockd for remote locks
  NFSD remove OP_CACHEME from 4.2 op_flags
  nfsd: Return EPERM, not EACCES, in some SETATTR cases
  sunrpc: fix cache_head leak due to queued request
  nfsd: clean up indentation, increase indentation in switch statement
  svcrdma: Optimize the logic that selects the R_key to invalidate
  nfsd: fix a warning in __cld_pipe_upcall()
  nfsd4: fix crash on writing v4_end_grace before nfsd startup
  ...
parents 85f78456 0ad30ff6
......@@ -128,24 +128,14 @@ static void encode_netobj(struct xdr_stream *xdr,
static int decode_netobj(struct xdr_stream *xdr,
struct xdr_netobj *obj)
{
u32 length;
__be32 *p;
ssize_t ret;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
length = be32_to_cpup(p++);
if (unlikely(length > XDR_MAX_NETOBJ))
goto out_size;
obj->len = length;
obj->data = (u8 *)p;
ret = xdr_stream_decode_opaque_inline(xdr, (void *)&obj->data,
XDR_MAX_NETOBJ);
if (unlikely(ret < 0))
return -EIO;
obj->len = ret;
return 0;
out_size:
dprintk("NFS: returned netobj was too long: %u\n", length);
return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
......
......@@ -442,7 +442,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
fl->fl_start = req->a_res.lock.fl.fl_start;
fl->fl_end = req->a_res.lock.fl.fl_end;
fl->fl_type = req->a_res.lock.fl.fl_type;
fl->fl_pid = 0;
fl->fl_pid = -req->a_res.lock.fl.fl_pid;
break;
default:
status = nlm_stat_to_errno(req->a_res.status);
......
......@@ -125,24 +125,14 @@ static void encode_netobj(struct xdr_stream *xdr,
static int decode_netobj(struct xdr_stream *xdr,
struct xdr_netobj *obj)
{
u32 length;
__be32 *p;
ssize_t ret;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
length = be32_to_cpup(p++);
if (unlikely(length > XDR_MAX_NETOBJ))
goto out_size;
obj->len = length;
obj->data = (u8 *)p;
ret = xdr_stream_decode_opaque_inline(xdr, (void *)&obj->data,
XDR_MAX_NETOBJ);
if (unlikely(ret < 0))
return -EIO;
obj->len = ret;
return 0;
out_size:
dprintk("NFS: returned netobj was too long: %u\n", length);
return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
}
/*
......
......@@ -127,7 +127,7 @@ nlm_decode_lock(__be32 *p, struct nlm_lock *lock)
locks_init_lock(fl);
fl->fl_owner = current->files;
fl->fl_pid = (pid_t)lock->svid;
fl->fl_pid = current->tgid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
start = ntohl(*p++);
......@@ -269,7 +269,7 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
lock->fl.fl_pid = (pid_t)lock->svid;
lock->fl.fl_pid = current->tgid;
if (!(p = nlm_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
......
......@@ -119,7 +119,7 @@ nlm4_decode_lock(__be32 *p, struct nlm_lock *lock)
locks_init_lock(fl);
fl->fl_owner = current->files;
fl->fl_pid = (pid_t)lock->svid;
fl->fl_pid = current->tgid;
fl->fl_flags = FL_POSIX;
fl->fl_type = F_RDLCK; /* as good as anything else */
p = xdr_decode_hyper(p, &start);
......@@ -266,7 +266,7 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p)
memset(lock, 0, sizeof(*lock));
locks_init_lock(&lock->fl);
lock->svid = ~(u32) 0;
lock->fl.fl_pid = (pid_t)lock->svid;
lock->fl.fl_pid = current->tgid;
if (!(p = nlm4_decode_cookie(p, &argp->cookie))
|| !(p = xdr_decode_string_inplace(p, &lock->caller,
......
......@@ -56,7 +56,7 @@ static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
if (ret > 0) {
nn->nfs_callback_tcpport6 = ret;
dprintk("NFS: Callback listener port = %u (af %u, net %x\n",
dprintk("NFS: Callback listener port = %u (af %u, net %x)\n",
nn->nfs_callback_tcpport6, PF_INET6, net->ns.inum);
} else if (ret != -EAFNOSUPPORT)
goto out_err;
......@@ -206,11 +206,13 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
goto err_bind;
}
ret = -EPROTONOSUPPORT;
ret = 0;
if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0)
ret = nfs4_callback_up_net(serv, net);
else if (xprt->ops->bc_up)
ret = xprt->ops->bc_up(serv, net);
else if (xprt->ops->bc_setup)
set_bc_enabled(serv);
else
ret = -EPROTONOSUPPORT;
if (ret < 0) {
printk(KERN_ERR "NFS: callback service start failed\n");
......
......@@ -656,7 +656,6 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
struct nfsd_net *nn;
ktime_t now, cutoff;
const struct nfsd4_layout_ops *ops;
LIST_HEAD(reaplist);
switch (task->tk_status) {
......
......@@ -863,8 +863,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_rename *rename = &u->rename;
__be32 status;
if (opens_in_grace(SVC_NET(rqstp)) &&
!(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
if (opens_in_grace(SVC_NET(rqstp)))
return nfserr_grace;
status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
rename->rn_snamelen, &cstate->current_fh,
......@@ -1016,8 +1015,6 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
&write->wr_head, write->wr_buflen);
if (!nvecs)
return nfserr_io;
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
......@@ -1348,7 +1345,7 @@ static __be32
nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
__be32 status = nfserr_notsupp;
__be32 status;
struct file *file;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
......@@ -2682,25 +2679,25 @@ static const struct nfsd4_operation nfsd4_ops[] = {
/* NFSv4.2 operations */
[OP_ALLOCATE] = {
.op_func = nfsd4_allocate,
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_ALLOCATE",
.op_rsize_bop = nfsd4_only_status_rsize,
},
[OP_DEALLOCATE] = {
.op_func = nfsd4_deallocate,
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_DEALLOCATE",
.op_rsize_bop = nfsd4_only_status_rsize,
},
[OP_CLONE] = {
.op_func = nfsd4_clone,
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_CLONE",
.op_rsize_bop = nfsd4_only_status_rsize,
},
[OP_COPY] = {
.op_func = nfsd4_copy,
.op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_COPY",
.op_rsize_bop = nfsd4_copy_rsize,
},
......
......@@ -662,7 +662,7 @@ struct cld_net {
struct cld_upcall {
struct list_head cu_list;
struct cld_net *cu_net;
struct task_struct *cu_task;
struct completion cu_done;
struct cld_msg cu_msg;
};
......@@ -671,23 +671,18 @@ __cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
{
int ret;
struct rpc_pipe_msg msg;
struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
memset(&msg, 0, sizeof(msg));
msg.data = cmsg;
msg.len = sizeof(*cmsg);
/*
* Set task state before we queue the upcall. That prevents
* wake_up_process in the downcall from racing with schedule.
*/
set_current_state(TASK_UNINTERRUPTIBLE);
ret = rpc_queue_upcall(pipe, &msg);
if (ret < 0) {
set_current_state(TASK_RUNNING);
goto out;
}
schedule();
wait_for_completion(&cup->cu_done);
if (msg.errno < 0)
ret = msg.errno;
......@@ -754,7 +749,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
return -EFAULT;
wake_up_process(cup->cu_task);
complete(&cup->cu_done);
return mlen;
}
......@@ -769,7 +764,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
if (msg->errno >= 0)
return;
wake_up_process(cup->cu_task);
complete(&cup->cu_done);
}
static const struct rpc_pipe_ops cld_upcall_ops = {
......@@ -900,7 +895,7 @@ alloc_cld_upcall(struct cld_net *cn)
goto restart_search;
}
}
new->cu_task = current;
init_completion(&new->cu_done);
new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
new->cu_net = cn;
......
......@@ -5112,7 +5112,7 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
}
static __be32
nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags)
nfs4_check_olstateid(struct nfs4_ol_stateid *ols, int flags)
{
__be32 status;
......@@ -5195,7 +5195,7 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
status = nfs4_check_olstateid(fhp, openlockstateid(s), flags);
status = nfs4_check_olstateid(openlockstateid(s), flags);
break;
default:
status = nfserr_bad_stateid;
......@@ -6230,15 +6230,15 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
case NFS4_READ_LT:
case NFS4_READW_LT:
file_lock->fl_type = F_RDLCK;
break;
break;
case NFS4_WRITE_LT:
case NFS4_WRITEW_LT:
file_lock->fl_type = F_WRLCK;
break;
break;
default:
dprintk("NFSD: nfs4_lockt: bad lock type!\n");
status = nfserr_inval;
goto out;
goto out;
}
lo = find_lockowner_str(cstate->clp, &lockt->lt_owner);
......
......@@ -1126,6 +1126,8 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
case 'Y':
case 'y':
case '1':
if (nn->nfsd_serv)
return -EBUSY;
nfsd4_end_grace(nn);
break;
default:
......
......@@ -396,10 +396,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
if (iap->ia_valid & ATTR_SIZE) {
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
if (iap->ia_valid & ATTR_SIZE)
ftype = S_IFREG;
}
/*
* If utimes(2) and friends are called with times not NULL, we should
* not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission
* will return EACCES, when the caller's effective UID does not match
* the owner of the file, and the caller is not privileged. In this
* situation, we should return EPERM(notify_change will return this).
*/
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME)) {
accmode |= NFSD_MAY_OWNER_OVERRIDE;
if (!(iap->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET)))
accmode |= NFSD_MAY_WRITE;
}
/* Callers that do fh_verify should do the fh_want_write: */
get_write_count = !fhp->fh_dentry;
......
......@@ -47,11 +47,14 @@ void xprt_free_bc_rqst(struct rpc_rqst *req);
/*
* Determine if a shared backchannel is in use
*/
static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
static inline bool svc_is_backchannel(const struct svc_rqst *rqstp)
{
if (rqstp->rq_server->sv_bc_xprt)
return 1;
return 0;
return rqstp->rq_server->sv_bc_enabled;
}
static inline void set_bc_enabled(struct svc_serv *serv)
{
serv->sv_bc_enabled = true;
}
#else /* CONFIG_SUNRPC_BACKCHANNEL */
static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
......@@ -60,9 +63,13 @@ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
return 0;
}
static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
static inline bool svc_is_backchannel(const struct svc_rqst *rqstp)
{
return false;
}
static inline void set_bc_enabled(struct svc_serv *serv)
{
return 0;
}
static inline void xprt_free_bc_request(struct rpc_rqst *req)
......
......@@ -109,7 +109,7 @@ struct svc_serv {
spinlock_t sv_cb_lock; /* protects the svc_cb_list */
wait_queue_head_t sv_cb_waitq; /* sleep here if there are no
* entries in the svc_cb_list */
struct svc_xprt *sv_bc_xprt; /* callback on fore channel */
bool sv_bc_enabled; /* service uses backchannel */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
};
......@@ -295,9 +295,12 @@ struct svc_rqst {
struct svc_cacherep * rq_cacherep; /* cache info */
struct task_struct *rq_task; /* service thread */
spinlock_t rq_lock; /* per-request lock */
struct net *rq_bc_net; /* pointer to backchannel's
* net namespace
*/
};
#define SVC_NET(svc_rqst) (svc_rqst->rq_xprt->xpt_net)
#define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net)
/*
* Rigorous type checking on sockaddr type conversions
......
......@@ -135,6 +135,7 @@ struct svc_rdma_recv_ctxt {
u32 rc_byte_len;
unsigned int rc_page_count;
unsigned int rc_hdr_count;
u32 rc_inv_rkey;
struct page *rc_pages[RPCSVC_MAXPAGES];
};
......@@ -192,7 +193,6 @@ extern int svc_rdma_sendto(struct svc_rqst *);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern void svc_sq_reap(struct svcxprt_rdma *);
extern void svc_rq_reap(struct svcxprt_rdma *);
extern void svc_rdma_prep_reply_hdr(struct svc_rqst *);
extern struct svc_xprt_class svc_rdma_class;
#ifdef CONFIG_SUNRPC_BACKCHANNEL
......
......@@ -20,7 +20,6 @@ struct svc_xprt_ops {
struct svc_xprt *(*xpo_accept)(struct svc_xprt *);
int (*xpo_has_wspace)(struct svc_xprt *);
int (*xpo_recvfrom)(struct svc_rqst *);
void (*xpo_prep_reply_hdr)(struct svc_rqst *);
int (*xpo_sendto)(struct svc_rqst *);
void (*xpo_release_rqst)(struct svc_rqst *);
void (*xpo_detach)(struct svc_xprt *);
......
......@@ -157,7 +157,6 @@ struct rpc_xprt_ops {
void (*inject_disconnect)(struct rpc_xprt *xprt);
int (*bc_setup)(struct rpc_xprt *xprt,
unsigned int min_reqs);
int (*bc_up)(struct svc_serv *serv, struct net *net);
size_t (*bc_maxpayload)(struct rpc_xprt *xprt);
void (*bc_free_rqst)(struct rpc_rqst *rqst);
void (*bc_destroy)(struct rpc_xprt *xprt,
......
......@@ -569,7 +569,8 @@ TRACE_EVENT(svc_process,
__field(u32, vers)
__field(u32, proc)
__string(service, name)
__string(addr, rqst->rq_xprt->xpt_remotebuf)
__string(addr, rqst->rq_xprt ?
rqst->rq_xprt->xpt_remotebuf : "(null)")
),
TP_fast_assign(
......@@ -577,7 +578,8 @@ TRACE_EVENT(svc_process,
__entry->vers = rqst->rq_vers;
__entry->proc = rqst->rq_proc;
__assign_str(service, name);
__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
__assign_str(addr, rqst->rq_xprt ?
rqst->rq_xprt->xpt_remotebuf : "(null)");
),
TP_printk("addr=%s xid=0x%08x service=%s vers=%u proc=%u",
......
......@@ -1142,7 +1142,7 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
struct kvec *resv = &rqstp->rq_res.head[0];
struct rsi *rsip, rsikey;
int ret;
struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
memset(&rsikey, 0, sizeof(rsikey));
ret = gss_read_verf(gc, argv, authp,
......@@ -1253,7 +1253,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
uint64_t handle;
int status;
int ret;
struct net *net = rqstp->rq_xprt->xpt_net;
struct net *net = SVC_NET(rqstp);
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
memset(&ud, 0, sizeof(ud));
......@@ -1444,7 +1444,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
__be32 *rpcstart;
__be32 *reject_stat = resv->iov_base + resv->iov_len;
int ret;
struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
dprintk("RPC: svcauth_gss: argv->iov_len = %zd\n",
argv->iov_len);
......@@ -1734,7 +1734,7 @@ svcauth_gss_release(struct svc_rqst *rqstp)
struct rpc_gss_wire_cred *gc = &gsd->clcred;
struct xdr_buf *resbuf = &rqstp->rq_res;
int stat = -EINVAL;
struct sunrpc_net *sn = net_generic(rqstp->rq_xprt->xpt_net, sunrpc_net_id);
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
if (gc->gc_proc != RPC_GSS_PROC_DATA)
goto out;
......
......@@ -54,6 +54,11 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail)
h->last_refresh = now;
}
static void cache_fresh_locked(struct cache_head *head, time_t expiry,
struct cache_detail *detail);
static void cache_fresh_unlocked(struct cache_head *head,
struct cache_detail *detail);
static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail,
struct cache_head *key,
int hash)
......@@ -100,6 +105,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
if (cache_is_expired(detail, tmp)) {
hlist_del_init_rcu(&tmp->cache_list);
detail->entries --;
cache_fresh_locked(tmp, 0, detail);
freeme = tmp;
break;
}
......@@ -115,8 +121,10 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
cache_get(new);
spin_unlock(&detail->hash_lock);
if (freeme)
if (freeme) {
cache_fresh_unlocked(freeme, detail);
cache_put(freeme, detail);
}
return new;
}
......
......@@ -1144,6 +1144,17 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
#endif
/*
 * Set up the response header for TCP, which carries a 4-byte record
 * length field ahead of the RPC reply.
 *
 * @rqstp: request whose reply buffer (rq_res) is being prepared
 *
 * Only the first kvec of rq_res is touched: a zero is handed to
 * svc_putnl() to reserve room for the record length.
 * NOTE(review): the real length is presumably filled in later by the
 * transport's send path, which is not visible here -- confirm.
 */
static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
{
struct kvec *resv = &rqstp->rq_res.head[0];
/* tcp needs a space for the record length... */
svc_putnl(resv, 0);
}
/*
* Common routine for processing the RPC request.
*/
......@@ -1172,7 +1183,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
clear_bit(RQ_DROPME, &rqstp->rq_flags);
/* Setup reply header */
rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
if (rqstp->rq_prot == IPPROTO_TCP)
svc_tcp_prep_reply_hdr(rqstp);
svc_putu32(resv, rqstp->rq_xid);
......@@ -1244,7 +1256,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
* for lower versions. RPC_PROG_MISMATCH seems to be the closest
* fit.
*/
if (versp->vs_need_cong_ctrl &&
if (versp->vs_need_cong_ctrl && rqstp->rq_xprt &&
!test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
goto err_bad_vers;
......@@ -1336,7 +1348,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
return 0;
close:
if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
svc_close_xprt(rqstp->rq_xprt);
dprintk("svc: svc_process close\n");
return 0;
......@@ -1459,10 +1471,10 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
dprintk("svc: %s(%p)\n", __func__, req);
/* Build the svc_rqst used by the common processing routine */
rqstp->rq_xprt = serv->sv_bc_xprt;
rqstp->rq_xid = req->rq_xid;
rqstp->rq_prot = req->rq_xprt->prot;
rqstp->rq_server = serv;
rqstp->rq_bc_net = req->rq_xprt->xprt_net;
rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
......@@ -1499,9 +1511,9 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
if (!proc_error) {
/* Processing error: drop the request */
xprt_free_bc_request(req);
return 0;
error = -EINVAL;
goto out;
}
/* Finally, send the reply synchronously */
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
task = rpc_run_bc_task(req);
......
......@@ -296,9 +296,9 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
request_module("svc%s", xprt_name);
err = _svc_create_xprt(serv, xprt_name, net, family, port, flags);
}
if (err)
if (err < 0)
dprintk("svc: transport %s not found, err %d\n",
xprt_name, err);
xprt_name, -err);
return err;
}
EXPORT_SYMBOL_GPL(svc_create_xprt);
......@@ -468,10 +468,11 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
*/
void svc_reserve(struct svc_rqst *rqstp, int space)
{
struct svc_xprt *xprt = rqstp->rq_xprt;
space += rqstp->rq_res.head[0].iov_len;
if (space < rqstp->rq_reserved) {
struct svc_xprt *xprt = rqstp->rq_xprt;
if (xprt && space < rqstp->rq_reserved) {
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space;
......
......@@ -70,13 +70,6 @@ static void svc_sock_free(struct svc_xprt *);
static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
struct net *, struct sockaddr *,
int, int);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
struct net *, struct sockaddr *,
int, int);
static void svc_bc_sock_free(struct svc_xprt *xprt);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key svc_key[2];
static struct lock_class_key svc_slock_key[2];
......@@ -617,10 +610,6 @@ svc_udp_sendto(struct svc_rqst *rqstp)
return error;
}
static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp)
{
}
static int svc_udp_has_wspace(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
......@@ -664,7 +653,6 @@ static const struct svc_xprt_ops svc_udp_ops = {
.xpo_release_rqst = svc_release_udp_skb,
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
.xpo_has_wspace = svc_udp_has_wspace,
.xpo_accept = svc_udp_accept,
.xpo_secure_port = svc_sock_secure_port,
......@@ -1170,17 +1158,6 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
return sent;
}
/*
* Setup response header. TCP has a 4B record length field.
*/
static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
{
struct kvec *resv = &rqstp->rq_res.head[0];
/* tcp needs a space for the record length... */
svc_putnl(resv, 0);
}
static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
......@@ -1189,58 +1166,6 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
struct net *, struct sockaddr *,
int, int);
static void svc_bc_sock_free(struct svc_xprt *xprt);
static struct svc_xprt *svc_bc_tcp_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
int flags)
{
return svc_bc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
}
static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
{
}
static const struct svc_xprt_ops svc_tcp_bc_ops = {
.xpo_create = svc_bc_tcp_create,
.xpo_detach = svc_bc_tcp_sock_detach,
.xpo_free = svc_bc_sock_free,
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
.xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_tcp_bc_class = {
.xcl_name = "tcp-bc",
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_tcp_bc_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
};
static void svc_init_bc_xprt_sock(void)
{
svc_reg_xprt_class(&svc_tcp_bc_class);
}
static void svc_cleanup_bc_xprt_sock(void)
{
svc_unreg_xprt_class(&svc_tcp_bc_class);
}
#else /* CONFIG_SUNRPC_BACKCHANNEL */
static void svc_init_bc_xprt_sock(void)
{
}
static void svc_cleanup_bc_xprt_sock(void)
{
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_create = svc_tcp_create,
.xpo_recvfrom = svc_tcp_recvfrom,
......@@ -1248,7 +1173,6 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_release_rqst = svc_release_skb,
.xpo_detach = svc_tcp_sock_detach,
.xpo_free = svc_sock_free,
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port,
......@@ -1267,14 +1191,12 @@ void svc_init_xprt_sock(void)
{
svc_reg_xprt_class(&svc_tcp_class);
svc_reg_xprt_class(&svc_udp_class);
svc_init_bc_xprt_sock();
}
void svc_cleanup_xprt_sock(void)
{
svc_unreg_xprt_class(&svc_tcp_class);
svc_unreg_xprt_class(&svc_udp_class);
svc_cleanup_bc_xprt_sock();
}
static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
......@@ -1595,45 +1517,3 @@ static void svc_sock_free(struct svc_xprt *xprt)
sock_release(svsk->sk_sock);
kfree(svsk);
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
* Create a back channel svc_xprt which shares the fore channel socket.
*/
static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
int protocol,
struct net *net,
struct sockaddr *sin, int len,
int flags)
{
struct svc_sock *svsk;
struct svc_xprt *xprt;
if (protocol != IPPROTO_TCP) {
printk(KERN_WARNING "svc: only TCP sockets"
" supported on shared back channel\n");
return ERR_PTR(-EINVAL);
}
svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
if (!svsk)
return ERR_PTR(-ENOMEM);
xprt = &svsk->sk_xprt;
svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv);
set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
serv->sv_bc_xprt = xprt;
return xprt;
}
/*
* Free a back channel svc_sock.
*/
static void svc_bc_sock_free(struct svc_xprt *xprt)
{
if (xprt)
kfree(container_of(xprt, struct svc_sock, sk_xprt));
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
......@@ -113,26 +113,6 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs)
return -ENOMEM;
}
/**
* xprt_rdma_bc_up - Create transport endpoint for backchannel service
* @serv: server endpoint
* @net: network namespace
*
* The "xprt" is an implied argument: it supplies the name of the
* backchannel transport class.
*
* Returns zero on success, negative errno on failure
*/
int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net)
{
int ret;
ret = svc_create_xprt(serv, "rdma-bc", net, PF_INET, 0, 0);
if (ret < 0)
return ret;
return 0;
}
/**
* xprt_rdma_bc_maxpayload - Return maximum backchannel message size
* @xprt: transport
......
......@@ -235,9 +235,6 @@ void svc_rdma_cleanup(void)
unregister_sysctl_table(svcrdma_table_header);
svcrdma_table_header = NULL;
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
svc_unreg_xprt_class(&svc_rdma_bc_class);
#endif
svc_unreg_xprt_class(&svc_rdma_class);
}
......@@ -259,8 +256,5 @@ int svc_rdma_init(void)
/* Register RDMA with the SVC transport switch */
svc_reg_xprt_class(&svc_rdma_class);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
svc_reg_xprt_class(&svc_rdma_bc_class);
#endif
return 0;
}
......@@ -485,6 +485,68 @@ static __be32 *xdr_check_reply_chunk(__be32 *p, const __be32 *end)
return p;
}
/* RPC-over-RDMA Version One private extension: Remote Invalidation.
 * Responder's choice: requester signals it can handle Send With
 * Invalidate, and responder chooses one R_key to invalidate.
 *
 * If there is exactly one distinct R_key in the received transport
 * header, set rc_inv_rkey to that R_key. Otherwise, set it to zero.
 *
 * Perform this operation while the received transport header is
 * still in the CPU cache.
 *
 * @rdma: transport; nothing is scanned unless its sc_snd_w_inv is set
 * @ctxt: receive context; rc_recv_buf is read, rc_inv_rkey is written
 *
 * rc_inv_rkey is always written: it is cleared on entry and only
 * replaced at the very end, so every early return leaves it at zero.
 *
 * NOTE(review): this function performs no bounds checks while walking
 * the chunk lists; it presumably relies on the header having been
 * sanity-checked by the caller beforehand -- confirm.
 */
static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
{
__be32 inv_rkey, *p;
u32 i, segcount;
/* Default result: no remote invalidation. */
ctxt->rc_inv_rkey = 0;
if (!rdma->sc_snd_w_inv)
return;
/* xdr_zero doubles as the "no R_key seen yet" marker. */
inv_rkey = xdr_zero;
p = ctxt->rc_recv_buf;
/* Step over the first rpcrdma_fixed_maxsz words to reach the chunk lists. */
p += rpcrdma_fixed_maxsz;
/* Read list: entries repeat while the leading discriminator is non-zero */
while (*p++ != xdr_zero) {
p++; /* position */
/* *p is this segment's R_key: remember the first, bail on a different one */
if (inv_rkey == xdr_zero)
inv_rkey = *p;
else if (inv_rkey != *p)
return;
/* advance past the remaining words of this segment */
p += 4;
}
/* Write list: each chunk is a segment count followed by its segments */
while (*p++ != xdr_zero) {
segcount = be32_to_cpup(p++);
for (i = 0; i < segcount; i++) {
if (inv_rkey == xdr_zero)
inv_rkey = *p;
else if (inv_rkey != *p)
return;
p += 4;
}
}
/* Reply chunk: at most one, same layout as a Write list chunk */
if (*p++ != xdr_zero) {
segcount = be32_to_cpup(p++);
for (i = 0; i < segcount; i++) {
if (inv_rkey == xdr_zero)
inv_rkey = *p;
else if (inv_rkey != *p)
return;
p += 4;
}
}
/* Still xdr_zero here if no chunk was present, which yields zero. */
ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
}
/* On entry, xdr->head[0].iov_base points to first byte in the
* RPC-over-RDMA header.
*
......@@ -746,6 +808,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return ret;
}
svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
p += rpcrdma_fixed_maxsz;
if (*p != xdr_zero)
......
......@@ -484,32 +484,6 @@ static void svc_rdma_get_write_arrays(__be32 *rdma_argp,
*reply = NULL;
}
/* RPC-over-RDMA Version One private extension: Remote Invalidation.
* Responder's choice: requester signals it can handle Send With
* Invalidate, and responder chooses one rkey to invalidate.
*
* Find a candidate rkey to invalidate when sending a reply. Picks the
* first R_key it finds in the chunk lists.
*
* Returns zero if RPC's chunk lists are empty.
*/
static u32 svc_rdma_get_inv_rkey(__be32 *rdma_argp,
__be32 *wr_lst, __be32 *rp_ch)
{
__be32 *p;
p = rdma_argp + rpcrdma_fixed_maxsz;
if (*p != xdr_zero)
p += 2;
else if (wr_lst && be32_to_cpup(wr_lst + 1))
p = wr_lst + 2;
else if (rp_ch && be32_to_cpup(rp_ch + 1))
p = rp_ch + 2;
else
return 0;
return be32_to_cpup(p);
}
static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt,
struct page *page,
......@@ -672,7 +646,7 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
*
* RDMA Send is the last step of transmitting an RPC reply. Pages
* involved in the earlier RDMA Writes are here transferred out
* of the rqstp and into the ctxt's page array. These pages are
* of the rqstp and into the sctxt's page array. These pages are
* DMA unmapped by each Write completion, but the subsequent Send
* completion finally releases these pages.
*
......@@ -680,32 +654,31 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
* - The Reply's transport header will never be larger than a page.
*/
static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt,
__be32 *rdma_argp,
struct svc_rdma_send_ctxt *sctxt,
struct svc_rdma_recv_ctxt *rctxt,
struct svc_rqst *rqstp,
__be32 *wr_lst, __be32 *rp_ch)
{
int ret;
if (!rp_ch) {
ret = svc_rdma_map_reply_msg(rdma, ctxt,
ret = svc_rdma_map_reply_msg(rdma, sctxt,
&rqstp->rq_res, wr_lst);
if (ret < 0)
return ret;
}
svc_rdma_save_io_pages(rqstp, ctxt);
svc_rdma_save_io_pages(rqstp, sctxt);
ctxt->sc_send_wr.opcode = IB_WR_SEND;
if (rdma->sc_snd_w_inv) {
ctxt->sc_send_wr.ex.invalidate_rkey =
svc_rdma_get_inv_rkey(rdma_argp, wr_lst, rp_ch);
if (ctxt->sc_send_wr.ex.invalidate_rkey)
ctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
if (rctxt->rc_inv_rkey) {
sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
} else {
sctxt->sc_send_wr.opcode = IB_WR_SEND;
}
dprintk("svcrdma: posting Send WR with %u sge(s)\n",
ctxt->sc_send_wr.num_sge);
return svc_rdma_send(rdma, &ctxt->sc_send_wr);
sctxt->sc_send_wr.num_sge);
return svc_rdma_send(rdma, &sctxt->sc_send_wr);
}
/* Given the client-provided Write and Reply chunks, the server was not
......@@ -741,10 +714,6 @@ static int svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
return 0;
}
void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}
/**
* svc_rdma_sendto - Transmit an RPC reply
* @rqstp: processed RPC request, reply XDR already in ::rq_res
......@@ -809,7 +778,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
}
svc_rdma_sync_reply_hdr(rdma, sctxt, svc_rdma_reply_hdr_len(rdma_resp));
ret = svc_rdma_send_reply_msg(rdma, sctxt, rdma_argp, rqstp,
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp,
wr_lst, rp_ch);
if (ret < 0)
goto err1;
......
......@@ -85,7 +85,6 @@ static const struct svc_xprt_ops svc_rdma_ops = {
.xpo_release_rqst = svc_rdma_release_rqst,
.xpo_detach = svc_rdma_detach,
.xpo_free = svc_rdma_free,
.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
.xpo_has_wspace = svc_rdma_has_wspace,
.xpo_accept = svc_rdma_accept,
.xpo_secure_port = svc_rdma_secure_port,
......@@ -100,64 +99,6 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_ident = XPRT_TRANSPORT_RDMA,
};
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
struct sockaddr *, int, int);
static void svc_rdma_bc_detach(struct svc_xprt *);
static void svc_rdma_bc_free(struct svc_xprt *);
static const struct svc_xprt_ops svc_rdma_bc_ops = {
.xpo_create = svc_rdma_bc_create,
.xpo_detach = svc_rdma_bc_detach,
.xpo_free = svc_rdma_bc_free,
.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
.xpo_secure_port = svc_rdma_secure_port,
};
struct svc_xprt_class svc_rdma_bc_class = {
.xcl_name = "rdma-bc",
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_bc_ops,
.xcl_max_payload = (1024 - RPCRDMA_HDRLEN_MIN)
};
static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *serv,
struct net *net,
struct sockaddr *sa, int salen,
int flags)
{
struct svcxprt_rdma *cma_xprt;
struct svc_xprt *xprt;
cma_xprt = svc_rdma_create_xprt(serv, net);
if (!cma_xprt)
return ERR_PTR(-ENOMEM);
xprt = &cma_xprt->sc_xprt;
svc_xprt_init(net, &svc_rdma_bc_class, xprt, serv);
set_bit(XPT_CONG_CTRL, &xprt->xpt_flags);
serv->sv_bc_xprt = xprt;
dprintk("svcrdma: %s(%p)\n", __func__, xprt);
return xprt;
}
static void svc_rdma_bc_detach(struct svc_xprt *xprt)
{
dprintk("svcrdma: %s(%p)\n", __func__, xprt);
}
static void svc_rdma_bc_free(struct svc_xprt *xprt)
{
struct svcxprt_rdma *rdma =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
dprintk("svcrdma: %s(%p)\n", __func__, xprt);
if (xprt)
kfree(rdma);
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/* QP event handler */
static void qp_event_handler(struct ib_event *event, void *context)
{
......
......@@ -827,7 +827,6 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
.inject_disconnect = xprt_rdma_inject_disconnect,
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
.bc_setup = xprt_rdma_bc_setup,
.bc_up = xprt_rdma_bc_up,
.bc_maxpayload = xprt_rdma_bc_maxpayload,
.bc_free_rqst = xprt_rdma_bc_free_rqst,
.bc_destroy = xprt_rdma_bc_destroy,
......
......@@ -661,7 +661,6 @@ void xprt_rdma_cleanup(void);
*/
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
int xprt_rdma_bc_up(struct svc_serv *, struct net *);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
......
......@@ -1400,17 +1400,6 @@ static void xs_tcp_force_close(struct rpc_xprt *xprt)
}
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
{
int ret;
ret = svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0,
SVC_SOCK_ANONYMOUS);
if (ret < 0)
return ret;
return 0;
}
static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
{
return PAGE_SIZE;
......@@ -2665,7 +2654,6 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
.inject_disconnect = xs_inject_disconnect,
#ifdef CONFIG_SUNRPC_BACKCHANNEL
.bc_setup = xprt_setup_bc,
.bc_up = xs_tcp_bc_up,
.bc_maxpayload = xs_tcp_bc_maxpayload,
.bc_free_rqst = xprt_free_bc_rqst,
.bc_destroy = xprt_destroy_bc,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment