Commit d2c3ac7e authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-4.2' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "A relatively quiet cycle, with a mix of cleanup and smaller bugfixes"

* 'for-4.2' of git://linux-nfs.org/~bfields/linux: (24 commits)
  sunrpc: use sg_init_one() in krb5_rc4_setup_enc/seq_key()
  nfsd: wrap too long lines in nfsd4_encode_read
  nfsd: fput rd_file from XDR encode context
  nfsd: take struct file setup fully into nfs4_preprocess_stateid_op
  nfsd: refactor nfs4_preprocess_stateid_op
  nfsd: clean up raparams handling
  nfsd: use swap() in sort_pacl_range()
  rpcrdma: Merge svcrdma and xprtrdma modules into one
  svcrdma: Add a separate "max data segs macro for svcrdma
  svcrdma: Replace GFP_KERNEL in a loop with GFP_NOFAIL
  svcrdma: Keep rpcrdma_msg fields in network byte-order
  svcrdma: Fix byte-swapping in svc_rdma_sendto.c
  nfsd: Update callback sequnce id only CB_SEQUENCE success
  nfsd: Reset cb_status in nfsd4_cb_prepare() at retrying
  svcrdma: Remove svc_rdma_xdr_decode_deferred_req()
  SUNRPC: Move EXPORT_SYMBOL for svc_process
  uapi/nfs: Add NFSv4.1 ACL definitions
  nfsd: Remove dead declarations
  nfsd: work around a gcc-5.1 warning
  nfsd: Checking for acl support does not require fetching any acls
  ...
parents 546fac60 901f1379
......@@ -68,16 +68,10 @@ sockets-enqueued
rate of change for this counter is zero; significantly non-zero
values may indicate a performance limitation.
This can happen either because there are too few nfsd threads in the
thread pool for the NFS workload (the workload is thread-limited),
or because the NFS workload needs more CPU time than is available in
the thread pool (the workload is CPU-limited). In the former case,
configuring more nfsd threads will probably improve the performance
of the NFS workload. In the latter case, the sunrpc server layer is
already choosing not to wake idle nfsd threads because there are too
many nfsd threads which want to run but cannot, so configuring more
nfsd threads will make no difference whatsoever. The overloads-avoided
statistic (see below) can be used to distinguish these cases.
This can happen because there are too few nfsd threads in the thread
pool for the NFS workload (the workload is thread-limited), in which
case configuring more nfsd threads will probably improve the
performance of the NFS workload.
threads-woken
Counts how many times an idle nfsd thread is woken to try to
......@@ -88,36 +82,6 @@ threads-woken
thing. The ideal rate of change for this counter will be close
to but less than the rate of change of the packets-arrived counter.
overloads-avoided
Counts how many times the sunrpc server layer chose not to wake an
nfsd thread, despite the presence of idle nfsd threads, because
too many nfsd threads had been recently woken but could not get
enough CPU time to actually run.
This statistic counts a circumstance where the sunrpc layer
heuristically avoids overloading the CPU scheduler with too many
runnable nfsd threads. The ideal rate of change for this counter
is zero. Significant non-zero values indicate that the workload
is CPU limited. Usually this is associated with heavy CPU usage
on all the CPUs in the nfsd thread pool.
If a sustained large overloads-avoided rate is detected on a pool,
the top(1) utility should be used to check for the following
pattern of CPU usage on all the CPUs associated with the given
nfsd thread pool.
- %us ~= 0 (as you're *NOT* running applications on your NFS server)
- %wa ~= 0
- %id ~= 0
- %sy + %hi + %si ~= 100
If this pattern is seen, configuring more nfsd threads will *not*
improve the performance of the workload. If this patten is not
seen, then something more subtle is wrong.
threads-timedout
Counts how many times an nfsd thread triggered an idle timeout,
i.e. was not woken to handle any incoming network packets for
......
......@@ -805,7 +805,7 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
static __be32
compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
const char *name, int namlen)
const char *name, int namlen, u64 ino)
{
struct svc_export *exp;
struct dentry *dparent, *dchild;
......@@ -830,19 +830,21 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
goto out;
if (d_really_is_negative(dchild))
goto out;
if (dchild->d_inode->i_ino != ino)
goto out;
rv = fh_compose(fhp, exp, dchild, &cd->fh);
out:
dput(dchild);
return rv;
}
static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen, u64 ino)
{
struct svc_fh *fh = &cd->scratch;
__be32 err;
fh_init(fh, NFS3_FHSIZE);
err = compose_entry_fh(cd, fh, name, namlen);
err = compose_entry_fh(cd, fh, name, namlen, ino);
if (err) {
*p++ = 0;
*p++ = 0;
......@@ -927,7 +929,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
p = encode_entry_baggage(cd, p, name, namlen, ino);
if (plus)
p = encode_entryplus_baggage(cd, p, name, namlen);
p = encode_entryplus_baggage(cd, p, name, namlen, ino);
num_entry_words = p - cd->buffer;
} else if (*(page+1) != NULL) {
/* temporarily encode entry into next page, then move back to
......@@ -941,7 +943,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, int namlen,
p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
if (plus)
p1 = encode_entryplus_baggage(cd, p1, name, namlen);
p1 = encode_entryplus_baggage(cd, p1, name, namlen, ino);
/* determine entry word length and lengths to go in pages */
num_entry_words = p1 - tmp;
......
......@@ -52,10 +52,6 @@
#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE)
#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)
/* We don't support these bits; insist they be neither allowed nor denied */
#define NFS4_MASK_UNSUPP (NFS4_ACE_DELETE | NFS4_ACE_WRITE_OWNER \
| NFS4_ACE_READ_NAMED_ATTRS | NFS4_ACE_WRITE_NAMED_ATTRS)
/* flags used to simulate posix default ACLs */
#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
| NFS4_ACE_DIRECTORY_INHERIT_ACE)
......@@ -64,9 +60,6 @@
| NFS4_ACE_INHERIT_ONLY_ACE \
| NFS4_ACE_IDENTIFIER_GROUP)
#define MASK_EQUAL(mask1, mask2) \
( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
static u32
mask_from_posix(unsigned short perm, unsigned int flags)
{
......@@ -126,11 +119,6 @@ low_mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
*mode |= ACL_EXECUTE;
}
struct ace_container {
struct nfs4_ace *ace;
struct list_head ace_l;
};
static short ace2type(struct nfs4_ace *);
static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *,
unsigned int);
......@@ -384,7 +372,6 @@ pace_gt(struct posix_acl_entry *pace1, struct posix_acl_entry *pace2)
static void
sort_pacl_range(struct posix_acl *pacl, int start, int end) {
int sorted = 0, i;
struct posix_acl_entry tmp;
/* We just do a bubble sort; easy to do in place, and we're not
* expecting acl's to be long enough to justify anything more. */
......@@ -394,9 +381,8 @@ sort_pacl_range(struct posix_acl *pacl, int start, int end) {
if (pace_gt(&pacl->a_entries[i],
&pacl->a_entries[i+1])) {
sorted = 0;
tmp = pacl->a_entries[i];
pacl->a_entries[i] = pacl->a_entries[i+1];
pacl->a_entries[i+1] = tmp;
swap(pacl->a_entries[i],
pacl->a_entries[i + 1]);
}
}
}
......
......@@ -455,6 +455,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
if (unlikely(status || cb->cb_status))
return status;
cb->cb_update_seq_nr = true;
return decode_cb_sequence4resok(xdr, cb);
}
......@@ -875,6 +876,8 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
u32 minorversion = clp->cl_minorversion;
cb->cb_minorversion = minorversion;
cb->cb_update_seq_nr = false;
cb->cb_status = 0;
if (minorversion) {
if (!nfsd41_cb_get_slot(clp, task))
return;
......@@ -891,9 +894,16 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
clp->cl_minorversion);
if (clp->cl_minorversion) {
/* No need for lock, access serialized in nfsd4_cb_prepare */
if (!task->tk_status)
/*
* No need for lock, access serialized in nfsd4_cb_prepare
*
* RFC5661 20.9.3
* If CB_SEQUENCE returns an error, then the state of the slot
* (sequence ID, cached reply) MUST NOT change.
*/
if (cb->cb_update_seq_nr)
++clp->cl_cb_session->se_cb_seq_nr;
clear_bit(0, &clp->cl_cb_slot_busy);
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
......@@ -1090,6 +1100,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_ops = ops;
INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
cb->cb_update_seq_nr = false;
cb->cb_need_restart = false;
}
......
......@@ -760,8 +760,6 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
__be32 status;
/* no need to check permission - this will be done in nfsd_read() */
read->rd_filp = NULL;
if (read->rd_offset >= OFFSET_MAX)
return nfserr_inval;
......@@ -778,9 +776,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* check stateid */
if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
cstate, &read->rd_stateid,
RD_STATE, &read->rd_filp))) {
status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid,
RD_STATE, &read->rd_filp, &read->rd_tmp_file);
if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
}
......@@ -924,8 +922,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
int err;
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
&setattr->sa_stateid, WR_STATE, NULL);
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&setattr->sa_stateid, WR_STATE, NULL, NULL);
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
......@@ -986,13 +984,11 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
unsigned long cnt;
int nvecs;
/* no need to check permission - this will be done in nfsd_write() */
if (write->wr_offset >= OFFSET_MAX)
return nfserr_inval;
status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
cstate, stateid, WR_STATE, &filp);
status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE,
&filp, NULL);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
......@@ -1005,11 +1001,10 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nvecs = fill_in_write_vector(rqstp->rq_vec, write);
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
status = nfsd_write(rqstp, &cstate->current_fh, filp,
write->wr_offset, rqstp->rq_vec, nvecs,
&cnt, &write->wr_how_written);
if (filp)
fput(filp);
status = nfsd_vfs_write(rqstp, &cstate->current_fh, filp,
write->wr_offset, rqstp->rq_vec, nvecs, &cnt,
&write->wr_how_written);
fput(filp);
write->wr_bytes_written = cnt;
......@@ -1023,15 +1018,13 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status = nfserr_notsupp;
struct file *file;
status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&fallocate->falloc_stateid,
WR_STATE, &file);
WR_STATE, &file, NULL);
if (status != nfs_ok) {
dprintk("NFSD: nfsd4_fallocate: couldn't process stateid!\n");
return status;
}
if (!file)
return nfserr_bad_stateid;
status = nfsd4_vfs_fallocate(rqstp, &cstate->current_fh, file,
fallocate->falloc_offset,
......@@ -1064,15 +1057,13 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct file *file;
status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
status = nfs4_preprocess_stateid_op(rqstp, cstate,
&seek->seek_stateid,
RD_STATE, &file);
RD_STATE, &file, NULL);
if (status) {
dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
return status;
}
if (!file)
return nfserr_bad_stateid;
switch (seek->seek_whence) {
case NFS4_CONTENT_DATA:
......@@ -1732,10 +1723,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
be32_to_cpu(status));
nfsd4_cstate_clear_replay(cstate);
/* XXX Ugh, we need to get rid of this kind of special case: */
if (op->opnum == OP_READ && op->u.read.rd_filp)
fput(op->u.read.rd_filp);
nfsd4_increment_op_stats(op->opnum);
}
......
......@@ -3861,7 +3861,7 @@ static __be32
nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
{
__be32 status;
unsigned char old_deny_bmap;
unsigned char old_deny_bmap = stp->st_deny_bmap;
if (!test_access(open->op_share_access, stp))
return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
......@@ -3870,7 +3870,6 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c
spin_lock(&fp->fi_lock);
status = nfs4_file_check_deny(fp, open->op_share_deny);
if (status == nfs_ok) {
old_deny_bmap = stp->st_deny_bmap;
set_deny(open->op_share_deny, stp);
fp->fi_share_deny |=
(open->op_share_deny & NFS4_SHARE_DENY_BOTH);
......@@ -4574,85 +4573,130 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
return nfs_ok;
}
static struct file *
nfs4_find_file(struct nfs4_stid *s, int flags)
{
if (!s)
return NULL;
switch (s->sc_type) {
case NFS4_DELEG_STID:
if (WARN_ON_ONCE(!s->sc_file->fi_deleg_file))
return NULL;
return get_file(s->sc_file->fi_deleg_file);
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
if (flags & RD_STATE)
return find_readable_file(s->sc_file);
else
return find_writeable_file(s->sc_file);
break;
}
return NULL;
}
static __be32
nfs4_check_olstateid(struct svc_fh *fhp, struct nfs4_ol_stateid *ols, int flags)
{
__be32 status;
status = nfs4_check_fh(fhp, ols);
if (status)
return status;
status = nfsd4_check_openowner_confirmed(ols);
if (status)
return status;
return nfs4_check_openmode(ols, flags);
}
static __be32
nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
struct file **filpp, bool *tmp_file, int flags)
{
int acc = (flags & RD_STATE) ? NFSD_MAY_READ : NFSD_MAY_WRITE;
struct file *file;
__be32 status;
file = nfs4_find_file(s, flags);
if (file) {
status = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
acc | NFSD_MAY_OWNER_OVERRIDE);
if (status) {
fput(file);
return status;
}
*filpp = file;
} else {
status = nfsd_open(rqstp, fhp, S_IFREG, acc, filpp);
if (status)
return status;
if (tmp_file)
*tmp_file = true;
}
return 0;
}
/*
* Checks for stateid operations
*/
* Checks for stateid operations
*/
__be32
nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
stateid_t *stateid, int flags, struct file **filpp)
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, stateid_t *stateid,
int flags, struct file **filpp, bool *tmp_file)
{
struct nfs4_stid *s;
struct nfs4_ol_stateid *stp = NULL;
struct nfs4_delegation *dp = NULL;
struct svc_fh *current_fh = &cstate->current_fh;
struct inode *ino = d_inode(current_fh->fh_dentry);
struct svc_fh *fhp = &cstate->current_fh;
struct inode *ino = d_inode(fhp->fh_dentry);
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
struct file *file = NULL;
struct nfs4_stid *s = NULL;
__be32 status;
if (filpp)
*filpp = NULL;
if (tmp_file)
*tmp_file = false;
if (grace_disallows_io(net, ino))
return nfserr_grace;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
return check_special_stateids(net, current_fh, stateid, flags);
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
status = check_special_stateids(net, fhp, stateid, flags);
goto done;
}
status = nfsd4_lookup_stateid(cstate, stateid,
NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
&s, nn);
if (status)
return status;
status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
status = check_stateid_generation(stateid, &s->sc_stateid,
nfsd4_has_session(cstate));
if (status)
goto out;
switch (s->sc_type) {
case NFS4_DELEG_STID:
dp = delegstateid(s);
status = nfs4_check_delegmode(dp, flags);
if (status)
goto out;
if (filpp) {
file = dp->dl_stid.sc_file->fi_deleg_file;
if (!file) {
WARN_ON_ONCE(1);
status = nfserr_serverfault;
goto out;
}
get_file(file);
}
status = nfs4_check_delegmode(delegstateid(s), flags);
break;
case NFS4_OPEN_STID:
case NFS4_LOCK_STID:
stp = openlockstateid(s);
status = nfs4_check_fh(current_fh, stp);
if (status)
goto out;
status = nfsd4_check_openowner_confirmed(stp);
if (status)
goto out;
status = nfs4_check_openmode(stp, flags);
if (status)
goto out;
if (filpp) {
struct nfs4_file *fp = stp->st_stid.sc_file;
if (flags & RD_STATE)
file = find_readable_file(fp);
else
file = find_writeable_file(fp);
}
status = nfs4_check_olstateid(fhp, openlockstateid(s), flags);
break;
default:
status = nfserr_bad_stateid;
goto out;
break;
}
status = nfs_ok;
if (file)
*filpp = file;
done:
if (!status && filpp)
status = nfs4_check_file(rqstp, fhp, s, filpp, tmp_file, flags);
out:
nfs4_put_stid(s);
if (s)
nfs4_put_stid(s);
return status;
}
......@@ -5505,7 +5549,7 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
__be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (!err) {
err = nfserrno(vfs_test_lock(file, lock));
nfsd_close(file);
fput(file);
}
return err;
}
......
......@@ -33,6 +33,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
#include <linux/statfs.h>
......@@ -2227,7 +2228,6 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
u32 rdattr_err = 0;
__be32 status;
int err;
int aclsupport = 0;
struct nfs4_acl *acl = NULL;
void *context = NULL;
int contextlen;
......@@ -2274,19 +2274,15 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
goto out;
fhp = tempfh;
}
if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT
| FATTR4_WORD0_SUPPORTED_ATTRS)) {
if (bmval0 & FATTR4_WORD0_ACL) {
err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
aclsupport = (err == 0);
if (bmval0 & FATTR4_WORD0_ACL) {
if (err == -EOPNOTSUPP)
bmval0 &= ~FATTR4_WORD0_ACL;
else if (err == -EINVAL) {
status = nfserr_attrnotsupp;
goto out;
} else if (err != 0)
goto out_nfserr;
}
if (err == -EOPNOTSUPP)
bmval0 &= ~FATTR4_WORD0_ACL;
else if (err == -EINVAL) {
status = nfserr_attrnotsupp;
goto out;
} else if (err != 0)
goto out_nfserr;
}
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
......@@ -2338,7 +2334,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
u32 word1 = nfsd_suppattrs1(minorversion);
u32 word2 = nfsd_suppattrs2(minorversion);
if (!aclsupport)
if (!IS_POSIXACL(dentry->d_inode))
word0 &= ~FATTR4_WORD0_ACL;
if (!contextsupport)
word2 &= ~FATTR4_WORD2_SECURITY_LABEL;
......@@ -2486,7 +2482,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
p = xdr_reserve_space(xdr, 4);
if (!p)
goto out_resource;
*p++ = cpu_to_be32(aclsupport ?
*p++ = cpu_to_be32(IS_POSIXACL(dentry->d_inode) ?
ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);
}
if (bmval0 & FATTR4_WORD0_CANSETTIME) {
......@@ -3422,52 +3418,51 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
unsigned long maxcount;
struct xdr_stream *xdr = &resp->xdr;
struct file *file = read->rd_filp;
struct svc_fh *fhp = read->rd_fhp;
int starting_len = xdr->buf->len;
struct raparms *ra;
struct raparms *ra = NULL;
__be32 *p;
__be32 err;
if (nfserr)
return nfserr;
goto out;
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p) {
WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
return nfserr_resource;
nfserr = nfserr_resource;
goto out;
}
if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
if (resp->xdr.buf->page_len &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
WARN_ON_ONCE(1);
return nfserr_resource;
nfserr = nfserr_resource;
goto out;
}
xdr_commit_encode(xdr);
maxcount = svc_max_payload(resp->rqstp);
maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
maxcount = min_t(unsigned long, maxcount,
(xdr->buf->buflen - xdr->buf->len));
maxcount = min_t(unsigned long, maxcount, read->rd_length);
if (read->rd_filp)
err = nfsd_permission(resp->rqstp, fhp->fh_export,
fhp->fh_dentry,
NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
else
err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
&file, &ra);
if (err)
goto err_truncate;
if (read->rd_tmp_file)
ra = nfsd_init_raparms(file);
if (file->f_op->splice_read && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
err = nfsd4_encode_splice_read(resp, read, file, maxcount);
if (file->f_op->splice_read &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
else
err = nfsd4_encode_readv(resp, read, file, maxcount);
nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
if (!read->rd_filp)
nfsd_put_tmp_read_open(file, ra);
if (ra)
nfsd_put_raparams(file, ra);
err_truncate:
if (err)
if (nfserr)
xdr_truncate_encode(xdr, starting_len);
return err;
out:
if (file)
fput(file);
return nfserr;
}
static __be32
......
......@@ -59,13 +59,61 @@ static __be32
nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
struct nfsd_attrstat *resp)
{
struct iattr *iap = &argp->attrs;
struct svc_fh *fhp;
__be32 nfserr;
dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n",
SVCFH_fmt(&argp->fh),
argp->attrs.ia_valid, (long) argp->attrs.ia_size);
fh_copy(&resp->fh, &argp->fh);
nfserr = nfsd_setattr(rqstp, &resp->fh, &argp->attrs,0, (time_t)0);
fhp = fh_copy(&resp->fh, &argp->fh);
/*
* NFSv2 does not differentiate between "set-[ac]time-to-now"
* which only requires access, and "set-[ac]time-to-X" which
* requires ownership.
* So if it looks like it might be "set both to the same time which
* is close to now", and if inode_change_ok fails, then we
* convert to "set to now" instead of "set to explicit time"
*
* We only call inode_change_ok as the last test as technically
* it is not an interface that we should be using.
*/
#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
#define MAX_TOUCH_TIME_ERROR (30*60)
if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
/*
* Looks probable.
*
* Now just make sure time is in the right ballpark.
* Solaris, at least, doesn't seem to care what the time
* request is. We require it be within 30 minutes of now.
*/
time_t delta = iap->ia_atime.tv_sec - get_seconds();
struct inode *inode;
nfserr = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
if (nfserr)
goto done;
inode = d_inode(fhp->fh_dentry);
if (delta < 0)
delta = -delta;
if (delta < MAX_TOUCH_TIME_ERROR &&
inode_change_ok(inode, iap) != 0) {
/*
* Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
* This will cause notify_change to set these times
* to "now"
*/
iap->ia_valid &= ~BOTH_TIME_SET;
}
}
nfserr = nfsd_setattr(rqstp, fhp, iap, 0, (time_t)0);
done:
return nfsd_return_attrs(nfserr, resp);
}
......
......@@ -68,6 +68,7 @@ struct nfsd4_callback {
struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work;
int cb_status;
bool cb_update_seq_nr;
bool cb_need_restart;
};
......@@ -582,9 +583,9 @@ enum nfsd4_cb_op {
struct nfsd4_compound_state;
struct nfsd_net;
extern __be32 nfs4_preprocess_stateid_op(struct net *net,
struct nfsd4_compound_state *cstate,
stateid_t *stateid, int flags, struct file **filp);
extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, stateid_t *stateid,
int flags, struct file **filp, bool *tmp_file);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
......
......@@ -302,42 +302,6 @@ commit_metadata(struct svc_fh *fhp)
static void
nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
/*
* NFSv2 does not differentiate between "set-[ac]time-to-now"
* which only requires access, and "set-[ac]time-to-X" which
* requires ownership.
* So if it looks like it might be "set both to the same time which
* is close to now", and if inode_change_ok fails, then we
* convert to "set to now" instead of "set to explicit time"
*
* We only call inode_change_ok as the last test as technically
* it is not an interface that we should be using.
*/
#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
#define MAX_TOUCH_TIME_ERROR (30*60)
if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET &&
iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec) {
/*
* Looks probable.
*
* Now just make sure time is in the right ballpark.
* Solaris, at least, doesn't seem to care what the time
* request is. We require it be within 30 minutes of now.
*/
time_t delta = iap->ia_atime.tv_sec - get_seconds();
if (delta < 0)
delta = -delta;
if (delta < MAX_TOUCH_TIME_ERROR &&
inode_change_ok(inode, iap) != 0) {
/*
* Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
* This will cause notify_change to set these times
* to "now"
*/
iap->ia_valid &= ~BOTH_TIME_SET;
}
}
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
iap->ia_mode &= S_IALLUGO;
......@@ -538,16 +502,11 @@ __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, loff_t len,
int flags)
{
__be32 err;
int error;
if (!S_ISREG(file_inode(file)->i_mode))
return nfserr_inval;
err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, NFSD_MAY_WRITE);
if (err)
return err;
error = vfs_fallocate(file, flags, offset, len);
if (!error)
error = commit_metadata(fhp);
......@@ -744,7 +703,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
host_err = ima_file_check(file, may_flags, 0);
if (host_err) {
nfsd_close(file);
fput(file);
goto out_nfserr;
}
......@@ -761,23 +720,12 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
return err;
}
/*
* Close a file.
*/
void
nfsd_close(struct file *filp)
{
fput(filp);
}
/*
* Obtain the readahead parameters for the file
* specified by (dev, ino).
*/
static inline struct raparms *
nfsd_get_raparms(dev_t dev, ino_t ino)
struct raparms *
nfsd_init_raparms(struct file *file)
{
struct inode *inode = file_inode(file);
dev_t dev = inode->i_sb->s_dev;
ino_t ino = inode->i_ino;
struct raparms *ra, **rap, **frap = NULL;
int depth = 0;
unsigned int hash;
......@@ -814,9 +762,23 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
ra->p_count++;
nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
spin_unlock(&rab->pb_lock);
if (ra->p_set)
file->f_ra = ra->p_ra;
return ra;
}
void nfsd_put_raparams(struct file *file, struct raparms *ra)
{
struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
spin_lock(&rab->pb_lock);
ra->p_ra = file->f_ra;
ra->p_set = 1;
ra->p_count--;
spin_unlock(&rab->pb_lock);
}
/*
* Grab and keep cached pages associated with a file in the svc_rqst
* so that they can be passed to the network sendmsg/sendpage routines
......@@ -945,7 +907,7 @@ static int wait_for_concurrent_writes(struct file *file)
return err;
}
static __be32
__be32
nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
unsigned long *cnt, int *stablep)
......@@ -1009,40 +971,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
return err;
}
__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file **file, struct raparms **ra)
{
struct inode *inode;
__be32 err;
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file);
if (err)
return err;
inode = file_inode(*file);
/* Get readahead parameters */
*ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
if (*ra && (*ra)->p_set)
(*file)->f_ra = (*ra)->p_ra;
return nfs_ok;
}
void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra)
{
/* Write back readahead params */
if (ra) {
struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
spin_lock(&rab->pb_lock);
ra->p_ra = file->f_ra;
ra->p_set = 1;
ra->p_count--;
spin_unlock(&rab->pb_lock);
}
nfsd_close(file);
}
/*
* Read data from a file. count must contain the requested read count
* on entry. On return, *count contains the number of bytes actually read.
......@@ -1055,13 +983,15 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct raparms *ra;
__be32 err;
err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra);
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
if (err)
return err;
ra = nfsd_init_raparms(file);
err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
nfsd_put_tmp_read_open(file, ra);
if (ra)
nfsd_put_raparams(file, ra);
fput(file);
return err;
}
......@@ -1093,7 +1023,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
if (cnt)
err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
cnt, stablep);
nfsd_close(file);
fput(file);
}
out:
return err;
......@@ -1138,7 +1068,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfserr_notsupp;
}
nfsd_close(file);
fput(file);
out:
return err;
}
......@@ -1977,7 +1907,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
if (err == nfserr_eof || err == nfserr_toosmall)
err = nfs_ok; /* can still be found in ->err */
out_close:
nfsd_close(file);
fput(file);
out:
return err;
}
......
......@@ -71,11 +71,7 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
#endif /* CONFIG_NFSD_V3 */
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
void nfsd_close(struct file *);
struct raparms;
__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
struct file **, struct raparms **);
void nfsd_put_tmp_read_open(struct file *, struct raparms *);
__be32 nfsd_splice_read(struct svc_rqst *,
struct file *, loff_t, unsigned long *);
__be32 nfsd_readv(struct file *, loff_t, struct kvec *, int,
......@@ -84,6 +80,10 @@ __be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
loff_t, struct kvec *,int, unsigned long *, int *);
__be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset,
struct kvec *vec, int vlen, unsigned long *cnt,
int *stablep);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *);
__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
......@@ -104,6 +104,9 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
struct raparms *nfsd_init_raparms(struct file *file);
void nfsd_put_raparams(struct file *file, struct raparms *ra);
static inline int fh_want_write(struct svc_fh *fh)
{
int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
......
......@@ -273,6 +273,7 @@ struct nfsd4_read {
u32 rd_length; /* request */
int rd_vlen;
struct file *rd_filp;
bool rd_tmp_file;
struct svc_rqst *rd_rqstp; /* response */
struct svc_fh * rd_fhp; /* response */
......
......@@ -172,6 +172,13 @@ struct svcxprt_rdma {
#define RDMAXPRT_SQ_PENDING 2
#define RDMAXPRT_CONN_PENDING 3
#define RPCRDMA_MAX_SVC_SEGS (64) /* server max scatter/gather */
#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
#define RPCRDMA_MAXPAYLOAD RPCSVC_MAXPAYLOAD
#else
#define RPCRDMA_MAXPAYLOAD (RPCRDMA_MAX_SVC_SEGS << PAGE_SHIFT)
#endif
#define RPCRDMA_LISTEN_BACKLOG 10
/* The default ORD value is based on two outstanding full-size writes with a
* page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */
......@@ -182,10 +189,9 @@ struct svcxprt_rdma {
/* svc_rdma_marshal.c */
extern int svc_rdma_xdr_decode_req(struct rpcrdma_msg **, struct svc_rqst *);
extern int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *);
extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *,
struct rpcrdma_msg *,
enum rpcrdma_errcode, u32 *);
enum rpcrdma_errcode, __be32 *);
extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int);
extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int);
extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int,
......@@ -212,7 +218,6 @@ extern int svc_rdma_sendto(struct svc_rqst *);
extern int svc_rdma_send(struct svcxprt_rdma *, struct ib_send_wr *);
extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *,
enum rpcrdma_errcode);
struct page *svc_rdma_get_page(void);
extern int svc_rdma_post_recv(struct svcxprt_rdma *);
extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *);
extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *);
......
......@@ -86,6 +86,10 @@
#define ACL4_SUPPORT_AUDIT_ACL 0x04
#define ACL4_SUPPORT_ALARM_ACL 0x08
#define NFS4_ACL_AUTO_INHERIT 0x00000001
#define NFS4_ACL_PROTECTED 0x00000002
#define NFS4_ACL_DEFAULTED 0x00000004
#define NFS4_ACE_FILE_INHERIT_ACE 0x00000001
#define NFS4_ACE_DIRECTORY_INHERIT_ACE 0x00000002
#define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE 0x00000004
......@@ -93,6 +97,7 @@
#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010
#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020
#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040
#define NFS4_ACE_INHERITED_ACE 0x00000080
#define NFS4_ACE_READ_DATA 0x00000001
#define NFS4_ACE_LIST_DIRECTORY 0x00000001
......@@ -106,6 +111,8 @@
#define NFS4_ACE_DELETE_CHILD 0x00000040
#define NFS4_ACE_READ_ATTRIBUTES 0x00000080
#define NFS4_ACE_WRITE_ATTRIBUTES 0x00000100
#define NFS4_ACE_WRITE_RETENTION 0x00000200
#define NFS4_ACE_WRITE_RETENTION_HOLD 0x00000400
#define NFS4_ACE_DELETE 0x00010000
#define NFS4_ACE_READ_ACL 0x00020000
#define NFS4_ACE_WRITE_ACL 0x00040000
......
......@@ -48,28 +48,16 @@ config SUNRPC_DEBUG
If unsure, say Y.
config SUNRPC_XPRT_RDMA_CLIENT
tristate "RPC over RDMA Client Support"
config SUNRPC_XPRT_RDMA
tristate "RPC-over-RDMA transport"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
This option allows the NFS client to support an RDMA-enabled
transport.
This option allows the NFS client and server to use RDMA
transports (InfiniBand, iWARP, or RoCE).
To compile RPC client RDMA transport support as a module,
choose M here: the module will be called xprtrdma.
To compile this support as a module, choose M. The module
will be called rpcrdma.ko.
If unsure, say N.
config SUNRPC_XPRT_RDMA_SERVER
tristate "RPC over RDMA Server Support"
depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
help
This option allows the NFS server to support an RDMA-enabled
transport.
To compile RPC server RDMA transport support as a module,
choose M here: the module will be called svcrdma.
If unsure, say N.
If unsure, or you know there is no RDMA capability on your
hardware platform, say N.
......@@ -5,8 +5,7 @@
obj-$(CONFIG_SUNRPC) += sunrpc.o
obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
obj-y += xprtrdma/
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
auth.o auth_null.o auth_unix.o auth_generic.o \
......
......@@ -881,9 +881,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
if (err)
goto out_err;
sg_init_table(sg, 1);
sg_set_buf(sg, &zeroconstant, 4);
sg_init_one(sg, &zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kseq);
if (err)
goto out_err;
......@@ -951,9 +949,7 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
if (err)
goto out_err;
sg_init_table(sg, 1);
sg_set_buf(sg, zeroconstant, 4);
sg_init_one(sg, zeroconstant, 4);
err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
if (err)
goto out_err;
......
......@@ -1290,7 +1290,6 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
svc_putnl(resv, ntohl(rpc_stat));
goto sendit;
}
EXPORT_SYMBOL_GPL(svc_process);
/*
* Process the RPC request.
......@@ -1338,6 +1337,7 @@ svc_process(struct svc_rqst *rqstp)
svc_drop(rqstp);
return 0;
}
EXPORT_SYMBOL_GPL(svc_process);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/*
......
obj-$(CONFIG_SUNRPC_XPRT_RDMA_CLIENT) += xprtrdma.o
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
xprtrdma-y := transport.o rpc_rdma.o verbs.o \
fmr_ops.o frwr_ops.o physical_ops.o
obj-$(CONFIG_SUNRPC_XPRT_RDMA_SERVER) += svcrdma.o
svcrdma-y := svc_rdma.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o
rpcrdma-y := transport.o rpc_rdma.o verbs.o \
fmr_ops.o frwr_ops.o physical_ops.o \
svc_rdma.o svc_rdma_transport.o \
svc_rdma_marshal.o svc_rdma_sendto.o svc_rdma_recvfrom.o \
module.o
/*
* Copyright (c) 2015 Oracle. All rights reserved.
*/
/* rpcrdma.ko module initialization
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sunrpc/svc_rdma.h>
#include "xprt_rdma.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
MODULE_AUTHOR("Open Grid Computing and Network Appliance, Inc.");
MODULE_DESCRIPTION("RPC/RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("svcrdma");
MODULE_ALIAS("xprtrdma");
static void __exit rpc_rdma_cleanup(void)
{
xprt_rdma_cleanup();
svc_rdma_cleanup();
}
static int __init rpc_rdma_init(void)
{
int rc;
rc = svc_rdma_init();
if (rc)
goto out;
rc = xprt_rdma_init();
if (rc)
svc_rdma_cleanup();
out:
return rc;
}
module_init(rpc_rdma_init);
module_exit(rpc_rdma_cleanup);
......@@ -38,8 +38,7 @@
*
* Author: Tom Tucker <tom@opengridcomputing.com>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
......@@ -295,8 +294,3 @@ int svc_rdma_init(void)
destroy_workqueue(svc_rdma_wq);
return -ENOMEM;
}
MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
MODULE_DESCRIPTION("SVC RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
module_init(svc_rdma_init);
module_exit(svc_rdma_cleanup);
......@@ -50,12 +50,12 @@
/*
* Decodes a read chunk list. The expected format is as follows:
* descrim : xdr_one
* position : u32 offset into XDR stream
* handle : u32 RKEY
* position : __be32 offset into XDR stream
* handle : __be32 RKEY
* . . .
* end-of-list: xdr_zero
*/
static u32 *decode_read_list(u32 *va, u32 *vaend)
static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
{
struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
......@@ -67,20 +67,20 @@ static u32 *decode_read_list(u32 *va, u32 *vaend)
}
ch++;
}
return (u32 *)&ch->rc_position;
return &ch->rc_position;
}
/*
* Decodes a write chunk list. The expected format is as follows:
* descrim : xdr_one
* nchunks : <count>
* handle : u32 RKEY ---+
* length : u32 <len of segment> |
* handle : __be32 RKEY ---+
* length : __be32 <len of segment> |
* offset : remove va + <count>
* . . . |
* ---+
*/
static u32 *decode_write_list(u32 *va, u32 *vaend)
static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
{
unsigned long start, end;
int nchunks;
......@@ -90,14 +90,14 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
/* Check for not write-array */
if (ary->wc_discrim == xdr_zero)
return (u32 *)&ary->wc_nchunks;
return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
nchunks = ntohl(ary->wc_nchunks);
nchunks = be32_to_cpu(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
......@@ -112,10 +112,10 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
return (u32 *)&ary->wc_array[nchunks].wc_target.rs_length;
return &ary->wc_array[nchunks].wc_target.rs_length;
}
static u32 *decode_reply_array(u32 *va, u32 *vaend)
static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
{
unsigned long start, end;
int nchunks;
......@@ -124,14 +124,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
/* Check for no reply-array */
if (ary->wc_discrim == xdr_zero)
return (u32 *)&ary->wc_nchunks;
return &ary->wc_nchunks;
if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
(unsigned long)vaend) {
dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
return NULL;
}
nchunks = ntohl(ary->wc_nchunks);
nchunks = be32_to_cpu(ary->wc_nchunks);
start = (unsigned long)&ary->wc_array[0];
end = (unsigned long)vaend;
......@@ -142,15 +142,14 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
ary, nchunks, vaend);
return NULL;
}
return (u32 *)&ary->wc_array[nchunks];
return (__be32 *)&ary->wc_array[nchunks];
}
int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
u32 *va;
u32 *vaend;
__be32 *va, *vaend;
u32 hdr_len;
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
......@@ -162,22 +161,17 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
return -EINVAL;
}
/* Decode the header */
rmsgp->rm_xid = ntohl(rmsgp->rm_xid);
rmsgp->rm_vers = ntohl(rmsgp->rm_vers);
rmsgp->rm_credit = ntohl(rmsgp->rm_credit);
rmsgp->rm_type = ntohl(rmsgp->rm_type);
if (rmsgp->rm_vers != RPCRDMA_VERSION)
if (rmsgp->rm_vers != rpcrdma_version)
return -ENOSYS;
/* Pull in the extra for the padded case and bump our pointer */
if (rmsgp->rm_type == RDMA_MSGP) {
if (rmsgp->rm_type == rdma_msgp) {
int hdrlen;
rmsgp->rm_body.rm_padded.rm_align =
ntohl(rmsgp->rm_body.rm_padded.rm_align);
be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
rmsgp->rm_body.rm_padded.rm_thresh =
ntohl(rmsgp->rm_body.rm_padded.rm_thresh);
be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
......@@ -192,7 +186,7 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
* chunk list and a reply chunk list.
*/
va = &rmsgp->rm_body.rm_chunks[0];
vaend = (u32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
vaend = (__be32 *)((unsigned long)rmsgp + rqstp->rq_arg.len);
va = decode_read_list(va, vaend);
if (!va)
return -EINVAL;
......@@ -211,76 +205,20 @@ int svc_rdma_xdr_decode_req(struct rpcrdma_msg **rdma_req,
return hdr_len;
}
int svc_rdma_xdr_decode_deferred_req(struct svc_rqst *rqstp)
{
struct rpcrdma_msg *rmsgp = NULL;
struct rpcrdma_read_chunk *ch;
struct rpcrdma_write_array *ary;
u32 *va;
u32 hdrlen;
dprintk("svcrdma: processing deferred RDMA header on rqstp=%p\n",
rqstp);
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
/* Pull in the extra for the padded case and bump our pointer */
if (rmsgp->rm_type == RDMA_MSGP) {
va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (u32)((unsigned long)va - (unsigned long)rmsgp);
rqstp->rq_arg.head[0].iov_len -= hdrlen;
return hdrlen;
}
/*
* Skip all chunks to find RPC msg. These were previously processed
*/
va = &rmsgp->rm_body.rm_chunks[0];
/* Skip read-list */
for (ch = (struct rpcrdma_read_chunk *)va;
ch->rc_discrim != xdr_zero; ch++);
va = (u32 *)&ch->rc_position;
/* Skip write-list */
ary = (struct rpcrdma_write_array *)va;
if (ary->wc_discrim == xdr_zero)
va = (u32 *)&ary->wc_nchunks;
else
/*
* rs_length is the 2nd 4B field in wc_target and taking its
* address skips the list terminator
*/
va = (u32 *)&ary->wc_array[ary->wc_nchunks].wc_target.rs_length;
/* Skip reply-array */
ary = (struct rpcrdma_write_array *)va;
if (ary->wc_discrim == xdr_zero)
va = (u32 *)&ary->wc_nchunks;
else
va = (u32 *)&ary->wc_array[ary->wc_nchunks];
rqstp->rq_arg.head[0].iov_base = va;
hdrlen = (unsigned long)va - (unsigned long)rmsgp;
rqstp->rq_arg.head[0].iov_len -= hdrlen;
return hdrlen;
}
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rmsgp,
enum rpcrdma_errcode err, u32 *va)
enum rpcrdma_errcode err, __be32 *va)
{
u32 *startp = va;
__be32 *startp = va;
*va++ = htonl(rmsgp->rm_xid);
*va++ = htonl(rmsgp->rm_vers);
*va++ = htonl(xprt->sc_max_requests);
*va++ = htonl(RDMA_ERROR);
*va++ = htonl(err);
*va++ = rmsgp->rm_xid;
*va++ = rmsgp->rm_vers;
*va++ = cpu_to_be32(xprt->sc_max_requests);
*va++ = rdma_error;
*va++ = cpu_to_be32(err);
if (err == ERR_VERS) {
*va++ = htonl(RPCRDMA_VERSION);
*va++ = htonl(RPCRDMA_VERSION);
*va++ = rpcrdma_version;
*va++ = rpcrdma_version;
}
return (int)((unsigned long)va - (unsigned long)startp);
......@@ -297,7 +235,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
&rmsgp->rm_body.rm_chunks[1];
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)].
&wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
wc_target.rs_length;
else
wr_ary = (struct rpcrdma_write_array *)
......@@ -306,7 +244,7 @@ int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
/* skip reply array */
if (wr_ary->wc_discrim)
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_array[ntohl(wr_ary->wc_nchunks)];
&wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
else
wr_ary = (struct rpcrdma_write_array *)
&wr_ary->wc_nchunks;
......@@ -325,7 +263,7 @@ void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
ary = (struct rpcrdma_write_array *)
&rmsgp->rm_body.rm_chunks[1];
ary->wc_discrim = xdr_one;
ary->wc_nchunks = htonl(chunks);
ary->wc_nchunks = cpu_to_be32(chunks);
/* write-list terminator */
ary->wc_array[chunks].wc_target.rs_handle = xdr_zero;
......@@ -338,7 +276,7 @@ void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *ary,
int chunks)
{
ary->wc_discrim = xdr_one;
ary->wc_nchunks = htonl(chunks);
ary->wc_nchunks = cpu_to_be32(chunks);
}
void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
......@@ -350,7 +288,7 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
struct rpcrdma_segment *seg = &ary->wc_array[chunk_no].wc_target;
seg->rs_handle = rs_handle;
seg->rs_offset = rs_offset;
seg->rs_length = htonl(write_len);
seg->rs_length = cpu_to_be32(write_len);
}
void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
......@@ -358,10 +296,10 @@ void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
struct rpcrdma_msg *rdma_resp,
enum rpcrdma_proc rdma_type)
{
rdma_resp->rm_xid = htonl(rdma_argp->rm_xid);
rdma_resp->rm_vers = htonl(rdma_argp->rm_vers);
rdma_resp->rm_credit = htonl(xprt->sc_max_requests);
rdma_resp->rm_type = htonl(rdma_type);
rdma_resp->rm_xid = rdma_argp->rm_xid;
rdma_resp->rm_vers = rdma_argp->rm_vers;
rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
rdma_resp->rm_type = cpu_to_be32(rdma_type);
/* Encode <nul> chunks lists */
rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
......
......@@ -85,7 +85,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* RDMA_NOMSG: RDMA READ data should land just after RDMA RECV data */
rmsgp = (struct rpcrdma_msg *)rqstp->rq_arg.head[0].iov_base;
if (be32_to_cpu(rmsgp->rm_type) == RDMA_NOMSG)
if (rmsgp->rm_type == rdma_nomsg)
rqstp->rq_arg.pages = &rqstp->rq_pages[0];
else
rqstp->rq_arg.pages = &rqstp->rq_pages[1];
......
......@@ -240,6 +240,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
u32 xdr_off;
int chunk_off;
int chunk_no;
int nchunks;
struct rpcrdma_write_array *arg_ary;
struct rpcrdma_write_array *res_ary;
int ret;
......@@ -251,14 +252,15 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
&rdma_resp->rm_body.rm_chunks[1];
/* Write chunks start at the pagelist */
nchunks = be32_to_cpu(arg_ary->wc_nchunks);
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
xfer_len && chunk_no < arg_ary->wc_nchunks;
xfer_len && chunk_no < nchunks;
chunk_no++) {
struct rpcrdma_segment *arg_ch;
u64 rs_offset;
arg_ch = &arg_ary->wc_array[chunk_no].wc_target;
write_len = min(xfer_len, ntohl(arg_ch->rs_length));
write_len = min(xfer_len, be32_to_cpu(arg_ch->rs_length));
/* Prepare the response chunk given the length actually
* written */
......@@ -270,7 +272,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
ntohl(arg_ch->rs_handle),
be32_to_cpu(arg_ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
......@@ -318,13 +320,13 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
&rdma_resp->rm_body.rm_chunks[2];
/* xdr offset starts at RPC message */
nchunks = ntohl(arg_ary->wc_nchunks);
nchunks = be32_to_cpu(arg_ary->wc_nchunks);
for (xdr_off = 0, chunk_no = 0;
xfer_len && chunk_no < nchunks;
chunk_no++) {
u64 rs_offset;
ch = &arg_ary->wc_array[chunk_no].wc_target;
write_len = min(xfer_len, htonl(ch->rs_length));
write_len = min(xfer_len, be32_to_cpu(ch->rs_length));
/* Prepare the reply chunk given the length actually
* written */
......@@ -335,7 +337,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
chunk_off = 0;
while (write_len) {
ret = send_write(xprt, rqstp,
ntohl(ch->rs_handle),
be32_to_cpu(ch->rs_handle),
rs_offset + chunk_off,
xdr_off,
write_len,
......@@ -515,7 +517,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
inline_bytes = rqstp->rq_res.len;
/* Create the RDMA response header */
res_page = svc_rdma_get_page();
res_page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
rdma_resp = page_address(res_page);
reply_ary = svc_rdma_get_reply_array(rdma_argp);
if (reply_ary)
......
......@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_name = "rdma",
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
.xcl_max_payload = RPCRDMA_MAXPAYLOAD,
.xcl_ident = XPRT_TRANSPORT_RDMA,
};
......@@ -99,12 +99,8 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt;
while (1) {
ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep, GFP_KERNEL);
if (ctxt)
break;
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
}
ctxt = kmem_cache_alloc(svc_rdma_ctxt_cachep,
GFP_KERNEL | __GFP_NOFAIL);
ctxt->xprt = xprt;
INIT_LIST_HEAD(&ctxt->dto_q);
ctxt->count = 0;
......@@ -156,12 +152,8 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
struct svc_rdma_req_map *svc_rdma_get_req_map(void)
{
struct svc_rdma_req_map *map;
while (1) {
map = kmem_cache_alloc(svc_rdma_map_cachep, GFP_KERNEL);
if (map)
break;
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
}
map = kmem_cache_alloc(svc_rdma_map_cachep,
GFP_KERNEL | __GFP_NOFAIL);
map->count = 0;
return map;
}
......@@ -493,18 +485,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
return cma_xprt;
}
struct page *svc_rdma_get_page(void)
{
struct page *page;
while ((page = alloc_page(GFP_KERNEL)) == NULL) {
/* If we can't get memory, wait a bit and try again */
printk(KERN_INFO "svcrdma: out of memory...retrying in 1s\n");
schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
}
return page;
}
int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
{
struct ib_recv_wr recv_wr, *bad_recv_wr;
......@@ -523,7 +503,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
pr_err("svcrdma: Too many sges (%d)\n", sge_no);
goto err_put_ctxt;
}
page = svc_rdma_get_page();
page = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
ctxt->pages[sge_no] = page;
pa = ib_dma_map_page(xprt->sc_cm_id->device,
page, 0, PAGE_SIZE,
......@@ -1318,11 +1298,11 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
struct ib_send_wr err_wr;
struct page *p;
struct svc_rdma_op_ctxt *ctxt;
u32 *va;
__be32 *va;
int length;
int ret;
p = svc_rdma_get_page();
p = alloc_page(GFP_KERNEL | __GFP_NOFAIL);
va = page_address(p);
/* XDR encode error */
......
......@@ -48,7 +48,6 @@
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/addr.h>
......@@ -59,11 +58,6 @@
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS");
MODULE_AUTHOR("Network Appliance, Inc.");
/*
* tunables
*/
......@@ -711,7 +705,7 @@ static struct xprt_class xprt_rdma = {
.setup = xprt_setup_rdma,
};
static void __exit xprt_rdma_cleanup(void)
void xprt_rdma_cleanup(void)
{
int rc;
......@@ -728,7 +722,7 @@ static void __exit xprt_rdma_cleanup(void)
__func__, rc);
}
static int __init xprt_rdma_init(void)
int xprt_rdma_init(void)
{
int rc;
......@@ -753,6 +747,3 @@ static int __init xprt_rdma_init(void)
#endif
return 0;
}
module_init(xprt_rdma_init);
module_exit(xprt_rdma_cleanup);
......@@ -480,6 +480,11 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *);
*/
int rpcrdma_marshal_req(struct rpc_rqst *);
/* RPC/RDMA module init - xprtrdma/transport.c
*/
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
/* Temporary NFS request map cache. Created in svc_rdma.c */
extern struct kmem_cache *svc_rdma_map_cachep;
/* WR context cache. Created in svc_rdma.c */
......@@ -487,10 +492,4 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
/* Workqueue created in svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq;
#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
#else
#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
#endif
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment