Commit a74d70b6 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-2.6.40' of git://linux-nfs.org/~bfields/linux

* 'for-2.6.40' of git://linux-nfs.org/~bfields/linux: (22 commits)
  nfsd: make local functions static
  NFSD: Remove unused variable from nfsd4_decode_bind_conn_to_session()
  NFSD: Check status from nfsd4_map_bcts_dir()
  NFSD: Remove setting unused variable in nfsd_vfs_read()
  nfsd41: error out on repeated RECLAIM_COMPLETE
  nfsd41: compare request's opcnt with session's maxops at nfsd4_sequence
  nfsd v4.1 lOCKT clientid field must be ignored
  nfsd41: add flag checking for create_session
  nfsd41: make sure nfs server process OPEN with EXCLUSIVE4_1 correctly
  nfsd4: fix wrongsec handling for PUTFH + op cases
  nfsd4: make fh_verify responsibility of nfsd_lookup_dentry caller
  nfsd4: introduce OPDESC helper
  nfsd4: allow fh_verify caller to skip pseudoflavor checks
  nfsd: distinguish functions of NFSD_MAY_* flags
  svcrpc: complete svsk processing on cb receive failure
  svcrpc: take advantage of tcp autotuning
  SUNRPC: Don't wait for full record to receive tcp data
  svcrpc: copy cb reply instead of pages
  svcrpc: close connection if client sends short packet
  svcrpc: note network-order types in svc_process_calldir
  ...
parents b11b06d9 c47d832b
......@@ -1354,12 +1354,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
if (IS_ERR(exp))
return nfserrno(PTR_ERR(exp));
rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL);
if (rv)
goto out;
rv = check_nfsd_access(exp, rqstp);
if (rv)
fh_put(fhp);
out:
exp_put(exp);
return rv;
}
......
......@@ -245,7 +245,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
}
/* Now create the file and set attributes */
nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len,
nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len,
attr, newfhp,
argp->createmode, argp->verf, NULL, NULL);
......
......@@ -842,7 +842,7 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
return rv;
}
__be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
{
struct svc_fh fh;
int err;
......
......@@ -196,9 +196,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
/*
* Note: create modes (UNCHECKED,GUARDED...) are the same
* in NFSv4 as in v3.
* in NFSv4 as in v3 except EXCLUSIVE4_1.
*/
status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data,
status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
open->op_fname.len, &open->op_iattr,
&resfh, open->op_createmode,
(u32 *)open->op_verf.data,
......@@ -403,7 +403,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
putfh->pf_fhlen);
return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS);
}
static __be32
......@@ -762,6 +762,9 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 err;
fh_init(&resfh, NFS4_FHSIZE);
err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC);
if (err)
return err;
err = nfsd_lookup_dentry(rqstp, &cstate->current_fh,
secinfo->si_name, secinfo->si_namelen,
&exp, &dentry);
......@@ -986,6 +989,9 @@ enum nfsd4_op_flags {
ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */
/* For rfc 5661 section 2.6.3.1.1: */
OP_HANDLES_WRONGSEC = 1 << 3,
OP_IS_PUTFH_LIKE = 1 << 4,
};
struct nfsd4_operation {
......@@ -1031,6 +1037,44 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
return nfs_ok;
}
static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
{
return &nfsd4_ops[op->opnum];
}
static bool need_wrongsec_check(struct svc_rqst *rqstp)
{
struct nfsd4_compoundres *resp = rqstp->rq_resp;
struct nfsd4_compoundargs *argp = rqstp->rq_argp;
struct nfsd4_op *this = &argp->ops[resp->opcnt - 1];
struct nfsd4_op *next = &argp->ops[resp->opcnt];
struct nfsd4_operation *thisd;
struct nfsd4_operation *nextd;
thisd = OPDESC(this);
/*
* Most ops check wronsec on our own; only the putfh-like ops
* have special rules.
*/
if (!(thisd->op_flags & OP_IS_PUTFH_LIKE))
return false;
/*
* rfc 5661 2.6.3.1.1.6: don't bother erroring out a
* put-filehandle operation if we're not going to use the
* result:
*/
if (argp->opcnt == resp->opcnt)
return false;
nextd = OPDESC(next);
/*
* Rest of 2.6.3.1.1: certain operations will return WRONGSEC
* errors themselves as necessary; others should check for them
* now:
*/
return !(nextd->op_flags & OP_HANDLES_WRONGSEC);
}
/*
* COMPOUND call.
*/
......@@ -1108,7 +1152,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
goto encode_op;
}
opdesc = &nfsd4_ops[op->opnum];
opdesc = OPDESC(op);
if (!cstate->current_fh.fh_dentry) {
if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
......@@ -1126,6 +1170,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
else
BUG_ON(op->status == nfs_ok);
if (!op->status && need_wrongsec_check(rqstp))
op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp);
encode_op:
/* Only from SEQUENCE */
if (resp->cstate.status == nfserr_replay_cache) {
......@@ -1217,10 +1264,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_LOOKUP] = {
.op_func = (nfsd4op_func)nfsd4_lookup,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_LOOKUP",
},
[OP_LOOKUPP] = {
.op_func = (nfsd4op_func)nfsd4_lookupp,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_LOOKUPP",
},
[OP_NVERIFY] = {
......@@ -1229,6 +1278,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_OPEN] = {
.op_func = (nfsd4op_func)nfsd4_open,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_OPEN",
},
[OP_OPEN_CONFIRM] = {
......@@ -1241,17 +1291,20 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_PUTFH] = {
.op_func = (nfsd4op_func)nfsd4_putfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_IS_PUTFH_LIKE,
.op_name = "OP_PUTFH",
},
[OP_PUTPUBFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_IS_PUTFH_LIKE,
.op_name = "OP_PUTPUBFH",
},
[OP_PUTROOTFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_IS_PUTFH_LIKE,
.op_name = "OP_PUTROOTFH",
},
[OP_READ] = {
......@@ -1281,15 +1334,18 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_RESTOREFH] = {
.op_func = (nfsd4op_func)nfsd4_restorefh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_IS_PUTFH_LIKE,
.op_name = "OP_RESTOREFH",
},
[OP_SAVEFH] = {
.op_func = (nfsd4op_func)nfsd4_savefh,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SAVEFH",
},
[OP_SECINFO] = {
.op_func = (nfsd4op_func)nfsd4_secinfo,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO",
},
[OP_SETATTR] = {
......@@ -1353,6 +1409,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_SECINFO_NO_NAME] = {
.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO_NO_NAME",
},
};
......
......@@ -1519,6 +1519,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
bool confirm_me = false;
int status = 0;
if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
return nfserr_inval;
nfs4_lock_state();
unconf = find_unconfirmed_client(&cr_ses->clientid);
conf = find_confirmed_client(&cr_ses->clientid);
......@@ -1637,8 +1640,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
return nfserr_badsession;
status = nfsd4_map_bcts_dir(&bcts->dir);
nfsd4_new_conn(rqstp, cstate->session, bcts->dir);
return nfs_ok;
if (!status)
nfsd4_new_conn(rqstp, cstate->session, bcts->dir);
return status;
}
static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
......@@ -1725,6 +1729,13 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
return;
}
static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session)
{
struct nfsd4_compoundargs *args = rqstp->rq_argp;
return args->opcnt > session->se_fchannel.maxops;
}
__be32
nfsd4_sequence(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
......@@ -1753,6 +1764,10 @@ nfsd4_sequence(struct svc_rqst *rqstp,
if (!session)
goto out;
status = nfserr_too_many_ops;
if (nfsd4_session_too_many_ops(rqstp, session))
goto out;
status = nfserr_badslot;
if (seq->slotid >= session->se_fchannel.maxreqs)
goto out;
......@@ -1808,6 +1823,8 @@ nfsd4_sequence(struct svc_rqst *rqstp,
__be32
nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
{
int status = 0;
if (rc->rca_one_fs) {
if (!cstate->current_fh.fh_dentry)
return nfserr_nofilehandle;
......@@ -1817,9 +1834,14 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
*/
return nfs_ok;
}
nfs4_lock_state();
if (is_client_expired(cstate->session->se_client)) {
nfs4_unlock_state();
status = nfserr_complete_already;
if (cstate->session->se_client->cl_firststate)
goto out;
status = nfserr_stale_clientid;
if (is_client_expired(cstate->session->se_client))
/*
* The following error isn't really legal.
* But we only get here if the client just explicitly
......@@ -1827,11 +1849,13 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
* error it gets back on an operation for the dead
* client.
*/
return nfserr_stale_clientid;
}
goto out;
status = nfs_ok;
nfsd4_create_clid_dir(cstate->session->se_client);
out:
nfs4_unlock_state();
return nfs_ok;
return status;
}
__be32
......@@ -2462,7 +2486,7 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
return NULL;
}
int share_access_to_flags(u32 share_access)
static int share_access_to_flags(u32 share_access)
{
share_access &= ~NFS4_SHARE_WANT_MASK;
......@@ -2882,7 +2906,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
struct lock_manager nfsd4_manager = {
static struct lock_manager nfsd4_manager = {
};
static void
......
......@@ -424,15 +424,12 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access
static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
{
DECODE_HEAD;
u32 dummy;
READ_BUF(NFS4_MAX_SESSIONID_LEN + 8);
COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
READ32(bcts->dir);
/* XXX: Perhaps Tom Tucker could help us figure out how we
* should be using ctsa_use_conn_in_rdma_mode: */
READ32(dummy);
/* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker
* could help us figure out we should be using it. */
DECODE_TAIL;
}
......@@ -588,8 +585,6 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
READ_BUF(lockt->lt_owner.len);
READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
if (argp->minorversion && !zero_clientid(&lockt->lt_clientid))
return nfserr_inval;
DECODE_TAIL;
}
......@@ -3120,7 +3115,7 @@ nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr,
return nfserr;
}
__be32
static __be32
nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
struct nfsd4_sequence *seq)
{
......
......@@ -344,7 +344,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
* which clients virtually always use auth_sys for,
* even while using RPCSEC_GSS for NFS.
*/
if (access & NFSD_MAY_LOCK)
if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS)
goto skip_pseudoflavor_check;
/*
* Clients may expect to be able to use auth_sys during mount,
......
......@@ -181,16 +181,10 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct svc_export *exp;
struct dentry *dparent;
struct dentry *dentry;
__be32 err;
int host_err;
dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
/* Obtain dentry and export. */
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
if (err)
return err;
dparent = fhp->fh_dentry;
exp = fhp->fh_export;
exp_get(exp);
......@@ -254,6 +248,9 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
struct dentry *dentry;
__be32 err;
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
if (err)
return err;
err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
if (err)
return err;
......@@ -877,13 +874,11 @@ static __be32
nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
{
struct inode *inode;
mm_segment_t oldfs;
__be32 err;
int host_err;
err = nfserr_perm;
inode = file->f_path.dentry->d_inode;
if (file->f_op->splice_read && rqstp->rq_splice_ok) {
struct splice_desc sd = {
......@@ -1340,11 +1335,18 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#ifdef CONFIG_NFSD_V3
static inline int nfsd_create_is_exclusive(int createmode)
{
return createmode == NFS3_CREATE_EXCLUSIVE
|| createmode == NFS4_CREATE_EXCLUSIVE4_1;
}
/*
* NFSv3 version of nfsd_create
* NFSv3 and NFSv4 version of nfsd_create
*/
__be32
nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
char *fname, int flen, struct iattr *iap,
struct svc_fh *resfhp, int createmode, u32 *verifier,
int *truncp, int *created)
......@@ -1396,7 +1398,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (err)
goto out;
if (createmode == NFS3_CREATE_EXCLUSIVE) {
if (nfsd_create_is_exclusive(createmode)) {
/* solaris7 gets confused (bugid 4218508) if these have
* the high bit set, so just clear the high bits. If this is
* ever changed to use different attrs for storing the
......@@ -1437,6 +1439,11 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
&& dchild->d_inode->i_atime.tv_sec == v_atime
&& dchild->d_inode->i_size == 0 )
break;
case NFS4_CREATE_EXCLUSIVE4_1:
if ( dchild->d_inode->i_mtime.tv_sec == v_mtime
&& dchild->d_inode->i_atime.tv_sec == v_atime
&& dchild->d_inode->i_size == 0 )
goto set_attr;
/* fallthru */
case NFS3_CREATE_GUARDED:
err = nfserr_exist;
......@@ -1455,7 +1462,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_check_ignore_resizing(iap);
if (createmode == NFS3_CREATE_EXCLUSIVE) {
if (nfsd_create_is_exclusive(createmode)) {
/* Cram the verifier into atime/mtime */
iap->ia_valid = ATTR_MTIME|ATTR_ATIME
| ATTR_MTIME_SET|ATTR_ATIME_SET;
......@@ -2034,7 +2041,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
struct inode *inode = dentry->d_inode;
int err;
if (acc == NFSD_MAY_NOP)
if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
return 0;
#if 0
dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
......
......@@ -17,10 +17,14 @@
#define NFSD_MAY_SATTR 8
#define NFSD_MAY_TRUNC 16
#define NFSD_MAY_LOCK 32
#define NFSD_MAY_MASK 63
/* extra hints to permission and open routines: */
#define NFSD_MAY_OWNER_OVERRIDE 64
#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
#define NFSD_MAY_NOT_BREAK_LEASE 512
#define NFSD_MAY_BYPASS_GSS 1024
#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
......@@ -54,7 +58,7 @@ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
int type, dev_t rdev, struct svc_fh *res);
#ifdef CONFIG_NFSD_V3
__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *,
char *name, int len, struct iattr *attrs,
struct svc_fh *res, int createmode,
u32 *verifier, int *truncp, int *created);
......
......@@ -570,9 +570,11 @@ struct nfs4_sessionid {
};
/* Create Session Flags */
#define SESSION4_PERSIST 0x001
#define SESSION4_BACK_CHAN 0x002
#define SESSION4_RDMA 0x004
#define SESSION4_PERSIST 0x001
#define SESSION4_BACK_CHAN 0x002
#define SESSION4_RDMA 0x004
#define SESSION4_FLAG_MASK_A 0x007
enum state_protect_how4 {
SP4_NONE = 0,
......
......@@ -28,6 +28,7 @@ struct svc_sock {
/* private TCP part */
u32 sk_reclen; /* length of record */
u32 sk_tcplen; /* current read length */
struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
};
/*
......
......@@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
return len;
}
static int svc_partial_recvfrom(struct svc_rqst *rqstp,
struct kvec *iov, int nr,
int buflen, unsigned int base)
{
size_t save_iovlen;
void __user *save_iovbase;
unsigned int i;
int ret;
if (base == 0)
return svc_recvfrom(rqstp, iov, nr, buflen);
for (i = 0; i < nr; i++) {
if (iov[i].iov_len > base)
break;
base -= iov[i].iov_len;
}
save_iovlen = iov[i].iov_len;
save_iovbase = iov[i].iov_base;
iov[i].iov_len -= base;
iov[i].iov_base += base;
ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen);
iov[i].iov_len = save_iovlen;
iov[i].iov_base = save_iovbase;
return ret;
}
/*
* Set socket snd and rcv buffer lengths
*/
......@@ -409,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
lock_sock(sock->sk);
sock->sk->sk_sndbuf = snd * 2;
sock->sk->sk_rcvbuf = rcv * 2;
sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
sock->sk->sk_write_space(sock->sk);
release_sock(sock->sk);
#endif
......@@ -884,6 +910,56 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
return NULL;
}
static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
unsigned int i, len, npages;
if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
return 0;
len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < npages; i++) {
if (rqstp->rq_pages[i] != NULL)
put_page(rqstp->rq_pages[i]);
BUG_ON(svsk->sk_pages[i] == NULL);
rqstp->rq_pages[i] = svsk->sk_pages[i];
svsk->sk_pages[i] = NULL;
}
rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]);
return len;
}
static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
unsigned int i, len, npages;
if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
return;
len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < npages; i++) {
svsk->sk_pages[i] = rqstp->rq_pages[i];
rqstp->rq_pages[i] = NULL;
}
}
static void svc_tcp_clear_pages(struct svc_sock *svsk)
{
unsigned int i, len, npages;
if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
goto out;
len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < npages; i++) {
BUG_ON(svsk->sk_pages[i] == NULL);
put_page(svsk->sk_pages[i]);
svsk->sk_pages[i] = NULL;
}
out:
svsk->sk_tcplen = 0;
}
/*
* Receive data.
* If we haven't gotten the record length yet, get the next four bytes.
......@@ -893,31 +969,15 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
unsigned int want;
int len;
if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
/* sndbuf needs to have room for one request
* per thread, otherwise we can stall even when the
* network isn't a bottleneck.
*
* We count all threads rather than threads in a
* particular pool, which provides an upper bound
* on the number of threads which will access the socket.
*
* rcvbuf just needs to be able to hold a few requests.
* Normally they will be removed from the queue
* as soon a a complete request arrives.
*/
svc_sock_setbufsize(svsk->sk_sock,
(serv->sv_nrthreads+3) * serv->sv_max_mesg,
3 * serv->sv_max_mesg);
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
struct kvec iov;
want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
iov.iov_len = want;
if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
......@@ -927,7 +987,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
if (len < want) {
dprintk("svc: short recvfrom while reading record "
"length (%d of %d)\n", len, want);
goto err_again; /* record header not complete */
return -EAGAIN;
}
svsk->sk_reclen = ntohl(svsk->sk_reclen);
......@@ -954,83 +1014,75 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
}
}
/* Check whether enough data is available */
len = svc_recv_available(svsk);
if (len < 0)
goto error;
if (svsk->sk_reclen < 8)
goto err_delete; /* client is nuts. */
if (len < svsk->sk_reclen) {
dprintk("svc: incomplete TCP record (%d of %d)\n",
len, svsk->sk_reclen);
goto err_again; /* record not complete */
}
len = svsk->sk_reclen;
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
return len;
error:
if (len == -EAGAIN)
dprintk("RPC: TCP recv_record got EAGAIN\n");
error:
dprintk("RPC: TCP recv_record got %d\n", len);
return len;
err_delete:
err_delete:
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
err_again:
return -EAGAIN;
}
static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp,
struct rpc_rqst **reqpp, struct kvec *vec)
static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
struct rpc_rqst *req = NULL;
u32 *p;
u32 xid;
u32 calldir;
int len;
len = svc_recvfrom(rqstp, vec, 1, 8);
if (len < 0)
goto error;
struct kvec *src, *dst;
__be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
__be32 xid;
__be32 calldir;
p = (u32 *)rqstp->rq_arg.head[0].iov_base;
xid = *p++;
calldir = *p;
if (calldir == 0) {
/* REQUEST is the most common case */
vec[0] = rqstp->rq_arg.head[0];
} else {
/* REPLY */
struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
if (bc_xprt)
req = xprt_lookup_rqst(bc_xprt, xid);
if (!req) {
printk(KERN_NOTICE
"%s: Got unrecognized reply: "
"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
__func__, ntohl(calldir),
bc_xprt, xid);
vec[0] = rqstp->rq_arg.head[0];
goto out;
}
if (bc_xprt)
req = xprt_lookup_rqst(bc_xprt, xid);
memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
sizeof(struct xdr_buf));
/* copy the xid and call direction */
memcpy(req->rq_private_buf.head[0].iov_base,
rqstp->rq_arg.head[0].iov_base, 8);
vec[0] = req->rq_private_buf.head[0];
if (!req) {
printk(KERN_NOTICE
"%s: Got unrecognized reply: "
"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
__func__, ntohl(calldir),
bc_xprt, xid);
return -EAGAIN;
}
out:
vec[0].iov_base += 8;
vec[0].iov_len -= 8;
len = svsk->sk_reclen - 8;
error:
*reqpp = req;
return len;
memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
/*
* XXX!: cheating for now! Only copying HEAD.
* But we know this is good enough for now (in fact, for any
* callback reply in the forseeable future).
*/
dst = &req->rq_private_buf.head[0];
src = &rqstp->rq_arg.head[0];
if (dst->iov_len < src->iov_len)
return -EAGAIN; /* whatever; just giving up. */
memcpy(dst->iov_base, src->iov_base, src->iov_len);
xprt_complete_rqst(req->rq_task, svsk->sk_reclen);
rqstp->rq_arg.len = 0;
return 0;
}
static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
{
int i = 0;
int t = 0;
while (t < len) {
vec[i].iov_base = page_address(pages[i]);
vec[i].iov_len = PAGE_SIZE;
i++;
t += PAGE_SIZE;
}
return i;
}
/*
* Receive data from a TCP socket.
*/
......@@ -1041,8 +1093,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
int len;
struct kvec *vec;
int pnum, vlen;
struct rpc_rqst *req = NULL;
unsigned int want, base;
__be32 *p;
__be32 calldir;
int pnum;
dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
......@@ -1053,87 +1107,73 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
if (len < 0)
goto error;
base = svc_tcp_restore_pages(svsk, rqstp);
want = svsk->sk_reclen - base;
vec = rqstp->rq_vec;
vec[0] = rqstp->rq_arg.head[0];
vlen = PAGE_SIZE;
/*
* We have enough data for the whole tcp record. Let's try and read the
* first 8 bytes to get the xid and the call direction. We can use this
* to figure out if this is a call or a reply to a callback. If
* sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
* In that case, don't bother with the calldir and just read the data.
* It will be rejected in svc_process.
*/
if (len >= 8) {
len = svc_process_calldir(svsk, rqstp, &req, vec);
if (len < 0)
goto err_again;
vlen -= 8;
}
pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
svsk->sk_reclen);
pnum = 1;
while (vlen < len) {
vec[pnum].iov_base = (req) ?
page_address(req->rq_private_buf.pages[pnum - 1]) :
page_address(rqstp->rq_pages[pnum]);
vec[pnum].iov_len = PAGE_SIZE;
pnum++;
vlen += PAGE_SIZE;
}
rqstp->rq_respages = &rqstp->rq_pages[pnum];
/* Now receive data */
len = svc_recvfrom(rqstp, vec, pnum, len);
if (len < 0)
goto err_again;
/*
* Account for the 8 bytes we read earlier
*/
len += 8;
if (req) {
xprt_complete_rqst(req->rq_task, len);
len = 0;
goto out;
len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
if (len >= 0)
svsk->sk_tcplen += len;
if (len != want) {
if (len < 0 && len != -EAGAIN)
goto err_other;
svc_tcp_save_pages(svsk, rqstp);
dprintk("svc: incomplete TCP record (%d of %d)\n",
svsk->sk_tcplen, svsk->sk_reclen);
goto err_noclose;
}
dprintk("svc: TCP complete record (%d bytes)\n", len);
rqstp->rq_arg.len = len;
rqstp->rq_arg.len = svsk->sk_reclen;
rqstp->rq_arg.page_base = 0;
if (len <= rqstp->rq_arg.head[0].iov_len) {
rqstp->rq_arg.head[0].iov_len = len;
if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
rqstp->rq_arg.page_len = 0;
} else {
rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
}
} else
rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
out:
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
if (calldir)
len = receive_cb_reply(svsk, rqstp);
/* Reset TCP read info */
svsk->sk_reclen = 0;
svsk->sk_tcplen = 0;
/* If we have more data, signal svc_xprt_enqueue() to try again */
if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
if (len < 0)
goto error;
svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
if (serv->sv_stats)
serv->sv_stats->nettcpcnt++;
return len;
dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len);
return rqstp->rq_arg.len;
err_again:
if (len == -EAGAIN) {
dprintk("RPC: TCP recvfrom got EAGAIN\n");
return len;
}
error:
if (len != -EAGAIN) {
printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
svsk->sk_xprt.xpt_server->sv_name, -len);
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
}
if (len != -EAGAIN)
goto err_other;
dprintk("RPC: TCP recvfrom got EAGAIN\n");
return -EAGAIN;
err_other:
printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
svsk->sk_xprt.xpt_server->sv_name, -len);
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
err_noclose:
return -EAGAIN; /* record not complete */
}
/*
......@@ -1304,18 +1344,10 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
svsk->sk_reclen = 0;
svsk->sk_tcplen = 0;
memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
/* initialise setting must have enough space to
* receive and respond to one request.
* svc_tcp_recvfrom will re-adjust if necessary
*/
svc_sock_setbufsize(svsk->sk_sock,
3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state != TCP_ESTABLISHED)
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
......@@ -1379,8 +1411,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
/* Initialize the socket */
if (sock->type == SOCK_DGRAM)
svc_udp_init(svsk, serv);
else
else {
/* initialise setting must have enough space to
* receive and respond to one request.
*/
svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
4 * serv->sv_max_mesg);
svc_tcp_init(svsk, serv);
}
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
svsk, svsk->sk_sk);
......@@ -1562,8 +1600,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
svc_sock_detach(xprt);
if (!test_bit(XPT_LISTENER, &xprt->xpt_flags))
if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
svc_tcp_clear_pages(svsk);
kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR);
}
}
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment