Commit a0e7d495, authored by Neil Brown and committed by Linus Torvalds

[PATCH] kNFSd: Convert nfsd to use a list of pages instead of one big buffer

This means:
  1/ We don't need an order-4 allocation for each nfsd that starts
  2/ We don't need an order-4 allocation in skb_linearize when
     we receive a 32K write request
  3/ It will be easier to incorporate the zero-copy read changes

The pages are handed around using an xdr_buf (instead of svc_buf)
much like the NFS client so future crypto code can use the same
data structure for both client and server.

The code assumes that most requests and replies fit in a single page.
The exceptions are assumed to have some largish 'data' bit, and the
rest must fit in a single page.
The 'data' bits are file data, readdir data, and symlinks.
There must be only one 'data' bit per request.
This is all fine for nfs/nlm.

This isn't complete:
  1/ NFSv4 hasn't been converted yet (it won't compile)
  2/ NFSv3 allows symlinks up to 4096, but the code will only support
     up to about 3800 at the moment
  3/ readdir responses are limited to about 3800.

but I thought that patch was big enough, and the rest can come
later.


This patch introduces vfs_readv and vfs_writev as parallels to
vfs_read and vfs_write.  This means there is a fair bit of
duplication in read_write.c that should probably be tidied up...
parent 335c5fc7
...@@ -216,25 +216,6 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp) ...@@ -216,25 +216,6 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp)
return p; return p;
} }
/*
 * Bounds check performed once the arguments have been decoded.
 *
 * Returns non-zero when the decode cursor p lies no further than
 * rq_argbuf.buflen from rq_argbuf.base, and 0 when it has run past that.
 */
static inline int
xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
{
	return p - rqstp->rq_argbuf.base <= rqstp->rq_argbuf.buflen;
}
/*
 * Record how far the encode cursor p has advanced from the start of the
 * reply buffer (stored in rq_resbuf.len) and report whether that length
 * still fits within rq_resbuf.buflen.
 *
 * Returns 1 when the reply fits, 0 when it does not.
 */
static inline int
xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
{
	struct svc_buf *resbuf = &rqstp->rq_resbuf;

	resbuf->len = p - resbuf->base;
	if (resbuf->len > resbuf->buflen)
		return 0;
	return 1;
}
/* /*
* First, the server side XDR functions * First, the server side XDR functions
......
...@@ -222,26 +222,6 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp) ...@@ -222,26 +222,6 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp)
} }
/*
 * Bounds check performed once the arguments have been decoded.
 *
 * Returns non-zero when the decode cursor p lies no further than
 * rq_argbuf.buflen from rq_argbuf.base, and 0 when it has run past that.
 */
static int
xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
{
	return p - rqstp->rq_argbuf.base <= rqstp->rq_argbuf.buflen;
}
/*
 * Record how far the encode cursor p has advanced from the start of the
 * reply buffer (stored in rq_resbuf.len) and report whether that length
 * still fits within rq_resbuf.buflen.
 *
 * Returns 1 when the reply fits, 0 when it does not.
 */
static int
xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
{
	struct svc_buf *resbuf = &rqstp->rq_resbuf;

	resbuf->len = p - resbuf->base;
	if (resbuf->len > resbuf->buflen)
		return 0;
	return 1;
}
/* /*
* First, the server side XDR functions * First, the server side XDR functions
*/ */
......
...@@ -43,11 +43,11 @@ static int nfs3_ftypes[] = { ...@@ -43,11 +43,11 @@ static int nfs3_ftypes[] = {
/* /*
* Reserve room in the send buffer * Reserve room in the send buffer
*/ */
static void static inline void
svcbuf_reserve(struct svc_buf *buf, u32 **ptr, int *len, int nr) svcbuf_reserve(struct xdr_buf *buf, u32 **ptr, int *len, int nr)
{ {
*ptr = buf->buf + nr; *ptr = (u32*)(buf->head[0].iov_base+buf->head[0].iov_len) + nr;
*len = buf->buflen - buf->len - nr; *len = ((PAGE_SIZE-buf->head[0].iov_len)>>2) - nr;
} }
/* /*
...@@ -150,7 +150,7 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, ...@@ -150,7 +150,7 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh)); dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
/* Reserve room for status, post_op_attr, and path length */ /* Reserve room for status, post_op_attr, and path length */
svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy, svcbuf_reserve(&rqstp->rq_res, &path, &dummy,
1 + NFS3_POST_OP_ATTR_WORDS + 1); 1 + NFS3_POST_OP_ATTR_WORDS + 1);
/* Read the symlink. */ /* Read the symlink. */
...@@ -167,8 +167,7 @@ static int ...@@ -167,8 +167,7 @@ static int
nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
struct nfsd3_readres *resp) struct nfsd3_readres *resp)
{ {
u32 * buffer; int nfserr;
int nfserr, avail;
dprintk("nfsd: READ(3) %s %lu bytes at %lu\n", dprintk("nfsd: READ(3) %s %lu bytes at %lu\n",
SVCFH_fmt(&argp->fh), SVCFH_fmt(&argp->fh),
...@@ -179,18 +178,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, ...@@ -179,18 +178,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof) * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
* + 1 (xdr opaque byte count) = 26 * + 1 (xdr opaque byte count) = 26
*/ */
svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail,
1 + NFS3_POST_OP_ATTR_WORDS + 3);
resp->count = argp->count; resp->count = argp->count;
if ((avail << 2) < resp->count) if (NFSSVC_MAXBLKSIZE < resp->count)
resp->count = avail << 2; resp->count = NFSSVC_MAXBLKSIZE;
svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + argp->count +4); svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh); fh_copy(&resp->fh, &argp->fh);
nfserr = nfsd_read(rqstp, &resp->fh, nfserr = nfsd_read(rqstp, &resp->fh,
argp->offset, argp->offset,
(char *) buffer, argp->vec, argp->vlen,
&resp->count); &resp->count);
if (nfserr == 0) { if (nfserr == 0) {
struct inode *inode = resp->fh.fh_dentry->d_inode; struct inode *inode = resp->fh.fh_dentry->d_inode;
...@@ -220,7 +218,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, ...@@ -220,7 +218,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
resp->committed = argp->stable; resp->committed = argp->stable;
nfserr = nfsd_write(rqstp, &resp->fh, nfserr = nfsd_write(rqstp, &resp->fh,
argp->offset, argp->offset,
argp->data, argp->vec, argp->vlen,
argp->len, argp->len,
&resp->committed); &resp->committed);
resp->count = argp->count; resp->count = argp->count;
...@@ -447,7 +445,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, ...@@ -447,7 +445,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
argp->count, (u32) argp->cookie); argp->count, (u32) argp->cookie);
/* Reserve buffer space for status, attributes and verifier */ /* Reserve buffer space for status, attributes and verifier */
svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, svcbuf_reserve(&rqstp->rq_res, &buffer, &count,
1 + NFS3_POST_OP_ATTR_WORDS + 2); 1 + NFS3_POST_OP_ATTR_WORDS + 2);
/* Make sure we've room for the NULL ptr & eof flag, and shrink to /* Make sure we've room for the NULL ptr & eof flag, and shrink to
...@@ -490,7 +488,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, ...@@ -490,7 +488,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
argp->count, (u32) argp->cookie); argp->count, (u32) argp->cookie);
/* Reserve buffer space for status, attributes and verifier */ /* Reserve buffer space for status, attributes and verifier */
svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, svcbuf_reserve(&rqstp->rq_res, &buffer, &count,
1 + NFS3_POST_OP_ATTR_WORDS + 2); 1 + NFS3_POST_OP_ATTR_WORDS + 2);
/* Make sure we've room for the NULL ptr & eof flag, and shrink to /* Make sure we've room for the NULL ptr & eof flag, and shrink to
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/dcache.h> #include <linux/dcache.h>
#include <linux/namei.h> #include <linux/namei.h>
#include <linux/mm.h>
#include <linux/sunrpc/xdr.h> #include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svc.h> #include <linux/sunrpc/svc.h>
...@@ -269,27 +270,6 @@ encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) ...@@ -269,27 +270,6 @@ encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
return encode_post_op_attr(rqstp, p, fhp); return encode_post_op_attr(rqstp, p, fhp);
} }
/*
 * Bounds check performed once the arguments have been decoded.
 *
 * Returns non-zero when the decode cursor p lies no further than
 * rq_argbuf.buflen from rq_argbuf.base, and 0 when it has run past that.
 */
static inline int
xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
{
	return p - rqstp->rq_argbuf.base <= rqstp->rq_argbuf.buflen;
}
/*
 * Record how far the encode cursor p has advanced from the start of the
 * reply buffer (stored in rq_resbuf.len), emit a debug trace of the
 * cursor, base and buffer limit, and report whether the recorded length
 * still fits within rq_resbuf.buflen.
 *
 * Returns 1 when the reply fits, 0 when it does not.
 */
static inline int
xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
{
	struct svc_buf *resbuf = &rqstp->rq_resbuf;

	resbuf->len = p - resbuf->base;
	dprintk("nfsd: ressize_check p %p base %p len %d\n",
			p, resbuf->base, resbuf->buflen);
	if (resbuf->len > resbuf->buflen)
		return 0;
	return 1;
}
/* /*
* XDR decode functions * XDR decode functions
...@@ -342,11 +322,29 @@ int ...@@ -342,11 +322,29 @@ int
nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_readargs *args) struct nfsd3_readargs *args)
{ {
int len;
int v,pn;
if (!(p = decode_fh(p, &args->fh)) if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset))) || !(p = xdr_decode_hyper(p, &args->offset)))
return 0; return 0;
args->count = ntohl(*p++); len = args->count = ntohl(*p++);
if (len > NFSSVC_MAXBLKSIZE)
len = NFSSVC_MAXBLKSIZE;
/* set up the iovec */
v=0;
while (len > 0) {
pn = rqstp->rq_resused;
take_page(rqstp);
args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
args->vec[v].iov_len = len < PAGE_SIZE? len : PAGE_SIZE;
v++;
len -= PAGE_SIZE;
}
args->vlen = v;
return xdr_argsize_check(rqstp, p); return xdr_argsize_check(rqstp, p);
} }
...@@ -354,17 +352,33 @@ int ...@@ -354,17 +352,33 @@ int
nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd3_writeargs *args) struct nfsd3_writeargs *args)
{ {
int len, v;
if (!(p = decode_fh(p, &args->fh)) if (!(p = decode_fh(p, &args->fh))
|| !(p = xdr_decode_hyper(p, &args->offset))) || !(p = xdr_decode_hyper(p, &args->offset)))
return 0; return 0;
args->count = ntohl(*p++); args->count = ntohl(*p++);
args->stable = ntohl(*p++); args->stable = ntohl(*p++);
args->len = ntohl(*p++); len = args->len = ntohl(*p++);
args->data = (char *) p;
p += XDR_QUADLEN(args->len); args->vec[0].iov_base = (void*)p;
args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
(((void*)p) - rqstp->rq_arg.head[0].iov_base);
if (len > NFSSVC_MAXBLKSIZE)
len = NFSSVC_MAXBLKSIZE;
v= 0;
while (len > args->vec[v].iov_len) {
len -= args->vec[v].iov_len;
v++;
args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
args->vec[v].iov_len = PAGE_SIZE;
}
args->vec[v].iov_len = len;
args->vlen = v+1;
return xdr_argsize_check(rqstp, p); return args->count == args->len && args->vec[0].iov_len > 0;
} }
int int
...@@ -584,8 +598,22 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p, ...@@ -584,8 +598,22 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p,
*p++ = htonl(resp->count); *p++ = htonl(resp->count);
*p++ = htonl(resp->eof); *p++ = htonl(resp->eof);
*p++ = htonl(resp->count); /* xdr opaque count */ *p++ = htonl(resp->count); /* xdr opaque count */
p += XDR_QUADLEN(resp->count); xdr_ressize_check(rqstp, p);
/* now update rqstp->rq_res to reflect data as well */
rqstp->rq_res.page_base = 0;
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad with tail */
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
} }
rqstp->rq_res.len =
rqstp->rq_res.head[0].iov_len+
rqstp->rq_res.page_len+
rqstp->rq_res.tail[0].iov_len;
return 1;
} else
return xdr_ressize_check(rqstp, p); return xdr_ressize_check(rqstp, p);
} }
......
...@@ -41,7 +41,7 @@ static struct svc_cacherep * lru_tail; ...@@ -41,7 +41,7 @@ static struct svc_cacherep * lru_tail;
static struct svc_cacherep * nfscache; static struct svc_cacherep * nfscache;
static int cache_disabled = 1; static int cache_disabled = 1;
static int nfsd_cache_append(struct svc_rqst *rqstp, struct svc_buf *data); static int nfsd_cache_append(struct svc_rqst *rqstp, struct iovec *vec);
/* /*
* locking for the reply cache: * locking for the reply cache:
...@@ -107,7 +107,7 @@ nfsd_cache_shutdown(void) ...@@ -107,7 +107,7 @@ nfsd_cache_shutdown(void)
for (rp = lru_head; rp; rp = rp->c_lru_next) { for (rp = lru_head; rp; rp = rp->c_lru_next) {
if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF)
kfree(rp->c_replbuf.buf); kfree(rp->c_replvec.iov_base);
} }
cache_disabled = 1; cache_disabled = 1;
...@@ -242,8 +242,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) ...@@ -242,8 +242,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
/* release any buffer */ /* release any buffer */
if (rp->c_type == RC_REPLBUFF) { if (rp->c_type == RC_REPLBUFF) {
kfree(rp->c_replbuf.buf); kfree(rp->c_replvec.iov_base);
rp->c_replbuf.buf = NULL; rp->c_replvec.iov_base = NULL;
} }
rp->c_type = RC_NOCACHE; rp->c_type = RC_NOCACHE;
out: out:
...@@ -272,11 +272,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) ...@@ -272,11 +272,11 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
case RC_NOCACHE: case RC_NOCACHE:
break; break;
case RC_REPLSTAT: case RC_REPLSTAT:
svc_putu32(&rqstp->rq_resbuf, rp->c_replstat); svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
rtn = RC_REPLY; rtn = RC_REPLY;
break; break;
case RC_REPLBUFF: case RC_REPLBUFF:
if (!nfsd_cache_append(rqstp, &rp->c_replbuf)) if (!nfsd_cache_append(rqstp, &rp->c_replvec))
goto out; /* should not happen */ goto out; /* should not happen */
rtn = RC_REPLY; rtn = RC_REPLY;
break; break;
...@@ -308,13 +308,14 @@ void ...@@ -308,13 +308,14 @@ void
nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp) nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp)
{ {
struct svc_cacherep *rp; struct svc_cacherep *rp;
struct svc_buf *resp = &rqstp->rq_resbuf, *cachp; struct iovec *resv = &rqstp->rq_res.head[0], *cachv;
int len; int len;
if (!(rp = rqstp->rq_cacherep) || cache_disabled) if (!(rp = rqstp->rq_cacherep) || cache_disabled)
return; return;
len = resp->len - (statp - resp->base); len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
len >>= 2;
/* Don't cache excessive amounts of data and XDR failures */ /* Don't cache excessive amounts of data and XDR failures */
if (!statp || len > (256 >> 2)) { if (!statp || len > (256 >> 2)) {
...@@ -329,16 +330,16 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp) ...@@ -329,16 +330,16 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp)
rp->c_replstat = *statp; rp->c_replstat = *statp;
break; break;
case RC_REPLBUFF: case RC_REPLBUFF:
cachp = &rp->c_replbuf; cachv = &rp->c_replvec;
cachp->buf = (u32 *) kmalloc(len << 2, GFP_KERNEL); cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
if (!cachp->buf) { if (!cachv->iov_base) {
spin_lock(&cache_lock); spin_lock(&cache_lock);
rp->c_state = RC_UNUSED; rp->c_state = RC_UNUSED;
spin_unlock(&cache_lock); spin_unlock(&cache_lock);
return; return;
} }
cachp->len = len; cachv->iov_len = len << 2;
memcpy(cachp->buf, statp, len << 2); memcpy(cachv->iov_base, statp, len << 2);
break; break;
} }
spin_lock(&cache_lock); spin_lock(&cache_lock);
...@@ -353,19 +354,20 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp) ...@@ -353,19 +354,20 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp)
/* /*
* Copy cached reply to current reply buffer. Should always fit. * Copy cached reply to current reply buffer. Should always fit.
* FIXME as reply is in a page, we should just attach the page, and
* keep a refcount....
*/ */
static int static int
nfsd_cache_append(struct svc_rqst *rqstp, struct svc_buf *data) nfsd_cache_append(struct svc_rqst *rqstp, struct iovec *data)
{ {
struct svc_buf *resp = &rqstp->rq_resbuf; struct iovec *vec = &rqstp->rq_res.head[0];
if (resp->len + data->len > resp->buflen) { if (vec->iov_len + data->iov_len > PAGE_SIZE) {
printk(KERN_WARNING "nfsd: cached reply too large (%d).\n", printk(KERN_WARNING "nfsd: cached reply too large (%d).\n",
data->len); data->iov_len);
return 0; return 0;
} }
memcpy(resp->buf, data->buf, data->len << 2); memcpy((char*)vec->iov_base + vec->iov_len, data->iov_base, data->iov_len);
resp->buf += data->len; vec->iov_len += data->iov_len;
resp->len += data->len;
return 1; return 1;
} }
...@@ -30,11 +30,11 @@ typedef struct svc_buf svc_buf; ...@@ -30,11 +30,11 @@ typedef struct svc_buf svc_buf;
#define NFSDDBG_FACILITY NFSDDBG_PROC #define NFSDDBG_FACILITY NFSDDBG_PROC
static void static inline void
svcbuf_reserve(struct svc_buf *buf, u32 **ptr, int *len, int nr) svcbuf_reserve(struct xdr_buf *buf, u32 **ptr, int *len, int nr)
{ {
*ptr = buf->buf + nr; *ptr = (u32*)(buf->head[0].iov_base+buf->head[0].iov_len) + nr;
*len = buf->buflen - buf->len - nr; *len = ((PAGE_SIZE-buf->head[0].iov_len)>>2) - nr;
} }
static int static int
...@@ -109,7 +109,7 @@ nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, ...@@ -109,7 +109,7 @@ nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh)); dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
/* Reserve room for status and path length */ /* Reserve room for status and path length */
svcbuf_reserve(&rqstp->rq_resbuf, &path, &dummy, 2); svcbuf_reserve(&rqstp->rq_res, &path, &dummy, 2);
/* Read the symlink. */ /* Read the symlink. */
resp->len = NFS_MAXPATHLEN; resp->len = NFS_MAXPATHLEN;
...@@ -127,8 +127,7 @@ static int ...@@ -127,8 +127,7 @@ static int
nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
struct nfsd_readres *resp) struct nfsd_readres *resp)
{ {
u32 * buffer; int nfserr;
int nfserr, avail;
dprintk("nfsd: READ %s %d bytes at %d\n", dprintk("nfsd: READ %s %d bytes at %d\n",
SVCFH_fmt(&argp->fh), SVCFH_fmt(&argp->fh),
...@@ -137,22 +136,21 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, ...@@ -137,22 +136,21 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
/* Obtain buffer pointer for payload. 19 is 1 word for /* Obtain buffer pointer for payload. 19 is 1 word for
* status, 17 words for fattr, and 1 word for the byte count. * status, 17 words for fattr, and 1 word for the byte count.
*/ */
svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &avail, 19);
if ((avail << 2) < argp->count) { if (NFSSVC_MAXBLKSIZE < argp->count) {
printk(KERN_NOTICE printk(KERN_NOTICE
"oversized read request from %08x:%d (%d bytes)\n", "oversized read request from %08x:%d (%d bytes)\n",
ntohl(rqstp->rq_addr.sin_addr.s_addr), ntohl(rqstp->rq_addr.sin_addr.s_addr),
ntohs(rqstp->rq_addr.sin_port), ntohs(rqstp->rq_addr.sin_port),
argp->count); argp->count);
argp->count = avail << 2; argp->count = NFSSVC_MAXBLKSIZE;
} }
svc_reserve(rqstp, (19<<2) + argp->count + 4); svc_reserve(rqstp, (19<<2) + argp->count + 4);
resp->count = argp->count; resp->count = argp->count;
nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset, argp->offset,
(char *) buffer, argp->vec, argp->vlen,
&resp->count); &resp->count);
return nfserr; return nfserr;
...@@ -175,7 +173,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, ...@@ -175,7 +173,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), nfserr = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset, argp->offset,
argp->data, argp->vec, argp->vlen,
argp->len, argp->len,
&stable); &stable);
return nfserr; return nfserr;
...@@ -478,7 +476,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp, ...@@ -478,7 +476,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
argp->count, argp->cookie); argp->count, argp->cookie);
/* Reserve buffer space for status */ /* Reserve buffer space for status */
svcbuf_reserve(&rqstp->rq_resbuf, &buffer, &count, 1); svcbuf_reserve(&rqstp->rq_res, &buffer, &count, 1);
/* Shrink to the client read size */ /* Shrink to the client read size */
if (count > (argp->count >> 2)) if (count > (argp->count >> 2))
......
...@@ -277,7 +277,8 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp) ...@@ -277,7 +277,8 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
/* Decode arguments */ /* Decode arguments */
xdr = proc->pc_decode; xdr = proc->pc_decode;
if (xdr && !xdr(rqstp, rqstp->rq_argbuf.buf, rqstp->rq_argp)) { if (xdr && !xdr(rqstp, (u32*)rqstp->rq_arg.head[0].iov_base,
rqstp->rq_argp)) {
dprintk("nfsd: failed to decode arguments!\n"); dprintk("nfsd: failed to decode arguments!\n");
nfsd_cache_update(rqstp, RC_NOCACHE, NULL); nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
*statp = rpc_garbage_args; *statp = rpc_garbage_args;
...@@ -293,14 +294,15 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp) ...@@ -293,14 +294,15 @@ nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
} }
if (rqstp->rq_proc != 0) if (rqstp->rq_proc != 0)
svc_putu32(&rqstp->rq_resbuf, nfserr); svc_putu32(&rqstp->rq_res.head[0], nfserr);
/* Encode result. /* Encode result.
* For NFSv2, additional info is never returned in case of an error. * For NFSv2, additional info is never returned in case of an error.
*/ */
if (!(nfserr && rqstp->rq_vers == 2)) { if (!(nfserr && rqstp->rq_vers == 2)) {
xdr = proc->pc_encode; xdr = proc->pc_encode;
if (xdr && !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) { if (xdr && !xdr(rqstp, (u32*)(rqstp->rq_res.head[0].iov_base+rqstp->rq_res.head[0].iov_len),
rqstp->rq_resp)) {
/* Failed to encode result. Release cache entry */ /* Failed to encode result. Release cache entry */
dprintk("nfsd: failed to encode result!\n"); dprintk("nfsd: failed to encode result!\n");
nfsd_cache_update(rqstp, RC_NOCACHE, NULL); nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <linux/sunrpc/svc.h> #include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h> #include <linux/nfsd/nfsd.h>
#include <linux/nfsd/xdr.h> #include <linux/nfsd/xdr.h>
#include <linux/mm.h>
#define NFSDDBG_FACILITY NFSDDBG_XDR #define NFSDDBG_FACILITY NFSDDBG_XDR
...@@ -176,27 +177,6 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) ...@@ -176,27 +177,6 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
return p; return p;
} }
/*
 * Bounds check performed once the arguments have been decoded.
 *
 * Returns non-zero when the decode cursor p lies no further than
 * rq_argbuf.buflen from rq_argbuf.base, and 0 when it has run past that.
 */
static inline int
xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
{
	return p - rqstp->rq_argbuf.base <= rqstp->rq_argbuf.buflen;
}
/*
 * Record how far the encode cursor p has advanced from the start of the
 * reply buffer (stored in rq_resbuf.len), emit a debug trace of the
 * cursor, base and buffer limit, and report whether the recorded length
 * still fits within rq_resbuf.buflen.
 *
 * Returns 1 when the reply fits, 0 when it does not.
 */
static inline int
xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
{
	struct svc_buf *resbuf = &rqstp->rq_resbuf;

	resbuf->len = p - resbuf->base;
	dprintk("nfsd: ressize_check p %p base %p len %d\n",
			p, resbuf->base, resbuf->buflen);
	if (resbuf->len > resbuf->buflen)
		return 0;
	return 1;
}
/* /*
* XDR decode functions * XDR decode functions
...@@ -241,13 +221,31 @@ int ...@@ -241,13 +221,31 @@ int
nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p, nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd_readargs *args) struct nfsd_readargs *args)
{ {
int len;
int v,pn;
if (!(p = decode_fh(p, &args->fh))) if (!(p = decode_fh(p, &args->fh)))
return 0; return 0;
args->offset = ntohl(*p++); args->offset = ntohl(*p++);
args->count = ntohl(*p++); len = args->count = ntohl(*p++);
args->totalsize = ntohl(*p++); p++; /* totalcount - unused */
if (len > NFSSVC_MAXBLKSIZE)
len = NFSSVC_MAXBLKSIZE;
/* set up somewhere to store response.
* We take pages, put them on reslist and include in iovec
*/
v=0;
while (len > 0) {
pn=rqstp->rq_resused;
take_page(rqstp);
args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
v++;
len -= PAGE_SIZE;
}
args->vlen = v;
return xdr_argsize_check(rqstp, p); return xdr_argsize_check(rqstp, p);
} }
...@@ -255,17 +253,30 @@ int ...@@ -255,17 +253,30 @@ int
nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
struct nfsd_writeargs *args) struct nfsd_writeargs *args)
{ {
int len;
int v;
if (!(p = decode_fh(p, &args->fh))) if (!(p = decode_fh(p, &args->fh)))
return 0; return 0;
p++; /* beginoffset */ p++; /* beginoffset */
args->offset = ntohl(*p++); /* offset */ args->offset = ntohl(*p++); /* offset */
p++; /* totalcount */ p++; /* totalcount */
args->len = ntohl(*p++); len = args->len = ntohl(*p++);
args->data = (char *) p; args->vec[0].iov_base = (void*)p;
p += XDR_QUADLEN(args->len); args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
(((void*)p) - rqstp->rq_arg.head[0].iov_base);
return xdr_argsize_check(rqstp, p); if (len > NFSSVC_MAXBLKSIZE)
len = NFSSVC_MAXBLKSIZE;
v = 0;
while (len > args->vec[v].iov_len) {
len -= args->vec[v].iov_len;
v++;
args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
args->vec[v].iov_len = PAGE_SIZE;
}
args->vec[v].iov_len = len;
args->vlen = v+1;
return args->vec[0].iov_len > 0;
} }
int int
...@@ -371,9 +382,22 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p, ...@@ -371,9 +382,22 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
{ {
p = encode_fattr(rqstp, p, &resp->fh); p = encode_fattr(rqstp, p, &resp->fh);
*p++ = htonl(resp->count); *p++ = htonl(resp->count);
p += XDR_QUADLEN(resp->count); xdr_ressize_check(rqstp, p);
return xdr_ressize_check(rqstp, p); /* now update rqstp->rq_res to reflect data aswell */
rqstp->rq_res.page_base = 0;
rqstp->rq_res.page_len = resp->count;
if (resp->count & 3) {
/* need to pad with tail */
rqstp->rq_res.tail[0].iov_base = p;
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
}
rqstp->rq_res.len =
rqstp->rq_res.head[0].iov_len+
rqstp->rq_res.page_len+
rqstp->rq_res.tail[0].iov_len;
return 1;
} }
int int
......
...@@ -577,7 +577,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino) ...@@ -577,7 +577,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
*/ */
int int
nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
char *buf, unsigned long *count) struct iovec *vec, int vlen, unsigned long *count)
{ {
struct raparms *ra; struct raparms *ra;
mm_segment_t oldfs; mm_segment_t oldfs;
...@@ -603,7 +603,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, ...@@ -603,7 +603,7 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
oldfs = get_fs(); oldfs = get_fs();
set_fs(KERNEL_DS); set_fs(KERNEL_DS);
err = vfs_read(&file, buf, *count, &offset); err = vfs_readv(&file, vec, vlen, *count, &offset);
set_fs(oldfs); set_fs(oldfs);
/* Write back readahead params */ /* Write back readahead params */
...@@ -629,7 +629,8 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, ...@@ -629,7 +629,8 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
*/ */
int int
nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
char *buf, unsigned long cnt, int *stablep) struct iovec *vec, int vlen,
unsigned long cnt, int *stablep)
{ {
struct svc_export *exp; struct svc_export *exp;
struct file file; struct file file;
...@@ -677,7 +678,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, ...@@ -677,7 +678,7 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
/* Write the data. */ /* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS); oldfs = get_fs(); set_fs(KERNEL_DS);
err = vfs_write(&file, buf, cnt, &offset); err = vfs_writev(&file, vec, vlen, cnt, &offset);
if (err >= 0) if (err >= 0)
nfsdstats.io_write += cnt; nfsdstats.io_write += cnt;
set_fs(oldfs); set_fs(oldfs);
......
...@@ -207,6 +207,53 @@ ssize_t vfs_read(struct file *file, char *buf, size_t count, loff_t *pos) ...@@ -207,6 +207,53 @@ ssize_t vfs_read(struct file *file, char *buf, size_t count, loff_t *pos)
return ret; return ret;
} }
/*
 * Vectored parallel of vfs_read(): read from file at *pos into the vlen
 * segments described by vec.
 *
 * count is handed to locks_verify_area() as the length of the region
 * being read; the amount actually transferred is governed by the
 * segments in vec.
 *
 * Returns:
 *   -EBADF   if the file is not open for reading;
 *   -EINVAL  if the file has no f_op, or neither read nor aio_read;
 *   the non-zero result of locks_verify_area() or of the security
 *   file_permission hook if either refuses the access;
 *   otherwise the result of f_op->readv when that method exists, or,
 *   for the segment-by-segment fallback, the total transferred (or the
 *   first error if nothing had been transferred before it).
 *
 * The fallback stops at the first error or short read.  A positive
 * result triggers a DN_ACCESS dnotify on the file's dentry.
 */
ssize_t vfs_readv(struct file *file, struct iovec *vec, int vlen, size_t count, loff_t *pos)
{
	struct inode *inode = file->f_dentry->d_inode;
	ssize_t ret;
	int seg;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
		return -EINVAL;

	ret = locks_verify_area(FLOCK_VERIFY_READ, inode, file, *pos, count);
	if (ret)
		return ret;

	ret = security_ops->file_permission (file, MAY_READ);
	if (ret)
		return ret;

	if (file->f_op->readv) {
		ret = file->f_op->readv(file, vec, vlen, pos);
	} else {
		/* No readv method: issue one plain read per segment. */
		ret = 0;
		for (seg = 0; seg < vlen; seg++) {
			void *base = vec[seg].iov_base;
			size_t len = vec[seg].iov_len;
			ssize_t nr;

			if (file->f_op->read)
				nr = file->f_op->read(file, base, len, pos);
			else
				nr = do_sync_read(file, base, len, pos);

			if (nr < 0) {
				/* Report the error only if nothing was read so far. */
				if (!ret)
					ret = nr;
				break;
			}
			ret += nr;
			/* A short read ends the transfer. */
			if (nr != len)
				break;
		}
	}

	if (ret > 0)
		dnotify_parent(file->f_dentry, DN_ACCESS);
	return ret;
}
ssize_t do_sync_write(struct file *filp, const char *buf, size_t len, loff_t *ppos) ssize_t do_sync_write(struct file *filp, const char *buf, size_t len, loff_t *ppos)
{ {
struct kiocb kiocb; struct kiocb kiocb;
...@@ -247,6 +294,53 @@ ssize_t vfs_write(struct file *file, const char *buf, size_t count, loff_t *pos) ...@@ -247,6 +294,53 @@ ssize_t vfs_write(struct file *file, const char *buf, size_t count, loff_t *pos)
return ret; return ret;
} }
/*
 * Vectored parallel of vfs_write(): write the vlen segments described
 * by vec to file at *pos.
 *
 * count is handed to locks_verify_area() as the length of the region
 * being written; the amount actually transferred is governed by the
 * segments in vec.
 *
 * Returns:
 *   -EBADF   if the file is not open for writing;
 *   -EINVAL  if the file has no f_op, or neither write nor aio_write;
 *   the non-zero result of locks_verify_area() or of the security
 *   file_permission hook if either refuses the access;
 *   otherwise the result of f_op->writev when that method exists, or,
 *   for the segment-by-segment fallback, the total transferred (or the
 *   first error if nothing had been transferred before it).
 *
 * The fallback stops at the first error or short write.  A positive
 * result triggers a DN_MODIFY dnotify on the file's dentry.
 */
ssize_t vfs_writev(struct file *file, const struct iovec *vec, int vlen, size_t count, loff_t *pos)
{
	struct inode *inode = file->f_dentry->d_inode;
	ssize_t ret;
	int seg;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
		return -EINVAL;

	ret = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file, *pos, count);
	if (ret)
		return ret;

	ret = security_ops->file_permission (file, MAY_WRITE);
	if (ret)
		return ret;

	if (file->f_op->writev) {
		ret = file->f_op->writev(file, vec, vlen, pos);
	} else {
		/* No writev method: issue one plain write per segment. */
		ret = 0;
		for (seg = 0; seg < vlen; seg++) {
			void *base = vec[seg].iov_base;
			size_t len = vec[seg].iov_len;
			ssize_t nr;

			if (file->f_op->write)
				nr = file->f_op->write(file, base, len, pos);
			else
				nr = do_sync_write(file, base, len, pos);

			if (nr < 0) {
				/* Report the error only if nothing was written so far. */
				if (!ret)
					ret = nr;
				break;
			}
			ret += nr;
			/* A short write ends the transfer. */
			if (nr != len)
				break;
		}
	}

	if (ret > 0)
		dnotify_parent(file->f_dentry, DN_MODIFY);
	return ret;
}
asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count) asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count)
{ {
struct file *file; struct file *file;
......
...@@ -795,6 +795,8 @@ struct seq_file; ...@@ -795,6 +795,8 @@ struct seq_file;
extern ssize_t vfs_read(struct file *, char *, size_t, loff_t *); extern ssize_t vfs_read(struct file *, char *, size_t, loff_t *);
extern ssize_t vfs_write(struct file *, const char *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char *, size_t, loff_t *);
extern ssize_t vfs_readv(struct file *, struct iovec *, int, size_t, loff_t *);
extern ssize_t vfs_writev(struct file *, const struct iovec *, int, size_t, loff_t *);
/* /*
* NOTE: write_inode, delete_inode, clear_inode, put_inode can be called * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
......
...@@ -32,12 +32,12 @@ struct svc_cacherep { ...@@ -32,12 +32,12 @@ struct svc_cacherep {
u32 c_vers; u32 c_vers;
unsigned long c_timestamp; unsigned long c_timestamp;
union { union {
struct svc_buf u_buffer; struct iovec u_vec;
u32 u_status; u32 u_status;
} c_u; } c_u;
}; };
#define c_replbuf c_u.u_buffer #define c_replvec c_u.u_vec
#define c_replstat c_u.u_status #define c_replstat c_u.u_status
/* cache entry states */ /* cache entry states */
......
...@@ -88,9 +88,9 @@ int nfsd_open(struct svc_rqst *, struct svc_fh *, int, ...@@ -88,9 +88,9 @@ int nfsd_open(struct svc_rqst *, struct svc_fh *, int,
int, struct file *); int, struct file *);
void nfsd_close(struct file *); void nfsd_close(struct file *);
int nfsd_read(struct svc_rqst *, struct svc_fh *, int nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, char *, unsigned long *); loff_t, struct iovec *,int, unsigned long *);
int nfsd_write(struct svc_rqst *, struct svc_fh *, int nfsd_write(struct svc_rqst *, struct svc_fh *,
loff_t, char *, unsigned long, int *); loff_t, struct iovec *,int, unsigned long, int *);
int nfsd_readlink(struct svc_rqst *, struct svc_fh *, int nfsd_readlink(struct svc_rqst *, struct svc_fh *,
char *, int *); char *, int *);
int nfsd_symlink(struct svc_rqst *, struct svc_fh *, int nfsd_symlink(struct svc_rqst *, struct svc_fh *,
......
...@@ -29,16 +29,16 @@ struct nfsd_readargs { ...@@ -29,16 +29,16 @@ struct nfsd_readargs {
struct svc_fh fh; struct svc_fh fh;
__u32 offset; __u32 offset;
__u32 count; __u32 count;
__u32 totalsize; struct iovec vec[RPCSVC_MAXPAGES];
int vlen;
}; };
struct nfsd_writeargs { struct nfsd_writeargs {
svc_fh fh; svc_fh fh;
__u32 beginoffset;
__u32 offset; __u32 offset;
__u32 totalcount;
__u8 * data;
int len; int len;
struct iovec vec[RPCSVC_MAXPAGES];
int vlen;
}; };
struct nfsd_createargs { struct nfsd_createargs {
......
...@@ -33,6 +33,8 @@ struct nfsd3_readargs { ...@@ -33,6 +33,8 @@ struct nfsd3_readargs {
struct svc_fh fh; struct svc_fh fh;
__u64 offset; __u64 offset;
__u32 count; __u32 count;
struct iovec vec[RPCSVC_MAXPAGES];
int vlen;
}; };
struct nfsd3_writeargs { struct nfsd3_writeargs {
...@@ -40,8 +42,9 @@ struct nfsd3_writeargs { ...@@ -40,8 +42,9 @@ struct nfsd3_writeargs {
__u64 offset; __u64 offset;
__u32 count; __u32 count;
int stable; int stable;
__u8 * data;
int len; int len;
struct iovec vec[RPCSVC_MAXPAGES];
int vlen;
}; };
struct nfsd3_createargs { struct nfsd3_createargs {
......
...@@ -48,42 +48,48 @@ struct svc_serv { ...@@ -48,42 +48,48 @@ struct svc_serv {
* This is used to determine the max number of pages nfsd is * This is used to determine the max number of pages nfsd is
* willing to return in a single READ operation. * willing to return in a single READ operation.
*/ */
#define RPCSVC_MAXPAYLOAD 16384u #define RPCSVC_MAXPAYLOAD (64*1024u)
/* /*
* Buffer to store RPC requests or replies in. * RPC Requests and replies are stored in one or more pages.
* Each server thread has one of these beasts. * We maintain an array of pages for each server thread.
* Requests are copied into these pages as they arrive. Remaining
* pages are available to write the reply into.
* *
* Area points to the allocated memory chunk currently owned by the * Currently pages are all re-used by the same server. Later we
* buffer. Base points to the buffer containing the request, which is * will use ->sendpage to transmit pages with reduced copying. In
* different from area when directly reading from an sk_buff. buf is * that case we will need to give away the page and allocate new ones.
* the current read/write position while processing an RPC request. * In preparation for this, we explicitly move pages off the recv
* list onto the transmit list, and back.
* *
* The array of iovecs can hold additional data that the server process * We use xdr_buf for holding responses as it fits well with NFS
* may not want to copy into the RPC reply buffer, but pass to the * read responses (that have a header, and some data pages, and possibly
* network sendmsg routines directly. The prime candidate for this * a tail) and means we can share some client side routines.
* will of course be NFS READ operations, but one might also want to
* do something about READLINK and READDIR. It might be worthwhile
* to implement some generic readdir cache in the VFS layer...
* *
* On the receiving end of the RPC server, the iovec may be used to hold * The xdr_buf.head iovec always points to the first page in the rq_*pages
* the list of IP fragments once we get to process fragmented UDP * list. The xdr_buf.pages pointer points to the second page on that
* datagrams directly. * list. xdr_buf.tail points to the end of the first page.
* This assumes that the non-page part of an rpc reply will fit
* in a page - NFSd ensures this. lockd also has no trouble.
*/ */
#define RPCSVC_MAXIOV ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE + 1) #define RPCSVC_MAXPAGES ((RPCSVC_MAXPAYLOAD+PAGE_SIZE-1)/PAGE_SIZE + 1)
struct svc_buf {
u32 * area; /* allocated memory */ static inline u32 svc_getu32(struct iovec *iov)
u32 * base; /* base of RPC datagram */ {
int buflen; /* total length of buffer */ u32 val, *vp;
u32 * buf; /* read/write pointer */ vp = iov->iov_base;
int len; /* current end of buffer */ val = *vp++;
iov->iov_base = (void*)vp;
/* iovec for zero-copy NFS READs */ iov->iov_len -= sizeof(u32);
struct iovec iov[RPCSVC_MAXIOV]; return val;
int nriov; }
}; static inline void svc_putu32(struct iovec *iov, u32 val)
#define svc_getu32(argp, val) { (val) = *(argp)->buf++; (argp)->len--; } {
#define svc_putu32(resp, val) { *(resp)->buf++ = (val); (resp)->len++; } u32 *vp = iov->iov_base + iov->iov_len;
*vp = val;
iov->iov_len += sizeof(u32);
}
/* /*
* The context of a single thread, including the request currently being * The context of a single thread, including the request currently being
...@@ -102,9 +108,15 @@ struct svc_rqst { ...@@ -102,9 +108,15 @@ struct svc_rqst {
struct svc_cred rq_cred; /* auth info */ struct svc_cred rq_cred; /* auth info */
struct sk_buff * rq_skbuff; /* fast recv inet buffer */ struct sk_buff * rq_skbuff; /* fast recv inet buffer */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */ struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
struct svc_buf rq_defbuf; /* default buffer */
struct svc_buf rq_argbuf; /* argument buffer */ struct xdr_buf rq_arg;
struct svc_buf rq_resbuf; /* result buffer */ struct xdr_buf rq_res;
struct page * rq_argpages[RPCSVC_MAXPAGES];
struct page * rq_respages[RPCSVC_MAXPAGES];
short rq_argused; /* pages used for argument */
short rq_arghi; /* pages available in argument page list */
short rq_resused; /* pages used for result */
u32 rq_xid; /* transmission id */ u32 rq_xid; /* transmission id */
u32 rq_prog; /* program number */ u32 rq_prog; /* program number */
u32 rq_vers; /* program version */ u32 rq_vers; /* program version */
...@@ -136,6 +148,38 @@ struct svc_rqst { ...@@ -136,6 +148,38 @@ struct svc_rqst {
wait_queue_head_t rq_wait; /* synchronization */ wait_queue_head_t rq_wait; /* synchronization */
}; };
/*
 * Check buffer bounds after decoding arguments.
 *
 * Returns non-zero when the distance from the start of the first
 * argument iovec (rq_arg.head[0]) to the decode pointer p does not
 * exceed that iovec's length, and zero otherwise.
 */
static inline int
xdr_argsize_check(struct svc_rqst *rqstp, u32 *p)
{
	struct iovec *head = &rqstp->rq_arg.head[0];
	char *start = (char *)head->iov_base;
	char *pos = (char *)p;

	return pos - start <= head->iov_len;
}
/*
 * Record the size of the encoded reply and check it against the page limit.
 *
 * p is the position just past the last encoded word.  The length of the
 * first result iovec (rq_res.head[0]) is set to the number of bytes
 * between its base and p, and rq_res.len is set to the same value.
 * Returns non-zero when that length is no larger than PAGE_SIZE.
 */
static inline int
xdr_ressize_check(struct svc_rqst *rqstp, u32 *p)
{
	struct iovec *head = &rqstp->rq_res.head[0];

	head->iov_len = (char *)p - (char *)head->iov_base;
	rqstp->rq_res.len = head->iov_len;

	return head->iov_len <= PAGE_SIZE;
}
/*
 * Move one page from the top of the argument page list (rq_argpages)
 * to the end of the result page list (rq_respages).
 *
 * Returns 0 on success, or -ENOMEM when rq_arghi has already dropped to
 * rq_argused, i.e. no page is left above those used for the argument.
 */
static inline int take_page(struct svc_rqst *rqstp)
{
	if (rqstp->rq_arghi <= rqstp->rq_argused)
		return -ENOMEM;

	/* Shrink the available-argument range first, then hand that page over. */
	rqstp->rq_arghi--;
	rqstp->rq_respages[rqstp->rq_resused] =
		rqstp->rq_argpages[rqstp->rq_arghi];
	rqstp->rq_resused++;
	return 0;
}
struct svc_deferred_req { struct svc_deferred_req {
struct svc_serv *serv; struct svc_serv *serv;
u32 prot; /* protocol (UDP or TCP) */ u32 prot; /* protocol (UDP or TCP) */
......
...@@ -253,7 +253,9 @@ EXPORT_SYMBOL(find_inode_number); ...@@ -253,7 +253,9 @@ EXPORT_SYMBOL(find_inode_number);
EXPORT_SYMBOL(is_subdir); EXPORT_SYMBOL(is_subdir);
EXPORT_SYMBOL(get_unused_fd); EXPORT_SYMBOL(get_unused_fd);
EXPORT_SYMBOL(vfs_read); EXPORT_SYMBOL(vfs_read);
EXPORT_SYMBOL(vfs_readv);
EXPORT_SYMBOL(vfs_write); EXPORT_SYMBOL(vfs_write);
EXPORT_SYMBOL(vfs_writev);
EXPORT_SYMBOL(vfs_create); EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_mkdir); EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod); EXPORT_SYMBOL(vfs_mknod);
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include <linux/net.h> #include <linux/net.h>
#include <linux/in.h> #include <linux/in.h>
#include <linux/unistd.h> #include <linux/unistd.h>
#include <linux/mm.h>
#include <linux/sunrpc/types.h> #include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h> #include <linux/sunrpc/xdr.h>
...@@ -35,7 +36,6 @@ svc_create(struct svc_program *prog, unsigned int bufsize) ...@@ -35,7 +36,6 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL))) if (!(serv = (struct svc_serv *) kmalloc(sizeof(*serv), GFP_KERNEL)))
return NULL; return NULL;
memset(serv, 0, sizeof(*serv)); memset(serv, 0, sizeof(*serv));
serv->sv_program = prog; serv->sv_program = prog;
serv->sv_nrthreads = 1; serv->sv_nrthreads = 1;
...@@ -105,35 +105,42 @@ svc_destroy(struct svc_serv *serv) ...@@ -105,35 +105,42 @@ svc_destroy(struct svc_serv *serv)
} }
/* /*
* Allocate an RPC server buffer * Allocate an RPC server's buffer space.
* Later versions may do nifty things by allocating multiple pages * We allocate pages and place them in rq_argpages.
* of memory directly and putting them into the bufp->iov.
*/ */
int static int
svc_init_buffer(struct svc_buf *bufp, unsigned int size) svc_init_buffer(struct svc_rqst *rqstp, unsigned int size)
{ {
if (!(bufp->area = (u32 *) kmalloc(size, GFP_KERNEL))) int pages = 2 + (size+ PAGE_SIZE -1) / PAGE_SIZE;
return 0; int arghi;
bufp->base = bufp->area;
bufp->buf = bufp->area; rqstp->rq_argused = 0;
bufp->len = 0; rqstp->rq_resused = 0;
bufp->buflen = size >> 2; arghi = 0;
if (pages > RPCSVC_MAXPAGES)
bufp->iov[0].iov_base = bufp->area; BUG();
bufp->iov[0].iov_len = size; while (pages) {
bufp->nriov = 1; struct page *p = alloc_page(GFP_KERNEL);
if (!p)
return 1; break;
rqstp->rq_argpages[arghi++] = p;
pages--;
}
rqstp->rq_arghi = arghi;
return ! pages;
} }
/* /*
* Release an RPC server buffer * Release an RPC server buffer
*/ */
void static void
svc_release_buffer(struct svc_buf *bufp) svc_release_buffer(struct svc_rqst *rqstp)
{ {
kfree(bufp->area); while (rqstp->rq_arghi)
bufp->area = 0; put_page(rqstp->rq_argpages[--rqstp->rq_arghi]);
while (rqstp->rq_resused)
put_page(rqstp->rq_respages[--rqstp->rq_resused]);
rqstp->rq_argused = 0;
} }
/* /*
...@@ -154,7 +161,7 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv) ...@@ -154,7 +161,7 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL)) if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL)) || !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !svc_init_buffer(&rqstp->rq_defbuf, serv->sv_bufsz)) || !svc_init_buffer(rqstp, serv->sv_bufsz))
goto out_thread; goto out_thread;
serv->sv_nrthreads++; serv->sv_nrthreads++;
...@@ -180,7 +187,7 @@ svc_exit_thread(struct svc_rqst *rqstp) ...@@ -180,7 +187,7 @@ svc_exit_thread(struct svc_rqst *rqstp)
{ {
struct svc_serv *serv = rqstp->rq_server; struct svc_serv *serv = rqstp->rq_server;
svc_release_buffer(&rqstp->rq_defbuf); svc_release_buffer(rqstp);
if (rqstp->rq_resp) if (rqstp->rq_resp)
kfree(rqstp->rq_resp); kfree(rqstp->rq_resp);
if (rqstp->rq_argp) if (rqstp->rq_argp)
...@@ -242,37 +249,51 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) ...@@ -242,37 +249,51 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
struct svc_program *progp; struct svc_program *progp;
struct svc_version *versp = NULL; /* compiler food */ struct svc_version *versp = NULL; /* compiler food */
struct svc_procedure *procp = NULL; struct svc_procedure *procp = NULL;
struct svc_buf * argp = &rqstp->rq_argbuf; struct iovec * argv = &rqstp->rq_arg.head[0];
struct svc_buf * resp = &rqstp->rq_resbuf; struct iovec * resv = &rqstp->rq_res.head[0];
kxdrproc_t xdr; kxdrproc_t xdr;
u32 *bufp, *statp; u32 *statp;
u32 dir, prog, vers, proc, u32 dir, prog, vers, proc,
auth_stat, rpc_stat; auth_stat, rpc_stat;
rpc_stat = rpc_success; rpc_stat = rpc_success;
bufp = argp->buf;
if (argp->len < 5) if (argv->iov_len < 6*4)
goto err_short_len; goto err_short_len;
dir = ntohl(*bufp++); /* setup response xdr_buf.
vers = ntohl(*bufp++); * Initially it has just one page
*/
take_page(rqstp); /* must succeed */
resv->iov_base = page_address(rqstp->rq_respages[0]);
resv->iov_len = 0;
rqstp->rq_res.pages = rqstp->rq_respages+1;
rqstp->rq_res.len = 0;
rqstp->rq_res.page_base = 0;
rqstp->rq_res.page_len = 0;
/* tcp needs a space for the record length... */
if (rqstp->rq_prot == IPPROTO_TCP)
svc_putu32(resv, 0);
rqstp->rq_xid = svc_getu32(argv);
svc_putu32(resv, rqstp->rq_xid);
dir = ntohl(svc_getu32(argv));
vers = ntohl(svc_getu32(argv));
/* First words of reply: */ /* First words of reply: */
svc_putu32(resp, xdr_one); /* REPLY */ svc_putu32(resv, xdr_one); /* REPLY */
svc_putu32(resp, xdr_zero); /* ACCEPT */
if (dir != 0) /* direction != CALL */ if (dir != 0) /* direction != CALL */
goto err_bad_dir; goto err_bad_dir;
if (vers != 2) /* RPC version number */ if (vers != 2) /* RPC version number */
goto err_bad_rpc; goto err_bad_rpc;
rqstp->rq_prog = prog = ntohl(*bufp++); /* program number */ svc_putu32(resv, xdr_zero); /* ACCEPT */
rqstp->rq_vers = vers = ntohl(*bufp++); /* version number */
rqstp->rq_proc = proc = ntohl(*bufp++); /* procedure number */
argp->buf += 5; rqstp->rq_prog = prog = ntohl(svc_getu32(argv)); /* program number */
argp->len -= 5; rqstp->rq_vers = vers = ntohl(svc_getu32(argv)); /* version number */
rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */
/* /*
* Decode auth data, and add verifier to reply buffer. * Decode auth data, and add verifier to reply buffer.
...@@ -307,8 +328,8 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) ...@@ -307,8 +328,8 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
serv->sv_stats->rpccnt++; serv->sv_stats->rpccnt++;
/* Build the reply header. */ /* Build the reply header. */
statp = resp->buf; statp = resv->iov_base +resv->iov_len;
svc_putu32(resp, rpc_success); /* RPC_SUCCESS */ svc_putu32(resv, rpc_success); /* RPC_SUCCESS */
/* Bump per-procedure stats counter */ /* Bump per-procedure stats counter */
procp->pc_count++; procp->pc_count++;
...@@ -327,14 +348,14 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) ...@@ -327,14 +348,14 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
if (!versp->vs_dispatch) { if (!versp->vs_dispatch) {
/* Decode arguments */ /* Decode arguments */
xdr = procp->pc_decode; xdr = procp->pc_decode;
if (xdr && !xdr(rqstp, rqstp->rq_argbuf.buf, rqstp->rq_argp)) if (xdr && !xdr(rqstp, argv->iov_base, rqstp->rq_argp))
goto err_garbage; goto err_garbage;
*statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
/* Encode reply */ /* Encode reply */
if (*statp == rpc_success && (xdr = procp->pc_encode) if (*statp == rpc_success && (xdr = procp->pc_encode)
&& !xdr(rqstp, rqstp->rq_resbuf.buf, rqstp->rq_resp)) { && !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) {
dprintk("svc: failed to encode reply\n"); dprintk("svc: failed to encode reply\n");
/* serv->sv_stats->rpcsystemerr++; */ /* serv->sv_stats->rpcsystemerr++; */
*statp = rpc_system_err; *statp = rpc_system_err;
...@@ -347,7 +368,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) ...@@ -347,7 +368,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
/* Check RPC status result */ /* Check RPC status result */
if (*statp != rpc_success) if (*statp != rpc_success)
resp->len = statp + 1 - resp->base; resv->iov_len = ((void*)statp) - resv->iov_base + 4;
/* Release reply info */ /* Release reply info */
if (procp->pc_release) if (procp->pc_release)
...@@ -369,7 +390,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) ...@@ -369,7 +390,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
err_short_len: err_short_len:
#ifdef RPC_PARANOIA #ifdef RPC_PARANOIA
printk("svc: short len %d, dropping request\n", argp->len); printk("svc: short len %d, dropping request\n", argv->iov_len);
#endif #endif
goto dropit; /* drop request */ goto dropit; /* drop request */
...@@ -382,18 +403,19 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) ...@@ -382,18 +403,19 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
err_bad_rpc: err_bad_rpc:
serv->sv_stats->rpcbadfmt++; serv->sv_stats->rpcbadfmt++;
resp->buf[-1] = xdr_one; /* REJECT */ svc_putu32(resv, xdr_one); /* REJECT */
svc_putu32(resp, xdr_zero); /* RPC_MISMATCH */ svc_putu32(resv, xdr_zero); /* RPC_MISMATCH */
svc_putu32(resp, xdr_two); /* Only RPCv2 supported */ svc_putu32(resv, xdr_two); /* Only RPCv2 supported */
svc_putu32(resp, xdr_two); svc_putu32(resv, xdr_two);
goto sendit; goto sendit;
err_bad_auth: err_bad_auth:
dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat)); dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
serv->sv_stats->rpcbadauth++; serv->sv_stats->rpcbadauth++;
resp->buf[-1] = xdr_one; /* REJECT */ resv->iov_len -= 4;
svc_putu32(resp, xdr_one); /* AUTH_ERROR */ svc_putu32(resv, xdr_one); /* REJECT */
svc_putu32(resp, auth_stat); /* status */ svc_putu32(resv, xdr_one); /* AUTH_ERROR */
svc_putu32(resv, auth_stat); /* status */
goto sendit; goto sendit;
err_bad_prog: err_bad_prog:
...@@ -403,7 +425,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) ...@@ -403,7 +425,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
/* else it is just a Solaris client seeing if ACLs are supported */ /* else it is just a Solaris client seeing if ACLs are supported */
#endif #endif
serv->sv_stats->rpcbadfmt++; serv->sv_stats->rpcbadfmt++;
svc_putu32(resp, rpc_prog_unavail); svc_putu32(resv, rpc_prog_unavail);
goto sendit; goto sendit;
err_bad_vers: err_bad_vers:
...@@ -411,9 +433,9 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) ...@@ -411,9 +433,9 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
printk("svc: unknown version (%d)\n", vers); printk("svc: unknown version (%d)\n", vers);
#endif #endif
serv->sv_stats->rpcbadfmt++; serv->sv_stats->rpcbadfmt++;
svc_putu32(resp, rpc_prog_mismatch); svc_putu32(resv, rpc_prog_mismatch);
svc_putu32(resp, htonl(progp->pg_lovers)); svc_putu32(resv, htonl(progp->pg_lovers));
svc_putu32(resp, htonl(progp->pg_hivers)); svc_putu32(resv, htonl(progp->pg_hivers));
goto sendit; goto sendit;
err_bad_proc: err_bad_proc:
...@@ -421,7 +443,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) ...@@ -421,7 +443,7 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
printk("svc: unknown procedure (%d)\n", proc); printk("svc: unknown procedure (%d)\n", proc);
#endif #endif
serv->sv_stats->rpcbadfmt++; serv->sv_stats->rpcbadfmt++;
svc_putu32(resp, rpc_proc_unavail); svc_putu32(resv, rpc_proc_unavail);
goto sendit; goto sendit;
err_garbage: err_garbage:
...@@ -429,6 +451,6 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp) ...@@ -429,6 +451,6 @@ svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
printk("svc: failed to decode args\n"); printk("svc: failed to decode args\n");
#endif #endif
serv->sv_stats->rpcbadfmt++; serv->sv_stats->rpcbadfmt++;
svc_putu32(resp, rpc_garbage_args); svc_putu32(resv, rpc_garbage_args);
goto sendit; goto sendit;
} }
...@@ -40,8 +40,7 @@ svc_authenticate(struct svc_rqst *rqstp, u32 *statp, u32 *authp, int proc) ...@@ -40,8 +40,7 @@ svc_authenticate(struct svc_rqst *rqstp, u32 *statp, u32 *authp, int proc)
*statp = rpc_success; *statp = rpc_success;
*authp = rpc_auth_ok; *authp = rpc_auth_ok;
svc_getu32(&rqstp->rq_argbuf, flavor); flavor = ntohl(svc_getu32(&rqstp->rq_arg.head[0]));
flavor = ntohl(flavor);
dprintk("svc: svc_authenticate (%d)\n", flavor); dprintk("svc: svc_authenticate (%d)\n", flavor);
if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor])) { if (flavor >= RPC_AUTH_MAXFLAVOR || !(aops = authtab[flavor])) {
......
...@@ -295,20 +295,20 @@ void svcauth_unix_purge(void) ...@@ -295,20 +295,20 @@ void svcauth_unix_purge(void)
static int static int
svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp, int proc) svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp, int proc)
{ {
struct svc_buf *argp = &rqstp->rq_argbuf; struct iovec *argv = &rqstp->rq_arg.head[0];
struct svc_buf *resp = &rqstp->rq_resbuf; struct iovec *resv = &rqstp->rq_res.head[0];
int rv=0; int rv=0;
struct ip_map key, *ipm; struct ip_map key, *ipm;
if ((argp->len -= 3) < 0) { if (argv->iov_len < 3*4)
return SVC_GARBAGE; return SVC_GARBAGE;
}
if (*(argp->buf)++ != 0) { /* we already skipped the flavor */ if (svc_getu32(argv) != 0) {
dprintk("svc: bad null cred\n"); dprintk("svc: bad null cred\n");
*authp = rpc_autherr_badcred; *authp = rpc_autherr_badcred;
return SVC_DENIED; return SVC_DENIED;
} }
if (*(argp->buf)++ != RPC_AUTH_NULL || *(argp->buf)++ != 0) { if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
dprintk("svc: bad null verf\n"); dprintk("svc: bad null verf\n");
*authp = rpc_autherr_badverf; *authp = rpc_autherr_badverf;
return SVC_DENIED; return SVC_DENIED;
...@@ -320,8 +320,8 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp, int proc) ...@@ -320,8 +320,8 @@ svcauth_null_accept(struct svc_rqst *rqstp, u32 *authp, int proc)
rqstp->rq_cred.cr_groups[0] = NOGROUP; rqstp->rq_cred.cr_groups[0] = NOGROUP;
/* Put NULL verifier */ /* Put NULL verifier */
svc_putu32(resp, RPC_AUTH_NULL); svc_putu32(resv, RPC_AUTH_NULL);
svc_putu32(resp, 0); svc_putu32(resv, 0);
key.m_class = rqstp->rq_server->sv_program->pg_class; key.m_class = rqstp->rq_server->sv_program->pg_class;
key.m_addr = rqstp->rq_addr.sin_addr; key.m_addr = rqstp->rq_addr.sin_addr;
...@@ -376,55 +376,54 @@ struct auth_ops svcauth_null = { ...@@ -376,55 +376,54 @@ struct auth_ops svcauth_null = {
int int
svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp, int proc) svcauth_unix_accept(struct svc_rqst *rqstp, u32 *authp, int proc)
{ {
struct svc_buf *argp = &rqstp->rq_argbuf; struct iovec *argv = &rqstp->rq_arg.head[0];
struct svc_buf *resp = &rqstp->rq_resbuf; struct iovec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred; struct svc_cred *cred = &rqstp->rq_cred;
u32 *bufp = argp->buf, slen, i; u32 slen, i;
int len = argp->len; int len = argv->iov_len;
int rv=0; int rv=0;
struct ip_map key, *ipm; struct ip_map key, *ipm;
if ((len -= 3) < 0) if ((len -= 3*4) < 0)
return SVC_GARBAGE; return SVC_GARBAGE;
bufp++; /* length */ svc_getu32(argv); /* length */
bufp++; /* time stamp */ svc_getu32(argv); /* time stamp */
slen = XDR_QUADLEN(ntohl(*bufp++)); /* machname length */ slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); /* machname length */
if (slen > 64 || (len -= slen + 3) < 0) if (slen > 64 || (len -= (slen + 3)*4) < 0)
goto badcred; goto badcred;
bufp += slen; /* skip machname */ argv->iov_base = (void*)((u32*)argv->iov_base + slen); /* skip machname */
argv->iov_len -= slen*4;
cred->cr_uid = ntohl(*bufp++); /* uid */ cred->cr_uid = ntohl(svc_getu32(argv)); /* uid */
cred->cr_gid = ntohl(*bufp++); /* gid */ cred->cr_gid = ntohl(svc_getu32(argv)); /* gid */
slen = ntohl(svc_getu32(argv)); /* gids length */
slen = ntohl(*bufp++); /* gids length */ if (slen > 16 || (len -= (slen + 2)*4) < 0)
if (slen > 16 || (len -= slen + 2) < 0)
goto badcred; goto badcred;
for (i = 0; i < NGROUPS && i < slen; i++) for (i = 0; i < slen; i++)
cred->cr_groups[i] = ntohl(*bufp++); if (i < NGROUPS)
cred->cr_groups[i] = ntohl(svc_getu32(argv));
else
svc_getu32(argv);
if (i < NGROUPS) if (i < NGROUPS)
cred->cr_groups[i] = NOGROUP; cred->cr_groups[i] = NOGROUP;
bufp += (slen - i);
if (*bufp++ != RPC_AUTH_NULL || *bufp++ != 0) { if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf; *authp = rpc_autherr_badverf;
return SVC_DENIED; return SVC_DENIED;
} }
argp->buf = bufp;
argp->len = len;
/* Put NULL verifier */ /* Put NULL verifier */
svc_putu32(resp, RPC_AUTH_NULL); svc_putu32(resv, RPC_AUTH_NULL);
svc_putu32(resp, 0); svc_putu32(resv, 0);
key.m_class = rqstp->rq_server->sv_program->pg_class; key.m_class = rqstp->rq_server->sv_program->pg_class;
key.m_addr = rqstp->rq_addr.sin_addr; key.m_addr = rqstp->rq_addr.sin_addr;
ipm = ip_map_lookup(&key, 0); ipm = ip_map_lookup(&key, 0);
rqstp->rq_client = NULL; rqstp->rq_client = NULL;
if (ipm) if (ipm)
switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) { switch (cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle)) {
case -EAGAIN: case -EAGAIN:
......
...@@ -234,7 +234,7 @@ svc_sock_received(struct svc_sock *svsk) ...@@ -234,7 +234,7 @@ svc_sock_received(struct svc_sock *svsk)
*/ */
void svc_reserve(struct svc_rqst *rqstp, int space) void svc_reserve(struct svc_rqst *rqstp, int space)
{ {
space += rqstp->rq_resbuf.len<<2; space += rqstp->rq_res.head[0].iov_len;
if (space < rqstp->rq_reserved) { if (space < rqstp->rq_reserved) {
struct svc_sock *svsk = rqstp->rq_sock; struct svc_sock *svsk = rqstp->rq_sock;
...@@ -278,13 +278,12 @@ svc_sock_release(struct svc_rqst *rqstp) ...@@ -278,13 +278,12 @@ svc_sock_release(struct svc_rqst *rqstp)
* But first, check that enough space was reserved * But first, check that enough space was reserved
* for the reply, otherwise we have a bug! * for the reply, otherwise we have a bug!
*/ */
if ((rqstp->rq_resbuf.len<<2) > rqstp->rq_reserved) if ((rqstp->rq_res.len) > rqstp->rq_reserved)
printk(KERN_ERR "RPC request reserved %d but used %d\n", printk(KERN_ERR "RPC request reserved %d but used %d\n",
rqstp->rq_reserved, rqstp->rq_reserved,
rqstp->rq_resbuf.len<<2); rqstp->rq_res.len);
rqstp->rq_resbuf.buf = rqstp->rq_resbuf.base; rqstp->rq_res.head[0].iov_len = 0;
rqstp->rq_resbuf.len = 0;
svc_reserve(rqstp, 0); svc_reserve(rqstp, 0);
rqstp->rq_sock = NULL; rqstp->rq_sock = NULL;
...@@ -348,8 +347,9 @@ svc_sendto(struct svc_rqst *rqstp, struct iovec *iov, int nr) ...@@ -348,8 +347,9 @@ svc_sendto(struct svc_rqst *rqstp, struct iovec *iov, int nr)
len = sock_sendmsg(sock, &msg, buflen); len = sock_sendmsg(sock, &msg, buflen);
set_fs(oldfs); set_fs(oldfs);
dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d\n", dprintk("svc: socket %p sendto([%p %Zu... ], %d, %d) = %d (addr %x)\n",
rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len); rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, nr, buflen, len,
rqstp->rq_addr.sin_addr.s_addr);
return len; return len;
} }
...@@ -480,13 +480,15 @@ svc_write_space(struct sock *sk) ...@@ -480,13 +480,15 @@ svc_write_space(struct sock *sk)
/* /*
* Receive a datagram from a UDP socket. * Receive a datagram from a UDP socket.
*/ */
extern int
csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb);
static int static int
svc_udp_recvfrom(struct svc_rqst *rqstp) svc_udp_recvfrom(struct svc_rqst *rqstp)
{ {
struct svc_sock *svsk = rqstp->rq_sock; struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server; struct svc_serv *serv = svsk->sk_server;
struct sk_buff *skb; struct sk_buff *skb;
u32 *data;
int err, len; int err, len;
if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
...@@ -512,33 +514,19 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) ...@@ -512,33 +514,19 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
} }
set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
/* Sorry. */ len = skb->len - sizeof(struct udphdr);
if (skb_is_nonlinear(skb)) {
if (skb_linearize(skb, GFP_KERNEL) != 0) {
kfree_skb(skb);
svc_sock_received(svsk);
return 0;
}
}
if (skb->ip_summed != CHECKSUM_UNNECESSARY) { if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {
if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) { /* checksum error */
skb_free_datagram(svsk->sk_sk, skb); skb_free_datagram(svsk->sk_sk, skb);
svc_sock_received(svsk); svc_sock_received(svsk);
return 0; return 0;
} }
}
len = skb->len - sizeof(struct udphdr); rqstp->rq_arg.len = len;
data = (u32 *) (skb->data + sizeof(struct udphdr)); rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
rqstp->rq_argused += (rqstp->rq_arg.page_len + PAGE_SIZE - 1)/ PAGE_SIZE;
rqstp->rq_skbuff = skb;
rqstp->rq_argbuf.base = data;
rqstp->rq_argbuf.buf = data;
rqstp->rq_argbuf.len = (len >> 2);
rqstp->rq_argbuf.buflen = (len >> 2);
/* rqstp->rq_resbuf = rqstp->rq_defbuf; */
rqstp->rq_prot = IPPROTO_UDP; rqstp->rq_prot = IPPROTO_UDP;
/* Get sender address */ /* Get sender address */
...@@ -546,6 +534,8 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) ...@@ -546,6 +534,8 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_addr.sin_port = skb->h.uh->source; rqstp->rq_addr.sin_port = skb->h.uh->source;
rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr; rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr;
skb_free_datagram(svsk->sk_sk, skb);
if (serv->sv_stats) if (serv->sv_stats)
serv->sv_stats->netudpcnt++; serv->sv_stats->netudpcnt++;
...@@ -559,21 +549,36 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) ...@@ -559,21 +549,36 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
static int static int
svc_udp_sendto(struct svc_rqst *rqstp) svc_udp_sendto(struct svc_rqst *rqstp)
{ {
struct svc_buf *bufp = &rqstp->rq_resbuf;
int error; int error;
struct iovec vec[RPCSVC_MAXPAGES];
int v;
int base, len;
/* Set up the first element of the reply iovec. /* Set up the first element of the reply iovec.
* Any other iovecs that may be in use have been taken * Any other iovecs that may be in use have been taken
* care of by the server implementation itself. * care of by the server implementation itself.
*/ */
/* bufp->base = bufp->area; */ vec[0] = rqstp->rq_res.head[0];
bufp->iov[0].iov_base = bufp->base; v=1;
bufp->iov[0].iov_len = bufp->len << 2; base=rqstp->rq_res.page_base;
len = rqstp->rq_res.page_len;
error = svc_sendto(rqstp, bufp->iov, bufp->nriov); while (len) {
vec[v].iov_base = page_address(rqstp->rq_res.pages[v-1]) + base;
vec[v].iov_len = PAGE_SIZE-base;
if (len <= vec[v].iov_len)
vec[v].iov_len = len;
len -= vec[v].iov_len;
base = 0;
v++;
}
if (rqstp->rq_res.tail[0].iov_len) {
vec[v] = rqstp->rq_res.tail[0];
v++;
}
error = svc_sendto(rqstp, vec, v);
if (error == -ECONNREFUSED) if (error == -ECONNREFUSED)
/* ICMP error on earlier request. */ /* ICMP error on earlier request. */
error = svc_sendto(rqstp, bufp->iov, bufp->nriov); error = svc_sendto(rqstp, vec, v);
return error; return error;
} }
...@@ -785,8 +790,9 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) ...@@ -785,8 +790,9 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
{ {
struct svc_sock *svsk = rqstp->rq_sock; struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server; struct svc_serv *serv = svsk->sk_server;
struct svc_buf *bufp = &rqstp->rq_argbuf;
int len; int len;
struct iovec vec[RPCSVC_MAXPAGES];
int pnum, vlen;
dprintk("svc: tcp_recv %p data %d conn %d close %d\n", dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
svsk, test_bit(SK_DATA, &svsk->sk_flags), svsk, test_bit(SK_DATA, &svsk->sk_flags),
...@@ -851,7 +857,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) ...@@ -851,7 +857,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
} }
svsk->sk_reclen &= 0x7fffffff; svsk->sk_reclen &= 0x7fffffff;
dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
if (svsk->sk_reclen > (bufp->buflen<<2)) { if (svsk->sk_reclen > serv->sv_bufsz) {
printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n", printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",
(unsigned long) svsk->sk_reclen); (unsigned long) svsk->sk_reclen);
goto err_delete; goto err_delete;
...@@ -869,30 +875,35 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) ...@@ -869,30 +875,35 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
svc_sock_received(svsk); svc_sock_received(svsk);
return -EAGAIN; /* record not complete */ return -EAGAIN; /* record not complete */
} }
len = svsk->sk_reclen;
set_bit(SK_DATA, &svsk->sk_flags); set_bit(SK_DATA, &svsk->sk_flags);
/* Frob argbuf */ vec[0] = rqstp->rq_arg.head[0];
bufp->iov[0].iov_base += 4; vlen = PAGE_SIZE;
bufp->iov[0].iov_len -= 4; pnum = 1;
while (vlen < len) {
vec[pnum].iov_base = page_address(rqstp->rq_argpages[rqstp->rq_argused++]);
vec[pnum].iov_len = PAGE_SIZE;
pnum++;
vlen += PAGE_SIZE;
}
/* Now receive data */ /* Now receive data */
len = svc_recvfrom(rqstp, bufp->iov, bufp->nriov, svsk->sk_reclen); len = svc_recvfrom(rqstp, vec, pnum, len);
if (len < 0) if (len < 0)
goto error; goto error;
dprintk("svc: TCP complete record (%d bytes)\n", len); dprintk("svc: TCP complete record (%d bytes)\n", len);
rqstp->rq_arg.len = len;
/* Position reply write pointer immediately after args, rqstp->rq_arg.page_base = 0;
* allowing for record length */ if (len <= rqstp->rq_arg.head[0].iov_len) {
rqstp->rq_resbuf.base = rqstp->rq_argbuf.base + 1 + (len>>2); rqstp->rq_arg.head[0].iov_len = len;
rqstp->rq_resbuf.buf = rqstp->rq_resbuf.base + 1; rqstp->rq_arg.page_len = 0;
rqstp->rq_resbuf.len = 1; } else {
rqstp->rq_resbuf.buflen= rqstp->rq_argbuf.buflen - (len>>2) - 1; rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
}
rqstp->rq_skbuff = 0; rqstp->rq_skbuff = 0;
rqstp->rq_argbuf.buf += 1;
rqstp->rq_argbuf.len = (len >> 2);
rqstp->rq_argbuf.buflen = (len >> 2) +1;
rqstp->rq_prot = IPPROTO_TCP; rqstp->rq_prot = IPPROTO_TCP;
/* Reset TCP read info */ /* Reset TCP read info */
...@@ -928,23 +939,44 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) ...@@ -928,23 +939,44 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
static int static int
svc_tcp_sendto(struct svc_rqst *rqstp) svc_tcp_sendto(struct svc_rqst *rqstp)
{ {
struct svc_buf *bufp = &rqstp->rq_resbuf; struct xdr_buf *xbufp = &rqstp->rq_res;
struct iovec vec[RPCSVC_MAXPAGES];
int v;
int base, len;
int sent; int sent;
u32 reclen;
/* Set up the first element of the reply iovec. /* Set up the first element of the reply iovec.
* Any other iovecs that may be in use have been taken * Any other iovecs that may be in use have been taken
* care of by the server implementation itself. * care of by the server implementation itself.
*/ */
bufp->iov[0].iov_base = bufp->base; reclen = htonl(0x80000000|((xbufp->len ) - 4));
bufp->iov[0].iov_len = bufp->len << 2; memcpy(xbufp->head[0].iov_base, &reclen, 4);
bufp->base[0] = htonl(0x80000000|((bufp->len << 2) - 4));
vec[0] = rqstp->rq_res.head[0];
v=1;
base= xbufp->page_base;
len = xbufp->page_len;
while (len) {
vec[v].iov_base = page_address(xbufp->pages[v-1]) + base;
vec[v].iov_len = PAGE_SIZE-base;
if (len <= vec[v].iov_len)
vec[v].iov_len = len;
len -= vec[v].iov_len;
base = 0;
v++;
}
if (xbufp->tail[0].iov_len) {
vec[v] = xbufp->tail[0];
v++;
}
sent = svc_sendto(rqstp, bufp->iov, bufp->nriov); sent = svc_sendto(rqstp, vec, v);
if (sent != bufp->len<<2) { if (sent != xbufp->len) {
printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", printk(KERN_NOTICE "rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n",
rqstp->rq_sock->sk_server->sv_name, rqstp->rq_sock->sk_server->sv_name,
(sent<0)?"got error":"sent only", (sent<0)?"got error":"sent only",
sent, bufp->len << 2); sent, xbufp->len);
svc_delete_socket(rqstp->rq_sock); svc_delete_socket(rqstp->rq_sock);
sent = -EAGAIN; sent = -EAGAIN;
} }
...@@ -1016,6 +1048,8 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) ...@@ -1016,6 +1048,8 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
{ {
struct svc_sock *svsk =NULL; struct svc_sock *svsk =NULL;
int len; int len;
int pages;
struct xdr_buf *arg;
DECLARE_WAITQUEUE(wait, current); DECLARE_WAITQUEUE(wait, current);
dprintk("svc: server %p waiting for data (to = %ld)\n", dprintk("svc: server %p waiting for data (to = %ld)\n",
...@@ -1031,8 +1065,34 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) ...@@ -1031,8 +1065,34 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
rqstp); rqstp);
/* Initialize the buffers */ /* Initialize the buffers */
rqstp->rq_argbuf = rqstp->rq_defbuf; /* first reclaim pages that were moved to response list */
rqstp->rq_resbuf = rqstp->rq_defbuf; while (rqstp->rq_resused)
rqstp->rq_argpages[rqstp->rq_arghi++] =
rqstp->rq_respages[--rqstp->rq_resused];
/* now allocate needed pages. If we get a failure, sleep briefly */
pages = 2 + (serv->sv_bufsz + PAGE_SIZE -1) / PAGE_SIZE;
while (rqstp->rq_arghi < pages) {
struct page *p = alloc_page(GFP_KERNEL);
if (!p) {
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(HZ/2);
current->state = TASK_RUNNING;
continue;
}
rqstp->rq_argpages[rqstp->rq_arghi++] = p;
}
/* Make arg->head point to first page and arg->pages point to rest */
arg = &rqstp->rq_arg;
arg->head[0].iov_base = page_address(rqstp->rq_argpages[0]);
arg->head[0].iov_len = PAGE_SIZE;
rqstp->rq_argused = 1;
arg->pages = rqstp->rq_argpages + 1;
arg->page_base = 0;
/* save at least one page for response */
arg->page_len = (pages-2)*PAGE_SIZE;
arg->len = (pages-1)*PAGE_SIZE;
arg->tail[0].iov_len = 0;
if (signalled()) if (signalled())
return -EINTR; return -EINTR;
...@@ -1109,12 +1169,6 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout) ...@@ -1109,12 +1169,6 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
rqstp->rq_userset = 0; rqstp->rq_userset = 0;
rqstp->rq_chandle.defer = svc_defer; rqstp->rq_chandle.defer = svc_defer;
svc_getu32(&rqstp->rq_argbuf, rqstp->rq_xid);
svc_putu32(&rqstp->rq_resbuf, rqstp->rq_xid);
/* Assume that the reply consists of a single buffer. */
rqstp->rq_resbuf.nriov = 1;
if (serv->sv_stats) if (serv->sv_stats)
serv->sv_stats->netcnt++; serv->sv_stats->netcnt++;
return len; return len;
...@@ -1354,23 +1408,25 @@ static struct cache_deferred_req * ...@@ -1354,23 +1408,25 @@ static struct cache_deferred_req *
svc_defer(struct cache_req *req) svc_defer(struct cache_req *req)
{ {
struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
int size = sizeof(struct svc_deferred_req) + (rqstp->rq_argbuf.buflen << 2); int size = sizeof(struct svc_deferred_req) + (rqstp->rq_arg.head[0].iov_len);
struct svc_deferred_req *dr; struct svc_deferred_req *dr;
if (rqstp->rq_arg.page_len)
return NULL; /* if more than a page, give up FIXME */
if (rqstp->rq_deferred) { if (rqstp->rq_deferred) {
dr = rqstp->rq_deferred; dr = rqstp->rq_deferred;
rqstp->rq_deferred = NULL; rqstp->rq_deferred = NULL;
} else { } else {
/* FIXME maybe discard if size too large */ /* FIXME maybe discard if size too large */
dr = kmalloc(size<<2, GFP_KERNEL); dr = kmalloc(size, GFP_KERNEL);
if (dr == NULL) if (dr == NULL)
return NULL; return NULL;
dr->serv = rqstp->rq_server; dr->serv = rqstp->rq_server;
dr->prot = rqstp->rq_prot; dr->prot = rqstp->rq_prot;
dr->addr = rqstp->rq_addr; dr->addr = rqstp->rq_addr;
dr->argslen = rqstp->rq_argbuf.buflen; dr->argslen = rqstp->rq_arg.head[0].iov_len >> 2;
memcpy(dr->args, rqstp->rq_argbuf.base, dr->argslen<<2); memcpy(dr->args, rqstp->rq_arg.head[0].iov_base, dr->argslen<<2);
} }
spin_lock(&rqstp->rq_server->sv_lock); spin_lock(&rqstp->rq_server->sv_lock);
rqstp->rq_sock->sk_inuse++; rqstp->rq_sock->sk_inuse++;
...@@ -1388,10 +1444,10 @@ static int svc_deferred_recv(struct svc_rqst *rqstp) ...@@ -1388,10 +1444,10 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
{ {
struct svc_deferred_req *dr = rqstp->rq_deferred; struct svc_deferred_req *dr = rqstp->rq_deferred;
rqstp->rq_argbuf.base = dr->args; rqstp->rq_arg.head[0].iov_base = dr->args;
rqstp->rq_argbuf.buf = dr->args; rqstp->rq_arg.head[0].iov_len = dr->argslen<<2;
rqstp->rq_argbuf.len = dr->argslen; rqstp->rq_arg.page_len = 0;
rqstp->rq_argbuf.buflen = dr->argslen; rqstp->rq_arg.len = dr->argslen<<2;
rqstp->rq_prot = dr->prot; rqstp->rq_prot = dr->prot;
rqstp->rq_addr = dr->addr; rqstp->rq_addr = dr->addr;
return dr->argslen<<2; return dr->argslen<<2;
......
...@@ -655,7 +655,7 @@ skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len) ...@@ -655,7 +655,7 @@ skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
* We have set things up such that we perform the checksum of the UDP * We have set things up such that we perform the checksum of the UDP
* packet in parallel with the copies into the RPC client iovec. -DaveM * packet in parallel with the copies into the RPC client iovec. -DaveM
*/ */
static int int
csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
{ {
skb_reader_t desc; skb_reader_t desc;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment