Commit 5b174fd6 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-3.16' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "The largest piece is a long-overdue rewrite of the xdr code to remove
  some annoying limitations: for example, there was no way to return
  ACLs larger than 4K, and readdir results were returned only in 4k
  chunks, limiting performance on large directories.

  Also:
        - part of Neil Brown's work to make NFS work reliably over the
          loopback interface (so client and server can run on the same
          machine without deadlocks).  The rest of it is coming through
          other trees.
        - cleanup and bugfixes for some of the server RDMA code, from
          Steve Wise.
        - Various cleanup of NFSv4 state code in preparation for an
          overhaul of the locking, from Jeff, Trond, and Benny.
        - smaller bugfixes and cleanup from Christoph Hellwig and
          Kinglong Mee.

  Thanks to everyone!

  This summer looks likely to be busier than usual for knfsd.  Hopefully
  we won't break it too badly; testing definitely welcomed"

* 'for-3.16' of git://linux-nfs.org/~bfields/linux: (100 commits)
  nfsd4: fix FREE_STATEID lockowner leak
  svcrdma: Fence LOCAL_INV work requests
  svcrdma: refactor marshalling logic
  nfsd: don't halt scanning the DRC LRU list when there's an RC_INPROG entry
  nfs4: remove unused CHANGE_SECURITY_LABEL
  nfsd4: kill READ64
  nfsd4: kill READ32
  nfsd4: simplify server xdr->next_page use
  nfsd4: hash deleg stateid only on successful nfs4_set_delegation
  nfsd4: rename recall_lock to state_lock
  nfsd: remove unneeded zeroing of fields in nfsd4_proc_compound
  nfsd: fix setting of NFS4_OO_CONFIRMED in nfsd4_open
  nfsd4: use recall_lock for delegation hashing
  nfsd: fix laundromat next-run-time calculation
  nfsd: make nfsd4_encode_fattr static
  SUNRPC/NFSD: Remove using of dprintk with KERN_WARNING
  nfsd: remove unused function nfsd_read_file
  nfsd: getattr for FATTR4_WORD0_FILES_AVAIL needs the statfs buffer
  NFSD: Error out when getting more than one fsloc/secinfo/uuid
  NFSD: Using type of uint32_t for ex_nflavors instead of int
  ...
parents 1d21b1bf 48385408
......@@ -176,7 +176,5 @@ Nonstandard compound limitations:
ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
fail to live up to the promise we made in CREATE_SESSION fore channel
negotiation.
* No more than one read-like operation allowed per compound; encoding
replies that cross page boundaries (except for read data) not handled.
See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.
......@@ -14,6 +14,8 @@
#include <linux/sunrpc/stats.h>
#include <linux/lockd/lockd.h>
#include <uapi/linux/nfs3.h>
#define NLMDBG_FACILITY NLMDBG_XDR
#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
......
......@@ -15,6 +15,8 @@
#include <linux/sunrpc/stats.h>
#include <linux/lockd/lockd.h>
#include <uapi/linux/nfs2.h>
#define NLMDBG_FACILITY NLMDBG_XDR
#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
......
......@@ -622,8 +622,8 @@ static int __init init_nlm(void)
err_pernet:
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nlm_sysctl_table);
#endif
err_sysctl:
#endif
return err;
}
......
......@@ -14,12 +14,11 @@
#include <linux/mutex.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/addr.h>
#include <linux/nfsd/nfsfh.h>
#include <linux/nfsd/export.h>
#include <linux/lockd/lockd.h>
#include <linux/lockd/share.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <uapi/linux/nfs2.h>
#define NLMDBG_FACILITY NLMDBG_SVCSUBS
......
......@@ -16,6 +16,8 @@
#include <linux/sunrpc/stats.h>
#include <linux/lockd/lockd.h>
#include <uapi/linux/nfs2.h>
#define NLMDBG_FACILITY NLMDBG_XDR
......
......@@ -2750,7 +2750,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL)
#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL)
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
......
......@@ -49,7 +49,7 @@ struct svc_rqst;
struct nfs4_acl *nfs4_acl_new(int);
int nfs4_acl_get_whotype(char *, u32);
__be32 nfs4_acl_write_who(int who, __be32 **p, int *len);
__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
struct nfs4_acl **acl);
......
/* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
#include <linux/sched.h>
#include <linux/user_namespace.h>
#include "nfsd.h"
#include "auth.h"
......@@ -25,7 +24,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
struct cred *new;
int i;
int flags = nfsexp_flags(rqstp, exp);
int ret;
validate_process_creds();
......@@ -86,8 +84,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
return 0;
oom:
ret = -ENOMEM;
abort_creds(new);
return ret;
return -ENOMEM;
}
......@@ -17,17 +17,12 @@
#include <linux/exportfs.h>
#include <linux/sunrpc/svc_xprt.h>
#include <net/ipv6.h>
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#define NFSDDBG_FACILITY NFSDDBG_EXPORT
typedef struct auth_domain svc_client;
typedef struct svc_export svc_export;
/*
* We have two caches.
* One maps client+vfsmnt+dentry to export options - the export map
......@@ -73,7 +68,7 @@ static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_
static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
{
/* client fsidtype fsid [path] */
/* client fsidtype fsid expiry [path] */
char *buf;
int len;
struct auth_domain *dom = NULL;
......@@ -295,13 +290,19 @@ svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new,
static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
{
struct nfsd4_fs_location *locations = fsloc->locations;
int i;
if (!locations)
return;
for (i = 0; i < fsloc->locations_count; i++) {
kfree(fsloc->locations[i].path);
kfree(fsloc->locations[i].hosts);
kfree(locations[i].path);
kfree(locations[i].hosts);
}
kfree(fsloc->locations);
kfree(locations);
fsloc->locations = NULL;
}
static void svc_export_put(struct kref *ref)
......@@ -388,6 +389,10 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
int len;
int migrated, i, err;
/* more than one fsloc */
if (fsloc->locations)
return -EINVAL;
/* listsize */
err = get_uint(mesg, &fsloc->locations_count);
if (err)
......@@ -437,13 +442,18 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
{
int listsize, err;
struct exp_flavor_info *f;
u32 listsize;
int err;
/* more than one secinfo */
if (exp->ex_nflavors)
return -EINVAL;
err = get_int(mesg, &listsize);
err = get_uint(mesg, &listsize);
if (err)
return err;
if (listsize < 0 || listsize > MAX_SECINFO_LIST)
if (listsize > MAX_SECINFO_LIST)
return -EINVAL;
for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) {
......@@ -474,6 +484,27 @@ static inline int
secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
#endif
static inline int
uuid_parse(char **mesg, char *buf, unsigned char **puuid)
{
int len;
/* more than one uuid */
if (*puuid)
return -EINVAL;
/* expect a 16 byte uuid encoded as \xXXXX... */
len = qword_get(mesg, buf, PAGE_SIZE);
if (len != EX_UUID_LEN)
return -EINVAL;
*puuid = kmemdup(buf, EX_UUID_LEN, GFP_KERNEL);
if (*puuid == NULL)
return -ENOMEM;
return 0;
}
static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
{
/* client path expiry [flags anonuid anongid fsid] */
......@@ -552,18 +583,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
if (strcmp(buf, "fsloc") == 0)
err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
else if (strcmp(buf, "uuid") == 0) {
/* expect a 16 byte uuid encoded as \xXXXX... */
len = qword_get(&mesg, buf, PAGE_SIZE);
if (len != 16)
err = -EINVAL;
else {
exp.ex_uuid =
kmemdup(buf, 16, GFP_KERNEL);
if (exp.ex_uuid == NULL)
err = -ENOMEM;
}
} else if (strcmp(buf, "secinfo") == 0)
else if (strcmp(buf, "uuid") == 0)
err = uuid_parse(&mesg, buf, &exp.ex_uuid);
else if (strcmp(buf, "secinfo") == 0)
err = secinfo_parse(&mesg, buf, &exp);
else
/* quietly ignore unknown words and anything
......@@ -649,7 +671,7 @@ static int svc_export_show(struct seq_file *m,
if (exp->ex_uuid) {
int i;
seq_puts(m, ",uuid=");
for (i=0; i<16; i++) {
for (i = 0; i < EX_UUID_LEN; i++) {
if ((i&3) == 0 && i)
seq_putc(m, ':');
seq_printf(m, "%02x", exp->ex_uuid[i]);
......@@ -771,7 +793,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old)
static struct svc_expkey *
exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type,
exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type,
u32 *fsidv, struct cache_req *reqp)
{
struct svc_expkey key, *ek;
......@@ -793,9 +815,9 @@ exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type,
return ek;
}
static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp,
const struct path *path, struct cache_req *reqp)
static struct svc_export *
exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp,
const struct path *path, struct cache_req *reqp)
{
struct svc_export *exp, key;
int err;
......@@ -819,11 +841,11 @@ static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp,
/*
* Find the export entry for a given dentry.
*/
static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp,
struct path *path)
static struct svc_export *
exp_parent(struct cache_detail *cd, struct auth_domain *clp, struct path *path)
{
struct dentry *saved = dget(path->dentry);
svc_export *exp = exp_get_by_name(cd, clp, path, NULL);
struct svc_export *exp = exp_get_by_name(cd, clp, path, NULL);
while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
struct dentry *parent = dget_parent(path->dentry);
......@@ -844,7 +866,7 @@ static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp,
* since its harder to fool a kernel module than a user space program.
*/
int
exp_rootfh(struct net *net, svc_client *clp, char *name,
exp_rootfh(struct net *net, struct auth_domain *clp, char *name,
struct knfsd_fh *f, int maxsize)
{
struct svc_export *exp;
......
/*
* include/linux/nfsd/export.h
*
* Public declarations for NFS exports. The definitions for the
* syscall interface are in nfsctl.h
*
* Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
*/
#ifndef NFSD_EXPORT_H
#define NFSD_EXPORT_H
# include <linux/nfsd/nfsfh.h>
#include <linux/sunrpc/cache.h>
#include <uapi/linux/nfsd/export.h>
struct knfsd_fh;
struct svc_fh;
struct svc_rqst;
/*
* FS Locations
*/
......@@ -38,6 +37,7 @@ struct nfsd4_fs_locations {
* spkm3i, and spkm3p (and using all 8 at once should be rare).
*/
#define MAX_SECINFO_LIST 8
#define EX_UUID_LEN 16
struct exp_flavor_info {
u32 pseudoflavor;
......@@ -54,7 +54,7 @@ struct svc_export {
int ex_fsid;
unsigned char * ex_uuid; /* 16 byte fsid */
struct nfsd4_fs_locations ex_fslocs;
int ex_nflavors;
uint32_t ex_nflavors;
struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST];
struct cache_detail *cd;
};
......
......@@ -97,25 +97,14 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,
{
static u64 val;
char read_buf[25];
size_t size, ret;
size_t size;
loff_t pos = *ppos;
if (!pos)
nfsd_inject_get(file_inode(file)->i_private, &val);
size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
if (pos < 0)
return -EINVAL;
if (pos >= size || !len)
return 0;
if (len > size - pos)
len = size - pos;
ret = copy_to_user(buf, read_buf + pos, len);
if (ret == len)
return -EFAULT;
len -= ret;
*ppos = pos + len;
return len;
return simple_read_from_buffer(buf, len, ppos, read_buf, size);
}
static ssize_t fault_inject_write(struct file *file, const char __user *buf,
......
......@@ -56,7 +56,7 @@ static inline void nfsd_idmap_shutdown(struct net *net)
__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *);
__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *);
__be32 nfsd4_encode_user(struct svc_rqst *, kuid_t, __be32 **, int *);
__be32 nfsd4_encode_group(struct svc_rqst *, kgid_t, __be32 **, int *);
__be32 nfsd4_encode_user(struct xdr_stream *, struct svc_rqst *, kuid_t);
__be32 nfsd4_encode_group(struct xdr_stream *, struct svc_rqst *, kgid_t);
#endif /* LINUX_NFSD_IDMAP_H */
......@@ -182,7 +182,8 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg
static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_getaclargs *argp)
{
if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
p = nfs2svc_decode_fh(p, &argp->fh);
if (!p)
return 0;
argp->mask = ntohl(*p); p++;
......@@ -197,7 +198,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int base;
int n;
if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
p = nfs2svc_decode_fh(p, &argp->fh);
if (!p)
return 0;
argp->mask = ntohl(*p++);
if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
......@@ -218,7 +220,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd_fhandle *argp)
{
if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
p = nfs2svc_decode_fh(p, &argp->fh);
if (!p)
return 0;
return xdr_argsize_check(rqstp, p);
}
......@@ -226,7 +229,8 @@ static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_accessargs *argp)
{
if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
p = nfs2svc_decode_fh(p, &argp->fh);
if (!p)
return 0;
argp->access = ntohl(*p++);
......
......@@ -128,7 +128,8 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_getaclargs *args)
{
if (!(p = nfs3svc_decode_fh(p, &args->fh)))
p = nfs3svc_decode_fh(p, &args->fh);
if (!p)
return 0;
args->mask = ntohl(*p); p++;
......@@ -143,7 +144,8 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int base;
int n;
if (!(p = nfs3svc_decode_fh(p, &args->fh)))
p = nfs3svc_decode_fh(p, &args->fh);
if (!p)
return 0;
args->mask = ntohl(*p++);
if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
......
......@@ -278,7 +278,8 @@ void fill_post_wcc(struct svc_fh *fhp)
int
nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
{
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
return xdr_argsize_check(rqstp, p);
}
......@@ -287,7 +288,8 @@ int
nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_sattrargs *args)
{
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
p = decode_sattr3(p, &args->attrs);
......@@ -315,7 +317,8 @@ int
nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_accessargs *args)
{
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
args->access = ntohl(*p++);
......@@ -330,7 +333,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
int v;
u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
p = xdr_decode_hyper(p, &args->offset);
......@@ -360,7 +364,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int len, v, hdr, dlen;
u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
p = xdr_decode_hyper(p, &args->offset);
......@@ -535,7 +540,8 @@ int
nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_readlinkargs *args)
{
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
args->buffer = page_address(*(rqstp->rq_next_page++));
......@@ -558,7 +564,8 @@ int
nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_readdirargs *args)
{
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
p = xdr_decode_hyper(p, &args->cookie);
args->verf = p; p += 2;
......@@ -580,7 +587,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
int len;
u32 max_blocksize = svc_max_payload(rqstp);
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
p = xdr_decode_hyper(p, &args->cookie);
args->verf = p; p += 2;
......@@ -605,7 +613,8 @@ int
nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_commitargs *args)
{
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
p = xdr_decode_hyper(p, &args->offset);
args->count = ntohl(*p++);
......
......@@ -36,7 +36,6 @@
#include <linux/slab.h>
#include <linux/nfs_fs.h>
#include <linux/export.h>
#include "nfsfh.h"
#include "nfsd.h"
#include "acl.h"
......@@ -920,20 +919,19 @@ nfs4_acl_get_whotype(char *p, u32 len)
return NFS4_ACL_WHO_NAMED;
}
__be32 nfs4_acl_write_who(int who, __be32 **p, int *len)
__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who)
{
__be32 *p;
int i;
int bytes;
for (i = 0; i < ARRAY_SIZE(s2t_map); i++) {
if (s2t_map[i].type != who)
continue;
bytes = 4 + (XDR_QUADLEN(s2t_map[i].stringlen) << 2);
if (bytes > *len)
p = xdr_reserve_space(xdr, s2t_map[i].stringlen + 4);
if (!p)
return nfserr_resource;
*p = xdr_encode_opaque(*p, s2t_map[i].string,
p = xdr_encode_opaque(p, s2t_map[i].string,
s2t_map[i].stringlen);
*len -= bytes;
return 0;
}
WARN_ON_ONCE(1);
......
......@@ -551,44 +551,43 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
return 0;
}
static __be32 encode_ascii_id(u32 id, __be32 **p, int *buflen)
static __be32 encode_ascii_id(struct xdr_stream *xdr, u32 id)
{
char buf[11];
int len;
int bytes;
__be32 *p;
len = sprintf(buf, "%u", id);
bytes = 4 + (XDR_QUADLEN(len) << 2);
if (bytes > *buflen)
p = xdr_reserve_space(xdr, len + 4);
if (!p)
return nfserr_resource;
*p = xdr_encode_opaque(*p, buf, len);
*buflen -= bytes;
p = xdr_encode_opaque(p, buf, len);
return 0;
}
static __be32 idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen)
static __be32 idmap_id_to_name(struct xdr_stream *xdr,
struct svc_rqst *rqstp, int type, u32 id)
{
struct ent *item, key = {
.id = id,
.type = type,
};
__be32 *p;
int ret;
int bytes;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);
if (ret == -ENOENT)
return encode_ascii_id(id, p, buflen);
return encode_ascii_id(xdr, id);
if (ret)
return nfserrno(ret);
ret = strlen(item->name);
WARN_ON_ONCE(ret > IDMAP_NAMESZ);
bytes = 4 + (XDR_QUADLEN(ret) << 2);
if (bytes > *buflen)
p = xdr_reserve_space(xdr, ret + 4);
if (!p)
return nfserr_resource;
*p = xdr_encode_opaque(*p, item->name, ret);
*buflen -= bytes;
p = xdr_encode_opaque(p, item->name, ret);
cache_put(&item->h, nn->idtoname_cache);
return 0;
}
......@@ -622,11 +621,12 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u
return idmap_name_to_id(rqstp, type, name, namelen, id);
}
static __be32 encode_name_from_id(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen)
static __be32 encode_name_from_id(struct xdr_stream *xdr,
struct svc_rqst *rqstp, int type, u32 id)
{
if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
return encode_ascii_id(id, p, buflen);
return idmap_id_to_name(rqstp, type, id, p, buflen);
return encode_ascii_id(xdr, id);
return idmap_id_to_name(xdr, rqstp, type, id);
}
__be32
......@@ -655,14 +655,16 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
return status;
}
__be32 nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t uid, __be32 **p, int *buflen)
__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp,
kuid_t uid)
{
u32 id = from_kuid(&init_user_ns, uid);
return encode_name_from_id(rqstp, IDMAP_TYPE_USER, id, p, buflen);
return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id);
}
__be32 nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t gid, __be32 **p, int *buflen)
__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp,
kgid_t gid)
{
u32 id = from_kgid(&init_user_ns, gid);
return encode_name_from_id(rqstp, IDMAP_TYPE_GROUP, id, p, buflen);
return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id);
}
This diff is collapsed.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -224,13 +224,6 @@ hash_refile(struct svc_cacherep *rp)
hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits));
}
static inline bool
nfsd_cache_entry_expired(struct svc_cacherep *rp)
{
return rp->c_state != RC_INPROG &&
time_after(jiffies, rp->c_timestamp + RC_EXPIRE);
}
/*
* Walk the LRU list and prune off entries that are older than RC_EXPIRE.
* Also prune the oldest ones when the total exceeds the max number of entries.
......@@ -242,8 +235,14 @@ prune_cache_entries(void)
long freed = 0;
list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
if (!nfsd_cache_entry_expired(rp) &&
num_drc_entries <= max_drc_entries)
/*
* Don't free entries attached to calls that are still
* in-progress, but do keep scanning the list.
*/
if (rp->c_state == RC_INPROG)
continue;
if (num_drc_entries <= max_drc_entries &&
time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break;
nfsd_reply_cache_free_locked(rp);
freed++;
......
......@@ -1179,7 +1179,6 @@ static int __init init_nfsd(void)
retval = nfsd4_init_slabs();
if (retval)
goto out_unregister_pernet;
nfs4_state_init();
retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
if (retval)
goto out_free_slabs;
......
......@@ -15,11 +15,20 @@
#include <linux/nfs2.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/nfsd/debug.h>
#include <linux/nfsd/export.h>
#include <linux/nfsd/stats.h>
#include <uapi/linux/nfsd/debug.h>
#include "stats.h"
#include "export.h"
#undef ifdebug
#ifdef NFSD_DEBUG
# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag)
#else
# define ifdebug(flag) if (0)
#endif
/*
* nfsd version
......@@ -106,7 +115,6 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
*/
#ifdef CONFIG_NFSD_V4
extern unsigned long max_delegations;
void nfs4_state_init(void);
int nfsd4_init_slabs(void);
void nfsd4_free_slabs(void);
int nfs4_state_start(void);
......@@ -117,7 +125,6 @@ void nfs4_reset_lease(time_t leasetime);
int nfs4_reset_recoverydir(char *recdir);
char * nfs4_recoverydir(void);
#else
static inline void nfs4_state_init(void) { }
static inline int nfsd4_init_slabs(void) { return 0; }
static inline void nfsd4_free_slabs(void) { }
static inline int nfs4_state_start(void) { return 0; }
......
......@@ -88,9 +88,8 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
/* Check if the request originated from a secure port. */
if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk(KERN_WARNING
"nfsd: request from insecure port %s!\n",
svc_print_addr(rqstp, buf, sizeof(buf)));
dprintk("nfsd: request from insecure port %s!\n",
svc_print_addr(rqstp, buf, sizeof(buf)));
return nfserr_perm;
}
......@@ -169,8 +168,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
data_left -= len;
if (data_left < 0)
return error;
exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
fid = (struct fid *)(fh->fh_auth + len);
exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
fid = (struct fid *)(fh->fh_fsid + len);
} else {
__u32 tfh[2];
dev_t xdev;
......@@ -385,7 +384,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
{
if (dentry != exp->ex_path.dentry) {
struct fid *fid = (struct fid *)
(fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1);
(fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1);
int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK);
......@@ -513,7 +512,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
*/
struct inode * inode = dentry->d_inode;
__u32 *datap;
dev_t ex_dev = exp_sb(exp)->s_dev;
dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
......@@ -557,17 +555,16 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
if (inode)
_fh_update_old(dentry, exp, &fhp->fh_handle);
} else {
int len;
fhp->fh_handle.fh_size =
key_len(fhp->fh_handle.fh_fsid_type) + 4;
fhp->fh_handle.fh_auth_type = 0;
datap = fhp->fh_handle.fh_auth+0;
mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev,
mk_fsid(fhp->fh_handle.fh_fsid_type,
fhp->fh_handle.fh_fsid,
ex_dev,
exp->ex_path.dentry->d_inode->i_ino,
exp->ex_fsid, exp->ex_uuid);
len = key_len(fhp->fh_handle.fh_fsid_type);
datap += len/4;
fhp->fh_handle.fh_size = 4 + len;
if (inode)
_fh_update(fhp, exp, dentry);
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
......
/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
/*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*
* This file describes the layout of the file handles as passed
* over the wire.
*/
#ifndef _LINUX_NFSD_NFSFH_H
#define _LINUX_NFSD_NFSFH_H
#include <linux/sunrpc/svc.h>
#include <uapi/linux/nfsd/nfsfh.h>
static inline __u32 ino_t_to_u32(ino_t ino)
{
return (__u32) ino;
}
static inline ino_t u32_to_ino_t(__u32 uino)
{
return (ino_t) uino;
}
#ifndef _LINUX_NFSD_FH_INT_H
#define _LINUX_NFSD_FH_INT_H
/*
* This is the internal representation of an NFS handle used in knfsd.
* pre_mtime/post_version will be used to support wcc_attr's in NFSv3.
*/
typedef struct svc_fh {
struct knfsd_fh fh_handle; /* FH data */
struct dentry * fh_dentry; /* validated dentry */
struct svc_export * fh_export; /* export pointer */
int fh_maxsize; /* max size for fh_handle */
unsigned char fh_locked; /* inode locked by us */
unsigned char fh_want_write; /* remount protection taken */
#ifdef CONFIG_NFSD_V3
unsigned char fh_post_saved; /* post-op attrs saved */
unsigned char fh_pre_saved; /* pre-op attrs saved */
/* Pre-op attributes saved during fh_lock */
__u64 fh_pre_size; /* size before operation */
struct timespec fh_pre_mtime; /* mtime before oper */
struct timespec fh_pre_ctime; /* ctime before oper */
/*
* pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
* to find out if it is valid.
*/
u64 fh_pre_change;
/* Post-op attributes saved in fh_unlock */
struct kstat fh_post_attr; /* full attrs after operation */
u64 fh_post_change; /* nfsv4 change; see above */
#endif /* CONFIG_NFSD_V3 */
#include <linux/nfsd/nfsfh.h>
} svc_fh;
enum nfsd_fsid {
FSID_DEV = 0,
......@@ -215,4 +264,4 @@ fh_unlock(struct svc_fh *fhp)
}
}
#endif /* _LINUX_NFSD_FH_INT_H */
#endif /* _LINUX_NFSD_NFSFH_H */
......@@ -591,12 +591,6 @@ nfsd(void *vrqstp)
nfsdstats.th_cnt++;
mutex_unlock(&nfsd_mutex);
/*
* We want less throttling in balance_dirty_pages() so that nfs to
* localhost doesn't cause nfsd to lock up due to all the client's
* dirty pages.
*/
current->flags |= PF_LESS_THROTTLE;
set_freezable();
/*
......
......@@ -214,7 +214,8 @@ nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
int
nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
{
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
return xdr_argsize_check(rqstp, p);
}
......@@ -248,7 +249,8 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
{
unsigned int len;
int v;
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
args->offset = ntohl(*p++);
......@@ -281,7 +283,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int len, hdr, dlen;
int v;
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
p++; /* beginoffset */
......@@ -355,7 +358,8 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
int
nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args)
{
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
args->buffer = page_address(*(rqstp->rq_next_page++));
......@@ -391,7 +395,8 @@ int
nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd_readdirargs *args)
{
if (!(p = decode_fh(p, &args->fh)))
p = decode_fh(p, &args->fh);
if (!p)
return 0;
args->cookie = ntohl(*p++);
args->count = ntohl(*p++);
......
......@@ -37,7 +37,6 @@
#include <linux/idr.h>
#include <linux/sunrpc/svc_xprt.h>
#include <linux/nfsd/nfsfh.h>
#include "nfsfh.h"
typedef struct {
......@@ -123,7 +122,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
/* Maximum number of operations per session compound */
#define NFSD_MAX_OPS_PER_COMPOUND 16
/* Maximum session per slot cache size */
#define NFSD_SLOT_CACHE_SIZE 1024
#define NFSD_SLOT_CACHE_SIZE 2048
/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32
#define NFSD_MAX_MEM_PER_SESSION \
......@@ -464,8 +463,6 @@ extern void nfs4_release_reclaim(struct nfsd_net *);
extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
struct nfsd_net *nn);
extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn);
extern void nfs4_free_openowner(struct nfs4_openowner *);
extern void nfs4_free_lockowner(struct nfs4_lockowner *);
extern int set_callback_cred(void);
extern void nfsd4_init_callback(struct nfsd4_callback *);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
......
......@@ -24,7 +24,6 @@
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/sunrpc/stats.h>
#include <linux/nfsd/stats.h>
#include <net/net_namespace.h>
#include "nfsd.h"
......
/*
* linux/include/linux/nfsd/stats.h
*
* Statistics for NFS server.
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
#ifndef LINUX_NFSD_STATS_H
#define LINUX_NFSD_STATS_H
#ifndef _NFSD_STATS_H
#define _NFSD_STATS_H
#include <uapi/linux/nfsd/stats.h>
......@@ -42,4 +40,4 @@ extern struct svc_stat nfsd_svcstats;
void nfsd_stat_init(void);
void nfsd_stat_shutdown(void);
#endif /* LINUX_NFSD_STATS_H */
#endif /* _NFSD_STATS_H */
......@@ -820,55 +820,54 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
}
static __be32
nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
{
mm_segment_t oldfs;
__be32 err;
int host_err;
err = nfserr_perm;
if (file->f_op->splice_read && rqstp->rq_splice_ok) {
struct splice_desc sd = {
.len = 0,
.total_len = *count,
.pos = offset,
.u.data = rqstp,
};
rqstp->rq_next_page = rqstp->rq_respages + 1;
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
} else {
oldfs = get_fs();
set_fs(KERNEL_DS);
host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
set_fs(oldfs);
}
if (host_err >= 0) {
nfsdstats.io_read += host_err;
*count = host_err;
err = 0;
fsnotify_access(file);
return 0;
} else
err = nfserrno(host_err);
return err;
return nfserrno(host_err);
}
int nfsd_splice_read(struct svc_rqst *rqstp,
struct file *file, loff_t offset, unsigned long *count)
{
struct splice_desc sd = {
.len = 0,
.total_len = *count,
.pos = offset,
.u.data = rqstp,
};
int host_err;
rqstp->rq_next_page = rqstp->rq_respages + 1;
host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
return nfsd_finish_read(file, count, host_err);
}
static void kill_suid(struct dentry *dentry)
int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
unsigned long *count)
{
struct iattr ia;
ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
mm_segment_t oldfs;
int host_err;
mutex_lock(&dentry->d_inode->i_mutex);
/*
* Note we call this on write, so notify_change will not
* encounter any conflicting delegations:
*/
notify_change(dentry, &ia, NULL);
mutex_unlock(&dentry->d_inode->i_mutex);
oldfs = get_fs();
set_fs(KERNEL_DS);
host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
set_fs(oldfs);
return nfsd_finish_read(file, count, host_err);
}
static __be32
nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
{
if (file->f_op->splice_read && rqstp->rq_splice_ok)
return nfsd_splice_read(rqstp, file, offset, count);
else
return nfsd_readv(file, offset, vec, vlen, count);
}
/*
......@@ -922,6 +921,16 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
int stable = *stablep;
int use_wgather;
loff_t pos = offset;
unsigned int pflags = current->flags;
if (rqstp->rq_local)
/*
* We want less throttling in balance_dirty_pages()
* and shrink_inactive_list() so that nfs to
* localhost doesn't cause nfsd to lock up due to all
* the client's dirty pages or its congested queue.
*/
current->flags |= PF_LESS_THROTTLE;
dentry = file->f_path.dentry;
inode = dentry->d_inode;
......@@ -942,10 +951,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
nfsdstats.io_write += host_err;
fsnotify_modify(file);
/* clear setuid/setgid flag after write */
if (inode->i_mode & (S_ISUID | S_ISGID))
kill_suid(dentry);
if (stable) {
if (use_wgather)
host_err = wait_for_concurrent_writes(file);
......@@ -959,36 +964,33 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
err = 0;
else
err = nfserrno(host_err);
if (rqstp->rq_local)
tsk_restore_flags(current, pflags, PF_LESS_THROTTLE);
return err;
}
/*
* Read data from a file. count must contain the requested read count
* on entry. On return, *count contains the number of bytes actually read.
* N.B. After this call fhp needs an fh_put
*/
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file **file, struct raparms **ra)
{
struct file *file;
struct inode *inode;
struct raparms *ra;
__be32 err;
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file);
if (err)
return err;
inode = file_inode(file);
inode = file_inode(*file);
/* Get readahead parameters */
ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
*ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
if (ra && ra->p_set)
file->f_ra = ra->p_ra;
err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
if (*ra && (*ra)->p_set)
(*file)->f_ra = (*ra)->p_ra;
return nfs_ok;
}
void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra)
{
/* Write back readahead params */
if (ra) {
struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
......@@ -998,28 +1000,29 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
ra->p_count--;
spin_unlock(&rab->pb_lock);
}
nfsd_close(file);
return err;
}
/* As above, but use the provided file descriptor. */
__be32
nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
unsigned long *count)
/*
* Read data from a file. count must contain the requested read count
* on entry. On return, *count contains the number of bytes actually read.
* N.B. After this call fhp needs an fh_put
*/
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
{
__be32 err;
struct file *file;
struct raparms *ra;
__be32 err;
err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra);
if (err)
return err;
err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
nfsd_put_tmp_read_open(file, ra);
if (file) {
err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
} else /* Note file may still be NULL in NFSv4 special stateid case: */
err = nfsd_read(rqstp, fhp, offset, vec, vlen, count);
out:
return err;
}
......
......@@ -70,10 +70,16 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
void nfsd_close(struct file *);
struct raparms;
__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
struct file **, struct raparms **);
void nfsd_put_tmp_read_open(struct file *, struct raparms *);
int nfsd_splice_read(struct svc_rqst *,
struct file *, loff_t, unsigned long *);
int nfsd_readv(struct file *, loff_t, struct kvec *, int,
unsigned long *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
__be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *,
loff_t, struct kvec *, int, unsigned long *);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
loff_t, struct kvec *,int, unsigned long *, int *);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
......
......@@ -58,7 +58,7 @@ struct nfsd4_compound_state {
/* For sessions DRC */
struct nfsd4_session *session;
struct nfsd4_slot *slot;
__be32 *datap;
int data_offset;
size_t iovlen;
u32 minorversion;
__be32 status;
......@@ -287,9 +287,8 @@ struct nfsd4_readdir {
struct svc_fh * rd_fhp; /* response */
struct readdir_cd common;
__be32 * buffer;
int buflen;
__be32 * offset;
struct xdr_stream *xdr;
int cookie_offset;
};
struct nfsd4_release_lockowner {
......@@ -506,9 +505,7 @@ struct nfsd4_compoundargs {
struct nfsd4_compoundres {
/* scratch variables for XDR encode */
__be32 * p;
__be32 * end;
struct xdr_buf * xbuf;
struct xdr_stream xdr;
struct svc_rqst * rqstp;
u32 taglen;
......@@ -538,6 +535,9 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
return argp->opcnt == resp->opcnt;
}
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op);
void warn_on_nonidempotent_op(struct nfsd4_op *op);
#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
static inline void
......@@ -563,10 +563,11 @@ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
struct nfsd4_compoundres *);
__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry, __be32 **buffer, int countp,
u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry,
u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *,
struct nfsd4_setclientid *setclid);
......
......@@ -17,13 +17,13 @@
#include <linux/fs.h>
#include <linux/kref.h>
#include <linux/utsname.h>
#include <linux/nfsd/nfsfh.h>
#include <linux/lockd/bind.h>
#include <linux/lockd/xdr.h>
#ifdef CONFIG_LOCKD_V4
#include <linux/lockd/xdr4.h>
#endif
#include <linux/lockd/debug.h>
#include <linux/sunrpc/svc.h>
/*
* Version string
......
......@@ -399,8 +399,6 @@ enum lock_type4 {
#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16)
#define FATTR4_WORD2_CHANGE_SECURITY_LABEL \
(1UL << 17)
/* MDS threshold bitmap bits */
#define THRESHOLD_RD (1UL << 0)
......
/*
* linux/include/linux/nfsd/debug.h
*
* Debugging-related stuff for nfsd
*
* Copyright (C) 1995 Olaf Kirch <okir@monad.swb.de>
*/
#ifndef LINUX_NFSD_DEBUG_H
#define LINUX_NFSD_DEBUG_H
#include <uapi/linux/nfsd/debug.h>
# undef ifdebug
# ifdef NFSD_DEBUG
# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag)
# else
# define ifdebug(flag) if (0)
# endif
#endif /* LINUX_NFSD_DEBUG_H */
/*
* include/linux/nfsd/nfsfh.h
*
* This file describes the layout of the file handles as passed
* over the wire.
*
* Earlier versions of knfsd used to sign file handles using keyed MD5
* or SHA. I've removed this code, because it doesn't give you more
* security than blocking external access to port 2049 on your firewall.
*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
#ifndef _LINUX_NFSD_FH_H
#define _LINUX_NFSD_FH_H
# include <linux/sunrpc/svc.h>
#include <uapi/linux/nfsd/nfsfh.h>
static inline __u32 ino_t_to_u32(ino_t ino)
{
return (__u32) ino;
}
static inline ino_t u32_to_ino_t(__u32 uino)
{
return (ino_t) uino;
}
/*
* This is the internal representation of an NFS handle used in knfsd.
* pre_mtime/post_version will be used to support wcc_attr's in NFSv3.
*/
typedef struct svc_fh {
struct knfsd_fh fh_handle; /* FH data */
struct dentry * fh_dentry; /* validated dentry */
struct svc_export * fh_export; /* export pointer */
int fh_maxsize; /* max size for fh_handle */
unsigned char fh_locked; /* inode locked by us */
unsigned char fh_want_write; /* remount protection taken */
#ifdef CONFIG_NFSD_V3
unsigned char fh_post_saved; /* post-op attrs saved */
unsigned char fh_pre_saved; /* pre-op attrs saved */
/* Pre-op attributes saved during fh_lock */
__u64 fh_pre_size; /* size before operation */
struct timespec fh_pre_mtime; /* mtime before oper */
struct timespec fh_pre_ctime; /* ctime before oper */
/*
* pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
* to find out if it is valid.
*/
u64 fh_pre_change;
/* Post-op attributes saved in fh_unlock */
struct kstat fh_post_attr; /* full attrs after operation */
u64 fh_post_change; /* nfsv4 change; see above */
#endif /* CONFIG_NFSD_V3 */
} svc_fh;
#endif /* _LINUX_NFSD_FH_H */
......@@ -244,6 +244,7 @@ struct svc_rqst {
struct page * rq_pages[RPCSVC_MAXPAGES];
struct page * *rq_respages; /* points into rq_pages */
struct page * *rq_next_page; /* next reply page to use */
struct page * *rq_page_end; /* one past the last page */
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
......@@ -254,11 +255,15 @@ struct svc_rqst {
u32 rq_prot; /* IP protocol */
unsigned short
rq_secure : 1; /* secure port */
unsigned short rq_local : 1; /* local request */
void * rq_argp; /* decoded arguments */
void * rq_resp; /* xdr'd results */
void * rq_auth_data; /* flavor-specific data */
int rq_auth_slack; /* extra space xdr code
* should leave in head
* for krb5i, krb5p.
*/
int rq_reserved; /* space on socket outq
* reserved for this request
*/
......@@ -454,11 +459,7 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t);
*/
static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
{
int added_space = 0;
if (rqstp->rq_authop->flavour)
added_space = RPC_MAX_AUTH_SIZE;
svc_reserve(rqstp, space + added_space);
svc_reserve(rqstp, space + rqstp->rq_auth_slack);
}
#endif /* SUNRPC_SVC_H */
......@@ -115,14 +115,13 @@ struct svc_rdma_fastreg_mr {
struct list_head frmr_list;
};
struct svc_rdma_req_map {
struct svc_rdma_fastreg_mr *frmr;
unsigned long count;
union {
struct kvec sge[RPCSVC_MAXPAGES];
struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
unsigned long lkey[RPCSVC_MAXPAGES];
};
};
#define RDMACTXT_F_FAST_UNREG 1
#define RDMACTXT_F_LAST_CTXT 2
#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */
......
......@@ -24,6 +24,7 @@ struct svc_xprt_ops {
void (*xpo_release_rqst)(struct svc_rqst *);
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
int (*xpo_secure_port)(struct svc_rqst *);
};
struct svc_xprt_class {
......@@ -63,6 +64,7 @@ struct svc_xprt {
#define XPT_DETACHED 10 /* detached from tempsocks list */
#define XPT_LISTENER 11 /* listening endpoint */
#define XPT_CACHE_AUTH 12 /* cache auth info */
#define XPT_LOCAL 13 /* connection from loopback interface */
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
......
......@@ -215,6 +215,9 @@ typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
extern void xdr_commit_encode(struct xdr_stream *xdr);
extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
unsigned int base, unsigned int len);
extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);
......
/*
* include/linux/nfsd/nfsfh.h
*
* This file describes the layout of the file handles as passed
* over the wire.
*
* Earlier versions of knfsd used to sign file handles using keyed MD5
* or SHA. I've removed this code, because it doesn't give you more
* security than blocking external access to port 2049 on your firewall.
*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
......@@ -37,7 +31,7 @@ struct nfs_fhbase_old {
};
/*
* This is the new flexible, extensible style NFSv2/v3 file handle.
* This is the new flexible, extensible style NFSv2/v3/v4 file handle.
* by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
*
* The file handle starts with a sequence of four-byte words.
......@@ -47,14 +41,7 @@ struct nfs_fhbase_old {
*
* All four-byte values are in host-byte-order.
*
* The auth_type field specifies how the filehandle can be authenticated
* This might allow a file to be confirmed to be in a writable part of a
* filetree without checking the path from it up to the root.
* Current values:
* 0 - No authentication. fb_auth is 0 bytes long
* Possible future values:
* 1 - 4 bytes taken from MD5 hash of the remainer of the file handle
* prefixed by a secret and with the important export flags.
* The auth_type field is deprecated and must be set to 0.
*
* The fsid_type identifies how the filesystem (or export point) is
* encoded.
......@@ -71,14 +58,9 @@ struct nfs_fhbase_old {
* 7 - 8 byte inode number and 16 byte uuid
*
* The fileid_type identified how the file within the filesystem is encoded.
* This is (will be) passed to, and set by, the underlying filesystem if it supports
* filehandle operations. The filesystem must not use the value '0' or '0xff' and may
* only use the values 1 and 2 as defined below:
* Current values:
* 0 - The root, or export point, of the filesystem. fb_fileid is 0 bytes.
* 1 - 32bit inode number, 32 bit generation number.
* 2 - 32bit inode number, 32 bit generation number, 32 bit parent directory inode number.
*
* The values for this field are filesystem specific, exccept that
* filesystems must not use the values '0' or '0xff'. 'See enum fid_type'
* in include/linux/exportfs.h for currently registered values.
*/
struct nfs_fhbase_new {
__u8 fb_version; /* == 1, even => nfs_fhbase_old */
......@@ -114,9 +96,9 @@ struct knfsd_fh {
#define fh_fsid_type fh_base.fh_new.fb_fsid_type
#define fh_auth_type fh_base.fh_new.fb_auth_type
#define fh_fileid_type fh_base.fh_new.fb_fileid_type
#define fh_auth fh_base.fh_new.fb_auth
#define fh_fsid fh_base.fh_new.fb_auth
/* Do not use, provided for userspace compatiblity. */
#define fh_auth fh_base.fh_new.fb_auth
#endif /* _UAPI_LINUX_NFSD_FH_H */
......@@ -1503,6 +1503,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
gc->gc_seq, rsci->mechctx))
goto garbage_args;
rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE;
break;
case RPC_GSS_SVC_PRIVACY:
/* placeholders for length and seq. number: */
......@@ -1511,6 +1512,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
gc->gc_seq, rsci->mechctx))
goto garbage_args;
rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2;
break;
default:
goto auth_err;
......
......@@ -374,7 +374,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
}
return;
out:
printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name);
printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
}
EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
......
......@@ -43,6 +43,19 @@ static inline int rpc_reply_expected(struct rpc_task *task)
(task->tk_msg.rpc_proc->p_decode != NULL);
}
static inline int sock_is_loopback(struct sock *sk)
{
struct dst_entry *dst;
int loopback = 0;
rcu_read_lock();
dst = rcu_dereference(sk->sk_dst_cache);
if (dst && dst->dev &&
(dst->dev->features & NETIF_F_LOOPBACK))
loopback = 1;
rcu_read_unlock();
return loopback;
}
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page *headpage, unsigned long headoffset,
struct page *tailpage, unsigned long tailoffset);
......
......@@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
}
rqstp->rq_pages[i] = p;
}
rqstp->rq_page_end = &rqstp->rq_pages[i];
rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
/* Make arg->head point to first page and arg->pages point to rest */
......@@ -730,6 +731,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
newxpt = xprt->xpt_ops->xpo_accept(xprt);
if (newxpt)
svc_add_new_temp_xprt(serv, newxpt);
else
module_put(xprt->xpt_class->xcl_owner);
} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
......@@ -793,7 +796,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
clear_bit(XPT_OLD, &xprt->xpt_flags);
rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp);
rqstp->rq_chandle.defer = svc_defer;
if (serv->sv_stats)
......
......@@ -54,6 +54,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
}
spin_unlock(&authtab_lock);
rqstp->rq_auth_slack = 0;
rqstp->rq_authop = aops;
return aops->accept(rqstp, authp);
}
......
......@@ -400,6 +400,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
release_sock(sock->sk);
#endif
}
static int svc_sock_secure_port(struct svc_rqst *rqstp)
{
return svc_port_is_privileged(svc_addr(rqstp));
}
/*
* INET callback when data has been received on the socket.
*/
......@@ -678,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = {
.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
.xpo_has_wspace = svc_udp_has_wspace,
.xpo_accept = svc_udp_accept,
.xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_udp_class = {
......@@ -842,8 +849,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
* tell us anything. For now just warn about unpriv connections.
*/
if (!svc_port_is_privileged(sin)) {
dprintk(KERN_WARNING
"%s: connect from unprivileged port: %s\n",
dprintk("%s: connect from unprivileged port: %s\n",
serv->sv_name,
__svc_print_addr(sin, buf, sizeof(buf)));
}
......@@ -867,6 +873,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
}
svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
if (sock_is_loopback(newsock->sk))
set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
else
clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
if (serv->sv_stats)
serv->sv_stats->nettcpconn++;
......@@ -1112,6 +1122,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
rqstp->rq_local = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
......@@ -1234,6 +1245,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = {
.xpo_detach = svc_bc_tcp_sock_detach,
.xpo_free = svc_bc_sock_free,
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
.xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_tcp_bc_class = {
......@@ -1272,6 +1284,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_tcp_class = {
......
......@@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
xdr_set_scratch_buffer(xdr, NULL, 0);
BUG_ON(scratch_len < 0);
xdr->buf = buf;
xdr->iov = iov;
......@@ -481,6 +482,73 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
}
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
* xdr_commit_encode - Ensure all data is written to buffer
* @xdr: pointer to xdr_stream
*
* We handle encoding across page boundaries by giving the caller a
* temporary location to write to, then later copying the data into
* place; xdr_commit_encode does that copying.
*
* Normally the caller doesn't need to call this directly, as the
* following xdr_reserve_space will do it. But an explicit call may be
* required at the end of encoding, or any other time when the xdr_buf
* data might be read.
*/
void xdr_commit_encode(struct xdr_stream *xdr)
{
int shift = xdr->scratch.iov_len;
void *page;
if (shift == 0)
return;
page = page_address(*xdr->page_ptr);
memcpy(xdr->scratch.iov_base, page, shift);
memmove(page, page + shift, (void *)xdr->p - page);
xdr->scratch.iov_len = 0;
}
EXPORT_SYMBOL_GPL(xdr_commit_encode);
__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
{
static __be32 *p;
int space_left;
int frag1bytes, frag2bytes;
if (nbytes > PAGE_SIZE)
return NULL; /* Bigger buffers require special handling */
if (xdr->buf->len + nbytes > xdr->buf->buflen)
return NULL; /* Sorry, we're totally out of space */
frag1bytes = (xdr->end - xdr->p) << 2;
frag2bytes = nbytes - frag1bytes;
if (xdr->iov)
xdr->iov->iov_len += frag1bytes;
else
xdr->buf->page_len += frag1bytes;
xdr->page_ptr++;
xdr->iov = NULL;
/*
* If the last encode didn't end exactly on a page boundary, the
* next one will straddle boundaries. Encode into the next
* page, then copy it back later in xdr_commit_encode. We use
* the "scratch" iov to track any temporarily unused fragment of
* space at the end of the previous buffer:
*/
xdr->scratch.iov_base = xdr->p;
xdr->scratch.iov_len = frag1bytes;
p = page_address(*xdr->page_ptr);
/*
* Note this is where the next encode will start after we've
* shifted this one back:
*/
xdr->p = (void *)p + frag2bytes;
space_left = xdr->buf->buflen - xdr->buf->len;
xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
}
/**
* xdr_reserve_space - Reserve buffer space for sending
* @xdr: pointer to xdr_stream
......@@ -495,19 +563,121 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
__be32 *p = xdr->p;
__be32 *q;
xdr_commit_encode(xdr);
/* align nbytes on the next 32-bit boundary */
nbytes += 3;
nbytes &= ~3;
q = p + (nbytes >> 2);
if (unlikely(q > xdr->end || q < p))
return NULL;
return xdr_get_next_encode_buffer(xdr, nbytes);
xdr->p = q;
xdr->iov->iov_len += nbytes;
if (xdr->iov)
xdr->iov->iov_len += nbytes;
else
xdr->buf->page_len += nbytes;
xdr->buf->len += nbytes;
return p;
}
EXPORT_SYMBOL_GPL(xdr_reserve_space);
/**
* xdr_truncate_encode - truncate an encode buffer
* @xdr: pointer to xdr_stream
* @len: new length of buffer
*
* Truncates the xdr stream, so that xdr->buf->len == len,
* and xdr->p points at offset len from the start of the buffer, and
* head, tail, and page lengths are adjusted to correspond.
*
* If this means moving xdr->p to a different buffer, we assume that
* that the end pointer should be set to the end of the current page,
* except in the case of the head buffer when we assume the head
* buffer's current length represents the end of the available buffer.
*
* This is *not* safe to use on a buffer that already has inlined page
* cache pages (as in a zero-copy server read reply), except for the
* simple case of truncating from one position in the tail to another.
*
*/
void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
{
struct xdr_buf *buf = xdr->buf;
struct kvec *head = buf->head;
struct kvec *tail = buf->tail;
int fraglen;
int new, old;
if (len > buf->len) {
WARN_ON_ONCE(1);
return;
}
xdr_commit_encode(xdr);
fraglen = min_t(int, buf->len - len, tail->iov_len);
tail->iov_len -= fraglen;
buf->len -= fraglen;
if (tail->iov_len && buf->len == len) {
xdr->p = tail->iov_base + tail->iov_len;
/* xdr->end, xdr->iov should be set already */
return;
}
WARN_ON_ONCE(fraglen);
fraglen = min_t(int, buf->len - len, buf->page_len);
buf->page_len -= fraglen;
buf->len -= fraglen;
new = buf->page_base + buf->page_len;
old = new + fraglen;
xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT);
if (buf->page_len && buf->len == len) {
xdr->p = page_address(*xdr->page_ptr);
xdr->end = (void *)xdr->p + PAGE_SIZE;
xdr->p = (void *)xdr->p + (new % PAGE_SIZE);
/* xdr->iov should already be NULL */
return;
}
if (fraglen) {
xdr->end = head->iov_base + head->iov_len;
xdr->page_ptr--;
}
/* (otherwise assume xdr->end is already set) */
head->iov_len = len;
buf->len = len;
xdr->p = head->iov_base + head->iov_len;
xdr->iov = buf->head;
}
EXPORT_SYMBOL(xdr_truncate_encode);
/**
* xdr_restrict_buflen - decrease available buffer space
* @xdr: pointer to xdr_stream
* @newbuflen: new maximum number of bytes available
*
* Adjust our idea of how much space is available in the buffer.
* If we've already used too much space in the buffer, returns -1.
* If the available space is already smaller than newbuflen, returns 0
* and does nothing. Otherwise, adjusts xdr->buf->buflen to newbuflen
* and ensures xdr->end is set at most offset newbuflen from the start
* of the buffer.
*/
int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen)
{
struct xdr_buf *buf = xdr->buf;
int left_in_this_buf = (void *)xdr->end - (void *)xdr->p;
int end_offset = buf->len + left_in_this_buf;
if (newbuflen < 0 || newbuflen < buf->len)
return -1;
if (newbuflen > buf->buflen)
return 0;
if (newbuflen < end_offset)
xdr->end = (void *)xdr->end + newbuflen - end_offset;
buf->buflen = newbuflen;
return 0;
}
EXPORT_SYMBOL(xdr_restrict_buflen);
/**
* xdr_write_pages - Insert a list of pages into an XDR buffer for sending
* @xdr: pointer to xdr_stream
......
This diff is collapsed.
/*
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
......@@ -49,152 +50,6 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
/* Encode an XDR as an array of IB SGE
*
* Assumptions:
* - head[0] is physically contiguous.
* - tail[0] is physically contiguous.
* - pages[] is not physically or virtually contiguous and consists of
* PAGE_SIZE elements.
*
* Output:
* SGE[0] reserved for RCPRDMA header
* SGE[1] data from xdr->head[]
* SGE[2..sge_count-2] data from xdr->pages[]
* SGE[sge_count-1] data from xdr->tail.
*
* The max SGE we need is the length of the XDR / pagesize + one for
* head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
* reserves a page for both the request and the reply header, and this
* array is only concerned with the reply we are assured that we have
* on extra page for the RPCRMDA header.
*/
static int fast_reg_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct svc_rdma_req_map *vec)
{
int sge_no;
u32 sge_bytes;
u32 page_bytes;
u32 page_off;
int page_no = 0;
u8 *frva;
struct svc_rdma_fastreg_mr *frmr;
frmr = svc_rdma_get_frmr(xprt);
if (IS_ERR(frmr))
return -ENOMEM;
vec->frmr = frmr;
/* Skip the RPCRDMA header */
sge_no = 1;
/* Map the head. */
frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
vec->count = 2;
sge_no++;
/* Map the XDR head */
frmr->kva = frva;
frmr->direction = DMA_TO_DEVICE;
frmr->access_flags = 0;
frmr->map_len = PAGE_SIZE;
frmr->page_list_len = 1;
page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
frmr->page_list->page_list[page_no] =
ib_dma_map_page(xprt->sc_cm_id->device,
virt_to_page(xdr->head[0].iov_base),
page_off,
PAGE_SIZE - page_off,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[page_no]))
goto fatal_err;
atomic_inc(&xprt->sc_dma_used);
/* Map the XDR page list */
page_off = xdr->page_base;
page_bytes = xdr->page_len + page_off;
if (!page_bytes)
goto encode_tail;
/* Map the pages */
vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
vec->sge[sge_no].iov_len = page_bytes;
sge_no++;
while (page_bytes) {
struct page *page;
page = xdr->pages[page_no++];
sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
page_bytes -= sge_bytes;
frmr->page_list->page_list[page_no] =
ib_dma_map_page(xprt->sc_cm_id->device,
page, page_off,
sge_bytes, DMA_TO_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[page_no]))
goto fatal_err;
atomic_inc(&xprt->sc_dma_used);
page_off = 0; /* reset for next time through loop */
frmr->map_len += PAGE_SIZE;
frmr->page_list_len++;
}
vec->count++;
encode_tail:
/* Map tail */
if (0 == xdr->tail[0].iov_len)
goto done;
vec->count++;
vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
/*
* If head and tail use the same page, we don't need
* to map it again.
*/
vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
} else {
void *va;
/* Map another page for the tail */
page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
frmr->page_list->page_list[page_no] =
ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
page_off,
PAGE_SIZE,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[page_no]))
goto fatal_err;
atomic_inc(&xprt->sc_dma_used);
frmr->map_len += PAGE_SIZE;
frmr->page_list_len++;
}
done:
if (svc_rdma_fastreg(xprt, frmr))
goto fatal_err;
return 0;
fatal_err:
printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
vec->frmr = NULL;
svc_rdma_put_frmr(xprt, frmr);
return -EIO;
}
static int map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct svc_rdma_req_map *vec)
......@@ -208,9 +63,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
BUG_ON(xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
if (xprt->sc_frmr_pg_list_len)
return fast_reg_xdr(xprt, xdr, vec);
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
......@@ -282,8 +134,6 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
}
/* Assumptions:
* - We are using FRMR
* - or -
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
......@@ -327,23 +177,16 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge_bytes = min_t(size_t,
bc, vec->sge[xdr_sge_no].iov_len-sge_off);
sge[sge_no].length = sge_bytes;
if (!vec->frmr) {
sge[sge_no].addr =
dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
sge_bytes, DMA_TO_DEVICE);
xdr_off += sge_bytes;
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
sge[sge_no].addr))
goto err;
atomic_inc(&xprt->sc_dma_used);
sge[sge_no].lkey = xprt->sc_dma_lkey;
} else {
sge[sge_no].addr = (unsigned long)
vec->sge[xdr_sge_no].iov_base + sge_off;
sge[sge_no].lkey = vec->frmr->mr->lkey;
}
sge[sge_no].addr =
dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
sge_bytes, DMA_TO_DEVICE);
xdr_off += sge_bytes;
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
sge[sge_no].addr))
goto err;
atomic_inc(&xprt->sc_dma_used);
sge[sge_no].lkey = xprt->sc_dma_lkey;
ctxt->count++;
ctxt->frmr = vec->frmr;
sge_off = 0;
sge_no++;
xdr_sge_no++;
......@@ -369,7 +212,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
return 0;
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_frmr(xprt, vec->frmr);
svc_rdma_put_context(ctxt, 0);
/* Fatal error, close transport */
return -EIO;
......@@ -397,10 +239,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[1];
if (vec->frmr)
max_write = vec->frmr->map_len;
else
max_write = xprt->sc_max_sge * PAGE_SIZE;
max_write = xprt->sc_max_sge * PAGE_SIZE;
/* Write chunks start at the pagelist */
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
......@@ -472,10 +311,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[2];
if (vec->frmr)
max_write = vec->frmr->map_len;
else
max_write = xprt->sc_max_sge * PAGE_SIZE;
max_write = xprt->sc_max_sge * PAGE_SIZE;
/* xdr offset starts at RPC message */
nchunks = ntohl(arg_ary->wc_nchunks);
......@@ -545,7 +381,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
int byte_count)
{
struct ib_send_wr send_wr;
struct ib_send_wr inv_wr;
int sge_no;
int sge_bytes;
int page_no;
......@@ -559,7 +394,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
"svcrdma: could not post a receive buffer, err=%d."
"Closing transport %p.\n", ret, rdma);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
svc_rdma_put_frmr(rdma, vec->frmr);
svc_rdma_put_context(ctxt, 0);
return -ENOTCONN;
}
......@@ -567,11 +401,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
/* Prepare the context */
ctxt->pages[0] = page;
ctxt->count = 1;
ctxt->frmr = vec->frmr;
if (vec->frmr)
set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
else
clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
/* Prepare the SGE for the RPCRDMA Header */
ctxt->sge[0].lkey = rdma->sc_dma_lkey;
......@@ -590,21 +419,15 @@ static int send_reply(struct svcxprt_rdma *rdma,
int xdr_off = 0;
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
if (!vec->frmr) {
ctxt->sge[sge_no].addr =
dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
sge_bytes, DMA_TO_DEVICE);
xdr_off += sge_bytes;
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
ctxt->sge[sge_no].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);
ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
} else {
ctxt->sge[sge_no].addr = (unsigned long)
vec->sge[sge_no].iov_base;
ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
}
ctxt->sge[sge_no].addr =
dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
sge_bytes, DMA_TO_DEVICE);
xdr_off += sge_bytes;
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
ctxt->sge[sge_no].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);
ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
BUG_ON(byte_count != 0);
......@@ -627,6 +450,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[page_no+1].length = 0;
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
......@@ -635,15 +459,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
send_wr.num_sge = sge_no;
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
if (vec->frmr) {
/* Prepare INVALIDATE WR */
memset(&inv_wr, 0, sizeof inv_wr);
inv_wr.opcode = IB_WR_LOCAL_INV;
inv_wr.send_flags = IB_SEND_SIGNALED;
inv_wr.ex.invalidate_rkey =
vec->frmr->mr->lkey;
send_wr.next = &inv_wr;
}
ret = svc_rdma_send(rdma, &send_wr);
if (ret)
......@@ -653,7 +468,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
err:
svc_rdma_unmap_dma(ctxt);
svc_rdma_put_frmr(rdma, vec->frmr);
svc_rdma_put_context(ctxt, 1);
return -EIO;
}
......
/*
* Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
......@@ -65,6 +66,7 @@ static void dto_tasklet_func(unsigned long data);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static int svc_rdma_secure_port(struct svc_rqst *);
static void rq_cq_reap(struct svcxprt_rdma *xprt);
static void sq_cq_reap(struct svcxprt_rdma *xprt);
......@@ -82,6 +84,7 @@ static struct svc_xprt_ops svc_rdma_ops = {
.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
.xpo_has_wspace = svc_rdma_has_wspace,
.xpo_accept = svc_rdma_accept,
.xpo_secure_port = svc_rdma_secure_port,
};
struct svc_xprt_class svc_rdma_class = {
......@@ -160,7 +163,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
}
map->count = 0;
map->frmr = NULL;
return map;
}
......@@ -336,22 +338,21 @@ static void process_context(struct svcxprt_rdma *xprt,
switch (ctxt->wr_op) {
case IB_WR_SEND:
if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
svc_rdma_put_frmr(xprt, ctxt->frmr);
BUG_ON(ctxt->frmr);
svc_rdma_put_context(ctxt, 1);
break;
case IB_WR_RDMA_WRITE:
BUG_ON(ctxt->frmr);
svc_rdma_put_context(ctxt, 0);
break;
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
svc_rdma_put_frmr(xprt, ctxt->frmr);
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
BUG_ON(!read_hdr);
if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
svc_rdma_put_frmr(xprt, ctxt->frmr);
spin_lock_bh(&xprt->sc_rq_dto_lock);
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
list_add_tail(&read_hdr->dto_q,
......@@ -363,6 +364,7 @@ static void process_context(struct svcxprt_rdma *xprt,
break;
default:
BUG_ON(1);
printk(KERN_ERR "svcrdma: unexpected completion type, "
"opcode=%d\n",
ctxt->wr_op);
......@@ -378,29 +380,42 @@ static void process_context(struct svcxprt_rdma *xprt,
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt = NULL;
struct ib_wc wc;
struct ib_wc wc_a[6];
struct ib_wc *wc;
struct ib_cq *cq = xprt->sc_sq_cq;
int ret;
memset(wc_a, 0, sizeof(wc_a));
if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
return;
ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
atomic_inc(&rdma_stat_sq_poll);
while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
if (wc.status != IB_WC_SUCCESS)
/* Close the transport */
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
int i;
/* Decrement used SQ WR count */
atomic_dec(&xprt->sc_sq_count);
wake_up(&xprt->sc_send_wait);
for (i = 0; i < ret; i++) {
wc = &wc_a[i];
if (wc->status != IB_WC_SUCCESS) {
dprintk("svcrdma: sq wc err status %d\n",
wc->status);
ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
if (ctxt)
process_context(xprt, ctxt);
/* Close the transport */
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
}
svc_xprt_put(&xprt->sc_xprt);
/* Decrement used SQ WR count */
atomic_dec(&xprt->sc_sq_count);
wake_up(&xprt->sc_send_wait);
ctxt = (struct svc_rdma_op_ctxt *)
(unsigned long)wc->wr_id;
if (ctxt)
process_context(xprt, ctxt);
svc_xprt_put(&xprt->sc_xprt);
}
}
if (ctxt)
......@@ -993,7 +1008,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
need_dma_mr = 0;
break;
case RDMA_TRANSPORT_IB:
if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
need_dma_mr = 1;
dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
} else if (!(devattr.device_cap_flags &
IB_DEVICE_LOCAL_DMA_LKEY)) {
need_dma_mr = 1;
dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
} else
......@@ -1190,14 +1209,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
container_of(xprt, struct svcxprt_rdma, sc_xprt);
/*
* If there are fewer SQ WR available than required to send a
* simple response, return false.
*/
if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3))
return 0;
/*
* ...or there are already waiters on the SQ,
* If there are already waiters on the SQ,
* return false.
*/
if (waitqueue_active(&rdma->sc_send_wait))
......@@ -1207,6 +1219,11 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1;
}
static int svc_rdma_secure_port(struct svc_rqst *rqstp)
{
return 1;
}
/*
* Attempt to register the kvec representing the RPC memory with the
* device.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment