Commit 6dea0737 authored by Linus Torvalds

Merge branch 'for-3.18' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Highlights:

   - support the NFSv4.2 SEEK operation (allowing clients to support
     SEEK_HOLE/SEEK_DATA), thanks to Anna.
   - end the grace period early in a number of cases, mitigating a
     long-standing annoyance, thanks to Jeff
   - improve SMP scalability, thanks to Trond"

* 'for-3.18' of git://linux-nfs.org/~bfields/linux: (55 commits)
  nfsd: eliminate "to_delegation" define
  NFSD: Implement SEEK
  NFSD: Add generic v4.2 infrastructure
  svcrdma: advertise the correct max payload
  nfsd: introduce nfsd4_callback_ops
  nfsd: split nfsd4_callback initialization and use
  nfsd: introduce a generic nfsd4_cb
  nfsd: remove nfsd4_callback.cb_op
  nfsd: do not clear rpc_resp in nfsd4_cb_done_sequence
  nfsd: fix nfsd4_cb_recall_done error handling
  nfsd4: clarify how grace period ends
  nfsd4: stop grace_time update at end of grace period
  nfsd: skip subsequent UMH "create" operations after the first one for v4.0 clients
  nfsd: set and test NFSD4_CLIENT_STABLE bit to reduce nfsdcltrack upcalls
  nfsd: serialize nfsdcltrack upcalls for a particular client
  nfsd: pass extra info in env vars to upcalls to allow for early grace period end
  nfsd: add a v4_end_grace file to /proc/fs/nfsd
  lockd: add a /proc/fs/lockd/nlm_end_grace file
  nfsd: reject reclaim request when client has already sent RECLAIM_COMPLETE
  nfsd: remove redundant boot_time parm from grace_done client tracking op
  ...
parents 25641c0c 34549ab0
...@@ -233,9 +233,13 @@ if NETWORK_FILESYSTEMS ...@@ -233,9 +233,13 @@ if NETWORK_FILESYSTEMS
source "fs/nfs/Kconfig" source "fs/nfs/Kconfig"
source "fs/nfsd/Kconfig" source "fs/nfsd/Kconfig"
config GRACE_PERIOD
tristate
config LOCKD config LOCKD
tristate tristate
depends on FILE_LOCKING depends on FILE_LOCKING
select GRACE_PERIOD
config LOCKD_V4 config LOCKD_V4
bool bool
...@@ -249,7 +253,7 @@ config NFS_ACL_SUPPORT ...@@ -249,7 +253,7 @@ config NFS_ACL_SUPPORT
config NFS_COMMON config NFS_COMMON
bool bool
depends on NFSD || NFS_FS depends on NFSD || NFS_FS || LOCKD
default y default y
source "net/sunrpc/Kconfig" source "net/sunrpc/Kconfig"
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
obj-$(CONFIG_LOCKD) += lockd.o obj-$(CONFIG_LOCKD) += lockd.o
lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \ lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o svcshare.o svcproc.o svcsubs.o mon.o xdr.o
lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
lockd-objs-$(CONFIG_PROC_FS) += procfs.o
lockd-objs := $(lockd-objs-y) lockd-objs := $(lockd-objs-y)
...@@ -11,7 +11,6 @@ struct lockd_net { ...@@ -11,7 +11,6 @@ struct lockd_net {
struct delayed_work grace_period_end; struct delayed_work grace_period_end;
struct lock_manager lockd_manager; struct lock_manager lockd_manager;
struct list_head grace_list;
spinlock_t nsm_clnt_lock; spinlock_t nsm_clnt_lock;
unsigned int nsm_users; unsigned int nsm_users;
......
/*
* Procfs support for lockd
*
* Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com>
*/
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include "netns.h"
#include "procfs.h"
/*
 * Write handler for the nlm_end_grace proc file.
 *
 * Only the first byte of the written data is examined: 'Y', 'y' or '1'
 * ends the grace period of the lockd manager found through the calling
 * task's network namespace.  An empty write, or any other leading byte,
 * is rejected with -EINVAL.  On success the full write size is returned.
 */
static ssize_t
nlm_end_grace_write(struct file *file, const char __user *buf, size_t size,
		    loff_t *pos)
{
	struct lockd_net *ln = net_generic(current->nsproxy->net_ns,
					   lockd_net_id);
	char *data;

	if (size < 1)
		return -EINVAL;

	data = simple_transaction_get(file, buf, size);
	if (IS_ERR(data))
		return PTR_ERR(data);

	/* Anything other than an affirmative first character is an error. */
	if (data[0] != 'Y' && data[0] != 'y' && data[0] != '1')
		return -EINVAL;

	locks_end_grace(&ln->lockd_manager);
	return size;
}
/*
 * Read handler for the nlm_end_grace proc file.
 *
 * Produces 'Y' when the list head embedded in this namespace's lockd
 * manager is empty and 'N' otherwise, followed by a newline.  The whole
 * three-byte buffer (trailing NUL included) is handed to
 * simple_read_from_buffer(), which honours @size and @pos.
 */
static ssize_t
nlm_end_grace_read(struct file *file, char __user *buf, size_t size,
		   loff_t *pos)
{
	struct lockd_net *ln = net_generic(current->nsproxy->net_ns,
					   lockd_net_id);
	char resp[3] = { 'N', '\n', '\0' };

	if (list_empty(&ln->lockd_manager.list))
		resp[0] = 'Y';

	return simple_read_from_buffer(buf, size, pos, resp, sizeof(resp));
}
/* File operations wired to the "nlm_end_grace" proc entry. */
static const struct file_operations lockd_end_grace_operations = {
	.owner		= THIS_MODULE,
	.read		= nlm_end_grace_read,
	.write		= nlm_end_grace_write,
	.llseek		= default_llseek,
	/* Paired with simple_transaction_get() in the write handler. */
	.release	= simple_transaction_release,
};
/*
 * Create the "fs/lockd" proc directory and the "nlm_end_grace" file
 * inside it.
 *
 * Returns 0 on success, or -ENOMEM if either entry could not be created.
 * When the file cannot be created the directory is removed again.
 */
int __init
lockd_create_procfs(void)
{
	struct proc_dir_entry *dir;

	dir = proc_mkdir("fs/lockd", NULL);
	if (!dir)
		return -ENOMEM;

	if (proc_create("nlm_end_grace", S_IRUGO|S_IWUSR, dir,
			&lockd_end_grace_operations))
		return 0;

	/* File creation failed: undo the directory so nothing is left behind. */
	remove_proc_entry("fs/lockd", NULL);
	return -ENOMEM;
}
/*
 * Remove the proc entries set up by lockd_create_procfs(): first the
 * "nlm_end_grace" file, then the "fs/lockd" directory that contains it.
 */
void __exit
lockd_remove_procfs(void)
{
remove_proc_entry("fs/lockd/nlm_end_grace", NULL);
remove_proc_entry("fs/lockd", NULL);
}
/*
* Procfs support for lockd
*
* Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com>
*/
#ifndef _LOCKD_PROCFS_H
#define _LOCKD_PROCFS_H
#include <linux/kconfig.h>
#if IS_ENABLED(CONFIG_PROC_FS)
int lockd_create_procfs(void);
void lockd_remove_procfs(void);
#else
/* Stub for builds without CONFIG_PROC_FS: nothing to create, report success. */
static inline int lockd_create_procfs(void)
{
	return 0;
}
/* Stub for builds without CONFIG_PROC_FS: there is nothing to remove. */
static inline void lockd_remove_procfs(void)
{
}
#endif /* IS_ENABLED(CONFIG_PROC_FS) */
#endif /* _LOCKD_PROCFS_H */
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <linux/nfs.h> #include <linux/nfs.h>
#include "netns.h" #include "netns.h"
#include "procfs.h"
#define NLMDBG_FACILITY NLMDBG_SVC #define NLMDBG_FACILITY NLMDBG_SVC
#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE) #define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE)
...@@ -304,13 +305,16 @@ static int lockd_start_svc(struct svc_serv *serv) ...@@ -304,13 +305,16 @@ static int lockd_start_svc(struct svc_serv *serv)
svc_sock_update_bufs(serv); svc_sock_update_bufs(serv);
serv->sv_maxconn = nlm_max_connections; serv->sv_maxconn = nlm_max_connections;
nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name); nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name);
if (IS_ERR(nlmsvc_task)) { if (IS_ERR(nlmsvc_task)) {
error = PTR_ERR(nlmsvc_task); error = PTR_ERR(nlmsvc_task);
printk(KERN_WARNING printk(KERN_WARNING
"lockd_up: kthread_run failed, error=%d\n", error); "lockd_up: kthread_run failed, error=%d\n", error);
goto out_task; goto out_task;
} }
nlmsvc_rqst->rq_task = nlmsvc_task;
wake_up_process(nlmsvc_task);
dprintk("lockd_up: service started\n"); dprintk("lockd_up: service started\n");
return 0; return 0;
...@@ -581,7 +585,7 @@ static int lockd_init_net(struct net *net) ...@@ -581,7 +585,7 @@ static int lockd_init_net(struct net *net)
struct lockd_net *ln = net_generic(net, lockd_net_id); struct lockd_net *ln = net_generic(net, lockd_net_id);
INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
INIT_LIST_HEAD(&ln->grace_list); INIT_LIST_HEAD(&ln->lockd_manager.list);
spin_lock_init(&ln->nsm_clnt_lock); spin_lock_init(&ln->nsm_clnt_lock);
return 0; return 0;
} }
...@@ -615,8 +619,15 @@ static int __init init_nlm(void) ...@@ -615,8 +619,15 @@ static int __init init_nlm(void)
err = register_pernet_subsys(&lockd_net_ops); err = register_pernet_subsys(&lockd_net_ops);
if (err) if (err)
goto err_pernet; goto err_pernet;
err = lockd_create_procfs();
if (err)
goto err_procfs;
return 0; return 0;
err_procfs:
unregister_pernet_subsys(&lockd_net_ops);
err_pernet: err_pernet:
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
unregister_sysctl_table(nlm_sysctl_table); unregister_sysctl_table(nlm_sysctl_table);
...@@ -629,6 +640,7 @@ static void __exit exit_nlm(void) ...@@ -629,6 +640,7 @@ static void __exit exit_nlm(void)
{ {
/* FIXME: delete all NLM clients */ /* FIXME: delete all NLM clients */
nlm_shutdown_hosts(); nlm_shutdown_hosts();
lockd_remove_procfs();
unregister_pernet_subsys(&lockd_net_ops); unregister_pernet_subsys(&lockd_net_ops);
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
unregister_sysctl_table(nlm_sysctl_table); unregister_sysctl_table(nlm_sysctl_table);
......
...@@ -235,7 +235,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, ...@@ -235,7 +235,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
cb_info->serv = serv; cb_info->serv = serv;
cb_info->rqst = rqstp; cb_info->rqst = rqstp;
cb_info->task = kthread_run(callback_svc, cb_info->rqst, cb_info->task = kthread_create(callback_svc, cb_info->rqst,
"nfsv4.%u-svc", minorversion); "nfsv4.%u-svc", minorversion);
if (IS_ERR(cb_info->task)) { if (IS_ERR(cb_info->task)) {
ret = PTR_ERR(cb_info->task); ret = PTR_ERR(cb_info->task);
...@@ -244,6 +244,8 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt, ...@@ -244,6 +244,8 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
cb_info->task = NULL; cb_info->task = NULL;
return ret; return ret;
} }
rqstp->rq_task = cb_info->task;
wake_up_process(cb_info->task);
dprintk("nfs_callback_up: service started\n"); dprintk("nfs_callback_up: service started\n");
return 0; return 0;
} }
......
...@@ -3,5 +3,6 @@ ...@@ -3,5 +3,6 @@
# #
obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o
nfs_acl-objs := nfsacl.o nfs_acl-objs := nfsacl.o
obj-$(CONFIG_GRACE_PERIOD) += grace.o
/* /*
* Common code for control of lockd and nfsv4 grace periods. * Common code for control of lockd and nfsv4 grace periods.
*
* Transplanted from lockd code
*/ */
#include <linux/module.h> #include <linux/module.h>
#include <linux/lockd/bind.h>
#include <net/net_namespace.h> #include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/fs.h>
#include "netns.h" static int grace_net_id;
static DEFINE_SPINLOCK(grace_lock); static DEFINE_SPINLOCK(grace_lock);
/** /**
* locks_start_grace * locks_start_grace
* @net: net namespace that this lock manager belongs to
* @lm: who this grace period is for * @lm: who this grace period is for
* *
* A grace period is a period during which locks should not be given * A grace period is a period during which locks should not be given
...@@ -21,18 +24,20 @@ static DEFINE_SPINLOCK(grace_lock); ...@@ -21,18 +24,20 @@ static DEFINE_SPINLOCK(grace_lock);
* *
* This function is called to start a grace period. * This function is called to start a grace period.
*/ */
void locks_start_grace(struct net *net, struct lock_manager *lm) void
locks_start_grace(struct net *net, struct lock_manager *lm)
{ {
struct lockd_net *ln = net_generic(net, lockd_net_id); struct list_head *grace_list = net_generic(net, grace_net_id);
spin_lock(&grace_lock); spin_lock(&grace_lock);
list_add(&lm->list, &ln->grace_list); list_add(&lm->list, grace_list);
spin_unlock(&grace_lock); spin_unlock(&grace_lock);
} }
EXPORT_SYMBOL_GPL(locks_start_grace); EXPORT_SYMBOL_GPL(locks_start_grace);
/** /**
* locks_end_grace * locks_end_grace
* @net: net namespace that this lock manager belongs to
* @lm: who this grace period is for * @lm: who this grace period is for
* *
* Call this function to state that the given lock manager is ready to * Call this function to state that the given lock manager is ready to
...@@ -41,7 +46,8 @@ EXPORT_SYMBOL_GPL(locks_start_grace); ...@@ -41,7 +46,8 @@ EXPORT_SYMBOL_GPL(locks_start_grace);
* Note that callers count on it being safe to call this more than once, * Note that callers count on it being safe to call this more than once,
* and the second call should be a no-op. * and the second call should be a no-op.
*/ */
void locks_end_grace(struct lock_manager *lm) void
locks_end_grace(struct lock_manager *lm)
{ {
spin_lock(&grace_lock); spin_lock(&grace_lock);
list_del_init(&lm->list); list_del_init(&lm->list);
...@@ -56,10 +62,52 @@ EXPORT_SYMBOL_GPL(locks_end_grace); ...@@ -56,10 +62,52 @@ EXPORT_SYMBOL_GPL(locks_end_grace);
* to answer ordinary lock requests, and when they should accept only * to answer ordinary lock requests, and when they should accept only
* lock reclaims. * lock reclaims.
*/ */
int locks_in_grace(struct net *net) int
locks_in_grace(struct net *net)
{ {
struct lockd_net *ln = net_generic(net, lockd_net_id); struct list_head *grace_list = net_generic(net, grace_net_id);
return !list_empty(&ln->grace_list); return !list_empty(grace_list);
} }
EXPORT_SYMBOL_GPL(locks_in_grace); EXPORT_SYMBOL_GPL(locks_in_grace);
/*
 * Per-namespace init hook: the per-net storage registered for
 * grace_net_id is a bare list head, which starts out empty.
 */
static int __net_init
grace_init_net(struct net *net)
{
	INIT_LIST_HEAD(net_generic(net, grace_net_id));
	return 0;
}
/*
 * Per-namespace exit hook.
 *
 * The grace list of a namespace is expected to be empty by the time the
 * namespace goes away.  A leftover entry points at a caller that never
 * ended its grace period; that is a bug worth reporting, but not one
 * that justifies halting the machine, so warn (once) rather than BUG.
 */
static void __net_exit
grace_exit_net(struct net *net)
{
	struct list_head *grace_list = net_generic(net, grace_net_id);

	WARN_ONCE(!list_empty(grace_list),
		  "%s: grace_list is not empty\n", __func__);
}
/*
 * Per-network-namespace registration for the grace list: each namespace
 * gets sizeof(struct list_head) bytes of storage, looked up through
 * grace_net_id.
 */
static struct pernet_operations grace_net_ops = {
	.id   = &grace_net_id,
	.size = sizeof(struct list_head),
	.init = grace_init_net,
	.exit = grace_exit_net,
};
/* Module init: register the per-namespace grace-list operations. */
static int __init init_grace(void)
{
	int err = register_pernet_subsys(&grace_net_ops);

	return err;
}
/* Module exit: drop the per-namespace registration made by init_grace(). */
static void __exit exit_grace(void)
{
	unregister_pernet_subsys(&grace_net_ops);
}
/* Module metadata. */
MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>");
MODULE_LICENSE("GPL");
/* Hook init_grace()/exit_grace() up as the module's entry and exit points. */
module_init(init_grace)
module_exit(exit_grace)
...@@ -71,6 +71,7 @@ config NFSD_V4 ...@@ -71,6 +71,7 @@ config NFSD_V4
select FS_POSIX_ACL select FS_POSIX_ACL
select SUNRPC_GSS select SUNRPC_GSS
select CRYPTO select CRYPTO
select GRACE_PERIOD
help help
This option enables support in your system's NFS server for This option enables support in your system's NFS server for
version 4 of the NFS protocol (RFC 3530). version 4 of the NFS protocol (RFC 3530).
...@@ -94,9 +95,6 @@ config NFSD_V4_SECURITY_LABEL ...@@ -94,9 +95,6 @@ config NFSD_V4_SECURITY_LABEL
If you do not wish to enable fine-grained security labels SELinux or If you do not wish to enable fine-grained security labels SELinux or
Smack policies on NFSv4 files, say N. Smack policies on NFSv4 files, say N.
WARNING: there is still a chance of backwards-incompatible protocol changes.
For now we recommend "Y" only for developers and testers.
config NFSD_FAULT_INJECTION config NFSD_FAULT_INJECTION
bool "NFS server manual fault injection" bool "NFS server manual fault injection"
depends on NFSD_V4 && DEBUG_KERNEL depends on NFSD_V4 && DEBUG_KERNEL
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
* is much larger than a sockaddr_in6. * is much larger than a sockaddr_in6.
*/ */
struct svc_cacherep { struct svc_cacherep {
struct hlist_node c_hash;
struct list_head c_lru; struct list_head c_lru;
unsigned char c_state, /* unused, inprog, done */ unsigned char c_state, /* unused, inprog, done */
......
...@@ -1145,6 +1145,7 @@ static struct flags { ...@@ -1145,6 +1145,7 @@ static struct flags {
{ NFSEXP_ALLSQUASH, {"all_squash", ""}}, { NFSEXP_ALLSQUASH, {"all_squash", ""}},
{ NFSEXP_ASYNC, {"async", "sync"}}, { NFSEXP_ASYNC, {"async", "sync"}},
{ NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}}, { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}},
{ NFSEXP_NOREADDIRPLUS, {"nordirplus", ""}},
{ NFSEXP_NOHIDE, {"nohide", ""}}, { NFSEXP_NOHIDE, {"nohide", ""}},
{ NFSEXP_CROSSMOUNT, {"crossmnt", ""}}, { NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}}, { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
......
...@@ -223,11 +223,6 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, ...@@ -223,11 +223,6 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
newfhp = fh_init(&resp->fh, NFS3_FHSIZE); newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
attr = &argp->attrs; attr = &argp->attrs;
/* Get the directory inode */
nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
if (nfserr)
RETURN_STATUS(nfserr);
/* Unfudge the mode bits */ /* Unfudge the mode bits */
attr->ia_mode &= ~S_IFMT; attr->ia_mode &= ~S_IFMT;
if (!(attr->ia_valid & ATTR_MODE)) { if (!(attr->ia_valid & ATTR_MODE)) {
...@@ -471,6 +466,14 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, ...@@ -471,6 +466,14 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
resp->buflen = resp->count; resp->buflen = resp->count;
resp->rqstp = rqstp; resp->rqstp = rqstp;
offset = argp->cookie; offset = argp->cookie;
nfserr = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP);
if (nfserr)
RETURN_STATUS(nfserr);
if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS)
RETURN_STATUS(nfserr_notsupp);
nfserr = nfsd_readdir(rqstp, &resp->fh, nfserr = nfsd_readdir(rqstp, &resp->fh,
&offset, &offset,
&resp->common, &resp->common,
......
...@@ -49,12 +49,6 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason); ...@@ -49,12 +49,6 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
/* Index of predefined Linux callback client operations */ /* Index of predefined Linux callback client operations */
enum {
NFSPROC4_CLNT_CB_NULL = 0,
NFSPROC4_CLNT_CB_RECALL,
NFSPROC4_CLNT_CB_SEQUENCE,
};
struct nfs4_cb_compound_hdr { struct nfs4_cb_compound_hdr {
/* args */ /* args */
u32 ident; /* minorversion 0 only */ u32 ident; /* minorversion 0 only */
...@@ -494,7 +488,7 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, ...@@ -494,7 +488,7 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
const struct nfsd4_callback *cb) const struct nfsd4_callback *cb)
{ {
const struct nfs4_delegation *args = cb->cb_op; const struct nfs4_delegation *dp = cb_to_delegation(cb);
struct nfs4_cb_compound_hdr hdr = { struct nfs4_cb_compound_hdr hdr = {
.ident = cb->cb_clp->cl_cb_ident, .ident = cb->cb_clp->cl_cb_ident,
.minorversion = cb->cb_minorversion, .minorversion = cb->cb_minorversion,
...@@ -502,7 +496,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, ...@@ -502,7 +496,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_cb_compound4args(xdr, &hdr); encode_cb_compound4args(xdr, &hdr);
encode_cb_sequence4args(xdr, cb, &hdr); encode_cb_sequence4args(xdr, cb, &hdr);
encode_cb_recall4args(xdr, args, &hdr); encode_cb_recall4args(xdr, dp, &hdr);
encode_cb_nops(&hdr); encode_cb_nops(&hdr);
} }
...@@ -746,27 +740,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = { ...@@ -746,27 +740,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
static struct workqueue_struct *callback_wq; static struct workqueue_struct *callback_wq;
static void run_nfsd4_cb(struct nfsd4_callback *cb)
{
queue_work(callback_wq, &cb->cb_work);
}
static void do_probe_callback(struct nfs4_client *clp)
{
struct nfsd4_callback *cb = &clp->cl_cb_null;
cb->cb_op = NULL;
cb->cb_clp = clp;
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
cb->cb_msg.rpc_argp = NULL;
cb->cb_msg.rpc_resp = NULL;
cb->cb_ops = &nfsd4_cb_probe_ops;
run_nfsd4_cb(cb);
}
/* /*
* Poke the callback thread to process any updates to the callback * Poke the callback thread to process any updates to the callback
* parameters, and send a null probe. * parameters, and send a null probe.
...@@ -775,7 +748,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp) ...@@ -775,7 +748,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp)
{ {
clp->cl_cb_state = NFSD4_CB_UNKNOWN; clp->cl_cb_state = NFSD4_CB_UNKNOWN;
set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags); set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
do_probe_callback(clp); nfsd4_run_cb(&clp->cl_cb_null);
} }
void nfsd4_probe_callback_sync(struct nfs4_client *clp) void nfsd4_probe_callback_sync(struct nfs4_client *clp)
...@@ -847,23 +820,9 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) ...@@ -847,23 +820,9 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
rpc_wake_up_next(&clp->cl_cb_waitq); rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__, dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr); clp->cl_cb_session->se_cb_seq_nr);
/* We're done looking into the sequence information */
task->tk_msg.rpc_resp = NULL;
} }
}
static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
{
struct nfsd4_callback *cb = calldata;
struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
nfsd4_cb_done(task, calldata);
if (current_rpc_client != task->tk_client) { if (clp->cl_cb_client != task->tk_client) {
/* We're shutting down or changing cl_cb_client; leave /* We're shutting down or changing cl_cb_client; leave
* it to nfsd4_process_cb_update to restart the call if * it to nfsd4_process_cb_update to restart the call if
* necessary. */ * necessary. */
...@@ -872,47 +831,42 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) ...@@ -872,47 +831,42 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
if (cb->cb_done) if (cb->cb_done)
return; return;
switch (task->tk_status) {
switch (cb->cb_ops->done(cb, task)) {
case 0: case 0:
cb->cb_done = true; task->tk_status = 0;
rpc_restart_call_prepare(task);
return; return;
case -EBADHANDLE: case 1:
case -NFS4ERR_BAD_STATEID:
/* Race: client probably got cb_recall
* before open reply granting delegation */
break; break;
default: case -1:
/* Network partition? */ /* Network partition? */
nfsd4_mark_cb_down(clp, task->tk_status); nfsd4_mark_cb_down(clp, task->tk_status);
break;
default:
BUG();
} }
if (dp->dl_retries--) {
rpc_delay(task, 2*HZ);
task->tk_status = 0;
rpc_restart_call_prepare(task);
return;
}
nfsd4_mark_cb_down(clp, task->tk_status);
cb->cb_done = true; cb->cb_done = true;
} }
static void nfsd4_cb_recall_release(void *calldata) static void nfsd4_cb_release(void *calldata)
{ {
struct nfsd4_callback *cb = calldata; struct nfsd4_callback *cb = calldata;
struct nfs4_client *clp = cb->cb_clp; struct nfs4_client *clp = cb->cb_clp;
struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
if (cb->cb_done) { if (cb->cb_done) {
spin_lock(&clp->cl_lock); spin_lock(&clp->cl_lock);
list_del(&cb->cb_per_client); list_del(&cb->cb_per_client);
spin_unlock(&clp->cl_lock); spin_unlock(&clp->cl_lock);
nfs4_put_stid(&dp->dl_stid);
cb->cb_ops->release(cb);
} }
} }
static const struct rpc_call_ops nfsd4_cb_recall_ops = { static const struct rpc_call_ops nfsd4_cb_ops = {
.rpc_call_prepare = nfsd4_cb_prepare, .rpc_call_prepare = nfsd4_cb_prepare,
.rpc_call_done = nfsd4_cb_recall_done, .rpc_call_done = nfsd4_cb_done,
.rpc_release = nfsd4_cb_recall_release, .rpc_release = nfsd4_cb_release,
}; };
int nfsd4_create_callback_queue(void) int nfsd4_create_callback_queue(void)
...@@ -937,16 +891,10 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp) ...@@ -937,16 +891,10 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
* instead, nfsd4_run_cb_null() will detect the killed * instead, nfsd4_run_cb_null() will detect the killed
* client, destroy the rpc client, and stop: * client, destroy the rpc client, and stop:
*/ */
do_probe_callback(clp); nfsd4_run_cb(&clp->cl_cb_null);
flush_workqueue(callback_wq); flush_workqueue(callback_wq);
} }
static void nfsd4_release_cb(struct nfsd4_callback *cb)
{
if (cb->cb_ops->rpc_release)
cb->cb_ops->rpc_release(cb);
}
/* requires cl_lock: */ /* requires cl_lock: */
static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp) static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
{ {
...@@ -1009,63 +957,49 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) ...@@ -1009,63 +957,49 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
} }
/* Yay, the callback channel's back! Restart any callbacks: */ /* Yay, the callback channel's back! Restart any callbacks: */
list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
run_nfsd4_cb(cb); queue_work(callback_wq, &cb->cb_work);
} }
static void static void
nfsd4_run_callback_rpc(struct nfsd4_callback *cb) nfsd4_run_cb_work(struct work_struct *work)
{ {
struct nfsd4_callback *cb =
container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp; struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt; struct rpc_clnt *clnt;
if (cb->cb_ops && cb->cb_ops->prepare)
cb->cb_ops->prepare(cb);
if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
nfsd4_process_cb_update(cb); nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client; clnt = clp->cl_cb_client;
if (!clnt) { if (!clnt) {
/* Callback channel broken, or client killed; give up: */ /* Callback channel broken, or client killed; give up: */
nfsd4_release_cb(cb); if (cb->cb_ops && cb->cb_ops->release)
cb->cb_ops->release(cb);
return; return;
} }
cb->cb_msg.rpc_cred = clp->cl_cb_cred; cb->cb_msg.rpc_cred = clp->cl_cb_cred;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
cb->cb_ops, cb); cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
} }
void void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
nfsd4_run_cb_null(struct work_struct *w) struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op)
{ {
struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
cb_work);
nfsd4_run_callback_rpc(cb);
}
void
nfsd4_run_cb_recall(struct work_struct *w)
{
struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
cb_work);
nfsd4_prepare_cb_recall(cb->cb_op);
nfsd4_run_callback_rpc(cb);
}
void nfsd4_cb_recall(struct nfs4_delegation *dp)
{
struct nfsd4_callback *cb = &dp->dl_recall;
struct nfs4_client *clp = dp->dl_stid.sc_client;
dp->dl_retries = 1;
cb->cb_op = dp;
cb->cb_clp = clp; cb->cb_clp = clp;
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
cb->cb_msg.rpc_argp = cb; cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb; cb->cb_msg.rpc_resp = cb;
cb->cb_ops = ops;
cb->cb_ops = &nfsd4_cb_recall_ops; INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
INIT_LIST_HEAD(&cb->cb_per_client); INIT_LIST_HEAD(&cb->cb_per_client);
cb->cb_done = true; cb->cb_done = true;
}
run_nfsd4_cb(&dp->dl_recall); void nfsd4_run_cb(struct nfsd4_callback *cb)
{
queue_work(callback_wq, &cb->cb_work);
} }
...@@ -215,7 +215,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) ...@@ -215,7 +215,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
memset(&ent, 0, sizeof(ent)); memset(&ent, 0, sizeof(ent));
/* Authentication name */ /* Authentication name */
if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) len = qword_get(&buf, buf1, PAGE_SIZE);
if (len <= 0 || len >= IDMAP_NAMESZ)
goto out; goto out;
memcpy(ent.authname, buf1, sizeof(ent.authname)); memcpy(ent.authname, buf1, sizeof(ent.authname));
...@@ -245,12 +246,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) ...@@ -245,12 +246,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
/* Name */ /* Name */
error = -EINVAL; error = -EINVAL;
len = qword_get(&buf, buf1, PAGE_SIZE); len = qword_get(&buf, buf1, PAGE_SIZE);
if (len < 0) if (len < 0 || len >= IDMAP_NAMESZ)
goto out; goto out;
if (len == 0) if (len == 0)
set_bit(CACHE_NEGATIVE, &ent.h.flags); set_bit(CACHE_NEGATIVE, &ent.h.flags);
else if (len >= IDMAP_NAMESZ)
goto out;
else else
memcpy(ent.name, buf1, sizeof(ent.name)); memcpy(ent.name, buf1, sizeof(ent.name));
error = -ENOMEM; error = -ENOMEM;
...@@ -259,15 +258,12 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen) ...@@ -259,15 +258,12 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
goto out; goto out;
cache_put(&res->h, cd); cache_put(&res->h, cd);
error = 0; error = 0;
out: out:
kfree(buf1); kfree(buf1);
return error; return error;
} }
static struct ent * static struct ent *
idtoname_lookup(struct cache_detail *cd, struct ent *item) idtoname_lookup(struct cache_detail *cd, struct ent *item)
{ {
...@@ -368,7 +364,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) ...@@ -368,7 +364,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
{ {
struct ent ent, *res; struct ent ent, *res;
char *buf1; char *buf1;
int error = -EINVAL; int len, error = -EINVAL;
if (buf[buflen - 1] != '\n') if (buf[buflen - 1] != '\n')
return (-EINVAL); return (-EINVAL);
...@@ -381,7 +377,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) ...@@ -381,7 +377,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
memset(&ent, 0, sizeof(ent)); memset(&ent, 0, sizeof(ent));
/* Authentication name */ /* Authentication name */
if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) len = qword_get(&buf, buf1, PAGE_SIZE);
if (len <= 0 || len >= IDMAP_NAMESZ)
goto out; goto out;
memcpy(ent.authname, buf1, sizeof(ent.authname)); memcpy(ent.authname, buf1, sizeof(ent.authname));
...@@ -392,8 +389,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) ...@@ -392,8 +389,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
/* Name */ /* Name */
error = qword_get(&buf, buf1, PAGE_SIZE); len = qword_get(&buf, buf1, PAGE_SIZE);
if (error <= 0 || error >= IDMAP_NAMESZ) if (len <= 0 || len >= IDMAP_NAMESZ)
goto out; goto out;
memcpy(ent.name, buf1, sizeof(ent.name)); memcpy(ent.name, buf1, sizeof(ent.name));
...@@ -421,7 +418,6 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen) ...@@ -421,7 +418,6 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
error = 0; error = 0;
out: out:
kfree(buf1); kfree(buf1);
return (error); return (error);
} }
......
...@@ -1013,6 +1013,49 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -1013,6 +1013,49 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status; return status;
} }
/*
 * Handle the SEEK operation.
 *
 * Resolves the stateid in @seek to an open file (read state), maps the
 * requested content type onto SEEK_DATA or SEEK_HOLE and performs the
 * seek with vfs_llseek().  The resulting position is stored in
 * seek->seek_pos, and seek->seek_eof is set when that position is at or
 * beyond the file's size.
 *
 * Returns the stateid-processing error, nfserr_union_notsupp for an
 * unknown content type, the translated errno if the seek fails, or the
 * (zero) status from stateid processing on success.
 */
static __be32
nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
		struct nfsd4_seek *seek)
{
	struct file *file;
	__be32 status;
	int whence;

	status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
					    &seek->seek_stateid,
					    RD_STATE, &file);
	if (status) {
		dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
		return status;
	}

	if (seek->seek_whence == NFS4_CONTENT_DATA) {
		whence = SEEK_DATA;
	} else if (seek->seek_whence == NFS4_CONTENT_HOLE) {
		whence = SEEK_HOLE;
	} else {
		status = nfserr_union_notsupp;
		goto out;
	}

	/*
	 * Note: This call does change file->f_pos, but nothing in NFSD
	 * should ever use file->f_pos.
	 */
	seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
	if (seek->seek_pos < 0)
		status = nfserrno(seek->seek_pos);
	else if (seek->seek_pos >= i_size_read(file_inode(file)))
		seek->seek_eof = true;

out:
	/* Drop the file reference obtained from stateid processing. */
	fput(file);
	return status;
}
/* This routine never returns NFS_OK! If there are no other errors, it /* This routine never returns NFS_OK! If there are no other errors, it
* will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the
* attributes matched. VERIFY is implemented by mapping NFSERR_SAME * attributes matched. VERIFY is implemented by mapping NFSERR_SAME
...@@ -1881,6 +1924,12 @@ static struct nfsd4_operation nfsd4_ops[] = { ...@@ -1881,6 +1924,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
}, },
/* NFSv4.2 operations */
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
},
}; };
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op) int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
......
...@@ -58,7 +58,7 @@ struct nfsd4_client_tracking_ops { ...@@ -58,7 +58,7 @@ struct nfsd4_client_tracking_ops {
void (*create)(struct nfs4_client *); void (*create)(struct nfs4_client *);
void (*remove)(struct nfs4_client *); void (*remove)(struct nfs4_client *);
int (*check)(struct nfs4_client *); int (*check)(struct nfs4_client *);
void (*grace_done)(struct nfsd_net *, time_t); void (*grace_done)(struct nfsd_net *);
}; };
/* Globals */ /* Globals */
...@@ -188,7 +188,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) ...@@ -188,7 +188,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
status = mnt_want_write_file(nn->rec_file); status = mnt_want_write_file(nn->rec_file);
if (status) if (status)
return; goto out_creds;
dir = nn->rec_file->f_path.dentry; dir = nn->rec_file->f_path.dentry;
/* lock the parent */ /* lock the parent */
...@@ -228,6 +228,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) ...@@ -228,6 +228,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
user_recovery_dirname); user_recovery_dirname);
} }
mnt_drop_write_file(nn->rec_file); mnt_drop_write_file(nn->rec_file);
out_creds:
nfs4_reset_creds(original_cred); nfs4_reset_creds(original_cred);
} }
...@@ -392,7 +393,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn) ...@@ -392,7 +393,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
} }
static void static void
nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time) nfsd4_recdir_purge_old(struct nfsd_net *nn)
{ {
int status; int status;
...@@ -479,6 +480,16 @@ nfsd4_init_recdir(struct net *net) ...@@ -479,6 +480,16 @@ nfsd4_init_recdir(struct net *net)
return status; return status;
} }
/*
 * Release the file held in this namespace's nn->rec_file, if any, and
 * reset the pointer so a repeated call is a no-op.
 */
static void
nfsd4_shutdown_recdir(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	if (nn->rec_file) {
		fput(nn->rec_file);
		nn->rec_file = NULL;
	}
}
static int static int
nfs4_legacy_state_init(struct net *net) nfs4_legacy_state_init(struct net *net)
...@@ -512,10 +523,13 @@ nfsd4_load_reboot_recovery_data(struct net *net) ...@@ -512,10 +523,13 @@ nfsd4_load_reboot_recovery_data(struct net *net)
int status; int status;
status = nfsd4_init_recdir(net); status = nfsd4_init_recdir(net);
if (!status)
status = nfsd4_recdir_load(net);
if (status) if (status)
printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n"); return status;
status = nfsd4_recdir_load(net);
if (status)
nfsd4_shutdown_recdir(net);
return status; return status;
} }
...@@ -545,22 +559,13 @@ nfsd4_legacy_tracking_init(struct net *net) ...@@ -545,22 +559,13 @@ nfsd4_legacy_tracking_init(struct net *net)
return status; return status;
} }
/*
 * Release the file held in nn->rec_file, if any, and reset the pointer
 * so a repeated call is a no-op.
 */
static void
nfsd4_shutdown_recdir(struct nfsd_net *nn)
{
	if (nn->rec_file) {
		fput(nn->rec_file);
		nn->rec_file = NULL;
	}
}
static void static void
nfsd4_legacy_tracking_exit(struct net *net) nfsd4_legacy_tracking_exit(struct net *net)
{ {
struct nfsd_net *nn = net_generic(net, nfsd_net_id); struct nfsd_net *nn = net_generic(net, nfsd_net_id);
nfs4_release_reclaim(nn); nfs4_release_reclaim(nn);
nfsd4_shutdown_recdir(nn); nfsd4_shutdown_recdir(net);
nfs4_legacy_state_shutdown(net); nfs4_legacy_state_shutdown(net);
} }
...@@ -1016,7 +1021,7 @@ nfsd4_cld_check(struct nfs4_client *clp) ...@@ -1016,7 +1021,7 @@ nfsd4_cld_check(struct nfs4_client *clp)
} }
static void static void
nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) nfsd4_cld_grace_done(struct nfsd_net *nn)
{ {
int ret; int ret;
struct cld_upcall *cup; struct cld_upcall *cup;
...@@ -1029,7 +1034,7 @@ nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time) ...@@ -1029,7 +1034,7 @@ nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time)
} }
cup->cu_msg.cm_cmd = Cld_GraceDone; cup->cu_msg.cm_cmd = Cld_GraceDone;
cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time; cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg); ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
if (!ret) if (!ret)
ret = cup->cu_msg.cm_status; ret = cup->cu_msg.cm_status;
...@@ -1062,6 +1067,8 @@ MODULE_PARM_DESC(cltrack_legacy_disable, ...@@ -1062,6 +1067,8 @@ MODULE_PARM_DESC(cltrack_legacy_disable,
#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR=" #define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR="
#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR=" #define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR="
#define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION="
#define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START="
static char * static char *
nfsd4_cltrack_legacy_topdir(void) nfsd4_cltrack_legacy_topdir(void)
...@@ -1126,10 +1133,60 @@ nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name) ...@@ -1126,10 +1133,60 @@ nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name)
return result; return result;
} }
static char *
nfsd4_cltrack_client_has_session(struct nfs4_client *clp)
{
int copied;
size_t len;
char *result;
/* prefix + Y/N character + terminating NULL */
len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1;
result = kmalloc(len, GFP_KERNEL);
if (!result)
return result;
copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c",
clp->cl_minorversion ? 'Y' : 'N');
if (copied >= len) {
/* just return nothing if output was truncated */
kfree(result);
return NULL;
}
return result;
}
/*
 * Build the "NFSDCLTRACK_GRACE_START=<seconds>" environment string from
 * the given grace period start time.
 *
 * Returns a kmalloc'ed string that the caller must kfree(), or NULL if
 * the allocation failed or the formatted output did not fit.
 */
static char *
nfsd4_cltrack_grace_start(time_t grace_start)
{
	/*
	 * sizeof() covers prefix + terminating NUL; 22 more bytes are
	 * reserved for the decimal timestamp.
	 */
	size_t bufsz = sizeof(GRACE_START_ENV_PREFIX) + 22;
	char *env;
	int written;

	env = kmalloc(bufsz, GFP_KERNEL);
	if (!env)
		return NULL;

	written = snprintf(env, bufsz, GRACE_START_ENV_PREFIX "%ld",
				grace_start);
	if (written < bufsz)
		return env;

	/* output was truncated: hand back nothing rather than a partial string */
	kfree(env);
	return NULL;
}
static int static int
nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1)
{ {
char *envp[2]; char *envp[3];
char *argv[4]; char *argv[4];
int ret; int ret;
...@@ -1140,10 +1197,12 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy) ...@@ -1140,10 +1197,12 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy)
dprintk("%s: cmd: %s\n", __func__, cmd); dprintk("%s: cmd: %s\n", __func__, cmd);
dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)"); dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)");
dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)"); dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)");
dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)");
envp[0] = legacy; envp[0] = env0;
envp[1] = NULL; envp[1] = env1;
envp[2] = NULL;
argv[0] = (char *)cltrack_prog; argv[0] = (char *)cltrack_prog;
argv[1] = cmd; argv[1] = cmd;
...@@ -1187,28 +1246,78 @@ bin_to_hex_dup(const unsigned char *src, int srclen) ...@@ -1187,28 +1246,78 @@ bin_to_hex_dup(const unsigned char *src, int srclen)
} }
static int static int
nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) nfsd4_umh_cltrack_init(struct net *net)
{ {
int ret;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
/* XXX: The usermode helper s not working in container yet. */ /* XXX: The usermode helper s not working in container yet. */
if (net != &init_net) { if (net != &init_net) {
WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " WARN(1, KERN_ERR "NFSD: attempt to initialize umh client "
"tracking in a container!\n"); "tracking in a container!\n");
return -EINVAL; return -EINVAL;
} }
return nfsd4_umh_cltrack_upcall("init", NULL, NULL);
ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
kfree(grace_start);
return ret;
}
/*
 * Take the per-client upcall lock: wait_on_bit_lock() on the
 * NFSD4_CLIENT_UPCALL_LOCK bit of clp->cl_flags, sleeping in
 * TASK_UNINTERRUPTIBLE.  Paired with nfsd4_cltrack_upcall_unlock().
 */
static void
nfsd4_cltrack_upcall_lock(struct nfs4_client *clp)
{
	wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK, TASK_UNINTERRUPTIBLE);
}
/*
 * Release the per-client upcall lock taken by nfsd4_cltrack_upcall_lock():
 * clear the NFSD4_CLIENT_UPCALL_LOCK bit in clp->cl_flags and wake anyone
 * waiting on that bit.
 */
static void
nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp)
{
/* barrier before the bit is cleared */
smp_mb__before_atomic();
clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags);
/* barrier after the clear, ahead of the wakeup below */
smp_mb__after_atomic();
wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK);
} }
static void static void
nfsd4_umh_cltrack_create(struct nfs4_client *clp) nfsd4_umh_cltrack_create(struct nfs4_client *clp)
{ {
char *hexid; char *hexid, *has_session, *grace_start;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
/*
* With v4.0 clients, there's little difference in outcome between a
* create and check operation, and we can end up calling into this
* function multiple times per client (once for each openowner). So,
* for v4.0 clients skip upcalling once the client has been recorded
* on stable storage.
*
* For v4.1+ clients, the outcome of the two operations is different,
* so we must ensure that we upcall for the create operation. v4.1+
* clients call this on RECLAIM_COMPLETE though, so we should only end
* up doing a single create upcall per client.
*/
if (clp->cl_minorversion == 0 &&
test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
if (!hexid) { if (!hexid) {
dprintk("%s: can't allocate memory for upcall!\n", __func__); dprintk("%s: can't allocate memory for upcall!\n", __func__);
return; return;
} }
nfsd4_umh_cltrack_upcall("create", hexid, NULL);
has_session = nfsd4_cltrack_client_has_session(clp);
grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
nfsd4_cltrack_upcall_lock(clp);
if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start))
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
nfsd4_cltrack_upcall_unlock(clp);
kfree(has_session);
kfree(grace_start);
kfree(hexid); kfree(hexid);
} }
...@@ -1217,12 +1326,21 @@ nfsd4_umh_cltrack_remove(struct nfs4_client *clp) ...@@ -1217,12 +1326,21 @@ nfsd4_umh_cltrack_remove(struct nfs4_client *clp)
{ {
char *hexid; char *hexid;
if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
if (!hexid) { if (!hexid) {
dprintk("%s: can't allocate memory for upcall!\n", __func__); dprintk("%s: can't allocate memory for upcall!\n", __func__);
return; return;
} }
nfsd4_umh_cltrack_upcall("remove", hexid, NULL);
nfsd4_cltrack_upcall_lock(clp);
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) &&
nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0)
clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
nfsd4_cltrack_upcall_unlock(clp);
kfree(hexid); kfree(hexid);
} }
...@@ -1230,30 +1348,45 @@ static int ...@@ -1230,30 +1348,45 @@ static int
nfsd4_umh_cltrack_check(struct nfs4_client *clp) nfsd4_umh_cltrack_check(struct nfs4_client *clp)
{ {
int ret; int ret;
char *hexid, *legacy; char *hexid, *has_session, *legacy;
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return 0;
hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len); hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
if (!hexid) { if (!hexid) {
dprintk("%s: can't allocate memory for upcall!\n", __func__); dprintk("%s: can't allocate memory for upcall!\n", __func__);
return -ENOMEM; return -ENOMEM;
} }
has_session = nfsd4_cltrack_client_has_session(clp);
legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name); legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name);
ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy);
nfsd4_cltrack_upcall_lock(clp);
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) {
ret = 0;
} else {
ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy);
if (ret == 0)
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
nfsd4_cltrack_upcall_unlock(clp);
kfree(has_session);
kfree(legacy); kfree(legacy);
kfree(hexid); kfree(hexid);
return ret; return ret;
} }
static void static void
nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn, nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
time_t boot_time)
{ {
char *legacy; char *legacy;
char timestr[22]; /* FIXME: better way to determine max size? */ char timestr[22]; /* FIXME: better way to determine max size? */
sprintf(timestr, "%ld", boot_time); sprintf(timestr, "%ld", nn->boot_time);
legacy = nfsd4_cltrack_legacy_topdir(); legacy = nfsd4_cltrack_legacy_topdir();
nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy); nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
kfree(legacy); kfree(legacy);
} }
...@@ -1356,10 +1489,10 @@ nfsd4_client_record_check(struct nfs4_client *clp) ...@@ -1356,10 +1489,10 @@ nfsd4_client_record_check(struct nfs4_client *clp)
} }
void void
nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time) nfsd4_record_grace_done(struct nfsd_net *nn)
{ {
if (nn->client_tracking_ops) if (nn->client_tracking_ops)
nn->client_tracking_ops->grace_done(nn, boot_time); nn->client_tracking_ops->grace_done(nn);
} }
static int static int
......
...@@ -96,6 +96,8 @@ static struct kmem_cache *deleg_slab; ...@@ -96,6 +96,8 @@ static struct kmem_cache *deleg_slab;
static void free_session(struct nfsd4_session *); static void free_session(struct nfsd4_session *);
static struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static bool is_session_dead(struct nfsd4_session *ses) static bool is_session_dead(struct nfsd4_session *ses)
{ {
return ses->se_flags & NFS4_SESSION_DEAD; return ses->se_flags & NFS4_SESSION_DEAD;
...@@ -645,7 +647,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) ...@@ -645,7 +647,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_perclnt);
INIT_LIST_HEAD(&dp->dl_recall_lru); INIT_LIST_HEAD(&dp->dl_recall_lru);
dp->dl_type = NFS4_OPEN_DELEGATE_READ; dp->dl_type = NFS4_OPEN_DELEGATE_READ;
INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall); dp->dl_retries = 1;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
return dp; return dp;
out_dec: out_dec:
atomic_long_dec(&num_delegations); atomic_long_dec(&num_delegations);
...@@ -673,15 +677,20 @@ nfs4_put_stid(struct nfs4_stid *s) ...@@ -673,15 +677,20 @@ nfs4_put_stid(struct nfs4_stid *s)
static void nfs4_put_deleg_lease(struct nfs4_file *fp) static void nfs4_put_deleg_lease(struct nfs4_file *fp)
{ {
lockdep_assert_held(&state_lock); struct file *filp = NULL;
struct file_lock *fl;
if (!fp->fi_lease) spin_lock(&fp->fi_lock);
return; if (fp->fi_lease && atomic_dec_and_test(&fp->fi_delegees)) {
if (atomic_dec_and_test(&fp->fi_delegees)) { swap(filp, fp->fi_deleg_file);
vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease); fl = fp->fi_lease;
fp->fi_lease = NULL; fp->fi_lease = NULL;
fput(fp->fi_deleg_file); }
fp->fi_deleg_file = NULL; spin_unlock(&fp->fi_lock);
if (filp) {
vfs_setlease(filp, F_UNLCK, &fl);
fput(filp);
} }
} }
...@@ -717,8 +726,6 @@ unhash_delegation_locked(struct nfs4_delegation *dp) ...@@ -717,8 +726,6 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_recall_lru);
list_del_init(&dp->dl_perfile); list_del_init(&dp->dl_perfile);
spin_unlock(&fp->fi_lock); spin_unlock(&fp->fi_lock);
if (fp)
nfs4_put_deleg_lease(fp);
} }
static void destroy_delegation(struct nfs4_delegation *dp) static void destroy_delegation(struct nfs4_delegation *dp)
...@@ -726,6 +733,7 @@ static void destroy_delegation(struct nfs4_delegation *dp) ...@@ -726,6 +733,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
spin_lock(&state_lock); spin_lock(&state_lock);
unhash_delegation_locked(dp); unhash_delegation_locked(dp);
spin_unlock(&state_lock); spin_unlock(&state_lock);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid); nfs4_put_stid(&dp->dl_stid);
} }
...@@ -735,6 +743,8 @@ static void revoke_delegation(struct nfs4_delegation *dp) ...@@ -735,6 +743,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
WARN_ON(!list_empty(&dp->dl_recall_lru)); WARN_ON(!list_empty(&dp->dl_recall_lru));
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
if (clp->cl_minorversion == 0) if (clp->cl_minorversion == 0)
nfs4_put_stid(&dp->dl_stid); nfs4_put_stid(&dp->dl_stid);
else { else {
...@@ -1635,6 +1645,7 @@ __destroy_client(struct nfs4_client *clp) ...@@ -1635,6 +1645,7 @@ __destroy_client(struct nfs4_client *clp)
while (!list_empty(&reaplist)) { while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_recall_lru);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid); nfs4_put_stid(&dp->dl_stid);
} }
while (!list_empty(&clp->cl_revoked)) { while (!list_empty(&clp->cl_revoked)) {
...@@ -1862,7 +1873,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, ...@@ -1862,7 +1873,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp); free_client(clp);
return NULL; return NULL;
} }
INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null); nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
clp->cl_time = get_seconds(); clp->cl_time = get_seconds();
clear_bit(0, &clp->cl_cb_slot_busy); clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf); copy_verf(clp, verf);
...@@ -3349,8 +3360,9 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) ...@@ -3349,8 +3360,9 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
return ret; return ret;
} }
void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
{ {
struct nfs4_delegation *dp = cb_to_delegation(cb);
struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net, struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
nfsd_net_id); nfsd_net_id);
...@@ -3371,6 +3383,43 @@ void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp) ...@@ -3371,6 +3383,43 @@ void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp)
spin_unlock(&state_lock); spin_unlock(&state_lock);
} }
/*
 * Completion handler for a delegation recall callback.
 *
 * Returns 1 when the RPC task finished with status 0, 0 after scheduling
 * a delayed retry, and -1 for any other outcome.
 */
static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
		struct rpc_task *task)
{
	struct nfs4_delegation *dp = cb_to_delegation(cb);
	int status = task->tk_status;

	if (status == 0)
		return 1;

	if (status == -EBADHANDLE || status == -NFS4ERR_BAD_STATEID) {
		/*
		 * Race: client probably got cb_recall before open reply
		 * granting delegation.  Retry after a delay while the
		 * delegation still has retries left.
		 */
		if (dp->dl_retries--) {
			rpc_delay(task, 2 * HZ);
			return 0;
		}
	}

	/* out of retries, or an error we do not retry */
	return -1;
}
static void nfsd4_cb_recall_release(struct nfsd4_callback *cb)
{
struct nfs4_delegation *dp = cb_to_delegation(cb);
nfs4_put_stid(&dp->dl_stid);
}
/* Callback operations used for delegation recall callbacks. */
static struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
	.prepare	= nfsd4_cb_recall_prepare,
	.done		= nfsd4_cb_recall_done,
	.release	= nfsd4_cb_recall_release,
};
static void nfsd_break_one_deleg(struct nfs4_delegation *dp) static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{ {
/* /*
...@@ -3381,7 +3430,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) ...@@ -3381,7 +3430,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
* it's safe to take a reference. * it's safe to take a reference.
*/ */
atomic_inc(&dp->dl_stid.sc_count); atomic_inc(&dp->dl_stid.sc_count);
nfsd4_cb_recall(dp); nfsd4_run_cb(&dp->dl_recall);
} }
/* Called from break_lease() with i_lock held. */ /* Called from break_lease() with i_lock held. */
...@@ -3759,7 +3808,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) ...@@ -3759,7 +3808,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
fl = locks_alloc_lock(); fl = locks_alloc_lock();
if (!fl) if (!fl)
return NULL; return NULL;
locks_init_lock(fl);
fl->fl_lmops = &nfsd_lease_mng_ops; fl->fl_lmops = &nfsd_lease_mng_ops;
fl->fl_flags = FL_DELEG; fl->fl_flags = FL_DELEG;
fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
...@@ -4107,7 +4155,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -4107,7 +4155,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status; return status;
} }
static void void
nfsd4_end_grace(struct nfsd_net *nn) nfsd4_end_grace(struct nfsd_net *nn)
{ {
/* do nothing if grace period already ended */ /* do nothing if grace period already ended */
...@@ -4116,14 +4164,28 @@ nfsd4_end_grace(struct nfsd_net *nn) ...@@ -4116,14 +4164,28 @@ nfsd4_end_grace(struct nfsd_net *nn)
dprintk("NFSD: end of grace period\n"); dprintk("NFSD: end of grace period\n");
nn->grace_ended = true; nn->grace_ended = true;
nfsd4_record_grace_done(nn, nn->boot_time); /*
* If the server goes down again right now, an NFSv4
* client will still be allowed to reclaim after it comes back up,
* even if it hasn't yet had a chance to reclaim state this time.
*
*/
nfsd4_record_grace_done(nn);
/*
* At this point, NFSv4 clients can still reclaim. But if the
* server crashes, any that have not yet reclaimed will be out
* of luck on the next boot.
*
* (NFSv4.1+ clients are considered to have reclaimed once they
* call RECLAIM_COMPLETE. NFSv4.0 clients are considered to
* have reclaimed after their first OPEN.)
*/
locks_end_grace(&nn->nfsd4_manager); locks_end_grace(&nn->nfsd4_manager);
/* /*
* Now that every NFSv4 client has had the chance to recover and * At this point, and once lockd and/or any other containers
* to see the (possibly new, possibly shorter) lease time, we * exit their grace period, further reclaims will fail and
* can safely set the next grace time to the current lease time: * regular locking can resume.
*/ */
nn->nfsd4_grace = nn->nfsd4_lease;
} }
static time_t static time_t
...@@ -5210,7 +5272,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -5210,7 +5272,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
} }
fp = lock_stp->st_stid.sc_file; fp = lock_stp->st_stid.sc_file;
locks_init_lock(file_lock);
switch (lock->lk_type) { switch (lock->lk_type) {
case NFS4_READ_LT: case NFS4_READ_LT:
case NFS4_READW_LT: case NFS4_READW_LT:
...@@ -5354,7 +5415,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -5354,7 +5415,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_jukebox; status = nfserr_jukebox;
goto out; goto out;
} }
locks_init_lock(file_lock);
switch (lockt->lt_type) { switch (lockt->lt_type) {
case NFS4_READ_LT: case NFS4_READ_LT:
case NFS4_READW_LT: case NFS4_READW_LT:
...@@ -5432,7 +5493,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, ...@@ -5432,7 +5493,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_jukebox; status = nfserr_jukebox;
goto fput; goto fput;
} }
locks_init_lock(file_lock);
file_lock->fl_type = F_UNLCK; file_lock->fl_type = F_UNLCK;
file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner); file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner);
file_lock->fl_pid = current->tgid; file_lock->fl_pid = current->tgid;
...@@ -5645,6 +5706,9 @@ nfs4_check_open_reclaim(clientid_t *clid, ...@@ -5645,6 +5706,9 @@ nfs4_check_open_reclaim(clientid_t *clid,
if (status) if (status)
return nfserr_reclaim_bad; return nfserr_reclaim_bad;
if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags))
return nfserr_no_grace;
if (nfsd4_client_record_check(cstate->clp)) if (nfsd4_client_record_check(cstate->clp))
return nfserr_reclaim_bad; return nfserr_reclaim_bad;
...@@ -6342,10 +6406,10 @@ nfs4_state_start_net(struct net *net) ...@@ -6342,10 +6406,10 @@ nfs4_state_start_net(struct net *net)
ret = nfs4_state_create_net(net); ret = nfs4_state_create_net(net);
if (ret) if (ret)
return ret; return ret;
nfsd4_client_tracking_init(net);
nn->boot_time = get_seconds(); nn->boot_time = get_seconds();
locks_start_grace(net, &nn->nfsd4_manager);
nn->grace_ended = false; nn->grace_ended = false;
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n", printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
nn->nfsd4_grace, net); nn->nfsd4_grace, net);
queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ); queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
...@@ -6402,6 +6466,7 @@ nfs4_state_shutdown_net(struct net *net) ...@@ -6402,6 +6466,7 @@ nfs4_state_shutdown_net(struct net *net)
list_for_each_safe(pos, next, &reaplist) { list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru); list_del_init(&dp->dl_recall_lru);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid); nfs4_put_stid(&dp->dl_stid);
} }
......
...@@ -31,13 +31,6 @@ ...@@ -31,13 +31,6 @@
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* TODO: Neil Brown made the following observation: We currently
* initially reserve NFSD_BUFSIZE space on the transmit queue and
* never release any of that until the request is complete.
* It would be good to calculate a new maximum response size while
* decoding the COMPOUND, and call svc_reserve with this number
* at the end of nfs4svc_decode_compoundargs.
*/ */
#include <linux/slab.h> #include <linux/slab.h>
...@@ -1520,6 +1513,22 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str ...@@ -1520,6 +1513,22 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
DECODE_TAIL; DECODE_TAIL;
} }
/*
 * Decode the arguments of an NFSv4.2 SEEK operation into @seek:
 * the stateid, then a 64-bit offset followed by a 32-bit whence value.
 *
 * NOTE(review): DECODE_HEAD, READ_BUF and DECODE_TAIL are macros defined
 * outside this view; presumably they declare 'p' and 'status', check that
 * the requested bytes are available, and supply the return path - confirm
 * against their definitions.
 */
static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
DECODE_HEAD;
status = nfsd4_decode_stateid(argp, &seek->seek_stateid);
if (status)
return status;
/* 8 bytes of offset + 4 bytes of whence */
READ_BUF(8 + 4);
p = xdr_decode_hyper(p, &seek->seek_offset);
seek->seek_whence = be32_to_cpup(p);
DECODE_TAIL;
}
static __be32 static __be32
nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p) nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
{ {
...@@ -1593,6 +1602,20 @@ static nfsd4_dec nfsd4_dec_ops[] = { ...@@ -1593,6 +1602,20 @@ static nfsd4_dec nfsd4_dec_ops[] = {
[OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid, [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid,
[OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
/* new operations for NFSv4.2 */
[OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
[OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
}; };
static inline bool static inline bool
...@@ -1670,6 +1693,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) ...@@ -1670,6 +1693,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
readbytes += nfsd4_max_reply(argp->rqstp, op); readbytes += nfsd4_max_reply(argp->rqstp, op);
} else } else
max_reply += nfsd4_max_reply(argp->rqstp, op); max_reply += nfsd4_max_reply(argp->rqstp, op);
/*
* OP_LOCK may return a conflicting lock. (Special case
* because it will just skip encoding this if it runs
* out of xdr buffer space, and it is the only operation
* that behaves this way.)
*/
if (op->opnum == OP_LOCK)
max_reply += NFS4_OPAQUE_LIMIT;
if (op->status) { if (op->status) {
argp->opcnt = i+1; argp->opcnt = i+1;
...@@ -3763,6 +3794,22 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, ...@@ -3763,6 +3794,22 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr; return nfserr;
} }
/*
 * Encode the result of an NFSv4.2 SEEK operation: a 32-bit eof flag
 * followed by the 64-bit offset that was found.
 *
 * Returns @nfserr unchanged (encoding nothing) when the operation failed,
 * nfserr_resource when the reply buffer has no room for the 12 result
 * bytes, and zero on success.
 */
static __be32
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
		  struct nfsd4_seek *seek)
{
	__be32 *p;

	if (nfserr)
		return nfserr;

	p = xdr_reserve_space(&resp->xdr, 4 + 8);
	/* xdr_reserve_space() returns NULL when the space is not available */
	if (!p)
		return nfserr_resource;
	*p++ = cpu_to_be32(seek->seek_eof);
	p = xdr_encode_hyper(p, seek->seek_pos);

	/* nfserr is known to be zero here */
	return nfserr;
}
static __be32 static __be32
nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
{ {
...@@ -3835,6 +3882,20 @@ static nfsd4_enc nfsd4_enc_ops[] = { ...@@ -3835,6 +3882,20 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
[OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop,
[OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop,
/* NFSv4.2 operations */
[OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_COPY] = (nfsd4_enc)nfsd4_encode_noop,
[OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
[OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
[OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
}; };
/* /*
......
...@@ -27,8 +27,12 @@ ...@@ -27,8 +27,12 @@
*/ */
#define TARGET_BUCKET_SIZE 64 #define TARGET_BUCKET_SIZE 64
static struct hlist_head * cache_hash; struct nfsd_drc_bucket {
static struct list_head lru_head; struct list_head lru_head;
spinlock_t cache_lock;
};
static struct nfsd_drc_bucket *drc_hashtbl;
static struct kmem_cache *drc_slab; static struct kmem_cache *drc_slab;
/* max number of entries allowed in the cache */ /* max number of entries allowed in the cache */
...@@ -36,6 +40,7 @@ static unsigned int max_drc_entries; ...@@ -36,6 +40,7 @@ static unsigned int max_drc_entries;
/* number of significant bits in the hash value */ /* number of significant bits in the hash value */
static unsigned int maskbits; static unsigned int maskbits;
static unsigned int drc_hashsize;
/* /*
* Stats and other tracking of on the duplicate reply cache. All of these and * Stats and other tracking of on the duplicate reply cache. All of these and
...@@ -43,7 +48,7 @@ static unsigned int maskbits; ...@@ -43,7 +48,7 @@ static unsigned int maskbits;
*/ */
/* total number of entries */ /* total number of entries */
static unsigned int num_drc_entries; static atomic_t num_drc_entries;
/* cache misses due only to checksum comparison failures */ /* cache misses due only to checksum comparison failures */
static unsigned int payload_misses; static unsigned int payload_misses;
...@@ -75,7 +80,6 @@ static struct shrinker nfsd_reply_cache_shrinker = { ...@@ -75,7 +80,6 @@ static struct shrinker nfsd_reply_cache_shrinker = {
* A cache entry is "single use" if c_state == RC_INPROG * A cache entry is "single use" if c_state == RC_INPROG
* Otherwise, when accessing _prev or _next, the lock must be held. * Otherwise, when accessing _prev or _next, the lock must be held.
*/ */
static DEFINE_SPINLOCK(cache_lock);
static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func);
/* /*
...@@ -116,6 +120,12 @@ nfsd_hashsize(unsigned int limit) ...@@ -116,6 +120,12 @@ nfsd_hashsize(unsigned int limit)
return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE); return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
} }
/*
 * Turn an RPC XID into a duplicate reply cache hash value.
 *
 * The XID is converted from big-endian to CPU byte order and then
 * reduced to "maskbits" significant bits by hash_32().  Callers use the
 * result as an index into drc_hashtbl.
 */
static u32
nfsd_cache_hash(__be32 xid)
{
	u32 host_xid = be32_to_cpu(xid);

	return hash_32(host_xid, maskbits);
}
static struct svc_cacherep * static struct svc_cacherep *
nfsd_reply_cache_alloc(void) nfsd_reply_cache_alloc(void)
{ {
...@@ -126,7 +136,6 @@ nfsd_reply_cache_alloc(void) ...@@ -126,7 +136,6 @@ nfsd_reply_cache_alloc(void)
rp->c_state = RC_UNUSED; rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE; rp->c_type = RC_NOCACHE;
INIT_LIST_HEAD(&rp->c_lru); INIT_LIST_HEAD(&rp->c_lru);
INIT_HLIST_NODE(&rp->c_hash);
} }
return rp; return rp;
} }
...@@ -138,29 +147,27 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) ...@@ -138,29 +147,27 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
drc_mem_usage -= rp->c_replvec.iov_len; drc_mem_usage -= rp->c_replvec.iov_len;
kfree(rp->c_replvec.iov_base); kfree(rp->c_replvec.iov_base);
} }
if (!hlist_unhashed(&rp->c_hash))
hlist_del(&rp->c_hash);
list_del(&rp->c_lru); list_del(&rp->c_lru);
--num_drc_entries; atomic_dec(&num_drc_entries);
drc_mem_usage -= sizeof(*rp); drc_mem_usage -= sizeof(*rp);
kmem_cache_free(drc_slab, rp); kmem_cache_free(drc_slab, rp);
} }
static void static void
nfsd_reply_cache_free(struct svc_cacherep *rp) nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
{ {
spin_lock(&cache_lock); spin_lock(&b->cache_lock);
nfsd_reply_cache_free_locked(rp); nfsd_reply_cache_free_locked(rp);
spin_unlock(&cache_lock); spin_unlock(&b->cache_lock);
} }
int nfsd_reply_cache_init(void) int nfsd_reply_cache_init(void)
{ {
unsigned int hashsize; unsigned int hashsize;
unsigned int i;
INIT_LIST_HEAD(&lru_head);
max_drc_entries = nfsd_cache_size_limit(); max_drc_entries = nfsd_cache_size_limit();
num_drc_entries = 0; atomic_set(&num_drc_entries, 0);
hashsize = nfsd_hashsize(max_drc_entries); hashsize = nfsd_hashsize(max_drc_entries);
maskbits = ilog2(hashsize); maskbits = ilog2(hashsize);
...@@ -170,9 +177,14 @@ int nfsd_reply_cache_init(void) ...@@ -170,9 +177,14 @@ int nfsd_reply_cache_init(void)
if (!drc_slab) if (!drc_slab)
goto out_nomem; goto out_nomem;
cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL); drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
if (!cache_hash) if (!drc_hashtbl)
goto out_nomem; goto out_nomem;
for (i = 0; i < hashsize; i++) {
INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
spin_lock_init(&drc_hashtbl[i].cache_lock);
}
drc_hashsize = hashsize;
return 0; return 0;
out_nomem: out_nomem:
...@@ -184,17 +196,22 @@ int nfsd_reply_cache_init(void) ...@@ -184,17 +196,22 @@ int nfsd_reply_cache_init(void)
void nfsd_reply_cache_shutdown(void) void nfsd_reply_cache_shutdown(void)
{ {
struct svc_cacherep *rp; struct svc_cacherep *rp;
unsigned int i;
unregister_shrinker(&nfsd_reply_cache_shrinker); unregister_shrinker(&nfsd_reply_cache_shrinker);
cancel_delayed_work_sync(&cache_cleaner); cancel_delayed_work_sync(&cache_cleaner);
while (!list_empty(&lru_head)) { for (i = 0; i < drc_hashsize; i++) {
rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); struct list_head *head = &drc_hashtbl[i].lru_head;
nfsd_reply_cache_free_locked(rp); while (!list_empty(head)) {
rp = list_first_entry(head, struct svc_cacherep, c_lru);
nfsd_reply_cache_free_locked(rp);
}
} }
kfree (cache_hash); kfree (drc_hashtbl);
cache_hash = NULL; drc_hashtbl = NULL;
drc_hashsize = 0;
if (drc_slab) { if (drc_slab) {
kmem_cache_destroy(drc_slab); kmem_cache_destroy(drc_slab);
...@@ -207,61 +224,63 @@ void nfsd_reply_cache_shutdown(void) ...@@ -207,61 +224,63 @@ void nfsd_reply_cache_shutdown(void)
* not already scheduled. * not already scheduled.
*/ */
static void static void
lru_put_end(struct svc_cacherep *rp) lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
{ {
rp->c_timestamp = jiffies; rp->c_timestamp = jiffies;
list_move_tail(&rp->c_lru, &lru_head); list_move_tail(&rp->c_lru, &b->lru_head);
schedule_delayed_work(&cache_cleaner, RC_EXPIRE); schedule_delayed_work(&cache_cleaner, RC_EXPIRE);
} }
/*
* Move a cache entry from one hash list to another
*/
static void
hash_refile(struct svc_cacherep *rp)
{
hlist_del_init(&rp->c_hash);
/*
* No point in byte swapping c_xid since we're just using it to pick
* a hash bucket.
*/
hlist_add_head(&rp->c_hash, cache_hash +
hash_32((__force u32)rp->c_xid, maskbits));
}
/*
* Walk the LRU list and prune off entries that are older than RC_EXPIRE.
* Also prune the oldest ones when the total exceeds the max number of entries.
*/
static long static long
prune_cache_entries(void) prune_bucket(struct nfsd_drc_bucket *b)
{ {
struct svc_cacherep *rp, *tmp; struct svc_cacherep *rp, *tmp;
long freed = 0; long freed = 0;
list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
/* /*
* Don't free entries attached to calls that are still * Don't free entries attached to calls that are still
* in-progress, but do keep scanning the list. * in-progress, but do keep scanning the list.
*/ */
if (rp->c_state == RC_INPROG) if (rp->c_state == RC_INPROG)
continue; continue;
if (num_drc_entries <= max_drc_entries && if (atomic_read(&num_drc_entries) <= max_drc_entries &&
time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break; break;
nfsd_reply_cache_free_locked(rp); nfsd_reply_cache_free_locked(rp);
freed++; freed++;
} }
return freed;
}
/*
* Walk the LRU list and prune off entries that are older than RC_EXPIRE.
* Also prune the oldest ones when the total exceeds the max number of entries.
*/
static long
prune_cache_entries(void)
{
unsigned int i;
long freed = 0;
bool cancel = true;
for (i = 0; i < drc_hashsize; i++) {
struct nfsd_drc_bucket *b = &drc_hashtbl[i];
if (list_empty(&b->lru_head))
continue;
spin_lock(&b->cache_lock);
freed += prune_bucket(b);
if (!list_empty(&b->lru_head))
cancel = false;
spin_unlock(&b->cache_lock);
}
/* /*
* Conditionally rearm the job. If we cleaned out the list, then * Conditionally rearm the job to run in RC_EXPIRE since we just
* cancel any pending run (since there won't be any work to do). * ran the pruner.
* Otherwise, we rearm the job or modify the existing one to run in
* RC_EXPIRE since we just ran the pruner.
*/ */
if (list_empty(&lru_head)) if (!cancel)
cancel_delayed_work(&cache_cleaner);
else
mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE);
return freed; return freed;
} }
...@@ -269,32 +288,19 @@ prune_cache_entries(void) ...@@ -269,32 +288,19 @@ prune_cache_entries(void)
static void static void
cache_cleaner_func(struct work_struct *unused) cache_cleaner_func(struct work_struct *unused)
{ {
spin_lock(&cache_lock);
prune_cache_entries(); prune_cache_entries();
spin_unlock(&cache_lock);
} }
static unsigned long static unsigned long
nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc) nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{ {
unsigned long num; return atomic_read(&num_drc_entries);
spin_lock(&cache_lock);
num = num_drc_entries;
spin_unlock(&cache_lock);
return num;
} }
static unsigned long static unsigned long
nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc) nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{ {
unsigned long freed; return prune_cache_entries();
spin_lock(&cache_lock);
freed = prune_cache_entries();
spin_unlock(&cache_lock);
return freed;
} }
/* /*
* Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
...@@ -332,20 +338,24 @@ nfsd_cache_csum(struct svc_rqst *rqstp) ...@@ -332,20 +338,24 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
static bool static bool
nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp)
{ {
/* Check RPC header info first */ /* Check RPC XID first */
if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc || if (rqstp->rq_xid != rp->c_xid)
rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers ||
rqstp->rq_arg.len != rp->c_len ||
!rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) ||
rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr))
return false; return false;
/* compare checksum of NFS data */ /* compare checksum of NFS data */
if (csum != rp->c_csum) { if (csum != rp->c_csum) {
++payload_misses; ++payload_misses;
return false; return false;
} }
/* Other discriminators */
if (rqstp->rq_proc != rp->c_proc ||
rqstp->rq_prot != rp->c_prot ||
rqstp->rq_vers != rp->c_vers ||
rqstp->rq_arg.len != rp->c_len ||
!rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) ||
rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr))
return false;
return true; return true;
} }
...@@ -355,18 +365,14 @@ nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) ...@@ -355,18 +365,14 @@ nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp)
* NULL on failure. * NULL on failure.
*/ */
static struct svc_cacherep * static struct svc_cacherep *
nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp,
__wsum csum)
{ {
struct svc_cacherep *rp, *ret = NULL; struct svc_cacherep *rp, *ret = NULL;
struct hlist_head *rh; struct list_head *rh = &b->lru_head;
unsigned int entries = 0; unsigned int entries = 0;
/* list_for_each_entry(rp, rh, c_lru) {
* No point in byte swapping rq_xid since we're just using it to pick
* a hash bucket.
*/
rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)];
hlist_for_each_entry(rp, rh, c_hash) {
++entries; ++entries;
if (nfsd_cache_match(rqstp, csum, rp)) { if (nfsd_cache_match(rqstp, csum, rp)) {
ret = rp; ret = rp;
...@@ -377,11 +383,12 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) ...@@ -377,11 +383,12 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
/* tally hash chain length stats */ /* tally hash chain length stats */
if (entries > longest_chain) { if (entries > longest_chain) {
longest_chain = entries; longest_chain = entries;
longest_chain_cachesize = num_drc_entries; longest_chain_cachesize = atomic_read(&num_drc_entries);
} else if (entries == longest_chain) { } else if (entries == longest_chain) {
/* prefer to keep the smallest cachesize possible here */ /* prefer to keep the smallest cachesize possible here */
longest_chain_cachesize = min(longest_chain_cachesize, longest_chain_cachesize = min_t(unsigned int,
num_drc_entries); longest_chain_cachesize,
atomic_read(&num_drc_entries));
} }
return ret; return ret;
...@@ -403,6 +410,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) ...@@ -403,6 +410,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
vers = rqstp->rq_vers, vers = rqstp->rq_vers,
proc = rqstp->rq_proc; proc = rqstp->rq_proc;
__wsum csum; __wsum csum;
u32 hash = nfsd_cache_hash(xid);
struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
unsigned long age; unsigned long age;
int type = rqstp->rq_cachetype; int type = rqstp->rq_cachetype;
int rtn = RC_DOIT; int rtn = RC_DOIT;
...@@ -420,16 +429,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) ...@@ -420,16 +429,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
* preallocate an entry. * preallocate an entry.
*/ */
rp = nfsd_reply_cache_alloc(); rp = nfsd_reply_cache_alloc();
spin_lock(&cache_lock); spin_lock(&b->cache_lock);
if (likely(rp)) { if (likely(rp)) {
++num_drc_entries; atomic_inc(&num_drc_entries);
drc_mem_usage += sizeof(*rp); drc_mem_usage += sizeof(*rp);
} }
/* go ahead and prune the cache */ /* go ahead and prune the cache */
prune_cache_entries(); prune_bucket(b);
found = nfsd_cache_search(rqstp, csum); found = nfsd_cache_search(b, rqstp, csum);
if (found) { if (found) {
if (likely(rp)) if (likely(rp))
nfsd_reply_cache_free_locked(rp); nfsd_reply_cache_free_locked(rp);
...@@ -454,8 +463,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) ...@@ -454,8 +463,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
rp->c_len = rqstp->rq_arg.len; rp->c_len = rqstp->rq_arg.len;
rp->c_csum = csum; rp->c_csum = csum;
hash_refile(rp); lru_put_end(b, rp);
lru_put_end(rp);
/* release any buffer */ /* release any buffer */
if (rp->c_type == RC_REPLBUFF) { if (rp->c_type == RC_REPLBUFF) {
...@@ -465,14 +473,14 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) ...@@ -465,14 +473,14 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
} }
rp->c_type = RC_NOCACHE; rp->c_type = RC_NOCACHE;
out: out:
spin_unlock(&cache_lock); spin_unlock(&b->cache_lock);
return rtn; return rtn;
found_entry: found_entry:
nfsdstats.rchits++; nfsdstats.rchits++;
/* We found a matching entry which is either in progress or done. */ /* We found a matching entry which is either in progress or done. */
age = jiffies - rp->c_timestamp; age = jiffies - rp->c_timestamp;
lru_put_end(rp); lru_put_end(b, rp);
rtn = RC_DROPIT; rtn = RC_DROPIT;
/* Request being processed or excessive rexmits */ /* Request being processed or excessive rexmits */
...@@ -527,18 +535,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) ...@@ -527,18 +535,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
{ {
struct svc_cacherep *rp = rqstp->rq_cacherep; struct svc_cacherep *rp = rqstp->rq_cacherep;
struct kvec *resv = &rqstp->rq_res.head[0], *cachv; struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
u32 hash;
struct nfsd_drc_bucket *b;
int len; int len;
size_t bufsize = 0; size_t bufsize = 0;
if (!rp) if (!rp)
return; return;
hash = nfsd_cache_hash(rp->c_xid);
b = &drc_hashtbl[hash];
len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
len >>= 2; len >>= 2;
/* Don't cache excessive amounts of data and XDR failures */ /* Don't cache excessive amounts of data and XDR failures */
if (!statp || len > (256 >> 2)) { if (!statp || len > (256 >> 2)) {
nfsd_reply_cache_free(rp); nfsd_reply_cache_free(b, rp);
return; return;
} }
...@@ -553,23 +566,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) ...@@ -553,23 +566,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
bufsize = len << 2; bufsize = len << 2;
cachv->iov_base = kmalloc(bufsize, GFP_KERNEL); cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
if (!cachv->iov_base) { if (!cachv->iov_base) {
nfsd_reply_cache_free(rp); nfsd_reply_cache_free(b, rp);
return; return;
} }
cachv->iov_len = bufsize; cachv->iov_len = bufsize;
memcpy(cachv->iov_base, statp, bufsize); memcpy(cachv->iov_base, statp, bufsize);
break; break;
case RC_NOCACHE: case RC_NOCACHE:
nfsd_reply_cache_free(rp); nfsd_reply_cache_free(b, rp);
return; return;
} }
spin_lock(&cache_lock); spin_lock(&b->cache_lock);
drc_mem_usage += bufsize; drc_mem_usage += bufsize;
lru_put_end(rp); lru_put_end(b, rp);
rp->c_secure = rqstp->rq_secure; rp->c_secure = rqstp->rq_secure;
rp->c_type = cachetype; rp->c_type = cachetype;
rp->c_state = RC_DONE; rp->c_state = RC_DONE;
spin_unlock(&cache_lock); spin_unlock(&b->cache_lock);
return; return;
} }
...@@ -600,9 +613,9 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data) ...@@ -600,9 +613,9 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
*/ */
static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
{ {
spin_lock(&cache_lock);
seq_printf(m, "max entries: %u\n", max_drc_entries); seq_printf(m, "max entries: %u\n", max_drc_entries);
seq_printf(m, "num entries: %u\n", num_drc_entries); seq_printf(m, "num entries: %u\n",
atomic_read(&num_drc_entries));
seq_printf(m, "hash buckets: %u\n", 1 << maskbits); seq_printf(m, "hash buckets: %u\n", 1 << maskbits);
seq_printf(m, "mem usage: %u\n", drc_mem_usage); seq_printf(m, "mem usage: %u\n", drc_mem_usage);
seq_printf(m, "cache hits: %u\n", nfsdstats.rchits); seq_printf(m, "cache hits: %u\n", nfsdstats.rchits);
...@@ -611,7 +624,6 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) ...@@ -611,7 +624,6 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
seq_printf(m, "payload misses: %u\n", payload_misses); seq_printf(m, "payload misses: %u\n", payload_misses);
seq_printf(m, "longest chain len: %u\n", longest_chain); seq_printf(m, "longest chain len: %u\n", longest_chain);
seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize); seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize);
spin_unlock(&cache_lock);
return 0; return 0;
} }
......
...@@ -49,6 +49,7 @@ enum { ...@@ -49,6 +49,7 @@ enum {
NFSD_Leasetime, NFSD_Leasetime,
NFSD_Gracetime, NFSD_Gracetime,
NFSD_RecoveryDir, NFSD_RecoveryDir,
NFSD_V4EndGrace,
#endif #endif
}; };
...@@ -68,6 +69,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size); ...@@ -68,6 +69,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
static ssize_t write_leasetime(struct file *file, char *buf, size_t size); static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_gracetime(struct file *file, char *buf, size_t size); static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size);
#endif #endif
static ssize_t (*write_op[])(struct file *, char *, size_t) = { static ssize_t (*write_op[])(struct file *, char *, size_t) = {
...@@ -84,6 +86,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { ...@@ -84,6 +86,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Leasetime] = write_leasetime, [NFSD_Leasetime] = write_leasetime,
[NFSD_Gracetime] = write_gracetime, [NFSD_Gracetime] = write_gracetime,
[NFSD_RecoveryDir] = write_recoverydir, [NFSD_RecoveryDir] = write_recoverydir,
[NFSD_V4EndGrace] = write_v4_end_grace,
#endif #endif
}; };
...@@ -1077,6 +1080,47 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) ...@@ -1077,6 +1080,47 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
return rv; return rv;
} }
/**
 * write_v4_end_grace - query or end the grace period of nfsd's v4.x lock manager
 *
 * Input:
 *			buf:	ignored
 *			size:	zero
 * OR
 *
 * Input:
 *			buf:	C string; only its first character is examined
 *			size:	non-zero length of C string in @buf
 * Output:
 *	On success:	passed-in buffer filled with "Y" or "N" followed by
 *			a newline, as a NUL-terminated C string.  This
 *			indicates whether the grace period has ended in the
 *			current net namespace.  Return code is the size in
 *			bytes of the string.
 *	On error:	-EINVAL when @size is non-zero and @buf does not
 *			start with 'Y', 'y', or '1'.
 *
 * Writing a string that starts with 'Y', 'y', or '1' to the file ends the
 * grace period for nfsd's v4 lock manager before the state is reported.
 */
static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
{
	struct net *net = file->f_dentry->d_sb->s_fs_info;
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	char state;

	if (size > 0) {
		/* Anything other than an affirmative first character is rejected. */
		if (buf[0] != 'Y' && buf[0] != 'y' && buf[0] != '1')
			return -EINVAL;
		nfsd4_end_grace(nn);
	}

	/* Report the state as it stands after any request above was handled. */
	state = nn->grace_ended ? 'Y' : 'N';
	return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%c\n", state);
}
#endif #endif
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
...@@ -1110,6 +1154,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) ...@@ -1110,6 +1154,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO},
#endif #endif
/* last one */ {""} /* last one */ {""}
}; };
......
...@@ -251,7 +251,7 @@ void nfsd_lockd_shutdown(void); ...@@ -251,7 +251,7 @@ void nfsd_lockd_shutdown(void);
#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED)
#define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) #define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP)
#define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) #define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH)
#define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) #define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP)
#define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) #define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED)
#define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS)
#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL)
......
...@@ -209,8 +209,10 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) ...@@ -209,8 +209,10 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
* fix that case easily. * fix that case easily.
*/ */
struct cred *new = prepare_creds(); struct cred *new = prepare_creds();
if (!new) if (!new) {
return nfserrno(-ENOMEM); error = nfserrno(-ENOMEM);
goto out;
}
new->cap_effective = new->cap_effective =
cap_raise_nfsd_set(new->cap_effective, cap_raise_nfsd_set(new->cap_effective,
new->cap_permitted); new->cap_permitted);
......
...@@ -62,16 +62,21 @@ typedef struct { ...@@ -62,16 +62,21 @@ typedef struct {
(s)->si_generation (s)->si_generation
struct nfsd4_callback { struct nfsd4_callback {
void *cb_op;
struct nfs4_client *cb_clp; struct nfs4_client *cb_clp;
struct list_head cb_per_client; struct list_head cb_per_client;
u32 cb_minorversion; u32 cb_minorversion;
struct rpc_message cb_msg; struct rpc_message cb_msg;
const struct rpc_call_ops *cb_ops; struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work; struct work_struct cb_work;
bool cb_done; bool cb_done;
}; };
/*
 * Table of hooks attached to a callback.  struct nfsd4_callback refers to
 * one of these through its cb_ops member, and nfsd4_init_cb() takes one as
 * its "ops" argument.
 *
 * NOTE(review): the hook names suggest they run before the callback RPC is
 * sent, when it completes, and when it is torn down, respectively; the
 * meaning of done()'s int return is not visible here -- confirm against the
 * callback implementation.
 */
struct nfsd4_callback_ops {
	void (*prepare)(struct nfsd4_callback *cb);
	int (*done)(struct nfsd4_callback *cb, struct rpc_task *task);
	void (*release)(struct nfsd4_callback *cb);
};
/* /*
* A core object that represents a "common" stateid. These are generally * A core object that represents a "common" stateid. These are generally
* embedded within the different (more specific) stateid objects and contain * embedded within the different (more specific) stateid objects and contain
...@@ -127,6 +132,9 @@ struct nfs4_delegation { ...@@ -127,6 +132,9 @@ struct nfs4_delegation {
struct nfsd4_callback dl_recall; struct nfsd4_callback dl_recall;
}; };
#define cb_to_delegation(cb) \
container_of(cb, struct nfs4_delegation, dl_recall)
/* client delegation callback info */ /* client delegation callback info */
struct nfs4_cb_conn { struct nfs4_cb_conn {
/* SETCLIENTID info */ /* SETCLIENTID info */
...@@ -306,6 +314,7 @@ struct nfs4_client { ...@@ -306,6 +314,7 @@ struct nfs4_client {
#define NFSD4_CLIENT_STABLE (2) /* client on stable storage */ #define NFSD4_CLIENT_STABLE (2) /* client on stable storage */
#define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */ #define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */
#define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */ #define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */
#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \ #define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL) 1 << NFSD4_CLIENT_CB_KILL)
unsigned long cl_flags; unsigned long cl_flags;
...@@ -517,6 +526,13 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) ...@@ -517,6 +526,13 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
#define RD_STATE 0x00000010 #define RD_STATE 0x00000010
#define WR_STATE 0x00000020 #define WR_STATE 0x00000020
/*
 * Identifies a callback operation; passed as the "op" argument of
 * nfsd4_init_cb().  The values are consecutive, starting at zero.
 */
enum nfsd4_cb_op {
	NFSPROC4_CLNT_CB_NULL		= 0,
	NFSPROC4_CLNT_CB_RECALL		= 1,
	NFSPROC4_CLNT_CB_SEQUENCE	= 2,
};
struct nfsd4_compound_state; struct nfsd4_compound_state;
struct nfsd_net; struct nfsd_net;
...@@ -531,12 +547,12 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir, ...@@ -531,12 +547,12 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
extern __be32 nfs4_check_open_reclaim(clientid_t *clid, extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
struct nfsd4_compound_state *cstate, struct nfsd_net *nn); struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
extern int set_callback_cred(void); extern int set_callback_cred(void);
void nfsd4_run_cb_null(struct work_struct *w);
void nfsd4_run_cb_recall(struct work_struct *w);
extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
extern void nfsd4_run_cb(struct nfsd4_callback *cb);
extern int nfsd4_create_callback_queue(void); extern int nfsd4_create_callback_queue(void);
extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_destroy_callback_queue(void);
extern void nfsd4_shutdown_callback(struct nfs4_client *); extern void nfsd4_shutdown_callback(struct nfs4_client *);
...@@ -545,13 +561,16 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, ...@@ -545,13 +561,16 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
struct nfsd_net *nn); struct nfsd_net *nn);
extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);
/* nfs4recover operations */ /* nfs4recover operations */
extern int nfsd4_client_tracking_init(struct net *net); extern int nfsd4_client_tracking_init(struct net *net);
extern void nfsd4_client_tracking_exit(struct net *net); extern void nfsd4_client_tracking_exit(struct net *net);
extern void nfsd4_client_record_create(struct nfs4_client *clp); extern void nfsd4_client_record_create(struct nfs4_client *clp);
extern void nfsd4_client_record_remove(struct nfs4_client *clp); extern void nfsd4_client_record_remove(struct nfs4_client *clp);
extern int nfsd4_client_record_check(struct nfs4_client *clp); extern int nfsd4_client_record_check(struct nfs4_client *clp);
extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time); extern void nfsd4_record_grace_done(struct nfsd_net *nn);
/* nfs fault injection functions */ /* nfs fault injection functions */
#ifdef CONFIG_NFSD_FAULT_INJECTION #ifdef CONFIG_NFSD_FAULT_INJECTION
......
...@@ -445,6 +445,16 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, ...@@ -445,6 +445,16 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
if (err) if (err)
goto out; goto out;
size_change = 1; size_change = 1;
/*
* RFC5661, Section 18.30.4:
* Changing the size of a file with SETATTR indirectly
* changes the time_modify and change attributes.
*
* (and similar for the older RFCs)
*/
if (iap->ia_size != i_size_read(inode))
iap->ia_valid |= ATTR_MTIME;
} }
iap->ia_valid |= ATTR_CTIME; iap->ia_valid |= ATTR_CTIME;
...@@ -649,6 +659,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, ...@@ -649,6 +659,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
{ {
struct path path; struct path path;
struct inode *inode; struct inode *inode;
struct file *file;
int flags = O_RDONLY|O_LARGEFILE; int flags = O_RDONLY|O_LARGEFILE;
__be32 err; __be32 err;
int host_err = 0; int host_err = 0;
...@@ -703,19 +714,25 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, ...@@ -703,19 +714,25 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
else else
flags = O_WRONLY|O_LARGEFILE; flags = O_WRONLY|O_LARGEFILE;
} }
*filp = dentry_open(&path, flags, current_cred());
if (IS_ERR(*filp)) {
host_err = PTR_ERR(*filp);
*filp = NULL;
} else {
host_err = ima_file_check(*filp, may_flags);
if (may_flags & NFSD_MAY_64BIT_COOKIE) file = dentry_open(&path, flags, current_cred());
(*filp)->f_mode |= FMODE_64BITHASH; if (IS_ERR(file)) {
else host_err = PTR_ERR(file);
(*filp)->f_mode |= FMODE_32BITHASH; goto out_nfserr;
} }
host_err = ima_file_check(file, may_flags);
if (host_err) {
nfsd_close(file);
goto out_nfserr;
}
if (may_flags & NFSD_MAY_64BIT_COOKIE)
file->f_mode |= FMODE_64BITHASH;
else
file->f_mode |= FMODE_32BITHASH;
*filp = file;
out_nfserr: out_nfserr:
err = nfserrno(host_err); err = nfserrno(host_err);
out: out:
......
...@@ -428,6 +428,17 @@ struct nfsd4_reclaim_complete { ...@@ -428,6 +428,17 @@ struct nfsd4_reclaim_complete {
u32 rca_one_fs; u32 rca_one_fs;
}; };
/*
 * Arguments and results of the NFSv4.2 SEEK operation.  struct nfsd4_op
 * carries one of these as u.seek.
 */
struct nfsd4_seek {
/* request: fields supplied by the client */
/* NOTE(review): presumably the stateid of the open file being searched -- confirm */
stateid_t seek_stateid;
/* NOTE(review): presumably the file offset at which the search starts -- confirm */
loff_t seek_offset;
/* NOTE(review): presumably selects searching for data or for a hole -- confirm */
u32 seek_whence;

/* response: fields filled in for the reply */
/* NOTE(review): presumably non-zero when the search reached end of file -- confirm */
u32 seek_eof;
/* NOTE(review): presumably the resulting file position -- confirm */
loff_t seek_pos;
};
struct nfsd4_op { struct nfsd4_op {
int opnum; int opnum;
__be32 status; __be32 status;
...@@ -473,6 +484,9 @@ struct nfsd4_op { ...@@ -473,6 +484,9 @@ struct nfsd4_op {
struct nfsd4_reclaim_complete reclaim_complete; struct nfsd4_reclaim_complete reclaim_complete;
struct nfsd4_test_stateid test_stateid; struct nfsd4_test_stateid test_stateid;
struct nfsd4_free_stateid free_stateid; struct nfsd4_free_stateid free_stateid;
/* NFSv4.2 */
struct nfsd4_seek seek;
} u; } u;
struct nfs4_replay * replay; struct nfs4_replay * replay;
}; };
......
...@@ -110,6 +110,20 @@ enum nfs_opnum4 { ...@@ -110,6 +110,20 @@ enum nfs_opnum4 {
OP_DESTROY_CLIENTID = 57, OP_DESTROY_CLIENTID = 57,
OP_RECLAIM_COMPLETE = 58, OP_RECLAIM_COMPLETE = 58,
/* nfs42 */
OP_ALLOCATE = 59,
OP_COPY = 60,
OP_COPY_NOTIFY = 61,
OP_DEALLOCATE = 62,
OP_IO_ADVISE = 63,
OP_LAYOUTERROR = 64,
OP_LAYOUTSTATS = 65,
OP_OFFLOAD_CANCEL = 66,
OP_OFFLOAD_STATUS = 67,
OP_READ_PLUS = 68,
OP_SEEK = 69,
OP_WRITE_SAME = 70,
OP_ILLEGAL = 10044, OP_ILLEGAL = 10044,
}; };
...@@ -117,10 +131,10 @@ enum nfs_opnum4 { ...@@ -117,10 +131,10 @@ enum nfs_opnum4 {
Needs to be updated if more operations are defined in future.*/ Needs to be updated if more operations are defined in future.*/
#define FIRST_NFS4_OP OP_ACCESS #define FIRST_NFS4_OP OP_ACCESS
#define LAST_NFS4_OP OP_RECLAIM_COMPLETE #define LAST_NFS4_OP OP_WRITE_SAME
#define LAST_NFS40_OP OP_RELEASE_LOCKOWNER #define LAST_NFS40_OP OP_RELEASE_LOCKOWNER
#define LAST_NFS41_OP OP_RECLAIM_COMPLETE #define LAST_NFS41_OP OP_RECLAIM_COMPLETE
#define LAST_NFS42_OP OP_RECLAIM_COMPLETE #define LAST_NFS42_OP OP_WRITE_SAME
enum nfsstat4 { enum nfsstat4 {
NFS4_OK = 0, NFS4_OK = 0,
...@@ -235,10 +249,11 @@ enum nfsstat4 { ...@@ -235,10 +249,11 @@ enum nfsstat4 {
/* nfs42 */ /* nfs42 */
NFS4ERR_PARTNER_NOTSUPP = 10088, NFS4ERR_PARTNER_NOTSUPP = 10088,
NFS4ERR_PARTNER_NO_AUTH = 10089, NFS4ERR_PARTNER_NO_AUTH = 10089,
NFS4ERR_METADATA_NOTSUPP = 10090, NFS4ERR_UNION_NOTSUPP = 10090,
NFS4ERR_OFFLOAD_DENIED = 10091, NFS4ERR_OFFLOAD_DENIED = 10091,
NFS4ERR_WRONG_LFS = 10092, NFS4ERR_WRONG_LFS = 10092,
NFS4ERR_BADLABEL = 10093, NFS4ERR_BADLABEL = 10093,
NFS4ERR_OFFLOAD_NO_REQS = 10094,
}; };
static inline bool seqid_mutating_err(u32 err) static inline bool seqid_mutating_err(u32 err)
...@@ -535,4 +550,9 @@ struct nfs4_deviceid { ...@@ -535,4 +550,9 @@ struct nfs4_deviceid {
char data[NFS4_DEVICEID4_SIZE]; char data[NFS4_DEVICEID4_SIZE];
}; };
/*
 * Content-type selector with two values: data (0) and hole (1).
 * NOTE(review): presumably the on-the-wire data_content4 values used by
 * the NFSv4.2 SEEK operation -- confirm against the protocol definition.
 */
enum data_content4 {
NFS4_CONTENT_DATA = 0,
NFS4_CONTENT_HOLE = 1,
};
#endif #endif
...@@ -74,6 +74,8 @@ static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *p ...@@ -74,6 +74,8 @@ static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *p
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
struct net;
static inline struct proc_dir_entry *proc_net_mkdir( static inline struct proc_dir_entry *proc_net_mkdir(
struct net *net, const char *name, struct proc_dir_entry *parent) struct net *net, const char *name, struct proc_dir_entry *parent)
{ {
......
...@@ -280,7 +280,6 @@ struct svc_rqst { ...@@ -280,7 +280,6 @@ struct svc_rqst {
bool rq_splice_ok; /* turned off in gss privacy bool rq_splice_ok; /* turned off in gss privacy
* to prevent encrypting page * to prevent encrypting page
* cache pages */ * cache pages */
wait_queue_head_t rq_wait; /* synchronization */
struct task_struct *rq_task; /* service thread */ struct task_struct *rq_task; /* service thread */
}; };
......
...@@ -28,7 +28,8 @@ ...@@ -28,7 +28,8 @@
#define NFSEXP_ALLSQUASH 0x0008 #define NFSEXP_ALLSQUASH 0x0008
#define NFSEXP_ASYNC 0x0010 #define NFSEXP_ASYNC 0x0010
#define NFSEXP_GATHERED_WRITES 0x0020 #define NFSEXP_GATHERED_WRITES 0x0020
/* 40 80 100 currently unused */ #define NFSEXP_NOREADDIRPLUS 0x0040
/* 80 100 currently unused */
#define NFSEXP_NOHIDE 0x0200 #define NFSEXP_NOHIDE 0x0200
#define NFSEXP_NOSUBTREECHECK 0x0400 #define NFSEXP_NOSUBTREECHECK 0x0400
#define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */ #define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */
...@@ -47,7 +48,7 @@ ...@@ -47,7 +48,7 @@
*/ */
#define NFSEXP_V4ROOT 0x10000 #define NFSEXP_V4ROOT 0x10000
/* All flags that we claim to support. (Note we don't support NOACL.) */ /* All flags that we claim to support. (Note we don't support NOACL.) */
#define NFSEXP_ALLFLAGS 0x17E3F #define NFSEXP_ALLFLAGS 0x1FE7F
/* The flags that may vary depending on security flavor: */ /* The flags that may vary depending on security flavor: */
#define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \ #define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
......
...@@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) ...@@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp) if (!rqstp)
goto out_enomem; goto out_enomem;
init_waitqueue_head(&rqstp->rq_wait);
serv->sv_nrthreads++; serv->sv_nrthreads++;
spin_lock_bh(&pool->sp_lock); spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++; pool->sp_nrthreads++;
......
...@@ -346,20 +346,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) ...@@ -346,20 +346,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (!svc_xprt_has_something_to_do(xprt)) if (!svc_xprt_has_something_to_do(xprt))
return; return;
cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
put_cpu();
spin_lock_bh(&pool->sp_lock);
if (!list_empty(&pool->sp_threads) &&
!list_empty(&pool->sp_sockets))
printk(KERN_ERR
"svc_xprt_enqueue: "
"threads and transports both waiting??\n");
pool->sp_stats.packets++;
/* Mark transport as busy. It will remain in this state until /* Mark transport as busy. It will remain in this state until
* the provider calls svc_xprt_received. We update XPT_BUSY * the provider calls svc_xprt_received. We update XPT_BUSY
* atomically because it also guards against trying to enqueue * atomically because it also guards against trying to enqueue
...@@ -368,9 +354,15 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) ...@@ -368,9 +354,15 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
/* Don't enqueue transport while already enqueued */ /* Don't enqueue transport while already enqueued */
dprintk("svc: transport %p busy, not enqueued\n", xprt); dprintk("svc: transport %p busy, not enqueued\n", xprt);
goto out_unlock; return;
} }
cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
spin_lock_bh(&pool->sp_lock);
pool->sp_stats.packets++;
if (!list_empty(&pool->sp_threads)) { if (!list_empty(&pool->sp_threads)) {
rqstp = list_entry(pool->sp_threads.next, rqstp = list_entry(pool->sp_threads.next,
struct svc_rqst, struct svc_rqst,
...@@ -382,18 +374,23 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) ...@@ -382,18 +374,23 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
printk(KERN_ERR printk(KERN_ERR
"svc_xprt_enqueue: server %p, rq_xprt=%p!\n", "svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
rqstp, rqstp->rq_xprt); rqstp, rqstp->rq_xprt);
rqstp->rq_xprt = xprt; /* Note the order of the following 3 lines:
* We want to assign xprt to rqstp->rq_xprt only _after_
* we've woken up the process, so that we don't race with
* the lockless check in svc_get_next_xprt().
*/
svc_xprt_get(xprt); svc_xprt_get(xprt);
wake_up_process(rqstp->rq_task);
rqstp->rq_xprt = xprt;
pool->sp_stats.threads_woken++; pool->sp_stats.threads_woken++;
wake_up(&rqstp->rq_wait);
} else { } else {
dprintk("svc: transport %p put into queue\n", xprt); dprintk("svc: transport %p put into queue\n", xprt);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
pool->sp_stats.sockets_queued++; pool->sp_stats.sockets_queued++;
} }
out_unlock:
spin_unlock_bh(&pool->sp_lock); spin_unlock_bh(&pool->sp_lock);
put_cpu();
} }
/* /*
...@@ -509,7 +506,7 @@ void svc_wake_up(struct svc_serv *serv) ...@@ -509,7 +506,7 @@ void svc_wake_up(struct svc_serv *serv)
svc_thread_dequeue(pool, rqstp); svc_thread_dequeue(pool, rqstp);
rqstp->rq_xprt = NULL; rqstp->rq_xprt = NULL;
*/ */
wake_up(&rqstp->rq_wait); wake_up_process(rqstp->rq_task);
} else } else
pool->sp_task_pending = 1; pool->sp_task_pending = 1;
spin_unlock_bh(&pool->sp_lock); spin_unlock_bh(&pool->sp_lock);
...@@ -628,8 +625,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) ...@@ -628,8 +625,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
{ {
struct svc_xprt *xprt; struct svc_xprt *xprt;
struct svc_pool *pool = rqstp->rq_pool; struct svc_pool *pool = rqstp->rq_pool;
DECLARE_WAITQUEUE(wait, current); long time_left = 0;
long time_left;
/* Normally we will wait up to 5 seconds for any required /* Normally we will wait up to 5 seconds for any required
* cache information to be provided. * cache information to be provided.
...@@ -651,40 +647,32 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) ...@@ -651,40 +647,32 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
} else { } else {
if (pool->sp_task_pending) { if (pool->sp_task_pending) {
pool->sp_task_pending = 0; pool->sp_task_pending = 0;
spin_unlock_bh(&pool->sp_lock); xprt = ERR_PTR(-EAGAIN);
return ERR_PTR(-EAGAIN); goto out;
} }
/* No data pending. Go to sleep */
svc_thread_enqueue(pool, rqstp);
/* /*
* We have to be able to interrupt this wait * We have to be able to interrupt this wait
* to bring down the daemons ... * to bring down the daemons ...
*/ */
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
/* /* No data pending. Go to sleep */
* checking kthread_should_stop() here allows us to avoid svc_thread_enqueue(pool, rqstp);
* locking and signalling when stopping kthreads that call
* svc_recv. If the thread has already been woken up, then
* we can exit here without sleeping. If not, then it
* it'll be woken up quickly during the schedule_timeout
*/
if (kthread_should_stop()) {
set_current_state(TASK_RUNNING);
spin_unlock_bh(&pool->sp_lock);
return ERR_PTR(-EINTR);
}
add_wait_queue(&rqstp->rq_wait, &wait);
spin_unlock_bh(&pool->sp_lock); spin_unlock_bh(&pool->sp_lock);
time_left = schedule_timeout(timeout); if (!(signalled() || kthread_should_stop())) {
time_left = schedule_timeout(timeout);
__set_current_state(TASK_RUNNING);
try_to_freeze(); try_to_freeze();
xprt = rqstp->rq_xprt;
if (xprt != NULL)
return xprt;
} else
__set_current_state(TASK_RUNNING);
spin_lock_bh(&pool->sp_lock); spin_lock_bh(&pool->sp_lock);
remove_wait_queue(&rqstp->rq_wait, &wait);
if (!time_left) if (!time_left)
pool->sp_stats.threads_timedout++; pool->sp_stats.threads_timedout++;
...@@ -699,6 +687,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) ...@@ -699,6 +687,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
return ERR_PTR(-EAGAIN); return ERR_PTR(-EAGAIN);
} }
} }
out:
spin_unlock_bh(&pool->sp_lock); spin_unlock_bh(&pool->sp_lock);
return xprt; return xprt;
} }
...@@ -744,7 +733,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) ...@@ -744,7 +733,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
svc_add_new_temp_xprt(serv, newxpt); svc_add_new_temp_xprt(serv, newxpt);
else else
module_put(xprt->xpt_class->xcl_owner); module_put(xprt->xpt_class->xcl_owner);
} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { } else {
/* XPT_DATA|XPT_DEFERRED case: */ /* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, rqstp->rq_pool->sp_id, xprt, rqstp, rqstp->rq_pool->sp_id, xprt,
...@@ -781,10 +770,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) ...@@ -781,10 +770,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
printk(KERN_ERR printk(KERN_ERR
"svc_recv: service %p, transport not NULL!\n", "svc_recv: service %p, transport not NULL!\n",
rqstp); rqstp);
if (waitqueue_active(&rqstp->rq_wait))
printk(KERN_ERR
"svc_recv: service %p, wait queue active!\n",
rqstp);
err = svc_alloc_arg(rqstp); err = svc_alloc_arg(rqstp);
if (err) if (err)
......
...@@ -311,19 +311,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) ...@@ -311,19 +311,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
return len; return len;
} }
/*
 * Check input queue length
 *
 * Issues a TIOCINQ ioctl on the socket behind @svsk (svsk->sk_sock) via
 * kernel_sock_ioctl(), which stores its result in the local "avail".
 *
 * Returns "avail" when the ioctl's return value is >= 0; otherwise the
 * (negative) ioctl return value is passed back unchanged.
 */
static int svc_recv_available(struct svc_sock *svsk)
{
struct socket *sock = svsk->sk_sock;
int avail, err;
/* the ioctl argument is the address of "avail", cast to unsigned long */
err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
return (err >= 0)? avail : err;
}
/* /*
* Generic recvfrom routine. * Generic recvfrom routine.
*/ */
...@@ -339,8 +326,14 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, ...@@ -339,8 +326,14 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
rqstp->rq_xprt_hlen = 0; rqstp->rq_xprt_hlen = 0;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
msg.msg_flags); msg.msg_flags);
/* If we read a full record, then assume there may be more
* data to read (stream based sockets only!)
*/
if (len == buflen)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
svsk, iov[0].iov_base, iov[0].iov_len, len); svsk, iov[0].iov_base, iov[0].iov_len, len);
...@@ -980,8 +973,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) ...@@ -980,8 +973,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
unsigned int want; unsigned int want;
int len; int len;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
struct kvec iov; struct kvec iov;
...@@ -1036,7 +1027,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) ...@@ -1036,7 +1027,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
"%s: Got unrecognized reply: " "%s: Got unrecognized reply: "
"calldir 0x%x xpt_bc_xprt %p xid %08x\n", "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
__func__, ntohl(calldir), __func__, ntohl(calldir),
bc_xprt, xid); bc_xprt, ntohl(xid));
return -EAGAIN; return -EAGAIN;
} }
...@@ -1073,8 +1064,6 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) ...@@ -1073,8 +1064,6 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
static void svc_tcp_fragment_received(struct svc_sock *svsk) static void svc_tcp_fragment_received(struct svc_sock *svsk)
{ {
/* If we have more data, signal svc_xprt_enqueue() to try again */ /* If we have more data, signal svc_xprt_enqueue() to try again */
if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
dprintk("svc: TCP %s record (%d bytes)\n", dprintk("svc: TCP %s record (%d bytes)\n",
svc_sock_final_rec(svsk) ? "final" : "nonfinal", svc_sock_final_rec(svsk) ? "final" : "nonfinal",
svc_sock_reclen(svsk)); svc_sock_reclen(svsk));
......
...@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = { ...@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_name = "rdma", .xcl_name = "rdma",
.xcl_owner = THIS_MODULE, .xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_ops, .xcl_ops = &svc_rdma_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
.xcl_ident = XPRT_TRANSPORT_RDMA, .xcl_ident = XPRT_TRANSPORT_RDMA,
}; };
......
...@@ -51,6 +51,7 @@ ...@@ -51,6 +51,7 @@
#include <linux/sunrpc/clnt.h> /* rpc_xprt */ #include <linux/sunrpc/clnt.h> /* rpc_xprt */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
#include <linux/sunrpc/svc.h> /* RPCSVC_MAXPAYLOAD */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ #define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ #define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
...@@ -392,4 +393,10 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep; ...@@ -392,4 +393,10 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
/* Workqueue created in svc_rdma.c */ /* Workqueue created in svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq; extern struct workqueue_struct *svc_rdma_wq;
/*
 * RPCSVC_MAXPAYLOAD_RDMA is the smaller of RPCSVC_MAXPAYLOAD and
 * (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT), chosen at preprocessing time.
 * It is the value svc_rdma_class uses for .xcl_max_payload.
 * NOTE(review): the shifted term presumably assumes one page per RDMA data
 * segment -- confirm.
 */
#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
#else
#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
#endif
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ #endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment