Commit 4a3a0eba authored by Trond Myklebust's avatar Trond Myklebust

Merge commit '24bab491' into client-4.2

- Pull in patch 'NFSD: Implement SEEK' from Bruce's nfsd-next tree
  for dependencies.
parents 2ce7598c 24bab491
......@@ -233,9 +233,13 @@ if NETWORK_FILESYSTEMS
source "fs/nfs/Kconfig"
source "fs/nfsd/Kconfig"
config GRACE_PERIOD
tristate
config LOCKD
tristate
depends on FILE_LOCKING
select GRACE_PERIOD
config LOCKD_V4
bool
......@@ -249,7 +253,7 @@ config NFS_ACL_SUPPORT
config NFS_COMMON
bool
depends on NFSD || NFS_FS
depends on NFSD || NFS_FS || LOCKD
default y
source "net/sunrpc/Kconfig"
......
......@@ -5,6 +5,7 @@
obj-$(CONFIG_LOCKD) += lockd.o
lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
svcshare.o svcproc.o svcsubs.o mon.o xdr.o grace.o
svcshare.o svcproc.o svcsubs.o mon.o xdr.o
lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
lockd-objs-$(CONFIG_PROC_FS) += procfs.o
lockd-objs := $(lockd-objs-y)
......@@ -11,7 +11,6 @@ struct lockd_net {
struct delayed_work grace_period_end;
struct lock_manager lockd_manager;
struct list_head grace_list;
spinlock_t nsm_clnt_lock;
unsigned int nsm_users;
......
/*
* Procfs support for lockd
*
* Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com>
*/
#include <linux/fs.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include "netns.h"
#include "procfs.h"
/*
* We only allow strings that start with 'Y', 'y', or '1'.
*/
static ssize_t
nlm_end_grace_write(struct file *file, const char __user *buf, size_t size,
loff_t *pos)
{
char *data;
struct lockd_net *ln = net_generic(current->nsproxy->net_ns,
lockd_net_id);
if (size < 1)
return -EINVAL;
data = simple_transaction_get(file, buf, size);
if (IS_ERR(data))
return PTR_ERR(data);
switch(data[0]) {
case 'Y':
case 'y':
case '1':
locks_end_grace(&ln->lockd_manager);
break;
default:
return -EINVAL;
}
return size;
}
static ssize_t
nlm_end_grace_read(struct file *file, char __user *buf, size_t size,
loff_t *pos)
{
struct lockd_net *ln = net_generic(current->nsproxy->net_ns,
lockd_net_id);
char resp[3];
resp[0] = list_empty(&ln->lockd_manager.list) ? 'Y' : 'N';
resp[1] = '\n';
resp[2] = '\0';
return simple_read_from_buffer(buf, size, pos, resp, sizeof(resp));
}
static const struct file_operations lockd_end_grace_operations = {
.write = nlm_end_grace_write,
.read = nlm_end_grace_read,
.llseek = default_llseek,
.release = simple_transaction_release,
.owner = THIS_MODULE,
};
int __init
lockd_create_procfs(void)
{
struct proc_dir_entry *entry;
entry = proc_mkdir("fs/lockd", NULL);
if (!entry)
return -ENOMEM;
entry = proc_create("nlm_end_grace", S_IRUGO|S_IWUSR, entry,
&lockd_end_grace_operations);
if (!entry) {
remove_proc_entry("fs/lockd", NULL);
return -ENOMEM;
}
return 0;
}
void __exit
lockd_remove_procfs(void)
{
remove_proc_entry("fs/lockd/nlm_end_grace", NULL);
remove_proc_entry("fs/lockd", NULL);
}
/*
* Procfs support for lockd
*
* Copyright (c) 2014 Jeff Layton <jlayton@primarydata.com>
*/
#ifndef _LOCKD_PROCFS_H
#define _LOCKD_PROCFS_H
#include <linux/kconfig.h>
#if IS_ENABLED(CONFIG_PROC_FS)
int lockd_create_procfs(void);
void lockd_remove_procfs(void);
#else
static inline int
lockd_create_procfs(void)
{
return 0;
}
static inline void
lockd_remove_procfs(void)
{
return;
}
#endif /* IS_ENABLED(CONFIG_PROC_FS) */
#endif /* _LOCKD_PROCFS_H */
......@@ -36,6 +36,7 @@
#include <linux/nfs.h>
#include "netns.h"
#include "procfs.h"
#define NLMDBG_FACILITY NLMDBG_SVC
#define LOCKD_BUFSIZE (1024 + NLMSVC_XDRSIZE)
......@@ -306,13 +307,16 @@ static int lockd_start_svc(struct svc_serv *serv)
svc_sock_update_bufs(serv);
serv->sv_maxconn = nlm_max_connections;
nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, "%s", serv->sv_name);
nlmsvc_task = kthread_create(lockd, nlmsvc_rqst, "%s", serv->sv_name);
if (IS_ERR(nlmsvc_task)) {
error = PTR_ERR(nlmsvc_task);
printk(KERN_WARNING
"lockd_up: kthread_run failed, error=%d\n", error);
goto out_task;
}
nlmsvc_rqst->rq_task = nlmsvc_task;
wake_up_process(nlmsvc_task);
dprintk("lockd_up: service started\n");
return 0;
......@@ -583,7 +587,7 @@ static int lockd_init_net(struct net *net)
struct lockd_net *ln = net_generic(net, lockd_net_id);
INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
INIT_LIST_HEAD(&ln->grace_list);
INIT_LIST_HEAD(&ln->lockd_manager.list);
spin_lock_init(&ln->nsm_clnt_lock);
return 0;
}
......@@ -617,8 +621,15 @@ static int __init init_nlm(void)
err = register_pernet_subsys(&lockd_net_ops);
if (err)
goto err_pernet;
err = lockd_create_procfs();
if (err)
goto err_procfs;
return 0;
err_procfs:
unregister_pernet_subsys(&lockd_net_ops);
err_pernet:
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nlm_sysctl_table);
......@@ -631,6 +642,7 @@ static void __exit exit_nlm(void)
{
/* FIXME: delete all NLM clients */
nlm_shutdown_hosts();
lockd_remove_procfs();
unregister_pernet_subsys(&lockd_net_ops);
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nlm_sysctl_table);
......
......@@ -235,7 +235,7 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
cb_info->serv = serv;
cb_info->rqst = rqstp;
cb_info->task = kthread_run(callback_svc, cb_info->rqst,
cb_info->task = kthread_create(callback_svc, cb_info->rqst,
"nfsv4.%u-svc", minorversion);
if (IS_ERR(cb_info->task)) {
ret = PTR_ERR(cb_info->task);
......@@ -244,6 +244,8 @@ static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
cb_info->task = NULL;
return ret;
}
rqstp->rq_task = cb_info->task;
wake_up_process(cb_info->task);
dprintk("nfs_callback_up: service started\n");
return 0;
}
......
......@@ -3,5 +3,6 @@
#
obj-$(CONFIG_NFS_ACL_SUPPORT) += nfs_acl.o
nfs_acl-objs := nfsacl.o
obj-$(CONFIG_GRACE_PERIOD) += grace.o
/*
* Common code for control of lockd and nfsv4 grace periods.
*
* Transplanted from lockd code
*/
#include <linux/module.h>
#include <linux/lockd/bind.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/fs.h>
#include "netns.h"
static int grace_net_id;
static DEFINE_SPINLOCK(grace_lock);
/**
* locks_start_grace
* @net: net namespace that this lock manager belongs to
* @lm: who this grace period is for
*
* A grace period is a period during which locks should not be given
......@@ -21,18 +24,20 @@ static DEFINE_SPINLOCK(grace_lock);
*
* This function is called to start a grace period.
*/
void locks_start_grace(struct net *net, struct lock_manager *lm)
void
locks_start_grace(struct net *net, struct lock_manager *lm)
{
struct lockd_net *ln = net_generic(net, lockd_net_id);
struct list_head *grace_list = net_generic(net, grace_net_id);
spin_lock(&grace_lock);
list_add(&lm->list, &ln->grace_list);
list_add(&lm->list, grace_list);
spin_unlock(&grace_lock);
}
EXPORT_SYMBOL_GPL(locks_start_grace);
/**
* locks_end_grace
* @net: net namespace that this lock manager belongs to
* @lm: who this grace period is for
*
* Call this function to state that the given lock manager is ready to
......@@ -41,7 +46,8 @@ EXPORT_SYMBOL_GPL(locks_start_grace);
* Note that callers count on it being safe to call this more than once,
* and the second call should be a no-op.
*/
void locks_end_grace(struct lock_manager *lm)
void
locks_end_grace(struct lock_manager *lm)
{
spin_lock(&grace_lock);
list_del_init(&lm->list);
......@@ -56,10 +62,52 @@ EXPORT_SYMBOL_GPL(locks_end_grace);
* to answer ordinary lock requests, and when they should accept only
* lock reclaims.
*/
int locks_in_grace(struct net *net)
int
locks_in_grace(struct net *net)
{
struct lockd_net *ln = net_generic(net, lockd_net_id);
struct list_head *grace_list = net_generic(net, grace_net_id);
return !list_empty(&ln->grace_list);
return !list_empty(grace_list);
}
EXPORT_SYMBOL_GPL(locks_in_grace);
static int __net_init
grace_init_net(struct net *net)
{
struct list_head *grace_list = net_generic(net, grace_net_id);
INIT_LIST_HEAD(grace_list);
return 0;
}
static void __net_exit
grace_exit_net(struct net *net)
{
struct list_head *grace_list = net_generic(net, grace_net_id);
BUG_ON(!list_empty(grace_list));
}
static struct pernet_operations grace_net_ops = {
.init = grace_init_net,
.exit = grace_exit_net,
.id = &grace_net_id,
.size = sizeof(struct list_head),
};
static int __init
init_grace(void)
{
return register_pernet_subsys(&grace_net_ops);
}
static void __exit
exit_grace(void)
{
unregister_pernet_subsys(&grace_net_ops);
}
MODULE_AUTHOR("Jeff Layton <jlayton@primarydata.com>");
MODULE_LICENSE("GPL");
module_init(init_grace)
module_exit(exit_grace)
......@@ -71,6 +71,7 @@ config NFSD_V4
select FS_POSIX_ACL
select SUNRPC_GSS
select CRYPTO
select GRACE_PERIOD
help
This option enables support in your system's NFS server for
version 4 of the NFS protocol (RFC 3530).
......@@ -94,9 +95,6 @@ config NFSD_V4_SECURITY_LABEL
If you do not wish to enable fine-grained security labels SELinux or
Smack policies on NFSv4 files, say N.
WARNING: there is still a chance of backwards-incompatible protocol changes.
For now we recommend "Y" only for developers and testers.
config NFSD_FAULT_INJECTION
bool "NFS server manual fault injection"
depends on NFSD_V4 && DEBUG_KERNEL
......
......@@ -18,7 +18,6 @@
* is much larger than a sockaddr_in6.
*/
struct svc_cacherep {
struct hlist_node c_hash;
struct list_head c_lru;
unsigned char c_state, /* unused, inprog, done */
......
......@@ -1145,6 +1145,7 @@ static struct flags {
{ NFSEXP_ALLSQUASH, {"all_squash", ""}},
{ NFSEXP_ASYNC, {"async", "sync"}},
{ NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}},
{ NFSEXP_NOREADDIRPLUS, {"nordirplus", ""}},
{ NFSEXP_NOHIDE, {"nohide", ""}},
{ NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
......
......@@ -223,11 +223,6 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
attr = &argp->attrs;
/* Get the directory inode */
nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
if (nfserr)
RETURN_STATUS(nfserr);
/* Unfudge the mode bits */
attr->ia_mode &= ~S_IFMT;
if (!(attr->ia_valid & ATTR_MODE)) {
......@@ -471,6 +466,14 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
resp->buflen = resp->count;
resp->rqstp = rqstp;
offset = argp->cookie;
nfserr = fh_verify(rqstp, &resp->fh, S_IFDIR, NFSD_MAY_NOP);
if (nfserr)
RETURN_STATUS(nfserr);
if (resp->fh.fh_export->ex_flags & NFSEXP_NOREADDIRPLUS)
RETURN_STATUS(nfserr_notsupp);
nfserr = nfsd_readdir(rqstp, &resp->fh,
&offset,
&resp->common,
......
......@@ -49,11 +49,8 @@ static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
/* Index of predefined Linux callback client operations */
enum {
NFSPROC4_CLNT_CB_NULL = 0,
NFSPROC4_CLNT_CB_RECALL,
NFSPROC4_CLNT_CB_SEQUENCE,
};
#define to_delegation(cb) \
container_of(cb, struct nfs4_delegation, dl_recall)
struct nfs4_cb_compound_hdr {
/* args */
......@@ -494,7 +491,7 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
const struct nfsd4_callback *cb)
{
const struct nfs4_delegation *args = cb->cb_op;
const struct nfs4_delegation *dp = to_delegation(cb);
struct nfs4_cb_compound_hdr hdr = {
.ident = cb->cb_clp->cl_cb_ident,
.minorversion = cb->cb_minorversion,
......@@ -502,7 +499,7 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_cb_compound4args(xdr, &hdr);
encode_cb_sequence4args(xdr, cb, &hdr);
encode_cb_recall4args(xdr, args, &hdr);
encode_cb_recall4args(xdr, dp, &hdr);
encode_cb_nops(&hdr);
}
......@@ -746,27 +743,6 @@ static const struct rpc_call_ops nfsd4_cb_probe_ops = {
static struct workqueue_struct *callback_wq;
static void run_nfsd4_cb(struct nfsd4_callback *cb)
{
queue_work(callback_wq, &cb->cb_work);
}
static void do_probe_callback(struct nfs4_client *clp)
{
struct nfsd4_callback *cb = &clp->cl_cb_null;
cb->cb_op = NULL;
cb->cb_clp = clp;
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL];
cb->cb_msg.rpc_argp = NULL;
cb->cb_msg.rpc_resp = NULL;
cb->cb_ops = &nfsd4_cb_probe_ops;
run_nfsd4_cb(cb);
}
/*
* Poke the callback thread to process any updates to the callback
* parameters, and send a null probe.
......@@ -775,7 +751,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp)
{
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
do_probe_callback(clp);
nfsd4_run_cb(&clp->cl_cb_null);
}
void nfsd4_probe_callback_sync(struct nfs4_client *clp)
......@@ -847,23 +823,9 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
rpc_wake_up_next(&clp->cl_cb_waitq);
dprintk("%s: freed slot, new seqid=%d\n", __func__,
clp->cl_cb_session->se_cb_seq_nr);
/* We're done looking into the sequence information */
task->tk_msg.rpc_resp = NULL;
}
}
static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
{
struct nfsd4_callback *cb = calldata;
struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
nfsd4_cb_done(task, calldata);
if (current_rpc_client != task->tk_client) {
if (clp->cl_cb_client != task->tk_client) {
/* We're shutting down or changing cl_cb_client; leave
* it to nfsd4_process_cb_update to restart the call if
* necessary. */
......@@ -872,47 +834,42 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
if (cb->cb_done)
return;
switch (task->tk_status) {
switch (cb->cb_ops->done(cb, task)) {
case 0:
cb->cb_done = true;
task->tk_status = 0;
rpc_restart_call_prepare(task);
return;
case -EBADHANDLE:
case -NFS4ERR_BAD_STATEID:
/* Race: client probably got cb_recall
* before open reply granting delegation */
case 1:
break;
default:
case -1:
/* Network partition? */
nfsd4_mark_cb_down(clp, task->tk_status);
break;
default:
BUG();
}
if (dp->dl_retries--) {
rpc_delay(task, 2*HZ);
task->tk_status = 0;
rpc_restart_call_prepare(task);
return;
}
nfsd4_mark_cb_down(clp, task->tk_status);
cb->cb_done = true;
}
static void nfsd4_cb_recall_release(void *calldata)
static void nfsd4_cb_release(void *calldata)
{
struct nfsd4_callback *cb = calldata;
struct nfs4_client *clp = cb->cb_clp;
struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
if (cb->cb_done) {
spin_lock(&clp->cl_lock);
list_del(&cb->cb_per_client);
spin_unlock(&clp->cl_lock);
nfs4_put_stid(&dp->dl_stid);
cb->cb_ops->release(cb);
}
}
static const struct rpc_call_ops nfsd4_cb_recall_ops = {
static const struct rpc_call_ops nfsd4_cb_ops = {
.rpc_call_prepare = nfsd4_cb_prepare,
.rpc_call_done = nfsd4_cb_recall_done,
.rpc_release = nfsd4_cb_recall_release,
.rpc_call_done = nfsd4_cb_done,
.rpc_release = nfsd4_cb_release,
};
int nfsd4_create_callback_queue(void)
......@@ -937,16 +894,10 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
* instead, nfsd4_run_cb_null() will detect the killed
* client, destroy the rpc client, and stop:
*/
do_probe_callback(clp);
nfsd4_run_cb(&clp->cl_cb_null);
flush_workqueue(callback_wq);
}
static void nfsd4_release_cb(struct nfsd4_callback *cb)
{
if (cb->cb_ops->rpc_release)
cb->cb_ops->rpc_release(cb);
}
/* requires cl_lock: */
static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
{
......@@ -1009,63 +960,49 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
}
/* Yay, the callback channel's back! Restart any callbacks: */
list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client)
run_nfsd4_cb(cb);
queue_work(callback_wq, &cb->cb_work);
}
static void
nfsd4_run_callback_rpc(struct nfsd4_callback *cb)
nfsd4_run_cb_work(struct work_struct *work)
{
struct nfsd4_callback *cb =
container_of(work, struct nfsd4_callback, cb_work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
if (cb->cb_ops && cb->cb_ops->prepare)
cb->cb_ops->prepare(cb);
if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
if (!clnt) {
/* Callback channel broken, or client killed; give up: */
nfsd4_release_cb(cb);
if (cb->cb_ops && cb->cb_ops->release)
cb->cb_ops->release(cb);
return;
}
cb->cb_msg.rpc_cred = clp->cl_cb_cred;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN,
cb->cb_ops, cb);
cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb);
}
void
nfsd4_run_cb_null(struct work_struct *w)
void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op)
{
struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
cb_work);
nfsd4_run_callback_rpc(cb);
}
void
nfsd4_run_cb_recall(struct work_struct *w)
{
struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback,
cb_work);
nfsd4_prepare_cb_recall(cb->cb_op);
nfsd4_run_callback_rpc(cb);
}
void nfsd4_cb_recall(struct nfs4_delegation *dp)
{
struct nfsd4_callback *cb = &dp->dl_recall;
struct nfs4_client *clp = dp->dl_stid.sc_client;
dp->dl_retries = 1;
cb->cb_op = dp;
cb->cb_clp = clp;
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL];
cb->cb_msg.rpc_proc = &nfs4_cb_procedures[op];
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
cb->cb_ops = &nfsd4_cb_recall_ops;
cb->cb_ops = ops;
INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
INIT_LIST_HEAD(&cb->cb_per_client);
cb->cb_done = true;
}
run_nfsd4_cb(&dp->dl_recall);
void nfsd4_run_cb(struct nfsd4_callback *cb)
{
queue_work(callback_wq, &cb->cb_work);
}
......@@ -215,7 +215,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
memset(&ent, 0, sizeof(ent));
/* Authentication name */
if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
len = qword_get(&buf, buf1, PAGE_SIZE);
if (len <= 0 || len >= IDMAP_NAMESZ)
goto out;
memcpy(ent.authname, buf1, sizeof(ent.authname));
......@@ -245,12 +246,10 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
/* Name */
error = -EINVAL;
len = qword_get(&buf, buf1, PAGE_SIZE);
if (len < 0)
if (len < 0 || len >= IDMAP_NAMESZ)
goto out;
if (len == 0)
set_bit(CACHE_NEGATIVE, &ent.h.flags);
else if (len >= IDMAP_NAMESZ)
goto out;
else
memcpy(ent.name, buf1, sizeof(ent.name));
error = -ENOMEM;
......@@ -259,15 +258,12 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
goto out;
cache_put(&res->h, cd);
error = 0;
out:
kfree(buf1);
return error;
}
static struct ent *
idtoname_lookup(struct cache_detail *cd, struct ent *item)
{
......@@ -368,7 +364,7 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
{
struct ent ent, *res;
char *buf1;
int error = -EINVAL;
int len, error = -EINVAL;
if (buf[buflen - 1] != '\n')
return (-EINVAL);
......@@ -381,7 +377,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
memset(&ent, 0, sizeof(ent));
/* Authentication name */
if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
len = qword_get(&buf, buf1, PAGE_SIZE);
if (len <= 0 || len >= IDMAP_NAMESZ)
goto out;
memcpy(ent.authname, buf1, sizeof(ent.authname));
......@@ -392,8 +389,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
/* Name */
error = qword_get(&buf, buf1, PAGE_SIZE);
if (error <= 0 || error >= IDMAP_NAMESZ)
len = qword_get(&buf, buf1, PAGE_SIZE);
if (len <= 0 || len >= IDMAP_NAMESZ)
goto out;
memcpy(ent.name, buf1, sizeof(ent.name));
......@@ -421,7 +418,6 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
error = 0;
out:
kfree(buf1);
return (error);
}
......
......@@ -1013,6 +1013,49 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
static __be32
nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_seek *seek)
{
int whence;
__be32 status;
struct file *file;
status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
&seek->seek_stateid,
RD_STATE, &file);
if (status) {
dprintk("NFSD: nfsd4_seek: couldn't process stateid!\n");
return status;
}
switch (seek->seek_whence) {
case NFS4_CONTENT_DATA:
whence = SEEK_DATA;
break;
case NFS4_CONTENT_HOLE:
whence = SEEK_HOLE;
break;
default:
status = nfserr_union_notsupp;
goto out;
}
/*
* Note: This call does change file->f_pos, but nothing in NFSD
* should ever file->f_pos.
*/
seek->seek_pos = vfs_llseek(file, seek->seek_offset, whence);
if (seek->seek_pos < 0)
status = nfserrno(seek->seek_pos);
else if (seek->seek_pos >= i_size_read(file_inode(file)))
seek->seek_eof = true;
out:
fput(file);
return status;
}
/* This routine never returns NFS_OK! If there are no other errors, it
* will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the
* attributes matched. VERIFY is implemented by mapping NFSERR_SAME
......@@ -1881,6 +1924,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
/* NFSv4.2 operations */
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
},
};
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
......
......@@ -58,7 +58,7 @@ struct nfsd4_client_tracking_ops {
void (*create)(struct nfs4_client *);
void (*remove)(struct nfs4_client *);
int (*check)(struct nfs4_client *);
void (*grace_done)(struct nfsd_net *, time_t);
void (*grace_done)(struct nfsd_net *);
};
/* Globals */
......@@ -188,7 +188,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
status = mnt_want_write_file(nn->rec_file);
if (status)
return;
goto out_creds;
dir = nn->rec_file->f_path.dentry;
/* lock the parent */
......@@ -228,6 +228,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
user_recovery_dirname);
}
mnt_drop_write_file(nn->rec_file);
out_creds:
nfs4_reset_creds(original_cred);
}
......@@ -392,7 +393,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
}
static void
nfsd4_recdir_purge_old(struct nfsd_net *nn, time_t boot_time)
nfsd4_recdir_purge_old(struct nfsd_net *nn)
{
int status;
......@@ -479,6 +480,16 @@ nfsd4_init_recdir(struct net *net)
return status;
}
static void
nfsd4_shutdown_recdir(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
if (!nn->rec_file)
return;
fput(nn->rec_file);
nn->rec_file = NULL;
}
static int
nfs4_legacy_state_init(struct net *net)
......@@ -512,10 +523,13 @@ nfsd4_load_reboot_recovery_data(struct net *net)
int status;
status = nfsd4_init_recdir(net);
if (!status)
if (status)
return status;
status = nfsd4_recdir_load(net);
if (status)
printk(KERN_ERR "NFSD: Failure reading reboot recovery data\n");
nfsd4_shutdown_recdir(net);
return status;
}
......@@ -545,22 +559,13 @@ nfsd4_legacy_tracking_init(struct net *net)
return status;
}
static void
nfsd4_shutdown_recdir(struct nfsd_net *nn)
{
if (!nn->rec_file)
return;
fput(nn->rec_file);
nn->rec_file = NULL;
}
static void
nfsd4_legacy_tracking_exit(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
nfs4_release_reclaim(nn);
nfsd4_shutdown_recdir(nn);
nfsd4_shutdown_recdir(net);
nfs4_legacy_state_shutdown(net);
}
......@@ -1016,7 +1021,7 @@ nfsd4_cld_check(struct nfs4_client *clp)
}
static void
nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time)
nfsd4_cld_grace_done(struct nfsd_net *nn)
{
int ret;
struct cld_upcall *cup;
......@@ -1029,7 +1034,7 @@ nfsd4_cld_grace_done(struct nfsd_net *nn, time_t boot_time)
}
cup->cu_msg.cm_cmd = Cld_GraceDone;
cup->cu_msg.cm_u.cm_gracetime = (int64_t)boot_time;
cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
if (!ret)
ret = cup->cu_msg.cm_status;
......@@ -1062,6 +1067,8 @@ MODULE_PARM_DESC(cltrack_legacy_disable,
#define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR="
#define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR="
#define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION="
#define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START="
static char *
nfsd4_cltrack_legacy_topdir(void)
......@@ -1126,10 +1133,60 @@ nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name)
return result;
}
static char *
nfsd4_cltrack_client_has_session(struct nfs4_client *clp)
{
int copied;
size_t len;
char *result;
/* prefix + Y/N character + terminating NULL */
len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1;
result = kmalloc(len, GFP_KERNEL);
if (!result)
return result;
copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c",
clp->cl_minorversion ? 'Y' : 'N');
if (copied >= len) {
/* just return nothing if output was truncated */
kfree(result);
return NULL;
}
return result;
}
static char *
nfsd4_cltrack_grace_start(time_t grace_start)
{
int copied;
size_t len;
char *result;
/* prefix + max width of int64_t string + terminating NULL */
len = strlen(GRACE_START_ENV_PREFIX) + 22 + 1;
result = kmalloc(len, GFP_KERNEL);
if (!result)
return result;
copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld",
grace_start);
if (copied >= len) {
/* just return nothing if output was truncated */
kfree(result);
return NULL;
}
return result;
}
static int
nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy)
nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1)
{
char *envp[2];
char *envp[3];
char *argv[4];
int ret;
......@@ -1140,10 +1197,12 @@ nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *legacy)
dprintk("%s: cmd: %s\n", __func__, cmd);
dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)");
dprintk("%s: legacy: %s\n", __func__, legacy ? legacy : "(null)");
dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)");
dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)");
envp[0] = legacy;
envp[1] = NULL;
envp[0] = env0;
envp[1] = env1;
envp[2] = NULL;
argv[0] = (char *)cltrack_prog;
argv[1] = cmd;
......@@ -1187,28 +1246,78 @@ bin_to_hex_dup(const unsigned char *src, int srclen)
}
static int
nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net)
nfsd4_umh_cltrack_init(struct net *net)
{
int ret;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
/* XXX: The usermode helper s not working in container yet. */
if (net != &init_net) {
WARN(1, KERN_ERR "NFSD: attempt to initialize umh client "
"tracking in a container!\n");
return -EINVAL;
}
return nfsd4_umh_cltrack_upcall("init", NULL, NULL);
ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
kfree(grace_start);
return ret;
}
static void
nfsd4_cltrack_upcall_lock(struct nfs4_client *clp)
{
wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK,
TASK_UNINTERRUPTIBLE);
}
static void
nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp)
{
smp_mb__before_atomic();
clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags);
smp_mb__after_atomic();
wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK);
}
static void
nfsd4_umh_cltrack_create(struct nfs4_client *clp)
{
char *hexid;
char *hexid, *has_session, *grace_start;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
/*
* With v4.0 clients, there's little difference in outcome between a
* create and check operation, and we can end up calling into this
* function multiple times per client (once for each openowner). So,
* for v4.0 clients skip upcalling once the client has been recorded
* on stable storage.
*
* For v4.1+ clients, the outcome of the two operations is different,
* so we must ensure that we upcall for the create operation. v4.1+
* clients call this on RECLAIM_COMPLETE though, so we should only end
* up doing a single create upcall per client.
*/
if (clp->cl_minorversion == 0 &&
test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
if (!hexid) {
dprintk("%s: can't allocate memory for upcall!\n", __func__);
return;
}
nfsd4_umh_cltrack_upcall("create", hexid, NULL);
has_session = nfsd4_cltrack_client_has_session(clp);
grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
nfsd4_cltrack_upcall_lock(clp);
if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start))
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
nfsd4_cltrack_upcall_unlock(clp);
kfree(has_session);
kfree(grace_start);
kfree(hexid);
}
......@@ -1217,12 +1326,21 @@ nfsd4_umh_cltrack_remove(struct nfs4_client *clp)
{
char *hexid;
if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return;
hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
if (!hexid) {
dprintk("%s: can't allocate memory for upcall!\n", __func__);
return;
}
nfsd4_umh_cltrack_upcall("remove", hexid, NULL);
nfsd4_cltrack_upcall_lock(clp);
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) &&
nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0)
clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
nfsd4_cltrack_upcall_unlock(clp);
kfree(hexid);
}
......@@ -1230,30 +1348,45 @@ static int
nfsd4_umh_cltrack_check(struct nfs4_client *clp)
{
int ret;
char *hexid, *legacy;
char *hexid, *has_session, *legacy;
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
return 0;
hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
if (!hexid) {
dprintk("%s: can't allocate memory for upcall!\n", __func__);
return -ENOMEM;
}
has_session = nfsd4_cltrack_client_has_session(clp);
legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name);
ret = nfsd4_umh_cltrack_upcall("check", hexid, legacy);
nfsd4_cltrack_upcall_lock(clp);
if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) {
ret = 0;
} else {
ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy);
if (ret == 0)
set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
}
nfsd4_cltrack_upcall_unlock(clp);
kfree(has_session);
kfree(legacy);
kfree(hexid);
return ret;
}
static void
nfsd4_umh_cltrack_grace_done(struct nfsd_net __attribute__((unused)) *nn,
time_t boot_time)
nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
{
char *legacy;
char timestr[22]; /* FIXME: better way to determine max size? */
sprintf(timestr, "%ld", boot_time);
sprintf(timestr, "%ld", nn->boot_time);
legacy = nfsd4_cltrack_legacy_topdir();
nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy);
nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
kfree(legacy);
}
......@@ -1356,10 +1489,10 @@ nfsd4_client_record_check(struct nfs4_client *clp)
}
void
nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time)
nfsd4_record_grace_done(struct nfsd_net *nn)
{
if (nn->client_tracking_ops)
nn->client_tracking_ops->grace_done(nn, boot_time);
nn->client_tracking_ops->grace_done(nn);
}
static int
......
......@@ -96,6 +96,8 @@ static struct kmem_cache *deleg_slab;
static void free_session(struct nfsd4_session *);
static struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static bool is_session_dead(struct nfsd4_session *ses)
{
return ses->se_flags & NFS4_SESSION_DEAD;
......@@ -645,7 +647,9 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
INIT_LIST_HEAD(&dp->dl_perclnt);
INIT_LIST_HEAD(&dp->dl_recall_lru);
dp->dl_type = NFS4_OPEN_DELEGATE_READ;
INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall);
dp->dl_retries = 1;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
return dp;
out_dec:
atomic_long_dec(&num_delegations);
......@@ -673,15 +677,20 @@ nfs4_put_stid(struct nfs4_stid *s)
static void nfs4_put_deleg_lease(struct nfs4_file *fp)
{
lockdep_assert_held(&state_lock);
struct file *filp = NULL;
struct file_lock *fl;
if (!fp->fi_lease)
return;
if (atomic_dec_and_test(&fp->fi_delegees)) {
vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
spin_lock(&fp->fi_lock);
if (fp->fi_lease && atomic_dec_and_test(&fp->fi_delegees)) {
swap(filp, fp->fi_deleg_file);
fl = fp->fi_lease;
fp->fi_lease = NULL;
fput(fp->fi_deleg_file);
fp->fi_deleg_file = NULL;
}
spin_unlock(&fp->fi_lock);
if (filp) {
vfs_setlease(filp, F_UNLCK, &fl);
fput(filp);
}
}
......@@ -717,8 +726,6 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
list_del_init(&dp->dl_recall_lru);
list_del_init(&dp->dl_perfile);
spin_unlock(&fp->fi_lock);
if (fp)
nfs4_put_deleg_lease(fp);
}
static void destroy_delegation(struct nfs4_delegation *dp)
......@@ -726,6 +733,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
spin_lock(&state_lock);
unhash_delegation_locked(dp);
spin_unlock(&state_lock);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
}
......@@ -735,6 +743,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
WARN_ON(!list_empty(&dp->dl_recall_lru));
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
if (clp->cl_minorversion == 0)
nfs4_put_stid(&dp->dl_stid);
else {
......@@ -1635,6 +1645,7 @@ __destroy_client(struct nfs4_client *clp)
while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
}
while (!list_empty(&clp->cl_revoked)) {
......@@ -1862,7 +1873,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
free_client(clp);
return NULL;
}
INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null);
nfsd4_init_cb(&clp->cl_cb_null, clp, NULL, NFSPROC4_CLNT_CB_NULL);
clp->cl_time = get_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
copy_verf(clp, verf);
......@@ -3349,8 +3360,12 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
return ret;
}
void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp)
#define cb_to_delegation(cb) \
container_of(cb, struct nfs4_delegation, dl_recall)
static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb)
{
struct nfs4_delegation *dp = cb_to_delegation(cb);
struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
nfsd_net_id);
......@@ -3371,6 +3386,43 @@ void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp)
spin_unlock(&state_lock);
}
static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
struct rpc_task *task)
{
struct nfs4_delegation *dp = cb_to_delegation(cb);
switch (task->tk_status) {
case 0:
return 1;
case -EBADHANDLE:
case -NFS4ERR_BAD_STATEID:
/*
* Race: client probably got cb_recall before open reply
* granting delegation.
*/
if (dp->dl_retries--) {
rpc_delay(task, 2 * HZ);
return 0;
}
/*FALLTHRU*/
default:
return -1;
}
}
static void nfsd4_cb_recall_release(struct nfsd4_callback *cb)
{
struct nfs4_delegation *dp = cb_to_delegation(cb);
nfs4_put_stid(&dp->dl_stid);
}
static struct nfsd4_callback_ops nfsd4_cb_recall_ops = {
.prepare = nfsd4_cb_recall_prepare,
.done = nfsd4_cb_recall_done,
.release = nfsd4_cb_recall_release,
};
static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{
/*
......@@ -3381,7 +3433,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
* it's safe to take a reference.
*/
atomic_inc(&dp->dl_stid.sc_count);
nfsd4_cb_recall(dp);
nfsd4_run_cb(&dp->dl_recall);
}
/* Called from break_lease() with i_lock held. */
......@@ -3759,7 +3811,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
fl = locks_alloc_lock();
if (!fl)
return NULL;
locks_init_lock(fl);
fl->fl_lmops = &nfsd_lease_mng_ops;
fl->fl_flags = FL_DELEG;
fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
......@@ -4107,7 +4158,7 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
static void
void
nfsd4_end_grace(struct nfsd_net *nn)
{
/* do nothing if grace period already ended */
......@@ -4116,14 +4167,28 @@ nfsd4_end_grace(struct nfsd_net *nn)
dprintk("NFSD: end of grace period\n");
nn->grace_ended = true;
nfsd4_record_grace_done(nn, nn->boot_time);
/*
* If the server goes down again right now, an NFSv4
* client will still be allowed to reclaim after it comes back up,
* even if it hasn't yet had a chance to reclaim state this time.
*
*/
nfsd4_record_grace_done(nn);
/*
* At this point, NFSv4 clients can still reclaim. But if the
* server crashes, any that have not yet reclaimed will be out
* of luck on the next boot.
*
* (NFSv4.1+ clients are considered to have reclaimed once they
* call RECLAIM_COMPLETE. NFSv4.0 clients are considered to
* have reclaimed after their first OPEN.)
*/
locks_end_grace(&nn->nfsd4_manager);
/*
* Now that every NFSv4 client has had the chance to recover and
* to see the (possibly new, possibly shorter) lease time, we
* can safely set the next grace time to the current lease time:
* At this point, and once lockd and/or any other containers
* exit their grace period, further reclaims will fail and
* regular locking can resume.
*/
nn->nfsd4_grace = nn->nfsd4_lease;
}
static time_t
......@@ -5210,7 +5275,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
fp = lock_stp->st_stid.sc_file;
locks_init_lock(file_lock);
switch (lock->lk_type) {
case NFS4_READ_LT:
case NFS4_READW_LT:
......@@ -5354,7 +5418,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_jukebox;
goto out;
}
locks_init_lock(file_lock);
switch (lockt->lt_type) {
case NFS4_READ_LT:
case NFS4_READW_LT:
......@@ -5432,7 +5496,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_jukebox;
goto fput;
}
locks_init_lock(file_lock);
file_lock->fl_type = F_UNLCK;
file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner);
file_lock->fl_pid = current->tgid;
......@@ -5645,6 +5709,9 @@ nfs4_check_open_reclaim(clientid_t *clid,
if (status)
return nfserr_reclaim_bad;
if (test_bit(NFSD4_CLIENT_RECLAIM_COMPLETE, &cstate->clp->cl_flags))
return nfserr_no_grace;
if (nfsd4_client_record_check(cstate->clp))
return nfserr_reclaim_bad;
......@@ -6342,10 +6409,10 @@ nfs4_state_start_net(struct net *net)
ret = nfs4_state_create_net(net);
if (ret)
return ret;
nfsd4_client_tracking_init(net);
nn->boot_time = get_seconds();
locks_start_grace(net, &nn->nfsd4_manager);
nn->grace_ended = false;
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
nn->nfsd4_grace, net);
queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
......@@ -6402,6 +6469,7 @@ nfs4_state_shutdown_net(struct net *net)
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
}
......
......@@ -31,13 +31,6 @@
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* TODO: Neil Brown made the following observation: We currently
* initially reserve NFSD_BUFSIZE space on the transmit queue and
* never release any of that until the request is complete.
* It would be good to calculate a new maximum response size while
* decoding the COMPOUND, and call svc_reserve with this number
* at the end of nfs4svc_decode_compoundargs.
*/
#include <linux/slab.h>
......@@ -1520,6 +1513,22 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
DECODE_TAIL;
}
static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
DECODE_HEAD;
status = nfsd4_decode_stateid(argp, &seek->seek_stateid);
if (status)
return status;
READ_BUF(8 + 4);
p = xdr_decode_hyper(p, &seek->seek_offset);
seek->seek_whence = be32_to_cpup(p);
DECODE_TAIL;
}
static __be32
nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
{
......@@ -1593,6 +1602,20 @@ static nfsd4_dec nfsd4_dec_ops[] = {
[OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_destroy_clientid,
[OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete,
/* new operations for NFSv4.2 */
[OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
[OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
};
static inline bool
......@@ -1670,6 +1693,14 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
readbytes += nfsd4_max_reply(argp->rqstp, op);
} else
max_reply += nfsd4_max_reply(argp->rqstp, op);
/*
* OP_LOCK may return a conflicting lock. (Special case
* because it will just skip encoding this if it runs
* out of xdr buffer space, and it is the only operation
* that behaves this way.)
*/
if (op->opnum == OP_LOCK)
max_reply += NFS4_OPAQUE_LIMIT;
if (op->status) {
argp->opcnt = i+1;
......@@ -3750,6 +3781,22 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr;
}
static __be32
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_seek *seek)
{
__be32 *p;
if (nfserr)
return nfserr;
p = xdr_reserve_space(&resp->xdr, 4 + 8);
*p++ = cpu_to_be32(seek->seek_eof);
p = xdr_encode_hyper(p, seek->seek_pos);
return nfserr;
}
static __be32
nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
{
......@@ -3822,6 +3869,20 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
[OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop,
[OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop,
/* NFSv4.2 operations */
[OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_COPY] = (nfsd4_enc)nfsd4_encode_noop,
[OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
[OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop,
[OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
[OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
};
/*
......
......@@ -27,8 +27,12 @@
*/
#define TARGET_BUCKET_SIZE 64
static struct hlist_head * cache_hash;
static struct list_head lru_head;
struct nfsd_drc_bucket {
struct list_head lru_head;
spinlock_t cache_lock;
};
static struct nfsd_drc_bucket *drc_hashtbl;
static struct kmem_cache *drc_slab;
/* max number of entries allowed in the cache */
......@@ -36,6 +40,7 @@ static unsigned int max_drc_entries;
/* number of significant bits in the hash value */
static unsigned int maskbits;
static unsigned int drc_hashsize;
/*
* Stats and other tracking of on the duplicate reply cache. All of these and
......@@ -43,7 +48,7 @@ static unsigned int maskbits;
*/
/* total number of entries */
static unsigned int num_drc_entries;
static atomic_t num_drc_entries;
/* cache misses due only to checksum comparison failures */
static unsigned int payload_misses;
......@@ -75,7 +80,6 @@ static struct shrinker nfsd_reply_cache_shrinker = {
* A cache entry is "single use" if c_state == RC_INPROG
* Otherwise, it when accessing _prev or _next, the lock must be held.
*/
static DEFINE_SPINLOCK(cache_lock);
static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func);
/*
......@@ -116,6 +120,12 @@ nfsd_hashsize(unsigned int limit)
return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
}
static u32
nfsd_cache_hash(__be32 xid)
{
return hash_32(be32_to_cpu(xid), maskbits);
}
static struct svc_cacherep *
nfsd_reply_cache_alloc(void)
{
......@@ -126,7 +136,6 @@ nfsd_reply_cache_alloc(void)
rp->c_state = RC_UNUSED;
rp->c_type = RC_NOCACHE;
INIT_LIST_HEAD(&rp->c_lru);
INIT_HLIST_NODE(&rp->c_hash);
}
return rp;
}
......@@ -138,29 +147,27 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
drc_mem_usage -= rp->c_replvec.iov_len;
kfree(rp->c_replvec.iov_base);
}
if (!hlist_unhashed(&rp->c_hash))
hlist_del(&rp->c_hash);
list_del(&rp->c_lru);
--num_drc_entries;
atomic_dec(&num_drc_entries);
drc_mem_usage -= sizeof(*rp);
kmem_cache_free(drc_slab, rp);
}
static void
nfsd_reply_cache_free(struct svc_cacherep *rp)
nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
{
spin_lock(&cache_lock);
spin_lock(&b->cache_lock);
nfsd_reply_cache_free_locked(rp);
spin_unlock(&cache_lock);
spin_unlock(&b->cache_lock);
}
int nfsd_reply_cache_init(void)
{
unsigned int hashsize;
unsigned int i;
INIT_LIST_HEAD(&lru_head);
max_drc_entries = nfsd_cache_size_limit();
num_drc_entries = 0;
atomic_set(&num_drc_entries, 0);
hashsize = nfsd_hashsize(max_drc_entries);
maskbits = ilog2(hashsize);
......@@ -170,9 +177,14 @@ int nfsd_reply_cache_init(void)
if (!drc_slab)
goto out_nomem;
cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL);
if (!cache_hash)
drc_hashtbl = kcalloc(hashsize, sizeof(*drc_hashtbl), GFP_KERNEL);
if (!drc_hashtbl)
goto out_nomem;
for (i = 0; i < hashsize; i++) {
INIT_LIST_HEAD(&drc_hashtbl[i].lru_head);
spin_lock_init(&drc_hashtbl[i].cache_lock);
}
drc_hashsize = hashsize;
return 0;
out_nomem:
......@@ -184,17 +196,22 @@ int nfsd_reply_cache_init(void)
void nfsd_reply_cache_shutdown(void)
{
struct svc_cacherep *rp;
unsigned int i;
unregister_shrinker(&nfsd_reply_cache_shrinker);
cancel_delayed_work_sync(&cache_cleaner);
while (!list_empty(&lru_head)) {
rp = list_entry(lru_head.next, struct svc_cacherep, c_lru);
for (i = 0; i < drc_hashsize; i++) {
struct list_head *head = &drc_hashtbl[i].lru_head;
while (!list_empty(head)) {
rp = list_first_entry(head, struct svc_cacherep, c_lru);
nfsd_reply_cache_free_locked(rp);
}
}
kfree (cache_hash);
cache_hash = NULL;
kfree (drc_hashtbl);
drc_hashtbl = NULL;
drc_hashsize = 0;
if (drc_slab) {
kmem_cache_destroy(drc_slab);
......@@ -207,61 +224,63 @@ void nfsd_reply_cache_shutdown(void)
* not already scheduled.
*/
static void
lru_put_end(struct svc_cacherep *rp)
lru_put_end(struct nfsd_drc_bucket *b, struct svc_cacherep *rp)
{
rp->c_timestamp = jiffies;
list_move_tail(&rp->c_lru, &lru_head);
list_move_tail(&rp->c_lru, &b->lru_head);
schedule_delayed_work(&cache_cleaner, RC_EXPIRE);
}
/*
* Move a cache entry from one hash list to another
*/
static void
hash_refile(struct svc_cacherep *rp)
{
hlist_del_init(&rp->c_hash);
/*
* No point in byte swapping c_xid since we're just using it to pick
* a hash bucket.
*/
hlist_add_head(&rp->c_hash, cache_hash +
hash_32((__force u32)rp->c_xid, maskbits));
}
/*
* Walk the LRU list and prune off entries that are older than RC_EXPIRE.
* Also prune the oldest ones when the total exceeds the max number of entries.
*/
static long
prune_cache_entries(void)
prune_bucket(struct nfsd_drc_bucket *b)
{
struct svc_cacherep *rp, *tmp;
long freed = 0;
list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
list_for_each_entry_safe(rp, tmp, &b->lru_head, c_lru) {
/*
* Don't free entries attached to calls that are still
* in-progress, but do keep scanning the list.
*/
if (rp->c_state == RC_INPROG)
continue;
if (num_drc_entries <= max_drc_entries &&
if (atomic_read(&num_drc_entries) <= max_drc_entries &&
time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break;
nfsd_reply_cache_free_locked(rp);
freed++;
}
return freed;
}
/*
* Walk the LRU list and prune off entries that are older than RC_EXPIRE.
* Also prune the oldest ones when the total exceeds the max number of entries.
*/
static long
prune_cache_entries(void)
{
unsigned int i;
long freed = 0;
bool cancel = true;
for (i = 0; i < drc_hashsize; i++) {
struct nfsd_drc_bucket *b = &drc_hashtbl[i];
if (list_empty(&b->lru_head))
continue;
spin_lock(&b->cache_lock);
freed += prune_bucket(b);
if (!list_empty(&b->lru_head))
cancel = false;
spin_unlock(&b->cache_lock);
}
/*
* Conditionally rearm the job. If we cleaned out the list, then
* cancel any pending run (since there won't be any work to do).
* Otherwise, we rearm the job or modify the existing one to run in
* RC_EXPIRE since we just ran the pruner.
* Conditionally rearm the job to run in RC_EXPIRE since we just
* ran the pruner.
*/
if (list_empty(&lru_head))
cancel_delayed_work(&cache_cleaner);
else
if (!cancel)
mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE);
return freed;
}
......@@ -269,32 +288,19 @@ prune_cache_entries(void)
static void
cache_cleaner_func(struct work_struct *unused)
{
spin_lock(&cache_lock);
prune_cache_entries();
spin_unlock(&cache_lock);
}
static unsigned long
nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
{
unsigned long num;
spin_lock(&cache_lock);
num = num_drc_entries;
spin_unlock(&cache_lock);
return num;
return atomic_read(&num_drc_entries);
}
static unsigned long
nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
{
unsigned long freed;
spin_lock(&cache_lock);
freed = prune_cache_entries();
spin_unlock(&cache_lock);
return freed;
return prune_cache_entries();
}
/*
* Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
......@@ -332,20 +338,24 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
static bool
nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp)
{
/* Check RPC header info first */
if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc ||
rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers ||
rqstp->rq_arg.len != rp->c_len ||
!rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) ||
rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr))
/* Check RPC XID first */
if (rqstp->rq_xid != rp->c_xid)
return false;
/* compare checksum of NFS data */
if (csum != rp->c_csum) {
++payload_misses;
return false;
}
/* Other discriminators */
if (rqstp->rq_proc != rp->c_proc ||
rqstp->rq_prot != rp->c_prot ||
rqstp->rq_vers != rp->c_vers ||
rqstp->rq_arg.len != rp->c_len ||
!rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) ||
rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr))
return false;
return true;
}
......@@ -355,18 +365,14 @@ nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp)
* NULL on failure.
*/
static struct svc_cacherep *
nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp,
__wsum csum)
{
struct svc_cacherep *rp, *ret = NULL;
struct hlist_head *rh;
struct list_head *rh = &b->lru_head;
unsigned int entries = 0;
/*
* No point in byte swapping rq_xid since we're just using it to pick
* a hash bucket.
*/
rh = &cache_hash[hash_32((__force u32)rqstp->rq_xid, maskbits)];
hlist_for_each_entry(rp, rh, c_hash) {
list_for_each_entry(rp, rh, c_lru) {
++entries;
if (nfsd_cache_match(rqstp, csum, rp)) {
ret = rp;
......@@ -377,11 +383,12 @@ nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
/* tally hash chain length stats */
if (entries > longest_chain) {
longest_chain = entries;
longest_chain_cachesize = num_drc_entries;
longest_chain_cachesize = atomic_read(&num_drc_entries);
} else if (entries == longest_chain) {
/* prefer to keep the smallest cachesize possible here */
longest_chain_cachesize = min(longest_chain_cachesize,
num_drc_entries);
longest_chain_cachesize = min_t(unsigned int,
longest_chain_cachesize,
atomic_read(&num_drc_entries));
}
return ret;
......@@ -403,6 +410,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
vers = rqstp->rq_vers,
proc = rqstp->rq_proc;
__wsum csum;
u32 hash = nfsd_cache_hash(xid);
struct nfsd_drc_bucket *b = &drc_hashtbl[hash];
unsigned long age;
int type = rqstp->rq_cachetype;
int rtn = RC_DOIT;
......@@ -420,16 +429,16 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
* preallocate an entry.
*/
rp = nfsd_reply_cache_alloc();
spin_lock(&cache_lock);
spin_lock(&b->cache_lock);
if (likely(rp)) {
++num_drc_entries;
atomic_inc(&num_drc_entries);
drc_mem_usage += sizeof(*rp);
}
/* go ahead and prune the cache */
prune_cache_entries();
prune_bucket(b);
found = nfsd_cache_search(rqstp, csum);
found = nfsd_cache_search(b, rqstp, csum);
if (found) {
if (likely(rp))
nfsd_reply_cache_free_locked(rp);
......@@ -454,8 +463,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
rp->c_len = rqstp->rq_arg.len;
rp->c_csum = csum;
hash_refile(rp);
lru_put_end(rp);
lru_put_end(b, rp);
/* release any buffer */
if (rp->c_type == RC_REPLBUFF) {
......@@ -465,14 +473,14 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
}
rp->c_type = RC_NOCACHE;
out:
spin_unlock(&cache_lock);
spin_unlock(&b->cache_lock);
return rtn;
found_entry:
nfsdstats.rchits++;
/* We found a matching entry which is either in progress or done. */
age = jiffies - rp->c_timestamp;
lru_put_end(rp);
lru_put_end(b, rp);
rtn = RC_DROPIT;
/* Request being processed or excessive rexmits */
......@@ -527,18 +535,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
{
struct svc_cacherep *rp = rqstp->rq_cacherep;
struct kvec *resv = &rqstp->rq_res.head[0], *cachv;
u32 hash;
struct nfsd_drc_bucket *b;
int len;
size_t bufsize = 0;
if (!rp)
return;
hash = nfsd_cache_hash(rp->c_xid);
b = &drc_hashtbl[hash];
len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
len >>= 2;
/* Don't cache excessive amounts of data and XDR failures */
if (!statp || len > (256 >> 2)) {
nfsd_reply_cache_free(rp);
nfsd_reply_cache_free(b, rp);
return;
}
......@@ -553,23 +566,23 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
bufsize = len << 2;
cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
if (!cachv->iov_base) {
nfsd_reply_cache_free(rp);
nfsd_reply_cache_free(b, rp);
return;
}
cachv->iov_len = bufsize;
memcpy(cachv->iov_base, statp, bufsize);
break;
case RC_NOCACHE:
nfsd_reply_cache_free(rp);
nfsd_reply_cache_free(b, rp);
return;
}
spin_lock(&cache_lock);
spin_lock(&b->cache_lock);
drc_mem_usage += bufsize;
lru_put_end(rp);
lru_put_end(b, rp);
rp->c_secure = rqstp->rq_secure;
rp->c_type = cachetype;
rp->c_state = RC_DONE;
spin_unlock(&cache_lock);
spin_unlock(&b->cache_lock);
return;
}
......@@ -600,9 +613,9 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
*/
static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
{
spin_lock(&cache_lock);
seq_printf(m, "max entries: %u\n", max_drc_entries);
seq_printf(m, "num entries: %u\n", num_drc_entries);
seq_printf(m, "num entries: %u\n",
atomic_read(&num_drc_entries));
seq_printf(m, "hash buckets: %u\n", 1 << maskbits);
seq_printf(m, "mem usage: %u\n", drc_mem_usage);
seq_printf(m, "cache hits: %u\n", nfsdstats.rchits);
......@@ -611,7 +624,6 @@ static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
seq_printf(m, "payload misses: %u\n", payload_misses);
seq_printf(m, "longest chain len: %u\n", longest_chain);
seq_printf(m, "cachesize at longest: %u\n", longest_chain_cachesize);
spin_unlock(&cache_lock);
return 0;
}
......
......@@ -49,6 +49,7 @@ enum {
NFSD_Leasetime,
NFSD_Gracetime,
NFSD_RecoveryDir,
NFSD_V4EndGrace,
#endif
};
......@@ -68,6 +69,7 @@ static ssize_t write_maxconn(struct file *file, char *buf, size_t size);
static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
static ssize_t write_gracetime(struct file *file, char *buf, size_t size);
static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size);
#endif
static ssize_t (*write_op[])(struct file *, char *, size_t) = {
......@@ -84,6 +86,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
[NFSD_Leasetime] = write_leasetime,
[NFSD_Gracetime] = write_gracetime,
[NFSD_RecoveryDir] = write_recoverydir,
[NFSD_V4EndGrace] = write_v4_end_grace,
#endif
};
......@@ -1077,6 +1080,47 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
return rv;
}
/**
* write_v4_end_grace - release grace period for nfsd's v4.x lock manager
*
* Input:
* buf: ignored
* size: zero
* OR
*
* Input:
* buf: any value
* size: non-zero length of C string in @buf
* Output:
* passed-in buffer filled with "Y" or "N" with a newline
* and NULL-terminated C string. This indicates whether
* the grace period has ended in the current net
* namespace. Return code is the size in bytes of the
* string. Writing a string that starts with 'Y', 'y', or
* '1' to the file will end the grace period for nfsd's v4
* lock manager.
*/
static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
{
struct net *net = file->f_dentry->d_sb->s_fs_info;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
if (size > 0) {
switch(buf[0]) {
case 'Y':
case 'y':
case '1':
nfsd4_end_grace(nn);
break;
default:
return -EINVAL;
}
}
return scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%c\n",
nn->grace_ended ? 'Y' : 'N');
}
#endif
/*----------------------------------------------------------------------------*/
......@@ -1110,6 +1154,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO},
#endif
/* last one */ {""}
};
......
......@@ -251,7 +251,7 @@ void nfsd_lockd_shutdown(void);
#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED)
#define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP)
#define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH)
#define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP)
#define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP)
#define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED)
#define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS)
#define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL)
......
......@@ -209,8 +209,10 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
* fix that case easily.
*/
struct cred *new = prepare_creds();
if (!new)
return nfserrno(-ENOMEM);
if (!new) {
error = nfserrno(-ENOMEM);
goto out;
}
new->cap_effective =
cap_raise_nfsd_set(new->cap_effective,
new->cap_permitted);
......
......@@ -62,16 +62,21 @@ typedef struct {
(s)->si_generation
struct nfsd4_callback {
void *cb_op;
struct nfs4_client *cb_clp;
struct list_head cb_per_client;
u32 cb_minorversion;
struct rpc_message cb_msg;
const struct rpc_call_ops *cb_ops;
struct nfsd4_callback_ops *cb_ops;
struct work_struct cb_work;
bool cb_done;
};
struct nfsd4_callback_ops {
void (*prepare)(struct nfsd4_callback *);
int (*done)(struct nfsd4_callback *, struct rpc_task *);
void (*release)(struct nfsd4_callback *);
};
/*
* A core object that represents a "common" stateid. These are generally
* embedded within the different (more specific) stateid objects and contain
......@@ -306,6 +311,7 @@ struct nfs4_client {
#define NFSD4_CLIENT_STABLE (2) /* client on stable storage */
#define NFSD4_CLIENT_RECLAIM_COMPLETE (3) /* reclaim_complete done */
#define NFSD4_CLIENT_CONFIRMED (4) /* client is confirmed */
#define NFSD4_CLIENT_UPCALL_LOCK (5) /* upcall serialization */
#define NFSD4_CLIENT_CB_FLAG_MASK (1 << NFSD4_CLIENT_CB_UPDATE | \
1 << NFSD4_CLIENT_CB_KILL)
unsigned long cl_flags;
......@@ -517,6 +523,13 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
#define RD_STATE 0x00000010
#define WR_STATE 0x00000020
enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_NULL = 0,
NFSPROC4_CLNT_CB_RECALL,
NFSPROC4_CLNT_CB_SEQUENCE,
};
struct nfsd4_compound_state;
struct nfsd_net;
......@@ -531,12 +544,12 @@ extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
extern __be32 nfs4_check_open_reclaim(clientid_t *clid,
struct nfsd4_compound_state *cstate, struct nfsd_net *nn);
extern int set_callback_cred(void);
void nfsd4_run_cb_null(struct work_struct *w);
void nfsd4_run_cb_recall(struct work_struct *w);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
extern void nfsd4_probe_callback_sync(struct nfs4_client *clp);
extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *);
extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
extern void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
struct nfsd4_callback_ops *ops, enum nfsd4_cb_op op);
extern void nfsd4_run_cb(struct nfsd4_callback *cb);
extern int nfsd4_create_callback_queue(void);
extern void nfsd4_destroy_callback_queue(void);
extern void nfsd4_shutdown_callback(struct nfs4_client *);
......@@ -545,13 +558,16 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
struct nfsd_net *nn);
extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);
/* nfs4recover operations */
extern int nfsd4_client_tracking_init(struct net *net);
extern void nfsd4_client_tracking_exit(struct net *net);
extern void nfsd4_client_record_create(struct nfs4_client *clp);
extern void nfsd4_client_record_remove(struct nfs4_client *clp);
extern int nfsd4_client_record_check(struct nfs4_client *clp);
extern void nfsd4_record_grace_done(struct nfsd_net *nn, time_t boot_time);
extern void nfsd4_record_grace_done(struct nfsd_net *nn);
/* nfs fault injection functions */
#ifdef CONFIG_NFSD_FAULT_INJECTION
......
......@@ -445,6 +445,16 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
if (err)
goto out;
size_change = 1;
/*
* RFC5661, Section 18.30.4:
* Changing the size of a file with SETATTR indirectly
* changes the time_modify and change attributes.
*
* (and similar for the older RFCs)
*/
if (iap->ia_size != i_size_read(inode))
iap->ia_valid |= ATTR_MTIME;
}
iap->ia_valid |= ATTR_CTIME;
......@@ -649,6 +659,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
{
struct path path;
struct inode *inode;
struct file *file;
int flags = O_RDONLY|O_LARGEFILE;
__be32 err;
int host_err = 0;
......@@ -703,19 +714,25 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
else
flags = O_WRONLY|O_LARGEFILE;
}
*filp = dentry_open(&path, flags, current_cred());
if (IS_ERR(*filp)) {
host_err = PTR_ERR(*filp);
*filp = NULL;
} else {
host_err = ima_file_check(*filp, may_flags);
file = dentry_open(&path, flags, current_cred());
if (IS_ERR(file)) {
host_err = PTR_ERR(file);
goto out_nfserr;
}
host_err = ima_file_check(file, may_flags);
if (host_err) {
nfsd_close(file);
goto out_nfserr;
}
if (may_flags & NFSD_MAY_64BIT_COOKIE)
(*filp)->f_mode |= FMODE_64BITHASH;
file->f_mode |= FMODE_64BITHASH;
else
(*filp)->f_mode |= FMODE_32BITHASH;
}
file->f_mode |= FMODE_32BITHASH;
*filp = file;
out_nfserr:
err = nfserrno(host_err);
out:
......
......@@ -428,6 +428,17 @@ struct nfsd4_reclaim_complete {
u32 rca_one_fs;
};
struct nfsd4_seek {
/* request */
stateid_t seek_stateid;
loff_t seek_offset;
u32 seek_whence;
/* response */
u32 seek_eof;
loff_t seek_pos;
};
struct nfsd4_op {
int opnum;
__be32 status;
......@@ -473,6 +484,9 @@ struct nfsd4_op {
struct nfsd4_reclaim_complete reclaim_complete;
struct nfsd4_test_stateid test_stateid;
struct nfsd4_free_stateid free_stateid;
/* NFSv4.2 */
struct nfsd4_seek seek;
} u;
struct nfs4_replay * replay;
};
......
......@@ -110,6 +110,20 @@ enum nfs_opnum4 {
OP_DESTROY_CLIENTID = 57,
OP_RECLAIM_COMPLETE = 58,
/* nfs42 */
OP_ALLOCATE = 59,
OP_COPY = 60,
OP_COPY_NOTIFY = 61,
OP_DEALLOCATE = 62,
OP_IO_ADVISE = 63,
OP_LAYOUTERROR = 64,
OP_LAYOUTSTATS = 65,
OP_OFFLOAD_CANCEL = 66,
OP_OFFLOAD_STATUS = 67,
OP_READ_PLUS = 68,
OP_SEEK = 69,
OP_WRITE_SAME = 70,
OP_ILLEGAL = 10044,
};
......@@ -117,10 +131,10 @@ enum nfs_opnum4 {
Needs to be updated if more operations are defined in future.*/
#define FIRST_NFS4_OP OP_ACCESS
#define LAST_NFS4_OP OP_RECLAIM_COMPLETE
#define LAST_NFS4_OP OP_WRITE_SAME
#define LAST_NFS40_OP OP_RELEASE_LOCKOWNER
#define LAST_NFS41_OP OP_RECLAIM_COMPLETE
#define LAST_NFS42_OP OP_RECLAIM_COMPLETE
#define LAST_NFS42_OP OP_WRITE_SAME
enum nfsstat4 {
NFS4_OK = 0,
......@@ -235,10 +249,11 @@ enum nfsstat4 {
/* nfs42 */
NFS4ERR_PARTNER_NOTSUPP = 10088,
NFS4ERR_PARTNER_NO_AUTH = 10089,
NFS4ERR_METADATA_NOTSUPP = 10090,
NFS4ERR_UNION_NOTSUPP = 10090,
NFS4ERR_OFFLOAD_DENIED = 10091,
NFS4ERR_WRONG_LFS = 10092,
NFS4ERR_BADLABEL = 10093,
NFS4ERR_OFFLOAD_NO_REQS = 10094,
};
static inline bool seqid_mutating_err(u32 err)
......@@ -535,4 +550,9 @@ struct nfs4_deviceid {
char data[NFS4_DEVICEID4_SIZE];
};
enum data_content4 {
NFS4_CONTENT_DATA = 0,
NFS4_CONTENT_HOLE = 1,
};
#endif
......@@ -74,6 +74,8 @@ static inline int remove_proc_subtree(const char *name, struct proc_dir_entry *p
#endif /* CONFIG_PROC_FS */
struct net;
static inline struct proc_dir_entry *proc_net_mkdir(
struct net *net, const char *name, struct proc_dir_entry *parent)
{
......
......@@ -280,7 +280,6 @@ struct svc_rqst {
bool rq_splice_ok; /* turned off in gss privacy
* to prevent encrypting page
* cache pages */
wait_queue_head_t rq_wait; /* synchronization */
struct task_struct *rq_task; /* service thread */
};
......
......@@ -28,7 +28,8 @@
#define NFSEXP_ALLSQUASH 0x0008
#define NFSEXP_ASYNC 0x0010
#define NFSEXP_GATHERED_WRITES 0x0020
/* 40 80 100 currently unused */
#define NFSEXP_NOREADDIRPLUS 0x0040
/* 80 100 currently unused */
#define NFSEXP_NOHIDE 0x0200
#define NFSEXP_NOSUBTREECHECK 0x0400
#define NFSEXP_NOAUTHNLM 0x0800 /* Don't authenticate NLM requests - just trust */
......@@ -47,7 +48,7 @@
*/
#define NFSEXP_V4ROOT 0x10000
/* All flags that we claim to support. (Note we don't support NOACL.) */
#define NFSEXP_ALLFLAGS 0x17E3F
#define NFSEXP_ALLFLAGS 0x1FE7F
/* The flags that may vary depending on security flavor: */
#define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \
......
......@@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp)
goto out_enomem;
init_waitqueue_head(&rqstp->rq_wait);
serv->sv_nrthreads++;
spin_lock_bh(&pool->sp_lock);
pool->sp_nrthreads++;
......
......@@ -346,20 +346,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (!svc_xprt_has_something_to_do(xprt))
return;
cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
put_cpu();
spin_lock_bh(&pool->sp_lock);
if (!list_empty(&pool->sp_threads) &&
!list_empty(&pool->sp_sockets))
printk(KERN_ERR
"svc_xprt_enqueue: "
"threads and transports both waiting??\n");
pool->sp_stats.packets++;
/* Mark transport as busy. It will remain in this state until
* the provider calls svc_xprt_received. We update XPT_BUSY
* atomically because it also guards against trying to enqueue
......@@ -368,9 +354,15 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
/* Don't enqueue transport while already enqueued */
dprintk("svc: transport %p busy, not enqueued\n", xprt);
goto out_unlock;
return;
}
cpu = get_cpu();
pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
spin_lock_bh(&pool->sp_lock);
pool->sp_stats.packets++;
if (!list_empty(&pool->sp_threads)) {
rqstp = list_entry(pool->sp_threads.next,
struct svc_rqst,
......@@ -382,18 +374,23 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
printk(KERN_ERR
"svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
rqstp, rqstp->rq_xprt);
rqstp->rq_xprt = xprt;
/* Note the order of the following 3 lines:
* We want to assign xprt to rqstp->rq_xprt only _after_
* we've woken up the process, so that we don't race with
* the lockless check in svc_get_next_xprt().
*/
svc_xprt_get(xprt);
wake_up_process(rqstp->rq_task);
rqstp->rq_xprt = xprt;
pool->sp_stats.threads_woken++;
wake_up(&rqstp->rq_wait);
} else {
dprintk("svc: transport %p put into queue\n", xprt);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
pool->sp_stats.sockets_queued++;
}
out_unlock:
spin_unlock_bh(&pool->sp_lock);
put_cpu();
}
/*
......@@ -509,7 +506,7 @@ void svc_wake_up(struct svc_serv *serv)
svc_thread_dequeue(pool, rqstp);
rqstp->rq_xprt = NULL;
*/
wake_up(&rqstp->rq_wait);
wake_up_process(rqstp->rq_task);
} else
pool->sp_task_pending = 1;
spin_unlock_bh(&pool->sp_lock);
......@@ -628,8 +625,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
{
struct svc_xprt *xprt;
struct svc_pool *pool = rqstp->rq_pool;
DECLARE_WAITQUEUE(wait, current);
long time_left;
long time_left = 0;
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
......@@ -651,40 +647,32 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
} else {
if (pool->sp_task_pending) {
pool->sp_task_pending = 0;
spin_unlock_bh(&pool->sp_lock);
return ERR_PTR(-EAGAIN);
xprt = ERR_PTR(-EAGAIN);
goto out;
}
/* No data pending. Go to sleep */
svc_thread_enqueue(pool, rqstp);
/*
* We have to be able to interrupt this wait
* to bring down the daemons ...
*/
set_current_state(TASK_INTERRUPTIBLE);
/*
* checking kthread_should_stop() here allows us to avoid
* locking and signalling when stopping kthreads that call
* svc_recv. If the thread has already been woken up, then
* we can exit here without sleeping. If not, then it
* it'll be woken up quickly during the schedule_timeout
*/
if (kthread_should_stop()) {
set_current_state(TASK_RUNNING);
spin_unlock_bh(&pool->sp_lock);
return ERR_PTR(-EINTR);
}
add_wait_queue(&rqstp->rq_wait, &wait);
/* No data pending. Go to sleep */
svc_thread_enqueue(pool, rqstp);
spin_unlock_bh(&pool->sp_lock);
if (!(signalled() || kthread_should_stop())) {
time_left = schedule_timeout(timeout);
__set_current_state(TASK_RUNNING);
try_to_freeze();
xprt = rqstp->rq_xprt;
if (xprt != NULL)
return xprt;
} else
__set_current_state(TASK_RUNNING);
spin_lock_bh(&pool->sp_lock);
remove_wait_queue(&rqstp->rq_wait, &wait);
if (!time_left)
pool->sp_stats.threads_timedout++;
......@@ -699,6 +687,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
return ERR_PTR(-EAGAIN);
}
}
out:
spin_unlock_bh(&pool->sp_lock);
return xprt;
}
......@@ -744,7 +733,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
svc_add_new_temp_xprt(serv, newxpt);
else
module_put(xprt->xpt_class->xcl_owner);
} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
} else {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, rqstp->rq_pool->sp_id, xprt,
......@@ -781,10 +770,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
printk(KERN_ERR
"svc_recv: service %p, transport not NULL!\n",
rqstp);
if (waitqueue_active(&rqstp->rq_wait))
printk(KERN_ERR
"svc_recv: service %p, wait queue active!\n",
rqstp);
err = svc_alloc_arg(rqstp);
if (err)
......
......@@ -311,19 +311,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
return len;
}
/*
* Check input queue length
*/
static int svc_recv_available(struct svc_sock *svsk)
{
struct socket *sock = svsk->sk_sock;
int avail, err;
err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
return (err >= 0)? avail : err;
}
/*
* Generic recvfrom routine.
*/
......@@ -339,8 +326,14 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
rqstp->rq_xprt_hlen = 0;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
msg.msg_flags);
/* If we read a full record, then assume there may be more
* data to read (stream based sockets only!)
*/
if (len == buflen)
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
svsk, iov[0].iov_base, iov[0].iov_len, len);
......@@ -980,8 +973,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
unsigned int want;
int len;
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
struct kvec iov;
......@@ -1036,7 +1027,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
"%s: Got unrecognized reply: "
"calldir 0x%x xpt_bc_xprt %p xid %08x\n",
__func__, ntohl(calldir),
bc_xprt, xid);
bc_xprt, ntohl(xid));
return -EAGAIN;
}
......@@ -1073,8 +1064,6 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
static void svc_tcp_fragment_received(struct svc_sock *svsk)
{
/* If we have more data, signal svc_xprt_enqueue() to try again */
if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
dprintk("svc: TCP %s record (%d bytes)\n",
svc_sock_final_rec(svsk) ? "final" : "nonfinal",
svc_sock_reclen(svsk));
......
......@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
.xcl_name = "rdma",
.xcl_owner = THIS_MODULE,
.xcl_ops = &svc_rdma_ops,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP,
.xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
.xcl_ident = XPRT_TRANSPORT_RDMA,
};
......
......@@ -51,6 +51,7 @@
#include <linux/sunrpc/clnt.h> /* rpc_xprt */
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
#include <linux/sunrpc/svc.h> /* RPCSVC_MAXPAYLOAD */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
......@@ -392,4 +393,10 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
/* Workqueue created in svc_rdma.c */
extern struct workqueue_struct *svc_rdma_wq;
#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
#else
#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
#endif
#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment