Commit 4dd3c2e5 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux

Pull nfsd updates from Bruce Fields:
 "Lots of good bugfixes, including:

   -  fix a number of races in the NFSv4+ state code

   -  fix some shutdown crashes in multiple-network-namespace cases

   -  relax our 4.1 session limits; if you've an artificially low limit
      to the number of 4.1 clients that can mount simultaneously, try
      upgrading"

* tag 'nfsd-4.15' of git://linux-nfs.org/~bfields/linux: (22 commits)
  SUNRPC: Improve ordering of transport processing
  nfsd: deal with revoked delegations appropriately
  svcrdma: Enqueue after setting XPT_CLOSE in completion handlers
  nfsd: use nfs->ns.inum as net ID
  rpc: remove some BUG()s
  svcrdma: Preserve CB send buffer across retransmits
  nfds: avoid gettimeofday for nfssvc_boot time
  fs, nfsd: convert nfs4_file.fi_ref from atomic_t to refcount_t
  fs, nfsd: convert nfs4_cntl_odstate.co_odcount from atomic_t to refcount_t
  fs, nfsd: convert nfs4_stid.sc_count from atomic_t to refcount_t
  lockd: double unregister of inetaddr notifiers
  nfsd4: catch some false session retries
  nfsd4: fix cached replies to solo SEQUENCE compounds
  sunrcp: make function _svc_create_xprt static
  SUNRPC: Fix tracepoint storage issues with svc_recv and svc_rqst_status
  nfsd: use ARRAY_SIZE
  nfsd: give out fewer session slots as limit approaches
  nfsd: increase DRC cache limit
  nfsd: remove unnecessary nofilehandle checks
  nfs_common: convert int to bool
  ...
parents 07c455ee 22700f3c
......@@ -369,6 +369,7 @@ static int lockd_start_svc(struct svc_serv *serv)
printk(KERN_WARNING
"lockd_up: svc_rqst allocation failed, error=%d\n",
error);
lockd_unregister_notifiers();
goto out_rqst;
}
......@@ -459,13 +460,16 @@ int lockd_up(struct net *net)
}
error = lockd_up_net(serv, net);
if (error < 0)
goto err_net;
if (error < 0) {
lockd_unregister_notifiers();
goto err_put;
}
error = lockd_start_svc(serv);
if (error < 0)
goto err_start;
if (error < 0) {
lockd_down_net(serv, net);
goto err_put;
}
nlmsvc_users++;
/*
* Note: svc_serv structures have an initial use count of 1,
......@@ -476,12 +480,6 @@ int lockd_up(struct net *net)
err_create:
mutex_unlock(&nlmsvc_mutex);
return error;
err_start:
lockd_down_net(serv, net);
err_net:
lockd_unregister_notifiers();
goto err_put;
}
EXPORT_SYMBOL_GPL(lockd_up);
......
......@@ -55,14 +55,7 @@ locks_end_grace(struct lock_manager *lm)
}
EXPORT_SYMBOL_GPL(locks_end_grace);
/**
* locks_in_grace
*
* Lock managers call this function to determine when it is OK for them
* to answer ordinary lock requests, and when they should accept only
* lock reclaims.
*/
int
static bool
__state_in_grace(struct net *net, bool open)
{
struct list_head *grace_list = net_generic(net, grace_net_id);
......@@ -78,15 +71,22 @@ __state_in_grace(struct net *net, bool open)
return false;
}
int locks_in_grace(struct net *net)
/**
* locks_in_grace
*
* Lock managers call this function to determine when it is OK for them
* to answer ordinary lock requests, and when they should accept only
* lock reclaims.
*/
bool locks_in_grace(struct net *net)
{
return __state_in_grace(net, 0);
return __state_in_grace(net, false);
}
EXPORT_SYMBOL_GPL(locks_in_grace);
int opens_in_grace(struct net *net)
bool opens_in_grace(struct net *net)
{
return __state_in_grace(net, 1);
return __state_in_grace(net, true);
}
EXPORT_SYMBOL_GPL(opens_in_grace);
......
......@@ -12,6 +12,7 @@
#include <linux/nsproxy.h>
#include <linux/sunrpc/addr.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include "state.h"
#include "netns.h"
......@@ -126,8 +127,6 @@ static struct nfsd_fault_inject_op inject_ops[] = {
},
};
#define NUM_INJECT_OPS (sizeof(inject_ops)/sizeof(struct nfsd_fault_inject_op))
int nfsd_fault_inject_init(void)
{
unsigned int i;
......@@ -138,7 +137,7 @@ int nfsd_fault_inject_init(void)
if (!debug_dir)
goto fail;
for (i = 0; i < NUM_INJECT_OPS; i++) {
for (i = 0; i < ARRAY_SIZE(inject_ops); i++) {
op = &inject_ops[i];
if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
goto fail;
......
......@@ -107,7 +107,7 @@ struct nfsd_net {
bool lockd_up;
/* Time of server startup */
struct timeval nfssvc_boot;
struct timespec64 nfssvc_boot;
/*
* Max number of connections this nfsd container will allow. Defaults
......
......@@ -748,8 +748,9 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p)
if (resp->status == 0) {
*p++ = htonl(resp->count);
*p++ = htonl(resp->committed);
*p++ = htonl(nn->nfssvc_boot.tv_sec);
*p++ = htonl(nn->nfssvc_boot.tv_usec);
/* unique identifier, y2038 overflow can be ignored */
*p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
*p++ = htonl(nn->nfssvc_boot.tv_nsec);
}
return xdr_ressize_check(rqstp, p);
}
......@@ -1119,8 +1120,9 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p)
p = encode_wcc_data(rqstp, p, &resp->fh);
/* Write verifier */
if (resp->status == 0) {
*p++ = htonl(nn->nfssvc_boot.tv_sec);
*p++ = htonl(nn->nfssvc_boot.tv_usec);
/* unique identifier, y2038 overflow can be ignored */
*p++ = htonl((u32)nn->nfssvc_boot.tv_sec);
*p++ = htonl(nn->nfssvc_boot.tv_nsec);
}
return xdr_ressize_check(rqstp, p);
}
......
......@@ -336,7 +336,7 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls)
trace_layout_recall(&ls->ls_stid.sc_stateid);
atomic_inc(&ls->ls_stid.sc_count);
refcount_inc(&ls->ls_stid.sc_count);
nfsd4_run_cb(&ls->ls_recall);
out_unlock:
......@@ -441,7 +441,7 @@ nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
goto done;
}
atomic_inc(&ls->ls_stid.sc_count);
refcount_inc(&ls->ls_stid.sc_count);
list_add_tail(&new->lo_perstate, &ls->ls_layouts);
new = NULL;
done:
......
......@@ -485,9 +485,6 @@ static __be32
nfsd4_getfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
if (!cstate->current_fh.fh_dentry)
return nfserr_nofilehandle;
u->getfh = &cstate->current_fh;
return nfs_ok;
}
......@@ -535,9 +532,6 @@ static __be32
nfsd4_savefh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
if (!cstate->current_fh.fh_dentry)
return nfserr_nofilehandle;
fh_dup2(&cstate->save_fh, &cstate->current_fh);
if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG)) {
memcpy(&cstate->save_stateid, &cstate->current_stateid, sizeof(stateid_t));
......@@ -570,10 +564,11 @@ static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
/*
* This is opaque to client, so no need to byte-swap. Use
* __force to keep sparse happy
* __force to keep sparse happy. y2038 time_t overflow is
* irrelevant in this usage.
*/
verf[0] = (__force __be32)nn->nfssvc_boot.tv_sec;
verf[1] = (__force __be32)nn->nfssvc_boot.tv_usec;
verf[1] = (__force __be32)nn->nfssvc_boot.tv_nsec;
memcpy(verifier->data, verf, sizeof(verifier->data));
}
......@@ -703,10 +698,8 @@ nfsd4_link(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_link *link = &u->link;
__be32 status = nfserr_nofilehandle;
__be32 status;
if (!cstate->save_fh.fh_dentry)
return status;
status = nfsd_link(rqstp, &cstate->current_fh,
link->li_name, link->li_namelen, &cstate->save_fh);
if (!status)
......@@ -850,10 +843,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
{
struct nfsd4_rename *rename = &u->rename;
__be32 status = nfserr_nofilehandle;
__be32 status;
if (!cstate->save_fh.fh_dentry)
return status;
if (opens_in_grace(SVC_NET(rqstp)) &&
!(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
return nfserr_grace;
......
......@@ -359,7 +359,7 @@ put_nfs4_file(struct nfs4_file *fi)
{
might_lock(&state_lock);
if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
if (refcount_dec_and_lock(&fi->fi_ref, &state_lock)) {
hlist_del_rcu(&fi->fi_hash);
spin_unlock(&state_lock);
WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
......@@ -568,7 +568,7 @@ alloc_clnt_odstate(struct nfs4_client *clp)
co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL);
if (co) {
co->co_client = clp;
atomic_set(&co->co_odcount, 1);
refcount_set(&co->co_odcount, 1);
}
return co;
}
......@@ -586,7 +586,7 @@ static inline void
get_clnt_odstate(struct nfs4_clnt_odstate *co)
{
if (co)
atomic_inc(&co->co_odcount);
refcount_inc(&co->co_odcount);
}
static void
......@@ -598,7 +598,7 @@ put_clnt_odstate(struct nfs4_clnt_odstate *co)
return;
fp = co->co_file;
if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
if (refcount_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
list_del(&co->co_perfile);
spin_unlock(&fp->fi_lock);
......@@ -656,7 +656,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
stid->sc_stateid.si_opaque.so_id = new_id;
stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
/* Will be incremented before return to client: */
atomic_set(&stid->sc_count, 1);
refcount_set(&stid->sc_count, 1);
spin_lock_init(&stid->sc_lock);
/*
......@@ -813,7 +813,7 @@ nfs4_put_stid(struct nfs4_stid *s)
might_lock(&clp->cl_lock);
if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
wake_up_all(&close_wq);
return;
}
......@@ -913,7 +913,7 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
if (status)
return status;
++fp->fi_delegees;
atomic_inc(&dp->dl_stid.sc_count);
refcount_inc(&dp->dl_stid.sc_count);
dp->dl_stid.sc_type = NFS4_DELEG_STID;
list_add(&dp->dl_perfile, &fp->fi_delegations);
list_add(&dp->dl_perclnt, &clp->cl_delegations);
......@@ -1214,7 +1214,7 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
WARN_ON_ONCE(!list_empty(&stp->st_locks));
if (!atomic_dec_and_test(&s->sc_count)) {
if (!refcount_dec_and_test(&s->sc_count)) {
wake_up_all(&close_wq);
return;
}
......@@ -1439,8 +1439,10 @@ free_session_slots(struct nfsd4_session *ses)
{
int i;
for (i = 0; i < ses->se_fchannel.maxreqs; i++)
for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
free_svc_cred(&ses->se_slots[i]->sl_cred);
kfree(ses->se_slots[i]);
}
}
/*
......@@ -1472,6 +1474,11 @@ static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
spin_lock(&nfsd_drc_lock);
avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
nfsd_drc_max_mem - nfsd_drc_mem_used);
/*
* Never use more than a third of the remaining memory,
* unless it's the only way to give this client a slot:
*/
avail = clamp_t(int, avail, slotsize, avail/3);
num = min_t(int, num, avail / slotsize);
nfsd_drc_mem_used += num * slotsize;
spin_unlock(&nfsd_drc_lock);
......@@ -2072,7 +2079,7 @@ find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
s = find_stateid_locked(cl, t);
if (s != NULL) {
if (typemask & s->sc_type)
atomic_inc(&s->sc_count);
refcount_inc(&s->sc_count);
else
s = NULL;
}
......@@ -2287,14 +2294,18 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
dprintk("--> %s slot %p\n", __func__, slot);
slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
slot->sl_opcnt = resp->opcnt;
slot->sl_status = resp->cstate.status;
free_svc_cred(&slot->sl_cred);
copy_cred(&slot->sl_cred, &resp->rqstp->rq_cred);
slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
if (nfsd4_not_cached(resp)) {
slot->sl_datalen = 0;
if (!nfsd4_cache_this(resp)) {
slot->sl_flags &= ~NFSD4_SLOT_CACHED;
return;
}
slot->sl_flags |= NFSD4_SLOT_CACHED;
base = resp->cstate.data_offset;
slot->sl_datalen = buf->len - base;
if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
......@@ -2321,8 +2332,16 @@ nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
op = &args->ops[resp->opcnt - 1];
nfsd4_encode_operation(resp, op);
/* Return nfserr_retry_uncached_rep in next operation. */
if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
if (slot->sl_flags & NFSD4_SLOT_CACHED)
return op->status;
if (args->opcnt == 1) {
/*
* The original operation wasn't a solo sequence--we
* always cache those--so this retry must not match the
* original:
*/
op->status = nfserr_seq_false_retry;
} else {
op = &args->ops[resp->opcnt++];
op->status = nfserr_retry_uncached_rep;
nfsd4_encode_operation(resp, op);
......@@ -2986,6 +3005,34 @@ static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
return xb->len > session->se_fchannel.maxreq_sz;
}
static bool replay_matches_cache(struct svc_rqst *rqstp,
struct nfsd4_sequence *seq, struct nfsd4_slot *slot)
{
struct nfsd4_compoundargs *argp = rqstp->rq_argp;
if ((bool)(slot->sl_flags & NFSD4_SLOT_CACHETHIS) !=
(bool)seq->cachethis)
return false;
/*
* If there's an error than the reply can have fewer ops than
* the call. But if we cached a reply with *more* ops than the
* call you're sending us now, then this new call is clearly not
* really a replay of the old one:
*/
if (slot->sl_opcnt < argp->opcnt)
return false;
/* This is the only check explicitly called by spec: */
if (!same_creds(&rqstp->rq_cred, &slot->sl_cred))
return false;
/*
* There may be more comparisons we could actually do, but the
* spec doesn't require us to catch every case where the calls
* don't match (that would require caching the call as well as
* the reply), so we don't bother.
*/
return true;
}
__be32
nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
......@@ -3045,6 +3092,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
status = nfserr_seq_misordered;
if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
goto out_put_session;
status = nfserr_seq_false_retry;
if (!replay_matches_cache(rqstp, seq, slot))
goto out_put_session;
cstate->slot = slot;
cstate->session = session;
cstate->clp = clp;
......@@ -3351,7 +3401,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
{
lockdep_assert_held(&state_lock);
atomic_set(&fp->fi_ref, 1);
refcount_set(&fp->fi_ref, 1);
spin_lock_init(&fp->fi_lock);
INIT_LIST_HEAD(&fp->fi_stateids);
INIT_LIST_HEAD(&fp->fi_delegations);
......@@ -3514,7 +3564,7 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
continue;
if (local->st_stateowner == &oo->oo_owner) {
ret = local;
atomic_inc(&ret->st_stid.sc_count);
refcount_inc(&ret->st_stid.sc_count);
break;
}
}
......@@ -3573,7 +3623,7 @@ init_open_stateid(struct nfs4_file *fp, struct nfsd4_open *open)
goto out_unlock;
open->op_stp = NULL;
atomic_inc(&stp->st_stid.sc_count);
refcount_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_OPEN_STID;
INIT_LIST_HEAD(&stp->st_locks);
stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner);
......@@ -3621,7 +3671,7 @@ move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
* there should be no danger of the refcount going back up again at
* this point.
*/
wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2);
wait_event(close_wq, refcount_read(&s->st_stid.sc_count) == 2);
release_all_access(s);
if (s->st_stid.sc_file) {
......@@ -3647,7 +3697,7 @@ find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash) {
if (fh_match(&fp->fi_fhandle, fh)) {
if (atomic_inc_not_zero(&fp->fi_ref))
if (refcount_inc_not_zero(&fp->fi_ref))
return fp;
}
}
......@@ -3783,7 +3833,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
* lock) we know the server hasn't removed the lease yet, we know
* it's safe to take a reference.
*/
atomic_inc(&dp->dl_stid.sc_count);
refcount_inc(&dp->dl_stid.sc_count);
nfsd4_run_cb(&dp->dl_recall);
}
......@@ -3966,7 +4016,8 @@ static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, statei
{
struct nfs4_stid *ret;
ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID);
ret = find_stateid_by_type(cl, s,
NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
if (!ret)
return NULL;
return delegstateid(ret);
......@@ -3989,6 +4040,12 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
if (deleg == NULL)
goto out;
if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
nfs4_put_stid(&deleg->dl_stid);
if (cl->cl_minorversion)
status = nfserr_deleg_revoked;
goto out;
}
flags = share_access_to_flags(open->op_share_access);
status = nfs4_check_delegmode(deleg, flags);
if (status) {
......@@ -4858,6 +4915,16 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
struct nfs4_stid **s, struct nfsd_net *nn)
{
__be32 status;
bool return_revoked = false;
/*
* only return revoked delegations if explicitly asked.
* otherwise we report revoked or bad_stateid status.
*/
if (typemask & NFS4_REVOKED_DELEG_STID)
return_revoked = true;
else if (typemask & NFS4_DELEG_STID)
typemask |= NFS4_REVOKED_DELEG_STID;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
return nfserr_bad_stateid;
......@@ -4872,6 +4939,12 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
*s = find_stateid_by_type(cstate->clp, stateid, typemask);
if (!*s)
return nfserr_bad_stateid;
if (((*s)->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
nfs4_put_stid(*s);
if (cstate->minorversion)
return nfserr_deleg_revoked;
return nfserr_bad_stateid;
}
return nfs_ok;
}
......@@ -5071,7 +5144,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
ret = nfserr_locks_held;
break;
case NFS4_LOCK_STID:
atomic_inc(&s->sc_count);
refcount_inc(&s->sc_count);
spin_unlock(&cl->cl_lock);
ret = nfsd4_free_lock_stateid(stateid, s);
goto out;
......@@ -5578,7 +5651,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
lockdep_assert_held(&clp->cl_lock);
atomic_inc(&stp->st_stid.sc_count);
refcount_inc(&stp->st_stid.sc_count);
stp->st_stid.sc_type = NFS4_LOCK_STID;
stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
get_nfs4_file(fp);
......@@ -5604,7 +5677,7 @@ find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
if (lst->st_stid.sc_file == fp) {
atomic_inc(&lst->st_stid.sc_count);
refcount_inc(&lst->st_stid.sc_count);
return lst;
}
}
......@@ -7006,8 +7079,8 @@ nfs4_state_start_net(struct net *net)
nn->nfsd4_manager.block_opens = true;
locks_start_grace(net, &nn->nfsd4_manager);
nfsd4_client_tracking_init(net);
printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
nn->nfsd4_grace, net);
printk(KERN_INFO "NFSD: starting %ld-second grace period (net %x)\n",
nn->nfsd4_grace, net->ns.inum);
queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
return 0;
}
......
......@@ -447,7 +447,7 @@ void nfsd_reset_versions(void)
*/
static void set_max_drc(void)
{
#define NFSD_DRC_SIZE_SHIFT 10
#define NFSD_DRC_SIZE_SHIFT 7
nfsd_drc_max_mem = (nr_free_buffer_pages()
>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
nfsd_drc_mem_used = 0;
......@@ -517,7 +517,7 @@ int nfsd_create_serv(struct net *net)
register_inet6addr_notifier(&nfsd_inet6addr_notifier);
#endif
}
do_gettimeofday(&nn->nfssvc_boot); /* record boot time */
ktime_get_real_ts64(&nn->nfssvc_boot); /* record boot time */
return 0;
}
......
......@@ -36,6 +36,7 @@
#define _NFSD4_STATE_H
#include <linux/idr.h>
#include <linux/refcount.h>
#include <linux/sunrpc/svc_xprt.h>
#include "nfsfh.h"
......@@ -83,7 +84,7 @@ struct nfsd4_callback_ops {
* fields that are of general use to any stateid.
*/
struct nfs4_stid {
atomic_t sc_count;
refcount_t sc_count;
#define NFS4_OPEN_STID 1
#define NFS4_LOCK_STID 2
#define NFS4_DELEG_STID 4
......@@ -169,11 +170,13 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
struct nfsd4_slot {
u32 sl_seqid;
__be32 sl_status;
struct svc_cred sl_cred;
u32 sl_datalen;
u16 sl_opcnt;
#define NFSD4_SLOT_INUSE (1 << 0)
#define NFSD4_SLOT_CACHETHIS (1 << 1)
#define NFSD4_SLOT_INITIALIZED (1 << 2)
#define NFSD4_SLOT_CACHED (1 << 3)
u8 sl_flags;
char sl_data[];
};
......@@ -465,7 +468,7 @@ struct nfs4_clnt_odstate {
struct nfs4_client *co_client;
struct nfs4_file *co_file;
struct list_head co_perfile;
atomic_t co_odcount;
refcount_t co_odcount;
};
/*
......@@ -481,7 +484,7 @@ struct nfs4_clnt_odstate {
* the global state_lock spinlock.
*/
struct nfs4_file {
atomic_t fi_ref;
refcount_t fi_ref;
spinlock_t fi_lock;
struct hlist_node fi_hash; /* hash on fi_fhandle */
struct list_head fi_stateids;
......@@ -634,7 +637,7 @@ struct nfs4_file *find_file(struct knfsd_fh *fh);
void put_nfs4_file(struct nfs4_file *fi);
static inline void get_nfs4_file(struct nfs4_file *fi)
{
atomic_inc(&fi->fi_ref);
refcount_inc(&fi->fi_ref);
}
struct file *find_any_file(struct nfs4_file *f);
......
......@@ -649,9 +649,18 @@ static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
}
static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
/*
* The session reply cache only needs to cache replies that the client
* actually asked us to. But it's almost free for us to cache compounds
* consisting of only a SEQUENCE op, so we may as well cache those too.
* Also, the protocol doesn't give us a convenient response in the case
* of a replay of a solo SEQUENCE op that wasn't cached
* (RETRY_UNCACHED_REP can only be returned in the second op of a
* compound).
*/
static inline bool nfsd4_cache_this(struct nfsd4_compoundres *resp)
{
return !(resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
return (resp->cstate.slot->sl_flags & NFSD4_SLOT_CACHETHIS)
|| nfsd4_is_solo_sequence(resp);
}
......
......@@ -971,8 +971,8 @@ struct lock_manager {
struct net;
void locks_start_grace(struct net *, struct lock_manager *);
void locks_end_grace(struct lock_manager *);
int locks_in_grace(struct net *);
int opens_in_grace(struct net *);
bool locks_in_grace(struct net *);
bool opens_in_grace(struct net *);
/* that will die - we need it for nfs_lock_info */
#include <linux/nfs_fs_i.h>
......
......@@ -47,6 +47,7 @@ struct svc_pool {
struct svc_pool_stats sp_stats; /* statistics on pool operation */
#define SP_TASK_PENDING (0) /* still work to do even if no
* xprt is queued. */
#define SP_CONGESTED (1)
unsigned long sp_flags;
} ____cacheline_aligned_in_smp;
......
......@@ -486,20 +486,22 @@ TRACE_EVENT(svc_recv,
TP_ARGS(rqst, status),
TP_STRUCT__entry(
__field(struct sockaddr *, addr)
__field(u32, xid)
__field(int, status)
__field(unsigned long, flags)
__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
),
TP_fast_assign(
__entry->addr = (struct sockaddr *)&rqst->rq_addr;
__entry->xid = status > 0 ? be32_to_cpu(rqst->rq_xid) : 0;
__entry->status = status;
__entry->flags = rqst->rq_flags;
memcpy(__get_dynamic_array(addr),
&rqst->rq_addr, rqst->rq_addrlen);
),
TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s", __entry->addr,
TP_printk("addr=%pIScp xid=0x%08x status=%d flags=%s",
(struct sockaddr *)__get_dynamic_array(addr),
__entry->xid, __entry->status,
show_rqstp_flags(__entry->flags))
);
......@@ -544,22 +546,23 @@ DECLARE_EVENT_CLASS(svc_rqst_status,
TP_ARGS(rqst, status),
TP_STRUCT__entry(
__field(struct sockaddr *, addr)
__field(u32, xid)
__field(int, dropme)
__field(int, status)
__field(unsigned long, flags)
__dynamic_array(unsigned char, addr, rqst->rq_addrlen)
),
TP_fast_assign(
__entry->addr = (struct sockaddr *)&rqst->rq_addr;
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->status = status;
__entry->flags = rqst->rq_flags;
memcpy(__get_dynamic_array(addr),
&rqst->rq_addr, rqst->rq_addrlen);
),
TP_printk("addr=%pIScp rq_xid=0x%08x status=%d flags=%s",
__entry->addr, __entry->xid,
(struct sockaddr *)__get_dynamic_array(addr),
__entry->xid,
__entry->status, show_rqstp_flags(__entry->flags))
);
......
......@@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
return stat;
if (integ_len > buf->len)
return stat;
if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
BUG();
if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) {
WARN_ON_ONCE(1);
return stat;
}
/* copy out mic... */
if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
BUG();
return stat;
if (mic.len > RPC_MAX_AUTH_SIZE)
return stat;
mic.data = kmalloc(mic.len, GFP_KERNEL);
......@@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
BUG_ON(integ_len % 4);
*p++ = htonl(integ_len);
*p++ = htonl(gc->gc_seq);
if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len))
BUG();
if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
WARN_ON_ONCE(1);
goto out_err;
}
if (resbuf->tail[0].iov_base == NULL) {
if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
goto out_err;
......
......@@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
svc_xprt_received(new);
}
int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags)
static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
struct net *net, const int family,
const unsigned short port, int flags)
{
struct svc_xprt_class *xcl;
......@@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
struct svc_pool *pool;
struct svc_rqst *rqstp = NULL;
int cpu;
bool queued = false;
if (!svc_xprt_has_something_to_do(xprt))
goto out;
......@@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt)
atomic_long_inc(&pool->sp_stats.packets);
redo_search:
dprintk("svc: transport %p put into queue\n", xprt);
spin_lock_bh(&pool->sp_lock);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
pool->sp_stats.sockets_queued++;
spin_unlock_bh(&pool->sp_lock);
/* find a thread for this xprt */
rcu_read_lock();
list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
/* Do a lockless check first */
if (test_bit(RQ_BUSY, &rqstp->rq_flags))
if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
/*
* Once the xprt has been queued, it can only be dequeued by
* the task that intends to service it. All we can do at that
* point is to try to wake this thread back up so that it can
* do so.
*/
if (!queued) {
spin_lock_bh(&rqstp->rq_lock);
if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) {
/* already busy, move on... */
spin_unlock_bh(&rqstp->rq_lock);
continue;
}
/* this one will do */
rqstp->rq_xprt = xprt;
svc_xprt_get(xprt);
spin_unlock_bh(&rqstp->rq_lock);
}
rcu_read_unlock();
atomic_long_inc(&pool->sp_stats.threads_woken);
wake_up_process(rqstp->rq_task);
put_cpu();
goto out;
}
rcu_read_unlock();
/*
* We didn't find an idle thread to use, so we need to queue the xprt.
* Do so and then search again. If we find one, we can't hook this one
* up to it directly but we can wake the thread up in the hopes that it
* will pick it up once it searches for a xprt to service.
*/
if (!queued) {
queued = true;
dprintk("svc: transport %p put into queue\n", xprt);
spin_lock_bh(&pool->sp_lock);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
pool->sp_stats.sockets_queued++;
spin_unlock_bh(&pool->sp_lock);
goto redo_search;
goto out_unlock;
}
set_bit(SP_CONGESTED, &pool->sp_flags);
rqstp = NULL;
out_unlock:
rcu_read_unlock();
put_cpu();
out:
trace_svc_xprt_do_enqueue(xprt, rqstp);
......@@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp)
static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
{
struct svc_xprt *xprt;
struct svc_pool *pool = rqstp->rq_pool;
long time_left = 0;
/* rq_xprt should be clear on entry */
WARN_ON_ONCE(rqstp->rq_xprt);
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
*/
rqstp->rq_chandle.thread_wait = 5*HZ;
xprt = svc_xprt_dequeue(pool);
if (xprt) {
rqstp->rq_xprt = xprt;
/* As there is a shortage of threads and this request
* had to be queued, don't allow the thread to wait so
* long for cache updates.
*/
rqstp->rq_chandle.thread_wait = 1*HZ;
clear_bit(SP_TASK_PENDING, &pool->sp_flags);
return xprt;
}
rqstp->rq_xprt = svc_xprt_dequeue(pool);
if (rqstp->rq_xprt)
goto out_found;
/*
* We have to be able to interrupt this wait
* to bring down the daemons ...
*/
set_current_state(TASK_INTERRUPTIBLE);
smp_mb__before_atomic();
clear_bit(SP_CONGESTED, &pool->sp_flags);
clear_bit(RQ_BUSY, &rqstp->rq_flags);
smp_mb();
smp_mb__after_atomic();
if (likely(rqst_should_sleep(rqstp)))
time_left = schedule_timeout(timeout);
......@@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
try_to_freeze();
spin_lock_bh(&rqstp->rq_lock);
set_bit(RQ_BUSY, &rqstp->rq_flags);
spin_unlock_bh(&rqstp->rq_lock);
xprt = rqstp->rq_xprt;
if (xprt != NULL)
return xprt;
smp_mb__after_atomic();
rqstp->rq_xprt = svc_xprt_dequeue(pool);
if (rqstp->rq_xprt)
goto out_found;
if (!time_left)
atomic_long_inc(&pool->sp_stats.threads_timedout);
......@@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (signalled() || kthread_should_stop())
return ERR_PTR(-EINTR);
return ERR_PTR(-EAGAIN);
out_found:
/* Normally we will wait up to 5 seconds for any required
* cache information to be provided.
*/
if (!test_bit(SP_CONGESTED, &pool->sp_flags))
rqstp->rq_chandle.thread_wait = 5*HZ;
else
rqstp->rq_chandle.thread_wait = 1*HZ;
return rqstp->rq_xprt;
}
static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
......
......@@ -133,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
if (ret)
goto out_err;
/* Bump page refcnt so Send completion doesn't release
* the rq_buffer before all retransmits are complete.
*/
get_page(virt_to_page(rqst->rq_buffer));
ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0);
if (ret)
goto out_unmap;
......@@ -165,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task)
return -EINVAL;
}
/* svc_rdma_sendto releases this page */
page = alloc_page(RPCRDMA_DEF_GFP);
if (!page)
return -ENOMEM;
......@@ -184,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task)
{
struct rpc_rqst *rqst = task->tk_rqstp;
put_page(virt_to_page(rqst->rq_buffer));
kfree(rqst->rq_rbuffer);
}
......
......@@ -290,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context)
ib_event_msg(event->event), event->event,
event->element.qp);
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
break;
}
}
......@@ -322,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
goto out;
svc_xprt_enqueue(&xprt->sc_xprt);
goto out;
goto out_enqueue;
flushed:
if (wc->status != IB_WC_WR_FLUSH_ERR)
......@@ -333,6 +333,8 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_rdma_put_context(ctxt, 1);
out_enqueue:
svc_xprt_enqueue(&xprt->sc_xprt);
out:
svc_xprt_put(&xprt->sc_xprt);
}
......@@ -358,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS)) {
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_xprt_enqueue(&xprt->sc_xprt);
if (wc->status != IB_WC_WR_FLUSH_ERR)
pr_err("svcrdma: Send: %s (%u/0x%x)\n",
ib_wc_status_msg(wc->status),
......@@ -569,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id,
case RDMA_CM_EVENT_DEVICE_REMOVAL:
dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n",
xprt, cma_id);
if (xprt)
if (xprt) {
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
svc_xprt_enqueue(&xprt->sc_xprt);
}
break;
default:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment