Commit c04e88db authored by Trond Myklebust

NFSv4: Share open_owner structs between several different processes.

Reduces the load on the server.
parent 5bb0bc7c
......@@ -616,8 +616,13 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt
memcpy(&state->stateid, &oc_res.stateid, sizeof(state->stateid));
} else
memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid));
spin_lock(&inode->i_lock);
if (flags & FMODE_READ)
state->nreaders++;
if (flags & FMODE_WRITE)
state->nwriters++;
state->state |= flags & (FMODE_READ|FMODE_WRITE);
state->pid = current->pid;
spin_unlock(&inode->i_lock);
up(&sp->so_sema);
nfs4_put_state_owner(sp);
......@@ -634,6 +639,21 @@ nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *satt
iput(inode);
inode = NULL;
}
/* NOTE: BAD_SEQID means the server and client disagree about the
* book-keeping w.r.t. state-changing operations
* (OPEN/CLOSE/LOCK/LOCKU...)
* It is actually a sign of a bug on the client or on the server.
*
* If we receive a BAD_SEQID error in the particular case of
* doing an OPEN, we assume that nfs4_increment_seqid() will
* have unhashed the old state_owner for us, and that we can
* therefore safely retry using a new one. We should still warn
* the user though...
*/
if (status == -NFS4ERR_BAD_SEQID) {
printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n");
goto retry;
}
status = nfs4_handle_error(server, status);
if (!status)
goto retry;
......@@ -722,6 +742,36 @@ nfs4_do_close(struct inode *inode, struct nfs4_state *state)
* the state_owner. we keep this around to process errors
*/
nfs4_increment_seqid(status, sp);
if (!status)
memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
return status;
}
int
nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode)
{
struct nfs4_state_owner *sp = state->owner;
int status = 0;
struct nfs_closeargs arg = {
.fh = NFS_FH(inode),
.seqid = sp->so_seqid,
.share_access = mode,
};
struct nfs_closeres res = {
.status = 0,
};
struct rpc_message msg = {
.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE],
.rpc_argp = &arg,
.rpc_resp = &res,
};
memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid));
status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
nfs4_increment_seqid(status, sp);
if (!status)
memcpy(&state->stateid, &res.stateid, sizeof(state->stateid));
return status;
}
......@@ -771,7 +821,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags)
return 1;
}
d_drop(dentry);
nfs4_put_open_state(state);
nfs4_close_state(state, openflags);
iput(inode);
return 0;
}
......@@ -872,15 +922,14 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
fattr->valid = 0;
if (size_change) {
state = nfs4_find_state_bypid(inode, current->pid);
struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
state = nfs4_find_state(inode, cred, FMODE_WRITE);
if (!state) {
struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
state = nfs4_do_open(dentry->d_parent->d_inode,
&dentry->d_name, FMODE_WRITE, NULL, cred);
put_rpccred(cred);
need_iput = 1;
}
put_rpccred(cred);
if (IS_ERR(state))
return PTR_ERR(state);
......@@ -895,7 +944,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
out:
if (state) {
inode = state->inode;
nfs4_put_open_state(state);
nfs4_close_state(state, FMODE_WRITE);
if (need_iput)
iput(inode);
}
......@@ -1161,7 +1210,7 @@ nfs4_proc_create(struct inode *dir, struct qstr *name, struct iattr *sattr,
status = nfs4_do_setattr(NFS_SERVER(dir), &fattr,
NFS_FH(inode), sattr, state);
if (status != 0) {
nfs4_put_open_state(state);
nfs4_close_state(state, flags);
iput(inode);
inode = ERR_PTR(status);
}
......@@ -1742,6 +1791,7 @@ nfs4_proc_file_open(struct inode *inode, struct file *filp)
{
struct dentry *dentry = filp->f_dentry;
struct nfs4_state *state;
struct rpc_cred *cred;
dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n",
(int)dentry->d_parent->d_name.len,
......@@ -1750,12 +1800,14 @@ nfs4_proc_file_open(struct inode *inode, struct file *filp)
/* Find our open stateid */
state = nfs4_find_state_bypid(inode, current->pid);
cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0);
state = nfs4_find_state(inode, cred, filp->f_mode);
put_rpccred(cred);
if (state == NULL) {
printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__);
return -EIO; /* ERACE actually */
}
nfs4_put_open_state(state);
nfs4_close_state(state, filp->f_mode);
if (filp->f_mode & FMODE_WRITE) {
lock_kernel();
nfs_set_mmcred(inode, state->owner->so_cred);
......@@ -1774,7 +1826,7 @@ nfs4_proc_file_release(struct inode *inode, struct file *filp)
struct nfs4_state *state = (struct nfs4_state *)filp->private_data;
if (state)
nfs4_put_open_state(state);
nfs4_close_state(state, filp->f_mode);
return 0;
}
......@@ -1816,6 +1868,9 @@ nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server)
rpc_delay(task, NFS4_POLL_RETRY_TIME);
task->tk_status = 0;
return -EAGAIN;
case -NFS4ERR_OLD_STATEID:
task->tk_status = 0;
return -EAGAIN;
}
return 0;
}
......@@ -1892,6 +1947,9 @@ nfs4_handle_error(struct nfs_server *server, int errorcode)
case -NFS4ERR_DELAY:
ret = nfs4_delay(server->client);
break;
case -NFS4ERR_OLD_STATEID:
ret = 0;
break;
default:
if (errorcode <= -1000) {
printk(KERN_WARNING "%s could not handle NFSv4 error %d\n",
......
......@@ -188,6 +188,23 @@ nfs4_client_grab_unused(struct nfs4_client *clp, struct rpc_cred *cred)
return sp;
}
/*
 * Look for a state owner on clp->cl_state_owners whose credential is @cred.
 *
 * On a match the owner's so_count is incremented, the entry is moved to
 * the head of the list, and the owner is returned.  Returns NULL when no
 * entry on the list uses @cred.
 *
 * The visible caller, nfs4_get_state_owner(), invokes this with
 * clp->cl_lock held; no locking is done here.
 */
static struct nfs4_state_owner *
nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred)
{
	struct nfs4_state_owner *owner;

	list_for_each_entry(owner, &clp->cl_state_owners, so_list) {
		if (owner->so_cred == cred) {
			atomic_inc(&owner->so_count);
			/* Move to the head of the list */
			list_move(&owner->so_list, &clp->cl_state_owners);
			return owner;
		}
	}
	return NULL;
}
/*
* nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
* create a new state_owner.
......@@ -208,6 +225,15 @@ nfs4_alloc_state_owner(void)
return sp;
}
/*
 * Take @sp off whatever list its so_list entry is currently linked on.
 *
 * The removal is done under the owning client's cl_lock, and
 * list_del_init() leaves so_list re-initialised after the unlink.
 */
static void
nfs4_unhash_state_owner(struct nfs4_state_owner *sp)
{
	struct nfs4_client *client = sp->so_client;

	spin_lock(&client->cl_lock);
	list_del_init(&sp->so_list);
	spin_unlock(&client->cl_lock);
}
struct nfs4_state_owner *
nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
{
......@@ -217,7 +243,9 @@ nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred)
get_rpccred(cred);
new = nfs4_alloc_state_owner();
spin_lock(&clp->cl_lock);
sp = nfs4_client_grab_unused(clp, cred);
sp = nfs4_find_state_owner(clp, cred);
if (sp == NULL)
sp = nfs4_client_grab_unused(clp, cred);
if (sp == NULL && new != NULL) {
list_add(&new->so_list, &clp->cl_state_owners);
new->so_client = clp;
......@@ -248,6 +276,8 @@ nfs4_put_state_owner(struct nfs4_state_owner *sp)
return;
if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
goto out_free;
if (list_empty(&sp->so_list))
goto out_free;
list_move(&sp->so_list, &clp->cl_unused);
clp->cl_nunused++;
spin_unlock(&clp->cl_lock);
......@@ -269,24 +299,38 @@ nfs4_alloc_open_state(void)
state = kmalloc(sizeof(*state), GFP_KERNEL);
if (!state)
return NULL;
state->pid = current->pid;
state->state = 0;
state->nreaders = 0;
state->nwriters = 0;
memset(state->stateid.data, 0, sizeof(state->stateid.data));
atomic_set(&state->count, 1);
return state;
}
static struct nfs4_state *
__nfs4_find_state_bypid(struct inode *inode, pid_t pid)
__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs4_state *state;
mode &= (FMODE_READ|FMODE_WRITE);
list_for_each_entry(state, &nfsi->open_states, inode_states) {
if (state->pid == pid) {
atomic_inc(&state->count);
return state;
}
if (state->owner->so_cred != cred)
continue;
if ((mode & FMODE_READ) != 0 && state->nreaders == 0)
continue;
if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0)
continue;
if ((state->state & mode) != mode)
continue;
/* Add the state to the head of the inode's list */
list_move(&state->inode_states, &nfsi->open_states);
atomic_inc(&state->count);
if (mode & FMODE_READ)
state->nreaders++;
if (mode & FMODE_WRITE)
state->nwriters++;
return state;
}
return NULL;
}
......@@ -298,7 +342,12 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
struct nfs4_state *state;
list_for_each_entry(state, &nfsi->open_states, inode_states) {
/* Is this in the process of being freed? */
if (state->nreaders == 0 && state->nwriters == 0)
continue;
if (state->owner == owner) {
/* Add the state to the head of the inode's list */
list_move(&state->inode_states, &nfsi->open_states);
atomic_inc(&state->count);
return state;
}
......@@ -307,16 +356,12 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
}
struct nfs4_state *
nfs4_find_state_bypid(struct inode *inode, pid_t pid)
nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs4_state *state;
spin_lock(&inode->i_lock);
state = __nfs4_find_state_bypid(inode, pid);
/* Add the state to the tail of the inode's list */
if (state)
list_move_tail(&state->inode_states, &nfsi->open_states);
state = __nfs4_find_state(inode, cred, mode);
spin_unlock(&inode->i_lock);
return state;
}
......@@ -387,6 +432,50 @@ nfs4_put_open_state(struct nfs4_state *state)
nfs4_put_state_owner(owner);
}
/*
 * Drop one reference of type @mode (FMODE_READ and/or FMODE_WRITE) on
 * @state and bring the server-side open mode in line with what is left.
 *
 * Steps:
 *  - under the owner's so_sema and the inode's i_lock, decrement the
 *    reader/writer counts named by @mode; once both counts are zero the
 *    state is unlinked from the inode's list of open states;
 *  - while the recorded mode (state->state) differs from the mode implied
 *    by the remaining counts, send OPEN_DOWNGRADE (some users left) or
 *    CLOSE (none left).  On failure the semaphore is released around
 *    nfs4_handle_error(); the attempt is repeated only if that returns 0;
 *  - finally release so_sema and call nfs4_put_open_state() on @state.
 */
void
nfs4_close_state(struct nfs4_state *state, mode_t mode)
{
	struct nfs4_state_owner *owner = state->owner;
	struct inode *inode = state->inode;
	int target;
	int err = 0;

	down(&owner->so_sema);

	/* Protect against nfs4_find_state() */
	spin_lock(&inode->i_lock);
	if (mode & FMODE_READ)
		state->nreaders--;
	if (mode & FMODE_WRITE)
		state->nwriters--;
	if (state->nreaders == 0 && state->nwriters == 0)
		list_del_init(&state->inode_states);
	spin_unlock(&inode->i_lock);

	for (;;) {
		/* Recorded mode is already empty: nothing to downgrade or close. */
		if (state->state == 0)
			break;

		/* Work out which share modes are still in use. */
		target = 0;
		if (state->nreaders)
			target |= FMODE_READ;
		if (state->nwriters)
			target |= FMODE_WRITE;
		if (state->state == target)
			break;

		if (target != 0)
			err = nfs4_do_downgrade(inode, state, target);
		else
			err = nfs4_do_close(inode, state);
		if (err == 0) {
			state->state = target;
			break;
		}

		/* so_sema is not held across the error handler. */
		up(&owner->so_sema);
		err = nfs4_handle_error(NFS_SERVER(inode), err);
		down(&owner->so_sema);

		/* Only go round again when the handler returned 0. */
		if (err != 0)
			break;
	}

	up(&owner->so_sema);
	nfs4_put_open_state(state);
}
/*
* Called with sp->so_sema held.
*
......@@ -399,6 +488,9 @@ nfs4_increment_seqid(int status, struct nfs4_state_owner *sp)
{
if (status == NFS_OK || seqid_mutating_err(-status))
sp->so_seqid++;
/* If the server returns BAD_SEQID, unhash state_owner here */
if (status == -NFS4ERR_BAD_SEQID)
nfs4_unhash_state_owner(sp);
}
static int reclaimer(void *);
......
......@@ -176,6 +176,14 @@ static int nfs_stat_to_errno(int);
op_decode_hdr_maxsz + \
4 + 5 + 2 + 3 + \
decode_getattr_maxsz
/*
 * Buffer size estimates for the OPEN_DOWNGRADE compound (PUTFH followed by
 * OPEN_DOWNGRADE): compound header + PUTFH + operation header + a constant.
 *
 * NOTE(review): the constants presumably count 32-bit XDR words - 7 for the
 * arguments written by encode_open_downgrade() (stateid, seqid, share_access,
 * share_deny) and 4 for the stateid read by decode_open_downgrade(); confirm
 * against the size of nfs4_stateid.
 *
 * The expansions are deliberately left unparenthesised, in the same form as
 * NFS4_enc_close_sz, so they are only safe where a bare sum is acceptable.
 */
#define NFS4_enc_open_downgrade_sz \
compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
op_encode_hdr_maxsz + 7
#define NFS4_dec_open_downgrade_sz \
compound_decode_hdr_maxsz + \
decode_putfh_maxsz + \
op_decode_hdr_maxsz + 4
#define NFS4_enc_close_sz compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
op_encode_hdr_maxsz + 5
......@@ -711,6 +719,22 @@ encode_open_reclaim(struct xdr_stream *xdr, struct nfs_open_reclaimargs *arg)
return 0;
}
/*
 * Append an OPEN_DOWNGRADE operation to the request being built in @xdr.
 *
 * Written in order: the OP_OPEN_DOWNGRADE opcode, the stateid bytes from
 * @arg, @arg->seqid, @arg->share_access and a zero word for the deny modes.
 *
 * Returns 0 on every path visible in this function.
 */
static int
encode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeargs *arg)
{
/* NOTE(review): p is not referenced directly below; presumably it is the
 * cursor set by RESERVE_SPACE and advanced by the WRITE* macros, whose
 * definitions are outside this view. */
uint32_t *p;
/* Room for the four WRITE32 values (16 bytes) plus the raw stateid. */
RESERVE_SPACE(16+sizeof(arg->stateid.data));
WRITE32(OP_OPEN_DOWNGRADE);
WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
WRITE32(arg->seqid);
WRITE32(arg->share_access);
/* No deny modes */
WRITE32(0);
return 0;
}
static int
encode_putfh(struct xdr_stream *xdr, struct nfs_fh *fh)
{
......@@ -1129,6 +1153,27 @@ nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p,
return status;
}
/*
 * Encode an OPEN_DOWNGRADE request
 *
 * The compound carries two operations: PUTFH for args->fh followed by
 * OPEN_DOWNGRADE.  Returns the first non-zero status from the operation
 * encoders; OPEN_DOWNGRADE is not encoded if PUTFH fails.
 */
static int
nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
{
	struct compound_hdr hdr = {
		.nops = 2,
	};
	struct xdr_stream xdr;
	int status;

	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
	encode_compound_hdr(&xdr, &hdr);

	status = encode_putfh(&xdr, args->fh);
	if (status == 0)
		status = encode_open_downgrade(&xdr, args);

	return status;
}
/*
* Encode a READ request
......@@ -2001,6 +2046,19 @@ decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
return 0;
}
/*
 * Decode the result of an OPEN_DOWNGRADE operation from @xdr.
 *
 * Any non-zero status from decode_op_hdr() is returned unchanged.
 * Otherwise the stateid bytes of the reply are copied into
 * @res->stateid.data and 0 is returned.
 */
static int
decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
{
/* NOTE(review): p is presumably the read cursor set by READ_BUF and
 * consumed by COPYMEM; the macro definitions are outside this view. */
uint32_t *p;
int status;
status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
if (status)
return status;
/* NOTE(review): READ_BUF may bail out of the function itself when the
 * reply is too short - confirm against its definition. */
READ_BUF(sizeof(res->stateid.data));
COPYMEM(res->stateid.data, sizeof(res->stateid.data));
return 0;
}
static int
decode_putfh(struct xdr_stream *xdr)
......@@ -2377,6 +2435,29 @@ decode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqs
DECODE_TAIL;
}
/*
 * Decode OPEN_DOWNGRADE response
 *
 * Decodes, in order, the compound header, the PUTFH result and the
 * OPEN_DOWNGRADE result (into @res).  Decoding stops at the first step
 * that reports a non-zero status, and that status is returned.
 */
static int
nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
{
	struct compound_hdr hdr;
	struct xdr_stream xdr;
	int status;

	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);

	status = decode_compound_hdr(&xdr, &hdr);
	if (status == 0)
		status = decode_putfh(&xdr);
	if (status == 0)
		status = decode_open_downgrade(&xdr, res);

	return status;
}
/*
* END OF "GENERIC" DECODE ROUTINES.
*/
......@@ -2827,6 +2908,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(OPEN, enc_open, dec_open),
PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm),
PROC(OPEN_RECLAIM, enc_open_reclaim, dec_open_reclaim),
PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade),
PROC(CLOSE, enc_close, dec_close),
PROC(SETATTR, enc_setattr, dec_setattr),
PROC(FSINFO, enc_fsinfo, dec_fsinfo),
......
......@@ -290,6 +290,7 @@ enum {
NFSPROC4_CLNT_OPEN,
NFSPROC4_CLNT_OPEN_CONFIRM,
NFSPROC4_CLNT_OPEN_RECLAIM,
NFSPROC4_CLNT_OPEN_DOWNGRADE,
NFSPROC4_CLNT_CLOSE,
NFSPROC4_CLNT_SETATTR,
NFSPROC4_CLNT_FSINFO,
......
......@@ -549,10 +549,11 @@ struct nfs4_state {
struct nfs4_state_owner *owner; /* Pointer to the open owner */
struct inode *inode; /* Pointer to the inode */
pid_t pid; /* Thread that called OPEN */
nfs4_stateid stateid;
unsigned int nreaders;
unsigned int nwriters;
int state; /* State on the server (R,W, or RW) */
atomic_t count;
};
......@@ -568,6 +569,7 @@ extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *);
extern int nfs4_proc_async_renew(struct nfs4_client *);
extern int nfs4_proc_renew(struct nfs4_client *);
extern int nfs4_do_close(struct inode *, struct nfs4_state *);
/* Send OPEN_DOWNGRADE for the given state, requesting share access 'mode'. */
extern int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode);
extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
......@@ -586,7 +588,8 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc
extern void nfs4_put_state_owner(struct nfs4_state_owner *);
extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
extern void nfs4_put_open_state(struct nfs4_state *);
extern struct nfs4_state *nfs4_find_state_bypid(struct inode *, pid_t);
extern void nfs4_close_state(struct nfs4_state *, mode_t);
extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode);
extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp);
extern int nfs4_handle_error(struct nfs_server *, int);
extern void nfs4_schedule_state_recovery(struct nfs4_client *);
......
......@@ -153,6 +153,7 @@ struct nfs_closeargs {
struct nfs_fh * fh;
nfs4_stateid stateid;
__u32 seqid;
__u32 share_access;
};
struct nfs_closeres {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment