Commit 17919837 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'nfs-for-2.6.39' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6

* 'nfs-for-2.6.39' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6: (54 commits)
  RPC: killing RPC tasks races fixed
  xprt: remove redundant check
  SUNRPC: Convert struct rpc_xprt to use atomic_t counters
  SUNRPC: Ensure we always run the tk_callback before tk_action
  sunrpc: fix printk format warning
  xprt: remove redundant null check
  nfs: BKL is no longer needed, so remove the include
  NFS: Fix a warning in fs/nfs/idmap.c
  Cleanup: Factor out some cut-and-paste code.
  cleanup: save 60 lines/100 bytes by combining two mostly duplicate functions.
  NFS: account direct-io into task io accounting
  gss:krb5 only include enctype numbers in gm_upcall_enctypes
  RPCRDMA: Fix FRMR registration/invalidate handling.
  RPCRDMA: Fix to XDR page base interpretation in marshalling logic.
  NFSv4: Send unmapped uid/gids to the server when using auth_sys
  NFSv4: Propagate the error NFS4ERR_BADOWNER to nfs4_do_setattr
  NFSv4: cleanup idmapper functions to take an nfs_server argument
  NFSv4: Send unmapped uid/gids to the server if the idmapper fails
  NFSv4: If the server sends us a numeric uid/gid then accept it
  NFSv4.1: reject zero layout with zeroed stripe unit
  ...
parents 374e5525 8e26de23
......@@ -46,3 +46,10 @@ data server cache
file driver devices refer to data servers, which are kept in a module
level cache. Its reference is held over the lifetime of the deviceid
pointing to it.
lseg
----
lseg maintains an extra reference corresponding to the NFS_LSEG_VALID
bit which holds it in the pnfs_layout_hdr's list. When the final lseg
is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED
bit is set, preventing any new lsegs from being added.
......@@ -1580,6 +1580,14 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
of returning the full 64-bit number.
The default is to return 64-bit inode numbers.
nfs.nfs4_disable_idmapping=
[NFSv4] When set, this option disables the NFSv4
idmapper on the client, but only if the mount
is using the 'sec=sys' security flavour. This may
make migration from legacy NFSv2/v3 systems easier
provided that the server has the appropriate support.
The default is to always enable NFSv4 idmapping.
nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take
when a NMI is triggered.
Format: [state][,regs][,debounce][,die]
......
......@@ -188,10 +188,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
rv = NFS4ERR_DELAY;
list_del_init(&lo->plh_bulk_recall);
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list);
put_layout_hdr(lo);
iput(ino);
}
pnfs_free_lseg_list(&free_me_list);
return rv;
}
......
......@@ -81,6 +81,11 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
}
#endif /* CONFIG_NFS_V4 */
/*
* Turn off NFSv4 uid/gid mapping when using AUTH_SYS
*/
static int nfs4_disable_idmapping = 0;
/*
* RPC cruft for NFS
*/
......@@ -481,7 +486,12 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
* Look up a client by IP address and protocol version
* - creates a new record if one doesn't yet exist
*/
static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
static struct nfs_client *
nfs_get_client(const struct nfs_client_initdata *cl_init,
const struct rpc_timeout *timeparms,
const char *ip_addr,
rpc_authflavor_t authflavour,
int noresvport)
{
struct nfs_client *clp, *new = NULL;
int error;
......@@ -512,6 +522,13 @@ static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_in
clp = new;
list_add(&clp->cl_share_link, &nfs_client_list);
spin_unlock(&nfs_client_lock);
error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
authflavour, noresvport);
if (error < 0) {
nfs_put_client(clp);
return ERR_PTR(error);
}
dprintk("--> nfs_get_client() = %p [new]\n", clp);
return clp;
......@@ -767,9 +784,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
/*
* Initialise an NFS2 or NFS3 client
*/
static int nfs_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const struct nfs_parsed_mount_data *data)
int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
const char *ip_addr, rpc_authflavor_t authflavour,
int noresvport)
{
int error;
......@@ -784,7 +801,7 @@ static int nfs_init_client(struct nfs_client *clp,
* - RFC 2623, sec 2.3.2
*/
error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
0, data->flags & NFS_MOUNT_NORESVPORT);
0, noresvport);
if (error < 0)
goto error;
nfs_mark_client_ready(clp, NFS_CS_READY);
......@@ -820,19 +837,17 @@ static int nfs_init_server(struct nfs_server *server,
cl_init.rpc_ops = &nfs_v3_clientops;
#endif
nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
data->timeo, data->retrans);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX,
data->flags & NFS_MOUNT_NORESVPORT);
if (IS_ERR(clp)) {
dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
return PTR_ERR(clp);
}
nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
data->timeo, data->retrans);
error = nfs_init_client(clp, &timeparms, data);
if (error < 0)
goto error;
server->nfs_client = clp;
/* Initialise the client representation from the mount data */
......@@ -1009,14 +1024,19 @@ static void nfs_server_insert_lists(struct nfs_server *server)
spin_lock(&nfs_client_lock);
list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
list_add_tail(&server->master_link, &nfs_volume_list);
clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
spin_unlock(&nfs_client_lock);
}
static void nfs_server_remove_lists(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
spin_lock(&nfs_client_lock);
list_del_rcu(&server->client_link);
if (clp && list_empty(&clp->cl_superblocks))
set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
list_del(&server->master_link);
spin_unlock(&nfs_client_lock);
......@@ -1307,11 +1327,11 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
/*
* Initialise an NFS4 client record
*/
static int nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr,
rpc_authflavor_t authflavour,
int flags)
int nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr,
rpc_authflavor_t authflavour,
int noresvport)
{
int error;
......@@ -1325,7 +1345,7 @@ static int nfs4_init_client(struct nfs_client *clp,
clp->rpc_ops = &nfs_v4_clientops;
error = nfs_create_rpc_client(clp, timeparms, authflavour,
1, flags & NFS_MOUNT_NORESVPORT);
1, noresvport);
if (error < 0)
goto error;
strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
......@@ -1378,27 +1398,71 @@ static int nfs4_set_client(struct nfs_server *server,
dprintk("--> nfs4_set_client()\n");
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour,
server->flags & NFS_MOUNT_NORESVPORT);
if (IS_ERR(clp)) {
error = PTR_ERR(clp);
goto error;
}
error = nfs4_init_client(clp, timeparms, ip_addr, authflavour,
server->flags);
if (error < 0)
goto error_put;
/*
* Query for the lease time on clientid setup or renewal
*
* Note that this will be set on nfs_clients that were created
* only for the DS role and did not set this bit, but now will
* serve a dual role.
*/
set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
server->nfs_client = clp;
dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
return 0;
error_put:
nfs_put_client(clp);
error:
dprintk("<-- nfs4_set_client() = xerror %d\n", error);
return error;
}
/*
* Set up a pNFS Data Server client.
*
* Return any existing nfs_client that matches server address,port,version
* and minorversion.
*
* For a new nfs_client, use a soft mount (default), a low retrans and a
* low timeout interval so that if a connection is lost, we retry through
* the MDS.
*/
struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
const struct sockaddr *ds_addr,
int ds_addrlen, int ds_proto)
{
struct nfs_client_initdata cl_init = {
.addr = ds_addr,
.addrlen = ds_addrlen,
.rpc_ops = &nfs_v4_clientops,
.proto = ds_proto,
.minorversion = mds_clp->cl_minorversion,
};
struct rpc_timeout ds_timeout = {
.to_initval = 15 * HZ,
.to_maxval = 15 * HZ,
.to_retries = 1,
.to_exponential = 1,
};
struct nfs_client *clp;
/*
* Set an authflavor equual to the MDS value. Use the MDS nfs_client
* cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
* (section 13.1 RFC 5661).
*/
clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
mds_clp->cl_rpcclient->cl_auth->au_flavor, 0);
dprintk("<-- %s %p\n", __func__, clp);
return clp;
}
EXPORT_SYMBOL(nfs4_set_ds_client);
/*
* Session has been established, and the client marked ready.
......@@ -1435,6 +1499,10 @@ static int nfs4_server_common_setup(struct nfs_server *server,
BUG_ON(!server->nfs_client->rpc_ops);
BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
/* data servers support only a subset of NFSv4.1 */
if (is_ds_only_client(server->nfs_client))
return -EPROTONOSUPPORT;
fattr = nfs_alloc_fattr();
if (fattr == NULL)
return -ENOMEM;
......@@ -1504,6 +1572,13 @@ static int nfs4_init_server(struct nfs_server *server,
if (error < 0)
goto error;
/*
* Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
* authentication.
*/
if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX)
server->caps |= NFS_CAP_UIDGID_NOMAP;
if (data->rsize)
server->rsize = nfs_block_size(data->rsize, NULL);
if (data->wsize)
......@@ -1921,3 +1996,7 @@ void nfs_fs_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
module_param(nfs4_disable_idmapping, bool, 0644);
MODULE_PARM_DESC(nfs4_disable_idmapping,
"Turn off NFSv4 idmapping when using 'sec=sys'");
......@@ -45,6 +45,7 @@
#include <linux/pagemap.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
......@@ -649,8 +650,7 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
if (nfs_writeback_done(task, data) != 0)
return;
nfs_writeback_done(task, data);
}
/*
......@@ -938,6 +938,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
if (retval)
goto out;
task_io_account_read(count);
retval = nfs_direct_read(iocb, iov, nr_segs, pos);
if (retval > 0)
iocb->ki_pos = pos + retval;
......@@ -999,6 +1001,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
if (retval)
goto out;
task_io_account_write(count);
retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
if (retval > 0)
......
......@@ -387,10 +387,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
file->f_path.dentry->d_name.name,
mapping->host->i_ino, len, (long long) pos);
pnfs_update_layout(mapping->host,
nfs_file_open_context(file),
IOMODE_RW);
start:
/*
* Prevent starvation issues if someone is doing a consistency
......
......@@ -33,16 +33,41 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kernel.h>
static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
{
unsigned long val;
char buf[16];
if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
return 0;
memcpy(buf, name, namelen);
buf[namelen] = '\0';
if (strict_strtoul(buf, 0, &val) != 0)
return 0;
*res = val;
return 1;
}
static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
{
return snprintf(buf, buflen, "%u", id);
}
#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/sunrpc/sched.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs_sb.h>
#include <linux/nfs_idmap.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <linux/rcupdate.h>
#include <linux/kernel.h>
#include <linux/err.h>
#include <keys/user-type.h>
......@@ -219,23 +244,39 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen,
return ret;
}
int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
{
if (nfs_map_string_to_numeric(name, namelen, uid))
return 0;
return nfs_idmap_lookup_id(name, namelen, "uid", uid);
}
int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid)
int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
{
if (nfs_map_string_to_numeric(name, namelen, gid))
return 0;
return nfs_idmap_lookup_id(name, namelen, "gid", gid);
}
int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
{
return nfs_idmap_lookup_name(uid, "user", buf, buflen);
int ret = -EINVAL;
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
if (ret < 0)
ret = nfs_map_numeric_to_string(uid, buf, buflen);
return ret;
}
int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen)
int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
{
return nfs_idmap_lookup_name(gid, "group", buf, buflen);
int ret = -EINVAL;
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
if (ret < 0)
ret = nfs_map_numeric_to_string(gid, buf, buflen);
return ret;
}
#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
......@@ -243,7 +284,6 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/in.h>
......@@ -695,31 +735,45 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
return hash;
}
int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
{
struct idmap *idmap = clp->cl_idmap;
struct idmap *idmap = server->nfs_client->cl_idmap;
if (nfs_map_string_to_numeric(name, namelen, uid))
return 0;
return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
}
int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)
int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
{
struct idmap *idmap = clp->cl_idmap;
struct idmap *idmap = server->nfs_client->cl_idmap;
if (nfs_map_string_to_numeric(name, namelen, uid))
return 0;
return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
}
int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
{
struct idmap *idmap = clp->cl_idmap;
struct idmap *idmap = server->nfs_client->cl_idmap;
int ret = -EINVAL;
return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
if (ret < 0)
ret = nfs_map_numeric_to_string(uid, buf, buflen);
return ret;
}
int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen)
int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
{
struct idmap *idmap = clp->cl_idmap;
struct idmap *idmap = server->nfs_client->cl_idmap;
int ret = -EINVAL;
return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
if (ret < 0)
ret = nfs_map_numeric_to_string(uid, buf, buflen);
return ret;
}
#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
......@@ -148,6 +148,9 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
struct nfs_fattr *);
extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
extern int nfs4_check_client_ready(struct nfs_client *clp);
extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
const struct sockaddr *ds_addr,
int ds_addrlen, int ds_proto);
#ifdef CONFIG_PROC_FS
extern int __init nfs_fs_proc_init(void);
extern void nfs_fs_proc_exit(void);
......@@ -213,8 +216,14 @@ extern const u32 nfs41_maxwrite_overhead;
extern struct rpc_procinfo nfs4_procedures[];
#endif
extern int nfs4_init_ds_session(struct nfs_client *clp);
/* proc.c */
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
extern int nfs_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr, rpc_authflavor_t authflavour,
int noresvport);
/* dir.c */
extern int nfs_access_cache_shrinker(struct shrinker *shrink,
......@@ -262,9 +271,15 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
#endif
/* read.c */
extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
/* write.c */
extern int nfs_initiate_write(struct nfs_write_data *data,
struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops,
int how);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
#ifdef CONFIG_MIGRATION
extern int nfs_migrate_page(struct address_space *,
......@@ -274,6 +289,13 @@ extern int nfs_migrate_page(struct address_space *,
#endif
/* nfs4proc.c */
extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
extern int nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr,
rpc_authflavor_t authflavour,
int noresvport);
extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
extern int _nfs4_call_sync(struct nfs_server *server,
struct rpc_message *msg,
struct nfs4_sequence_args *args,
......
......@@ -885,4 +885,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.lock = nfs3_proc_lock,
.clear_acl_cache = nfs3_forget_cached_acls,
.close_context = nfs_close_context,
.init_client = nfs_init_client,
};
......@@ -252,6 +252,9 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
extern int nfs4_setup_sequence(const struct nfs_server *server,
struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
int cache_reply, struct rpc_task *task);
extern int nfs41_setup_sequence(struct nfs4_session *session,
struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
int cache_reply, struct rpc_task *task);
extern void nfs4_destroy_session(struct nfs4_session *session);
extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
extern int nfs4_proc_create_session(struct nfs_client *);
......@@ -259,6 +262,19 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *);
extern int nfs4_init_session(struct nfs_server *server);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
static inline bool
is_ds_only_client(struct nfs_client *clp)
{
return (clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) ==
EXCHGID4_FLAG_USE_PNFS_DS;
}
static inline bool
is_ds_client(struct nfs_client *clp)
{
return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS;
}
#else /* CONFIG_NFS_v4_1 */
static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
{
......@@ -276,6 +292,18 @@ static inline int nfs4_init_session(struct nfs_server *server)
{
return 0;
}
static inline bool
is_ds_only_client(struct nfs_client *clp)
{
return false;
}
static inline bool
is_ds_client(struct nfs_client *clp)
{
return false;
}
#endif /* CONFIG_NFS_V4_1 */
extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
......
......@@ -40,32 +40,309 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
MODULE_DESCRIPTION("The NFSv4 file layout driver");
static int
filelayout_set_layoutdriver(struct nfs_server *nfss)
#define FILELAYOUT_POLL_RETRY_MAX (15*HZ)
static loff_t
filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
loff_t offset)
{
int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client,
nfs4_fl_free_deviceid_callback);
if (status) {
printk(KERN_WARNING "%s: deviceid cache could not be "
"initialized\n", __func__);
return status;
u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
u64 tmp;
offset -= flseg->pattern_offset;
tmp = offset;
do_div(tmp, stripe_width);
return tmp * flseg->stripe_unit + do_div(offset, flseg->stripe_unit);
}
/* This function is used by the layout driver to calculate the
* offset of the file on the dserver based on whether the
* layout type is STRIPE_DENSE or STRIPE_SPARSE
*/
static loff_t
filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
{
struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
switch (flseg->stripe_type) {
case STRIPE_SPARSE:
return offset;
case STRIPE_DENSE:
return filelayout_get_dense_offset(flseg, offset);
}
dprintk("%s: deviceid cache has been initialized successfully\n",
__func__);
BUG();
}
/* For data server errors we don't recover from */
static void
filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
{
if (lseg->pls_range.iomode == IOMODE_RW) {
dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
} else {
dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
}
}
static int filelayout_async_handle_error(struct rpc_task *task,
struct nfs4_state *state,
struct nfs_client *clp,
int *reset)
{
if (task->tk_status >= 0)
return 0;
*reset = 0;
switch (task->tk_status) {
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_DEADSESSION:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_SEQ_FALSE_RETRY:
case -NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
nfs4_schedule_session_recovery(clp->cl_session);
break;
case -NFS4ERR_DELAY:
case -NFS4ERR_GRACE:
case -EKEYEXPIRED:
rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
break;
default:
dprintk("%s DS error. Retry through MDS %d\n", __func__,
task->tk_status);
*reset = 1;
break;
}
task->tk_status = 0;
return -EAGAIN;
}
/* NFS_PROTO call done callback routines */
static int filelayout_read_done_cb(struct rpc_task *task,
struct nfs_read_data *data)
{
struct nfs_client *clp = data->ds_clp;
int reset = 0;
dprintk("%s DS read\n", __func__);
if (filelayout_async_handle_error(task, data->args.context->state,
data->ds_clp, &reset) == -EAGAIN) {
dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
__func__, data->ds_clp, data->ds_clp->cl_session);
if (reset) {
filelayout_set_lo_fail(data->lseg);
nfs4_reset_read(task, data);
clp = NFS_SERVER(data->inode)->nfs_client;
}
nfs_restart_rpc(task, clp);
return -EAGAIN;
}
return 0;
}
/* Clear out the layout by destroying its device list */
static int
filelayout_clear_layoutdriver(struct nfs_server *nfss)
/*
* Call ops for the async read/write cases
* In the case of dense layouts, the offset needs to be reset to its
* original value.
*/
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
dprintk("--> %s\n", __func__);
struct nfs_read_data *rdata = (struct nfs_read_data *)data;
rdata->read_done_cb = filelayout_read_done_cb;
if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
&rdata->args.seq_args, &rdata->res.seq_res,
0, task))
return;
rpc_call_start(task);
}
static void filelayout_read_call_done(struct rpc_task *task, void *data)
{
struct nfs_read_data *rdata = (struct nfs_read_data *)data;
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
/* Note this may cause RPC to be resent */
rdata->mds_ops->rpc_call_done(task, data);
}
static void filelayout_read_release(void *data)
{
struct nfs_read_data *rdata = (struct nfs_read_data *)data;
rdata->mds_ops->rpc_release(data);
}
static int filelayout_write_done_cb(struct rpc_task *task,
struct nfs_write_data *data)
{
int reset = 0;
if (filelayout_async_handle_error(task, data->args.context->state,
data->ds_clp, &reset) == -EAGAIN) {
struct nfs_client *clp;
dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
__func__, data->ds_clp, data->ds_clp->cl_session);
if (reset) {
filelayout_set_lo_fail(data->lseg);
nfs4_reset_write(task, data);
clp = NFS_SERVER(data->inode)->nfs_client;
} else
clp = data->ds_clp;
nfs_restart_rpc(task, clp);
return -EAGAIN;
}
if (nfss->nfs_client->cl_devid_cache)
pnfs_put_deviceid_cache(nfss->nfs_client);
return 0;
}
static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
struct nfs_write_data *wdata = (struct nfs_write_data *)data;
if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
&wdata->args.seq_args, &wdata->res.seq_res,
0, task))
return;
rpc_call_start(task);
}
static void filelayout_write_call_done(struct rpc_task *task, void *data)
{
struct nfs_write_data *wdata = (struct nfs_write_data *)data;
/* Note this may cause RPC to be resent */
wdata->mds_ops->rpc_call_done(task, data);
}
static void filelayout_write_release(void *data)
{
struct nfs_write_data *wdata = (struct nfs_write_data *)data;
wdata->mds_ops->rpc_release(data);
}
struct rpc_call_ops filelayout_read_call_ops = {
.rpc_call_prepare = filelayout_read_prepare,
.rpc_call_done = filelayout_read_call_done,
.rpc_release = filelayout_read_release,
};
struct rpc_call_ops filelayout_write_call_ops = {
.rpc_call_prepare = filelayout_write_prepare,
.rpc_call_done = filelayout_write_call_done,
.rpc_release = filelayout_write_release,
};
static enum pnfs_try_status
filelayout_read_pagelist(struct nfs_read_data *data)
{
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
loff_t offset = data->args.offset;
u32 j, idx;
struct nfs_fh *fh;
int status;
dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
__func__, data->inode->i_ino,
data->args.pgbase, (size_t)data->args.count, offset);
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
ds = nfs4_fl_prepare_ds(lseg, idx);
if (!ds) {
/* Either layout fh index faulty, or ds connect failed */
set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
dprintk("%s USE DS:ip %x %hu\n", __func__,
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
/* No multipath support. Use first DS */
data->ds_clp = ds->ds_clp;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
data->args.fh = fh;
data->args.offset = filelayout_get_dserver_offset(lseg, offset);
data->mds_offset = offset;
/* Perform an asynchronous read to ds */
status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
&filelayout_read_call_ops);
BUG_ON(status != 0);
return PNFS_ATTEMPTED;
}
/* Perform async writes. */
static enum pnfs_try_status
filelayout_write_pagelist(struct nfs_write_data *data, int sync)
{
struct pnfs_layout_segment *lseg = data->lseg;
struct nfs4_pnfs_ds *ds;
loff_t offset = data->args.offset;
u32 j, idx;
struct nfs_fh *fh;
int status;
/* Retrieve the correct rpc_client for the byte range */
j = nfs4_fl_calc_j_index(lseg, offset);
idx = nfs4_fl_calc_ds_index(lseg, j);
ds = nfs4_fl_prepare_ds(lseg, idx);
if (!ds) {
printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
return PNFS_NOT_ATTEMPTED;
}
dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
data->inode->i_ino, sync, (size_t) data->args.count, offset,
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
/* We can't handle commit to ds yet */
if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
data->args.stable = NFS_FILE_SYNC;
data->write_done_cb = filelayout_write_done_cb;
data->ds_clp = ds->ds_clp;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
data->args.fh = fh;
/*
* Get the file offset on the dserver. Set the write offset to
* this offset and save the original offset.
*/
data->args.offset = filelayout_get_dserver_offset(lseg, offset);
data->mds_offset = offset;
/* Perform an asynchronous write */
status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
&filelayout_write_call_ops, sync);
BUG_ON(status != 0);
return PNFS_ATTEMPTED;
}
/*
* filelayout_check_layout()
*
......@@ -92,14 +369,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out;
}
if (fl->stripe_unit % PAGE_SIZE) {
dprintk("%s Stripe unit (%u) not page aligned\n",
if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
dprintk("%s Invalid stripe unit (%u)\n",
__func__, fl->stripe_unit);
goto out;
}
/* find and reference the deviceid */
dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id);
dsaddr = nfs4_fl_find_get_deviceid(id);
if (dsaddr == NULL) {
dsaddr = get_device_info(lo->plh_inode, id);
if (dsaddr == NULL)
......@@ -134,7 +411,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
dprintk("--> %s returns %d\n", __func__, status);
return status;
out_put:
pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid);
nfs4_fl_put_deviceid(dsaddr);
goto out;
}
......@@ -243,23 +520,47 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
static void
filelayout_free_lseg(struct pnfs_layout_segment *lseg)
{
struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode);
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
dprintk("--> %s\n", __func__);
pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache,
&fl->dsaddr->deviceid);
nfs4_fl_put_deviceid(fl->dsaddr);
_filelayout_free_lseg(fl);
}
/*
* filelayout_pg_test(). Called by nfs_can_coalesce_requests()
*
* return 1 : coalesce page
* return 0 : don't coalesce page
*/
int
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
u64 p_stripe, r_stripe;
u32 stripe_unit;
if (!pgio->pg_lseg)
return 1;
p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
do_div(p_stripe, stripe_unit);
do_div(r_stripe, stripe_unit);
return (p_stripe == r_stripe);
}
static struct pnfs_layoutdriver_type filelayout_type = {
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
.owner = THIS_MODULE,
.set_layoutdriver = filelayout_set_layoutdriver,
.clear_layoutdriver = filelayout_clear_layoutdriver,
.alloc_lseg = filelayout_alloc_lseg,
.free_lseg = filelayout_free_lseg,
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
.owner = THIS_MODULE,
.alloc_lseg = filelayout_alloc_lseg,
.free_lseg = filelayout_free_lseg,
.pg_test = filelayout_pg_test,
.read_pagelist = filelayout_read_pagelist,
.write_pagelist = filelayout_write_pagelist,
};
static int __init nfs4filelayout_init(void)
......
......@@ -55,8 +55,14 @@ struct nfs4_pnfs_ds {
atomic_t ds_count;
};
/* nfs4_file_layout_dsaddr flags */
#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
struct nfs4_file_layout_dsaddr {
struct pnfs_deviceid_node deviceid;
struct hlist_node node;
struct nfs4_deviceid deviceid;
atomic_t ref;
unsigned long flags;
u32 stripe_count;
u8 *stripe_indices;
u32 ds_num;
......@@ -83,11 +89,18 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
generic_hdr);
}
extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *);
extern struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
extern void print_ds(struct nfs4_pnfs_ds *ds);
extern void print_deviceid(struct nfs4_deviceid *dev_id);
u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
u32 ds_idx);
extern struct nfs4_file_layout_dsaddr *
nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id);
nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
struct nfs4_file_layout_dsaddr *
get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
......
......@@ -36,6 +36,30 @@
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
/*
* Device ID RCU cache. A device ID is unique per client ID and layout type.
*/
#define NFS4_FL_DEVICE_ID_HASH_BITS 5
#define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS)
#define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1)
static inline u32
nfs4_fl_deviceid_hash(struct nfs4_deviceid *id)
{
unsigned char *cptr = (unsigned char *)id->data;
unsigned int nbytes = NFS4_DEVICEID4_SIZE;
u32 x = 0;
while (nbytes--) {
x *= 37;
x += *cptr++;
}
return x & NFS4_FL_DEVICE_ID_HASH_MASK;
}
static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE];
static DEFINE_SPINLOCK(filelayout_deviceid_lock);
/*
* Data server cache
*
......@@ -104,6 +128,67 @@ _data_server_lookup_locked(u32 ip_addr, u32 port)
return NULL;
}
/*
* Create an rpc connection to the nfs4_pnfs_ds data server
* Currently only support IPv4
*/
static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{
struct nfs_client *clp;
struct sockaddr_in sin;
int status = 0;
dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = ds->ds_ip_addr;
sin.sin_port = ds->ds_port;
clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
sizeof(sin), IPPROTO_TCP);
if (IS_ERR(clp)) {
status = PTR_ERR(clp);
goto out;
}
if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
if (!is_ds_client(clp)) {
status = -ENODEV;
goto out_put;
}
ds->ds_clp = clp;
dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
goto out;
}
/*
* Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
* be equal to the MDS lease. Renewal is scheduled in create_session.
*/
spin_lock(&mds_srv->nfs_client->cl_lock);
clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
spin_unlock(&mds_srv->nfs_client->cl_lock);
clp->cl_last_renewal = jiffies;
/* New nfs_client */
status = nfs4_init_ds_session(clp);
if (status)
goto out_put;
ds->ds_clp = clp;
dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
ntohs(ds->ds_port));
out:
return status;
out_put:
nfs_put_client(clp);
goto out;
}
static void
destroy_ds(struct nfs4_pnfs_ds *ds)
{
......@@ -122,7 +207,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
struct nfs4_pnfs_ds *ds;
int i;
print_deviceid(&dsaddr->deviceid.de_id);
print_deviceid(&dsaddr->deviceid);
for (i = 0; i < dsaddr->ds_num; i++) {
ds = dsaddr->ds_list[i];
......@@ -139,15 +224,6 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
kfree(dsaddr);
}
void
nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
{
struct nfs4_file_layout_dsaddr *dsaddr =
container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
nfs4_fl_free_deviceid(dsaddr);
}
static struct nfs4_pnfs_ds *
nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
{
......@@ -300,7 +376,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
dsaddr->stripe_count = cnt;
dsaddr->ds_num = num;
memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
/* Go back an read stripe indices */
p = indicesp;
......@@ -350,28 +426,37 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
}
/*
* Decode the opaque device specified in 'dev'
* and add it to the list of available devices.
* If the deviceid is already cached, nfs4_add_deviceid will return
* a pointer to the cached struct and throw away the new.
* Decode the opaque device specified in 'dev' and add it to the cache of
* available devices.
*/
static struct nfs4_file_layout_dsaddr*
static struct nfs4_file_layout_dsaddr *
decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
{
struct nfs4_file_layout_dsaddr *dsaddr;
struct pnfs_deviceid_node *d;
struct nfs4_file_layout_dsaddr *d, *new;
long hash;
dsaddr = decode_device(inode, dev);
if (!dsaddr) {
new = decode_device(inode, dev);
if (!new) {
printk(KERN_WARNING "%s: Could not decode or add device\n",
__func__);
return NULL;
}
d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
&dsaddr->deviceid);
spin_lock(&filelayout_deviceid_lock);
d = nfs4_fl_find_get_deviceid(&new->deviceid);
if (d) {
spin_unlock(&filelayout_deviceid_lock);
nfs4_fl_free_deviceid(new);
return d;
}
INIT_HLIST_NODE(&new->node);
atomic_set(&new->ref, 1);
hash = nfs4_fl_deviceid_hash(&new->deviceid);
hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]);
spin_unlock(&filelayout_deviceid_lock);
return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
return new;
}
/*
......@@ -446,12 +531,123 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
return dsaddr;
}
void
nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
{
if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) {
hlist_del_rcu(&dsaddr->node);
spin_unlock(&filelayout_deviceid_lock);
synchronize_rcu();
nfs4_fl_free_deviceid(dsaddr);
}
}
struct nfs4_file_layout_dsaddr *
nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id)
{
struct nfs4_file_layout_dsaddr *d;
struct hlist_node *n;
long hash = nfs4_fl_deviceid_hash(id);
rcu_read_lock();
hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) {
if (!memcmp(&d->deviceid, id, sizeof(*id))) {
if (!atomic_inc_not_zero(&d->ref))
goto fail;
rcu_read_unlock();
return d;
}
}
fail:
rcu_read_unlock();
return NULL;
}
/*
* Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
* Then: ((res + fsi) % dsaddr->stripe_count)
*/
u32
nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
{
struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
u64 tmp;
tmp = offset - flseg->pattern_offset;
do_div(tmp, flseg->stripe_unit);
tmp += flseg->first_stripe_index;
return do_div(tmp, flseg->dsaddr->stripe_count);
}
u32
nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
{
return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
}
struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
{
struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
u32 i;
if (flseg->stripe_type == STRIPE_SPARSE) {
if (flseg->num_fh == 1)
i = 0;
else if (flseg->num_fh == 0)
/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
return NULL;
else
i = nfs4_fl_calc_ds_index(lseg, j);
} else
i = j;
return flseg->fh_array[i];
}
static void
filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
int err, u32 ds_addr)
{
u32 *p = (u32 *)&dsaddr->deviceid;
printk(KERN_ERR "NFS: data server %x connection error %d."
" Deviceid [%x%x%x%x] marked out of use.\n",
ds_addr, err, p[0], p[1], p[2], p[3]);
spin_lock(&filelayout_deviceid_lock);
dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
spin_unlock(&filelayout_deviceid_lock);
}
struct nfs4_pnfs_ds *
nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
struct pnfs_deviceid_node *d;
struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
return (d == NULL) ? NULL :
container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
if (ds == NULL) {
printk(KERN_ERR "%s: No data server for offset index %d\n",
__func__, ds_idx);
return NULL;
}
if (!ds->ds_clp) {
struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
int err;
if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
/* Already tried to connect, don't try again */
dprintk("%s Deviceid marked out of use\n", __func__);
return NULL;
}
err = nfs4_ds_connect(s, ds);
if (err) {
filelayout_mark_devid_negative(dsaddr, err,
ntohl(ds->ds_ip_addr));
return NULL;
}
}
return ds;
}
......@@ -85,6 +85,9 @@ static int nfs4_map_errors(int err)
switch (err) {
case -NFS4ERR_RESOURCE:
return -EREMOTEIO;
case -NFS4ERR_BADOWNER:
case -NFS4ERR_BADNAME:
return -EINVAL;
default:
dprintk("%s could not handle NFSv4 error %d\n",
__func__, -err);
......@@ -241,7 +244,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
/* This is the error handling routine for processes that are allowed
* to sleep.
*/
static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state *state = exception->state;
......@@ -293,6 +296,19 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
break;
case -NFS4ERR_OLD_STATEID:
exception->retry = 1;
break;
case -NFS4ERR_BADOWNER:
/* The following works around a Linux server bug! */
case -NFS4ERR_BADNAME:
if (server->caps & NFS_CAP_UIDGID_NOMAP) {
server->caps &= ~NFS_CAP_UIDGID_NOMAP;
exception->retry = 1;
printk(KERN_WARNING "NFS: v4 server %s "
"does not accept raw "
"uid/gids. "
"Reenabling the idmapper.\n",
server->nfs_client->cl_hostname);
}
}
/* We failed to handle the error */
return nfs4_map_errors(ret);
......@@ -505,7 +521,7 @@ nfs4_find_slot(struct nfs4_slot_table *tbl)
return ret_id;
}
static int nfs41_setup_sequence(struct nfs4_session *session,
int nfs41_setup_sequence(struct nfs4_session *session,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
int cache_reply,
......@@ -571,6 +587,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
res->sr_status = 1;
return 0;
}
EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
int nfs4_setup_sequence(const struct nfs_server *server,
struct nfs4_sequence_args *args,
......@@ -1573,9 +1590,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
return 0;
}
static int nfs4_recover_expired_lease(struct nfs_server *server)
static int nfs4_client_recover_expired_lease(struct nfs_client *clp)
{
struct nfs_client *clp = server->nfs_client;
unsigned int loop;
int ret;
......@@ -1592,6 +1608,11 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
return ret;
}
static int nfs4_recover_expired_lease(struct nfs_server *server)
{
return nfs4_client_recover_expired_lease(server->nfs_client);
}
/*
* OPEN_EXPIRED:
* reclaim state on the server after a network partition.
......@@ -3069,15 +3090,10 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return err;
}
static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
{
struct nfs_server *server = NFS_SERVER(data->inode);
dprintk("--> %s\n", __func__);
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
nfs_restart_rpc(task, server->nfs_client);
return -EAGAIN;
......@@ -3089,19 +3105,44 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
{
dprintk("--> %s\n", __func__);
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
return data->read_done_cb(task, data);
}
static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
{
data->timestamp = jiffies;
data->read_done_cb = nfs4_read_done_cb;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
}
static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
/* Reset the the nfs_read_data to send the read to the MDS. */
void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
{
dprintk("%s Reset task for i/o through\n", __func__);
put_lseg(data->lseg);
data->lseg = NULL;
/* offsets will differ in the dense stripe case */
data->args.offset = data->mds_offset;
data->ds_clp = NULL;
data->args.fh = NFS_FH(data->inode);
data->read_done_cb = nfs4_read_done_cb;
task->tk_ops = data->mds_ops;
rpc_task_reset_client(task, NFS_CLIENT(data->inode));
}
EXPORT_SYMBOL_GPL(nfs4_reset_read);
static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
{
struct inode *inode = data->inode;
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
return -EAGAIN;
......@@ -3113,11 +3154,41 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
{
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
return data->write_done_cb(task, data);
}
/* Reset the the nfs_write_data to send the write to the MDS. */
void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
{
dprintk("%s Reset task for i/o through\n", __func__);
put_lseg(data->lseg);
data->lseg = NULL;
data->ds_clp = NULL;
data->write_done_cb = nfs4_write_done_cb;
data->args.fh = NFS_FH(data->inode);
data->args.bitmask = data->res.server->cache_consistency_bitmask;
data->args.offset = data->mds_offset;
data->res.fattr = &data->fattr;
task->tk_ops = data->mds_ops;
rpc_task_reset_client(task, NFS_CLIENT(data->inode));
}
EXPORT_SYMBOL_GPL(nfs4_reset_write);
static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
{
struct nfs_server *server = NFS_SERVER(data->inode);
data->args.bitmask = server->cache_consistency_bitmask;
if (data->lseg) {
data->args.bitmask = NULL;
data->res.fattr = NULL;
} else
data->args.bitmask = server->cache_consistency_bitmask;
if (!data->write_done_cb)
data->write_done_cb = nfs4_write_done_cb;
data->res.server = server;
data->timestamp = jiffies;
......@@ -5118,6 +5189,27 @@ int nfs4_init_session(struct nfs_server *server)
return ret;
}
int nfs4_init_ds_session(struct nfs_client *clp)
{
struct nfs4_session *session = clp->cl_session;
int ret;
if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
return 0;
ret = nfs4_client_recover_expired_lease(clp);
if (!ret)
/* Test for the DS role */
if (!is_ds_client(clp))
ret = -ENODEV;
if (!ret)
ret = nfs4_check_client_ready(clp);
return ret;
}
EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
/*
* Renew the cl_session lease.
*/
......@@ -5648,6 +5740,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.clear_acl_cache = nfs4_zap_acl_attr,
.close_context = nfs4_close_context,
.open_context = nfs4_atomic_open,
.init_client = nfs4_init_client,
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
......
......@@ -64,12 +64,8 @@ nfs4_renew_state(struct work_struct *work)
ops = clp->cl_mvops->state_renewal_ops;
dprintk("%s: start\n", __func__);
rcu_read_lock();
if (list_empty(&clp->cl_superblocks)) {
rcu_read_unlock();
if (test_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state))
goto out;
}
rcu_read_unlock();
spin_lock(&clp->cl_lock);
lease = clp->cl_lease_time;
......
......@@ -153,6 +153,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
int status;
struct nfs_fsinfo fsinfo;
if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
nfs4_schedule_state_renewal(clp);
return 0;
}
status = nfs4_proc_get_lease_time(clp, &fsinfo);
if (status == 0) {
/* Update lease time and schedule renewal */
......@@ -1448,6 +1453,7 @@ void nfs4_schedule_session_recovery(struct nfs4_session *session)
{
nfs4_schedule_lease_recovery(session->clp);
}
EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
void nfs41_handle_recall_slot(struct nfs_client *clp)
{
......
......@@ -844,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
if (iap->ia_valid & ATTR_MODE)
len += 4;
if (iap->ia_valid & ATTR_UID) {
owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ);
owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
if (owner_namelen < 0) {
dprintk("nfs: couldn't resolve uid %d to string\n",
iap->ia_uid);
......@@ -856,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
}
if (iap->ia_valid & ATTR_GID) {
owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ);
owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
if (owner_grouplen < 0) {
dprintk("nfs: couldn't resolve gid %d to string\n",
iap->ia_gid);
......@@ -1384,7 +1384,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
hdr->replen += decode_putrootfh_maxsz;
}
static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx)
static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid)
{
nfs4_stateid stateid;
__be32 *p;
......@@ -1392,6 +1392,8 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
p = reserve_space(xdr, NFS4_STATEID_SIZE);
if (ctx->state != NULL) {
nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
if (zero_seqid)
stateid.stateid.seqid = 0;
xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
} else
xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
......@@ -1404,7 +1406,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
p = reserve_space(xdr, 4);
*p = cpu_to_be32(OP_READ);
encode_stateid(xdr, args->context, args->lock_context);
encode_stateid(xdr, args->context, args->lock_context,
hdr->minorversion);
p = reserve_space(xdr, 12);
p = xdr_encode_hyper(p, args->offset);
......@@ -1592,7 +1595,8 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
p = reserve_space(xdr, 4);
*p = cpu_to_be32(OP_WRITE);
encode_stateid(xdr, args->context, args->lock_context);
encode_stateid(xdr, args->context, args->lock_context,
hdr->minorversion);
p = reserve_space(xdr, 16);
p = xdr_encode_hyper(p, args->offset);
......@@ -2271,7 +2275,8 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_putfh(xdr, args->fh, &hdr);
encode_write(xdr, args, &hdr);
req->rq_snd_buf.flags |= XDRBUF_WRITE;
encode_getfattr(xdr, args->bitmask, &hdr);
if (args->bitmask)
encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
......@@ -3382,7 +3387,7 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
}
static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
struct nfs_client *clp, uint32_t *uid, int may_sleep)
const struct nfs_server *server, uint32_t *uid, int may_sleep)
{
uint32_t len;
__be32 *p;
......@@ -3402,7 +3407,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
if (!may_sleep) {
/* do nothing */
} else if (len < XDR_MAX_NETOBJ) {
if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0)
if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0)
ret = NFS_ATTR_FATTR_OWNER;
else
dprintk("%s: nfs_map_name_to_uid failed!\n",
......@@ -3420,7 +3425,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
}
static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
struct nfs_client *clp, uint32_t *gid, int may_sleep)
const struct nfs_server *server, uint32_t *gid, int may_sleep)
{
uint32_t len;
__be32 *p;
......@@ -3440,7 +3445,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
if (!may_sleep) {
/* do nothing */
} else if (len < XDR_MAX_NETOBJ) {
if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0)
if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0)
ret = NFS_ATTR_FATTR_GROUP;
else
dprintk("%s: nfs_map_group_to_gid failed!\n",
......@@ -3939,14 +3944,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
goto xdr_error;
fattr->valid |= status;
status = decode_attr_owner(xdr, bitmap, server->nfs_client,
&fattr->uid, may_sleep);
status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep);
if (status < 0)
goto xdr_error;
fattr->valid |= status;
status = decode_attr_group(xdr, bitmap, server->nfs_client,
&fattr->gid, may_sleep);
status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep);
if (status < 0)
goto xdr_error;
fattr->valid |= status;
......@@ -5690,8 +5693,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
status = decode_write(xdr, res);
if (status)
goto out;
decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
if (res->fattr)
decode_getfattr(xdr, res->fattr, res->server,
!RPC_IS_ASYNC(rqstp->rq_task));
if (!status)
status = res->count;
out:
......@@ -6167,8 +6171,6 @@ static struct {
{ NFS4ERR_DQUOT, -EDQUOT },
{ NFS4ERR_STALE, -ESTALE },
{ NFS4ERR_BADHANDLE, -EBADHANDLE },
{ NFS4ERR_BADOWNER, -EINVAL },
{ NFS4ERR_BADNAME, -EINVAL },
{ NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
{ NFS4ERR_NOTSUPP, -ENOTSUPP },
{ NFS4ERR_TOOSMALL, -ETOOSMALL },
......
......@@ -20,6 +20,7 @@
#include <linux/nfs_mount.h>
#include "internal.h"
#include "pnfs.h"
static struct kmem_cache *nfs_page_cachep;
......@@ -213,7 +214,7 @@ nfs_wait_on_request(struct nfs_page *req)
*/
void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
int (*doio)(struct nfs_pageio_descriptor *),
size_t bsize,
int io_flags)
{
......@@ -226,6 +227,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_doio = doio;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
}
/**
......@@ -240,7 +242,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
* Return 'true' if this is the case, else return 'false'.
*/
static int nfs_can_coalesce_requests(struct nfs_page *prev,
struct nfs_page *req)
struct nfs_page *req,
struct nfs_pageio_descriptor *pgio)
{
if (req->wb_context->cred != prev->wb_context->cred)
return 0;
......@@ -254,6 +257,12 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev,
return 0;
if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
return 0;
/*
* Non-whole file layouts need to check that req is inside of
* pgio->pg_lseg.
*/
if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
return 0;
return 1;
}
......@@ -286,7 +295,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
if (newlen > desc->pg_bsize)
return 0;
prev = nfs_list_entry(desc->pg_list.prev);
if (!nfs_can_coalesce_requests(prev, req))
if (!nfs_can_coalesce_requests(prev, req, desc))
return 0;
} else
desc->pg_base = req->wb_pgbase;
......@@ -302,12 +311,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
{
if (!list_empty(&desc->pg_list)) {
int error = desc->pg_doio(desc->pg_inode,
&desc->pg_list,
nfs_page_array_len(desc->pg_base,
desc->pg_count),
desc->pg_count,
desc->pg_ioflags);
int error = desc->pg_doio(desc);
if (error < 0)
desc->pg_error = error;
else
......
......@@ -30,6 +30,7 @@
#include <linux/nfs_fs.h>
#include "internal.h"
#include "pnfs.h"
#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
......@@ -74,10 +75,8 @@ find_pnfs_driver(u32 id)
void
unset_pnfs_layoutdriver(struct nfs_server *nfss)
{
if (nfss->pnfs_curr_ld) {
nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
if (nfss->pnfs_curr_ld)
module_put(nfss->pnfs_curr_ld->owner);
}
nfss->pnfs_curr_ld = NULL;
}
......@@ -115,13 +114,7 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
goto out_no_driver;
}
server->pnfs_curr_ld = ld_type;
if (ld_type->set_layoutdriver(server)) {
printk(KERN_ERR
"%s: Error initializing mount point for layout driver %u.\n",
__func__, id);
module_put(ld_type->owner);
goto out_no_driver;
}
dprintk("%s: pNFS module for %u set\n", __func__, id);
return;
......@@ -230,37 +223,41 @@ static void free_lseg(struct pnfs_layout_segment *lseg)
put_layout_hdr(NFS_I(ino)->layout);
}
/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
* could sleep, so must be called outside of the lock.
* Returns 1 if object was removed, otherwise return 0.
*/
static int
put_lseg_locked(struct pnfs_layout_segment *lseg,
struct list_head *tmp_list)
static void
put_lseg_common(struct pnfs_layout_segment *lseg)
{
struct inode *inode = lseg->pls_layout->plh_inode;
BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
list_del_init(&lseg->pls_list);
if (list_empty(&lseg->pls_layout->plh_segs)) {
set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
/* Matched by initial refcount set in alloc_init_layout_hdr */
put_layout_hdr_locked(lseg->pls_layout);
}
rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
}
void
put_lseg(struct pnfs_layout_segment *lseg)
{
struct inode *inode;
if (!lseg)
return;
dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
atomic_read(&lseg->pls_refcount),
test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
if (atomic_dec_and_test(&lseg->pls_refcount)) {
struct inode *ino = lseg->pls_layout->plh_inode;
inode = lseg->pls_layout->plh_inode;
if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
LIST_HEAD(free_me);
BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
list_del(&lseg->pls_list);
if (list_empty(&lseg->pls_layout->plh_segs)) {
struct nfs_client *clp;
clp = NFS_SERVER(ino)->nfs_client;
spin_lock(&clp->cl_lock);
/* List does not take a reference, so no need for put here */
list_del_init(&lseg->pls_layout->plh_layouts);
spin_unlock(&clp->cl_lock);
clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
}
rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
list_add(&lseg->pls_list, tmp_list);
return 1;
put_lseg_common(lseg);
list_add(&lseg->pls_list, &free_me);
spin_unlock(&inode->i_lock);
pnfs_free_lseg_list(&free_me);
}
return 0;
}
static bool
......@@ -281,7 +278,13 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
* list. It will now be removed when all
* outstanding io is finished.
*/
rv = put_lseg_locked(lseg, tmp_list);
dprintk("%s: lseg %p ref %d\n", __func__, lseg,
atomic_read(&lseg->pls_refcount));
if (atomic_dec_and_test(&lseg->pls_refcount)) {
put_lseg_common(lseg);
list_add(&lseg->pls_list, tmp_list);
rv = 1;
}
}
return rv;
}
......@@ -299,6 +302,11 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin lo %p\n", __func__, lo);
if (list_empty(&lo->plh_segs)) {
if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
put_layout_hdr_locked(lo);
return 0;
}
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
dprintk("%s: freeing lseg %p iomode %d "
......@@ -312,11 +320,27 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
return invalid - removed;
}
/* note free_me must contain lsegs from a single layout_hdr */
void
pnfs_free_lseg_list(struct list_head *free_me)
{
struct pnfs_layout_segment *lseg, *tmp;
struct pnfs_layout_hdr *lo;
if (list_empty(free_me))
return;
lo = list_first_entry(free_me, struct pnfs_layout_segment,
pls_list)->pls_layout;
if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
struct nfs_client *clp;
clp = NFS_SERVER(lo->plh_inode)->nfs_client;
spin_lock(&clp->cl_lock);
list_del_init(&lo->plh_layouts);
spin_unlock(&clp->cl_lock);
}
list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
list_del(&lseg->pls_list);
free_lseg(lseg);
......@@ -332,10 +356,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
spin_lock(&nfsi->vfs_inode.i_lock);
lo = nfsi->layout;
if (lo) {
set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags);
lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
/* Matched by refcount set to 1 in alloc_init_layout_hdr */
put_layout_hdr_locked(lo);
}
spin_unlock(&nfsi->vfs_inode.i_lock);
pnfs_free_lseg_list(&tmp_list);
......@@ -403,6 +425,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
(int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
return true;
return lo->plh_block_lgets ||
test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
(list_empty(&lo->plh_segs) &&
(atomic_read(&lo->plh_outstanding) > lget));
......@@ -674,7 +697,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
is_matching_lseg(lseg, iomode)) {
ret = lseg;
ret = get_lseg(lseg);
break;
}
if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
......@@ -699,6 +722,7 @@ pnfs_update_layout(struct inode *ino,
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg = NULL;
bool first = false;
if (!pnfs_enabled_sb(NFS_SERVER(ino)))
return NULL;
......@@ -715,21 +739,25 @@ pnfs_update_layout(struct inode *ino,
dprintk("%s matches recall, use MDS\n", __func__);
goto out_unlock;
}
/* Check to see if the layout for the given range already exists */
lseg = pnfs_find_lseg(lo, iomode);
if (lseg)
goto out_unlock;
/* if LAYOUTGET already failed once we don't try again */
if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
goto out_unlock;
/* Check to see if the layout for the given range already exists */
lseg = pnfs_find_lseg(lo, iomode);
if (lseg)
goto out_unlock;
if (pnfs_layoutgets_blocked(lo, NULL, 0))
goto out_unlock;
atomic_inc(&lo->plh_outstanding);
get_layout_hdr(lo);
if (list_empty(&lo->plh_segs)) {
if (list_empty(&lo->plh_segs))
first = true;
spin_unlock(&ino->i_lock);
if (first) {
/* The lo must be on the clp list if there is any
* chance of a CB_LAYOUTRECALL(FILE) coming in.
*/
......@@ -738,24 +766,18 @@ pnfs_update_layout(struct inode *ino,
list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
spin_unlock(&clp->cl_lock);
}
spin_unlock(&ino->i_lock);
lseg = send_layoutget(lo, ctx, iomode);
if (!lseg) {
spin_lock(&ino->i_lock);
if (list_empty(&lo->plh_segs)) {
spin_lock(&clp->cl_lock);
list_del_init(&lo->plh_layouts);
spin_unlock(&clp->cl_lock);
clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
}
spin_unlock(&ino->i_lock);
if (!lseg && first) {
spin_lock(&clp->cl_lock);
list_del_init(&lo->plh_layouts);
spin_unlock(&clp->cl_lock);
}
atomic_dec(&lo->plh_outstanding);
put_layout_hdr(lo);
out:
dprintk("%s end, state 0x%lx lseg %p\n", __func__,
nfsi->layout->plh_flags, lseg);
nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
return lseg;
out_unlock:
spin_unlock(&ino->i_lock);
......@@ -808,7 +830,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
}
init_lseg(lo, lseg);
lseg->pls_range = res->range;
*lgp->lsegpp = lseg;
*lgp->lsegpp = get_lseg(lseg);
pnfs_insert_layout(lo, lseg);
if (res->return_on_close) {
......@@ -829,137 +851,97 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
goto out;
}
/*
* Device ID cache. Currently supports one layout type per struct nfs_client.
* Add layout type to the lookup key to expand to support multiple types.
*/
int
pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
void (*free_callback)(struct pnfs_deviceid_node *))
static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
struct nfs_page *prev,
struct nfs_page *req)
{
struct pnfs_deviceid_cache *c;
c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL);
if (!c)
return -ENOMEM;
spin_lock(&clp->cl_lock);
if (clp->cl_devid_cache != NULL) {
atomic_inc(&clp->cl_devid_cache->dc_ref);
dprintk("%s [kref [%d]]\n", __func__,
atomic_read(&clp->cl_devid_cache->dc_ref));
kfree(c);
} else {
/* kzalloc initializes hlists */
spin_lock_init(&c->dc_lock);
atomic_set(&c->dc_ref, 1);
c->dc_free_callback = free_callback;
clp->cl_devid_cache = c;
dprintk("%s [new]\n", __func__);
if (pgio->pg_count == prev->wb_bytes) {
/* This is first coelesce call for a series of nfs_pages */
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
prev->wb_context,
IOMODE_READ);
}
spin_unlock(&clp->cl_lock);
return 0;
return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
}
EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
/*
* Called from pnfs_layoutdriver_type->free_lseg
* last layout segment reference frees deviceid
*/
void
pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
struct pnfs_deviceid_node *devid)
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
struct nfs4_deviceid *id = &devid->de_id;
struct pnfs_deviceid_node *d;
struct hlist_node *n;
long h = nfs4_deviceid_hash(id);
struct pnfs_layoutdriver_type *ld;
dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
return;
ld = NFS_SERVER(inode)->pnfs_curr_ld;
pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
}
hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
if (!memcmp(&d->de_id, id, sizeof(*id))) {
hlist_del_rcu(&d->de_node);
spin_unlock(&c->dc_lock);
synchronize_rcu();
c->dc_free_callback(devid);
return;
}
spin_unlock(&c->dc_lock);
/* Why wasn't it found in the list? */
BUG();
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
/* Find and reference a deviceid */
struct pnfs_deviceid_node *
pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
{
struct pnfs_deviceid_node *d;
struct hlist_node *n;
long hash = nfs4_deviceid_hash(id);
dprintk("--> %s hash %ld\n", __func__, hash);
rcu_read_lock();
hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
if (!memcmp(&d->de_id, id, sizeof(*id))) {
if (!atomic_inc_not_zero(&d->de_ref)) {
goto fail;
} else {
rcu_read_unlock();
return d;
}
}
static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
struct nfs_page *prev,
struct nfs_page *req)
{
if (pgio->pg_count == prev->wb_bytes) {
/* This is first coelesce call for a series of nfs_pages */
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
prev->wb_context,
IOMODE_RW);
}
fail:
rcu_read_unlock();
return NULL;
return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
}
void
pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
{
struct pnfs_layoutdriver_type *ld;
ld = NFS_SERVER(inode)->pnfs_curr_ld;
pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
}
enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *wdata,
const struct rpc_call_ops *call_ops, int how)
{
struct inode *inode = wdata->inode;
enum pnfs_try_status trypnfs;
struct nfs_server *nfss = NFS_SERVER(inode);
wdata->mds_ops = call_ops;
dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
inode->i_ino, wdata->args.count, wdata->args.offset, how);
trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
if (trypnfs == PNFS_NOT_ATTEMPTED) {
put_lseg(wdata->lseg);
wdata->lseg = NULL;
} else
nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
return trypnfs;
}
EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
/*
* Add a deviceid to the cache.
* GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
* Call the appropriate parallel I/O subsystem read function.
*/
struct pnfs_deviceid_node *
pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new)
{
struct pnfs_deviceid_node *d;
long hash = nfs4_deviceid_hash(&new->de_id);
dprintk("--> %s hash %ld\n", __func__, hash);
spin_lock(&c->dc_lock);
d = pnfs_find_get_deviceid(c, &new->de_id);
if (d) {
spin_unlock(&c->dc_lock);
dprintk("%s [discard]\n", __func__);
c->dc_free_callback(new);
return d;
}
INIT_HLIST_NODE(&new->de_node);
atomic_set(&new->de_ref, 1);
hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
spin_unlock(&c->dc_lock);
dprintk("%s [new]\n", __func__);
return new;
}
EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
void
pnfs_put_deviceid_cache(struct nfs_client *clp)
enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *rdata,
const struct rpc_call_ops *call_ops)
{
struct pnfs_deviceid_cache *local = clp->cl_devid_cache;
struct inode *inode = rdata->inode;
struct nfs_server *nfss = NFS_SERVER(inode);
enum pnfs_try_status trypnfs;
dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref));
if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) {
int i;
/* Verify cache is empty */
for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++)
BUG_ON(!hlist_empty(&local->dc_deviceids[i]));
clp->cl_devid_cache = NULL;
spin_unlock(&clp->cl_lock);
kfree(local);
rdata->mds_ops = call_ops;
dprintk("%s: Reading ino:%lu %u@%llu\n",
__func__, inode->i_ino, rdata->args.count, rdata->args.offset);
trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
if (trypnfs == PNFS_NOT_ATTEMPTED) {
put_lseg(rdata->lseg);
rdata->lseg = NULL;
} else {
nfs_inc_stats(inode, NFSIOS_PNFS_READ);
}
dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
return trypnfs;
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
......@@ -30,6 +30,8 @@
#ifndef FS_NFS_PNFS_H
#define FS_NFS_PNFS_H
#include <linux/nfs_page.h>
enum {
NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
NFS_LSEG_ROC, /* roc bit received from server */
......@@ -43,6 +45,11 @@ struct pnfs_layout_segment {
struct pnfs_layout_hdr *pls_layout;
};
enum pnfs_try_status {
PNFS_ATTEMPTED = 0,
PNFS_NOT_ATTEMPTED = 1,
};
#ifdef CONFIG_NFS_V4_1
#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
......@@ -61,10 +68,18 @@ struct pnfs_layoutdriver_type {
const u32 id;
const char *name;
struct module *owner;
int (*set_layoutdriver) (struct nfs_server *);
int (*clear_layoutdriver) (struct nfs_server *);
struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
void (*free_lseg) (struct pnfs_layout_segment *lseg);
/* test for nfs page cache coalescing */
int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
/*
* Return PNFS_ATTEMPTED to indicate the layout code has attempted
* I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
*/
enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
};
struct pnfs_layout_hdr {
......@@ -90,52 +105,6 @@ struct pnfs_device {
unsigned int pglen;
};
/*
* Device ID RCU cache. A device ID is unique per client ID and layout type.
*/
#define NFS4_DEVICE_ID_HASH_BITS 5
#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
static inline u32
nfs4_deviceid_hash(struct nfs4_deviceid *id)
{
unsigned char *cptr = (unsigned char *)id->data;
unsigned int nbytes = NFS4_DEVICEID4_SIZE;
u32 x = 0;
while (nbytes--) {
x *= 37;
x += *cptr++;
}
return x & NFS4_DEVICE_ID_HASH_MASK;
}
struct pnfs_deviceid_node {
struct hlist_node de_node;
struct nfs4_deviceid de_id;
atomic_t de_ref;
};
struct pnfs_deviceid_cache {
spinlock_t dc_lock;
atomic_t dc_ref;
void (*dc_free_callback)(struct pnfs_deviceid_node *);
struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
};
extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
void (*free_callback)(struct pnfs_deviceid_node *));
extern void pnfs_put_deviceid_cache(struct nfs_client *);
extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
struct pnfs_deviceid_cache *,
struct nfs4_deviceid *);
extern struct pnfs_deviceid_node *pnfs_add_deviceid(
struct pnfs_deviceid_cache *,
struct pnfs_deviceid_node *);
extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
struct pnfs_deviceid_node *devid);
extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
......@@ -146,11 +115,18 @@ extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
/* pnfs.c */
void get_layout_hdr(struct pnfs_layout_hdr *lo);
void put_lseg(struct pnfs_layout_segment *lseg);
struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
enum pnfs_iomode access_type);
void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
void unset_pnfs_layoutdriver(struct nfs_server *);
enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
const struct rpc_call_ops *, int);
enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
const struct rpc_call_ops *);
void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
......@@ -177,6 +153,16 @@ static inline int lo_fail_bit(u32 iomode)
NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
}
static inline struct pnfs_layout_segment *
get_lseg(struct pnfs_layout_segment *lseg)
{
if (lseg) {
atomic_inc(&lseg->pls_refcount);
smp_mb__after_atomic_inc();
}
return lseg;
}
/* Return true if a layout driver is being used for this mountpoint */
static inline int pnfs_enabled_sb(struct nfs_server *nfss)
{
......@@ -193,6 +179,16 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
{
}
static inline struct pnfs_layout_segment *
get_lseg(struct pnfs_layout_segment *lseg)
{
return NULL;
}
static inline void put_lseg(struct pnfs_layout_segment *lseg)
{
}
static inline struct pnfs_layout_segment *
pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
enum pnfs_iomode access_type)
......@@ -200,6 +196,20 @@ pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
return NULL;
}
static inline enum pnfs_try_status
pnfs_try_to_read_data(struct nfs_read_data *data,
const struct rpc_call_ops *call_ops)
{
return PNFS_NOT_ATTEMPTED;
}
static inline enum pnfs_try_status
pnfs_try_to_write_data(struct nfs_write_data *data,
const struct rpc_call_ops *call_ops, int how)
{
return PNFS_NOT_ATTEMPTED;
}
static inline bool
pnfs_roc(struct inode *ino)
{
......@@ -230,6 +240,18 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
{
}
static inline void
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
{
pgio->pg_test = NULL;
}
static inline void
pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
{
pgio->pg_test = NULL;
}
#endif /* CONFIG_NFS_V4_1 */
#endif /* FS_NFS_PNFS_H */
......@@ -741,4 +741,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.lock = nfs_proc_lock,
.lock_check_bounds = nfs_lock_check_bounds,
.close_context = nfs_close_context,
.init_client = nfs_init_client,
};
......@@ -18,19 +18,20 @@
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>
#include <asm/system.h>
#include "pnfs.h"
#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int);
static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int);
static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;
......@@ -69,6 +70,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
static void nfs_readdata_release(struct nfs_read_data *rdata)
{
put_lseg(rdata->lseg);
put_nfs_open_context(rdata->args.context);
nfs_readdata_free(rdata);
}
......@@ -114,14 +116,13 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
LIST_HEAD(one_request);
struct nfs_page *new;
unsigned int len;
struct nfs_pageio_descriptor pgio;
len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
pnfs_update_layout(inode, ctx, IOMODE_READ);
new = nfs_create_request(ctx, inode, page, 0, len);
if (IS_ERR(new)) {
unlock_page(page);
......@@ -130,11 +131,14 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
nfs_list_add_request(new, &one_request);
nfs_pageio_init(&pgio, inode, NULL, 0, 0);
nfs_list_add_request(new, &pgio.pg_list);
pgio.pg_count = len;
if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
nfs_pagein_multi(inode, &one_request, 1, len, 0);
nfs_pagein_multi(&pgio);
else
nfs_pagein_one(inode, &one_request, 1, len, 0);
nfs_pagein_one(&pgio);
return 0;
}
......@@ -155,24 +159,20 @@ static void nfs_readpage_release(struct nfs_page *req)
nfs_release_request(req);
}
/*
* Set up the NFS read request struct
*/
static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset)
int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops)
{
struct inode *inode = req->wb_context->path.dentry->d_inode;
struct inode *inode = data->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
struct rpc_task *task;
struct rpc_message msg = {
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = req->wb_context->cred,
.rpc_cred = data->cred,
};
struct rpc_task_setup task_setup_data = {
.task = &data->task,
.rpc_client = NFS_CLIENT(inode),
.rpc_client = clnt,
.rpc_message = &msg,
.callback_ops = call_ops,
.callback_data = data,
......@@ -180,9 +180,39 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
.flags = RPC_TASK_ASYNC | swap_flags,
};
/* Set up the initial task struct. */
NFS_PROTO(inode)->read_setup(data, &msg);
dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
"offset %llu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
data->args.count,
(unsigned long long)data->args.offset);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
rpc_put_task(task);
return 0;
}
EXPORT_SYMBOL_GPL(nfs_initiate_read);
/*
* Set up the NFS read request struct
*/
static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
struct pnfs_layout_segment *lseg)
{
struct inode *inode = req->wb_context->path.dentry->d_inode;
data->req = req;
data->inode = inode;
data->cred = msg.rpc_cred;
data->cred = req->wb_context->cred;
data->lseg = get_lseg(lseg);
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
......@@ -197,21 +227,11 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
data->res.eof = 0;
nfs_fattr_init(&data->fattr);
/* Set up the initial task struct. */
NFS_PROTO(inode)->read_setup(data, &msg);
dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
count,
(unsigned long long)data->args.offset);
if (data->lseg &&
(pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
return 0;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
rpc_put_task(task);
return 0;
return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}
static void
......@@ -240,20 +260,21 @@ nfs_async_read_error(struct list_head *head)
* won't see the new data until our attribute cache is updated. This is more
* or less conventional NFS client behavior.
*/
static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
{
struct nfs_page *req = nfs_list_entry(head->next);
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_read_data *data;
size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
struct pnfs_layout_segment *lseg;
LIST_HEAD(list);
nfs_list_remove_request(req);
nbytes = count;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes,rsize);
......@@ -266,9 +287,11 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
} while(nbytes != 0);
atomic_set(&req->wb_complete, requests);
BUG_ON(desc->pg_lseg != NULL);
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
ClearPageError(page);
offset = 0;
nbytes = count;
nbytes = desc->pg_count;
do {
int ret2;
......@@ -280,12 +303,14 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
if (nbytes < rsize)
rsize = nbytes;
ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
rsize, offset);
rsize, offset, lseg);
if (ret == 0)
ret = ret2;
offset += rsize;
nbytes -= rsize;
} while (nbytes != 0);
put_lseg(lseg);
desc->pg_lseg = NULL;
return ret;
......@@ -300,16 +325,21 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
return -ENOMEM;
}
static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
{
struct nfs_page *req;
struct page **pages;
struct nfs_read_data *data;
struct list_head *head = &desc->pg_list;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
int ret = -ENOMEM;
data = nfs_readdata_alloc(npages);
if (!data)
goto out_bad;
data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
desc->pg_count));
if (!data) {
nfs_async_read_error(head);
goto out;
}
pages = data->pagevec;
while (!list_empty(head)) {
......@@ -320,10 +350,14 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
if ((!lseg) && list_is_singular(&data->pages))
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
out_bad:
nfs_async_read_error(head);
ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
0, lseg);
out:
put_lseg(lseg);
desc->pg_lseg = NULL;
return ret;
}
......@@ -366,6 +400,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
return;
/* Yes, so retry the read at the end of the data */
data->mds_offset += resp->count;
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
......@@ -625,7 +660,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */
pnfs_update_layout(inode, desc.ctx, IOMODE_READ);
pnfs_pageio_init_read(&pgio, inode);
if (rsize < PAGE_CACHE_SIZE)
nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
else
......
......@@ -1008,6 +1008,27 @@ static int nfs_parse_security_flavors(char *value,
return 1;
}
static int nfs_get_option_str(substring_t args[], char **option)
{
kfree(*option);
*option = match_strdup(args);
return !option;
}
static int nfs_get_option_ul(substring_t args[], unsigned long *option)
{
int rc;
char *string;
string = match_strdup(args);
if (string == NULL)
return -ENOMEM;
rc = strict_strtoul(string, 10, option);
kfree(string);
return rc;
}
/*
* Error-check and convert a string of mount options from user space into
* a data structure. The whole mount string is processed; bad options are
......@@ -1156,155 +1177,82 @@ static int nfs_parse_mount_options(char *raw,
* options that take numeric values
*/
case Opt_port:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0 || option > USHRT_MAX)
if (nfs_get_option_ul(args, &option) ||
option > USHRT_MAX)
goto out_invalid_value;
mnt->nfs_server.port = option;
break;
case Opt_rsize:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0)
if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->rsize = option;
break;
case Opt_wsize:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0)
if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->wsize = option;
break;
case Opt_bsize:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0)
if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->bsize = option;
break;
case Opt_timeo:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0 || option == 0)
if (nfs_get_option_ul(args, &option) || option == 0)
goto out_invalid_value;
mnt->timeo = option;
break;
case Opt_retrans:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0 || option == 0)
if (nfs_get_option_ul(args, &option) || option == 0)
goto out_invalid_value;
mnt->retrans = option;
break;
case Opt_acregmin:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0)
if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->acregmin = option;
break;
case Opt_acregmax:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0)
if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->acregmax = option;
break;
case Opt_acdirmin:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0)
if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->acdirmin = option;
break;
case Opt_acdirmax:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0)
if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->acdirmax = option;
break;
case Opt_actimeo:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0)
if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->acregmin = mnt->acregmax =
mnt->acdirmin = mnt->acdirmax = option;
break;
case Opt_namelen:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0)
if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
mnt->namlen = option;
break;
case Opt_mountport:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0 || option > USHRT_MAX)
if (nfs_get_option_ul(args, &option) ||
option > USHRT_MAX)
goto out_invalid_value;
mnt->mount_server.port = option;
break;
case Opt_mountvers:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0 ||
if (nfs_get_option_ul(args, &option) ||
option < NFS_MNT_VERSION ||
option > NFS_MNT3_VERSION)
goto out_invalid_value;
mnt->mount_server.version = option;
break;
case Opt_nfsvers:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0)
if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
switch (option) {
case NFS2_VERSION:
......@@ -1324,12 +1272,7 @@ static int nfs_parse_mount_options(char *raw,
}
break;
case Opt_minorversion:
string = match_strdup(args);
if (string == NULL)
goto out_nomem;
rc = strict_strtoul(string, 10, &option);
kfree(string);
if (rc != 0)
if (nfs_get_option_ul(args, &option))
goto out_invalid_value;
if (option > NFS4_MAX_MINOR_VERSION)
goto out_invalid_value;
......@@ -1365,21 +1308,18 @@ static int nfs_parse_mount_options(char *raw,
case Opt_xprt_udp:
mnt->flags &= ~NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
kfree(string);
break;
case Opt_xprt_tcp6:
protofamily = AF_INET6;
case Opt_xprt_tcp:
mnt->flags |= NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
kfree(string);
break;
case Opt_xprt_rdma:
/* vector side protocols to TCP */
mnt->flags |= NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
xprt_load_transport(string);
kfree(string);
break;
default:
dfprintk(MOUNT, "NFS: unrecognized "
......@@ -1387,6 +1327,7 @@ static int nfs_parse_mount_options(char *raw,
kfree(string);
return 0;
}
kfree(string);
break;
case Opt_mountproto:
string = match_strdup(args);
......@@ -1429,18 +1370,13 @@ static int nfs_parse_mount_options(char *raw,
goto out_invalid_address;
break;
case Opt_clientaddr:
string = match_strdup(args);
if (string == NULL)
if (nfs_get_option_str(args, &mnt->client_address))
goto out_nomem;
kfree(mnt->client_address);
mnt->client_address = string;
break;
case Opt_mounthost:
string = match_strdup(args);
if (string == NULL)
if (nfs_get_option_str(args,
&mnt->mount_server.hostname))
goto out_nomem;
kfree(mnt->mount_server.hostname);
mnt->mount_server.hostname = string;
break;
case Opt_mountaddr:
string = match_strdup(args);
......@@ -1480,11 +1416,8 @@ static int nfs_parse_mount_options(char *raw,
};
break;
case Opt_fscache_uniq:
string = match_strdup(args);
if (string == NULL)
if (nfs_get_option_str(args, &mnt->fscache_uniq))
goto out_nomem;
kfree(mnt->fscache_uniq);
mnt->fscache_uniq = string;
mnt->options |= NFS_OPTION_FSCACHE;
break;
case Opt_local_lock:
......@@ -1694,99 +1627,59 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
return nfs_walk_authlist(args, &request);
}
static int nfs_parse_simple_hostname(const char *dev_name,
char **hostname, size_t maxnamlen,
char **export_path, size_t maxpathlen)
/*
* Split "dev_name" into "hostname:export_path".
*
* The leftmost colon demarks the split between the server's hostname
* and the export path. If the hostname starts with a left square
* bracket, then it may contain colons.
*
* Note: caller frees hostname and export path, even on error.
*/
static int nfs_parse_devname(const char *dev_name,
char **hostname, size_t maxnamlen,
char **export_path, size_t maxpathlen)
{
size_t len;
char *colon, *comma;
char *end;
colon = strchr(dev_name, ':');
if (colon == NULL)
goto out_bad_devname;
len = colon - dev_name;
if (len > maxnamlen)
goto out_hostname;
/* N.B. caller will free nfs_server.hostname in all cases */
*hostname = kstrndup(dev_name, len, GFP_KERNEL);
if (!*hostname)
goto out_nomem;
/* kill possible hostname list: not supported */
comma = strchr(*hostname, ',');
if (comma != NULL) {
if (comma == *hostname)
/* Is the host name protected with square brakcets? */
if (*dev_name == '[') {
end = strchr(++dev_name, ']');
if (end == NULL || end[1] != ':')
goto out_bad_devname;
*comma = '\0';
}
colon++;
len = strlen(colon);
if (len > maxpathlen)
goto out_path;
*export_path = kstrndup(colon, len, GFP_KERNEL);
if (!*export_path)
goto out_nomem;
dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
return 0;
out_bad_devname:
dfprintk(MOUNT, "NFS: device name not in host:path format\n");
return -EINVAL;
out_nomem:
dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
return -ENOMEM;
out_hostname:
dfprintk(MOUNT, "NFS: server hostname too long\n");
return -ENAMETOOLONG;
out_path:
dfprintk(MOUNT, "NFS: export pathname too long\n");
return -ENAMETOOLONG;
}
/*
* Hostname has square brackets around it because it contains one or
* more colons. We look for the first closing square bracket, and a
* colon must follow it.
*/
static int nfs_parse_protected_hostname(const char *dev_name,
char **hostname, size_t maxnamlen,
char **export_path, size_t maxpathlen)
{
size_t len;
char *start, *end;
len = end - dev_name;
end++;
} else {
char *comma;
start = (char *)(dev_name + 1);
end = strchr(dev_name, ':');
if (end == NULL)
goto out_bad_devname;
len = end - dev_name;
end = strchr(start, ']');
if (end == NULL)
goto out_bad_devname;
if (*(end + 1) != ':')
goto out_bad_devname;
/* kill possible hostname list: not supported */
comma = strchr(dev_name, ',');
if (comma != NULL && comma < end)
*comma = 0;
}
len = end - start;
if (len > maxnamlen)
goto out_hostname;
/* N.B. caller will free nfs_server.hostname in all cases */
*hostname = kstrndup(start, len, GFP_KERNEL);
*hostname = kstrndup(dev_name, len, GFP_KERNEL);
if (*hostname == NULL)
goto out_nomem;
end += 2;
len = strlen(end);
len = strlen(++end);
if (len > maxpathlen)
goto out_path;
*export_path = kstrndup(end, len, GFP_KERNEL);
if (!*export_path)
goto out_nomem;
dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
return 0;
out_bad_devname:
......@@ -1806,29 +1699,6 @@ static int nfs_parse_protected_hostname(const char *dev_name,
return -ENAMETOOLONG;
}
/*
* Split "dev_name" into "hostname:export_path".
*
* The leftmost colon demarks the split between the server's hostname
* and the export path. If the hostname starts with a left square
* bracket, then it may contain colons.
*
* Note: caller frees hostname and export path, even on error.
*/
static int nfs_parse_devname(const char *dev_name,
char **hostname, size_t maxnamlen,
char **export_path, size_t maxpathlen)
{
if (*dev_name == '[')
return nfs_parse_protected_hostname(dev_name,
hostname, maxnamlen,
export_path, maxpathlen);
return nfs_parse_simple_hostname(dev_name,
hostname, maxnamlen,
export_path, maxpathlen);
}
/*
* Validate the NFS2/NFS3 mount data
* - fills in the mount root filehandle
......
......@@ -28,6 +28,7 @@
#include "iostat.h"
#include "nfs4_fs.h"
#include "fscache.h"
#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
......@@ -96,6 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
static void nfs_writedata_release(struct nfs_write_data *wdata)
{
put_lseg(wdata->lseg);
put_nfs_open_context(wdata->args.context);
nfs_writedata_free(wdata);
}
......@@ -781,25 +783,21 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}
/*
* Set up the argument/result storage required for the RPC call.
*/
static int nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
int how)
int nfs_initiate_write(struct nfs_write_data *data,
struct rpc_clnt *clnt,
const struct rpc_call_ops *call_ops,
int how)
{
struct inode *inode = req->wb_context->path.dentry->d_inode;
struct inode *inode = data->inode;
int priority = flush_task_priority(how);
struct rpc_task *task;
struct rpc_message msg = {
.rpc_argp = &data->args,
.rpc_resp = &data->res,
.rpc_cred = req->wb_context->cred,
.rpc_cred = data->cred,
};
struct rpc_task_setup task_setup_data = {
.rpc_client = NFS_CLIENT(inode),
.rpc_client = clnt,
.task = &data->task,
.rpc_message = &msg,
.callback_ops = call_ops,
......@@ -810,12 +808,52 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
};
int ret = 0;
/* Set up the initial task struct. */
NFS_PROTO(inode)->write_setup(data, &msg);
dprintk("NFS: %5u initiated write call "
"(req %s/%lld, %u bytes @ offset %llu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
data->args.count,
(unsigned long long)data->args.offset);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) {
ret = PTR_ERR(task);
goto out;
}
if (how & FLUSH_SYNC) {
ret = rpc_wait_for_completion_task(task);
if (ret == 0)
ret = task->tk_status;
}
rpc_put_task(task);
out:
return ret;
}
EXPORT_SYMBOL_GPL(nfs_initiate_write);
/*
* Set up the argument/result storage required for the RPC call.
*/
static int nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
struct pnfs_layout_segment *lseg,
int how)
{
struct inode *inode = req->wb_context->path.dentry->d_inode;
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
data->req = req;
data->inode = inode = req->wb_context->path.dentry->d_inode;
data->cred = msg.rpc_cred;
data->cred = req->wb_context->cred;
data->lseg = get_lseg(lseg);
data->args.fh = NFS_FH(inode);
data->args.offset = req_offset(req) + offset;
......@@ -836,30 +874,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->res.verf = &data->verf;
nfs_fattr_init(&data->fattr);
/* Set up the initial task struct. */
NFS_PROTO(inode)->write_setup(data, &msg);
dprintk("NFS: %5u initiated write call "
"(req %s/%lld, %u bytes @ offset %llu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
count,
(unsigned long long)data->args.offset);
if (data->lseg &&
(pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
return 0;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task)) {
ret = PTR_ERR(task);
goto out;
}
if (how & FLUSH_SYNC) {
ret = rpc_wait_for_completion_task(task);
if (ret == 0)
ret = task->tk_status;
}
rpc_put_task(task);
out:
return ret;
return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}
/* If a nfs_flush_* function fails, it should remove reqs from @head and
......@@ -879,20 +898,21 @@ static void nfs_redirty_request(struct nfs_page *req)
* Generate multiple small requests to write out a single
* contiguous dirty area on one page.
*/
static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
{
struct nfs_page *req = nfs_list_entry(head->next);
struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
struct page *page = req->wb_page;
struct nfs_write_data *data;
size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
unsigned int offset;
int requests = 0;
int ret = 0;
struct pnfs_layout_segment *lseg;
LIST_HEAD(list);
nfs_list_remove_request(req);
nbytes = count;
nbytes = desc->pg_count;
do {
size_t len = min(nbytes, wsize);
......@@ -905,9 +925,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
} while (nbytes != 0);
atomic_set(&req->wb_complete, requests);
BUG_ON(desc->pg_lseg);
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
ClearPageError(page);
offset = 0;
nbytes = count;
nbytes = desc->pg_count;
do {
int ret2;
......@@ -919,13 +941,15 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
if (nbytes < wsize)
wsize = nbytes;
ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
wsize, offset, how);
wsize, offset, lseg, desc->pg_ioflags);
if (ret == 0)
ret = ret2;
offset += wsize;
nbytes -= wsize;
} while (nbytes != 0);
put_lseg(lseg);
desc->pg_lseg = NULL;
return ret;
out_bad:
......@@ -946,16 +970,26 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
* This is the case if nfs_updatepage detects a conflicting request
* that has been written but not committed.
*/
static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
{
struct nfs_page *req;
struct page **pages;
struct nfs_write_data *data;
struct list_head *head = &desc->pg_list;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
int ret;
data = nfs_writedata_alloc(npages);
if (!data)
goto out_bad;
data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
desc->pg_count));
if (!data) {
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_redirty_request(req);
}
ret = -ENOMEM;
goto out;
}
pages = data->pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
......@@ -965,16 +999,15 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
*pages++ = req->wb_page;
}
req = nfs_list_entry(data->pages.next);
if ((!lseg) && list_is_singular(&data->pages))
lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
/* Set up the argument struct */
return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
out_bad:
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_redirty_request(req);
}
return -ENOMEM;
ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
out:
put_lseg(lseg); /* Cleans any gotten in ->pg_test */
desc->pg_lseg = NULL;
return ret;
}
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
......@@ -982,6 +1015,8 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
{
size_t wsize = NFS_SERVER(inode)->wsize;
pnfs_pageio_init_write(pgio, inode);
if (wsize < PAGE_CACHE_SIZE)
nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
else
......@@ -1132,7 +1167,7 @@ static const struct rpc_call_ops nfs_write_full_ops = {
/*
* This function is called when the WRITE call is complete.
*/
int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
struct nfs_writeargs *argp = &data->args;
struct nfs_writeres *resp = &data->res;
......@@ -1151,7 +1186,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
*/
status = NFS_PROTO(data->inode)->write_done(task, data);
if (status != 0)
return status;
return;
nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
......@@ -1166,6 +1201,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
*/
static unsigned long complain;
/* Note this will print the MDS for a DS write */
if (time_before(complain, jiffies)) {
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
......@@ -1186,6 +1222,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
/* Was this an NFSv2 write or an NFSv3 stable write? */
if (resp->verf->committed != NFS_UNSTABLE) {
/* Resend from where the server left off */
data->mds_offset += resp->count;
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
......@@ -1196,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
argp->stable = NFS_FILE_SYNC;
}
nfs_restart_rpc(task, server->nfs_client);
return -EAGAIN;
return;
}
if (time_before(complain, jiffies)) {
printk(KERN_WARNING
......@@ -1207,7 +1244,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
/* Can't do anything about it except throw an error. */
task->tk_status = -EIO;
}
return 0;
return;
}
......
......@@ -501,7 +501,7 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc);
extern int nfs_writepages(struct address_space *, struct writeback_control *);
extern int nfs_flush_incompatible(struct file *file, struct page *page);
extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
/*
* Try to write back everything synchronously (but check the
......
......@@ -30,6 +30,8 @@ struct nfs_client {
#define NFS_CS_CALLBACK 1 /* - callback started */
#define NFS_CS_IDMAP 2 /* - idmap started */
#define NFS_CS_RENEWD 3 /* - renewd started */
#define NFS_CS_STOP_RENEW 4 /* no more state to renew */
#define NFS_CS_CHECK_LEASE_TIME 5 /* need to check lease time */
struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen;
char * cl_hostname; /* hostname of server */
......@@ -75,7 +77,6 @@ struct nfs_client {
u32 cl_exchange_flags;
struct nfs4_session *cl_session; /* sharred session */
struct list_head cl_layouts;
struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
#endif /* CONFIG_NFS_V4 */
#ifdef CONFIG_NFS_FSCACHE
......@@ -176,6 +177,7 @@ struct nfs_server {
#define NFS_CAP_CTIME (1U << 12)
#define NFS_CAP_MTIME (1U << 13)
#define NFS_CAP_POSIX_LOCK (1U << 14)
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
/* maximum number of slots to use */
......
......@@ -65,6 +65,7 @@ struct idmap_msg {
/* Forward declaration to make this header independent of others */
struct nfs_client;
struct nfs_server;
#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
......@@ -96,10 +97,10 @@ void nfs_idmap_delete(struct nfs_client *);
#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
int nfs_map_name_to_uid(struct nfs_client *, const char *, size_t, __u32 *);
int nfs_map_group_to_gid(struct nfs_client *, const char *, size_t, __u32 *);
int nfs_map_uid_to_name(struct nfs_client *, __u32, char *, size_t);
int nfs_map_gid_to_group(struct nfs_client *, __u32, char *, size_t);
int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, __u32 *);
int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, __u32 *);
int nfs_map_uid_to_name(const struct nfs_server *, __u32, char *, size_t);
int nfs_map_gid_to_group(const struct nfs_server *, __u32, char *, size_t);
extern unsigned int nfs_idmap_cache_timeout;
#endif /* __KERNEL__ */
......
......@@ -113,6 +113,8 @@ enum nfs_stat_eventcounters {
NFSIOS_SHORTREAD,
NFSIOS_SHORTWRITE,
NFSIOS_DELAY,
NFSIOS_PNFS_READ,
NFSIOS_PNFS_WRITE,
__NFSIOS_COUNTSMAX,
};
......
......@@ -59,9 +59,11 @@ struct nfs_pageio_descriptor {
unsigned int pg_base;
struct inode *pg_inode;
int (*pg_doio)(struct inode *, struct list_head *, unsigned int, size_t, int);
int (*pg_doio)(struct nfs_pageio_descriptor *);
int pg_ioflags;
int pg_error;
struct pnfs_layout_segment *pg_lseg;
int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
};
#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
......@@ -79,7 +81,7 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
pgoff_t idx_start, unsigned int npages, int tag);
extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
int (*doio)(struct nfs_pageio_descriptor *desc),
size_t bsize,
int how);
extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
......
......@@ -1016,9 +1016,12 @@ struct nfs_read_data {
unsigned int npages; /* Max length of pagevec */
struct nfs_readargs args;
struct nfs_readres res;
#ifdef CONFIG_NFS_V4
unsigned long timestamp; /* For lease renewal */
#endif
struct pnfs_layout_segment *lseg;
struct nfs_client *ds_clp; /* pNFS data server */
const struct rpc_call_ops *mds_ops;
int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
__u64 mds_offset;
struct page *page_array[NFS_PAGEVEC_SIZE];
};
......@@ -1035,13 +1038,20 @@ struct nfs_write_data {
unsigned int npages; /* Max length of pagevec */
struct nfs_writeargs args; /* argument struct */
struct nfs_writeres res; /* result struct */
struct pnfs_layout_segment *lseg;
struct nfs_client *ds_clp; /* pNFS data server */
const struct rpc_call_ops *mds_ops;
int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
#ifdef CONFIG_NFS_V4
unsigned long timestamp; /* For lease renewal */
#endif
__u64 mds_offset; /* Filelayout dense stripe */
struct page *page_array[NFS_PAGEVEC_SIZE];
};
struct nfs_access_entry;
struct nfs_client;
struct rpc_timeout;
/*
* RPC procedure vector for NFSv2/NFSv3 demuxing
......@@ -1106,6 +1116,8 @@ struct nfs_rpc_ops {
struct nfs_open_context *ctx,
int open_flags,
struct iattr *iattr);
int (*init_client) (struct nfs_client *, const struct rpc_timeout *,
const char *, rpc_authflavor_t, int);
};
/*
......
......@@ -129,6 +129,7 @@ struct rpc_create_args {
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
struct rpc_program *, u32);
void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt);
struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
void rpc_shutdown_client(struct rpc_clnt *);
void rpc_release_client(struct rpc_clnt *);
......
......@@ -12,7 +12,6 @@
#include <linux/uio.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kref.h>
#include <linux/ktime.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/xdr.h>
......@@ -146,7 +145,7 @@ enum xprt_transports {
};
struct rpc_xprt {
struct kref kref; /* Reference count */
atomic_t count; /* Reference count */
struct rpc_xprt_ops * ops; /* transport methods */
const struct rpc_timeout *timeout; /* timeout parms */
......
......@@ -417,7 +417,7 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
gss_msg->msg.len += len;
}
if (mech->gm_upcall_enctypes) {
len = sprintf(p, mech->gm_upcall_enctypes);
len = sprintf(p, "enctypes=%s ", mech->gm_upcall_enctypes);
p += len;
gss_msg->msg.len += len;
}
......
......@@ -750,7 +750,7 @@ static struct gss_api_mech gss_kerberos_mech = {
.gm_ops = &gss_kerberos_ops,
.gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
.gm_pfs = gss_kerberos_pfs,
.gm_upcall_enctypes = "enctypes=18,17,16,23,3,1,2 ",
.gm_upcall_enctypes = "18,17,16,23,3,1,2",
};
static int __init init_kerberos_module(void)
......
......@@ -436,7 +436,9 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
rovr->tk_flags |= RPC_TASK_KILLED;
rpc_exit(rovr, -EIO);
rpc_wake_up_queued_task(rovr->tk_waitqueue, rovr);
if (RPC_IS_QUEUED(rovr))
rpc_wake_up_queued_task(rovr->tk_waitqueue,
rovr);
}
}
spin_unlock(&clnt->cl_lock);
......@@ -597,6 +599,14 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
}
}
void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt)
{
rpc_task_release_client(task);
rpc_task_set_client(task, clnt);
}
EXPORT_SYMBOL_GPL(rpc_task_reset_client);
static void
rpc_task_set_rpc_message(struct rpc_task *task, const struct rpc_message *msg)
{
......@@ -636,12 +646,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
rpc_task_set_client(task, task_setup_data->rpc_client);
rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
if (task->tk_status != 0) {
int ret = task->tk_status;
rpc_put_task(task);
return ERR_PTR(ret);
}
if (task->tk_action == NULL)
rpc_call_start(task);
......
......@@ -299,15 +299,8 @@ static void rpc_make_runnable(struct rpc_task *task)
if (rpc_test_and_set_running(task))
return;
if (RPC_IS_ASYNC(task)) {
int status;
INIT_WORK(&task->u.tk_work, rpc_async_schedule);
status = queue_work(rpciod_workqueue, &task->u.tk_work);
if (status < 0) {
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
task->tk_status = status;
return;
}
queue_work(rpciod_workqueue, &task->u.tk_work);
} else
wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
}
......@@ -637,14 +630,12 @@ static void __rpc_execute(struct rpc_task *task)
save_callback = task->tk_callback;
task->tk_callback = NULL;
save_callback(task);
}
/*
* Perform the next FSM step.
* tk_action may be NULL when the task has been killed
* by someone else.
*/
if (!RPC_IS_QUEUED(task)) {
} else {
/*
* Perform the next FSM step.
* tk_action may be NULL when the task has been killed
* by someone else.
*/
if (task->tk_action == NULL)
break;
task->tk_action(task);
......@@ -843,12 +834,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
}
rpc_init_task(task, setup_data);
if (task->tk_status < 0) {
int err = task->tk_status;
rpc_put_task(task);
return ERR_PTR(err);
}
task->tk_flags |= flags;
dprintk("RPC: allocated task %p\n", task);
return task;
......
......@@ -202,10 +202,9 @@ int xprt_reserve_xprt(struct rpc_task *task)
goto out_sleep;
}
xprt->snd_task = task;
if (req) {
req->rq_bytes_sent = 0;
req->rq_ntrans++;
}
req->rq_bytes_sent = 0;
req->rq_ntrans++;
return 1;
out_sleep:
......@@ -213,7 +212,7 @@ int xprt_reserve_xprt(struct rpc_task *task)
task->tk_pid, xprt);
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (req && req->rq_ntrans)
if (req->rq_ntrans)
rpc_sleep_on(&xprt->resend, task, NULL);
else
rpc_sleep_on(&xprt->sending, task, NULL);
......@@ -965,7 +964,7 @@ struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req)
xprt = kzalloc(size, GFP_KERNEL);
if (xprt == NULL)
goto out;
kref_init(&xprt->kref);
atomic_set(&xprt->count, 1);
xprt->max_reqs = max_req;
xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL);
......@@ -1145,13 +1144,11 @@ struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
/**
* xprt_destroy - destroy an RPC transport, killing off all requests.
* @kref: kref for the transport to destroy
* @xprt: transport to destroy
*
*/
static void xprt_destroy(struct kref *kref)
static void xprt_destroy(struct rpc_xprt *xprt)
{
struct rpc_xprt *xprt = container_of(kref, struct rpc_xprt, kref);
dprintk("RPC: destroying transport %p\n", xprt);
xprt->shutdown = 1;
del_timer_sync(&xprt->timer);
......@@ -1175,7 +1172,8 @@ static void xprt_destroy(struct kref *kref)
*/
void xprt_put(struct rpc_xprt *xprt)
{
kref_put(&xprt->kref, xprt_destroy);
if (atomic_dec_and_test(&xprt->count))
xprt_destroy(xprt);
}
/**
......@@ -1185,6 +1183,7 @@ void xprt_put(struct rpc_xprt *xprt)
*/
struct rpc_xprt *xprt_get(struct rpc_xprt *xprt)
{
kref_get(&xprt->kref);
return xprt;
if (atomic_inc_not_zero(&xprt->count))
return xprt;
return NULL;
}
......@@ -87,6 +87,8 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
{
int len, n = 0, p;
int page_base;
struct page **ppages;
if (pos == 0 && xdrbuf->head[0].iov_len) {
seg[n].mr_page = NULL;
......@@ -95,34 +97,32 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
++n;
}
if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) {
if (n == nsegs)
return 0;
seg[n].mr_page = xdrbuf->pages[0];
seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base;
seg[n].mr_len = min_t(u32,
PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len);
len = xdrbuf->page_len - seg[n].mr_len;
len = xdrbuf->page_len;
ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT);
page_base = xdrbuf->page_base & ~PAGE_MASK;
p = 0;
while (len && n < nsegs) {
seg[n].mr_page = ppages[p];
seg[n].mr_offset = (void *)(unsigned long) page_base;
seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
BUG_ON(seg[n].mr_len > PAGE_SIZE);
len -= seg[n].mr_len;
++n;
p = 1;
while (len > 0) {
if (n == nsegs)
return 0;
seg[n].mr_page = xdrbuf->pages[p];
seg[n].mr_offset = NULL;
seg[n].mr_len = min_t(u32, PAGE_SIZE, len);
len -= seg[n].mr_len;
++n;
++p;
}
++p;
page_base = 0; /* page offset only applies to first page */
}
/* Message overflows the seg array */
if (len && n == nsegs)
return 0;
if (xdrbuf->tail[0].iov_len) {
/* the rpcrdma protocol allows us to omit any trailing
* xdr pad bytes, saving the server an RDMA operation. */
if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
return n;
if (n == nsegs)
/* Tail remains, but we're out of segments */
return 0;
seg[n].mr_page = NULL;
seg[n].mr_offset = xdrbuf->tail[0].iov_base;
......@@ -296,6 +296,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
int copy_len;
unsigned char *srcp, *destp;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
int page_base;
struct page **ppages;
destp = rqst->rq_svec[0].iov_base;
curlen = rqst->rq_svec[0].iov_len;
......@@ -324,28 +326,25 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
__func__, destp + copy_len, curlen);
rqst->rq_svec[0].iov_len += curlen;
}
r_xprt->rx_stats.pullup_copy_count += copy_len;
npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
page_base = rqst->rq_snd_buf.page_base;
ppages = rqst->rq_snd_buf.pages + (page_base >> PAGE_SHIFT);
page_base &= ~PAGE_MASK;
npages = PAGE_ALIGN(page_base+copy_len) >> PAGE_SHIFT;
for (i = 0; copy_len && i < npages; i++) {
if (i == 0)
curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base;
else
curlen = PAGE_SIZE;
curlen = PAGE_SIZE - page_base;
if (curlen > copy_len)
curlen = copy_len;
dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n",
__func__, i, destp, copy_len, curlen);
srcp = kmap_atomic(rqst->rq_snd_buf.pages[i],
KM_SKB_SUNRPC_DATA);
if (i == 0)
memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen);
else
memcpy(destp, srcp, curlen);
srcp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
memcpy(destp, srcp+page_base, curlen);
kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA);
rqst->rq_svec[0].iov_len += curlen;
destp += curlen;
copy_len -= curlen;
page_base = 0;
}
/* header now contains entire send message */
return pad;
......@@ -606,6 +605,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{
int i, npages, curlen, olen;
char *destp;
struct page **ppages;
int page_base;
curlen = rqst->rq_rcv_buf.head[0].iov_len;
if (curlen > copy_len) { /* write chunk header fixup */
......@@ -624,32 +625,29 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
olen = copy_len;
i = 0;
rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen;
page_base = rqst->rq_rcv_buf.page_base;
ppages = rqst->rq_rcv_buf.pages + (page_base >> PAGE_SHIFT);
page_base &= ~PAGE_MASK;
if (copy_len && rqst->rq_rcv_buf.page_len) {
npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base +
npages = PAGE_ALIGN(page_base +
rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT;
for (; i < npages; i++) {
if (i == 0)
curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base;
else
curlen = PAGE_SIZE;
curlen = PAGE_SIZE - page_base;
if (curlen > copy_len)
curlen = copy_len;
dprintk("RPC: %s: page %d"
" srcp 0x%p len %d curlen %d\n",
__func__, i, srcp, copy_len, curlen);
destp = kmap_atomic(rqst->rq_rcv_buf.pages[i],
KM_SKB_SUNRPC_DATA);
if (i == 0)
memcpy(destp + rqst->rq_rcv_buf.page_base,
srcp, curlen);
else
memcpy(destp, srcp, curlen);
flush_dcache_page(rqst->rq_rcv_buf.pages[i]);
destp = kmap_atomic(ppages[i], KM_SKB_SUNRPC_DATA);
memcpy(destp + page_base, srcp, curlen);
flush_dcache_page(ppages[i]);
kunmap_atomic(destp, KM_SKB_SUNRPC_DATA);
srcp += curlen;
copy_len -= curlen;
if (copy_len == 0)
break;
page_base = 0;
}
rqst->rq_rcv_buf.page_len = olen - copy_len;
} else
......
......@@ -144,6 +144,7 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
static inline
void rpcrdma_event_process(struct ib_wc *wc)
{
struct rpcrdma_mw *frmr;
struct rpcrdma_rep *rep =
(struct rpcrdma_rep *)(unsigned long) wc->wr_id;
......@@ -154,15 +155,23 @@ void rpcrdma_event_process(struct ib_wc *wc)
return;
if (IB_WC_SUCCESS != wc->status) {
dprintk("RPC: %s: %s WC status %X, connection lost\n",
__func__, (wc->opcode & IB_WC_RECV) ? "recv" : "send",
wc->status);
dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
__func__, wc->opcode, wc->status);
rep->rr_len = ~0U;
rpcrdma_schedule_tasklet(rep);
if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
rpcrdma_schedule_tasklet(rep);
return;
}
switch (wc->opcode) {
case IB_WC_FAST_REG_MR:
frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
frmr->r.frmr.state = FRMR_IS_VALID;
break;
case IB_WC_LOCAL_INV:
frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
frmr->r.frmr.state = FRMR_IS_INVALID;
break;
case IB_WC_RECV:
rep->rr_len = wc->byte_len;
ib_dma_sync_single_for_cpu(
......@@ -1450,6 +1459,12 @@ rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing)
seg->mr_dma = ib_dma_map_single(ia->ri_id->device,
seg->mr_offset,
seg->mr_dmalen, seg->mr_dir);
if (ib_dma_mapping_error(ia->ri_id->device, seg->mr_dma)) {
dprintk("RPC: %s: mr_dma %llx mr_offset %p mr_dma_len %zu\n",
__func__,
(unsigned long long)seg->mr_dma,
seg->mr_offset, seg->mr_dmalen);
}
}
static void
......@@ -1469,7 +1484,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_mr_seg *seg1 = seg;
struct ib_send_wr frmr_wr, *bad_wr;
struct ib_send_wr invalidate_wr, frmr_wr, *bad_wr, *post_wr;
u8 key;
int len, pageoff;
int i, rc;
......@@ -1484,6 +1500,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
rpcrdma_map_one(ia, seg, writing);
seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
len += seg->mr_len;
BUG_ON(seg->mr_len > PAGE_SIZE);
++seg;
++i;
/* Check for holes */
......@@ -1494,26 +1511,45 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
dprintk("RPC: %s: Using frmr %p to map %d segments\n",
__func__, seg1->mr_chunk.rl_mw, i);
if (unlikely(seg1->mr_chunk.rl_mw->r.frmr.state == FRMR_IS_VALID)) {
dprintk("RPC: %s: frmr %x left valid, posting invalidate.\n",
__func__,
seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey);
/* Invalidate before using. */
memset(&invalidate_wr, 0, sizeof invalidate_wr);
invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
invalidate_wr.next = &frmr_wr;
invalidate_wr.opcode = IB_WR_LOCAL_INV;
invalidate_wr.send_flags = IB_SEND_SIGNALED;
invalidate_wr.ex.invalidate_rkey =
seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
post_wr = &invalidate_wr;
} else
post_wr = &frmr_wr;
/* Bump the key */
key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
/* Prepare FRMR WR */
memset(&frmr_wr, 0, sizeof frmr_wr);
frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
frmr_wr.opcode = IB_WR_FAST_REG_MR;
frmr_wr.send_flags = 0; /* unsignaled */
frmr_wr.send_flags = IB_SEND_SIGNALED;
frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
frmr_wr.wr.fast_reg.page_list_len = i;
frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
BUG_ON(frmr_wr.wr.fast_reg.length < len);
frmr_wr.wr.fast_reg.access_flags = (writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ);
frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
rc = ib_post_send(ia->ri_id->qp, post_wr, &bad_wr);
if (rc) {
dprintk("RPC: %s: failed ib_post_send for register,"
......@@ -1542,8 +1578,9 @@ rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
rpcrdma_unmap_one(ia, seg++);
memset(&invalidate_wr, 0, sizeof invalidate_wr);
invalidate_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
invalidate_wr.opcode = IB_WR_LOCAL_INV;
invalidate_wr.send_flags = 0; /* unsignaled */
invalidate_wr.send_flags = IB_SEND_SIGNALED;
invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
......
......@@ -164,6 +164,7 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
struct {
struct ib_fast_reg_page_list *fr_pgl;
struct ib_mr *fr_mr;
enum { FRMR_IS_INVALID, FRMR_IS_VALID } state;
} frmr;
} r;
struct list_head mw_list;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment