Commit 4fe70410 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6

* 'for-linus' of git://git.linux-nfs.org/projects/trondmy/nfs-2.6: (58 commits)
  SUNRPC: Ensure IPV6_V6ONLY is set on the socket before binding to a port
  NSM: Fix unaligned accesses in nsm_init_private()
  NFS: Simplify logic to compare socket addresses in client.c
  NFS: Start PF_INET6 callback listener only if IPv6 support is available
  lockd: Start PF_INET6 listener only if IPv6 support is available
  SUNRPC: Remove CONFIG_SUNRPC_REGISTER_V4
  SUNRPC: rpcb_register() should handle errors silently
  SUNRPC: Simplify kernel RPC service registration
  SUNRPC: Simplify svc_unregister()
  SUNRPC: Allow callers to pass rpcb_v4_register a NULL address
  SUNRPC: rpcbind actually interprets r_owner string
  SUNRPC: Clean up address type casts in rpcb_v4_register()
  SUNRPC: Don't return EPROTONOSUPPORT in svc_register()'s helpers
  SUNRPC: Use IPv4 loopback for registering AF_INET6 kernel RPC services
  SUNRPC: Set IPV6ONLY flag on PF_INET6 RPC listener sockets
  NFS: Revert creation of IPv6 listeners for lockd and NFSv4 callbacks
  SUNRPC: Remove @family argument from svc_create() and svc_create_pooled()
  SUNRPC: Change svc_create_xprt() to take a @family argument
  SUNRPC: svc_setup_socket() gets protocol family from socket
  SUNRPC: Pass a family argument to svc_register()
  ...
parents 395d7341 cc859061
......@@ -139,55 +139,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
return 0;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/*
 * Produce an IPv6 view of the address held in @sap.
 *
 * An AF_INET6 address is returned in place.  An AF_INET address is run
 * through ipv6_addr_set_v4mapped() into the caller-supplied @addr_mapped,
 * which is then returned.  Any other address family yields NULL.
 */
static const struct in6_addr *nlmclnt_map_v4addr(const struct sockaddr *sap,
						 struct in6_addr *addr_mapped)
{
	if (sap->sa_family == AF_INET6) {
		const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;

		return &sin6->sin6_addr;
	}

	if (sap->sa_family == AF_INET) {
		const struct sockaddr_in *sin4 = (const struct sockaddr_in *)sap;

		ipv6_addr_set_v4mapped(sin4->sin_addr.s_addr, addr_mapped);
		return addr_mapped;
	}

	return NULL;
}
/*
* If lockd is using a PF_INET6 listener, all incoming requests appear
* to come from AF_INET6 remotes. The addresses of AF_INET remotes are
* mapped to AF_INET6 automatically by the network layer. In case the
* user passed an AF_INET server address at mount time, ensure both
* addresses are AF_INET6 before comparing them.
*/
static int nlmclnt_cmp_addr(const struct nlm_host *host,
const struct sockaddr *sap)
{
const struct in6_addr *addr1;
const struct in6_addr *addr2;
struct in6_addr addr1_mapped;
struct in6_addr addr2_mapped;
addr1 = nlmclnt_map_v4addr(nlm_addr(host), &addr1_mapped);
if (likely(addr1 != NULL)) {
addr2 = nlmclnt_map_v4addr(sap, &addr2_mapped);
if (likely(addr2 != NULL))
return ipv6_addr_equal(addr1, addr2);
}
return 0;
}
#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
/*
 * Variant built when neither CONFIG_IPV6 nor CONFIG_IPV6_MODULE is
 * defined: no address mapping is performed, the host's address and
 * @sap are handed straight to nlm_cmp_addr() and its result returned.
 */
static int nlmclnt_cmp_addr(const struct nlm_host *host,
const struct sockaddr *sap)
{
return nlm_cmp_addr(nlm_addr(host), sap);
}
#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
/*
* The server lockd has called us back to tell us the lock was granted
*/
......@@ -215,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
*/
if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid)
continue;
if (!nlmclnt_cmp_addr(block->b_host, addr))
if (!nlm_cmp_addr(nlm_addr(block->b_host), addr))
continue;
if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0)
continue;
......
......@@ -16,6 +16,8 @@
#include <linux/sunrpc/svc.h>
#include <linux/lockd/lockd.h>
#include <asm/unaligned.h>
#define NLMDBG_FACILITY NLMDBG_MONITOR
#define NSM_PROGRAM 100024
#define NSM_VERSION 1
......@@ -274,10 +276,12 @@ static void nsm_init_private(struct nsm_handle *nsm)
{
u64 *p = (u64 *)&nsm->sm_priv.data;
struct timespec ts;
s64 ns;
ktime_get_ts(&ts);
*p++ = timespec_to_ns(&ts);
*p = (unsigned long)nsm;
ns = timespec_to_ns(&ts);
put_unaligned(ns, p);
put_unaligned((unsigned long)nsm, p + 1);
}
static struct nsm_handle *nsm_create_handle(const struct sockaddr *sap,
......
......@@ -52,17 +52,6 @@ static struct task_struct *nlmsvc_task;
static struct svc_rqst *nlmsvc_rqst;
unsigned long nlmsvc_timeout;
/*
* If the kernel has IPv6 support available, always listen for
* both AF_INET and AF_INET6 requests.
*/
#if (defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) && \
defined(CONFIG_SUNRPC_REGISTER_V4)
static const sa_family_t nlmsvc_family = AF_INET6;
#else /* (CONFIG_IPV6 || CONFIG_IPV6_MODULE) && CONFIG_SUNRPC_REGISTER_V4 */
static const sa_family_t nlmsvc_family = AF_INET;
#endif /* (CONFIG_IPV6 || CONFIG_IPV6_MODULE) && CONFIG_SUNRPC_REGISTER_V4 */
/*
* These can be set at insmod time (useful for NFS as root filesystem),
* and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003
......@@ -204,19 +193,30 @@ lockd(void *vrqstp)
return 0;
}
static int create_lockd_listener(struct svc_serv *serv, char *name,
unsigned short port)
static int create_lockd_listener(struct svc_serv *serv, const char *name,
const int family, const unsigned short port)
{
struct svc_xprt *xprt;
xprt = svc_find_xprt(serv, name, 0, 0);
xprt = svc_find_xprt(serv, name, family, 0);
if (xprt == NULL)
return svc_create_xprt(serv, name, port, SVC_SOCK_DEFAULTS);
return svc_create_xprt(serv, name, family, port,
SVC_SOCK_DEFAULTS);
svc_xprt_put(xprt);
return 0;
}
/*
 * Set up lockd's listeners for one protocol family: first the "udp"
 * listener on nlm_udpport, then the "tcp" listener on nlm_tcpport.
 * A negative result from the UDP step is returned immediately and the
 * TCP step is not attempted; otherwise the TCP step's result is returned.
 */
static int create_lockd_family(struct svc_serv *serv, const int family)
{
	int status = create_lockd_listener(serv, "udp", family, nlm_udpport);

	if (status >= 0)
		status = create_lockd_listener(serv, "tcp", family,
					       nlm_tcpport);
	return status;
}
/*
* Ensure there are active UDP and TCP listeners for lockd.
*
......@@ -232,13 +232,15 @@ static int make_socks(struct svc_serv *serv)
static int warned;
int err;
err = create_lockd_listener(serv, "udp", nlm_udpport);
err = create_lockd_family(serv, PF_INET);
if (err < 0)
goto out_err;
err = create_lockd_listener(serv, "tcp", nlm_tcpport);
if (err < 0)
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
err = create_lockd_family(serv, PF_INET6);
if (err < 0 && err != -EAFNOSUPPORT)
goto out_err;
#endif /* CONFIG_IPV6 || CONFIG_IPV6_MODULE */
warned = 0;
return 0;
......@@ -274,7 +276,7 @@ int lockd_up(void)
"lockd_up: no pid, %d users??\n", nlmsvc_users);
error = -ENOMEM;
serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, nlmsvc_family, NULL);
serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL);
if (!serv) {
printk(KERN_WARNING "lockd_up: create service failed\n");
goto out;
......
......@@ -38,19 +38,10 @@ static struct svc_program nfs4_callback_program;
unsigned int nfs_callback_set_tcpport;
unsigned short nfs_callback_tcpport;
unsigned short nfs_callback_tcpport6;
static const int nfs_set_port_min = 0;
static const int nfs_set_port_max = 65535;
/*
* If the kernel has IPv6 support available, always listen for
* both AF_INET and AF_INET6 requests.
*/
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static const sa_family_t nfs_callback_family = AF_INET6;
#else
static const sa_family_t nfs_callback_family = AF_INET;
#endif
static int param_set_port(const char *val, struct kernel_param *kp)
{
char *endp;
......@@ -116,19 +107,29 @@ int nfs_callback_up(void)
mutex_lock(&nfs_callback_mutex);
if (nfs_callback_info.users++ || nfs_callback_info.task != NULL)
goto out;
serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE,
nfs_callback_family, NULL);
serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
ret = -ENOMEM;
if (!serv)
goto out_err;
ret = svc_create_xprt(serv, "tcp", nfs_callback_set_tcpport,
SVC_SOCK_ANONYMOUS);
ret = svc_create_xprt(serv, "tcp", PF_INET,
nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
if (ret <= 0)
goto out_err;
nfs_callback_tcpport = ret;
dprintk("NFS: Callback listener port = %u (af %u)\n",
nfs_callback_tcpport, nfs_callback_family);
nfs_callback_tcpport, PF_INET);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
ret = svc_create_xprt(serv, "tcp", PF_INET6,
nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
if (ret > 0) {
nfs_callback_tcpport6 = ret;
dprintk("NFS: Callback listener port = %u (af %u)\n",
nfs_callback_tcpport6, PF_INET6);
} else if (ret != -EAFNOSUPPORT)
goto out_err;
#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
if (IS_ERR(nfs_callback_info.rqst)) {
......
......@@ -72,5 +72,6 @@ extern void nfs_callback_down(void);
extern unsigned int nfs_callback_set_tcpport;
extern unsigned short nfs_callback_tcpport;
extern unsigned short nfs_callback_tcpport6;
#endif /* __LINUX_FS_NFS_CALLBACK_H */
......@@ -224,38 +224,6 @@ void nfs_put_client(struct nfs_client *clp)
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static const struct in6_addr *nfs_map_ipv4_addr(const struct sockaddr *sa, struct in6_addr *addr_mapped)
{
switch (sa->sa_family) {
default:
return NULL;
case AF_INET6:
return &((const struct sockaddr_in6 *)sa)->sin6_addr;
break;
case AF_INET:
ipv6_addr_set_v4mapped(((const struct sockaddr_in *)sa)->sin_addr.s_addr,
addr_mapped);
return addr_mapped;
}
}
static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
const struct in6_addr *addr1;
const struct in6_addr *addr2;
struct in6_addr addr1_mapped;
struct in6_addr addr2_mapped;
addr1 = nfs_map_ipv4_addr(sa1, &addr1_mapped);
if (likely(addr1 != NULL)) {
addr2 = nfs_map_ipv4_addr(sa2, &addr2_mapped);
if (likely(addr2 != NULL))
return ipv6_addr_equal(addr1, addr2);
}
return 0;
}
/*
* Test if two ip6 socket addresses refer to the same socket by
* comparing relevant fields. The padding bytes specifically, are not
......@@ -267,38 +235,21 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
*
* The caller should ensure both socket addresses are AF_INET6.
*/
static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
const struct sockaddr *sa2)
/*
 * Compare the IPv6 address parts of two socket addresses, ignoring the
 * port number.  Both @sa1 and @sa2 must already be AF_INET6.
 *
 * For link-local addresses the scope ID has to agree as well.
 * Returns non-zero when the addresses match, 0 otherwise.
 */
static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
				      const struct sockaddr *sa2)
{
	const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sa1;
	const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sa2;

	if (ipv6_addr_scope(&sin1->sin6_addr) == IPV6_ADDR_SCOPE_LINKLOCAL &&
	    sin1->sin6_scope_id != sin2->sin6_scope_id)
		return 0;

	/*
	 * Compare sa1's address with sa2's.  Passing &sin1->sin6_addr for
	 * both arguments would make every pair of addresses compare equal.
	 */
	return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
}
static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1,
const struct sockaddr * sa2)
#else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
/*
 * Stub used when neither CONFIG_IPV6 nor CONFIG_IPV6_MODULE is defined:
 * always reports "no match" (0) without inspecting either address.
 */
static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
return 0;
}
......@@ -311,20 +262,57 @@ static int nfs_sockaddr_cmp_ip6(const struct sockaddr * sa1,
*
* The caller should ensure both socket addresses are AF_INET.
*/
/*
 * Compare only the IPv4 address fields of @sa1 and @sa2; the port is
 * not looked at.  Both arguments are treated as struct sockaddr_in.
 * Returns 1 when the addresses are identical, 0 otherwise.
 */
static int nfs_sockaddr_match_ipaddr4(const struct sockaddr *sa1,
				      const struct sockaddr *sa2)
{
	return ((const struct sockaddr_in *)sa1)->sin_addr.s_addr ==
	       ((const struct sockaddr_in *)sa2)->sin_addr.s_addr;
}
/*
 * Full AF_INET6 comparison: the addresses must match according to
 * nfs_sockaddr_match_ipaddr6() and the port numbers must be equal.
 * Returns 1 on a match, 0 otherwise.
 */
static int nfs_sockaddr_cmp_ip6(const struct sockaddr *sa1,
				const struct sockaddr *sa2)
{
	/* The ports are only compared once the addresses are known to match */
	if (!nfs_sockaddr_match_ipaddr6(sa1, sa2))
		return 0;

	return ((const struct sockaddr_in6 *)sa1)->sin6_port ==
	       ((const struct sockaddr_in6 *)sa2)->sin6_port;
}
static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
const struct sockaddr_in *saddr1 = (const struct sockaddr_in *)sa1;
const struct sockaddr_in *saddr2 = (const struct sockaddr_in *)sa2;
const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sa1;
const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sa2;
if (saddr1->sin_addr.s_addr != saddr2->sin_addr.s_addr)
return nfs_sockaddr_match_ipaddr4(sa1, sa2) &&
(sin1->sin_port == sin2->sin_port);
}
/*
* Test if two socket addresses represent the same actual socket,
* by comparing (only) relevant fields, excluding the port number.
*/
static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
const struct sockaddr *sa2)
{
if (sa1->sa_family != sa2->sa_family)
return 0;
return saddr1->sin_port == saddr2->sin_port;
switch (sa1->sa_family) {
case AF_INET:
return nfs_sockaddr_match_ipaddr4(sa1, sa2);
case AF_INET6:
return nfs_sockaddr_match_ipaddr6(sa1, sa2);
}
return 0;
}
/*
* Test if two socket addresses represent the same actual socket,
* by comparing (only) relevant fields.
* by comparing (only) relevant fields, including the port number.
*/
static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
const struct sockaddr *sa2)
......
......@@ -1624,8 +1624,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
} else if (atomic_read(&new_dentry->d_count) > 1)
/* dentry still busy? */
goto out;
} else
nfs_drop_nlink(new_inode);
}
go_ahead:
/*
......@@ -1638,10 +1637,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
nfs_inode_return_delegation(old_inode);
if (new_inode != NULL) {
if (new_inode != NULL)
nfs_inode_return_delegation(new_inode);
d_delete(new_dentry);
}
error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
new_dir, &new_dentry->d_name);
......@@ -1650,6 +1647,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (rehash)
d_rehash(rehash);
if (!error) {
if (new_inode != NULL)
nfs_drop_nlink(new_inode);
d_move(old_dentry, new_dentry);
nfs_set_verifier(new_dentry,
nfs_save_change_attribute(new_dir));
......
......@@ -64,11 +64,7 @@ const struct file_operations nfs_file_operations = {
.write = do_sync_write,
.aio_read = nfs_file_read,
.aio_write = nfs_file_write,
#ifdef CONFIG_MMU
.mmap = nfs_file_mmap,
#else
.mmap = generic_file_mmap,
#endif
.open = nfs_file_open,
.flush = nfs_file_flush,
.release = nfs_file_release,
......@@ -141,9 +137,6 @@ nfs_file_release(struct inode *inode, struct file *filp)
dentry->d_parent->d_name.name,
dentry->d_name.name);
/* Ensure that dirty pages are flushed out with the right creds */
if (filp->f_mode & FMODE_WRITE)
nfs_wb_all(dentry->d_inode);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
return nfs_release(inode, filp);
}
......@@ -235,7 +228,6 @@ nfs_file_flush(struct file *file, fl_owner_t id)
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
int status;
dprintk("NFS: flush(%s/%s)\n",
dentry->d_parent->d_name.name,
......@@ -245,11 +237,8 @@ nfs_file_flush(struct file *file, fl_owner_t id)
return 0;
nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
/* Ensure that data+attribute caches are up to date after close() */
status = nfs_do_fsync(ctx, inode);
if (!status)
nfs_revalidate_inode(NFS_SERVER(inode), inode);
return status;
/* Flush writes to the server and return any errors */
return nfs_do_fsync(ctx, inode);
}
static ssize_t
......@@ -304,11 +293,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
dprintk("NFS: mmap(%s/%s)\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
status = nfs_revalidate_mapping(inode, file->f_mapping);
/* Note: generic_file_mmap() returns ENOSYS on nommu systems
* so we call that before revalidating the mapping
*/
status = generic_file_mmap(file, vma);
if (!status) {
vma->vm_ops = &nfs_file_vm_ops;
vma->vm_flags |= VM_CAN_NONLINEAR;
file_accessed(file);
status = nfs_revalidate_mapping(inode, file->f_mapping);
}
return status;
}
......@@ -354,6 +345,15 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
file->f_path.dentry->d_name.name,
mapping->host->i_ino, len, (long long) pos);
/*
* Prevent starvation issues if someone is doing a consistency
* sync-to-disk
*/
ret = wait_on_bit(&NFS_I(mapping->host)->flags, NFS_INO_FLUSHING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (ret)
return ret;
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
......
......@@ -156,7 +156,7 @@ int nfs4_path_walk(struct nfs_server *server,
return ret;
}
if (fattr.type != NFDIR) {
if (!S_ISDIR(fattr.mode)) {
printk(KERN_ERR "nfs4_get_root:"
" getroot encountered non-directory\n");
return -ENOTDIR;
......@@ -213,7 +213,7 @@ int nfs4_path_walk(struct nfs_server *server,
return ret;
}
if (fattr.type != NFDIR) {
if (!S_ISDIR(fattr.mode)) {
printk(KERN_ERR "nfs4_get_root:"
" lookupfh encountered non-directory\n");
return -ENOTDIR;
......
......@@ -65,6 +65,18 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
return nfs_fileid_to_ino_t(fattr->fileid);
}
/**
* nfs_wait_bit_killable - helper for functions that are sleeping on bit locks
* @word: long word containing the bit lock
*/
int nfs_wait_bit_killable(void *word)
{
	/* No fatal signal pending: give up the CPU and report success */
	if (!fatal_signal_pending(current)) {
		schedule();
		return 0;
	}

	/* A fatal signal is pending: bail out without sleeping */
	return -ERESTARTSYS;
}
/**
* nfs_compat_user_ino64 - returns the user-visible inode number
* @fileid: 64-bit fileid
......@@ -249,13 +261,10 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
struct inode *inode = ERR_PTR(-ENOENT);
unsigned long hash;
if ((fattr->valid & NFS_ATTR_FATTR) == 0)
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0)
goto out_no_inode;
if (!fattr->nlink) {
printk("NFS: Buggy server - nlink == 0!\n");
if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
goto out_no_inode;
}
hash = nfs_fattr_to_ino_t(fattr);
......@@ -291,7 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
&& fattr->size <= NFS_LIMIT_READDIRPLUS)
set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
/* Deal with crossing mountpoints */
if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
if ((fattr->valid & NFS_ATTR_FATTR_FSID)
&& !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
inode->i_op = &nfs_referral_inode_operations;
else
......@@ -304,28 +314,45 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
else
init_special_inode(inode, inode->i_mode, fattr->rdev);
memset(&inode->i_atime, 0, sizeof(inode->i_atime));
memset(&inode->i_mtime, 0, sizeof(inode->i_mtime));
memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
nfsi->change_attr = 0;
inode->i_size = 0;
inode->i_nlink = 0;
inode->i_uid = -2;
inode->i_gid = -2;
inode->i_blocks = 0;
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
nfsi->read_cache_jiffies = fattr->time_start;
nfsi->attr_gencount = fattr->gencount;
inode->i_atime = fattr->atime;
inode->i_mtime = fattr->mtime;
inode->i_ctime = fattr->ctime;
if (fattr->valid & NFS_ATTR_FATTR_V4)
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = fattr->mtime;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
nfsi->change_attr = fattr->change_attr;
inode->i_size = nfs_size_to_loff_t(fattr->size);
inode->i_nlink = fattr->nlink;
inode->i_uid = fattr->uid;
inode->i_gid = fattr->gid;
if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
if (fattr->valid & NFS_ATTR_FATTR_SIZE)
inode->i_size = nfs_size_to_loff_t(fattr->size);
if (fattr->valid & NFS_ATTR_FATTR_NLINK)
inode->i_nlink = fattr->nlink;
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
if (fattr->valid & NFS_ATTR_FATTR_GROUP)
inode->i_gid = fattr->gid;
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
/*
* report the blocks in 512byte units
*/
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
} else {
inode->i_blocks = fattr->du.nfs2.blocks;
}
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
nfsi->access_cache = RB_ROOT;
unlock_new_inode(inode);
......@@ -514,6 +541,32 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
return err;
}
/**
* nfs_close_context - Common close_context() routine for NFSv2/v3
* @ctx: pointer to context
* @is_sync: is this a synchronous close
*
* always ensure that the attributes are up to date if we're mounted
* with close-to-open semantics
*/
void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
{
	struct inode *inode;
	struct nfs_server *server;

	/* Nothing to do unless this is a synchronous close of a writer */
	if (!(ctx->mode & FMODE_WRITE) || !is_sync)
		return;

	inode = ctx->path.dentry->d_inode;

	/* Revalidate only when no open files remain on the inode ... */
	if (list_empty(&NFS_I(inode)->open_files)) {
		server = NFS_SERVER(inode);
		/* ... and the NFS_MOUNT_NOCTO flag is not set */
		if (!(server->flags & NFS_MOUNT_NOCTO))
			nfs_revalidate_inode(server, inode);
	}
}
static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, struct dentry *dentry, struct rpc_cred *cred)
{
struct nfs_open_context *ctx;
......@@ -540,24 +593,15 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
return ctx;
}
static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait)
static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
{
struct inode *inode;
if (ctx == NULL)
return;
struct inode *inode = ctx->path.dentry->d_inode;
inode = ctx->path.dentry->d_inode;
if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
return;
list_del(&ctx->list);
spin_unlock(&inode->i_lock);
if (ctx->state != NULL) {
if (wait)
nfs4_close_sync(&ctx->path, ctx->state, ctx->mode);
else
nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
}
NFS_PROTO(inode)->close_context(ctx, is_sync);
if (ctx->cred != NULL)
put_rpccred(ctx->cred);
path_put(&ctx->path);
......@@ -670,9 +714,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
if (NFS_STALE(inode))
goto out;
if (NFS_STALE(inode))
goto out;
nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
if (status != 0) {
......@@ -815,25 +856,31 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
struct nfs_inode *nfsi = NFS_I(inode);
if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 &&
nfsi->change_attr == fattr->pre_change_attr) {
if ((fattr->valid & NFS_ATTR_FATTR_PRECHANGE)
&& (fattr->valid & NFS_ATTR_FATTR_CHANGE)
&& nfsi->change_attr == fattr->pre_change_attr) {
nfsi->change_attr = fattr->change_attr;
if (S_ISDIR(inode->i_mode))
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
}
/* If we have atomic WCC data, we may update some attributes */
if ((fattr->valid & NFS_ATTR_WCC) != 0) {
if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
&& timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
if ((fattr->valid & NFS_ATTR_FATTR_PREMTIME)
&& (fattr->valid & NFS_ATTR_FATTR_MTIME)
&& timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
if (S_ISDIR(inode->i_mode))
nfsi->cache_validity |= NFS_INO_INVALID_DATA;
}
if (i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size) &&
nfsi->npages == 0)
i_size_write(inode, nfs_size_to_loff_t(fattr->size));
}
if ((fattr->valid & NFS_ATTR_FATTR_PRESIZE)
&& (fattr->valid & NFS_ATTR_FATTR_SIZE)
&& i_size_read(inode) == nfs_size_to_loff_t(fattr->pre_size)
&& nfsi->npages == 0)
i_size_write(inode, nfs_size_to_loff_t(fattr->size));
}
/**
......@@ -853,35 +900,39 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
/* Has the inode gone and changed behind our back? */
if (nfsi->fileid != fattr->fileid
|| (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) {
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
return -EIO;
if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
return -EIO;
}
if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
nfsi->change_attr != fattr->change_attr)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
/* Verify a few of the more important attributes */
if (!timespec_equal(&inode->i_mtime, &fattr->mtime))
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec_equal(&inode->i_mtime, &fattr->mtime))
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
cur_size = i_size_read(inode);
new_isize = nfs_size_to_loff_t(fattr->size);
if (cur_size != new_isize && nfsi->npages == 0)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
cur_size = i_size_read(inode);
new_isize = nfs_size_to_loff_t(fattr->size);
if (cur_size != new_isize && nfsi->npages == 0)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
}
/* Have any file permissions changed? */
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
|| inode->i_uid != fattr->uid
|| inode->i_gid != fattr->gid)
if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && inode->i_uid != fattr->uid)
invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && inode->i_gid != fattr->gid)
invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL;
/* Has the link count changed? */
if (inode->i_nlink != fattr->nlink)
if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
invalid |= NFS_INO_INVALID_ATTR;
if (!timespec_equal(&inode->i_atime, &fattr->atime))
if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec_equal(&inode->i_atime, &fattr->atime))
invalid |= NFS_INO_INVALID_ATIME;
if (invalid != 0)
......@@ -893,11 +944,15 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
/*
 * Returns 1 when @fattr carries a valid ctime that timespec_compare()
 * orders after the inode's cached ctime, and 0 otherwise (including
 * when NFS_ATTR_FATTR_CTIME is not set in fattr->valid).
 */
static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
{
	return (fattr->valid & NFS_ATTR_FATTR_CTIME) &&
		timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
}
/*
 * Returns 1 when @fattr carries a valid size that is larger than the
 * inode's current size, and 0 otherwise (including when
 * NFS_ATTR_FATTR_SIZE is not set in fattr->valid).
 */
static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
{
	return (fattr->valid & NFS_ATTR_FATTR_SIZE) &&
		nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
}
......@@ -1033,20 +1088,31 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
/* Don't do a WCC update if these attributes are already stale */
if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
!nfs_inode_attrs_need_update(inode, fattr)) {
fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC);
fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE
| NFS_ATTR_FATTR_PRESIZE
| NFS_ATTR_FATTR_PREMTIME
| NFS_ATTR_FATTR_PRECTIME);
goto out_noforce;
}
if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
(fattr->valid & NFS_ATTR_WCC_V4) == 0) {
if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PRECHANGE) == 0) {
fattr->pre_change_attr = NFS_I(inode)->change_attr;
fattr->valid |= NFS_ATTR_WCC_V4;
fattr->valid |= NFS_ATTR_FATTR_PRECHANGE;
}
if ((fattr->valid & NFS_ATTR_FATTR) != 0 &&
(fattr->valid & NFS_ATTR_WCC) == 0) {
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime));
fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PREMTIME) == 0) {
memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime));
fattr->valid |= NFS_ATTR_FATTR_PREMTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_SIZE) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PRESIZE) == 0) {
fattr->pre_size = i_size_read(inode);
fattr->valid |= NFS_ATTR_WCC;
fattr->valid |= NFS_ATTR_FATTR_PRESIZE;
}
out_noforce:
status = nfs_post_op_update_inode_locked(inode, fattr);
......@@ -1078,18 +1144,18 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
__func__, inode->i_sb->s_id, inode->i_ino,
atomic_read(&inode->i_count), fattr->valid);
if (nfsi->fileid != fattr->fileid)
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
goto out_fileid;
/*
* Make sure the inode's type hasn't changed.
*/
if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
goto out_changed;
server = NFS_SERVER(inode);
/* Update the fsid? */
if (S_ISDIR(inode->i_mode) &&
if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
!nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
!test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags))
server->fsid = fattr->fsid;
......@@ -1099,14 +1165,27 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
*/
nfsi->read_cache_jiffies = fattr->time_start;
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_PAGECACHE);
if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME)))
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_PAGECACHE);
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
/* More cache consistency checks */
if (!(fattr->valid & NFS_ATTR_FATTR_V4)) {
if (fattr->valid & NFS_ATTR_FATTR_CHANGE) {
if (nfsi->change_attr != fattr->change_attr) {
dprintk("NFS: change_attr change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
nfsi->change_attr = fattr->change_attr;
}
}
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
/* NFSv2/v3: Check if the mtime agrees */
if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
dprintk("NFS: mtime change on server for file %s/%ld\n",
......@@ -1114,59 +1193,80 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
}
}
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
/* If ctime has changed we should definitely clear access+acl caches */
if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
} else if (nfsi->change_attr != fattr->change_attr) {
dprintk("NFS: change_attr change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
/* and probably clear data for a directory too as utimes can cause
* havoc with our cache.
*/
if (S_ISDIR(inode->i_mode)) {
invalid |= NFS_INO_INVALID_DATA;
nfs_force_lookup_revalidate(inode);
}
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
}
}
/* Check if our cached file size is stale */
new_isize = nfs_size_to_loff_t(fattr->size);
cur_isize = i_size_read(inode);
if (new_isize != cur_isize) {
/* Do we perhaps have any outstanding writes, or has
* the file grown beyond our last write? */
if (nfsi->npages == 0 || new_isize > cur_isize) {
i_size_write(inode, new_isize);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
if (fattr->valid & NFS_ATTR_FATTR_SIZE) {
new_isize = nfs_size_to_loff_t(fattr->size);
cur_isize = i_size_read(inode);
if (new_isize != cur_isize) {
/* Do we perhaps have any outstanding writes, or has
* the file grown beyond our last write? */
if (nfsi->npages == 0 || new_isize > cur_isize) {
i_size_write(inode, new_isize);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
}
dprintk("NFS: isize change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
}
dprintk("NFS: isize change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
}
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
nfsi->change_attr = fattr->change_attr;
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
inode->i_uid != fattr->uid ||
inode->i_gid != fattr->gid)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
if (inode->i_nlink != fattr->nlink)
invalid |= NFS_INO_INVALID_ATTR;
if (fattr->valid & NFS_ATTR_FATTR_MODE) {
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
inode->i_mode = fattr->mode;
}
}
if (fattr->valid & NFS_ATTR_FATTR_OWNER) {
if (inode->i_uid != fattr->uid) {
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
inode->i_uid = fattr->uid;
}
}
if (fattr->valid & NFS_ATTR_FATTR_GROUP) {
if (inode->i_gid != fattr->gid) {
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
inode->i_gid = fattr->gid;
}
}
inode->i_mode = fattr->mode;
inode->i_nlink = fattr->nlink;
inode->i_uid = fattr->uid;
inode->i_gid = fattr->gid;
if (fattr->valid & NFS_ATTR_FATTR_NLINK) {
if (inode->i_nlink != fattr->nlink) {
invalid |= NFS_INO_INVALID_ATTR;
if (S_ISDIR(inode->i_mode))
invalid |= NFS_INO_INVALID_DATA;
inode->i_nlink = fattr->nlink;
}
}
if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
/*
* report the blocks in 512byte units
*/
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
} else {
inode->i_blocks = fattr->du.nfs2.blocks;
}
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
/* Update attrtimeo value if we're out of the unstable period */
if (invalid & NFS_INO_INVALID_ATTR) {
......@@ -1274,7 +1374,6 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
nfsi->ncommit = 0;
nfsi->npages = 0;
atomic_set(&nfsi->silly_count, 1);
INIT_HLIST_HEAD(&nfsi->silly_list);
......
......@@ -152,6 +152,9 @@ extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
extern struct rpc_procinfo nfs4_procedures[];
#endif
/* proc.c */
void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
/* dir.c */
extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
......@@ -165,6 +168,7 @@ extern void nfs_clear_inode(struct inode *);
extern void nfs4_clear_inode(struct inode *);
#endif
void nfs_zap_acl_cache(struct inode *inode);
extern int nfs_wait_bit_killable(void *word);
/* super.c */
void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *);
......
......@@ -120,8 +120,8 @@ xdr_decode_time(__be32 *p, struct timespec *timep)
static __be32 *
xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
{
u32 rdev;
fattr->type = (enum nfs_ftype) ntohl(*p++);
u32 rdev, type;
type = ntohl(*p++);
fattr->mode = ntohl(*p++);
fattr->nlink = ntohl(*p++);
fattr->uid = ntohl(*p++);
......@@ -136,10 +136,9 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
p = xdr_decode_time(p, &fattr->atime);
p = xdr_decode_time(p, &fattr->mtime);
p = xdr_decode_time(p, &fattr->ctime);
fattr->valid |= NFS_ATTR_FATTR;
fattr->valid |= NFS_ATTR_FATTR_V2;
fattr->rdev = new_decode_dev(rdev);
if (fattr->type == NFCHR && rdev == NFS2_FIFO_DEV) {
fattr->type = NFFIFO;
if (type == NFCHR && rdev == NFS2_FIFO_DEV) {
fattr->mode = (fattr->mode & ~S_IFMT) | S_IFIFO;
fattr->rdev = 0;
}
......
......@@ -834,4 +834,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.commit_done = nfs3_commit_done,
.lock = nfs3_proc_lock,
.clear_acl_cache = nfs3_forget_cached_acls,
.close_context = nfs_close_context,
};
......@@ -91,19 +91,15 @@
/*
* Map file type to S_IFMT bits
*/
static struct {
unsigned int mode;
unsigned int nfs2type;
} nfs_type2fmt[] = {
{ 0, NFNON },
{ S_IFREG, NFREG },
{ S_IFDIR, NFDIR },
{ S_IFBLK, NFBLK },
{ S_IFCHR, NFCHR },
{ S_IFLNK, NFLNK },
{ S_IFSOCK, NFSOCK },
{ S_IFIFO, NFFIFO },
{ 0, NFBAD }
static const umode_t nfs_type2fmt[] = {
[NF3BAD] = 0,
[NF3REG] = S_IFREG,
[NF3DIR] = S_IFDIR,
[NF3BLK] = S_IFBLK,
[NF3CHR] = S_IFCHR,
[NF3LNK] = S_IFLNK,
[NF3SOCK] = S_IFSOCK,
[NF3FIFO] = S_IFIFO,
};
/*
......@@ -148,13 +144,12 @@ static __be32 *
xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
{
unsigned int type, major, minor;
int fmode;
umode_t fmode;
type = ntohl(*p++);
if (type >= NF3BAD)
type = NF3BAD;
fmode = nfs_type2fmt[type].mode;
fattr->type = nfs_type2fmt[type].nfs2type;
if (type > NF3FIFO)
type = NF3NON;
fmode = nfs_type2fmt[type];
fattr->mode = (ntohl(*p++) & ~S_IFMT) | fmode;
fattr->nlink = ntohl(*p++);
fattr->uid = ntohl(*p++);
......@@ -177,7 +172,7 @@ xdr_decode_fattr(__be32 *p, struct nfs_fattr *fattr)
p = xdr_decode_time3(p, &fattr->ctime);
/* Update the mode bits */
fattr->valid |= (NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3);
fattr->valid |= NFS_ATTR_FATTR_V3;
return p;
}
......@@ -233,7 +228,9 @@ xdr_decode_wcc_attr(__be32 *p, struct nfs_fattr *fattr)
p = xdr_decode_hyper(p, &fattr->pre_size);
p = xdr_decode_time3(p, &fattr->pre_mtime);
p = xdr_decode_time3(p, &fattr->pre_ctime);
fattr->valid |= NFS_ATTR_WCC;
fattr->valid |= NFS_ATTR_FATTR_PRESIZE
| NFS_ATTR_FATTR_PREMTIME
| NFS_ATTR_FATTR_PRECTIME;
return p;
}
......
......@@ -193,14 +193,6 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent
kunmap_atomic(start, KM_USER0);
}
/*
 * Action routine handed to wait_on_bit(): returns -ERESTARTSYS when the
 * current task has a fatal signal pending, otherwise calls schedule()
 * and returns 0.
 */
static int nfs4_wait_bit_killable(void *word)
{
	int ret = 0;

	if (fatal_signal_pending(current))
		ret = -ERESTARTSYS;
	else
		schedule();
	return ret;
}
static int nfs4_wait_clnt_recover(struct nfs_client *clp)
{
int res;
......@@ -208,7 +200,7 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp)
might_sleep();
res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
nfs4_wait_bit_killable, TASK_KILLABLE);
nfs_wait_bit_killable, TASK_KILLABLE);
return res;
}
......@@ -1439,7 +1431,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
if (calldata->arg.seqid == NULL)
goto out_free_calldata;
calldata->arg.fmode = 0;
calldata->arg.bitmask = server->attr_bitmask;
calldata->arg.bitmask = server->cache_consistency_bitmask;
calldata->res.fattr = &calldata->fattr;
calldata->res.seqid = calldata->arg.seqid;
calldata->res.server = server;
......@@ -1580,6 +1572,15 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
return 0;
}
/*
 * Close the NFSv4 state attached to an open context.
 *
 * Does nothing when ctx->state is NULL.  Otherwise a non-zero @is_sync
 * selects nfs4_close_sync(); zero selects nfs4_close_state().  Both are
 * passed the context's path, state and mode.
 */
void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
{
	if (ctx->state == NULL)
		return;

	if (!is_sync) {
		nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
		return;
	}
	nfs4_close_sync(&ctx->path, ctx->state, ctx->mode);
}
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
......@@ -1600,6 +1601,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
server->caps |= NFS_CAP_HARDLINKS;
if (res.has_symlinks != 0)
server->caps |= NFS_CAP_SYMLINKS;
memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask));
server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
server->acl_bitmask = res.acl_bitmask;
}
return status;
......@@ -2079,7 +2083,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir)
struct nfs_removeargs *args = msg->rpc_argp;
struct nfs_removeres *res = msg->rpc_resp;
args->bitmask = server->attr_bitmask;
args->bitmask = server->cache_consistency_bitmask;
res->server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE];
}
......@@ -2323,7 +2327,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
.pages = &page,
.pgbase = 0,
.count = count,
.bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask,
.bitmask = NFS_SERVER(dentry->d_inode)->cache_consistency_bitmask,
};
struct nfs4_readdir_res res;
struct rpc_message msg = {
......@@ -2552,7 +2556,7 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
{
struct nfs_server *server = NFS_SERVER(data->inode);
data->args.bitmask = server->attr_bitmask;
data->args.bitmask = server->cache_consistency_bitmask;
data->res.server = server;
data->timestamp = jiffies;
......@@ -2575,7 +2579,7 @@ static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
{
struct nfs_server *server = NFS_SERVER(data->inode);
data->args.bitmask = server->attr_bitmask;
data->args.bitmask = server->cache_consistency_bitmask;
data->res.server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
}
......@@ -3678,6 +3682,19 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
return len;
}
/*
 * Fill in synthetic type/mode/nlink attributes for a referral.
 *
 * Only acts when the fileid, fsid and V4_REFERRAL bits are all set in
 * fattr->valid.  In that case the TYPE, MODE and NLINK bits are marked
 * valid, the mode is set to a directory with read and execute permission
 * for user, group and other, and the link count is set to 2.
 */
static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
{
	const unsigned int required = NFS_ATTR_FATTR_FILEID |
				      NFS_ATTR_FATTR_FSID |
				      NFS_ATTR_FATTR_V4_REFERRAL;

	/* Every one of the required bits must be present. */
	if ((fattr->valid & required) != required)
		return;

	fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
	fattr->nlink = 2;
	fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
			NFS_ATTR_FATTR_NLINK;
}
int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
struct nfs4_fs_locations *fs_locations, struct page *page)
{
......@@ -3704,6 +3721,7 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
fs_locations->server = server;
fs_locations->nlocations = 0;
status = rpc_call_sync(server->client, &msg, 0);
nfs_fixup_referral_attributes(&fs_locations->fattr);
dprintk("%s: returned status = %d\n", __func__, status);
return status;
}
......@@ -3767,6 +3785,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.commit_done = nfs4_commit_done,
.lock = nfs4_proc_lock,
.clear_acl_cache = nfs4_zap_acl_attr,
.close_context = nfs4_close_context,
};
/*
......
......@@ -62,8 +62,14 @@ static LIST_HEAD(nfs4_clientid_list);
static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
{
int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK,
nfs_callback_tcpport, cred);
unsigned short port;
int status;
port = nfs_callback_tcpport;
if (clp->cl_addr.ss_family == AF_INET6)
port = nfs_callback_tcpport6;
status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred);
if (status == 0)
status = nfs4_proc_setclientid_confirm(clp, cred);
if (status == 0)
......
......@@ -522,20 +522,17 @@ static int nfs4_stat_to_errno(int);
decode_lookup_maxsz + \
decode_fs_locations_maxsz)
static struct {
unsigned int mode;
unsigned int nfs2type;
} nfs_type2fmt[] = {
{ 0, NFNON },
{ S_IFREG, NFREG },
{ S_IFDIR, NFDIR },
{ S_IFBLK, NFBLK },
{ S_IFCHR, NFCHR },
{ S_IFLNK, NFLNK },
{ S_IFSOCK, NFSOCK },
{ S_IFIFO, NFFIFO },
{ 0, NFNON },
{ 0, NFNON },
static const umode_t nfs_type2fmt[] = {
[NF4BAD] = 0,
[NF4REG] = S_IFREG,
[NF4DIR] = S_IFDIR,
[NF4BLK] = S_IFBLK,
[NF4CHR] = S_IFCHR,
[NF4LNK] = S_IFLNK,
[NF4SOCK] = S_IFSOCK,
[NF4FIFO] = S_IFIFO,
[NF4ATTRDIR] = 0,
[NF4NAMEDATTR] = 0,
};
struct compound_hdr {
......@@ -2160,6 +2157,7 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3
static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *type)
{
__be32 *p;
int ret = 0;
*type = 0;
if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
......@@ -2172,14 +2170,16 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
return -EIO;
}
bitmap[0] &= ~FATTR4_WORD0_TYPE;
ret = NFS_ATTR_FATTR_TYPE;
}
dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type].nfs2type);
return 0;
dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
return ret;
}
static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
{
__be32 *p;
int ret = 0;
*change = 0;
if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
......@@ -2188,15 +2188,17 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
READ_BUF(8);
READ64(*change);
bitmap[0] &= ~FATTR4_WORD0_CHANGE;
ret = NFS_ATTR_FATTR_CHANGE;
}
dprintk("%s: change attribute=%Lu\n", __func__,
(unsigned long long)*change);
return 0;
return ret;
}
static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
{
__be32 *p;
int ret = 0;
*size = 0;
if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
......@@ -2205,9 +2207,10 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
READ_BUF(8);
READ64(*size);
bitmap[0] &= ~FATTR4_WORD0_SIZE;
ret = NFS_ATTR_FATTR_SIZE;
}
dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
return 0;
return ret;
}
static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
......@@ -2245,6 +2248,7 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
{
__be32 *p;
int ret = 0;
fsid->major = 0;
fsid->minor = 0;
......@@ -2255,11 +2259,12 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
READ64(fsid->major);
READ64(fsid->minor);
bitmap[0] &= ~FATTR4_WORD0_FSID;
ret = NFS_ATTR_FATTR_FSID;
}
dprintk("%s: fsid=(0x%Lx/0x%Lx)\n", __func__,
(unsigned long long)fsid->major,
(unsigned long long)fsid->minor);
return 0;
return ret;
}
static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
......@@ -2297,6 +2302,7 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
{
__be32 *p;
int ret = 0;
*fileid = 0;
if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
......@@ -2305,14 +2311,16 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
READ_BUF(8);
READ64(*fileid);
bitmap[0] &= ~FATTR4_WORD0_FILEID;
ret = NFS_ATTR_FATTR_FILEID;
}
dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
return 0;
return ret;
}
static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
{
__be32 *p;
int ret = 0;
*fileid = 0;
if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
......@@ -2321,9 +2329,10 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
READ_BUF(8);
READ64(*fileid);
bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
ret = NFS_ATTR_FATTR_FILEID;
}
dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
return 0;
return ret;
}
static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
......@@ -2479,6 +2488,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
if (res->nlocations < NFS4_FS_LOCATIONS_MAXENTRIES)
res->nlocations++;
}
if (res->nlocations != 0)
status = NFS_ATTR_FATTR_V4_REFERRAL;
out:
dprintk("%s: fs_locations done, error = %d\n", __func__, status);
return status;
......@@ -2580,26 +2591,30 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
return status;
}
static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *mode)
static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
{
uint32_t tmp;
__be32 *p;
int ret = 0;
*mode = 0;
if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
READ_BUF(4);
READ32(*mode);
*mode &= ~S_IFMT;
READ32(tmp);
*mode = tmp & ~S_IFMT;
bitmap[1] &= ~FATTR4_WORD1_MODE;
ret = NFS_ATTR_FATTR_MODE;
}
dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
return 0;
return ret;
}
static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
{
__be32 *p;
int ret = 0;
*nlink = 1;
if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
......@@ -2608,15 +2623,17 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
READ_BUF(4);
READ32(*nlink);
bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
ret = NFS_ATTR_FATTR_NLINK;
}
dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
return 0;
return ret;
}
static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *uid)
{
uint32_t len;
__be32 *p;
int ret = 0;
*uid = -2;
if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
......@@ -2626,7 +2643,9 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
READ32(len);
READ_BUF(len);
if (len < XDR_MAX_NETOBJ) {
if (nfs_map_name_to_uid(clp, (char *)p, len, uid) != 0)
if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0)
ret = NFS_ATTR_FATTR_OWNER;
else
dprintk("%s: nfs_map_name_to_uid failed!\n",
__func__);
} else
......@@ -2635,13 +2654,14 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
bitmap[1] &= ~FATTR4_WORD1_OWNER;
}
dprintk("%s: uid=%d\n", __func__, (int)*uid);
return 0;
return ret;
}
static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_client *clp, uint32_t *gid)
{
uint32_t len;
__be32 *p;
int ret = 0;
*gid = -2;
if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
......@@ -2651,7 +2671,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
READ32(len);
READ_BUF(len);
if (len < XDR_MAX_NETOBJ) {
if (nfs_map_group_to_gid(clp, (char *)p, len, gid) != 0)
if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0)
ret = NFS_ATTR_FATTR_GROUP;
else
dprintk("%s: nfs_map_group_to_gid failed!\n",
__func__);
} else
......@@ -2660,13 +2682,14 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf
bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP;
}
dprintk("%s: gid=%d\n", __func__, (int)*gid);
return 0;
return ret;
}
static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
{
uint32_t major = 0, minor = 0;
__be32 *p;
int ret = 0;
*rdev = MKDEV(0,0);
if (unlikely(bitmap[1] & (FATTR4_WORD1_RAWDEV - 1U)))
......@@ -2681,9 +2704,10 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
if (MAJOR(tmp) == major && MINOR(tmp) == minor)
*rdev = tmp;
bitmap[1] &= ~ FATTR4_WORD1_RAWDEV;
ret = NFS_ATTR_FATTR_RDEV;
}
dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
return 0;
return ret;
}
static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
......@@ -2740,6 +2764,7 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
{
__be32 *p;
int ret = 0;
*used = 0;
if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
......@@ -2748,10 +2773,11 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
READ_BUF(8);
READ64(*used);
bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
ret = NFS_ATTR_FATTR_SPACE_USED;
}
dprintk("%s: space used=%Lu\n", __func__,
(unsigned long long)*used);
return 0;
return ret;
}
static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
......@@ -2778,6 +2804,8 @@ static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, str
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_TIME_ACCESS)) {
status = decode_attr_time(xdr, time);
if (status == 0)
status = NFS_ATTR_FATTR_ATIME;
bitmap[1] &= ~FATTR4_WORD1_TIME_ACCESS;
}
dprintk("%s: atime=%ld\n", __func__, (long)time->tv_sec);
......@@ -2794,6 +2822,8 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) {
status = decode_attr_time(xdr, time);
if (status == 0)
status = NFS_ATTR_FATTR_CTIME;
bitmap[1] &= ~FATTR4_WORD1_TIME_METADATA;
}
dprintk("%s: ctime=%ld\n", __func__, (long)time->tv_sec);
......@@ -2810,6 +2840,8 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str
return -EIO;
if (likely(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) {
status = decode_attr_time(xdr, time);
if (status == 0)
status = NFS_ATTR_FATTR_MTIME;
bitmap[1] &= ~FATTR4_WORD1_TIME_MODIFY;
}
dprintk("%s: mtime=%ld\n", __func__, (long)time->tv_sec);
......@@ -2994,63 +3026,116 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, cons
uint32_t attrlen,
bitmap[2] = {0},
type;
int status, fmode = 0;
int status;
umode_t fmode = 0;
uint64_t fileid;
if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0)
goto xdr_error;
if ((status = decode_attr_bitmap(xdr, bitmap)) != 0)
status = decode_op_hdr(xdr, OP_GETATTR);
if (status < 0)
goto xdr_error;
fattr->bitmap[0] = bitmap[0];
fattr->bitmap[1] = bitmap[1];
status = decode_attr_bitmap(xdr, bitmap);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0)
status = decode_attr_length(xdr, &attrlen, &savep);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_type(xdr, bitmap, &type)) != 0)
status = decode_attr_type(xdr, bitmap, &type);
if (status < 0)
goto xdr_error;
fattr->type = nfs_type2fmt[type].nfs2type;
fmode = nfs_type2fmt[type].mode;
fattr->mode = 0;
if (status != 0) {
fattr->mode |= nfs_type2fmt[type];
fattr->valid |= status;
}
if ((status = decode_attr_change(xdr, bitmap, &fattr->change_attr)) != 0)
status = decode_attr_change(xdr, bitmap, &fattr->change_attr);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0)
fattr->valid |= status;
status = decode_attr_size(xdr, bitmap, &fattr->size);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid)) != 0)
fattr->valid |= status;
status = decode_attr_fsid(xdr, bitmap, &fattr->fsid);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0)
fattr->valid |= status;
status = decode_attr_fileid(xdr, bitmap, &fattr->fileid);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr,
fattr->valid |= status;
status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr,
struct nfs4_fs_locations,
fattr))) != 0)
fattr));
if (status < 0)
goto xdr_error;
if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0)
fattr->valid |= status;
status = decode_attr_mode(xdr, bitmap, &fmode);
if (status < 0)
goto xdr_error;
fattr->mode |= fmode;
if ((status = decode_attr_nlink(xdr, bitmap, &fattr->nlink)) != 0)
if (status != 0) {
fattr->mode |= fmode;
fattr->valid |= status;
}
status = decode_attr_nlink(xdr, bitmap, &fattr->nlink);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid)) != 0)
fattr->valid |= status;
status = decode_attr_owner(xdr, bitmap, server->nfs_client, &fattr->uid);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid)) != 0)
fattr->valid |= status;
status = decode_attr_group(xdr, bitmap, server->nfs_client, &fattr->gid);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_rdev(xdr, bitmap, &fattr->rdev)) != 0)
fattr->valid |= status;
status = decode_attr_rdev(xdr, bitmap, &fattr->rdev);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used)) != 0)
fattr->valid |= status;
status = decode_attr_space_used(xdr, bitmap, &fattr->du.nfs3.used);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_time_access(xdr, bitmap, &fattr->atime)) != 0)
fattr->valid |= status;
status = decode_attr_time_access(xdr, bitmap, &fattr->atime);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime)) != 0)
fattr->valid |= status;
status = decode_attr_time_metadata(xdr, bitmap, &fattr->ctime);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime)) != 0)
fattr->valid |= status;
status = decode_attr_time_modify(xdr, bitmap, &fattr->mtime);
if (status < 0)
goto xdr_error;
if ((status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid)) != 0)
fattr->valid |= status;
status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid);
if (status < 0)
goto xdr_error;
if (fattr->fileid == 0 && fileid != 0)
if (status != 0 && !(fattr->valid & status)) {
fattr->fileid = fileid;
if ((status = verify_attr_len(xdr, savep, attrlen)) == 0)
fattr->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
fattr->valid |= status;
}
status = verify_attr_len(xdr, savep, attrlen);
xdr_error:
dprintk("%s: xdr returned %d\n", __func__, -status);
return status;
......@@ -4078,9 +4163,7 @@ static int nfs4_xdr_dec_setattr(struct rpc_rqst *rqstp, __be32 *p, struct nfs_se
status = decode_setattr(&xdr, res);
if (status)
goto out;
status = decode_getfattr(&xdr, res->fattr, res->server);
if (status == NFS4ERR_DELAY)
status = 0;
decode_getfattr(&xdr, res->fattr, res->server);
out:
return status;
}
......
......@@ -176,17 +176,6 @@ void nfs_release_request(struct nfs_page *req)
kref_put(&req->wb_kref, nfs_free_request);
}
/*
 * Returns -ERESTARTSYS if the current task has a fatal signal pending;
 * otherwise calls schedule() and returns 0.
 */
static int nfs_wait_bit_killable(void *word)
{
	if (fatal_signal_pending(current))
		return -ERESTARTSYS;

	schedule();
	return 0;
}
/**
* nfs_wait_on_request - Wait for a request to complete.
* @req: request to wait upon.
......
......@@ -663,4 +663,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.commit_setup = nfs_proc_commit_setup,
.lock = nfs_proc_lock,
.lock_check_bounds = nfs_lock_check_bounds,
.close_context = nfs_close_context,
};
......@@ -1018,6 +1018,7 @@ static int nfs_parse_mount_options(char *raw,
case Opt_rdma:
mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */
mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
xprt_load_transport(p);
break;
case Opt_acl:
mnt->flags &= ~NFS_MOUNT_NOACL;
......@@ -1205,12 +1206,14 @@ static int nfs_parse_mount_options(char *raw,
/* vector side protocols to TCP */
mnt->flags |= NFS_MOUNT_TCP;
mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
xprt_load_transport(string);
break;
default:
errors++;
dfprintk(MOUNT, "NFS: unrecognized "
"transport protocol\n");
}
kfree(string);
break;
case Opt_mountproto:
string = match_strdup(args);
......@@ -1218,7 +1221,6 @@ static int nfs_parse_mount_options(char *raw,
goto out_nomem;
token = match_token(string,
nfs_xprt_protocol_tokens, args);
kfree(string);
switch (token) {
case Opt_xprt_udp:
......
......@@ -313,19 +313,34 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
unsigned long *bitlock = &NFS_I(inode)->flags;
struct nfs_pageio_descriptor pgio;
int err;
/* Stop dirtying of new pages while we sync */
err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
nfs_wait_bit_killable, TASK_KILLABLE);
if (err)
goto out_err;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);
clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
smp_mb__after_clear_bit();
wake_up_bit(bitlock, NFS_INO_FLUSHING);
if (err < 0)
return err;
if (pgio.pg_error < 0)
return pgio.pg_error;
goto out_err;
err = pgio.pg_error;
if (err < 0)
goto out_err;
return 0;
out_err:
return err;
}
/*
......@@ -404,7 +419,6 @@ nfs_mark_request_commit(struct nfs_page *req)
struct nfs_inode *nfsi = NFS_I(inode);
spin_lock(&inode->i_lock);
nfsi->ncommit++;
set_bit(PG_CLEAN, &(req)->wb_flags);
radix_tree_tag_set(&nfsi->nfs_page_tree,
req->wb_index,
......@@ -524,6 +538,12 @@ static void nfs_cancel_commit_list(struct list_head *head)
}
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
 * Returns the result of radix_tree_tagged() for NFS_PAGE_TAG_COMMIT on the
 * inode's nfs_page_tree, i.e. non-zero when the tree holds entries tagged
 * for commit.
 */
static int nfs_need_commit(struct nfs_inode *nfsi)
{
	struct radix_tree_root *tree = &nfsi->nfs_page_tree;

	return radix_tree_tagged(tree, NFS_PAGE_TAG_COMMIT);
}
/*
* nfs_scan_commit - Scan an inode for commit requests
* @inode: NFS inode to scan
......@@ -538,16 +558,18 @@ static int
nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
struct nfs_inode *nfsi = NFS_I(inode);
int res = 0;
if (nfsi->ncommit != 0) {
res = nfs_scan_list(nfsi, dst, idx_start, npages,
NFS_PAGE_TAG_COMMIT);
nfsi->ncommit -= res;
}
return res;
if (!nfs_need_commit(nfsi))
return 0;
return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
}
#else
/*
 * Stub used in the #else branch of the CONFIG_NFS_V3/CONFIG_NFS_V4 check:
 * ignores @nfsi and always returns 0.
 */
static inline int nfs_need_commit(struct nfs_inode *nfsi)
{
return 0;
}
static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
return 0;
......@@ -820,7 +842,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
data->args.stable = NFS_UNSTABLE;
if (how & FLUSH_STABLE) {
data->args.stable = NFS_DATA_SYNC;
if (!NFS_I(inode)->ncommit)
if (!nfs_need_commit(NFS_I(inode)))
data->args.stable = NFS_FILE_SYNC;
}
......@@ -1425,18 +1447,13 @@ static int nfs_write_mapping(struct address_space *mapping, int how)
{
struct writeback_control wbc = {
.bdi = mapping->backing_dev_info,
.sync_mode = WB_SYNC_NONE,
.sync_mode = WB_SYNC_ALL,
.nr_to_write = LONG_MAX,
.range_start = 0,
.range_end = LLONG_MAX,
.for_writepages = 1,
};
int ret;
ret = __nfs_write_mapping(mapping, &wbc, how);
if (ret < 0)
return ret;
wbc.sync_mode = WB_SYNC_ALL;
return __nfs_write_mapping(mapping, &wbc, how);
}
......
......@@ -938,10 +938,12 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
char transport[16];
int port;
if (sscanf(buf, "%15s %4d", transport, &port) == 2) {
if (port < 1 || port > 65535)
return -EINVAL;
err = nfsd_create_serv();
if (!err) {
err = svc_create_xprt(nfsd_serv,
transport, port,
transport, PF_INET, port,
SVC_SOCK_ANONYMOUS);
if (err == -ENOENT)
/* Give a reasonable perror msg for
......@@ -960,7 +962,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size)
char transport[16];
int port;
if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
if (port == 0)
if (port < 1 || port > 65535)
return -EINVAL;
if (nfsd_serv) {
xprt = svc_find_xprt(nfsd_serv, transport,
......
......@@ -229,7 +229,6 @@ int nfsd_create_serv(void)
atomic_set(&nfsd_busy, 0);
nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
AF_INET,
nfsd_last_thread, nfsd, THIS_MODULE);
if (nfsd_serv == NULL)
err = -ENOMEM;
......@@ -244,7 +243,7 @@ static int nfsd_init_socks(int port)
if (!list_empty(&nfsd_serv->sv_permsocks))
return 0;
error = svc_create_xprt(nfsd_serv, "udp", port,
error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port,
SVC_SOCK_DEFAULTS);
if (error < 0)
return error;
......@@ -253,7 +252,7 @@ static int nfsd_init_socks(int port)
if (error < 0)
return error;
error = svc_create_xprt(nfsd_serv, "tcp", port,
error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port,
SVC_SOCK_DEFAULTS);
if (error < 0)
return error;
......
......@@ -166,8 +166,7 @@ struct nfs_inode {
*/
struct radix_tree_root nfs_page_tree;
unsigned long ncommit,
npages;
unsigned long npages;
/* Open contexts for shared mmap writes */
struct list_head open_files;
......@@ -207,6 +206,7 @@ struct nfs_inode {
#define NFS_INO_STALE (1) /* possible stale inode */
#define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */
#define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */
#define NFS_INO_FLUSHING (4) /* inode is flushing out data */
static inline struct nfs_inode *NFS_I(const struct inode *inode)
{
......
......@@ -106,6 +106,11 @@ struct nfs_server {
u32 attr_bitmask[2];/* V4 bitmask representing the set
of attributes supported on this
filesystem */
u32 cache_consistency_bitmask[2];
/* V4 bitmask representing the subset
of change attribute, size, ctime
and mtime attributes supported by
the server */
u32 acl_bitmask; /* V4 bitmask representing the ACEs
that are supported on this
filesystem */
......
......@@ -27,12 +27,8 @@ static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid
}
struct nfs_fattr {
unsigned short valid; /* which fields are valid */
__u64 pre_size; /* pre_op_attr.size */
struct timespec pre_mtime; /* pre_op_attr.mtime */
struct timespec pre_ctime; /* pre_op_attr.ctime */
enum nfs_ftype type; /* always use NFSv2 types */
__u32 mode;
unsigned int valid; /* which fields are valid */
umode_t mode;
__u32 nlink;
__u32 uid;
__u32 gid;
......@@ -52,19 +48,55 @@ struct nfs_fattr {
struct timespec atime;
struct timespec mtime;
struct timespec ctime;
__u32 bitmap[2]; /* NFSv4 returned attribute bitmap */
__u64 change_attr; /* NFSv4 change attribute */
__u64 pre_change_attr;/* pre-op NFSv4 change attribute */
__u64 pre_size; /* pre_op_attr.size */
struct timespec pre_mtime; /* pre_op_attr.mtime */
struct timespec pre_ctime; /* pre_op_attr.ctime */
unsigned long time_start;
unsigned long gencount;
};
#define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */
#define NFS_ATTR_FATTR 0x0002 /* post-op attributes */
#define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */
#define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */
#define NFS_ATTR_WCC_V4 0x0010 /* pre-op change attribute */
#define NFS_ATTR_FATTR_V4_REFERRAL 0x0020 /* NFSv4 referral */
/*
 * Bit flags stored in nfs_fattr.valid.  Each flag occupies a single bit.
 */
#define NFS_ATTR_FATTR_TYPE (1U << 0)
#define NFS_ATTR_FATTR_MODE (1U << 1)
#define NFS_ATTR_FATTR_NLINK (1U << 2)
#define NFS_ATTR_FATTR_OWNER (1U << 3)
#define NFS_ATTR_FATTR_GROUP (1U << 4)
#define NFS_ATTR_FATTR_RDEV (1U << 5)
#define NFS_ATTR_FATTR_SIZE (1U << 6)
#define NFS_ATTR_FATTR_PRESIZE (1U << 7)
#define NFS_ATTR_FATTR_BLOCKS_USED (1U << 8)
#define NFS_ATTR_FATTR_SPACE_USED (1U << 9)
#define NFS_ATTR_FATTR_FSID (1U << 10)
#define NFS_ATTR_FATTR_FILEID (1U << 11)
#define NFS_ATTR_FATTR_ATIME (1U << 12)
#define NFS_ATTR_FATTR_MTIME (1U << 13)
#define NFS_ATTR_FATTR_CTIME (1U << 14)
#define NFS_ATTR_FATTR_PREMTIME (1U << 15)
#define NFS_ATTR_FATTR_PRECTIME (1U << 16)
#define NFS_ATTR_FATTR_CHANGE (1U << 17)
#define NFS_ATTR_FATTR_PRECHANGE (1U << 18)
#define NFS_ATTR_FATTR_V4_REFERRAL (1U << 19) /* NFSv4 referral */
/*
 * Composite mask: type, mode, nlink, owner, group, rdev, size, fsid,
 * fileid and the three timestamps.  The pre-op, change, blocks-used,
 * space-used and referral bits are not included.
 */
#define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \
| NFS_ATTR_FATTR_MODE \
| NFS_ATTR_FATTR_NLINK \
| NFS_ATTR_FATTR_OWNER \
| NFS_ATTR_FATTR_GROUP \
| NFS_ATTR_FATTR_RDEV \
| NFS_ATTR_FATTR_SIZE \
| NFS_ATTR_FATTR_FSID \
| NFS_ATTR_FATTR_FILEID \
| NFS_ATTR_FATTR_ATIME \
| NFS_ATTR_FATTR_MTIME \
| NFS_ATTR_FATTR_CTIME)
/* NFS_ATTR_FATTR plus the blocks-used bit */
#define NFS_ATTR_FATTR_V2 (NFS_ATTR_FATTR \
| NFS_ATTR_FATTR_BLOCKS_USED)
/* NFS_ATTR_FATTR plus the space-used bit */
#define NFS_ATTR_FATTR_V3 (NFS_ATTR_FATTR \
| NFS_ATTR_FATTR_SPACE_USED)
/* NFS_ATTR_FATTR plus the space-used and change-attribute bits */
#define NFS_ATTR_FATTR_V4 (NFS_ATTR_FATTR \
| NFS_ATTR_FATTR_SPACE_USED \
| NFS_ATTR_FATTR_CHANGE)
/*
* Info on the file system
......@@ -836,6 +868,7 @@ struct nfs_rpc_ops {
int (*lock)(struct file *, int, struct file_lock *);
int (*lock_check_bounds)(const struct file_lock *);
void (*clear_acl_cache)(struct inode *);
void (*close_context)(struct nfs_open_context *ctx, int);
};
/*
......
......@@ -69,7 +69,6 @@ struct svc_serv {
struct list_head sv_tempsocks; /* all temporary sockets */
int sv_tmpcnt; /* count of temporary sockets */
struct timer_list sv_temptimer; /* timer for aging temporary sockets */
sa_family_t sv_family; /* listener's address family */
char * sv_name; /* service name */
......@@ -385,19 +384,19 @@ struct svc_procedure {
/*
* Function prototypes.
*/
struct svc_serv *svc_create(struct svc_program *, unsigned int, sa_family_t,
struct svc_serv *svc_create(struct svc_program *, unsigned int,
void (*shutdown)(struct svc_serv *));
struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
struct svc_pool *pool);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
sa_family_t, void (*shutdown)(struct svc_serv *),
void (*shutdown)(struct svc_serv *),
svc_thread_fn, struct module *);
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
void svc_destroy(struct svc_serv *);
int svc_process(struct svc_rqst *);
int svc_register(const struct svc_serv *, const unsigned short,
const unsigned short);
int svc_register(const struct svc_serv *, const int,
const unsigned short, const unsigned short);
void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
......
......@@ -71,7 +71,8 @@ int svc_reg_xprt_class(struct svc_xprt_class *);
void svc_unreg_xprt_class(struct svc_xprt_class *);
void svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
struct svc_serv *);
int svc_create_xprt(struct svc_serv *, char *, unsigned short, int);
int svc_create_xprt(struct svc_serv *, const char *, const int,
const unsigned short, int);
void svc_xprt_enqueue(struct svc_xprt *xprt);
void svc_xprt_received(struct svc_xprt *);
void svc_xprt_put(struct svc_xprt *xprt);
......@@ -80,7 +81,8 @@ void svc_close_xprt(struct svc_xprt *xprt);
void svc_delete_xprt(struct svc_xprt *xprt);
int svc_port_is_privileged(struct sockaddr *sin);
int svc_print_xprts(char *buf, int maxlen);
struct svc_xprt *svc_find_xprt(struct svc_serv *, char *, int, int);
struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
const sa_family_t af, const unsigned short port);
int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen);
static inline void svc_xprt_get(struct svc_xprt *xprt)
......@@ -88,29 +90,32 @@ static inline void svc_xprt_get(struct svc_xprt *xprt)
kref_get(&xprt->xpt_ref);
}
static inline void svc_xprt_set_local(struct svc_xprt *xprt,
struct sockaddr *sa, int salen)
const struct sockaddr *sa,
const size_t salen)
{
memcpy(&xprt->xpt_local, sa, salen);
xprt->xpt_locallen = salen;
}
static inline void svc_xprt_set_remote(struct svc_xprt *xprt,
struct sockaddr *sa, int salen)
const struct sockaddr *sa,
const size_t salen)
{
memcpy(&xprt->xpt_remote, sa, salen);
xprt->xpt_remotelen = salen;
}
static inline unsigned short svc_addr_port(struct sockaddr *sa)
static inline unsigned short svc_addr_port(const struct sockaddr *sa)
{
unsigned short ret = 0;
const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;
const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa;
switch (sa->sa_family) {
case AF_INET:
ret = ntohs(((struct sockaddr_in *)sa)->sin_port);
break;
return ntohs(sin->sin_port);
case AF_INET6:
ret = ntohs(((struct sockaddr_in6 *)sa)->sin6_port);
break;
return ntohs(sin6->sin6_port);
}
return ret;
return 0;
}
static inline size_t svc_addr_len(struct sockaddr *sa)
......@@ -124,36 +129,39 @@ static inline size_t svc_addr_len(struct sockaddr *sa)
return -EAFNOSUPPORT;
}
static inline unsigned short svc_xprt_local_port(struct svc_xprt *xprt)
static inline unsigned short svc_xprt_local_port(const struct svc_xprt *xprt)
{
return svc_addr_port((struct sockaddr *)&xprt->xpt_local);
return svc_addr_port((const struct sockaddr *)&xprt->xpt_local);
}
static inline unsigned short svc_xprt_remote_port(struct svc_xprt *xprt)
static inline unsigned short svc_xprt_remote_port(const struct svc_xprt *xprt)
{
return svc_addr_port((struct sockaddr *)&xprt->xpt_remote);
return svc_addr_port((const struct sockaddr *)&xprt->xpt_remote);
}
static inline char *__svc_print_addr(struct sockaddr *addr,
char *buf, size_t len)
static inline char *__svc_print_addr(const struct sockaddr *addr,
char *buf, const size_t len)
{
const struct sockaddr_in *sin = (const struct sockaddr_in *)addr;
const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)addr;
switch (addr->sa_family) {
case AF_INET:
snprintf(buf, len, "%pI4, port=%u",
&((struct sockaddr_in *)addr)->sin_addr,
ntohs(((struct sockaddr_in *) addr)->sin_port));
snprintf(buf, len, "%pI4, port=%u", &sin->sin_addr,
ntohs(sin->sin_port));
break;
case AF_INET6:
snprintf(buf, len, "%pI6, port=%u",
&((struct sockaddr_in6 *)addr)->sin6_addr,
ntohs(((struct sockaddr_in6 *) addr)->sin6_port));
&sin6->sin6_addr,
ntohs(sin6->sin6_port));
break;
default:
snprintf(buf, len, "unknown address type: %d", addr->sa_family);
break;
}
return buf;
}
#endif /* SUNRPC_SVC_XPRT_H */
......@@ -235,6 +235,7 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *
*/
int xprt_register_transport(struct xprt_class *type);
int xprt_unregister_transport(struct xprt_class *type);
int xprt_load_transport(const char *);
void xprt_set_retrans_timeout_def(struct rpc_task *task);
void xprt_set_retrans_timeout_rtt(struct rpc_task *task);
void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
......@@ -259,6 +260,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
#define XPRT_BOUND (4)
#define XPRT_BINDING (5)
#define XPRT_CLOSING (6)
#define XPRT_CONNECTION_ABORT (7)
static inline void xprt_set_connected(struct rpc_xprt *xprt)
{
......
......@@ -17,28 +17,6 @@ config SUNRPC_XPRT_RDMA
If unsure, say N.
config SUNRPC_REGISTER_V4
bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
depends on SUNRPC && EXPERIMENTAL
default n
help
Sun added support for registering RPC services at an IPv6
address by creating two new versions of the rpcbind protocol
(RFC 1833).
This option enables support in the kernel RPC server for
registering kernel RPC services via version 4 of the rpcbind
protocol. If you enable this option, you must run a portmapper
daemon that supports rpcbind protocol version 4.
Serving NFS over IPv6 from knfsd (the kernel's NFS server)
requires that you enable this option and use a portmapper that
supports rpcbind version 4.
If unsure, say N to get traditional behavior (register kernel
RPC services using only rpcbind version 2). Distributions
using the legacy Linux portmapper daemon must say N here.
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
depends on SUNRPC && EXPERIMENTAL
......
......@@ -1032,27 +1032,20 @@ call_connect_status(struct rpc_task *task)
dprint_status(task);
task->tk_status = 0;
if (status >= 0) {
if (status >= 0 || status == -EAGAIN) {
clnt->cl_stats->netreconn++;
task->tk_action = call_transmit;
return;
}
/* Something failed: remote service port may have changed */
rpc_force_rebind(clnt);
switch (status) {
case -ENOTCONN:
case -EAGAIN:
task->tk_action = call_bind;
if (!RPC_IS_SOFT(task))
return;
/* if soft mounted, test if we've timed out */
case -ETIMEDOUT:
task->tk_action = call_timeout;
return;
break;
default:
rpc_exit(task, -EIO);
}
rpc_exit(task, -EIO);
}
/*
......@@ -1105,14 +1098,26 @@ static void
call_transmit_status(struct rpc_task *task)
{
task->tk_action = call_status;
/*
* Special case: if we've been waiting on the socket's write_space()
* callback, then don't call xprt_end_transmit().
*/
if (task->tk_status == -EAGAIN)
return;
xprt_end_transmit(task);
rpc_task_force_reencode(task);
switch (task->tk_status) {
case -EAGAIN:
break;
default:
xprt_end_transmit(task);
/*
* Special cases: if we've been waiting on the
* socket's write_space() callback, or if the
* socket just returned a connection error,
* then hold onto the transport lock.
*/
case -ECONNREFUSED:
case -ECONNRESET:
case -ENOTCONN:
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPIPE:
rpc_task_force_reencode(task);
}
}
/*
......@@ -1152,9 +1157,12 @@ call_status(struct rpc_task *task)
xprt_conditional_disconnect(task->tk_xprt,
req->rq_connect_cookie);
break;
case -ECONNRESET:
case -ECONNREFUSED:
case -ENOTCONN:
rpc_force_rebind(clnt);
rpc_delay(task, 3*HZ);
case -EPIPE:
case -ENOTCONN:
task->tk_action = call_bind;
break;
case -EAGAIN:
......
......@@ -63,9 +63,16 @@ enum {
* r_owner
*
* The "owner" is allowed to unset a service in the rpcbind database.
* We always use the following (arbitrary) fixed string.
*
* For AF_LOCAL SET/UNSET requests, rpcbind treats this string as a
* UID which it maps to a local user name via a password lookup.
* In all other cases it is ignored.
*
* For SET/UNSET requests, user space provides a value, even for
* network requests, and GETADDR uses an empty string. We follow
* those precedents here.
*/
#define RPCB_OWNER_STRING "rpcb"
#define RPCB_OWNER_STRING "0"
#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
static void rpcb_getport_done(struct rpc_task *, void *);
......@@ -124,12 +131,6 @@ static const struct sockaddr_in rpcb_inaddr_loopback = {
.sin_port = htons(RPCBIND_PORT),
};
static const struct sockaddr_in6 rpcb_in6addr_loopback = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_LOOPBACK_INIT,
.sin6_port = htons(RPCBIND_PORT),
};
static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
size_t addrlen, u32 version)
{
......@@ -176,9 +177,10 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
return rpc_create(&args);
}
static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
u32 version, struct rpc_message *msg)
static int rpcb_register_call(const u32 version, struct rpc_message *msg)
{
struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback;
size_t addrlen = sizeof(rpcb_inaddr_loopback);
struct rpc_clnt *rpcb_clnt;
int result, error = 0;
......@@ -192,7 +194,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
error = PTR_ERR(rpcb_clnt);
if (error < 0) {
printk(KERN_WARNING "RPC: failed to contact local rpcbind "
dprintk("RPC: failed to contact local rpcbind "
"server (errno %d).\n", -error);
return error;
}
......@@ -254,25 +256,23 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
if (port)
msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
sizeof(rpcb_inaddr_loopback),
RPCBVERS_2, &msg);
return rpcb_register_call(RPCBVERS_2, &msg);
}
/*
* Fill in AF_INET family-specific arguments to register
*/
static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
struct rpc_message *msg)
static int rpcb_register_inet4(const struct sockaddr *sap,
struct rpc_message *msg)
{
const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
struct rpcbind_args *map = msg->rpc_argp;
unsigned short port = ntohs(address_to_register->sin_port);
unsigned short port = ntohs(sin->sin_port);
char buf[32];
/* Construct AF_INET universal address */
snprintf(buf, sizeof(buf), "%pI4.%u.%u",
&address_to_register->sin_addr.s_addr,
port >> 8, port & 0xff);
&sin->sin_addr.s_addr, port >> 8, port & 0xff);
map->r_addr = buf;
dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
......@@ -284,29 +284,27 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
if (port)
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback,
sizeof(rpcb_inaddr_loopback),
RPCBVERS_4, msg);
return rpcb_register_call(RPCBVERS_4, msg);
}
/*
* Fill in AF_INET6 family-specific arguments to register
*/
static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
struct rpc_message *msg)
static int rpcb_register_inet6(const struct sockaddr *sap,
struct rpc_message *msg)
{
const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
struct rpcbind_args *map = msg->rpc_argp;
unsigned short port = ntohs(address_to_register->sin6_port);
unsigned short port = ntohs(sin6->sin6_port);
char buf[64];
/* Construct AF_INET6 universal address */
if (ipv6_addr_any(&address_to_register->sin6_addr))
if (ipv6_addr_any(&sin6->sin6_addr))
snprintf(buf, sizeof(buf), "::.%u.%u",
port >> 8, port & 0xff);
else
snprintf(buf, sizeof(buf), "%pI6.%u.%u",
&address_to_register->sin6_addr,
port >> 8, port & 0xff);
&sin6->sin6_addr, port >> 8, port & 0xff);
map->r_addr = buf;
dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
......@@ -318,9 +316,21 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
if (port)
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback,
sizeof(rpcb_in6addr_loopback),
RPCBVERS_4, msg);
return rpcb_register_call(RPCBVERS_4, msg);
}
/*
 * Issue an rpcbind version 4 UNSET request with an empty universal
 * address for the [program, version, netid] described by the
 * rpcbind_args attached to @msg.  rpcb_v4_register() uses this when
 * it is handed a NULL address.
 *
 * Returns whatever rpcb_register_call() returns.
 */
static int rpcb_unregister_all_protofamilies(struct rpc_message *msg)
{
	struct rpcbind_args *args = msg->rpc_argp;

	/* Empty r_addr: no specific address is named in the request */
	args->r_addr = "";
	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];

	dprintk("RPC: unregistering [%u, %u, '%s'] with "
		"local rpcbind\n",
		args->r_prog, args->r_vers, args->r_netid);

	return rpcb_register_call(RPCBVERS_4, msg);
}
/**
......@@ -340,10 +350,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
* invoke this function once for each [program, version, address,
* netid] tuple they wish to advertise.
*
* Callers may also unregister RPC services that are no longer
* available by setting the port number in the passed-in address
* to zero. Callers pass a netid of "" to unregister all
* transport netids associated with [program, version, address].
* Callers may also unregister RPC services that are registered at a
* specific address by setting the port number in @address to zero.
* They may unregister all registered protocol families at once for
* a service by passing a NULL @address argument. If @netid is ""
* then all netids for [program, version, address] are unregistered.
*
* This function uses rpcbind protocol version 4 to contact the
* local rpcbind daemon. The local rpcbind daemon must support
......@@ -378,13 +389,14 @@ int rpcb_v4_register(const u32 program, const u32 version,
.rpc_argp = &map,
};
if (address == NULL)
return rpcb_unregister_all_protofamilies(&msg);
switch (address->sa_family) {
case AF_INET:
return rpcb_register_netid4((struct sockaddr_in *)address,
&msg);
return rpcb_register_inet4(address, &msg);
case AF_INET6:
return rpcb_register_netid6((struct sockaddr_in6 *)address,
&msg);
return rpcb_register_inet6(address, &msg);
}
return -EAFNOSUPPORT;
......@@ -579,7 +591,7 @@ void rpcb_getport_async(struct rpc_task *task)
map->r_xprt = xprt_get(xprt);
map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */
map->r_owner = "";
map->r_status = -EIO;
child = rpcb_call_async(rpcb_clnt, map, proc);
......@@ -703,11 +715,16 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
*portp = 0;
addr_len = ntohl(*p++);
if (addr_len == 0) {
dprintk("RPC: rpcb_decode_getaddr: "
"service is not registered\n");
return 0;
}
/*
* Simple sanity check. The smallest possible universal
* address is an IPv4 address string containing 11 bytes.
* Simple sanity check.
*/
if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN)
if (addr_len > RPCBIND_MAXUADDRLEN)
goto out_err;
/*
......
......@@ -359,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
sa_family_t family, void (*shutdown)(struct svc_serv *serv))
void (*shutdown)(struct svc_serv *serv))
{
struct svc_serv *serv;
unsigned int vers;
......@@ -368,7 +368,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
return NULL;
serv->sv_family = family;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
serv->sv_nrthreads = 1;
......@@ -427,21 +426,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize,
sa_family_t family, void (*shutdown)(struct svc_serv *serv))
void (*shutdown)(struct svc_serv *serv))
{
return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
return __svc_create(prog, bufsize, /*npools*/1, shutdown);
}
EXPORT_SYMBOL_GPL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
sa_family_t family, void (*shutdown)(struct svc_serv *serv),
void (*shutdown)(struct svc_serv *serv),
svc_thread_fn func, struct module *mod)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
serv = __svc_create(prog, bufsize, npools, family, shutdown);
serv = __svc_create(prog, bufsize, npools, shutdown);
if (serv != NULL) {
serv->sv_function = func;
......@@ -719,8 +718,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
}
EXPORT_SYMBOL_GPL(svc_exit_thread);
#ifdef CONFIG_SUNRPC_REGISTER_V4
/*
* Register an "inet" protocol family netid with the local
* rpcbind daemon via an rpcbind v4 SET request.
......@@ -735,12 +732,13 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
const unsigned short protocol,
const unsigned short port)
{
struct sockaddr_in sin = {
const struct sockaddr_in sin = {
.sin_family = AF_INET,
.sin_addr.s_addr = htonl(INADDR_ANY),
.sin_port = htons(port),
};
char *netid;
const char *netid;
int error;
switch (protocol) {
case IPPROTO_UDP:
......@@ -750,13 +748,23 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
netid = RPCBIND_NETID_TCP;
break;
default:
return -EPROTONOSUPPORT;
return -ENOPROTOOPT;
}
return rpcb_v4_register(program, version,
(struct sockaddr *)&sin, netid);
error = rpcb_v4_register(program, version,
(const struct sockaddr *)&sin, netid);
/*
* User space didn't support rpcbind v4, so retry this
* registration request with the legacy rpcbind v2 protocol.
*/
if (error == -EPROTONOSUPPORT)
error = rpcb_register(program, version, protocol, port);
return error;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
/*
* Register an "inet6" protocol family netid with the local
* rpcbind daemon via an rpcbind v4 SET request.
......@@ -771,12 +779,13 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
const unsigned short protocol,
const unsigned short port)
{
struct sockaddr_in6 sin6 = {
const struct sockaddr_in6 sin6 = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_ANY_INIT,
.sin6_port = htons(port),
};
char *netid;
const char *netid;
int error;
switch (protocol) {
case IPPROTO_UDP:
......@@ -786,12 +795,22 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
netid = RPCBIND_NETID_TCP6;
break;
default:
return -EPROTONOSUPPORT;
return -ENOPROTOOPT;
}
return rpcb_v4_register(program, version,
(struct sockaddr *)&sin6, netid);
error = rpcb_v4_register(program, version,
(const struct sockaddr *)&sin6, netid);
/*
* User space didn't support rpcbind version 4, so we won't
* use a PF_INET6 listener.
*/
if (error == -EPROTONOSUPPORT)
error = -EAFNOSUPPORT;
return error;
}
#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
/*
* Register a kernel RPC service via rpcbind version 4.
......@@ -799,69 +818,43 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
* Returns zero on success; a negative errno value is returned
* if any error occurs.
*/
static int __svc_register(const u32 program, const u32 version,
const sa_family_t family,
static int __svc_register(const char *progname,
const u32 program, const u32 version,
const int family,
const unsigned short protocol,
const unsigned short port)
{
int error;
int error = -EAFNOSUPPORT;
switch (family) {
case AF_INET:
return __svc_rpcb_register4(program, version,
case PF_INET:
error = __svc_rpcb_register4(program, version,
protocol, port);
case AF_INET6:
break;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case PF_INET6:
error = __svc_rpcb_register6(program, version,
protocol, port);
if (error < 0)
return error;
/*
* Work around bug in some versions of Linux rpcbind
* which don't allow registration of both inet and
* inet6 netids.
*
* Error return ignored for now.
*/
__svc_rpcb_register4(program, version,
protocol, port);
return 0;
#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
}
return -EAFNOSUPPORT;
}
#else /* CONFIG_SUNRPC_REGISTER_V4 */
/*
* Register a kernel RPC service via rpcbind version 2.
*
* Returns zero on success; a negative errno value is returned
* if any error occurs.
*/
static int __svc_register(const u32 program, const u32 version,
sa_family_t family,
const unsigned short protocol,
const unsigned short port)
{
if (family != AF_INET)
return -EAFNOSUPPORT;
return rpcb_register(program, version, protocol, port);
if (error < 0)
printk(KERN_WARNING "svc: failed to register %sv%u RPC "
"service (errno %d).\n", progname, version, -error);
return error;
}
#endif /* CONFIG_SUNRPC_REGISTER_V4 */
/**
* svc_register - register an RPC service with the local portmapper
* @serv: svc_serv struct for the service to register
* @family: protocol family of service's listener socket
* @proto: transport protocol number to advertise
* @port: port to advertise
*
* Service is registered for any address in serv's address family
* Service is registered for any address in the passed-in protocol family
*/
int svc_register(const struct svc_serv *serv, const unsigned short proto,
const unsigned short port)
int svc_register(const struct svc_serv *serv, const int family,
const unsigned short proto, const unsigned short port)
{
struct svc_program *progp;
unsigned int i;
......@@ -879,15 +872,15 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
i,
proto == IPPROTO_UDP? "udp" : "tcp",
port,
serv->sv_family,
family,
progp->pg_vers[i]->vs_hidden?
" (but not telling portmap)" : "");
if (progp->pg_vers[i]->vs_hidden)
continue;
error = __svc_register(progp->pg_prog, i,
serv->sv_family, proto, port);
error = __svc_register(progp->pg_name, progp->pg_prog,
i, family, proto, port);
if (error < 0)
break;
}
......@@ -896,38 +889,31 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
return error;
}
#ifdef CONFIG_SUNRPC_REGISTER_V4
/*
* If user space is running rpcbind, it should take the v4 UNSET
* and clear everything for this [program, version]. If user space
* is running portmap, it will reject the v4 UNSET, but won't have
* any "inet6" entries anyway. So a PMAP_UNSET should be sufficient
* in this case to clear all existing entries for [program, version].
*/
static void __svc_unregister(const u32 program, const u32 version,
const char *progname)
{
struct sockaddr_in6 sin6 = {
.sin6_family = AF_INET6,
.sin6_addr = IN6ADDR_ANY_INIT,
.sin6_port = 0,
};
int error;
error = rpcb_v4_register(program, version,
(struct sockaddr *)&sin6, "");
dprintk("svc: %s(%sv%u), error %d\n",
__func__, progname, version, error);
}
#else /* CONFIG_SUNRPC_REGISTER_V4 */
error = rpcb_v4_register(program, version, NULL, "");
static void __svc_unregister(const u32 program, const u32 version,
const char *progname)
{
int error;
/*
* User space didn't support rpcbind v4, so retry this
* request with the legacy rpcbind v2 protocol.
*/
if (error == -EPROTONOSUPPORT)
error = rpcb_register(program, version, 0, 0);
error = rpcb_register(program, version, 0, 0);
dprintk("svc: %s(%sv%u), error %d\n",
__func__, progname, version, error);
}
#endif /* CONFIG_SUNRPC_REGISTER_V4 */
/*
* All netids, bind addresses and ports registered for [program, version]
* are removed from the local rpcbind database (if the service is not
......
......@@ -161,7 +161,9 @@ EXPORT_SYMBOL_GPL(svc_xprt_init);
static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
struct svc_serv *serv,
unsigned short port, int flags)
const int family,
const unsigned short port,
int flags)
{
struct sockaddr_in sin = {
.sin_family = AF_INET,
......@@ -176,12 +178,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
struct sockaddr *sap;
size_t len;
switch (serv->sv_family) {
case AF_INET:
switch (family) {
case PF_INET:
sap = (struct sockaddr *)&sin;
len = sizeof(sin);
break;
case AF_INET6:
case PF_INET6:
sap = (struct sockaddr *)&sin6;
len = sizeof(sin6);
break;
......@@ -192,7 +194,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
}
int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
const int family, const unsigned short port,
int flags)
{
struct svc_xprt_class *xcl;
......@@ -209,7 +212,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
goto err;
spin_unlock(&svc_xprt_class_lock);
newxprt = __svc_xpo_create(xcl, serv, port, flags);
newxprt = __svc_xpo_create(xcl, serv, family, port, flags);
if (IS_ERR(newxprt)) {
module_put(xcl->xcl_owner);
return PTR_ERR(newxprt);
......@@ -1033,7 +1036,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
return dr;
}
/*
/**
* svc_find_xprt - find an RPC transport instance
* @serv: pointer to svc_serv to search
* @xcl_name: C string containing transport's class name
* @af: Address family of transport's local address
* @port: transport's IP port number
*
* Return the transport instance pointer for the endpoint accepting
* connections/peer traffic from the specified transport class,
* address family and port.
......@@ -1042,14 +1051,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
* wild-card, and will result in matching the first transport in the
* service's list that has a matching class name.
*/
struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
int af, int port)
struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
const sa_family_t af, const unsigned short port)
{
struct svc_xprt *xprt;
struct svc_xprt *found = NULL;
/* Sanity check the args */
if (!serv || !xcl_name)
if (serv == NULL || xcl_name == NULL)
return found;
spin_lock_bh(&serv->sv_lock);
......@@ -1058,7 +1067,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
continue;
if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
continue;
if (port && port != svc_xprt_local_port(xprt))
if (port != 0 && port != svc_xprt_local_port(xprt))
continue;
found = xprt;
svc_xprt_get(xprt);
......
......@@ -1110,7 +1110,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct svc_sock *svsk;
struct sock *inet;
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
int val;
dprintk("svc: svc_setup_socket %p\n", sock);
if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
......@@ -1122,7 +1121,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
/* Register socket with portmapper */
if (*errp >= 0 && pmap_register)
*errp = svc_register(serv, inet->sk_protocol,
*errp = svc_register(serv, inet->sk_family, inet->sk_protocol,
ntohs(inet_sk(inet)->sport));
if (*errp < 0) {
......@@ -1143,18 +1142,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
else
svc_tcp_init(svsk, serv);
/*
* We start one listener per sv_serv. We want AF_INET
* requests to be automatically shunted to our AF_INET6
* listener using a mapped IPv4 address. Make sure
* no-one starts an equivalent IPv4 listener, which
* would steal our incoming connections.
*/
val = 0;
if (serv->sv_family == AF_INET6)
kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
(char *)&val, sizeof(val));
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
svsk, svsk->sk_sk);
......@@ -1222,6 +1209,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
struct sockaddr_storage addr;
struct sockaddr *newsin = (struct sockaddr *)&addr;
int newlen;
int family;
int val;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk("svc: svc_create_socket(%s, %d, %s)\n",
......@@ -1233,14 +1222,35 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
"sockets supported\n");
return ERR_PTR(-EINVAL);
}
type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
switch (sin->sa_family) {
case AF_INET6:
family = PF_INET6;
break;
case AF_INET:
family = PF_INET;
break;
default:
return ERR_PTR(-EINVAL);
}
error = sock_create_kern(sin->sa_family, type, protocol, &sock);
error = sock_create_kern(family, type, protocol, &sock);
if (error < 0)
return ERR_PTR(error);
svc_reclassify_socket(sock);
/*
* If this is a PF_INET6 listener, we want to avoid
* getting requests from IPv4 remotes. Those should
* be shunted to a PF_INET listener via rpcbind.
*/
val = 1;
if (family == PF_INET6)
kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
(char *)&val, sizeof(val));
if (type == SOCK_STREAM)
sock->sk->sk_reuse = 1; /* allow address reuse */
error = kernel_bind(sock, sin, len);
......
......@@ -151,6 +151,37 @@ int xprt_unregister_transport(struct xprt_class *transport)
}
EXPORT_SYMBOL_GPL(xprt_unregister_transport);
/**
 * xprt_load_transport - load a transport implementation
 * @transport_name: transport to load
 *
 * If a transport class named @transport_name is already on xprt_list,
 * nothing is loaded.  Otherwise a module named "xprt" followed by
 * @transport_name is requested.
 *
 * Returns:
 * 0:		transport successfully loaded
 * -ENOENT:	transport module not available
 */
int xprt_load_transport(const char *transport_name)
{
	struct xprt_class *t;
	int result;

	result = 0;
	spin_lock(&xprt_list_lock);
	list_for_each_entry(t, &xprt_list, list) {
		if (strcmp(t->name, transport_name) == 0) {
			/* Already registered: no module load needed */
			spin_unlock(&xprt_list_lock);
			goto out;
		}
	}
	spin_unlock(&xprt_list_lock);

	/*
	 * request_module() takes a printf-style format, so the caller's
	 * string must never be used as the format itself.  The precision
	 * keeps the same cap on the appended name (sizeof t->name
	 * characters) that the previous strncat() applied.
	 */
	result = request_module("xprt%.*s", (int)sizeof(t->name),
				transport_name);
out:
	return result;
}
EXPORT_SYMBOL_GPL(xprt_load_transport);
/**
* xprt_reserve_xprt - serialize write access to transports
* @task: task that is requesting access to the transport
......@@ -580,7 +611,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
dprintk("RPC: disconnected transport %p\n", xprt);
spin_lock_bh(&xprt->transport_lock);
xprt_clear_connected(xprt);
xprt_wake_pending_tasks(xprt, -ENOTCONN);
xprt_wake_pending_tasks(xprt, -EAGAIN);
spin_unlock_bh(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_disconnect_done);
......@@ -598,7 +629,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
/* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(rpciod_workqueue, &xprt->task_cleanup);
xprt_wake_pending_tasks(xprt, -ENOTCONN);
xprt_wake_pending_tasks(xprt, -EAGAIN);
spin_unlock_bh(&xprt->transport_lock);
}
......@@ -625,7 +656,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
/* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(rpciod_workqueue, &xprt->task_cleanup);
xprt_wake_pending_tasks(xprt, -ENOTCONN);
xprt_wake_pending_tasks(xprt, -EAGAIN);
out:
spin_unlock_bh(&xprt->transport_lock);
}
......@@ -695,9 +726,8 @@ static void xprt_connect_status(struct rpc_task *task)
}
switch (task->tk_status) {
case -ENOTCONN:
dprintk("RPC: %5u xprt_connect_status: connection broken\n",
task->tk_pid);
case -EAGAIN:
dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
break;
case -ETIMEDOUT:
dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
......@@ -818,15 +848,8 @@ int xprt_prepare_transmit(struct rpc_task *task)
err = req->rq_received;
goto out_unlock;
}
if (!xprt->ops->reserve_xprt(task)) {
if (!xprt->ops->reserve_xprt(task))
err = -EAGAIN;
goto out_unlock;
}
if (!xprt_connected(xprt)) {
err = -ENOTCONN;
goto out_unlock;
}
out_unlock:
spin_unlock_bh(&xprt->transport_lock);
return err;
......@@ -870,32 +893,26 @@ void xprt_transmit(struct rpc_task *task)
req->rq_connect_cookie = xprt->connect_cookie;
req->rq_xtime = jiffies;
status = xprt->ops->send_request(task);
if (status == 0) {
dprintk("RPC: %5u xmit complete\n", task->tk_pid);
spin_lock_bh(&xprt->transport_lock);
if (status != 0) {
task->tk_status = status;
return;
}
xprt->ops->set_retrans_timeout(task);
dprintk("RPC: %5u xmit complete\n", task->tk_pid);
spin_lock_bh(&xprt->transport_lock);
xprt->stat.sends++;
xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
xprt->stat.bklog_u += xprt->backlog.qlen;
xprt->ops->set_retrans_timeout(task);
/* Don't race with disconnect */
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
else if (!req->rq_received)
rpc_sleep_on(&xprt->pending, task, xprt_timer);
spin_unlock_bh(&xprt->transport_lock);
return;
}
xprt->stat.sends++;
xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
xprt->stat.bklog_u += xprt->backlog.qlen;
/* Note: at this point, task->tk_sleeping has not yet been set,
* hence there is no danger of the waking up task being put on
* schedq, and being picked up by a parallel run of rpciod().
*/
task->tk_status = status;
if (status == -ECONNREFUSED)
rpc_sleep_on(&xprt->sending, task, NULL);
/* Don't race with disconnect */
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
else if (!req->rq_received)
rpc_sleep_on(&xprt->pending, task, xprt_timer);
spin_unlock_bh(&xprt->transport_lock);
}
static inline void do_xprt_reserve(struct rpc_task *task)
......
......@@ -310,6 +310,19 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
__func__, pad, destp, rqst->rq_slen, curlen);
copy_len = rqst->rq_snd_buf.page_len;
if (rqst->rq_snd_buf.tail[0].iov_len) {
curlen = rqst->rq_snd_buf.tail[0].iov_len;
if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) {
memmove(destp + copy_len,
rqst->rq_snd_buf.tail[0].iov_base, curlen);
r_xprt->rx_stats.pullup_copy_count += curlen;
}
dprintk("RPC: %s: tail destp 0x%p len %d\n",
__func__, destp + copy_len, curlen);
rqst->rq_svec[0].iov_len += curlen;
}
r_xprt->rx_stats.pullup_copy_count += copy_len;
npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
for (i = 0; copy_len && i < npages; i++) {
......@@ -332,17 +345,6 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
destp += curlen;
copy_len -= curlen;
}
if (rqst->rq_snd_buf.tail[0].iov_len) {
curlen = rqst->rq_snd_buf.tail[0].iov_len;
if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
memcpy(destp,
rqst->rq_snd_buf.tail[0].iov_base, curlen);
r_xprt->rx_stats.pullup_copy_count += curlen;
}
dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n",
__func__, destp, copy_len, curlen);
rqst->rq_svec[0].iov_len += curlen;
}
/* header now contains entire send message */
return pad;
}
......@@ -656,7 +658,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
curlen = rqst->rq_rcv_buf.tail[0].iov_len;
if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
__func__, srcp, copy_len, curlen);
rqst->rq_rcv_buf.tail[0].iov_len = curlen;
......
......@@ -191,7 +191,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct svc_rdma_req_map *vec)
{
int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
int sge_no;
u32 sge_bytes;
u32 page_bytes;
......@@ -235,7 +234,11 @@ static int map_xdr(struct svcxprt_rdma *xprt,
sge_no++;
}
BUG_ON(sge_no > sge_max);
dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
"page_base %u page_len %u head_len %zu tail_len %zu\n",
sge_no, page_no, xdr->page_base, xdr->page_len,
xdr->head[0].iov_len, xdr->tail[0].iov_len);
vec->count = sge_no;
return 0;
}
......@@ -579,7 +582,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[page_no+1].length = 0;
}
BUG_ON(sge_no > rdma->sc_max_sge);
BUG_ON(sge_no > ctxt->count);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
send_wr.wr_id = (unsigned long)ctxt;
......
......@@ -49,6 +49,9 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
/* Default linger timeout: 15 * HZ, i.e. 15 seconds expressed in jiffies. */
#define XS_TCP_LINGER_TO (15U * HZ)
/*
 * Linger timeout passed to xs_tcp_schedule_linger_timeout() from the TCP
 * state-change callback. Starts out as XS_TCP_LINGER_TO and is exported
 * through the "tcp_fin_timeout" entry of xs_tunables_table.
 */
static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
/*
* We can register our own files under /proc/sys/sunrpc by
* calling register_sysctl_table() again. The files in that
......@@ -116,6 +119,14 @@ static ctl_table xs_tunables_table[] = {
.extra1 = &xprt_min_resvport_limit,
.extra2 = &xprt_max_resvport_limit
},
{
.procname = "tcp_fin_timeout",
.data = &xs_tcp_fin_timeout,
.maxlen = sizeof(xs_tcp_fin_timeout),
.mode = 0644,
.proc_handler = &proc_dointvec_jiffies,
.strategy = sysctl_jiffies
},
{
.ctl_name = 0,
},
......@@ -521,11 +532,12 @@ static void xs_nospace_callback(struct rpc_task *task)
* @task: task to put to sleep
*
*/
static void xs_nospace(struct rpc_task *task)
static int xs_nospace(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
int ret = 0;
dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
task->tk_pid, req->rq_slen - req->rq_bytes_sent,
......@@ -537,6 +549,7 @@ static void xs_nospace(struct rpc_task *task)
/* Don't race with disconnect */
if (xprt_connected(xprt)) {
if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
ret = -EAGAIN;
/*
* Notify TCP that we're limited by the application
* window size
......@@ -548,10 +561,11 @@ static void xs_nospace(struct rpc_task *task)
}
} else {
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
task->tk_status = -ENOTCONN;
ret = -ENOTCONN;
}
spin_unlock_bh(&xprt->transport_lock);
return ret;
}
/**
......@@ -594,6 +608,8 @@ static int xs_udp_send_request(struct rpc_task *task)
/* Still some bytes left; set up for a retry later. */
status = -EAGAIN;
}
if (!transport->sock)
goto out;
switch (status) {
case -ENOTSOCK:
......@@ -601,21 +617,19 @@ static int xs_udp_send_request(struct rpc_task *task)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
xs_nospace(task);
status = xs_nospace(task);
break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
case -ENETUNREACH:
case -EPIPE:
case -ECONNREFUSED:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
break;
default:
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
}
out:
return status;
}
......@@ -697,6 +711,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
status = -EAGAIN;
break;
}
if (!transport->sock)
goto out;
switch (status) {
case -ENOTSOCK:
......@@ -704,23 +720,19 @@ static int xs_tcp_send_request(struct rpc_task *task)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
xs_nospace(task);
status = xs_nospace(task);
break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
case -ECONNRESET:
case -EPIPE:
xs_tcp_shutdown(xprt);
case -ECONNREFUSED:
case -ENOTCONN:
case -EPIPE:
status = -ENOTCONN;
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
xs_tcp_shutdown(xprt);
}
out:
return status;
}
......@@ -767,23 +779,13 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s
sk->sk_error_report = transport->old_error_report;
}
/**
* xs_close - close a socket
* @xprt: transport
*
* This is used when all requests are complete; ie, no DRC state remains
* on the server we want to save.
*/
static void xs_close(struct rpc_xprt *xprt)
static void xs_reset_transport(struct sock_xprt *transport)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
struct sock *sk = transport->inet;
if (!sk)
goto clear_close_wait;
dprintk("RPC: xs_close xprt %p\n", xprt);
if (sk == NULL)
return;
write_lock_bh(&sk->sk_callback_lock);
transport->inet = NULL;
......@@ -797,8 +799,25 @@ static void xs_close(struct rpc_xprt *xprt)
sk->sk_no_check = 0;
sock_release(sock);
clear_close_wait:
}
/**
* xs_close - close a socket
* @xprt: transport
*
* This is used when all requests are complete; ie, no DRC state remains
* on the server we want to save.
*/
static void xs_close(struct rpc_xprt *xprt)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
dprintk("RPC: xs_close xprt %p\n", xprt);
xs_reset_transport(transport);
smp_mb__before_clear_bit();
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
smp_mb__after_clear_bit();
......@@ -1126,6 +1145,47 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes)
read_unlock(&sk->sk_callback_lock);
}
/*
 * Emulate linger/linger2 handling for broken servers that do not close
 * their end of the socket in a timely fashion.
 *
 * @xprt: transport whose connect worker should be armed
 * @timeout: delay handed to queue_delayed_work()
 *
 * If the transport is already marked as connecting, nothing is scheduled.
 * Otherwise XPRT_CONNECTION_ABORT is set and the transport's connect
 * worker is queued on rpciod_workqueue after @timeout.
 */
static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
		unsigned long timeout)
{
	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);

	if (!xprt_test_and_set_connecting(xprt)) {
		/* Flag is set before the work is queued so the worker sees it */
		set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
		queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
				timeout);
	}
}
/*
 * Undo xs_tcp_schedule_linger_timeout().
 *
 * Only acts when XPRT_CONNECTION_ABORT is set and the queued connect
 * worker could actually be cancelled; in that case the abort flag and
 * the "connecting" state are both cleared. Otherwise the state bits are
 * left untouched.
 */
static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
{
	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);

	/* No abort request outstanding */
	if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state))
		return;

	/* cancel_delayed_work() did not cancel anything: leave the flags alone */
	if (!cancel_delayed_work(&transport->connect_worker))
		return;

	clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
	xprt_clear_connecting(xprt);
}
/*
 * Record that the socket has finished closing.
 *
 * Clears XPRT_CLOSE_WAIT and XPRT_CLOSING in xprt->state, with the two
 * clear_bit() calls bracketed by smp_mb__before_clear_bit() and
 * smp_mb__after_clear_bit(), and then calls xprt_disconnect_done().
 */
static void xs_sock_mark_closed(struct rpc_xprt *xprt)
{
smp_mb__before_clear_bit();
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
smp_mb__after_clear_bit();
/* Mark transport as closed and wake up all pending tasks */
xprt_disconnect_done(xprt);
}
/**
* xs_tcp_state_change - callback to handle TCP socket state changes
* @sk: socket whose state has changed
......@@ -1158,7 +1218,7 @@ static void xs_tcp_state_change(struct sock *sk)
transport->tcp_flags =
TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
xprt_wake_pending_tasks(xprt, 0);
xprt_wake_pending_tasks(xprt, -EAGAIN);
}
spin_unlock_bh(&xprt->transport_lock);
break;
......@@ -1171,10 +1231,10 @@ static void xs_tcp_state_change(struct sock *sk)
clear_bit(XPRT_CONNECTED, &xprt->state);
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
smp_mb__after_clear_bit();
xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
break;
case TCP_CLOSE_WAIT:
/* The server initiated a shutdown of the socket */
set_bit(XPRT_CLOSING, &xprt->state);
xprt_force_disconnect(xprt);
case TCP_SYN_SENT:
xprt->connect_cookie++;
......@@ -1187,40 +1247,35 @@ static void xs_tcp_state_change(struct sock *sk)
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
break;
case TCP_LAST_ACK:
set_bit(XPRT_CLOSING, &xprt->state);
xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
smp_mb__before_clear_bit();
clear_bit(XPRT_CONNECTED, &xprt->state);
smp_mb__after_clear_bit();
break;
case TCP_CLOSE:
smp_mb__before_clear_bit();
clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
clear_bit(XPRT_CLOSING, &xprt->state);
smp_mb__after_clear_bit();
/* Mark transport as closed and wake up all pending tasks */
xprt_disconnect_done(xprt);
xs_tcp_cancel_linger_timeout(xprt);
xs_sock_mark_closed(xprt);
}
out:
read_unlock(&sk->sk_callback_lock);
}
/**
* xs_tcp_error_report - callback mainly for catching RST events
* xs_error_report - callback mainly for catching socket errors
* @sk: socket
*/
static void xs_tcp_error_report(struct sock *sk)
static void xs_error_report(struct sock *sk)
{
struct rpc_xprt *xprt;
read_lock(&sk->sk_callback_lock);
if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED)
goto out;
if (!(xprt = xprt_from_sock(sk)))
goto out;
dprintk("RPC: %s client %p...\n"
"RPC: error %d\n",
__func__, xprt, sk->sk_err);
xprt_force_disconnect(xprt);
xprt_wake_pending_tasks(xprt, -EAGAIN);
out:
read_unlock(&sk->sk_callback_lock);
}
......@@ -1494,6 +1549,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_user_data = xprt;
sk->sk_data_ready = xs_udp_data_ready;
sk->sk_write_space = xs_udp_write_space;
sk->sk_error_report = xs_error_report;
sk->sk_no_check = UDP_CSUM_NORCV;
sk->sk_allocation = GFP_ATOMIC;
......@@ -1526,9 +1582,10 @@ static void xs_udp_connect_worker4(struct work_struct *work)
goto out;
/* Start by resetting any existing state */
xs_close(xprt);
xs_reset_transport(transport);
if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (err < 0) {
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
goto out;
}
......@@ -1545,8 +1602,8 @@ static void xs_udp_connect_worker4(struct work_struct *work)
xs_udp_finish_connecting(xprt, sock);
status = 0;
out:
xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
}
/**
......@@ -1567,9 +1624,10 @@ static void xs_udp_connect_worker6(struct work_struct *work)
goto out;
/* Start by resetting any existing state */
xs_close(xprt);
xs_reset_transport(transport);
if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (err < 0) {
dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
goto out;
}
......@@ -1586,18 +1644,17 @@ static void xs_udp_connect_worker6(struct work_struct *work)
xs_udp_finish_connecting(xprt, sock);
status = 0;
out:
xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
}
/*
* We need to preserve the port number so the reply cache on the server can
* find our cached RPC replies when we get around to reconnecting.
*/
static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
{
int result;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct sockaddr any;
dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
......@@ -1609,11 +1666,24 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
memset(&any, 0, sizeof(any));
any.sa_family = AF_UNSPEC;
result = kernel_connect(transport->sock, &any, sizeof(any), 0);
if (result)
if (!result)
xs_sock_mark_closed(xprt);
else
dprintk("RPC: AF_UNSPEC connect return code %d\n",
result);
}
/*
 * Abort the existing connection via xs_abort_connection(), unless the
 * socket is in a state where that step is skipped:
 *  - the sock is in TCP_CLOSE and the socket is SS_UNCONNECTED, or
 *  - the bit for the current sk_state is in
 *    TCPF_ESTABLISHED|TCPF_SYN_SENT.
 */
static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
{
	unsigned int sk_state = transport->inet->sk_state;
	int already_reset;

	already_reset = (sk_state == TCP_CLOSE &&
			transport->sock->state == SS_UNCONNECTED);

	if (!already_reset &&
	    !((1 << sk_state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)))
		xs_abort_connection(xprt, transport);
}
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
......@@ -1629,7 +1699,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
sk->sk_data_ready = xs_tcp_data_ready;
sk->sk_state_change = xs_tcp_state_change;
sk->sk_write_space = xs_tcp_write_space;
sk->sk_error_report = xs_tcp_error_report;
sk->sk_error_report = xs_error_report;
sk->sk_allocation = GFP_ATOMIC;
/* socket options */
......@@ -1657,37 +1727,42 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
}
/**
* xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
* @work: RPC transport to connect
* xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
* @xprt: RPC transport to connect
* @transport: socket transport to connect
* @create_sock: function to create a socket of the correct type
*
* Invoked by a work queue tasklet.
*/
static void xs_tcp_connect_worker4(struct work_struct *work)
static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
struct sock_xprt *transport,
struct socket *(*create_sock)(struct rpc_xprt *,
struct sock_xprt *))
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
struct rpc_xprt *xprt = &transport->xprt;
struct socket *sock = transport->sock;
int err, status = -EIO;
int status = -EIO;
if (xprt->shutdown)
goto out;
if (!sock) {
/* start from scratch */
if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
sock = create_sock(xprt, transport);
if (IS_ERR(sock)) {
status = PTR_ERR(sock);
goto out;
}
xs_reclassify_socket4(sock);
} else {
int abort_and_exit;
if (xs_bind4(transport, sock) < 0) {
sock_release(sock);
goto out;
}
} else
abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
&xprt->state);
/* "close" the socket, preserving the local port */
xs_tcp_reuse_connection(xprt);
xs_tcp_reuse_connection(xprt, transport);
if (abort_and_exit)
goto out_eagain;
}
dprintk("RPC: worker connecting xprt %p to address: %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
......@@ -1696,83 +1771,104 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
xprt, -status, xprt_connected(xprt),
sock->sk->sk_state);
if (status < 0) {
switch (status) {
case -EINPROGRESS:
case -EALREADY:
goto out_clear;
case -ECONNREFUSED:
case -ECONNRESET:
/* retry with existing socket, after a delay */
break;
default:
/* get rid of existing socket, and retry */
xs_tcp_shutdown(xprt);
}
switch (status) {
case -ECONNREFUSED:
case -ECONNRESET:
case -ENETUNREACH:
/* retry with existing socket, after a delay */
case 0:
case -EINPROGRESS:
case -EALREADY:
xprt_clear_connecting(xprt);
return;
}
/* get rid of existing socket, and retry */
xs_tcp_shutdown(xprt);
printk("%s: connect returned unhandled error %d\n",
__func__, status);
out_eagain:
status = -EAGAIN;
out:
xprt_wake_pending_tasks(xprt, status);
out_clear:
xprt_clear_connecting(xprt);
xprt_wake_pending_tasks(xprt, status);
}
/*
 * Create a new PF_INET/SOCK_STREAM/IPPROTO_TCP kernel socket, reclassify
 * it with xs_reclassify_socket4() and bind it with xs_bind4().
 *
 * @xprt: not used by this function's body
 * @transport: passed to xs_bind4()
 *
 * Returns the new socket on success. On failure of either socket
 * creation or binding, returns ERR_PTR(-EIO); the underlying error code
 * is not propagated. A socket that fails to bind is released here.
 */
static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
		struct sock_xprt *transport)
{
	struct socket *new_sock;
	int rc;

	/* start from scratch */
	rc = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &new_sock);
	if (rc < 0) {
		dprintk("RPC: can't create TCP transport socket (%d).\n",
				-rc);
		return ERR_PTR(-EIO);
	}

	xs_reclassify_socket4(new_sock);

	if (xs_bind4(transport, new_sock) < 0) {
		/* Binding failed: drop the socket created above */
		sock_release(new_sock);
		return ERR_PTR(-EIO);
	}

	return new_sock;
}
/**
* xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
* xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
* @work: RPC transport to connect
*
* Invoked by a work queue tasklet.
*/
static void xs_tcp_connect_worker6(struct work_struct *work)
static void xs_tcp_connect_worker4(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
struct rpc_xprt *xprt = &transport->xprt;
struct socket *sock = transport->sock;
int err, status = -EIO;
if (xprt->shutdown)
goto out;
xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
}
if (!sock) {
/* start from scratch */
if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
dprintk("RPC: can't create TCP transport socket (%d).\n", -err);
goto out;
}
xs_reclassify_socket6(sock);
static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
struct sock_xprt *transport)
{
struct socket *sock;
int err;
/* start from scratch */
err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
if (err < 0) {
dprintk("RPC: can't create TCP transport socket (%d).\n",
-err);
goto out_err;
}
xs_reclassify_socket6(sock);
if (xs_bind6(transport, sock) < 0) {
sock_release(sock);
goto out;
}
} else
/* "close" the socket, preserving the local port */
xs_tcp_reuse_connection(xprt);
if (xs_bind6(transport, sock) < 0) {
sock_release(sock);
goto out_err;
}
return sock;
out_err:
return ERR_PTR(-EIO);
}
dprintk("RPC: worker connecting xprt %p to address: %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
/**
* xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
* @work: RPC transport to connect
*
* Invoked by a work queue tasklet.
*/
static void xs_tcp_connect_worker6(struct work_struct *work)
{
struct sock_xprt *transport =
container_of(work, struct sock_xprt, connect_worker.work);
struct rpc_xprt *xprt = &transport->xprt;
status = xs_tcp_finish_connecting(xprt, sock);
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
if (status < 0) {
switch (status) {
case -EINPROGRESS:
case -EALREADY:
goto out_clear;
case -ECONNREFUSED:
case -ECONNRESET:
/* retry with existing socket, after a delay */
break;
default:
/* get rid of existing socket, and retry */
xs_tcp_shutdown(xprt);
}
}
out:
xprt_wake_pending_tasks(xprt, status);
out_clear:
xprt_clear_connecting(xprt);
xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
}
/**
......@@ -1817,9 +1913,6 @@ static void xs_tcp_connect(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
/* Initiate graceful shutdown of the socket if not already done */
if (test_bit(XPRT_CONNECTED, &xprt->state))
xs_tcp_shutdown(xprt);
/* Exit if we need to wait for socket shutdown to complete */
if (test_bit(XPRT_CLOSING, &xprt->state))
return;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment