Commit 52ad0964 authored by Linus Torvalds

Merge git://git.linux-nfs.org/projects/trondmy/nfs-2.6

* git://git.linux-nfs.org/projects/trondmy/nfs-2.6: (53 commits)
  NFS: Fix a resolution problem with nfs_inode->cache_change_attribute
  NFS: Fix the resolution problem with nfs_inode_attrs_need_update()
  NFS: Changes to inode->i_nlinks must set the NFS_INO_INVALID_ATTR flag
  RPC/RDMA: ensure connection attempt is complete before signalling.
  RPC/RDMA: correct the reconnect timer backoff
  RPC/RDMA: optionally emit useful transport info upon connect/disconnect.
  RPC/RDMA: reformat a debug printk to keep lines together.
  RPC/RDMA: harden connection logic against missing/late rdma_cm upcalls.
  RPC/RDMA: fix connect/reconnect resource leak.
  RPC/RDMA: return a consistent error, when connect fails.
  RPC/RDMA: adhere to protocol for unpadded client trailing write chunks.
  RPC/RDMA: avoid an oops due to disconnect racing with async upcalls.
  RPC/RDMA: maintain the RPC task bytes-sent statistic.
  RPC/RDMA: suppress retransmit on RPC/RDMA clients.
  RPC/RDMA: fix connection IRD/ORD setting
  RPC/RDMA: support FRMR client memory registration.
  RPC/RDMA: check selected memory registration mode at runtime.
  RPC/RDMA: add data types and new FRMR memory registration enum.
  RPC/RDMA: refactor the inline memory registration code.
  NFS: fix nfs_parse_ip_address() corner case
  ...
parents 8cde1ad6 6925bac1
@@ -675,7 +675,7 @@ static int nfs_init_server(struct nfs_server *server,
 server->nfs_client = clp;
 /* Initialise the client representation from the mount data */
-server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+server->flags = data->flags;
 if (data->rsize)
 server->rsize = nfs_block_size(data->rsize, NULL);
@@ -850,7 +850,6 @@ static struct nfs_server *nfs_alloc_server(void)
 INIT_LIST_HEAD(&server->client_link);
 INIT_LIST_HEAD(&server->master_link);
-init_waitqueue_head(&server->active_wq);
 atomic_set(&server->active, 0);
 server->io_stats = nfs_alloc_iostats();
@@ -1073,7 +1072,7 @@ static int nfs4_init_server(struct nfs_server *server,
 goto error;
 /* Initialise the client representation from the mount data */
-server->flags = data->flags & NFS_MOUNT_FLAGMASK;
+server->flags = data->flags;
 server->caps |= NFS_CAP_ATOMIC_OPEN;
 if (data->rsize)
...
@@ -156,6 +156,7 @@ typedef struct {
 decode_dirent_t decode;
 int plus;
 unsigned long timestamp;
+unsigned long gencount;
 int timestamp_valid;
 } nfs_readdir_descriptor_t;
@@ -177,7 +178,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 struct file *file = desc->file;
 struct inode *inode = file->f_path.dentry->d_inode;
 struct rpc_cred *cred = nfs_file_cred(file);
-unsigned long timestamp;
+unsigned long timestamp, gencount;
 int error;
 dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n",
@@ -186,6 +187,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 again:
 timestamp = jiffies;
+gencount = nfs_inc_attr_generation_counter();
 error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page,
 NFS_SERVER(inode)->dtsize, desc->plus);
 if (error < 0) {
@@ -199,6 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 goto error;
 }
 desc->timestamp = timestamp;
+desc->gencount = gencount;
 desc->timestamp_valid = 1;
 SetPageUptodate(page);
 /* Ensure consistent page alignment of the data.
@@ -224,9 +227,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc)
 if (IS_ERR(p))
 return PTR_ERR(p);
 desc->ptr = p;
-if (desc->timestamp_valid)
+if (desc->timestamp_valid) {
 desc->entry->fattr->time_start = desc->timestamp;
-else
+desc->entry->fattr->gencount = desc->gencount;
+} else
 desc->entry->fattr->valid &= ~NFS_ATTR_FATTR;
 return 0;
 }
@@ -471,7 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 struct rpc_cred *cred = nfs_file_cred(file);
 struct page *page = NULL;
 int status;
-unsigned long timestamp;
+unsigned long timestamp, gencount;
 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
 (unsigned long long)*desc->dir_cookie);
@@ -482,6 +486,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 goto out;
 }
 timestamp = jiffies;
+gencount = nfs_inc_attr_generation_counter();
 status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred,
 *desc->dir_cookie, page,
 NFS_SERVER(inode)->dtsize,
@@ -490,6 +495,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
 if (status >= 0) {
 desc->timestamp = timestamp;
+desc->gencount = gencount;
 desc->timestamp_valid = 1;
 if ((status = dir_decode(desc)) == 0)
 desc->entry->prev_cookie = *desc->dir_cookie;
@@ -655,7 +661,7 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
 */
 void nfs_force_lookup_revalidate(struct inode *dir)
 {
-NFS_I(dir)->cache_change_attribute = jiffies;
+NFS_I(dir)->cache_change_attribute++;
 }
 /*
@@ -667,6 +673,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
 {
 if (IS_ROOT(dentry))
 return 1;
+if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
+return 0;
 if (!nfs_verify_change_attribute(dir, dentry->d_time))
 return 0;
 /* Revalidate nfsi->cache_change_attribute before we declare a match */
@@ -750,6 +758,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
 /* Don't revalidate a negative dentry if we're creating a new file */
 if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0)
 return 0;
+if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
+return 1;
 return !nfs_check_verifier(dir, dentry);
 }
...
@@ -188,13 +188,16 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
 /* origin == SEEK_END => we must revalidate the cached file length */
 if (origin == SEEK_END) {
 struct inode *inode = filp->f_mapping->host;
 int retval = nfs_revalidate_file_size(inode, filp);
 if (retval < 0)
 return (loff_t)retval;
-}
-lock_kernel(); /* BKL needed? */
+spin_lock(&inode->i_lock);
 loff = generic_file_llseek_unlocked(filp, offset, origin);
-unlock_kernel();
+spin_unlock(&inode->i_lock);
+} else
+loff = generic_file_llseek_unlocked(filp, offset, origin);
 return loff;
 }
@@ -699,13 +702,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
 filp->f_path.dentry->d_name.name,
 fl->fl_type, fl->fl_flags);
-/*
-* No BSD flocks over NFS allowed.
-* Note: we could try to fake a POSIX lock request here by
-* using ((u32) filp | 0x80000000) or some such as the pid.
-* Not sure whether that would be unique, though, or whether
-* that would break in other places.
-*/
 if (!(fl->fl_flags & FL_FLOCK))
 return -ENOLCK;
...
@@ -305,8 +305,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 init_special_inode(inode, inode->i_mode, fattr->rdev);
 nfsi->read_cache_jiffies = fattr->time_start;
-nfsi->last_updated = now;
-nfsi->cache_change_attribute = now;
+nfsi->attr_gencount = fattr->gencount;
 inode->i_atime = fattr->atime;
 inode->i_mtime = fattr->mtime;
 inode->i_ctime = fattr->ctime;
@@ -453,6 +452,7 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
 {
 if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
+spin_lock(&inode->i_lock);
 if ((attr->ia_valid & ATTR_MODE) != 0) {
 int mode = attr->ia_mode & S_IALLUGO;
 mode |= inode->i_mode & ~S_IALLUGO;
@@ -462,7 +462,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
 inode->i_uid = attr->ia_uid;
 if ((attr->ia_valid & ATTR_GID) != 0)
 inode->i_gid = attr->ia_gid;
-spin_lock(&inode->i_lock);
 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 spin_unlock(&inode->i_lock);
 }
@@ -472,37 +471,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
 }
 }
-static int nfs_wait_schedule(void *word)
-{
-if (signal_pending(current))
-return -ERESTARTSYS;
-schedule();
-return 0;
-}
-/*
-* Wait for the inode to get unlocked.
-*/
-static int nfs_wait_on_inode(struct inode *inode)
-{
-struct nfs_inode *nfsi = NFS_I(inode);
-int error;
-error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING,
-nfs_wait_schedule, TASK_KILLABLE);
-return error;
-}
-static void nfs_wake_up_inode(struct inode *inode)
-{
-struct nfs_inode *nfsi = NFS_I(inode);
-clear_bit(NFS_INO_REVALIDATING, &nfsi->flags);
-smp_mb__after_clear_bit();
-wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING);
-}
 int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 {
 struct inode *inode = dentry->d_inode;
@@ -697,20 +665,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
 inode->i_sb->s_id, (long long)NFS_FILEID(inode));
-nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
 if (is_bad_inode(inode))
-goto out_nowait;
+goto out;
 if (NFS_STALE(inode))
-goto out_nowait;
-status = nfs_wait_on_inode(inode);
-if (status < 0)
 goto out;
-status = -ESTALE;
 if (NFS_STALE(inode))
 goto out;
+nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE);
 status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr);
 if (status != 0) {
 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
@@ -724,16 +687,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 goto out;
 }
-spin_lock(&inode->i_lock);
-status = nfs_update_inode(inode, &fattr);
+status = nfs_refresh_inode(inode, &fattr);
 if (status) {
-spin_unlock(&inode->i_lock);
 dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
 inode->i_sb->s_id,
 (long long)NFS_FILEID(inode), status);
 goto out;
 }
-spin_unlock(&inode->i_lock);
 if (nfsi->cache_validity & NFS_INO_INVALID_ACL)
 nfs_zap_acl_cache(inode);
@@ -743,9 +703,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 (long long)NFS_FILEID(inode));
 out:
-nfs_wake_up_inode(inode);
-out_nowait:
 return status;
 }
@@ -908,9 +865,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 return -EIO;
 }
-/* Do atomic weak cache consistency updates */
-nfs_wcc_update_inode(inode, fattr);
 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
 nfsi->change_attr != fattr->change_attr)
 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
@@ -939,15 +893,81 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 if (invalid != 0)
 nfsi->cache_validity |= invalid;
-else
-nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
-| NFS_INO_INVALID_ATIME
-| NFS_INO_REVAL_PAGECACHE);
 nfsi->read_cache_jiffies = fattr->time_start;
 return 0;
 }
+static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
+{
+return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
+}
+static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
+{
+return nfs_size_to_loff_t(fattr->size) > i_size_read(inode);
+}
+static unsigned long nfs_attr_generation_counter;
+static unsigned long nfs_read_attr_generation_counter(void)
+{
+smp_rmb();
+return nfs_attr_generation_counter;
+}
+unsigned long nfs_inc_attr_generation_counter(void)
+{
+unsigned long ret;
+smp_rmb();
+ret = ++nfs_attr_generation_counter;
+smp_wmb();
+return ret;
+}
+void nfs_fattr_init(struct nfs_fattr *fattr)
+{
+fattr->valid = 0;
+fattr->time_start = jiffies;
+fattr->gencount = nfs_inc_attr_generation_counter();
+}
+/**
+* nfs_inode_attrs_need_update - check if the inode attributes need updating
+* @inode - pointer to inode
+* @fattr - attributes
+*
+* Attempt to divine whether or not an RPC call reply carrying stale
+* attributes got scheduled after another call carrying updated ones.
+*
+* To do so, the function first assumes that a more recent ctime means
+* that the attributes in fattr are newer, however it also attempt to
+* catch the case where ctime either didn't change, or went backwards
+* (if someone reset the clock on the server) by looking at whether
+* or not this RPC call was started after the inode was last updated.
+* Note also the check for wraparound of 'attr_gencount'
+*
+* The function returns 'true' if it thinks the attributes in 'fattr' are
+* more recent than the ones cached in the inode.
+*
+*/
+static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
+{
+const struct nfs_inode *nfsi = NFS_I(inode);
+return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
+nfs_ctime_need_update(inode, fattr) ||
+nfs_size_need_update(inode, fattr) ||
+((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
+}
+static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
+{
+if (nfs_inode_attrs_need_update(inode, fattr))
+return nfs_update_inode(inode, fattr);
+return nfs_check_inode_attributes(inode, fattr);
+}
 /**
 * nfs_refresh_inode - try to update the inode attribute cache
 * @inode - pointer to inode
@@ -960,21 +980,28 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 */
 int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
-struct nfs_inode *nfsi = NFS_I(inode);
 int status;
 if ((fattr->valid & NFS_ATTR_FATTR) == 0)
 return 0;
 spin_lock(&inode->i_lock);
-if (time_after(fattr->time_start, nfsi->last_updated))
-status = nfs_update_inode(inode, fattr);
-else
-status = nfs_check_inode_attributes(inode, fattr);
+status = nfs_refresh_inode_locked(inode, fattr);
 spin_unlock(&inode->i_lock);
 return status;
 }
+static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
+{
+struct nfs_inode *nfsi = NFS_I(inode);
+nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+if (S_ISDIR(inode->i_mode))
+nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+return 0;
+return nfs_refresh_inode_locked(inode, fattr);
+}
 /**
 * nfs_post_op_update_inode - try to update the inode attribute cache
 * @inode - pointer to inode
@@ -991,14 +1018,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
 */
 int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
-struct nfs_inode *nfsi = NFS_I(inode);
+int status;
 spin_lock(&inode->i_lock);
-nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
-if (S_ISDIR(inode->i_mode))
-nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+status = nfs_post_op_update_inode_locked(inode, fattr);
 spin_unlock(&inode->i_lock);
-return nfs_refresh_inode(inode, fattr);
+return status;
 }
 /**
@@ -1014,6 +1039,15 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 */
 int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr)
 {
+int status;
+spin_lock(&inode->i_lock);
+/* Don't do a WCC update if these attributes are already stale */
+if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
+!nfs_inode_attrs_need_update(inode, fattr)) {
+fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC);
+goto out_noforce;
+}
 if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 &&
 (fattr->valid & NFS_ATTR_WCC_V4) == 0) {
 fattr->pre_change_attr = NFS_I(inode)->change_attr;
@@ -1026,7 +1060,10 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa
 fattr->pre_size = i_size_read(inode);
 fattr->valid |= NFS_ATTR_WCC;
 }
-return nfs_post_op_update_inode(inode, fattr);
+out_noforce:
+status = nfs_post_op_update_inode_locked(inode, fattr);
+spin_unlock(&inode->i_lock);
+return status;
 }
 /*
@@ -1092,7 +1129,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 }
 /* If ctime has changed we should definitely clear access+acl caches */
 if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
-invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
 } else if (nfsi->change_attr != fattr->change_attr) {
 dprintk("NFS: change_attr change on server for file %s/%ld\n",
 inode->i_sb->s_id, inode->i_ino);
@@ -1126,6 +1163,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 inode->i_gid != fattr->gid)
 invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
+if (inode->i_nlink != fattr->nlink)
+invalid |= NFS_INO_INVALID_ATTR;
 inode->i_mode = fattr->mode;
 inode->i_nlink = fattr->nlink;
 inode->i_uid = fattr->uid;
@@ -1145,18 +1185,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
 nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 nfsi->attrtimeo_timestamp = now;
-nfsi->last_updated = now;
+nfsi->attr_gencount = nfs_inc_attr_generation_counter();
 } else {
 if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) {
 if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
 nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
 nfsi->attrtimeo_timestamp = now;
 }
-/*
-* Avoid jiffy wraparound issues with nfsi->last_updated
-*/
-if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now))
-nfsi->last_updated = nfsi->read_cache_jiffies;
 }
 invalid &= ~NFS_INO_INVALID_ATTR;
 /* Don't invalidate the data if we were to blame */
...
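An illustrative aside (not part of the diff above): nfs_inode_attrs_need_update() compares attr_gencount values with a signed difference, ((long)new - (long)old) > 0, rather than a plain unsigned comparison, so the test still gives the right answer after the generation counter wraps. A minimal userspace sketch of that idiom, assuming a two's-complement target and samples less than LONG_MAX apart:

    #include <limits.h>
    #include <stdio.h>

    /* Same comparison idiom as nfs_inode_attrs_need_update(): treat two
     * unsigned generation counters as a signed difference so a counter
     * that has wrapped around still compares as "newer". */
    static int gencount_newer(unsigned long newc, unsigned long oldc)
    {
        return ((long)newc - (long)oldc) > 0;
    }

    int main(void)
    {
        unsigned long oldc = ULONG_MAX; /* sampled just before the counter wraps */
        unsigned long newc = 1;         /* sampled just after the wrap */

        printf("plain unsigned compare: %d\n", newc > oldc);                /* 0: looks older */
        printf("signed difference:      %d\n", gencount_newer(newc, oldc)); /* 1: correctly newer */
        return 0;
    }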
@@ -153,6 +153,7 @@ extern void nfs4_clear_inode(struct inode *);
 void nfs_zap_acl_cache(struct inode *inode);
 /* super.c */
+void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *);
 extern struct file_system_type nfs_xdev_fs_type;
 #ifdef CONFIG_NFS_V4
 extern struct file_system_type nfs4_xdev_fs_type;
@@ -163,8 +164,8 @@ extern struct rpc_stat nfs_rpcstat;
 extern int __init register_nfs_fs(void);
 extern void __exit unregister_nfs_fs(void);
-extern void nfs_sb_active(struct nfs_server *server);
-extern void nfs_sb_deactive(struct nfs_server *server);
+extern void nfs_sb_active(struct super_block *sb);
+extern void nfs_sb_deactive(struct super_block *sb);
 /* namespace.c */
 extern char *nfs_path(const char *base,
@@ -276,3 +277,23 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len)
 PAGE_SIZE - 1) >> PAGE_SHIFT;
 }
+#define IPV6_SCOPE_DELIMITER '%'
+/*
+* Set the port number in an address. Be agnostic about the address
+* family.
+*/
+static inline void nfs_set_port(struct sockaddr *sap, unsigned short port)
+{
+struct sockaddr_in *ap = (struct sockaddr_in *)sap;
+struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap;
+switch (sap->sa_family) {
+case AF_INET:
+ap->sin_port = htons(port);
+break;
+case AF_INET6:
+ap6->sin6_port = htons(port);
+break;
+}
+}
@@ -14,6 +14,7 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/nfs_fs.h>
+#include "internal.h"
 #ifdef RPC_DEBUG
 # define NFSDBG_FACILITY NFSDBG_MOUNT
@@ -98,7 +99,7 @@ int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
 out_mnt_err:
 dprintk("NFS: MNT server returned result %d\n", result.status);
-status = -EACCES;
+status = nfs_stat_to_errno(result.status);
 goto out;
 }
...
@@ -105,7 +105,10 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
 dprintk("--> nfs_follow_mountpoint()\n");
-BUG_ON(IS_ROOT(dentry));
+err = -ESTALE;
+if (IS_ROOT(dentry))
+goto out_err;
 dprintk("%s: enter\n", __func__);
 dput(nd->path.dentry);
 nd->path.dentry = dget(dentry);
@@ -189,7 +192,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
 struct nfs_clone_mount *mountdata)
 {
 #ifdef CONFIG_NFS_V4
-struct vfsmount *mnt = NULL;
+struct vfsmount *mnt = ERR_PTR(-EINVAL);
 switch (server->nfs_client->rpc_ops->version) {
 case 2:
 case 3:
...
@@ -229,6 +229,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type)
 dprintk("NFS call getacl\n");
 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL];
+nfs_fattr_init(&fattr);
 status = rpc_call_sync(server->client_acl, &msg, 0);
 dprintk("NFS reply getacl: %d\n", status);
@@ -322,6 +323,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
 dprintk("NFS call setacl\n");
 msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL];
+nfs_fattr_init(&fattr);
 status = rpc_call_sync(server->client_acl, &msg, 0);
 nfs_access_zap_cache(inode);
 nfs_zap_acl_cache(inode);
...
@@ -699,7 +699,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
 }
 static int
-nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+do_proc_fsinfo(struct rpc_clnt *client, struct nfs_fh *fhandle,
 struct nfs_fsinfo *info)
 {
 struct rpc_message msg = {
@@ -711,11 +711,27 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
 dprintk("NFS call fsinfo\n");
 nfs_fattr_init(info->fattr);
-status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
+status = rpc_call_sync(client, &msg, 0);
 dprintk("NFS reply fsinfo: %d\n", status);
 return status;
 }
+/*
+* Bare-bones access to fsinfo: this is for nfs_get_root/nfs_get_sb via
+* nfs_create_server
+*/
+static int
+nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
+struct nfs_fsinfo *info)
+{
+int status;
+status = do_proc_fsinfo(server->client, fhandle, info);
+if (status && server->nfs_client->cl_rpcclient != server->client)
+status = do_proc_fsinfo(server->nfs_client->cl_rpcclient, fhandle, info);
+return status;
+}
 static int
 nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 struct nfs_pathconf *info)
...
@@ -93,21 +93,52 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent,
 return 0;
 }
-/*
-* Check if the string represents a "valid" IPv4 address
-*/
-static inline int valid_ipaddr4(const char *buf)
+static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
+char *page, char *page2,
+const struct nfs4_fs_location *location)
 {
-int rc, count, in[4];
-rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
-if (rc != 4)
-return -EINVAL;
-for (count = 0; count < 4; count++) {
-if (in[count] > 255)
-return -EINVAL;
+struct vfsmount *mnt = ERR_PTR(-ENOENT);
+char *mnt_path;
+int page2len;
+unsigned int s;
+mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE);
+if (IS_ERR(mnt_path))
+return mnt;
+mountdata->mnt_path = mnt_path;
+page2 += strlen(mnt_path) + 1;
+page2len = PAGE_SIZE - strlen(mnt_path) - 1;
+for (s = 0; s < location->nservers; s++) {
+const struct nfs4_string *buf = &location->servers[s];
+struct sockaddr_storage addr;
+if (buf->len <= 0 || buf->len >= PAGE_SIZE)
+continue;
+mountdata->addr = (struct sockaddr *)&addr;
+if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len))
+continue;
+nfs_parse_ip_address(buf->data, buf->len,
+mountdata->addr, &mountdata->addrlen);
+if (mountdata->addr->sa_family == AF_UNSPEC)
+continue;
+nfs_set_port(mountdata->addr, NFS_PORT);
+strncpy(page2, buf->data, page2len);
+page2[page2len] = '\0';
+mountdata->hostname = page2;
+snprintf(page, PAGE_SIZE, "%s:%s",
+mountdata->hostname,
+mountdata->mnt_path);
+mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata);
+if (!IS_ERR(mnt))
+break;
 }
-return 0;
+return mnt;
 }
 /**
@@ -128,7 +159,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
 };
 char *page = NULL, *page2 = NULL;
-unsigned int s;
 int loc, error;
 if (locations == NULL || locations->nlocations <= 0)
@@ -152,53 +182,16 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 goto out;
 }
-loc = 0;
-while (loc < locations->nlocations && IS_ERR(mnt)) {
+for (loc = 0; loc < locations->nlocations; loc++) {
 const struct nfs4_fs_location *location = &locations->locations[loc];
-char *mnt_path;
 if (location == NULL || location->nservers <= 0 ||
-location->rootpath.ncomponents == 0) {
-loc++;
+location->rootpath.ncomponents == 0)
 continue;
-}
-mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE);
-if (IS_ERR(mnt_path)) {
-loc++;
-continue;
-}
-mountdata.mnt_path = mnt_path;
-s = 0;
-while (s < location->nservers) {
-struct sockaddr_in addr = {
-.sin_family = AF_INET,
-.sin_port = htons(NFS_PORT),
-};
-if (location->servers[s].len <= 0 ||
-valid_ipaddr4(location->servers[s].data) < 0) {
-s++;
-continue;
-}
-mountdata.hostname = location->servers[s].data;
-addr.sin_addr.s_addr = in_aton(mountdata.hostname),
-mountdata.addr = (struct sockaddr *)&addr;
-mountdata.addrlen = sizeof(addr);
-snprintf(page, PAGE_SIZE, "%s:%s",
-mountdata.hostname,
-mountdata.mnt_path);
-mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, &mountdata);
-if (!IS_ERR(mnt)) {
-break;
-}
-s++;
-}
-loc++;
+mnt = try_location(&mountdata, page, page2, location);
+if (!IS_ERR(mnt))
+break;
 }
 out:
...
@@ -65,14 +65,20 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
 dprintk("%s: call getattr\n", __func__);
 nfs_fattr_init(fattr);
-status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
+status = rpc_call_sync(server->client, &msg, 0);
+/* Retry with default authentication if different */
+if (status && server->nfs_client->cl_rpcclient != server->client)
+status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 dprintk("%s: reply getattr: %d\n", __func__, status);
 if (status)
 return status;
 dprintk("%s: call statfs\n", __func__);
 msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS];
 msg.rpc_resp = &fsinfo;
-status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
+status = rpc_call_sync(server->client, &msg, 0);
+/* Retry with default authentication if different */
+if (status && server->nfs_client->cl_rpcclient != server->client)
+status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0);
 dprintk("%s: reply statfs: %d\n", __func__, status);
 if (status)
 return status;
...
@@ -91,6 +91,7 @@ enum {
 /* Mount options that take string arguments */
 Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
 Opt_addr, Opt_mountaddr, Opt_clientaddr,
+Opt_lookupcache,
 /* Special mount options */
 Opt_userspace, Opt_deprecated, Opt_sloppy,
@@ -154,6 +155,8 @@ static const match_table_t nfs_mount_option_tokens = {
 { Opt_mounthost, "mounthost=%s" },
 { Opt_mountaddr, "mountaddr=%s" },
+{ Opt_lookupcache, "lookupcache=%s" },
 { Opt_err, NULL }
 };
@@ -200,6 +203,22 @@ static const match_table_t nfs_secflavor_tokens = {
 { Opt_sec_err, NULL }
 };
+enum {
+Opt_lookupcache_all, Opt_lookupcache_positive,
+Opt_lookupcache_none,
+Opt_lookupcache_err
+};
+static match_table_t nfs_lookupcache_tokens = {
+{ Opt_lookupcache_all, "all" },
+{ Opt_lookupcache_positive, "pos" },
+{ Opt_lookupcache_positive, "positive" },
+{ Opt_lookupcache_none, "none" },
+{ Opt_lookupcache_err, NULL }
+};
 static void nfs_umount_begin(struct super_block *);
 static int nfs_statfs(struct dentry *, struct kstatfs *);
@@ -209,7 +228,6 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru
 static int nfs_xdev_get_sb(struct file_system_type *fs_type,
 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs_kill_super(struct super_block *);
-static void nfs_put_super(struct super_block *);
 static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
 static struct file_system_type nfs_fs_type = {
@@ -232,7 +250,6 @@ static const struct super_operations nfs_sops = {
 .alloc_inode = nfs_alloc_inode,
 .destroy_inode = nfs_destroy_inode,
 .write_inode = nfs_write_inode,
-.put_super = nfs_put_super,
 .statfs = nfs_statfs,
 .clear_inode = nfs_clear_inode,
 .umount_begin = nfs_umount_begin,
@@ -337,26 +354,20 @@ void __exit unregister_nfs_fs(void)
 unregister_filesystem(&nfs_fs_type);
 }
-void nfs_sb_active(struct nfs_server *server)
+void nfs_sb_active(struct super_block *sb)
 {
-atomic_inc(&server->active);
-}
-void nfs_sb_deactive(struct nfs_server *server)
-{
-if (atomic_dec_and_test(&server->active))
-wake_up(&server->active_wq);
+struct nfs_server *server = NFS_SB(sb);
+if (atomic_inc_return(&server->active) == 1)
+atomic_inc(&sb->s_active);
 }
-static void nfs_put_super(struct super_block *sb)
+void nfs_sb_deactive(struct super_block *sb)
 {
 struct nfs_server *server = NFS_SB(sb);
-/*
-* Make sure there are no outstanding ops to this server.
-* If so, wait for them to finish before allowing the
-* unmount to continue.
-*/
-wait_event(server->active_wq, atomic_read(&server->active) == 0);
+if (atomic_dec_and_test(&server->active))
+deactivate_super(sb);
 }
 /*
@@ -663,25 +674,6 @@ static void nfs_umount_begin(struct super_block *sb)
 rpc_killall_tasks(rpc);
 }
-/*
-* Set the port number in an address. Be agnostic about the address family.
-*/
-static void nfs_set_port(struct sockaddr *sap, unsigned short port)
-{
-switch (sap->sa_family) {
-case AF_INET: {
-struct sockaddr_in *ap = (struct sockaddr_in *)sap;
-ap->sin_port = htons(port);
-break;
-}
-case AF_INET6: {
-struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
-ap->sin6_port = htons(port);
-break;
-}
-}
-}
 /*
 * Sanity-check a server address provided by the mount command.
 *
@@ -724,20 +716,22 @@ static void nfs_parse_ipv4_address(char *string, size_t str_len,
 *addr_len = 0;
 }
-#define IPV6_SCOPE_DELIMITER '%'
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len,
+static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len,
 const char *delim,
 struct sockaddr_in6 *sin6)
 {
 char *p;
 size_t len;
-if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
-return ;
+if ((string + str_len) == delim)
+return 1;
 if (*delim != IPV6_SCOPE_DELIMITER)
-return;
+return 0;
+if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
+return 0;
 len = (string + str_len) - delim - 1;
 p = kstrndup(delim + 1, len, GFP_KERNEL);
@@ -750,14 +744,20 @@ static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len,
 scope_id = dev->ifindex;
 dev_put(dev);
 } else {
-/* scope_id is set to zero on error */
-strict_strtoul(p, 10, &scope_id);
+if (strict_strtoul(p, 10, &scope_id) == 0) {
+kfree(p);
+return 0;
+}
 }
 kfree(p);
 sin6->sin6_scope_id = scope_id;
 dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id);
+return 1;
 }
+return 0;
 }
 static void nfs_parse_ipv6_address(char *string, size_t str_len,
@@ -773,9 +773,11 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len,
 sin6->sin6_family = AF_INET6;
 *addr_len = sizeof(*sin6);
-if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) {
-nfs_parse_ipv6_scope_id(string, str_len, delim, sin6);
-return;
+if (in6_pton(string, str_len, addr,
+IPV6_SCOPE_DELIMITER, &delim) != 0) {
+if (nfs_parse_ipv6_scope_id(string, str_len,
+delim, sin6) != 0)
+return;
 }
 }
@@ -798,7 +800,7 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len,
 * If there is a problem constructing the new sockaddr, set the address
 * family to AF_UNSPEC.
 */
-static void nfs_parse_ip_address(char *string, size_t str_len,
+void nfs_parse_ip_address(char *string, size_t str_len,
 struct sockaddr *sap, size_t *addr_len)
 {
 unsigned int i, colons;
@@ -1258,6 +1260,30 @@ static int nfs_parse_mount_options(char *raw,
 &mnt->mount_server.addrlen);
 kfree(string);
 break;
+case Opt_lookupcache:
+string = match_strdup(args);
+if (string == NULL)
+goto out_nomem;
+token = match_token(string,
+nfs_lookupcache_tokens, args);
+kfree(string);
+switch (token) {
+case Opt_lookupcache_all:
+mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE);
+break;
+case Opt_lookupcache_positive:
+mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE;
+mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG;
+break;
+case Opt_lookupcache_none:
+mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE;
+break;
+default:
+errors++;
+dfprintk(MOUNT, "NFS: invalid "
+"lookupcache argument\n");
+};
+break;
 /*
 * Special options
@@ -1558,7 +1584,7 @@ static int nfs_validate_mount_data(void *options,
 * Translate to nfs_parsed_mount_data, which nfs_fill_super
 * can deal with.
 */
-args->flags = data->flags;
+args->flags = data->flags & NFS_MOUNT_FLAGMASK;
 args->rsize = data->rsize;
 args->wsize = data->wsize;
 args->timeo = data->timeo;
...
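For reference only (not part of the diff above): the scope-id parsing touched here handles link-local IPv6 addresses written as "<address>%<scope>", e.g. "fe80::1%eth0" or "fe80::1%2". A small userspace sketch of the split at IPV6_SCOPE_DELIMITER; the real code lets in6_pton() locate the delimiter and then resolves the scope to an interface index:

    #include <stdio.h>
    #include <string.h>

    #define IPV6_SCOPE_DELIMITER '%'

    int main(void)
    {
        const char *string = "fe80::1%eth0";  /* hypothetical mount address argument */
        const char *delim = strchr(string, IPV6_SCOPE_DELIMITER);

        if (delim == NULL) {
            printf("no scope id in \"%s\"\n", string);
        } else {
            printf("address part: %.*s\n", (int)(delim - string), string);
            printf("scope id:     %s\n", delim + 1); /* interface name or numeric index */
        }
        return 0;
    }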
@@ -99,7 +99,7 @@ static void nfs_async_unlink_release(void *calldata)
 nfs_dec_sillycount(data->dir);
 nfs_free_unlinkdata(data);
-nfs_sb_deactive(NFS_SB(sb));
+nfs_sb_deactive(sb);
 }
 static const struct rpc_call_ops nfs_unlink_ops = {
@@ -118,6 +118,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 .rpc_message = &msg,
 .callback_ops = &nfs_unlink_ops,
 .callback_data = data,
+.workqueue = nfsiod_workqueue,
 .flags = RPC_TASK_ASYNC,
 };
 struct rpc_task *task;
@@ -149,7 +150,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 nfs_dec_sillycount(dir);
 return 0;
 }
-nfs_sb_active(NFS_SERVER(dir));
+nfs_sb_active(dir->i_sb);
 data->args.fh = NFS_FH(dir);
 nfs_fattr_init(&data->res.dir_attr);
...
@@ -1427,8 +1427,9 @@ static int nfs_write_mapping(struct address_space *mapping, int how)
 .bdi = mapping->backing_dev_info,
 .sync_mode = WB_SYNC_NONE,
 .nr_to_write = LONG_MAX,
+.range_start = 0,
+.range_end = LLONG_MAX,
 .for_writepages = 1,
-.range_cyclic = 1,
 };
 int ret;
...
@@ -137,7 +137,7 @@ struct nfs_inode {
 unsigned long attrtimeo_timestamp;
 __u64 change_attr; /* v4 only */
-unsigned long last_updated;
+unsigned long attr_gencount;
 /* "Generation counter" for the attribute cache. This is
 * bumped whenever we update the metadata on the
 * server.
@@ -200,11 +200,10 @@ struct nfs_inode {
 /*
 * Bit offsets in flags field
 */
-#define NFS_INO_REVALIDATING (0) /* revalidating attrs */
-#define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */
-#define NFS_INO_STALE (2) /* possible stale inode */
-#define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */
-#define NFS_INO_MOUNTPOINT (4) /* inode is remote mountpoint */
+#define NFS_INO_ADVISE_RDPLUS (0) /* advise readdirplus */
+#define NFS_INO_STALE (1) /* possible stale inode */
+#define NFS_INO_ACL_LRU_SET (2) /* Inode is on the LRU list */
+#define NFS_INO_MOUNTPOINT (3) /* inode is remote mountpoint */
 static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {
@@ -345,15 +344,11 @@ extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ct
 extern void put_nfs_open_context(struct nfs_open_context *ctx);
 extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode);
 extern u64 nfs_compat_user_ino64(u64 fileid);
+extern void nfs_fattr_init(struct nfs_fattr *fattr);
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern __be32 root_nfs_parse_addr(char *name); /*__init*/
-static inline void nfs_fattr_init(struct nfs_fattr *fattr)
-{
-fattr->valid = 0;
-fattr->time_start = jiffies;
-}
+extern unsigned long nfs_inc_attr_generation_counter(void);
 /*
 * linux/fs/nfs/file.c
...
@@ -119,7 +119,6 @@ struct nfs_server {
 void (*destroy)(struct nfs_server *);
 atomic_t active; /* Keep trace of any activity to this server */
-wait_queue_head_t active_wq; /* Wait for any activity to stop */
 /* mountd-related mount options */
 struct sockaddr_storage mountd_address;
...
@@ -65,4 +65,8 @@ struct nfs_mount_data {
 #define NFS_MOUNT_UNSHARED 0x8000 /* 5 */
 #define NFS_MOUNT_FLAGMASK 0xFFFF
+/* The following are for internal use only */
+#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
+#define NFS_MOUNT_LOOKUP_CACHE_NONE 0x20000
 #endif
@@ -36,6 +36,7 @@ struct nfs_fattr {
 __u32 nlink;
 __u32 uid;
 __u32 gid;
+dev_t rdev;
 __u64 size;
 union {
 struct {
@@ -46,7 +47,6 @@ struct nfs_fattr {
 __u64 used;
 } nfs3;
 } du;
-dev_t rdev;
 struct nfs_fsid fsid;
 __u64 fileid;
 struct timespec atime;
@@ -56,6 +56,7 @@ struct nfs_fattr {
 __u64 change_attr; /* NFSv4 change attribute */
 __u64 pre_change_attr;/* pre-op NFSv4 change attribute */
 unsigned long time_start;
+unsigned long gencount;
 };
 #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */
@@ -672,16 +673,16 @@ struct nfs4_rename_res {
 struct nfs_fattr * new_fattr;
 };
-#define NFS4_SETCLIENTID_NAMELEN (56)
+#define NFS4_SETCLIENTID_NAMELEN (127)
 struct nfs4_setclientid {
 const nfs4_verifier * sc_verifier;
 unsigned int sc_name_len;
-char sc_name[NFS4_SETCLIENTID_NAMELEN];
+char sc_name[NFS4_SETCLIENTID_NAMELEN + 1];
 u32 sc_prog;
 unsigned int sc_netid_len;
-char sc_netid[RPCBIND_MAXNETIDLEN];
+char sc_netid[RPCBIND_MAXNETIDLEN + 1];
 unsigned int sc_uaddr_len;
-char sc_uaddr[RPCBIND_MAXUADDRLEN];
+char sc_uaddr[RPCBIND_MAXUADDRLEN + 1];
 u32 sc_cb_ident;
 };
...
@@ -66,9 +66,6 @@
 #define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */
-#define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */
-#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
 /* memory registration strategies */
 #define RPCRDMA_PERSISTENT_REGISTRATION (1)
@@ -78,6 +75,7 @@ enum rpcrdma_memreg {
 RPCRDMA_MEMWINDOWS,
 RPCRDMA_MEMWINDOWS_ASYNC,
 RPCRDMA_MTHCAFMR,
+RPCRDMA_FRMR,
 RPCRDMA_ALLPHYSICAL,
 RPCRDMA_LAST
 };
...
@@ -213,10 +213,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
 }
 /* save the nodename */
-clnt->cl_nodelen = strlen(utsname()->nodename);
+clnt->cl_nodelen = strlen(init_utsname()->nodename);
 if (clnt->cl_nodelen > UNX_MAXNODENAME)
 clnt->cl_nodelen = UNX_MAXNODENAME;
-memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
+memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen);
 rpc_register_client(clnt);
 return clnt;
...
...@@ -460,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi ...@@ -460,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
return rpc_run_task(&task_setup_data); return rpc_run_task(&task_setup_data);
} }
/*
* In the case where rpc clients have been cloned, we want to make
* sure that we use the program number/version etc of the actual
* owner of the xprt. To do so, we walk back up the tree of parents
* to find whoever created the transport and/or whoever has the
* autobind flag set.
*/
static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt)
{
struct rpc_clnt *parent = clnt->cl_parent;
while (parent != clnt) {
if (parent->cl_xprt != clnt->cl_xprt)
break;
if (clnt->cl_autobind)
break;
clnt = parent;
parent = parent->cl_parent;
}
return clnt;
}
/** /**
* rpcb_getport_async - obtain the port for a given RPC service on a given host * rpcb_getport_async - obtain the port for a given RPC service on a given host
* @task: task that is waiting for portmapper request * @task: task that is waiting for portmapper request
...@@ -469,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi ...@@ -469,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
*/ */
void rpcb_getport_async(struct rpc_task *task) void rpcb_getport_async(struct rpc_task *task)
{ {
struct rpc_clnt *clnt = task->tk_client; struct rpc_clnt *clnt;
struct rpc_procinfo *proc; struct rpc_procinfo *proc;
u32 bind_version; u32 bind_version;
struct rpc_xprt *xprt = task->tk_xprt; struct rpc_xprt *xprt;
struct rpc_clnt *rpcb_clnt; struct rpc_clnt *rpcb_clnt;
static struct rpcbind_args *map; static struct rpcbind_args *map;
struct rpc_task *child; struct rpc_task *child;
...@@ -481,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task) ...@@ -481,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task)
size_t salen; size_t salen;
int status; int status;
clnt = rpcb_find_transport_owner(task->tk_client);
xprt = clnt->cl_xprt;
dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", dprintk("RPC: %5u %s(%s, %u, %u, %d)\n",
task->tk_pid, __func__, task->tk_pid, __func__,
clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot);
/* Autobind on cloned rpc clients is discouraged */
BUG_ON(clnt->cl_parent != clnt);
/* Put self on the wait queue to ensure we get notified if /* Put self on the wait queue to ensure we get notified if
* some other task is already attempting to bind the port */ * some other task is already attempting to bind the port */
rpc_sleep_on(&xprt->binding, task, NULL); rpc_sleep_on(&xprt->binding, task, NULL);
...@@ -549,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task) ...@@ -549,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task)
status = -ENOMEM; status = -ENOMEM;
dprintk("RPC: %5u %s: no memory available\n", dprintk("RPC: %5u %s: no memory available\n",
task->tk_pid, __func__); task->tk_pid, __func__);
goto bailout_nofree; goto bailout_release_client;
} }
map->r_prog = clnt->cl_prog; map->r_prog = clnt->cl_prog;
map->r_vers = clnt->cl_vers; map->r_vers = clnt->cl_vers;
...@@ -569,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task) ...@@ -569,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task)
task->tk_pid, __func__); task->tk_pid, __func__);
return; return;
} }
rpc_put_task(child);
task->tk_xprt->stat.bind_count++; xprt->stat.bind_count++;
rpc_put_task(child);
return; return;
bailout_release_client:
rpc_release_client(rpcb_clnt);
bailout_nofree: bailout_nofree:
rpcb_wake_rpcbind_waiters(xprt, status); rpcb_wake_rpcbind_waiters(xprt, status);
task->tk_status = status; task->tk_status = status;
......
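Not from the patch — a minimal user-space model, with hypothetical struct and field names, of the parent walk that rpcb_find_transport_owner() performs above: keep climbing the cl_parent links while the clone still shares its creator's transport and has no autobind of its own.

#include <stdio.h>

struct clnt {
	struct clnt *parent;	/* self-referencing at the root, as in RPC */
	int          xprt_id;	/* stands in for cl_xprt */
	int          autobind;	/* stands in for cl_autobind */
	const char  *name;
};

static struct clnt *find_transport_owner(struct clnt *c)
{
	struct clnt *parent = c->parent;

	while (parent != c) {
		if (parent->xprt_id != c->xprt_id)
			break;		/* parent uses a different transport */
		if (c->autobind)
			break;		/* this clone rebinds for itself */
		c = parent;
		parent = parent->parent;
	}
	return c;
}

int main(void)
{
	struct clnt owner = { .xprt_id = 1, .name = "owner" };
	struct clnt clone = { .parent = &owner, .xprt_id = 1, .name = "clone" };

	owner.parent = &owner;	/* root client points at itself */
	printf("owner of clone's transport: %s\n",
	       find_transport_owner(&clone)->name);
	return 0;
}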
...@@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport) ...@@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport)
goto out; goto out;
} }
result = -EINVAL; list_add_tail(&transport->list, &xprt_list);
if (try_module_get(THIS_MODULE)) { printk(KERN_INFO "RPC: Registered %s transport module.\n",
list_add_tail(&transport->list, &xprt_list); transport->name);
printk(KERN_INFO "RPC: Registered %s transport module.\n", result = 0;
transport->name);
result = 0;
}
out: out:
spin_unlock(&xprt_list_lock); spin_unlock(&xprt_list_lock);
...@@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport) ...@@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport)
"RPC: Unregistered %s transport module.\n", "RPC: Unregistered %s transport module.\n",
transport->name); transport->name);
list_del_init(&transport->list); list_del_init(&transport->list);
module_put(THIS_MODULE);
goto out; goto out;
} }
} }
......
...@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, ...@@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
} }
if (xdrbuf->tail[0].iov_len) { if (xdrbuf->tail[0].iov_len) {
/* the rpcrdma protocol allows us to omit any trailing
* xdr pad bytes, saving the server an RDMA operation. */
if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
return n;
if (n == nsegs) if (n == nsegs)
return 0; return 0;
seg[n].mr_page = NULL; seg[n].mr_page = NULL;
...@@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) ...@@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
if (hdrlen == 0) if (hdrlen == 0)
return -1; return -1;
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n", " headerp 0x%p base 0x%p lkey 0x%x\n",
__func__, transfertypes[wtype], hdrlen, rpclen, padlen, __func__, transfertypes[wtype], hdrlen, rpclen, padlen,
headerp, base, req->rl_iov.lkey); headerp, base, req->rl_iov.lkey);
...@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b ...@@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b
* Scatter inline received data back into provided iov's. * Scatter inline received data back into provided iov's.
*/ */
static void static void
rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
{ {
int i, npages, curlen, olen; int i, npages, curlen, olen;
char *destp; char *destp;
...@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) ...@@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len)
} else } else
rqst->rq_rcv_buf.tail[0].iov_len = 0; rqst->rq_rcv_buf.tail[0].iov_len = 0;
if (pad) {
/* implicit padding on terminal chunk */
unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base;
while (pad--)
p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0;
}
if (copy_len) if (copy_len)
dprintk("RPC: %s: %d bytes in" dprintk("RPC: %s: %d bytes in"
" %d extra segments (%d lost)\n", " %d extra segments (%d lost)\n",
...@@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) ...@@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
struct rpc_xprt *xprt = ep->rep_xprt; struct rpc_xprt *xprt = ep->rep_xprt;
spin_lock_bh(&xprt->transport_lock); spin_lock_bh(&xprt->transport_lock);
if (++xprt->connect_cookie == 0) /* maintain a reserved value */
++xprt->connect_cookie;
if (ep->rep_connected > 0) { if (ep->rep_connected > 0) {
if (!xprt_test_and_set_connected(xprt)) if (!xprt_test_and_set_connected(xprt))
xprt_wake_pending_tasks(xprt, 0); xprt_wake_pending_tasks(xprt, 0);
} else { } else {
if (xprt_test_and_clear_connected(xprt)) if (xprt_test_and_clear_connected(xprt))
xprt_wake_pending_tasks(xprt, ep->rep_connected); xprt_wake_pending_tasks(xprt, -ENOTCONN);
} }
spin_unlock_bh(&xprt->transport_lock); spin_unlock_bh(&xprt->transport_lock);
} }
...@@ -792,14 +805,20 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep) ...@@ -792,14 +805,20 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
((unsigned char *)iptr - (unsigned char *)headerp); ((unsigned char *)iptr - (unsigned char *)headerp);
status = rep->rr_len + rdmalen; status = rep->rr_len + rdmalen;
r_xprt->rx_stats.total_rdma_reply += rdmalen; r_xprt->rx_stats.total_rdma_reply += rdmalen;
/* special case - last chunk may omit padding */
if (rdmalen &= 3) {
rdmalen = 4 - rdmalen;
status += rdmalen;
}
} else { } else {
/* else ordinary inline */ /* else ordinary inline */
rdmalen = 0;
iptr = (__be32 *)((unsigned char *)headerp + 28); iptr = (__be32 *)((unsigned char *)headerp + 28);
rep->rr_len -= 28; /*sizeof *headerp;*/ rep->rr_len -= 28; /*sizeof *headerp;*/
status = rep->rr_len; status = rep->rr_len;
} }
/* Fix up the rpc results for upper layer */ /* Fix up the rpc results for upper layer */
rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen);
break; break;
case htonl(RDMA_NOMSG): case htonl(RDMA_NOMSG):
......
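A quick standalone check (not part of the patch) of the pad arithmetic used in the rpc_rdma.c hunks above: XDR data is 4-byte aligned, so when the server omits the trailing pad on the last write chunk, the client reconstructs it as 4 - (length mod 4) for any length that is not a multiple of 4.

#include <stdio.h>

/* Mirrors "if (rdmalen &= 3) { rdmalen = 4 - rdmalen; }" from
 * rpcrdma_reply_handler() above: the implicit pad restores 4-byte
 * XDR alignment when the terminal chunk arrived unpadded. */
static unsigned int implicit_pad(unsigned int rdmalen)
{
	rdmalen &= 3;
	return rdmalen ? 4 - rdmalen : 0;
}

int main(void)
{
	unsigned int lens[] = { 1021, 1022, 1023, 1024 };
	unsigned int i;

	for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
		printf("chunk of %u bytes -> %u pad byte(s)\n",
		       lens[i], implicit_pad(lens[i]));
	return 0;
}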
...@@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; ...@@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding; static unsigned int xprt_rdma_inline_write_padding;
#if !RPCRDMA_PERSISTENT_REGISTRATION static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ int xprt_rdma_pad_optimize = 0;
#else
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL;
#endif
#ifdef RPC_DEBUG #ifdef RPC_DEBUG
...@@ -139,6 +136,14 @@ static ctl_table xr_tunables_table[] = { ...@@ -139,6 +136,14 @@ static ctl_table xr_tunables_table[] = {
.extra1 = &min_memreg, .extra1 = &min_memreg,
.extra2 = &max_memreg, .extra2 = &max_memreg,
}, },
{
.ctl_name = CTL_UNNUMBERED,
.procname = "rdma_pad_optimize",
.data = &xprt_rdma_pad_optimize,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ {
.ctl_name = 0, .ctl_name = 0,
}, },
...@@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt) ...@@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
dprintk("RPC: %s: closing\n", __func__); dprintk("RPC: %s: closing\n", __func__);
if (r_xprt->rx_ep.rep_connected > 0)
xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt); xprt_disconnect_done(xprt);
(void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
} }
...@@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task) ...@@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task)
/* Reconnect */ /* Reconnect */
schedule_delayed_work(&r_xprt->rdma_connect, schedule_delayed_work(&r_xprt->rdma_connect,
xprt->reestablish_timeout); xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1;
if (xprt->reestablish_timeout > (30 * HZ))
xprt->reestablish_timeout = (30 * HZ);
else if (xprt->reestablish_timeout < (5 * HZ))
xprt->reestablish_timeout = (5 * HZ);
} else { } else {
schedule_delayed_work(&r_xprt->rdma_connect, 0); schedule_delayed_work(&r_xprt->rdma_connect, 0);
if (!RPC_IS_ASYNC(task)) if (!RPC_IS_ASYNC(task))
...@@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) ...@@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
} }
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
out: out:
req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_xdr_buf; return req->rl_xdr_buf;
outfail: outfail:
...@@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task) ...@@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task)
req->rl_reply->rr_xprt = xprt; req->rl_reply->rr_xprt = xprt;
} }
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { /* Must suppress retransmit to maintain credits */
xprt_disconnect_done(xprt); if (req->rl_connect_cookie == xprt->connect_cookie)
return -ENOTCONN; /* implies disconnect */ goto drop_connection;
} req->rl_connect_cookie = xprt->connect_cookie;
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
task->tk_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0; rqst->rq_bytes_sent = 0;
return 0; return 0;
drop_connection:
xprt_disconnect_done(xprt);
return -ENOTCONN; /* implies disconnect */
} }
static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
...@@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void) ...@@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void)
{ {
int rc; int rc;
dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n");
#ifdef RPC_DEBUG #ifdef RPC_DEBUG
if (sunrpc_table_header) { if (sunrpc_table_header) {
unregister_sysctl_table(sunrpc_table_header); unregister_sysctl_table(sunrpc_table_header);
......
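Not part of the patch — a small user-space illustration of the reconnect backoff added to xprt_rdma_connect() above: the delay doubles on each attempt and is clamped into the [5*HZ, 30*HZ] window. HZ is assumed to be 1000 here purely for the printout; the kernel value depends on configuration.

#include <stdio.h>

#define HZ 1000	/* illustrative tick rate only */

/* Same update the hunk applies to xprt->reestablish_timeout after
 * scheduling a reconnect: double, then clamp to the 5s..30s window. */
static unsigned long next_reestablish_timeout(unsigned long timeout)
{
	timeout <<= 1;
	if (timeout > 30 * HZ)
		timeout = 30 * HZ;
	else if (timeout < 5 * HZ)
		timeout = 5 * HZ;
	return timeout;
}

int main(void)
{
	unsigned long t = 0;	/* first reconnect after a fresh close */
	int attempt;

	for (attempt = 1; attempt <= 6; attempt++) {
		t = next_reestablish_timeout(t);
		printf("attempt %d: next delay %lu jiffies (~%lus)\n",
		       attempt, t, t / HZ);
	}
	return 0;
}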
...@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) ...@@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
switch (event->event) { switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED: case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED: case RDMA_CM_EVENT_ROUTE_RESOLVED:
ia->ri_async_rc = 0;
complete(&ia->ri_done); complete(&ia->ri_done);
break; break;
case RDMA_CM_EVENT_ADDR_ERROR: case RDMA_CM_EVENT_ADDR_ERROR:
...@@ -338,13 +339,32 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) ...@@ -338,13 +339,32 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
wake_up_all(&ep->rep_connect_wait); wake_up_all(&ep->rep_connect_wait);
break; break;
default: default:
ia->ri_async_rc = -EINVAL; dprintk("RPC: %s: unexpected CM event %d\n",
dprintk("RPC: %s: unexpected CM event %X\n",
__func__, event->event); __func__, event->event);
complete(&ia->ri_done);
break; break;
} }
#ifdef RPC_DEBUG
if (connstate == 1) {
int ird = attr.max_dest_rd_atomic;
int tird = ep->rep_remote_cma.responder_resources;
printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u "
"on %s, memreg %d slots %d ird %d%s\n",
NIPQUAD(addr->sin_addr.s_addr),
ntohs(addr->sin_port),
ia->ri_id->device->name,
ia->ri_memreg_strategy,
xprt->rx_buf.rb_max_requests,
ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
} else if (connstate < 0) {
printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u "
"closed (%d)\n",
NIPQUAD(addr->sin_addr.s_addr),
ntohs(addr->sin_port),
connstate);
}
#endif
return 0; return 0;
} }
...@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, ...@@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
struct rdma_cm_id *id; struct rdma_cm_id *id;
int rc; int rc;
init_completion(&ia->ri_done);
id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
if (IS_ERR(id)) { if (IS_ERR(id)) {
rc = PTR_ERR(id); rc = PTR_ERR(id);
...@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, ...@@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
return id; return id;
} }
ia->ri_async_rc = 0; ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
if (rc) { if (rc) {
dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
__func__, rc); __func__, rc);
goto out; goto out;
} }
wait_for_completion(&ia->ri_done); wait_for_completion_interruptible_timeout(&ia->ri_done,
msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
rc = ia->ri_async_rc; rc = ia->ri_async_rc;
if (rc) if (rc)
goto out; goto out;
ia->ri_async_rc = 0; ia->ri_async_rc = -ETIMEDOUT;
rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
if (rc) { if (rc) {
dprintk("RPC: %s: rdma_resolve_route() failed %i\n", dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
__func__, rc); __func__, rc);
goto out; goto out;
} }
wait_for_completion(&ia->ri_done); wait_for_completion_interruptible_timeout(&ia->ri_done,
msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1);
rc = ia->ri_async_rc; rc = ia->ri_async_rc;
if (rc) if (rc)
goto out; goto out;
...@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq) ...@@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq)
int int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{ {
int rc; int rc, mem_priv;
struct ib_device_attr devattr;
struct rpcrdma_ia *ia = &xprt->rx_ia; struct rpcrdma_ia *ia = &xprt->rx_ia;
init_completion(&ia->ri_done);
ia->ri_id = rpcrdma_create_id(xprt, ia, addr); ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
if (IS_ERR(ia->ri_id)) { if (IS_ERR(ia->ri_id)) {
rc = PTR_ERR(ia->ri_id); rc = PTR_ERR(ia->ri_id);
...@@ -442,6 +465,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) ...@@ -442,6 +465,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
goto out2; goto out2;
} }
/*
* Query the device to determine if the requested memory
* registration strategy is supported. If it isn't, set the
* strategy to a globally supported model.
*/
rc = ib_query_device(ia->ri_id->device, &devattr);
if (rc) {
dprintk("RPC: %s: ib_query_device failed %d\n",
__func__, rc);
goto out2;
}
if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
ia->ri_have_dma_lkey = 1;
ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
}
switch (memreg) {
case RPCRDMA_MEMWINDOWS:
case RPCRDMA_MEMWINDOWS_ASYNC:
if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
dprintk("RPC: %s: MEMWINDOWS registration "
"specified but not supported by adapter, "
"using slower RPCRDMA_REGISTER\n",
__func__);
memreg = RPCRDMA_REGISTER;
}
break;
case RPCRDMA_MTHCAFMR:
if (!ia->ri_id->device->alloc_fmr) {
#if RPCRDMA_PERSISTENT_REGISTRATION
dprintk("RPC: %s: MTHCAFMR registration "
"specified but not supported by adapter, "
"using riskier RPCRDMA_ALLPHYSICAL\n",
__func__);
memreg = RPCRDMA_ALLPHYSICAL;
#else
dprintk("RPC: %s: MTHCAFMR registration "
"specified but not supported by adapter, "
"using slower RPCRDMA_REGISTER\n",
__func__);
memreg = RPCRDMA_REGISTER;
#endif
}
break;
case RPCRDMA_FRMR:
/* Requires both frmr reg and local dma lkey */
if ((devattr.device_cap_flags &
(IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
(IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
#if RPCRDMA_PERSISTENT_REGISTRATION
dprintk("RPC: %s: FRMR registration "
"specified but not supported by adapter, "
"using riskier RPCRDMA_ALLPHYSICAL\n",
__func__);
memreg = RPCRDMA_ALLPHYSICAL;
#else
dprintk("RPC: %s: FRMR registration "
"specified but not supported by adapter, "
"using slower RPCRDMA_REGISTER\n",
__func__);
memreg = RPCRDMA_REGISTER;
#endif
}
break;
}
/* /*
* Optionally obtain an underlying physical identity mapping in * Optionally obtain an underlying physical identity mapping in
* order to do a memory window-based bind. This base registration * order to do a memory window-based bind. This base registration
...@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) ...@@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
* revoked after the corresponding completion similar to a storage * revoked after the corresponding completion similar to a storage
* adapter. * adapter.
*/ */
if (memreg > RPCRDMA_REGISTER) { switch (memreg) {
int mem_priv = IB_ACCESS_LOCAL_WRITE; case RPCRDMA_BOUNCEBUFFERS:
switch (memreg) { case RPCRDMA_REGISTER:
case RPCRDMA_FRMR:
break;
#if RPCRDMA_PERSISTENT_REGISTRATION #if RPCRDMA_PERSISTENT_REGISTRATION
case RPCRDMA_ALLPHYSICAL: case RPCRDMA_ALLPHYSICAL:
mem_priv |= IB_ACCESS_REMOTE_WRITE; mem_priv = IB_ACCESS_LOCAL_WRITE |
mem_priv |= IB_ACCESS_REMOTE_READ; IB_ACCESS_REMOTE_WRITE |
break; IB_ACCESS_REMOTE_READ;
goto register_setup;
#endif #endif
case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS: case RPCRDMA_MEMWINDOWS:
mem_priv |= IB_ACCESS_MW_BIND; mem_priv = IB_ACCESS_LOCAL_WRITE |
break; IB_ACCESS_MW_BIND;
default: goto register_setup;
case RPCRDMA_MTHCAFMR:
if (ia->ri_have_dma_lkey)
break; break;
} mem_priv = IB_ACCESS_LOCAL_WRITE;
register_setup:
ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
if (IS_ERR(ia->ri_bind_mem)) { if (IS_ERR(ia->ri_bind_mem)) {
printk(KERN_ALERT "%s: ib_get_dma_mr for " printk(KERN_ALERT "%s: ib_get_dma_mr for "
...@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) ...@@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
memreg = RPCRDMA_REGISTER; memreg = RPCRDMA_REGISTER;
ia->ri_bind_mem = NULL; ia->ri_bind_mem = NULL;
} }
break;
default:
printk(KERN_ERR "%s: invalid memory registration mode %d\n",
__func__, memreg);
rc = -EINVAL;
goto out2;
} }
dprintk("RPC: %s: memory registration strategy is %d\n",
__func__, memreg);
/* Else will do memory reg/dereg for each chunk */ /* Else will do memory reg/dereg for each chunk */
ia->ri_memreg_strategy = memreg; ia->ri_memreg_strategy = memreg;
...@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) ...@@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
return 0; return 0;
out2: out2:
rdma_destroy_id(ia->ri_id); rdma_destroy_id(ia->ri_id);
ia->ri_id = NULL;
out1: out1:
return rc; return rc;
} }
...@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) ...@@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
dprintk("RPC: %s: ib_dereg_mr returned %i\n", dprintk("RPC: %s: ib_dereg_mr returned %i\n",
__func__, rc); __func__, rc);
} }
if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
rdma_destroy_qp(ia->ri_id); if (ia->ri_id->qp)
rdma_destroy_qp(ia->ri_id);
rdma_destroy_id(ia->ri_id);
ia->ri_id = NULL;
}
if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) {
rc = ib_dealloc_pd(ia->ri_pd); rc = ib_dealloc_pd(ia->ri_pd);
dprintk("RPC: %s: ib_dealloc_pd returned %i\n", dprintk("RPC: %s: ib_dealloc_pd returned %i\n",
__func__, rc); __func__, rc);
} }
if (ia->ri_id != NULL && !IS_ERR(ia->ri_id))
rdma_destroy_id(ia->ri_id);
} }
/* /*
...@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ...@@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL; ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests; ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) { switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
/* Add room for frmr register and invalidate WRs */
ep->rep_attr.cap.max_send_wr *= 3;
if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
return -EINVAL;
break;
case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS: case RPCRDMA_MEMWINDOWS:
/* Add room for mw_binds+unbinds - overkill! */ /* Add room for mw_binds+unbinds - overkill! */
...@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ...@@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_remote_cma.private_data_len = 0; ep->rep_remote_cma.private_data_len = 0;
/* Client offers RDMA Read but does not initiate */ /* Client offers RDMA Read but does not initiate */
switch (ia->ri_memreg_strategy) { ep->rep_remote_cma.initiator_depth = 0;
case RPCRDMA_BOUNCEBUFFERS: if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS)
ep->rep_remote_cma.responder_resources = 0; ep->rep_remote_cma.responder_resources = 0;
break; else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
case RPCRDMA_MTHCAFMR: ep->rep_remote_cma.responder_resources = 32;
case RPCRDMA_REGISTER: else
ep->rep_remote_cma.responder_resources = cdata->max_requests *
(RPCRDMA_MAX_DATA_SEGS / 8);
break;
case RPCRDMA_MEMWINDOWS:
case RPCRDMA_MEMWINDOWS_ASYNC:
#if RPCRDMA_PERSISTENT_REGISTRATION
case RPCRDMA_ALLPHYSICAL:
#endif
ep->rep_remote_cma.responder_resources = cdata->max_requests *
(RPCRDMA_MAX_DATA_SEGS / 2);
break;
default:
break;
}
if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom)
ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
ep->rep_remote_cma.initiator_depth = 0;
ep->rep_remote_cma.retry_count = 7; ep->rep_remote_cma.retry_count = 7;
ep->rep_remote_cma.flow_control = 0; ep->rep_remote_cma.flow_control = 0;
...@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ...@@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
if (rc) if (rc)
dprintk("RPC: %s: rpcrdma_ep_disconnect" dprintk("RPC: %s: rpcrdma_ep_disconnect"
" returned %i\n", __func__, rc); " returned %i\n", __func__, rc);
rdma_destroy_qp(ia->ri_id);
ia->ri_id->qp = NULL;
} }
ep->rep_func = NULL;
/* padding - could be done in rpcrdma_buffer_destroy... */ /* padding - could be done in rpcrdma_buffer_destroy... */
if (ep->rep_pad_mr) { if (ep->rep_pad_mr) {
rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad);
ep->rep_pad_mr = NULL; ep->rep_pad_mr = NULL;
} }
if (ia->ri_id->qp) {
rdma_destroy_qp(ia->ri_id);
ia->ri_id->qp = NULL;
}
rpcrdma_clean_cq(ep->rep_cq); rpcrdma_clean_cq(ep->rep_cq);
rc = ib_destroy_cq(ep->rep_cq); rc = ib_destroy_cq(ep->rep_cq);
if (rc) if (rc)
...@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ...@@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
struct rdma_cm_id *id; struct rdma_cm_id *id;
int rc = 0; int rc = 0;
int retry_count = 0; int retry_count = 0;
int reconnect = (ep->rep_connected != 0);
if (reconnect) { if (ep->rep_connected != 0) {
struct rpcrdma_xprt *xprt; struct rpcrdma_xprt *xprt;
retry: retry:
rc = rpcrdma_ep_disconnect(ep, ia); rc = rpcrdma_ep_disconnect(ep, ia);
...@@ -745,6 +836,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ...@@ -745,6 +836,7 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
goto out; goto out;
} }
/* END TEMP */ /* END TEMP */
rdma_destroy_qp(ia->ri_id);
rdma_destroy_id(ia->ri_id); rdma_destroy_id(ia->ri_id);
ia->ri_id = id; ia->ri_id = id;
} }
...@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { ...@@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
} }
} }
/* Theoretically a client initiator_depth > 0 is not needed,
* but many peers fail to complete the connection unless they
* == responder_resources! */
if (ep->rep_remote_cma.initiator_depth !=
ep->rep_remote_cma.responder_resources)
ep->rep_remote_cma.initiator_depth =
ep->rep_remote_cma.responder_resources;
ep->rep_connected = 0; ep->rep_connected = 0;
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
...@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { ...@@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
goto out; goto out;
} }
if (reconnect)
return 0;
wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
/* /*
...@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { ...@@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
if (ep->rep_connected <= 0) { if (ep->rep_connected <= 0) {
/* Sometimes, the only way to reliably connect to remote /* Sometimes, the only way to reliably connect to remote
* CMs is to use same nonzero values for ORD and IRD. */ * CMs is to use same nonzero values for ORD and IRD. */
ep->rep_remote_cma.initiator_depth = if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
ep->rep_remote_cma.responder_resources; (ep->rep_remote_cma.responder_resources == 0 ||
if (ep->rep_remote_cma.initiator_depth == 0) ep->rep_remote_cma.initiator_depth !=
++ep->rep_remote_cma.initiator_depth; ep->rep_remote_cma.responder_resources)) {
if (ep->rep_remote_cma.responder_resources == 0) if (ep->rep_remote_cma.responder_resources == 0)
++ep->rep_remote_cma.responder_resources; ep->rep_remote_cma.responder_resources = 1;
if (retry_count++ == 0) ep->rep_remote_cma.initiator_depth =
ep->rep_remote_cma.responder_resources;
goto retry; goto retry;
}
rc = ep->rep_connected; rc = ep->rep_connected;
} else { } else {
dprintk("RPC: %s: connected\n", __func__); dprintk("RPC: %s: connected\n", __func__);
...@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, ...@@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
char *p; char *p;
size_t len; size_t len;
int i, rc; int i, rc;
struct rpcrdma_mw *r;
buf->rb_max_requests = cdata->max_requests; buf->rb_max_requests = cdata->max_requests;
spin_lock_init(&buf->rb_lock); spin_lock_init(&buf->rb_lock);
...@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, ...@@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* 2. arrays of struct rpcrdma_req to fill in pointers * 2. arrays of struct rpcrdma_req to fill in pointers
* 3. array of struct rpcrdma_rep for replies * 3. array of struct rpcrdma_rep for replies
* 4. padding, if any * 4. padding, if any
* 5. mw's, if any * 5. mw's, fmr's or frmr's, if any
* Send/recv buffers in req/rep need to be registered * Send/recv buffers in req/rep need to be registered
*/ */
...@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, ...@@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
(sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *));
len += cdata->padding; len += cdata->padding;
switch (ia->ri_memreg_strategy) { switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
len += buf->rb_max_requests * RPCRDMA_MAX_SEGS *
sizeof(struct rpcrdma_mw);
break;
case RPCRDMA_MTHCAFMR: case RPCRDMA_MTHCAFMR:
/* TBD we are perhaps overallocating here */ /* TBD we are perhaps overallocating here */
len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
...@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, ...@@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* and also reduce unbind-to-bind collision. * and also reduce unbind-to-bind collision.
*/ */
INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_mws);
r = (struct rpcrdma_mw *)p;
switch (ia->ri_memreg_strategy) { switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
RPCRDMA_MAX_SEGS);
if (IS_ERR(r->r.frmr.fr_mr)) {
rc = PTR_ERR(r->r.frmr.fr_mr);
dprintk("RPC: %s: ib_alloc_fast_reg_mr"
" failed %i\n", __func__, rc);
goto out;
}
r->r.frmr.fr_pgl =
ib_alloc_fast_reg_page_list(ia->ri_id->device,
RPCRDMA_MAX_SEGS);
if (IS_ERR(r->r.frmr.fr_pgl)) {
rc = PTR_ERR(r->r.frmr.fr_pgl);
dprintk("RPC: %s: "
"ib_alloc_fast_reg_page_list "
"failed %i\n", __func__, rc);
goto out;
}
list_add(&r->mw_list, &buf->rb_mws);
++r;
}
break;
case RPCRDMA_MTHCAFMR: case RPCRDMA_MTHCAFMR:
{
struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
struct ib_fmr_attr fa = {
RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT
};
/* TBD we are perhaps overallocating here */ /* TBD we are perhaps overallocating here */
for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
static struct ib_fmr_attr fa =
{ RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT };
r->r.fmr = ib_alloc_fmr(ia->ri_pd, r->r.fmr = ib_alloc_fmr(ia->ri_pd,
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ,
&fa); &fa);
...@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, ...@@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
list_add(&r->mw_list, &buf->rb_mws); list_add(&r->mw_list, &buf->rb_mws);
++r; ++r;
} }
}
break; break;
case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS: case RPCRDMA_MEMWINDOWS:
{
struct rpcrdma_mw *r = (struct rpcrdma_mw *)p;
/* Allocate one extra request's worth, for full cycling */ /* Allocate one extra request's worth, for full cycling */
for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
r->r.mw = ib_alloc_mw(ia->ri_pd); r->r.mw = ib_alloc_mw(ia->ri_pd);
...@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, ...@@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
list_add(&r->mw_list, &buf->rb_mws); list_add(&r->mw_list, &buf->rb_mws);
++r; ++r;
} }
}
break; break;
default: default:
break; break;
...@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) ...@@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{ {
int rc, i; int rc, i;
struct rpcrdma_ia *ia = rdmab_to_ia(buf); struct rpcrdma_ia *ia = rdmab_to_ia(buf);
struct rpcrdma_mw *r;
/* clean up in reverse order from create /* clean up in reverse order from create
* 1. recv mr memory (mr free, then kfree) * 1. recv mr memory (mr free, then kfree)
...@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) ...@@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
} }
if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
while (!list_empty(&buf->rb_mws)) { while (!list_empty(&buf->rb_mws)) {
struct rpcrdma_mw *r;
r = list_entry(buf->rb_mws.next, r = list_entry(buf->rb_mws.next,
struct rpcrdma_mw, mw_list); struct rpcrdma_mw, mw_list);
list_del(&r->mw_list); list_del(&r->mw_list);
switch (ia->ri_memreg_strategy) { switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
rc = ib_dereg_mr(r->r.frmr.fr_mr);
if (rc)
dprintk("RPC: %s:"
" ib_dereg_mr"
" failed %i\n",
__func__, rc);
ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
break;
case RPCRDMA_MTHCAFMR: case RPCRDMA_MTHCAFMR:
rc = ib_dealloc_fmr(r->r.fmr); rc = ib_dealloc_fmr(r->r.fmr);
if (rc) if (rc)
...@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) ...@@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{ {
struct rpcrdma_req *req; struct rpcrdma_req *req;
unsigned long flags; unsigned long flags;
int i;
struct rpcrdma_mw *r;
spin_lock_irqsave(&buffers->rb_lock, flags); spin_lock_irqsave(&buffers->rb_lock, flags);
if (buffers->rb_send_index == buffers->rb_max_requests) { if (buffers->rb_send_index == buffers->rb_max_requests) {
...@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) ...@@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
} }
buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; buffers->rb_send_bufs[buffers->rb_send_index++] = NULL;
if (!list_empty(&buffers->rb_mws)) { if (!list_empty(&buffers->rb_mws)) {
int i = RPCRDMA_MAX_SEGS - 1; i = RPCRDMA_MAX_SEGS - 1;
do { do {
struct rpcrdma_mw *r;
r = list_entry(buffers->rb_mws.next, r = list_entry(buffers->rb_mws.next,
struct rpcrdma_mw, mw_list); struct rpcrdma_mw, mw_list);
list_del(&r->mw_list); list_del(&r->mw_list);
...@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) ...@@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
req->rl_reply = NULL; req->rl_reply = NULL;
} }
switch (ia->ri_memreg_strategy) { switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
case RPCRDMA_MTHCAFMR: case RPCRDMA_MTHCAFMR:
case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS: case RPCRDMA_MEMWINDOWS:
...@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, ...@@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len,
va, len, DMA_BIDIRECTIONAL); va, len, DMA_BIDIRECTIONAL);
iov->length = len; iov->length = len;
if (ia->ri_bind_mem != NULL) { if (ia->ri_have_dma_lkey) {
*mrp = NULL;
iov->lkey = ia->ri_dma_lkey;
return 0;
} else if (ia->ri_bind_mem != NULL) {
*mrp = NULL; *mrp = NULL;
iov->lkey = ia->ri_bind_mem->lkey; iov->lkey = ia->ri_bind_mem->lkey;
return 0; return 0;
...@@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) ...@@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg)
seg->mr_dma, seg->mr_dmalen, seg->mr_dir); seg->mr_dma, seg->mr_dmalen, seg->mr_dir);
} }
static int
rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
int *nsegs, int writing, struct rpcrdma_ia *ia,
struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_mr_seg *seg1 = seg;
struct ib_send_wr frmr_wr, *bad_wr;
u8 key;
int len, pageoff;
int i, rc;
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
seg1->mr_len += pageoff;
len = -pageoff;
if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
*nsegs = RPCRDMA_MAX_DATA_SEGS;
for (i = 0; i < *nsegs;) {
rpcrdma_map_one(ia, seg, writing);
seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma;
len += seg->mr_len;
++seg;
++i;
/* Check for holes */
if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
dprintk("RPC: %s: Using frmr %p to map %d segments\n",
__func__, seg1->mr_chunk.rl_mw, i);
/* Bump the key */
key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
/* Prepare FRMR WR */
memset(&frmr_wr, 0, sizeof frmr_wr);
frmr_wr.opcode = IB_WR_FAST_REG_MR;
frmr_wr.send_flags = 0; /* unsignaled */
frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma;
frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
frmr_wr.wr.fast_reg.page_list_len = i;
frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT;
frmr_wr.wr.fast_reg.access_flags = (writing ?
IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ);
frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr);
if (rc) {
dprintk("RPC: %s: failed ib_post_send for register,"
" status %i\n", __func__, rc);
while (i--)
rpcrdma_unmap_one(ia, --seg);
} else {
seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
seg1->mr_base = seg1->mr_dma + pageoff;
seg1->mr_nsegs = i;
seg1->mr_len = len;
}
*nsegs = i;
return rc;
}
static int
rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg,
struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_mr_seg *seg1 = seg;
struct ib_send_wr invalidate_wr, *bad_wr;
int rc;
while (seg1->mr_nsegs--)
rpcrdma_unmap_one(ia, seg++);
memset(&invalidate_wr, 0, sizeof invalidate_wr);
invalidate_wr.opcode = IB_WR_LOCAL_INV;
invalidate_wr.send_flags = 0; /* unsignaled */
invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey;
DECR_CQCOUNT(&r_xprt->rx_ep);
rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
if (rc)
dprintk("RPC: %s: failed ib_post_send for invalidate,"
" status %i\n", __func__, rc);
return rc;
}
static int
rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg,
int *nsegs, int writing, struct rpcrdma_ia *ia)
{
struct rpcrdma_mr_seg *seg1 = seg;
u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
int len, pageoff, i, rc;
pageoff = offset_in_page(seg1->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
seg1->mr_len += pageoff;
len = -pageoff;
if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
*nsegs = RPCRDMA_MAX_DATA_SEGS;
for (i = 0; i < *nsegs;) {
rpcrdma_map_one(ia, seg, writing);
physaddrs[i] = seg->mr_dma;
len += seg->mr_len;
++seg;
++i;
/* Check for holes */
if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
physaddrs, i, seg1->mr_dma);
if (rc) {
dprintk("RPC: %s: failed ib_map_phys_fmr "
"%u@0x%llx+%i (%d)... status %i\n", __func__,
len, (unsigned long long)seg1->mr_dma,
pageoff, i, rc);
while (i--)
rpcrdma_unmap_one(ia, --seg);
} else {
seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
seg1->mr_base = seg1->mr_dma + pageoff;
seg1->mr_nsegs = i;
seg1->mr_len = len;
}
*nsegs = i;
return rc;
}
static int
rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
struct rpcrdma_ia *ia)
{
struct rpcrdma_mr_seg *seg1 = seg;
LIST_HEAD(l);
int rc;
list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l);
rc = ib_unmap_fmr(&l);
while (seg1->mr_nsegs--)
rpcrdma_unmap_one(ia, seg++);
if (rc)
dprintk("RPC: %s: failed ib_unmap_fmr,"
" status %i\n", __func__, rc);
return rc;
}
static int
rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
int *nsegs, int writing, struct rpcrdma_ia *ia,
struct rpcrdma_xprt *r_xprt)
{
int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
IB_ACCESS_REMOTE_READ);
struct ib_mw_bind param;
int rc;
*nsegs = 1;
rpcrdma_map_one(ia, seg, writing);
param.mr = ia->ri_bind_mem;
param.wr_id = 0ULL; /* no send cookie */
param.addr = seg->mr_dma;
param.length = seg->mr_len;
param.send_flags = 0;
param.mw_access_flags = mem_priv;
DECR_CQCOUNT(&r_xprt->rx_ep);
rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
if (rc) {
dprintk("RPC: %s: failed ib_bind_mw "
"%u@0x%llx status %i\n",
__func__, seg->mr_len,
(unsigned long long)seg->mr_dma, rc);
rpcrdma_unmap_one(ia, seg);
} else {
seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
seg->mr_base = param.addr;
seg->mr_nsegs = 1;
}
return rc;
}
static int
rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
struct rpcrdma_ia *ia,
struct rpcrdma_xprt *r_xprt, void **r)
{
struct ib_mw_bind param;
LIST_HEAD(l);
int rc;
BUG_ON(seg->mr_nsegs != 1);
param.mr = ia->ri_bind_mem;
param.addr = 0ULL; /* unbind */
param.length = 0;
param.mw_access_flags = 0;
if (*r) {
param.wr_id = (u64) (unsigned long) *r;
param.send_flags = IB_SEND_SIGNALED;
INIT_CQCOUNT(&r_xprt->rx_ep);
} else {
param.wr_id = 0ULL;
param.send_flags = 0;
DECR_CQCOUNT(&r_xprt->rx_ep);
}
rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
rpcrdma_unmap_one(ia, seg);
if (rc)
dprintk("RPC: %s: failed ib_(un)bind_mw,"
" status %i\n", __func__, rc);
else
*r = NULL; /* will upcall on completion */
return rc;
}
static int
rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
int *nsegs, int writing, struct rpcrdma_ia *ia)
{
int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
IB_ACCESS_REMOTE_READ);
struct rpcrdma_mr_seg *seg1 = seg;
struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
int len, i, rc = 0;
if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
*nsegs = RPCRDMA_MAX_DATA_SEGS;
for (len = 0, i = 0; i < *nsegs;) {
rpcrdma_map_one(ia, seg, writing);
ipb[i].addr = seg->mr_dma;
ipb[i].size = seg->mr_len;
len += seg->mr_len;
++seg;
++i;
/* Check for holes */
if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
break;
}
seg1->mr_base = seg1->mr_dma;
seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
ipb, i, mem_priv, &seg1->mr_base);
if (IS_ERR(seg1->mr_chunk.rl_mr)) {
rc = PTR_ERR(seg1->mr_chunk.rl_mr);
dprintk("RPC: %s: failed ib_reg_phys_mr "
"%u@0x%llx (%d)... status %i\n",
__func__, len,
(unsigned long long)seg1->mr_dma, i, rc);
while (i--)
rpcrdma_unmap_one(ia, --seg);
} else {
seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
seg1->mr_nsegs = i;
seg1->mr_len = len;
}
*nsegs = i;
return rc;
}
static int
rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
struct rpcrdma_ia *ia)
{
struct rpcrdma_mr_seg *seg1 = seg;
int rc;
rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
seg1->mr_chunk.rl_mr = NULL;
while (seg1->mr_nsegs--)
rpcrdma_unmap_one(ia, seg++);
if (rc)
dprintk("RPC: %s: failed ib_dereg_mr,"
" status %i\n", __func__, rc);
return rc;
}
int int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg, rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
int nsegs, int writing, struct rpcrdma_xprt *r_xprt) int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
{ {
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
IB_ACCESS_REMOTE_READ);
struct rpcrdma_mr_seg *seg1 = seg;
int i;
int rc = 0; int rc = 0;
switch (ia->ri_memreg_strategy) { switch (ia->ri_memreg_strategy) {
...@@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, ...@@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
break; break;
#endif #endif
/* Registration using fast memory registration */ /* Registration using frmr registration */
case RPCRDMA_FRMR:
rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt);
break;
/* Registration using fmr memory registration */
case RPCRDMA_MTHCAFMR: case RPCRDMA_MTHCAFMR:
{ rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
u64 physaddrs[RPCRDMA_MAX_DATA_SEGS];
int len, pageoff = offset_in_page(seg->mr_offset);
seg1->mr_offset -= pageoff; /* start of page */
seg1->mr_len += pageoff;
len = -pageoff;
if (nsegs > RPCRDMA_MAX_DATA_SEGS)
nsegs = RPCRDMA_MAX_DATA_SEGS;
for (i = 0; i < nsegs;) {
rpcrdma_map_one(ia, seg, writing);
physaddrs[i] = seg->mr_dma;
len += seg->mr_len;
++seg;
++i;
/* Check for holes */
if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
break;
}
nsegs = i;
rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr,
physaddrs, nsegs, seg1->mr_dma);
if (rc) {
dprintk("RPC: %s: failed ib_map_phys_fmr "
"%u@0x%llx+%i (%d)... status %i\n", __func__,
len, (unsigned long long)seg1->mr_dma,
pageoff, nsegs, rc);
while (nsegs--)
rpcrdma_unmap_one(ia, --seg);
} else {
seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey;
seg1->mr_base = seg1->mr_dma + pageoff;
seg1->mr_nsegs = nsegs;
seg1->mr_len = len;
}
}
break; break;
/* Registration using memory windows */ /* Registration using memory windows */
case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS: case RPCRDMA_MEMWINDOWS:
{ rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
struct ib_mw_bind param;
rpcrdma_map_one(ia, seg, writing);
param.mr = ia->ri_bind_mem;
param.wr_id = 0ULL; /* no send cookie */
param.addr = seg->mr_dma;
param.length = seg->mr_len;
param.send_flags = 0;
param.mw_access_flags = mem_priv;
DECR_CQCOUNT(&r_xprt->rx_ep);
rc = ib_bind_mw(ia->ri_id->qp,
seg->mr_chunk.rl_mw->r.mw, &param);
if (rc) {
dprintk("RPC: %s: failed ib_bind_mw "
"%u@0x%llx status %i\n",
__func__, seg->mr_len,
(unsigned long long)seg->mr_dma, rc);
rpcrdma_unmap_one(ia, seg);
} else {
seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
seg->mr_base = param.addr;
seg->mr_nsegs = 1;
nsegs = 1;
}
}
break; break;
/* Default registration each time */ /* Default registration each time */
default: default:
{ rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
int len = 0;
if (nsegs > RPCRDMA_MAX_DATA_SEGS)
nsegs = RPCRDMA_MAX_DATA_SEGS;
for (i = 0; i < nsegs;) {
rpcrdma_map_one(ia, seg, writing);
ipb[i].addr = seg->mr_dma;
ipb[i].size = seg->mr_len;
len += seg->mr_len;
++seg;
++i;
/* Check for holes */
if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
break;
}
nsegs = i;
seg1->mr_base = seg1->mr_dma;
seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
ipb, nsegs, mem_priv, &seg1->mr_base);
if (IS_ERR(seg1->mr_chunk.rl_mr)) {
rc = PTR_ERR(seg1->mr_chunk.rl_mr);
dprintk("RPC: %s: failed ib_reg_phys_mr "
"%u@0x%llx (%d)... status %i\n",
__func__, len,
(unsigned long long)seg1->mr_dma, nsegs, rc);
while (nsegs--)
rpcrdma_unmap_one(ia, --seg);
} else {
seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
seg1->mr_nsegs = nsegs;
seg1->mr_len = len;
}
}
break; break;
} }
if (rc) if (rc)
...@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, ...@@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
struct rpcrdma_xprt *r_xprt, void *r) struct rpcrdma_xprt *r_xprt, void *r)
{ {
struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ia *ia = &r_xprt->rx_ia;
struct rpcrdma_mr_seg *seg1 = seg;
int nsegs = seg->mr_nsegs, rc; int nsegs = seg->mr_nsegs, rc;
switch (ia->ri_memreg_strategy) { switch (ia->ri_memreg_strategy) {
...@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, ...@@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
break; break;
#endif #endif
case RPCRDMA_FRMR:
rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt);
break;
case RPCRDMA_MTHCAFMR: case RPCRDMA_MTHCAFMR:
{ rc = rpcrdma_deregister_fmr_external(seg, ia);
LIST_HEAD(l);
list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l);
rc = ib_unmap_fmr(&l);
while (seg1->mr_nsegs--)
rpcrdma_unmap_one(ia, seg++);
}
if (rc)
dprintk("RPC: %s: failed ib_unmap_fmr,"
" status %i\n", __func__, rc);
break; break;
case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS: case RPCRDMA_MEMWINDOWS:
{ rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r);
struct ib_mw_bind param;
BUG_ON(nsegs != 1);
param.mr = ia->ri_bind_mem;
param.addr = 0ULL; /* unbind */
param.length = 0;
param.mw_access_flags = 0;
if (r) {
param.wr_id = (u64) (unsigned long) r;
param.send_flags = IB_SEND_SIGNALED;
INIT_CQCOUNT(&r_xprt->rx_ep);
} else {
param.wr_id = 0ULL;
param.send_flags = 0;
DECR_CQCOUNT(&r_xprt->rx_ep);
}
rc = ib_bind_mw(ia->ri_id->qp,
seg->mr_chunk.rl_mw->r.mw, &param);
rpcrdma_unmap_one(ia, seg);
}
if (rc)
dprintk("RPC: %s: failed ib_(un)bind_mw,"
" status %i\n", __func__, rc);
else
r = NULL; /* will upcall on completion */
break; break;
default: default:
rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); rc = rpcrdma_deregister_default_external(seg, ia);
seg1->mr_chunk.rl_mr = NULL;
while (seg1->mr_nsegs--)
rpcrdma_unmap_one(ia, seg++);
if (rc)
dprintk("RPC: %s: failed ib_dereg_mr,"
" status %i\n", __func__, rc);
break; break;
} }
if (r) { if (r) {
......
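Not from the patch — a tiny model of the "bump the key" step in rpcrdma_register_frmr_external() above. The low 8 bits of the FRMR rkey act as a generation counter that is incremented before each fast-register so a stale rkey from an earlier registration cannot match; how ib_update_fast_reg_key() folds the new key into the rkey is up to the device driver, so this only demonstrates the counter arithmetic under that assumption.

#include <stdio.h>
#include <stdint.h>

/* Assumed composition: keep the upper 24 bits, replace the low byte
 * with the incremented key, wrapping from 0xFF back to 0x00. */
static uint32_t bump_frmr_rkey(uint32_t rkey)
{
	uint8_t key = (uint8_t)(rkey & 0x000000FF);

	key++;
	return (rkey & 0xFFFFFF00u) | key;
}

int main(void)
{
	uint32_t rkey = 0x1234ABFE;
	int i;

	for (i = 0; i < 3; i++) {
		uint32_t next = bump_frmr_rkey(rkey);

		printf("rkey 0x%08X -> 0x%08X\n", rkey, next);
		rkey = next;
	}
	return 0;
}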
...@@ -51,6 +51,9 @@ ...@@ -51,6 +51,9 @@
#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
/* /*
* Interface Adapter -- one per transport instance * Interface Adapter -- one per transport instance
*/ */
...@@ -58,6 +61,8 @@ struct rpcrdma_ia { ...@@ -58,6 +61,8 @@ struct rpcrdma_ia {
struct rdma_cm_id *ri_id; struct rdma_cm_id *ri_id;
struct ib_pd *ri_pd; struct ib_pd *ri_pd;
struct ib_mr *ri_bind_mem; struct ib_mr *ri_bind_mem;
u32 ri_dma_lkey;
int ri_have_dma_lkey;
struct completion ri_done; struct completion ri_done;
int ri_async_rc; int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy; enum rpcrdma_memreg ri_memreg_strategy;
...@@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ ...@@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
union { union {
struct ib_mw *mw; struct ib_mw *mw;
struct ib_fmr *fmr; struct ib_fmr *fmr;
struct {
struct ib_fast_reg_page_list *fr_pgl;
struct ib_mr *fr_mr;
} frmr;
} r; } r;
struct list_head mw_list; struct list_head mw_list;
} *rl_mw; } *rl_mw;
...@@ -175,6 +184,7 @@ struct rpcrdma_req { ...@@ -175,6 +184,7 @@ struct rpcrdma_req {
size_t rl_size; /* actual length of buffer */ size_t rl_size; /* actual length of buffer */
unsigned int rl_niovs; /* 0, 2 or 4 */ unsigned int rl_niovs; /* 0, 2 or 4 */
unsigned int rl_nchunks; /* non-zero if chunks */ unsigned int rl_nchunks; /* non-zero if chunks */
unsigned int rl_connect_cookie; /* retry detection */
struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ struct rpcrdma_buffer *rl_buffer; /* home base for this structure */
struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ struct rpcrdma_rep *rl_reply;/* holder for reply buffer */
struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */
...@@ -198,7 +208,7 @@ struct rpcrdma_buffer { ...@@ -198,7 +208,7 @@ struct rpcrdma_buffer {
atomic_t rb_credits; /* most recent server credits */ atomic_t rb_credits; /* most recent server credits */
unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */
int rb_max_requests;/* client max requests */ int rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs */ struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
int rb_send_index; int rb_send_index;
struct rpcrdma_req **rb_send_bufs; struct rpcrdma_req **rb_send_bufs;
int rb_recv_index; int rb_recv_index;
...@@ -273,6 +283,11 @@ struct rpcrdma_xprt { ...@@ -273,6 +283,11 @@ struct rpcrdma_xprt {
#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data)
/* Setting this to 0 ensures interoperability with early servers.
* Setting this to 1 enhances certain unaligned read/write performance.
* Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
extern int xprt_rdma_pad_optimize;
/* /*
* Interface Adapter calls - xprtrdma/verbs.c * Interface Adapter calls - xprtrdma/verbs.c
*/ */
......
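Not part of the commit — one way to flip the new tunable from user space. The exact procfs path is an assumption (the parent sysctl directory is not visible in this diff); on mainline kernels the sunrpc tunables are exposed under /proc/sys/sunrpc/.

#include <stdio.h>

int main(void)
{
	/* Assumed location of the "rdma_pad_optimize" entry registered above. */
	const char *path = "/proc/sys/sunrpc/rdma_pad_optimize";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* 0 keeps interoperability with early servers; 1 enables the
	 * unaligned read/write optimization described in the header comment. */
	fputs("1\n", f);
	fclose(f);
	return 0;
}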