Commit 8bda9557 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfsd-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux

Pull nfsd updates from Chuck Lever:
 "New features:

   - Support for server-side disconnect injection via debugfs

   - Protocol definitions for new RPC_AUTH_TLS authentication flavor

  Performance improvements:

   - Reduce page allocator traffic in the NFSD splice read actor

   - Reduce CPU utilization in svcrdma's Send completion handler

  Notable bug fixes:

   - Stabilize lockd operation when re-exporting NFS mounts

   - Fix the use of %.*s in NFSD tracepoints

   - Fix /proc/sys/fs/nfs/nsm_use_hostnames"

* tag 'nfsd-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (31 commits)
  nfsd: fix crash on LOCKT on reexported NFSv3
  nfs: don't allow reexport reclaims
  lockd: don't attempt blocking locks on nfs reexports
  nfs: don't atempt blocking locks on nfs reexports
  Keep read and write fds with each nlm_file
  lockd: update nlm_lookup_file reexport comment
  nlm: minor refactoring
  nlm: minor nlm_lookup_file argument change
  lockd: lockd server-side shouldn't set fl_ops
  SUNRPC: Add documentation for the fail_sunrpc/ directory
  SUNRPC: Server-side disconnect injection
  SUNRPC: Move client-side disconnect injection
  SUNRPC: Add a /sys/kernel/debug/fail_sunrpc/ directory
  svcrdma: xpt_bc_xprt is already clear in __svc_rdma_free()
  nfsd4: Fix forced-expiry locking
  rpc: fix gss_svc_init cleanup on failure
  SUNRPC: Add RPC_AUTH_TLS protocol numbers
  lockd: change the proc_handler for nsm_use_hostnames
  sysctl: introduce new proc handler proc_dobool
  SUNRPC: Fix a NULL pointer deref in trace_svc_stats_latency()
  ...
parents 4529fb15 0bcc7ca4
......@@ -24,6 +24,10 @@ Available fault injection capabilities
injects futex deadlock and uaddr fault errors.
- fail_sunrpc
injects kernel RPC client and server failures.
- fail_make_request
injects disk IO errors on devices permitted by setting
......@@ -151,6 +155,20 @@ configuration of fault-injection capabilities.
default is 'N', setting it to 'Y' will disable failure injections
when dealing with private (address space) futexes.
- /sys/kernel/debug/fail_sunrpc/ignore-client-disconnect:
Format: { 'Y' | 'N' }
default is 'N', setting it to 'Y' will disable disconnect
injection on the RPC client.
- /sys/kernel/debug/fail_sunrpc/ignore-server-disconnect:
Format: { 'Y' | 'N' }
default is 'N', setting it to 'Y' will disable disconnect
injection on the RPC server.
- /sys/kernel/debug/fail_function/inject:
Format: { 'function-name' | '!function-name' | '' }
......
......@@ -584,7 +584,7 @@ static struct ctl_table nlm_sysctls[] = {
.data = &nsm_use_hostnames,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
.proc_handler = proc_dobool,
},
{
.procname = "nsm_local_state",
......
......@@ -40,12 +40,15 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Obtain file pointer. Not used by FREE_ALL call. */
if (filp != NULL) {
if ((error = nlm_lookup_file(rqstp, &file, &lock->fh)) != 0)
int mode = lock_to_openmode(&lock->fl);
error = nlm_lookup_file(rqstp, &file, lock);
if (error)
goto no_locks;
*filp = file;
/* Set up the missing parts of the file_lock structure */
lock->fl.fl_file = file->f_file;
lock->fl.fl_file = file->f_file[mode];
lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
......
......@@ -31,6 +31,7 @@
#include <linux/lockd/nlm.h>
#include <linux/lockd/lockd.h>
#include <linux/kthread.h>
#include <linux/exportfs.h>
#define NLMDBG_FACILITY NLMDBG_SVCLOCK
......@@ -395,28 +396,10 @@ nlmsvc_release_lockowner(struct nlm_lock *lock)
nlmsvc_put_lockowner(lock->fl.fl_owner);
}
static void nlmsvc_locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
struct nlm_lockowner *nlm_lo = (struct nlm_lockowner *)fl->fl_owner;
new->fl_owner = nlmsvc_get_lockowner(nlm_lo);
}
static void nlmsvc_locks_release_private(struct file_lock *fl)
{
nlmsvc_put_lockowner((struct nlm_lockowner *)fl->fl_owner);
}
static const struct file_lock_operations nlmsvc_lock_ops = {
.fl_copy_lock = nlmsvc_locks_copy_lock,
.fl_release_private = nlmsvc_locks_release_private,
};
void nlmsvc_locks_init_private(struct file_lock *fl, struct nlm_host *host,
pid_t pid)
{
fl->fl_owner = nlmsvc_find_lockowner(host, pid);
if (fl->fl_owner != NULL)
fl->fl_ops = &nlmsvc_lock_ops;
}
/*
......@@ -488,17 +471,24 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_cookie *cookie, int reclaim)
{
struct nlm_block *block = NULL;
struct inode *inode = nlmsvc_file_inode(file);
int error;
int mode;
int async_block = 0;
__be32 ret;
dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n",
locks_inode(file->f_file)->i_sb->s_id,
locks_inode(file->f_file)->i_ino,
inode->i_sb->s_id, inode->i_ino,
lock->fl.fl_type, lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end,
wait);
if (inode->i_sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS) {
async_block = wait;
wait = 0;
}
/* Lock file against concurrent access */
mutex_lock(&file->f_mutex);
/* Get existing block (in case client is busy-waiting)
......@@ -542,7 +532,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
if (!wait)
lock->fl.fl_flags &= ~FL_SLEEP;
error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
mode = lock_to_openmode(&lock->fl);
error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
lock->fl.fl_flags &= ~FL_SLEEP;
dprintk("lockd: vfs_lock_file returned %d\n", error);
......@@ -558,7 +549,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
*/
if (wait)
break;
ret = nlm_lck_denied;
ret = async_block ? nlm_lck_blocked : nlm_lck_denied;
goto out;
case FILE_LOCK_DEFERRED:
if (wait)
......@@ -595,12 +586,13 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
struct nlm_lock *conflock, struct nlm_cookie *cookie)
{
int error;
int mode;
__be32 ret;
struct nlm_lockowner *test_owner;
dprintk("lockd: nlmsvc_testlock(%s/%ld, ty=%d, %Ld-%Ld)\n",
locks_inode(file->f_file)->i_sb->s_id,
locks_inode(file->f_file)->i_ino,
nlmsvc_file_inode(file)->i_sb->s_id,
nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_type,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
......@@ -613,7 +605,8 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
/* If there's a conflicting lock, remember to clean up the test lock */
test_owner = (struct nlm_lockowner *)lock->fl.fl_owner;
error = vfs_test_lock(file->f_file, &lock->fl);
mode = lock_to_openmode(&lock->fl);
error = vfs_test_lock(file->f_file[mode], &lock->fl);
if (error) {
/* We can't currently deal with deferred test requests */
if (error == FILE_LOCK_DEFERRED)
......@@ -634,7 +627,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
conflock->caller = "somehost"; /* FIXME */
conflock->len = strlen(conflock->caller);
conflock->oh.len = 0; /* don't return OH info */
conflock->svid = ((struct nlm_lockowner *)lock->fl.fl_owner)->pid;
conflock->svid = lock->fl.fl_pid;
conflock->fl.fl_type = lock->fl.fl_type;
conflock->fl.fl_start = lock->fl.fl_start;
conflock->fl.fl_end = lock->fl.fl_end;
......@@ -659,11 +652,11 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
__be32
nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
{
int error;
int error = 0;
dprintk("lockd: nlmsvc_unlock(%s/%ld, pi=%d, %Ld-%Ld)\n",
locks_inode(file->f_file)->i_sb->s_id,
locks_inode(file->f_file)->i_ino,
nlmsvc_file_inode(file)->i_sb->s_id,
nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
......@@ -672,7 +665,12 @@ nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
nlmsvc_cancel_blocked(net, file, lock);
lock->fl.fl_type = F_UNLCK;
error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
if (file->f_file[O_RDONLY])
error = vfs_lock_file(file->f_file[O_RDONLY], F_SETLK,
&lock->fl, NULL);
if (file->f_file[O_WRONLY])
error = vfs_lock_file(file->f_file[O_WRONLY], F_SETLK,
&lock->fl, NULL);
return (error < 0)? nlm_lck_denied_nolocks : nlm_granted;
}
......@@ -689,10 +687,11 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
{
struct nlm_block *block;
int status = 0;
int mode;
dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
locks_inode(file->f_file)->i_sb->s_id,
locks_inode(file->f_file)->i_ino,
nlmsvc_file_inode(file)->i_sb->s_id,
nlmsvc_file_inode(file)->i_ino,
lock->fl.fl_pid,
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
......@@ -704,7 +703,8 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
block = nlmsvc_lookup_block(file, lock);
mutex_unlock(&file->f_mutex);
if (block != NULL) {
vfs_cancel_lock(block->b_file->f_file,
mode = lock_to_openmode(&lock->fl);
vfs_cancel_lock(block->b_file->f_file[mode],
&block->b_call->a_args.lock.fl);
status = nlmsvc_unlink_block(block);
nlmsvc_release_block(block);
......@@ -788,9 +788,21 @@ nlmsvc_notify_blocked(struct file_lock *fl)
printk(KERN_WARNING "lockd: notification for unknown block!\n");
}
static fl_owner_t nlmsvc_get_owner(fl_owner_t owner)
{
return nlmsvc_get_lockowner(owner);
}
static void nlmsvc_put_owner(fl_owner_t owner)
{
nlmsvc_put_lockowner(owner);
}
const struct lock_manager_operations nlmsvc_lock_operations = {
.lm_notify = nlmsvc_notify_blocked,
.lm_grant = nlmsvc_grant_deferred,
.lm_get_owner = nlmsvc_get_owner,
.lm_put_owner = nlmsvc_put_owner,
};
/*
......@@ -809,6 +821,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
{
struct nlm_file *file = block->b_file;
struct nlm_lock *lock = &block->b_call->a_args.lock;
int mode;
int error;
loff_t fl_start, fl_end;
......@@ -834,7 +847,8 @@ nlmsvc_grant_blocked(struct nlm_block *block)
lock->fl.fl_flags |= FL_SLEEP;
fl_start = lock->fl.fl_start;
fl_end = lock->fl.fl_end;
error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
mode = lock_to_openmode(&lock->fl);
error = vfs_lock_file(file->f_file[mode], F_SETLK, &lock->fl, NULL);
lock->fl.fl_flags &= ~FL_SLEEP;
lock->fl.fl_start = fl_start;
lock->fl.fl_end = fl_end;
......
......@@ -55,6 +55,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
struct nlm_host *host = NULL;
struct nlm_file *file = NULL;
struct nlm_lock *lock = &argp->lock;
int mode;
__be32 error = 0;
/* nfsd callbacks must have been installed for this procedure */
......@@ -69,13 +70,14 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
/* Obtain file pointer. Not used by FREE_ALL call. */
if (filp != NULL) {
error = cast_status(nlm_lookup_file(rqstp, &file, &lock->fh));
error = cast_status(nlm_lookup_file(rqstp, &file, lock));
if (error != 0)
goto no_locks;
*filp = file;
/* Set up the missing parts of the file_lock structure */
lock->fl.fl_file = file->f_file;
mode = lock_to_openmode(&lock->fl);
lock->fl.fl_file = file->f_file[mode];
lock->fl.fl_pid = current->tgid;
lock->fl.fl_lmops = &nlmsvc_lock_operations;
nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid);
......
......@@ -45,7 +45,7 @@ static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
static inline void nlm_debug_print_file(char *msg, struct nlm_file *file)
{
struct inode *inode = locks_inode(file->f_file);
struct inode *inode = nlmsvc_file_inode(file);
dprintk("lockd: %s %s/%ld\n",
msg, inode->i_sb->s_id, inode->i_ino);
......@@ -71,56 +71,75 @@ static inline unsigned int file_hash(struct nfs_fh *f)
return tmp & (FILE_NRHASH - 1);
}
int lock_to_openmode(struct file_lock *lock)
{
return (lock->fl_type == F_WRLCK) ? O_WRONLY : O_RDONLY;
}
/*
* Open the file. Note that if we're reexporting, for example,
* this could block the lockd thread for a while.
*
* We have to make sure we have the right credential to open
* the file.
*/
static __be32 nlm_do_fopen(struct svc_rqst *rqstp,
struct nlm_file *file, int mode)
{
struct file **fp = &file->f_file[mode];
__be32 nfserr;
if (*fp)
return 0;
nfserr = nlmsvc_ops->fopen(rqstp, &file->f_handle, fp, mode);
if (nfserr)
dprintk("lockd: open failed (error %d)\n", nfserr);
return nfserr;
}
/*
* Lookup file info. If it doesn't exist, create a file info struct
* and open a (VFS) file for the given inode.
*
* FIXME:
* Note that we open the file O_RDONLY even when creating write locks.
* This is not quite right, but for now, we assume the client performs
* the proper R/W checking.
*/
__be32
nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
struct nfs_fh *f)
struct nlm_lock *lock)
{
struct nlm_file *file;
unsigned int hash;
__be32 nfserr;
int mode;
nlm_debug_print_fh("nlm_lookup_file", f);
nlm_debug_print_fh("nlm_lookup_file", &lock->fh);
hash = file_hash(f);
hash = file_hash(&lock->fh);
mode = lock_to_openmode(&lock->fl);
/* Lock file table */
mutex_lock(&nlm_file_mutex);
hlist_for_each_entry(file, &nlm_files[hash], f_list)
if (!nfs_compare_fh(&file->f_handle, f))
if (!nfs_compare_fh(&file->f_handle, &lock->fh)) {
mutex_lock(&file->f_mutex);
nfserr = nlm_do_fopen(rqstp, file, mode);
mutex_unlock(&file->f_mutex);
goto found;
nlm_debug_print_fh("creating file for", f);
}
nlm_debug_print_fh("creating file for", &lock->fh);
nfserr = nlm_lck_denied_nolocks;
file = kzalloc(sizeof(*file), GFP_KERNEL);
if (!file)
goto out_unlock;
goto out_free;
memcpy(&file->f_handle, f, sizeof(struct nfs_fh));
memcpy(&file->f_handle, &lock->fh, sizeof(struct nfs_fh));
mutex_init(&file->f_mutex);
INIT_HLIST_NODE(&file->f_list);
INIT_LIST_HEAD(&file->f_blocks);
/* Open the file. Note that this must not sleep for too long, else
* we would lock up lockd:-) So no NFS re-exports, folks.
*
* We have to make sure we have the right credential to open
* the file.
*/
if ((nfserr = nlmsvc_ops->fopen(rqstp, f, &file->f_file)) != 0) {
dprintk("lockd: open failed (error %d)\n", nfserr);
goto out_free;
}
nfserr = nlm_do_fopen(rqstp, file, mode);
if (nfserr)
goto out_unlock;
hlist_add_head(&file->f_list, &nlm_files[hash]);
......@@ -128,7 +147,6 @@ nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result,
dprintk("lockd: found file %p (count %d)\n", file, file->f_count);
*result = file;
file->f_count++;
nfserr = 0;
out_unlock:
mutex_unlock(&nlm_file_mutex);
......@@ -148,13 +166,34 @@ nlm_delete_file(struct nlm_file *file)
nlm_debug_print_file("closing file", file);
if (!hlist_unhashed(&file->f_list)) {
hlist_del(&file->f_list);
nlmsvc_ops->fclose(file->f_file);
if (file->f_file[O_RDONLY])
nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
if (file->f_file[O_WRONLY])
nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
kfree(file);
} else {
printk(KERN_WARNING "lockd: attempt to release unknown file!\n");
}
}
static int nlm_unlock_files(struct nlm_file *file)
{
struct file_lock lock;
struct file *f;
lock.fl_type = F_UNLCK;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
for (f = file->f_file[0]; f <= file->f_file[1]; f++) {
if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) {
pr_warn("lockd: unlock failure in %s:%d\n",
__FILE__, __LINE__);
return 1;
}
}
return 0;
}
/*
* Loop over all locks on the given file and perform the specified
* action.
......@@ -182,17 +221,10 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
lockhost = ((struct nlm_lockowner *)fl->fl_owner)->host;
if (match(lockhost, host)) {
struct file_lock lock = *fl;
spin_unlock(&flctx->flc_lock);
lock.fl_type = F_UNLCK;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
if (vfs_lock_file(file->f_file, F_SETLK, &lock, NULL) < 0) {
printk("lockd: unlock failure in %s:%d\n",
__FILE__, __LINE__);
if (nlm_unlock_files(file))
return 1;
}
goto again;
}
}
......@@ -246,6 +278,15 @@ nlm_file_inuse(struct nlm_file *file)
return 0;
}
static void nlm_close_files(struct nlm_file *file)
{
struct file *f;
for (f = file->f_file[0]; f <= file->f_file[1]; f++)
if (f)
nlmsvc_ops->fclose(f);
}
/*
* Loop over all files in the file table.
*/
......@@ -276,7 +317,7 @@ nlm_traverse_files(void *data, nlm_host_match_fn_t match,
if (list_empty(&file->f_blocks) && !file->f_locks
&& !file->f_shares && !file->f_count) {
hlist_del(&file->f_list);
nlmsvc_ops->fclose(file->f_file);
nlm_close_files(file);
kfree(file);
}
}
......@@ -410,12 +451,13 @@ nlmsvc_invalidate_all(void)
nlm_traverse_files(NULL, nlmsvc_is_client, NULL);
}
static int
nlmsvc_match_sb(void *datap, struct nlm_file *file)
{
struct super_block *sb = datap;
return sb == locks_inode(file->f_file)->i_sb;
return sb == nlmsvc_file_inode(file)->i_sb;
}
/**
......
......@@ -180,5 +180,5 @@ const struct export_operations nfs_export_ops = {
.fetch_iversion = nfs_fetch_iversion,
.flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
EXPORT_OP_NOATOMIC_ATTR,
EXPORT_OP_NOATOMIC_ATTR|EXPORT_OP_SYNC_LOCKS,
};
......@@ -806,6 +806,9 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
nfs_inc_stats(inode, NFSIOS_VFSLOCK);
if (fl->fl_flags & FL_RECLAIM)
return -ENOGRACE;
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
is_local = 1;
......
......@@ -25,9 +25,11 @@
* Note: we hold the dentry use count while the file is open.
*/
static __be32
nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp,
int mode)
{
__be32 nfserr;
int access;
struct svc_fh fh;
/* must initialize before using! but maxsize doesn't matter */
......@@ -36,7 +38,9 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size);
fh.fh_export = NULL;
nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
access = (mode == O_WRONLY) ? NFSD_MAY_WRITE : NFSD_MAY_READ;
access |= NFSD_MAY_LOCK;
nfserr = nfsd_open(rqstp, &fh, S_IFREG, access, filp);
fh_put(&fh);
/* We return nlm error codes as nlm doesn't know
* about nfsd, but nfsd does know about nlm..
......
......@@ -2687,9 +2687,9 @@ static void force_expire_client(struct nfs4_client *clp)
trace_nfsd_clid_admin_expired(&clp->cl_clientid);
spin_lock(&clp->cl_lock);
spin_lock(&nn->client_lock);
clp->cl_time = 0;
spin_unlock(&clp->cl_lock);
spin_unlock(&nn->client_lock);
wait_event(expiry_wq, atomic_read(&clp->cl_rpc_users) == 0);
spin_lock(&nn->client_lock);
......@@ -6821,6 +6821,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_blocked_lock *nbl = NULL;
struct file_lock *file_lock = NULL;
struct file_lock *conflock = NULL;
struct super_block *sb;
__be32 status = 0;
int lkflg;
int err;
......@@ -6842,6 +6843,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfsd4_lock: permission denied!\n");
return status;
}
sb = cstate->current_fh.fh_dentry->d_sb;
if (lock->lk_is_new) {
if (nfsd4_has_session(cstate))
......@@ -6887,10 +6889,14 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!locks_in_grace(net) && lock->lk_reclaim)
goto out;
if (lock->lk_reclaim)
fl_flags |= FL_RECLAIM;
fp = lock_stp->st_stid.sc_file;
switch (lock->lk_type) {
case NFS4_READW_LT:
if (nfsd4_has_session(cstate))
if (nfsd4_has_session(cstate) &&
!(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_READ_LT:
......@@ -6902,7 +6908,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fl_type = F_RDLCK;
break;
case NFS4_WRITEW_LT:
if (nfsd4_has_session(cstate))
if (nfsd4_has_session(cstate) &&
!(sb->s_export_op->flags & EXPORT_OP_SYNC_LOCKS))
fl_flags |= FL_SLEEP;
fallthrough;
case NFS4_WRITE_LT:
......@@ -7022,8 +7029,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/*
* The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
* so we do a temporary open here just to get an open file to pass to
* vfs_test_lock. (Arguably perhaps test_lock should be done with an
* inode operation.)
* vfs_test_lock.
*/
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
......@@ -7038,7 +7044,9 @@ static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct
NFSD_MAY_READ));
if (err)
goto out;
lock->fl_file = nf->nf_file;
err = nfserrno(vfs_test_lock(nf->nf_file, lock));
lock->fl_file = NULL;
out:
fh_unlock(fhp);
nfsd_file_put(nf);
......
......@@ -881,6 +881,7 @@ nfserrno (int errno)
{ nfserr_serverfault, -ENFILE },
{ nfserr_io, -EUCLEAN },
{ nfserr_perm, -ENOKEY },
{ nfserr_no_grace, -ENOGRACE},
};
int i;
......
......@@ -400,18 +400,16 @@ TRACE_EVENT(nfsd_dirent,
TP_STRUCT__entry(
__field(u32, fh_hash)
__field(u64, ino)
__field(int, len)
__dynamic_array(unsigned char, name, namlen)
__string_len(name, name, namlen)
),
TP_fast_assign(
__entry->fh_hash = fhp ? knfsd_fh_hash(&fhp->fh_handle) : 0;
__entry->ino = ino;
__entry->len = namlen;
memcpy(__get_str(name), name, namlen);
__assign_str_len(name, name, namlen)
),
TP_printk("fh_hash=0x%08x ino=%llu name=%.*s",
__entry->fh_hash, __entry->ino,
__entry->len, __get_str(name))
TP_printk("fh_hash=0x%08x ino=%llu name=%s",
__entry->fh_hash, __entry->ino, __get_str(name)
)
)
#include "state.h"
......@@ -608,7 +606,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class,
__array(unsigned char, addr, sizeof(struct sockaddr_in6))
__field(unsigned long, flavor)
__array(unsigned char, verifier, NFS4_VERIFIER_SIZE)
__dynamic_array(char, name, clp->cl_name.len + 1)
__string_len(name, name, clp->cl_name.len)
),
TP_fast_assign(
__entry->cl_boot = clp->cl_clientid.cl_boot;
......@@ -618,8 +616,7 @@ DECLARE_EVENT_CLASS(nfsd_clid_class,
__entry->flavor = clp->cl_cred.cr_flavor;
memcpy(__entry->verifier, (void *)&clp->cl_verifier,
NFS4_VERIFIER_SIZE);
memcpy(__get_str(name), clp->cl_name.data, clp->cl_name.len);
__get_str(name)[clp->cl_name.len] = '\0';
__assign_str_len(name, clp->cl_name.data, clp->cl_name.len);
),
TP_printk("addr=%pISpc name='%s' verifier=0x%s flavor=%s client=%08x:%08x",
__entry->addr, __get_str(name),
......
......@@ -244,7 +244,6 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
* returned. Otherwise the covered directory is returned.
* NOTE: this mountpoint crossing is not supported properly by all
* clients and is explicitly disallowed for NFSv3
* NeilBrown <neilb@cse.unsw.edu.au>
*/
__be32
nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
......@@ -826,26 +825,16 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
struct svc_rqst *rqstp = sd->u.data;
struct page **pp = rqstp->rq_next_page;
struct page *page = buf->page;
size_t size;
size = sd->len;
if (rqstp->rq_res.page_len == 0) {
get_page(page);
put_page(*rqstp->rq_next_page);
*(rqstp->rq_next_page++) = page;
svc_rqst_replace_page(rqstp, page);
rqstp->rq_res.page_base = buf->offset;
rqstp->rq_res.page_len = size;
} else if (page != pp[-1]) {
get_page(page);
if (*rqstp->rq_next_page)
put_page(*rqstp->rq_next_page);
*(rqstp->rq_next_page++) = page;
rqstp->rq_res.page_len += size;
} else
rqstp->rq_res.page_len += size;
svc_rqst_replace_page(rqstp, page);
}
rqstp->rq_res.page_len += sd->len;
return size;
return sd->len;
}
static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
......
......@@ -31,5 +31,6 @@
#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
#define EIOCBQUEUED 529 /* iocb queued, will get completion event */
#define ERECALLCONFLICT 530 /* conflict with recalled state */
#define ENOGRACE 531 /* NFS file lock reclaim refused */
#endif
......@@ -221,6 +221,8 @@ struct export_operations {
#define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply
atomic attribute updates
*/
#define EXPORT_OP_SYNC_LOCKS (0x20) /* Filesystem can't do
asychronous blocking locks */
unsigned long flags;
};
......
......@@ -1037,6 +1037,7 @@ static inline struct file *get_file(struct file *f)
#define FL_UNLOCK_PENDING 512 /* Lease is being broken */
#define FL_OFDLCK 1024 /* lock is "owned" by struct file */
#define FL_LAYOUT 2048 /* outstanding pNFS layout */
#define FL_RECLAIM 4096 /* reclaiming from a reboot server */
#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
......
......@@ -27,7 +27,8 @@ struct rpc_task;
struct nlmsvc_binding {
__be32 (*fopen)(struct svc_rqst *,
struct nfs_fh *,
struct file **);
struct file **,
int mode);
void (*fclose)(struct file *);
};
......
......@@ -10,6 +10,8 @@
#ifndef LINUX_LOCKD_LOCKD_H
#define LINUX_LOCKD_LOCKD_H
/* XXX: a lot of this should really be under fs/lockd. */
#include <linux/in.h>
#include <linux/in6.h>
#include <net/ipv6.h>
......@@ -154,7 +156,8 @@ struct nlm_rqst {
struct nlm_file {
struct hlist_node f_list; /* linked list */
struct nfs_fh f_handle; /* NFS file handle */
struct file * f_file; /* VFS file pointer */
struct file * f_file[2]; /* VFS file pointers,
indexed by O_ flags */
struct nlm_share * f_shares; /* DOS shares */
struct list_head f_blocks; /* blocked locks */
unsigned int f_locks; /* guesstimate # of locks */
......@@ -267,6 +270,7 @@ typedef int (*nlm_host_match_fn_t)(void *cur, struct nlm_host *ref);
/*
* Server-side lock handling
*/
int lock_to_openmode(struct file_lock *);
__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
struct nlm_host *, struct nlm_lock *, int,
struct nlm_cookie *, int);
......@@ -286,7 +290,7 @@ void nlmsvc_locks_init_private(struct file_lock *, struct nlm_host *, pid_t);
* File handling for the server personality
*/
__be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **,
struct nfs_fh *);
struct nlm_lock *);
void nlm_release_file(struct nlm_file *);
void nlmsvc_release_lockowner(struct nlm_lock *);
void nlmsvc_mark_resources(struct net *);
......@@ -301,7 +305,8 @@ int nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr);
static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
{
return locks_inode(file->f_file);
return locks_inode(file->f_file[O_RDONLY] ?
file->f_file[O_RDONLY] : file->f_file[O_WRONLY]);
}
static inline int __nlm_privileged_request4(const struct sockaddr *sap)
......
......@@ -20,6 +20,7 @@ enum rpc_auth_flavors {
RPC_AUTH_DES = 3,
RPC_AUTH_KRB = 4,
RPC_AUTH_GSS = 6,
RPC_AUTH_TLS = 7,
RPC_AUTH_MAXFLAVOR = 8,
/* pseudoflavors: */
RPC_AUTH_GSS_KRB5 = 390003,
......
......@@ -19,6 +19,7 @@
#include <linux/sunrpc/svcauth.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/pagevec.h>
/* statistics for svc_pool structures */
struct svc_pool_stats {
......@@ -256,6 +257,7 @@ struct svc_rqst {
struct page * *rq_next_page; /* next reply page to use */
struct page * *rq_page_end; /* one past the last page */
struct pagevec rq_pvec;
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
struct bio_vec rq_bvec[RPCSVC_MAXPAGES];
......@@ -502,6 +504,8 @@ struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
struct svc_pool *pool, int node);
struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
struct svc_pool *pool, int node);
void svc_rqst_replace_page(struct svc_rqst *rqstp,
struct page *page);
void svc_rqst_free(struct svc_rqst *);
void svc_exit_thread(struct svc_rqst *);
unsigned int svc_pool_map_get(void);
......@@ -523,6 +527,7 @@ void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
char * svc_print_addr(struct svc_rqst *, char *, size_t);
const char * svc_proc_name(const struct svc_rqst *rqstp);
int svc_encode_result_payload(struct svc_rqst *rqstp,
unsigned int offset,
unsigned int length);
......
......@@ -90,9 +90,9 @@ struct svcxprt_rdma {
struct ib_pd *sc_pd;
spinlock_t sc_send_lock;
struct list_head sc_send_ctxts;
struct llist_head sc_send_ctxts;
spinlock_t sc_rw_ctxt_lock;
struct list_head sc_rw_ctxts;
struct llist_head sc_rw_ctxts;
u32 sc_pending_recvs;
u32 sc_recv_batch;
......@@ -150,7 +150,7 @@ struct svc_rdma_recv_ctxt {
};
struct svc_rdma_send_ctxt {
struct list_head sc_list;
struct llist_node sc_node;
struct rpc_rdma_cid sc_cid;
struct ib_send_wr sc_send_wr;
......@@ -207,6 +207,7 @@ extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
struct svc_rdma_recv_ctxt *rctxt,
int status);
extern void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail);
extern int svc_rdma_sendto(struct svc_rqst *);
extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length);
......
......@@ -95,6 +95,7 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
#define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX)
#define rpc_auth_short cpu_to_be32(RPC_AUTH_SHORT)
#define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS)
#define rpc_auth_tls cpu_to_be32(RPC_AUTH_TLS)
#define rpc_call cpu_to_be32(RPC_CALL)
#define rpc_reply cpu_to_be32(RPC_REPLY)
......
......@@ -288,7 +288,6 @@ struct rpc_xprt {
const char *address_strings[RPC_DISPLAY_MAX];
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
struct dentry *debugfs; /* debugfs directory */
atomic_t inject_disconnect;
#endif
struct rcu_head rcu;
const struct xprt_class *xprt_class;
......@@ -502,21 +501,4 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt)
return test_and_set_bit(XPRT_BINDING, &xprt->state);
}
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
extern unsigned int rpc_inject_disconnect;
static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
{
if (!rpc_inject_disconnect)
return;
if (atomic_dec_return(&xprt->inject_disconnect))
return;
atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
xprt->ops->inject_disconnect(xprt);
}
#else
static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
{
}
#endif
#endif /* _LINUX_SUNRPC_XPRT_H */
......@@ -48,6 +48,8 @@ typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer,
size_t *lenp, loff_t *ppos);
int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *);
int proc_dobool(struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos);
int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *);
int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *);
int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *);
......
......@@ -1642,7 +1642,7 @@ TRACE_EVENT(svc_process,
__field(u32, vers)
__field(u32, proc)
__string(service, name)
__string(procedure, rqst->rq_procinfo->pc_name)
__string(procedure, svc_proc_name(rqst))
__string(addr, rqst->rq_xprt ?
rqst->rq_xprt->xpt_remotebuf : "(null)")
),
......@@ -1652,7 +1652,7 @@ TRACE_EVENT(svc_process,
__entry->vers = rqst->rq_vers;
__entry->proc = rqst->rq_proc;
__assign_str(service, name);
__assign_str(procedure, rqst->rq_procinfo->pc_name);
__assign_str(procedure, svc_proc_name(rqst));
__assign_str(addr, rqst->rq_xprt ?
rqst->rq_xprt->xpt_remotebuf : "(null)");
),
......@@ -1918,7 +1918,7 @@ TRACE_EVENT(svc_stats_latency,
TP_STRUCT__entry(
__field(u32, xid)
__field(unsigned long, execute)
__string(procedure, rqst->rq_procinfo->pc_name)
__string(procedure, svc_proc_name(rqst))
__string(addr, rqst->rq_xprt->xpt_remotebuf)
),
......@@ -1926,7 +1926,7 @@ TRACE_EVENT(svc_stats_latency,
__entry->xid = be32_to_cpu(rqst->rq_xid);
__entry->execute = ktime_to_us(ktime_sub(ktime_get(),
rqst->rq_stime));
__assign_str(procedure, rqst->rq_procinfo->pc_name);
__assign_str(procedure, svc_proc_name(rqst));
__assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
),
......
......@@ -102,6 +102,9 @@ TRACE_MAKE_SYSTEM_STR();
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
#undef __string_len
#define __string_len(item, src, len) __dynamic_array(char, item, -1)
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
......@@ -197,6 +200,9 @@ TRACE_MAKE_SYSTEM_STR();
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
#undef __string_len
#define __string_len(item, src, len) __dynamic_array(char, item, -1)
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
......@@ -459,6 +465,9 @@ static struct trace_event_functions trace_event_type_funcs_##call = { \
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
#undef __string_len
#define __string_len(item, src, len) __dynamic_array(char, item, -1)
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
......@@ -507,6 +516,9 @@ static struct trace_event_fields trace_event_fields_##call[] = { \
#define __string(item, src) __dynamic_array(char, item, \
strlen((src) ? (const char *)(src) : "(null)") + 1)
#undef __string_len
#define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1)
/*
* __bitmask_size_in_bytes_raw is the number of bytes needed to hold
* num_possible_cpus().
......@@ -670,10 +682,20 @@ static inline notrace int trace_event_get_offsets_##call( \
#undef __string
#define __string(item, src) __dynamic_array(char, item, -1)
#undef __string_len
#define __string_len(item, src, len) __dynamic_array(char, item, -1)
#undef __assign_str
#define __assign_str(dst, src) \
strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)");
#undef __assign_str_len
#define __assign_str_len(dst, src, len) \
do { \
memcpy(__get_str(dst), (src), (len)); \
__get_str(dst)[len] = '\0'; \
} while(0)
#undef __bitmask
#define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
......
......@@ -33,7 +33,6 @@ struct nfs_fhbase_old {
/*
* This is the new flexible, extensible style NFSv2/v3/v4 file handle.
* by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
*
* The file handle starts with a sequence of four-byte words.
* The first word contains a version number (1) and three descriptor bytes
......
......@@ -536,6 +536,21 @@ static void proc_put_char(void **buf, size_t *size, char c)
}
}
static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
{
if (write) {
*(bool *)valp = *lvalp;
} else {
int val = *(bool *)valp;
*lvalp = (unsigned long)val;
*negp = false;
}
return 0;
}
static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
......@@ -798,6 +813,26 @@ static int do_proc_douintvec(struct ctl_table *table, int write,
buffer, lenp, ppos, conv, data);
}
/**
* proc_dobool - read/write a bool
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
*
* Reads/writes up to table->maxlen/sizeof(unsigned int) integer
* values from/to the user buffer, treated as an ASCII string.
*
* Returns 0 on success.
*/
int proc_dobool(struct ctl_table *table, int write, void *buffer,
size_t *lenp, loff_t *ppos)
{
return do_proc_dointvec(table, write, buffer, lenp, ppos,
do_proc_dobool_conv, NULL);
}
/**
* proc_dointvec - read a vector of integers
* @table: the sysctl table
......@@ -1630,6 +1665,12 @@ int proc_dostring(struct ctl_table *table, int write,
return -ENOSYS;
}
int proc_dobool(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
int proc_dointvec(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
......@@ -3425,6 +3466,7 @@ int __init sysctl_init(void)
* No sense putting this after each symbol definition, twice,
* exception granted :-)
*/
EXPORT_SYMBOL(proc_dobool);
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
......
......@@ -1945,6 +1945,13 @@ config FAIL_MMC_REQUEST
and to test how the mmc host driver handles retries from
the block device.
config FAIL_SUNRPC
bool "Fault-injection capability for SunRPC"
depends on FAULT_INJECTION_DEBUG_FS && SUNRPC_DEBUG
help
Provide fault-injection capability for SunRPC and
its consumers.
config FAULT_INJECTION_STACKTRACE_FILTER
bool "stacktrace filter for fault-injection capabilities"
depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
......
......@@ -1980,7 +1980,7 @@ gss_svc_init_net(struct net *net)
goto out2;
return 0;
out2:
destroy_use_gss_proxy_proc_entry(net);
rsi_cache_destroy_net(net);
out1:
rsc_cache_destroy_net(net);
return rv;
......
......@@ -8,14 +8,14 @@
#include <linux/debugfs.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/clnt.h>
#include "netns.h"
#include "fail.h"
static struct dentry *topdir;
static struct dentry *rpc_clnt_dir;
static struct dentry *rpc_xprt_dir;
unsigned int rpc_inject_disconnect;
static int
tasks_show(struct seq_file *f, void *v)
{
......@@ -235,8 +235,6 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
/* make tasks file */
debugfs_create_file("info", S_IFREG | 0400, xprt->debugfs, xprt,
&xprt_info_fops);
atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect);
}
void
......@@ -246,56 +244,30 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt)
xprt->debugfs = NULL;
}
static int
fault_open(struct inode *inode, struct file *filp)
{
filp->private_data = kmalloc(128, GFP_KERNEL);
if (!filp->private_data)
return -ENOMEM;
return 0;
}
#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
struct fail_sunrpc_attr fail_sunrpc = {
.attr = FAULT_ATTR_INITIALIZER,
};
EXPORT_SYMBOL_GPL(fail_sunrpc);
static int
fault_release(struct inode *inode, struct file *filp)
static void fail_sunrpc_init(void)
{
kfree(filp->private_data);
return 0;
}
struct dentry *dir;
static ssize_t
fault_disconnect_read(struct file *filp, char __user *user_buf,
size_t len, loff_t *offset)
{
char *buffer = (char *)filp->private_data;
size_t size;
dir = fault_create_debugfs_attr("fail_sunrpc", NULL,
&fail_sunrpc.attr);
size = sprintf(buffer, "%u\n", rpc_inject_disconnect);
return simple_read_from_buffer(user_buf, len, offset, buffer, size);
}
debugfs_create_bool("ignore-client-disconnect", S_IFREG | 0600, dir,
&fail_sunrpc.ignore_client_disconnect);
static ssize_t
fault_disconnect_write(struct file *filp, const char __user *user_buf,
size_t len, loff_t *offset)
debugfs_create_bool("ignore-server-disconnect", S_IFREG | 0600, dir,
&fail_sunrpc.ignore_server_disconnect);
}
#else
static void fail_sunrpc_init(void)
{
char buffer[16];
if (len >= sizeof(buffer))
len = sizeof(buffer) - 1;
if (copy_from_user(buffer, user_buf, len))
return -EFAULT;
buffer[len] = '\0';
if (kstrtouint(buffer, 10, &rpc_inject_disconnect))
return -EINVAL;
return len;
}
static const struct file_operations fault_disconnect_fops = {
.owner = THIS_MODULE,
.open = fault_open,
.read = fault_disconnect_read,
.write = fault_disconnect_write,
.release = fault_release,
};
#endif
void __exit
sunrpc_debugfs_exit(void)
......@@ -309,16 +281,11 @@ sunrpc_debugfs_exit(void)
void __init
sunrpc_debugfs_init(void)
{
struct dentry *rpc_fault_dir;
topdir = debugfs_create_dir("sunrpc", NULL);
rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
rpc_xprt_dir = debugfs_create_dir("rpc_xprt", topdir);
rpc_fault_dir = debugfs_create_dir("inject_fault", topdir);
debugfs_create_file("disconnect", S_IFREG | 0400, rpc_fault_dir, NULL,
&fault_disconnect_fops);
fail_sunrpc_init();
}
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2021, Oracle. All rights reserved.
*/
#ifndef _NET_SUNRPC_FAIL_H_
#define _NET_SUNRPC_FAIL_H_
#include <linux/fault-inject.h>
#if IS_ENABLED(CONFIG_FAULT_INJECTION)
struct fail_sunrpc_attr {
struct fault_attr attr;
bool ignore_client_disconnect;
bool ignore_server_disconnect;
};
extern struct fail_sunrpc_attr fail_sunrpc;
#endif /* CONFIG_FAULT_INJECTION */
#endif /* _NET_SUNRPC_FAIL_H_ */
......@@ -31,6 +31,8 @@
#include <trace/events/sunrpc.h>
#include "fail.h"
#define RPCDBG_FACILITY RPCDBG_SVCDSP
static void svc_unregister(const struct svc_serv *serv, struct net *net);
......@@ -838,6 +840,27 @@ svc_set_num_threads_sync(struct svc_serv *serv, struct svc_pool *pool, int nrser
}
EXPORT_SYMBOL_GPL(svc_set_num_threads_sync);
/**
* svc_rqst_replace_page - Replace one page in rq_pages[]
* @rqstp: svc_rqst with pages to replace
* @page: replacement page
*
* When replacing a page in rq_pages, batch the release of the
* replaced pages to avoid hammering the page allocator.
*/
void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
{
if (*rqstp->rq_next_page) {
if (!pagevec_space(&rqstp->rq_pvec))
__pagevec_release(&rqstp->rq_pvec);
pagevec_add(&rqstp->rq_pvec, *rqstp->rq_next_page);
}
get_page(page);
*(rqstp->rq_next_page++) = page;
}
EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
/*
* Called from a server thread as it's exiting. Caller must hold the "service
* mutex" for the service.
......@@ -1503,6 +1526,12 @@ svc_process(struct svc_rqst *rqstp)
struct svc_serv *serv = rqstp->rq_server;
u32 dir;
#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
if (!fail_sunrpc.ignore_server_disconnect &&
should_fail(&fail_sunrpc.attr, 1))
svc_xprt_deferred_close(rqstp->rq_xprt);
#endif
/*
* Setup response xdr_buf.
* Initially it has just one page
......@@ -1629,6 +1658,21 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
}
EXPORT_SYMBOL_GPL(svc_max_payload);
/**
* svc_proc_name - Return RPC procedure name in string form
* @rqstp: svc_rqst to operate on
*
* Return value:
* Pointer to a NUL-terminated string
*/
const char *svc_proc_name(const struct svc_rqst *rqstp)
{
if (rqstp && rqstp->rq_procinfo)
return rqstp->rq_procinfo->pc_name;
return "unknown";
}
/**
* svc_encode_result_payload - mark a range of bytes as a result payload
* @rqstp: svc_rqst to operate on
......
......@@ -539,6 +539,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
kfree(rqstp->rq_deferred);
rqstp->rq_deferred = NULL;
pagevec_release(&rqstp->rq_pvec);
svc_free_res_pages(rqstp);
rqstp->rq_res.page_len = 0;
rqstp->rq_res.page_base = 0;
......@@ -664,6 +665,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
struct xdr_buf *arg = &rqstp->rq_arg;
unsigned long pages, filled;
pagevec_init(&rqstp->rq_pvec);
pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
if (pages > RPCSVC_MAXPAGES) {
pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n",
......
......@@ -56,6 +56,7 @@
#include "sunrpc.h"
#include "sysfs.h"
#include "fail.h"
/*
* Local variables
......@@ -855,6 +856,19 @@ xprt_init_autodisconnect(struct timer_list *t)
queue_work(xprtiod_workqueue, &xprt->task_cleanup);
}
#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
static void xprt_inject_disconnect(struct rpc_xprt *xprt)
{
if (!fail_sunrpc.ignore_client_disconnect &&
should_fail(&fail_sunrpc.attr, 1))
xprt->ops->inject_disconnect(xprt);
}
#else
static inline void xprt_inject_disconnect(struct rpc_xprt *xprt)
{
}
#endif
bool xprt_lock_connect(struct rpc_xprt *xprt,
struct rpc_task *task,
void *cookie)
......
......@@ -35,6 +35,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc);
* controlling svcxprt_rdma is destroyed.
*/
struct svc_rdma_rw_ctxt {
struct llist_node rw_node;
struct list_head rw_list;
struct rdma_rw_ctx rw_ctx;
unsigned int rw_nents;
......@@ -53,19 +54,19 @@ static struct svc_rdma_rw_ctxt *
svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
{
struct svc_rdma_rw_ctxt *ctxt;
struct llist_node *node;
spin_lock(&rdma->sc_rw_ctxt_lock);
ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts);
if (ctxt) {
list_del(&ctxt->rw_list);
spin_unlock(&rdma->sc_rw_ctxt_lock);
node = llist_del_first(&rdma->sc_rw_ctxts);
spin_unlock(&rdma->sc_rw_ctxt_lock);
if (node) {
ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
} else {
spin_unlock(&rdma->sc_rw_ctxt_lock);
ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
GFP_KERNEL);
if (!ctxt)
goto out_noctx;
INIT_LIST_HEAD(&ctxt->rw_list);
}
......@@ -83,14 +84,18 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
return NULL;
}
static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
struct svc_rdma_rw_ctxt *ctxt)
static void __svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
struct svc_rdma_rw_ctxt *ctxt,
struct llist_head *list)
{
sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
llist_add(&ctxt->rw_node, list);
}
spin_lock(&rdma->sc_rw_ctxt_lock);
list_add(&ctxt->rw_list, &rdma->sc_rw_ctxts);
spin_unlock(&rdma->sc_rw_ctxt_lock);
static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
struct svc_rdma_rw_ctxt *ctxt)
{
__svc_rdma_put_rw_ctxt(rdma, ctxt, &rdma->sc_rw_ctxts);
}
/**
......@@ -101,9 +106,10 @@ static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma,
void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma)
{
struct svc_rdma_rw_ctxt *ctxt;
struct llist_node *node;
while ((ctxt = svc_rdma_next_ctxt(&rdma->sc_rw_ctxts)) != NULL) {
list_del(&ctxt->rw_list);
while ((node = llist_del_first(&rdma->sc_rw_ctxts)) != NULL) {
ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
kfree(ctxt);
}
}
......@@ -171,20 +177,35 @@ static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
cc->cc_sqecount = 0;
}
/*
* The consumed rw_ctx's are cleaned and placed on a local llist so
* that only one atomic llist operation is needed to put them all
* back on the free list.
*/
static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
enum dma_data_direction dir)
{
struct svcxprt_rdma *rdma = cc->cc_rdma;
struct llist_node *first, *last;
struct svc_rdma_rw_ctxt *ctxt;
LLIST_HEAD(free);
first = last = NULL;
while ((ctxt = svc_rdma_next_ctxt(&cc->cc_rwctxts)) != NULL) {
list_del(&ctxt->rw_list);
rdma_rw_ctx_destroy(&ctxt->rw_ctx, rdma->sc_qp,
rdma->sc_port_num, ctxt->rw_sg_table.sgl,
ctxt->rw_nents, dir);
svc_rdma_put_rw_ctxt(rdma, ctxt);
__svc_rdma_put_rw_ctxt(rdma, ctxt, &free);
ctxt->rw_node.next = first;
first = &ctxt->rw_node;
if (!last)
last = first;
}
if (first)
llist_add_batch(first, last, &rdma->sc_rw_ctxts);
}
/* State for sending a Write or Reply chunk.
......@@ -248,8 +269,7 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_write(wc, &cc->cc_cid);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
if (unlikely(wc->status != IB_WC_SUCCESS))
svc_xprt_deferred_close(&rdma->sc_xprt);
......@@ -304,9 +324,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_read(wc, &cc->cc_cid);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
cc->cc_status = wc->status;
complete(&cc->cc_done);
return;
......
......@@ -113,13 +113,6 @@
static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
static inline struct svc_rdma_send_ctxt *
svc_rdma_next_send_ctxt(struct list_head *list)
{
return list_first_entry_or_null(list, struct svc_rdma_send_ctxt,
sc_list);
}
static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
struct rpc_rdma_cid *cid)
{
......@@ -182,9 +175,10 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
{
struct svc_rdma_send_ctxt *ctxt;
struct llist_node *node;
while ((ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts))) {
list_del(&ctxt->sc_list);
while ((node = llist_del_first(&rdma->sc_send_ctxts)) != NULL) {
ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
ib_dma_unmap_single(rdma->sc_pd->device,
ctxt->sc_sges[0].addr,
rdma->sc_max_req_size,
......@@ -204,12 +198,13 @@ void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma)
struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
{
struct svc_rdma_send_ctxt *ctxt;
struct llist_node *node;
spin_lock(&rdma->sc_send_lock);
ctxt = svc_rdma_next_send_ctxt(&rdma->sc_send_ctxts);
if (!ctxt)
node = llist_del_first(&rdma->sc_send_ctxts);
if (!node)
goto out_empty;
list_del(&ctxt->sc_list);
ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
spin_unlock(&rdma->sc_send_lock);
out:
......@@ -253,9 +248,21 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
ctxt->sc_sges[i].length);
}
spin_lock(&rdma->sc_send_lock);
list_add(&ctxt->sc_list, &rdma->sc_send_ctxts);
spin_unlock(&rdma->sc_send_lock);
llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts);
}
/**
* svc_rdma_wake_send_waiters - manage Send Queue accounting
* @rdma: controlling transport
* @avail: Number of additional SQEs that are now available
*
*/
void svc_rdma_wake_send_waiters(struct svcxprt_rdma *rdma, int avail)
{
atomic_add(avail, &rdma->sc_sq_avail);
smp_mb__after_atomic();
if (unlikely(waitqueue_active(&rdma->sc_send_wait)))
wake_up(&rdma->sc_send_wait);
}
/**
......@@ -275,11 +282,9 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
svc_rdma_wake_send_waiters(rdma, 1);
complete(&ctxt->sc_done);
atomic_inc(&rdma->sc_sq_avail);
wake_up(&rdma->sc_send_wait);
if (unlikely(wc->status != IB_WC_SUCCESS))
svc_xprt_deferred_close(&rdma->sc_xprt);
}
......
......@@ -136,9 +136,9 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_send_ctxts);
init_llist_head(&cma_xprt->sc_send_ctxts);
init_llist_head(&cma_xprt->sc_recv_ctxts);
INIT_LIST_HEAD(&cma_xprt->sc_rw_ctxts);
init_llist_head(&cma_xprt->sc_rw_ctxts);
init_waitqueue_head(&cma_xprt->sc_send_wait);
spin_lock_init(&cma_xprt->sc_lock);
......@@ -545,7 +545,6 @@ static void __svc_rdma_free(struct work_struct *work)
{
struct svcxprt_rdma *rdma =
container_of(work, struct svcxprt_rdma, sc_work);
struct svc_xprt *xprt = &rdma->sc_xprt;
/* This blocks until the Completion Queues are empty */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
......@@ -553,12 +552,6 @@ static void __svc_rdma_free(struct work_struct *work)
svc_rdma_flush_recv_queues(rdma);
/* Final put of backchannel client transport */
if (xprt->xpt_bc_xprt) {
xprt_put(xprt->xpt_bc_xprt);
xprt->xpt_bc_xprt = NULL;
}
svc_rdma_destroy_rw_ctxts(rdma);
svc_rdma_send_ctxts_destroy(rdma);
svc_rdma_recv_ctxts_destroy(rdma);
......
......@@ -141,6 +141,33 @@
* In most cases, the __assign_str() macro will take the same
* parameters as the __string() macro had to declare the string.
*
* __string_len: This is a helper to a __dynamic_array, but it understands
* that the array has characters in it, and with the combined
* use of __assign_str_len(), it will allocate 'len' + 1 bytes
* in the ring buffer and add a '\0' to the string. This is
* useful if the string being saved has no terminating '\0' byte.
* It requires that the length of the string is known as it acts
* like a memcpy().
*
* Declared with:
*
* __string_len(foo, bar, len)
*
* To assign this string, use the helper macro __assign_str_len().
*
* __assign_str(foo, bar, len);
*
* Then len + 1 is allocated to the ring buffer, and a nul terminating
* byte is added. This is similar to:
*
* memcpy(__get_str(foo), bar, len);
* __get_str(foo)[len] = 0;
*
* The advantage of using this over __dynamic_array, is that it
* takes care of allocating the extra byte on the ring buffer
* for the '\0' terminating byte, and __get_str(foo) can be used
* in the TP_printk().
*
* __bitmask: This is another kind of __dynamic_array, but it expects
* an array of longs, and the number of bits to parse. It takes
* two parameters (name, nr_bits), where name is the name of the
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment