Commit 1c8c601a authored by Jeff Layton, committed by Al Viro

locks: protect most of the file_lock handling with i_lock

Having a global lock that protects all of this code is a clear
scalability problem. Instead of doing that, move most of the code to be
protected by the i_lock instead. The exceptions are the global lists
that the ->fl_link sits on, and the ->fl_block list.

->fl_link is what connects these structures to the
global lists, so we must ensure that we hold those locks when iterating
over or updating these lists.

Furthermore, sound deadlock detection requires that we hold the
blocked_list state steady while checking for loops. We also must ensure
that the search and update to the list are atomic.

For the checking and insertion side of the blocked_list, push the
acquisition of the global lock into __posix_lock_file and ensure that
checking and update of the blocked_list is done without dropping the
lock in between.

On the removal side, when waking up blocked lock waiters, take the
global lock before walking the blocked list and dequeue the waiters from
the global list prior to removal from the fl_block list.

With this, deadlock detection should be race free while we minimize
excessive file_lock_lock thrashing.

Finally, in order to avoid a lock inversion problem when handling
/proc/locks output we must ensure that manipulations of the fl_block
list are also protected by the file_lock_lock.
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent 88974691
...@@ -342,7 +342,7 @@ prototypes: ...@@ -342,7 +342,7 @@ prototypes:
locking rules: locking rules:
file_lock_lock may block inode->i_lock may block
fl_copy_lock: yes no fl_copy_lock: yes no
fl_release_private: maybe no fl_release_private: maybe no
...@@ -355,12 +355,19 @@ prototypes: ...@@ -355,12 +355,19 @@ prototypes:
int (*lm_change)(struct file_lock **, int); int (*lm_change)(struct file_lock **, int);
locking rules: locking rules:
file_lock_lock may block
lm_compare_owner: yes no inode->i_lock file_lock_lock may block
lm_notify: yes no lm_compare_owner: yes[1] maybe no
lm_grant: no no lm_notify: yes yes no
lm_break: yes no lm_grant: no no no
lm_change yes no lm_break: yes no no
lm_change yes no no
[1]: ->lm_compare_owner is generally called with *an* inode->i_lock held. It
may not be the i_lock of the inode for either file_lock being compared! This is
the case with deadlock detection, since the code has to chase down the owners
of locks that may be entirely unrelated to the one on which the lock is being
acquired. When doing a search for deadlocks, the file_lock_lock is also held.
--------------------------- buffer_head ----------------------------------- --------------------------- buffer_head -----------------------------------
prototypes: prototypes:
......
...@@ -252,7 +252,8 @@ static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key) ...@@ -252,7 +252,8 @@ static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
*/ */
static int afs_do_setlk(struct file *file, struct file_lock *fl) static int afs_do_setlk(struct file *file, struct file_lock *fl)
{ {
struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host); struct inode *inode = file_inode(file);
struct afs_vnode *vnode = AFS_FS_I(inode);
afs_lock_type_t type; afs_lock_type_t type;
struct key *key = file->private_data; struct key *key = file->private_data;
int ret; int ret;
...@@ -273,7 +274,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) ...@@ -273,7 +274,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
lock_flocks(); spin_lock(&inode->i_lock);
/* make sure we've got a callback on this file and that our view of the /* make sure we've got a callback on this file and that our view of the
* data version is up to date */ * data version is up to date */
...@@ -420,7 +421,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) ...@@ -420,7 +421,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
afs_vnode_fetch_status(vnode, NULL, key); afs_vnode_fetch_status(vnode, NULL, key);
error: error:
unlock_flocks(); spin_unlock(&inode->i_lock);
_leave(" = %d", ret); _leave(" = %d", ret);
return ret; return ret;
......
...@@ -192,7 +192,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) ...@@ -192,7 +192,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
/** /**
* Encode the flock and fcntl locks for the given inode into the ceph_filelock * Encode the flock and fcntl locks for the given inode into the ceph_filelock
* array. Must be called with lock_flocks() already held. * array. Must be called with inode->i_lock already held.
* If we encounter more of a specific lock type than expected, return -ENOSPC. * If we encounter more of a specific lock type than expected, return -ENOSPC.
*/ */
int ceph_encode_locks_to_buffer(struct inode *inode, int ceph_encode_locks_to_buffer(struct inode *inode,
......
...@@ -2481,20 +2481,20 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, ...@@ -2481,20 +2481,20 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_filelock *flocks; struct ceph_filelock *flocks;
encode_again: encode_again:
lock_flocks(); spin_lock(&inode->i_lock);
ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
unlock_flocks(); spin_unlock(&inode->i_lock);
flocks = kmalloc((num_fcntl_locks+num_flock_locks) * flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
sizeof(struct ceph_filelock), GFP_NOFS); sizeof(struct ceph_filelock), GFP_NOFS);
if (!flocks) { if (!flocks) {
err = -ENOMEM; err = -ENOMEM;
goto out_free; goto out_free;
} }
lock_flocks(); spin_lock(&inode->i_lock);
err = ceph_encode_locks_to_buffer(inode, flocks, err = ceph_encode_locks_to_buffer(inode, flocks,
num_fcntl_locks, num_fcntl_locks,
num_flock_locks); num_flock_locks);
unlock_flocks(); spin_unlock(&inode->i_lock);
if (err) { if (err) {
kfree(flocks); kfree(flocks);
if (err == -ENOSPC) if (err == -ENOSPC)
......
...@@ -765,7 +765,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence) ...@@ -765,7 +765,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
{ {
/* note that this is called by vfs setlease with lock_flocks held /* note that this is called by vfs setlease with i_lock held
to protect *lease from going away */ to protect *lease from going away */
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct cifsFileInfo *cfile = file->private_data; struct cifsFileInfo *cfile = file->private_data;
......
...@@ -1092,6 +1092,7 @@ struct lock_to_push { ...@@ -1092,6 +1092,7 @@ struct lock_to_push {
static int static int
cifs_push_posix_locks(struct cifsFileInfo *cfile) cifs_push_posix_locks(struct cifsFileInfo *cfile)
{ {
struct inode *inode = cfile->dentry->d_inode;
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
struct file_lock *flock, **before; struct file_lock *flock, **before;
unsigned int count = 0, i = 0; unsigned int count = 0, i = 0;
...@@ -1102,12 +1103,12 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) ...@@ -1102,12 +1103,12 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
xid = get_xid(); xid = get_xid();
lock_flocks(); spin_lock(&inode->i_lock);
cifs_for_each_lock(cfile->dentry->d_inode, before) { cifs_for_each_lock(inode, before) {
if ((*before)->fl_flags & FL_POSIX) if ((*before)->fl_flags & FL_POSIX)
count++; count++;
} }
unlock_flocks(); spin_unlock(&inode->i_lock);
INIT_LIST_HEAD(&locks_to_send); INIT_LIST_HEAD(&locks_to_send);
...@@ -1126,8 +1127,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) ...@@ -1126,8 +1127,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
} }
el = locks_to_send.next; el = locks_to_send.next;
lock_flocks(); spin_lock(&inode->i_lock);
cifs_for_each_lock(cfile->dentry->d_inode, before) { cifs_for_each_lock(inode, before) {
flock = *before; flock = *before;
if ((flock->fl_flags & FL_POSIX) == 0) if ((flock->fl_flags & FL_POSIX) == 0)
continue; continue;
...@@ -1152,7 +1153,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile) ...@@ -1152,7 +1153,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
lck->offset = flock->fl_start; lck->offset = flock->fl_start;
el = el->next; el = el->next;
} }
unlock_flocks(); spin_unlock(&inode->i_lock);
list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) { list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
int stored_rc; int stored_rc;
......
...@@ -889,7 +889,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, ...@@ -889,7 +889,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
* cluster; until we do, disable leases (by just returning -EINVAL), * cluster; until we do, disable leases (by just returning -EINVAL),
* unless the administrator has requested purely local locking. * unless the administrator has requested purely local locking.
* *
* Locking: called under lock_flocks * Locking: called under i_lock
* *
* Returns: errno * Returns: errno
*/ */
......
...@@ -169,7 +169,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, ...@@ -169,7 +169,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
again: again:
file->f_locks = 0; file->f_locks = 0;
lock_flocks(); /* protects i_flock list */ spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl; fl = fl->fl_next) { for (fl = inode->i_flock; fl; fl = fl->fl_next) {
if (fl->fl_lmops != &nlmsvc_lock_operations) if (fl->fl_lmops != &nlmsvc_lock_operations)
continue; continue;
...@@ -181,7 +181,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, ...@@ -181,7 +181,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
if (match(lockhost, host)) { if (match(lockhost, host)) {
struct file_lock lock = *fl; struct file_lock lock = *fl;
unlock_flocks(); spin_unlock(&inode->i_lock);
lock.fl_type = F_UNLCK; lock.fl_type = F_UNLCK;
lock.fl_start = 0; lock.fl_start = 0;
lock.fl_end = OFFSET_MAX; lock.fl_end = OFFSET_MAX;
...@@ -193,7 +193,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, ...@@ -193,7 +193,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
goto again; goto again;
} }
} }
unlock_flocks(); spin_unlock(&inode->i_lock);
return 0; return 0;
} }
...@@ -228,14 +228,14 @@ nlm_file_inuse(struct nlm_file *file) ...@@ -228,14 +228,14 @@ nlm_file_inuse(struct nlm_file *file)
if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
return 1; return 1;
lock_flocks(); spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl; fl = fl->fl_next) { for (fl = inode->i_flock; fl; fl = fl->fl_next) {
if (fl->fl_lmops == &nlmsvc_lock_operations) { if (fl->fl_lmops == &nlmsvc_lock_operations) {
unlock_flocks(); spin_unlock(&inode->i_lock);
return 1; return 1;
} }
} }
unlock_flocks(); spin_unlock(&inode->i_lock);
file->f_locks = 0; file->f_locks = 0;
return 0; return 0;
} }
......
This diff is collapsed.
...@@ -73,20 +73,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ ...@@ -73,20 +73,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
if (inode->i_flock == NULL) if (inode->i_flock == NULL)
goto out; goto out;
/* Protect inode->i_flock using the file locks lock */ /* Protect inode->i_flock using the i_lock */
lock_flocks(); spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
continue; continue;
if (nfs_file_open_context(fl->fl_file) != ctx) if (nfs_file_open_context(fl->fl_file) != ctx)
continue; continue;
unlock_flocks(); spin_unlock(&inode->i_lock);
status = nfs4_lock_delegation_recall(fl, state, stateid); status = nfs4_lock_delegation_recall(fl, state, stateid);
if (status < 0) if (status < 0)
goto out; goto out;
lock_flocks(); spin_lock(&inode->i_lock);
} }
unlock_flocks(); spin_unlock(&inode->i_lock);
out: out:
return status; return status;
} }
......
...@@ -1373,13 +1373,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ ...@@ -1373,13 +1373,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
/* Guard against delegation returns and new lock/unlock calls */ /* Guard against delegation returns and new lock/unlock calls */
down_write(&nfsi->rwsem); down_write(&nfsi->rwsem);
/* Protect inode->i_flock using the BKL */ /* Protect inode->i_flock using the BKL */
lock_flocks(); spin_lock(&inode->i_lock);
for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
continue; continue;
if (nfs_file_open_context(fl->fl_file)->state != state) if (nfs_file_open_context(fl->fl_file)->state != state)
continue; continue;
unlock_flocks(); spin_unlock(&inode->i_lock);
status = ops->recover_lock(state, fl); status = ops->recover_lock(state, fl);
switch (status) { switch (status) {
case 0: case 0:
...@@ -1406,9 +1406,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ ...@@ -1406,9 +1406,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
/* kill_proc(fl->fl_pid, SIGLOST, 1); */ /* kill_proc(fl->fl_pid, SIGLOST, 1); */
status = 0; status = 0;
} }
lock_flocks(); spin_lock(&inode->i_lock);
} }
unlock_flocks(); spin_unlock(&inode->i_lock);
out: out:
up_write(&nfsi->rwsem); up_write(&nfsi->rwsem);
return status; return status;
......
...@@ -2645,13 +2645,13 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp) ...@@ -2645,13 +2645,13 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
/* only place dl_time is set. protected by lock_flocks*/ /* Only place dl_time is set; protected by i_lock: */
dp->dl_time = get_seconds(); dp->dl_time = get_seconds();
nfsd4_cb_recall(dp); nfsd4_cb_recall(dp);
} }
/* Called from break_lease() with lock_flocks() held. */ /* Called from break_lease() with i_lock held. */
static void nfsd_break_deleg_cb(struct file_lock *fl) static void nfsd_break_deleg_cb(struct file_lock *fl)
{ {
struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner; struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
...@@ -4520,7 +4520,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) ...@@ -4520,7 +4520,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner)
struct inode *inode = filp->fi_inode; struct inode *inode = filp->fi_inode;
int status = 0; int status = 0;
lock_flocks(); spin_lock(&inode->i_lock);
for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
if ((*flpp)->fl_owner == (fl_owner_t)lowner) { if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
status = 1; status = 1;
...@@ -4528,7 +4528,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner) ...@@ -4528,7 +4528,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner)
} }
} }
out: out:
unlock_flocks(); spin_unlock(&inode->i_lock);
return status; return status;
} }
......
...@@ -1024,8 +1024,6 @@ extern int vfs_setlease(struct file *, long, struct file_lock **); ...@@ -1024,8 +1024,6 @@ extern int vfs_setlease(struct file *, long, struct file_lock **);
extern int lease_modify(struct file_lock **, int); extern int lease_modify(struct file_lock **, int);
extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
extern int lock_may_write(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
extern void lock_flocks(void);
extern void unlock_flocks(void);
#else /* !CONFIG_FILE_LOCKING */ #else /* !CONFIG_FILE_LOCKING */
static inline int fcntl_getlk(struct file *file, struct flock __user *user) static inline int fcntl_getlk(struct file *file, struct flock __user *user)
{ {
...@@ -1166,15 +1164,6 @@ static inline int lock_may_write(struct inode *inode, loff_t start, ...@@ -1166,15 +1164,6 @@ static inline int lock_may_write(struct inode *inode, loff_t start,
{ {
return 1; return 1;
} }
static inline void lock_flocks(void)
{
}
static inline void unlock_flocks(void)
{
}
#endif /* !CONFIG_FILE_LOCKING */ #endif /* !CONFIG_FILE_LOCKING */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment