Commit 1c8c601a authored by Jeff Layton, committed by Al Viro

locks: protect most of the file_lock handling with i_lock

Having a global lock that protects all of this code is a clear
scalability problem. Instead, move most of this handling under the
inode->i_lock. The exceptions are the global lists that the ->fl_link
sits on, and the ->fl_block list.
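
Concretely, each lock_flocks()/unlock_flocks() pair that guards an
inode's i_flock list becomes a spin_lock()/spin_unlock() of that
inode's i_lock. A minimal sketch of the pattern the per-filesystem
hunks below follow (the loop body is illustrative only):

	struct inode *inode = file_inode(file);
	struct file_lock *fl;

	spin_lock(&inode->i_lock);
	for (fl = inode->i_flock; fl; fl = fl->fl_next) {
		/* inspect or copy fl; the i_lock keeps i_flock stable */
	}
	spin_unlock(&inode->i_lock);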

->fl_link is what connects these structures to the global lists, so we
must hold the global file_lock_lock whenever we iterate over or update
those lists.
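
For reference, the global state in question looks roughly like this in
fs/locks.c at this point in the series (a sketch; later patches rework
these structures):

	/* Both lists are threaded through ->fl_link and stay under the
	 * global file_lock_lock. */
	static DEFINE_SPINLOCK(file_lock_lock);
	static LIST_HEAD(file_lock_list);	/* all active locks, for /proc/locks */
	static LIST_HEAD(blocked_list);		/* POSIX waiters, for deadlock checks */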

Furthermore, sound deadlock detection requires that we hold the
blocked_list state steady while checking for loops, and that the search
of the list and any update to it are atomic with respect to each other.
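
To see why, consider the shape of the detector: it chases the chain of
"who is waiting for whom" one owner at a time, and any step of that
chain can go stale if blocked_list mutates mid-walk. A sketch of the
walk (helper names follow fs/locks.c, but treat the code as
illustrative):

	/* Does adding caller_fl, blocked on block_fl, close a wait cycle? */
	static int posix_locks_deadlock(struct file_lock *caller_fl,
					struct file_lock *block_fl)
	{
		while ((block_fl = what_owner_is_waiting_for(block_fl))) {
			if (posix_same_owner(caller_fl, block_fl))
				return 1;	/* chain leads back to us */
		}
		return 0;
	}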

For the checking and insertion side of the blocked_list, push the
acquisition of the global lock into __posix_lock_file and ensure that
the check and the update of the blocked_list are done without dropping
the lock in between.
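
The resulting critical section in __posix_lock_file looks roughly like
this (request is the lock being acquired, fl the lock it conflicts
with; error handling trimmed, so treat it as a sketch):

	error = -EDEADLK;
	spin_lock(&file_lock_lock);
	if (likely(!posix_locks_deadlock(request, fl))) {
		error = FILE_LOCK_DEFERRED;
		/* enqueue on fl->fl_block and the global blocked_list */
		__locks_insert_block(fl, request);
	}
	spin_unlock(&file_lock_lock);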

On the removal side, when waking up blocked lock waiters, take the
global lock before walking the blocked list and dequeue the waiters from
the global list prior to removal from the fl_block list.
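
An abridged sketch of the wakeup path as this patch shapes it:

	static void locks_wake_up_blocks(struct file_lock *blocker)
	{
		spin_lock(&file_lock_lock);
		while (!list_empty(&blocker->fl_block)) {
			struct file_lock *waiter;

			waiter = list_first_entry(&blocker->fl_block,
						  struct file_lock, fl_block);
			/* leaves the global blocked_list first, then fl_block */
			__locks_delete_block(waiter);
			if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
				waiter->fl_lmops->lm_notify(waiter);
			else
				wake_up(&waiter->fl_wait);
		}
		spin_unlock(&file_lock_lock);
	}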

With this, deadlock detection should be race-free while we minimize
file_lock_lock thrashing.

Finally, in order to avoid a lock inversion problem when handling
/proc/locks output, we must ensure that manipulations of the fl_block
list are also protected by the file_lock_lock.
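
The resulting lock ordering, sketched as a comment:

	/*
	 * Lock ordering after this patch (sketch):
	 *
	 *   inode->i_lock		protects the inode's i_flock list
	 *     file_lock_lock		protects file_lock_list, blocked_list,
	 *				and all ->fl_block manipulation
	 *
	 * /proc/locks takes only the file_lock_lock while walking ->fl_block,
	 * which is why fl_block must never be changed under i_lock alone.
	 */
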
Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
parent 88974691
Documentation/filesystems/Locking
@@ -342,7 +342,7 @@ prototypes:
 locking rules:
-			file_lock_lock	may block
+			inode->i_lock	may block
 fl_copy_lock:		yes		no
 fl_release_private:	maybe		no
@@ -355,12 +355,19 @@ prototypes:
 	int (*lm_change)(struct file_lock **, int);

 locking rules:
-			file_lock_lock	may block
-lm_compare_owner:	yes		no
-lm_notify:		yes		no
-lm_grant:		no		no
-lm_break:		yes		no
-lm_change		yes		no
+			inode->i_lock	file_lock_lock	may block
+lm_compare_owner:	yes[1]		maybe		no
+lm_notify:		yes		yes		no
+lm_grant:		no		no		no
+lm_break:		yes		no		no
+lm_change		yes		no		no
+
+[1]: ->lm_compare_owner is generally called with *an* inode->i_lock held. It
+may not be the i_lock of the inode for either file_lock being compared! This is
+the case with deadlock detection, since the code has to chase down the owners
+of locks that may be entirely unrelated to the one on which the lock is being
+acquired. When doing a search for deadlocks, the file_lock_lock is also held.

 --------------------------- buffer_head -----------------------------------
 prototypes:
fs/afs/flock.c
@@ -252,7 +252,8 @@ static void afs_defer_unlock(struct afs_vnode *vnode, struct key *key)
  */
 static int afs_do_setlk(struct file *file, struct file_lock *fl)
 {
-	struct afs_vnode *vnode = AFS_FS_I(file->f_mapping->host);
+	struct inode *inode = file_inode(file);
+	struct afs_vnode *vnode = AFS_FS_I(inode);
 	afs_lock_type_t type;
 	struct key *key = file->private_data;
 	int ret;
@@ -273,7 +274,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 	type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE;
-	lock_flocks();
+	spin_lock(&inode->i_lock);
 	/* make sure we've got a callback on this file and that our view of the
 	 * data version is up to date */
@@ -420,7 +421,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl)
 	afs_vnode_fetch_status(vnode, NULL, key);
 error:
-	unlock_flocks();
+	spin_unlock(&inode->i_lock);
 	_leave(" = %d", ret);
 	return ret;
fs/ceph/locks.c
@@ -192,7 +192,7 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count)
 /**
  * Encode the flock and fcntl locks for the given inode into the ceph_filelock
- * array. Must be called with lock_flocks() already held.
+ * array. Must be called with inode->i_lock already held.
  * If we encounter more of a specific lock type than expected, return -ENOSPC.
  */
 int ceph_encode_locks_to_buffer(struct inode *inode,
fs/ceph/mds_client.c
@@ -2481,20 +2481,20 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
 	struct ceph_filelock *flocks;

 encode_again:
-	lock_flocks();
+	spin_lock(&inode->i_lock);
 	ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
-	unlock_flocks();
+	spin_unlock(&inode->i_lock);
 	flocks = kmalloc((num_fcntl_locks+num_flock_locks) *
 			 sizeof(struct ceph_filelock), GFP_NOFS);
 	if (!flocks) {
 		err = -ENOMEM;
 		goto out_free;
 	}
-	lock_flocks();
+	spin_lock(&inode->i_lock);
 	err = ceph_encode_locks_to_buffer(inode, flocks,
 					  num_fcntl_locks,
 					  num_flock_locks);
-	unlock_flocks();
+	spin_unlock(&inode->i_lock);
 	if (err) {
 		kfree(flocks);
 		if (err == -ENOSPC)
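
The encode_again loop above must re-count and retry because the i_lock
is dropped across the GFP_NOFS allocation, which may sleep, and more
locks can appear in the meantime. The shape of the pattern, with
count_locks() and encode_locks() as hypothetical stand-ins for the
ceph helpers:

	struct ceph_filelock *buf;
	int n, err;

again:
	spin_lock(&inode->i_lock);
	n = count_locks(inode);				/* hypothetical helper */
	spin_unlock(&inode->i_lock);

	buf = kmalloc(n * sizeof(*buf), GFP_NOFS);	/* may sleep: no spinlock held */
	if (!buf)
		return -ENOMEM;

	spin_lock(&inode->i_lock);
	err = encode_locks(inode, buf, n);		/* hypothetical helper */
	spin_unlock(&inode->i_lock);
	if (err == -ENOSPC) {				/* lock count grew meanwhile */
		kfree(buf);
		goto again;
	}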
fs/cifs/cifsfs.c
@@ -765,7 +765,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
 static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
 {
-	/* note that this is called by vfs setlease with lock_flocks held
+	/* note that this is called by vfs setlease with i_lock held
 	   to protect *lease from going away */
 	struct inode *inode = file_inode(file);
 	struct cifsFileInfo *cfile = file->private_data;
fs/cifs/file.c
@@ -1092,6 +1092,7 @@ struct lock_to_push {
 static int
 cifs_push_posix_locks(struct cifsFileInfo *cfile)
 {
+	struct inode *inode = cfile->dentry->d_inode;
 	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	struct file_lock *flock, **before;
 	unsigned int count = 0, i = 0;
@@ -1102,12 +1103,12 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 	xid = get_xid();

-	lock_flocks();
-	cifs_for_each_lock(cfile->dentry->d_inode, before) {
+	spin_lock(&inode->i_lock);
+	cifs_for_each_lock(inode, before) {
 		if ((*before)->fl_flags & FL_POSIX)
 			count++;
 	}
-	unlock_flocks();
+	spin_unlock(&inode->i_lock);

 	INIT_LIST_HEAD(&locks_to_send);
@@ -1126,8 +1127,8 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 	}

 	el = locks_to_send.next;
-	lock_flocks();
-	cifs_for_each_lock(cfile->dentry->d_inode, before) {
+	spin_lock(&inode->i_lock);
+	cifs_for_each_lock(inode, before) {
 		flock = *before;
 		if ((flock->fl_flags & FL_POSIX) == 0)
 			continue;
@@ -1152,7 +1153,7 @@ cifs_push_posix_locks(struct cifsFileInfo *cfile)
 		lck->offset = flock->fl_start;
 		el = el->next;
 	}
-	unlock_flocks();
+	spin_unlock(&inode->i_lock);

 	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
 		int stored_rc;
fs/gfs2/file.c
@@ -889,7 +889,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
  * cluster; until we do, disable leases (by just returning -EINVAL),
  * unless the administrator has requested purely local locking.
  *
- * Locking: called under lock_flocks
+ * Locking: called under i_lock
  *
  * Returns: errno
  */
fs/lockd/svcsubs.c
@@ -169,7 +169,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
 again:
 	file->f_locks = 0;
-	lock_flocks(); /* protects i_flock list */
+	spin_lock(&inode->i_lock);
 	for (fl = inode->i_flock; fl; fl = fl->fl_next) {
 		if (fl->fl_lmops != &nlmsvc_lock_operations)
 			continue;
@@ -181,7 +181,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
 		if (match(lockhost, host)) {
 			struct file_lock lock = *fl;
-			unlock_flocks();
+			spin_unlock(&inode->i_lock);
 			lock.fl_type = F_UNLCK;
 			lock.fl_start = 0;
 			lock.fl_end = OFFSET_MAX;
@@ -193,7 +193,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
 			goto again;
 		}
 	}
-	unlock_flocks();
+	spin_unlock(&inode->i_lock);
 	return 0;
 }
@@ -228,14 +228,14 @@ nlm_file_inuse(struct nlm_file *file)
 	if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
 		return 1;
-	lock_flocks();
+	spin_lock(&inode->i_lock);
 	for (fl = inode->i_flock; fl; fl = fl->fl_next) {
 		if (fl->fl_lmops == &nlmsvc_lock_operations) {
-			unlock_flocks();
+			spin_unlock(&inode->i_lock);
 			return 1;
 		}
 	}
-	unlock_flocks();
+	spin_unlock(&inode->i_lock);
 	file->f_locks = 0;
 	return 0;
 }
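
nlm_traverse_locks() above shows the idiom used wherever a walker must
do blocking work on an entry: copy the lock, drop the i_lock, do the
work, then restart from the head of the list, since i_flock may have
changed while it was unlocked. Schematically (interesting() and
do_blocking_work() are placeholders, not real kernel functions):

	struct file_lock *fl, lock;

again:
	spin_lock(&inode->i_lock);
	for (fl = inode->i_flock; fl; fl = fl->fl_next) {
		if (!interesting(fl))
			continue;
		lock = *fl;			/* copy while the list is stable */
		spin_unlock(&inode->i_lock);
		do_blocking_work(&lock);	/* may sleep; i_flock can change */
		goto again;			/* so restart from the head */
	}
	spin_unlock(&inode->i_lock);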
fs/locks.c (the core of this change): diff collapsed in this view.
fs/nfs/delegation.c
@@ -73,20 +73,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_
 	if (inode->i_flock == NULL)
 		goto out;

-	/* Protect inode->i_flock using the file locks lock */
-	lock_flocks();
+	/* Protect inode->i_flock using the i_lock */
+	spin_lock(&inode->i_lock);
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 		if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
 			continue;
 		if (nfs_file_open_context(fl->fl_file) != ctx)
 			continue;
-		unlock_flocks();
+		spin_unlock(&inode->i_lock);
 		status = nfs4_lock_delegation_recall(fl, state, stateid);
 		if (status < 0)
 			goto out;
-		lock_flocks();
+		spin_lock(&inode->i_lock);
 	}
-	unlock_flocks();
+	spin_unlock(&inode->i_lock);
 out:
 	return status;
 }
fs/nfs/nfs4state.c
@@ -1373,13 +1373,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
 	/* Guard against delegation returns and new lock/unlock calls */
 	down_write(&nfsi->rwsem);
 	/* Protect inode->i_flock using the BKL */
-	lock_flocks();
+	spin_lock(&inode->i_lock);
 	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {
 		if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK)))
 			continue;
 		if (nfs_file_open_context(fl->fl_file)->state != state)
 			continue;
-		unlock_flocks();
+		spin_unlock(&inode->i_lock);
 		status = ops->recover_lock(state, fl);
 		switch (status) {
 			case 0:
@@ -1406,9 +1406,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_
 			/* kill_proc(fl->fl_pid, SIGLOST, 1); */
 			status = 0;
 		}
-		lock_flocks();
+		spin_lock(&inode->i_lock);
 	}
-	unlock_flocks();
+	spin_unlock(&inode->i_lock);
 out:
 	up_write(&nfsi->rwsem);
 	return status;
fs/nfsd/nfs4state.c
@@ -2645,13 +2645,13 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
 	list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);

-	/* only place dl_time is set. protected by lock_flocks*/
+	/* Only place dl_time is set; protected by i_lock: */
 	dp->dl_time = get_seconds();

 	nfsd4_cb_recall(dp);
 }

-/* Called from break_lease() with lock_flocks() held. */
+/* Called from break_lease() with i_lock held. */
 static void nfsd_break_deleg_cb(struct file_lock *fl)
 {
 	struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
@@ -4520,7 +4520,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner)
 	struct inode *inode = filp->fi_inode;
 	int status = 0;

-	lock_flocks();
+	spin_lock(&inode->i_lock);
 	for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
 		if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
 			status = 1;
@@ -4528,7 +4528,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner)
 		}
 	}
 out:
-	unlock_flocks();
+	spin_unlock(&inode->i_lock);
 	return status;
 }
include/linux/fs.h
@@ -1024,8 +1024,6 @@ extern int vfs_setlease(struct file *, long, struct file_lock **);
 extern int lease_modify(struct file_lock **, int);
 extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
 extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
-extern void lock_flocks(void);
-extern void unlock_flocks(void);
 #else /* !CONFIG_FILE_LOCKING */
 static inline int fcntl_getlk(struct file *file, struct flock __user *user)
 {
@@ -1166,15 +1164,6 @@ static inline int lock_may_write(struct inode *inode, loff_t start,
 {
 	return 1;
 }
-
-static inline void lock_flocks(void)
-{
-}
-
-static inline void unlock_flocks(void)
-{
-}
 #endif /* !CONFIG_FILE_LOCKING */