Commit f82c458a authored by Chris Mason's avatar Chris Mason

btrfs: fix lockups from btrfs_clear_path_blocking

The fair reader/writer locks mean that btrfs_clear_path_blocking needs
to strictly follow lock ordering rules even when we already have
blocking locks on a given path.

Before we can clear a blocking lock on the path, we need to make sure
all of the locks have been converted to blocking.  This will remove lock
inversions against anyone spinning in write_lock() against the buffers
we're trying to get read locks on.  These inversions didn't exist before
the fair read/writer locks, but now we need to be more careful.

We papered over this deadlock in the past by changing
btrfs_try_read_lock() to be a true trylock against both the spinlock and
the blocking lock.  This was slower, and not sufficient to fix all the
deadlocks.  This patch adds a btrfs_tree_read_lock_atomic(), which
basically means get the spinlock but trylock on the blocking lock.
Signed-off-by: default avatarChris Mason <clm@fb.com>
Signed-off-by: default avatarJosef Bacik <jbacik@fb.com>
Reported-by: default avatarPatrick Schmid <schmid@phys.ethz.ch>
cc: stable@vger.kernel.org #v3.15+
parent 6e5aafb2
......@@ -80,13 +80,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
{
int i;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* lockdep really cares that we take all of these spinlocks
* in the right order. If any of the locks in the path are not
* currently blocking, it is going to complain. So, make really
* really sure by forcing the path to blocking before we clear
* the path blocking.
*/
if (held) {
btrfs_set_lock_blocking_rw(held, held_rw);
if (held_rw == BTRFS_WRITE_LOCK)
......@@ -95,7 +88,6 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
held_rw = BTRFS_READ_LOCK_BLOCKING;
}
btrfs_set_path_blocking(p);
#endif
for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
if (p->nodes[i] && p->locks[i]) {
......@@ -107,10 +99,8 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
}
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
if (held)
btrfs_clear_lock_blocking_rw(held, held_rw);
#endif
}
/* this also releases the path */
......@@ -2893,7 +2883,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
}
p->locks[level] = BTRFS_WRITE_LOCK;
} else {
err = btrfs_try_tree_read_lock(b);
err = btrfs_tree_read_lock_atomic(b);
if (!err) {
btrfs_set_path_blocking(p);
btrfs_tree_read_lock(b);
......@@ -3025,7 +3015,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
}
level = btrfs_header_level(b);
err = btrfs_try_tree_read_lock(b);
err = btrfs_tree_read_lock_atomic(b);
if (!err) {
btrfs_set_path_blocking(p);
btrfs_tree_read_lock(b);
......
......@@ -127,6 +127,26 @@ void btrfs_tree_read_lock(struct extent_buffer *eb)
atomic_inc(&eb->spinning_readers);
}
/*
* take a spinning read lock.
* returns 1 if we get the read lock and 0 if we don't
* this won't wait for blocking writers
*/
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb)
{
if (atomic_read(&eb->blocking_writers))
return 0;
read_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers)) {
read_unlock(&eb->lock);
return 0;
}
atomic_inc(&eb->read_locks);
atomic_inc(&eb->spinning_readers);
return 1;
}
/*
* returns 1 if we get the read lock and 0 if we don't
* this won't wait for blocking writers
......@@ -158,9 +178,7 @@ int btrfs_try_tree_write_lock(struct extent_buffer *eb)
atomic_read(&eb->blocking_readers))
return 0;
if (!write_trylock(&eb->lock))
return 0;
write_lock(&eb->lock);
if (atomic_read(&eb->blocking_writers) ||
atomic_read(&eb->blocking_readers)) {
write_unlock(&eb->lock);
......
......@@ -35,6 +35,8 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
void btrfs_assert_tree_locked(struct extent_buffer *eb);
int btrfs_try_tree_read_lock(struct extent_buffer *eb);
int btrfs_try_tree_write_lock(struct extent_buffer *eb);
int btrfs_tree_read_lock_atomic(struct extent_buffer *eb);
static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment