Commit 278f6679 authored by Jeff Mahoney, committed by Jeff Mahoney

reiserfs: locking, handle nested locks properly

The reiserfs write lock replaced the BKL and uses similar semantics.

Frederic's locking code makes a distinction between when the lock is nested
and when it's being acquired/released, but I don't think that's the right
distinction to make.

The right distinction is between the lock being released at end-of-use and
the lock being released for a schedule. The unlock should return the depth
and the lock should restore it, rather than the other way around as it is now.

This patch implements that and adds a number of places where the lock
should be dropped.
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
parent 4c05141d
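
The conversion pattern used throughout this patch, as a minimal sketch (reiserfs_write_unlock_nested() and reiserfs_write_lock_nested() are the primitives the patch introduces; the surrounding lines are illustrative rather than taken from any one call site): the nested unlock fully drops the write lock and returns the recursion depth the caller held, or -1 if it held nothing, and the nested lock re-takes the mutex and restores that depth, so the pair is safe around anything that may schedule.

	int depth;

	/* Fully drop the write lock before an operation that may sleep;
	 * depth records how deeply this task held it (-1 = not held). */
	depth = reiserfs_write_unlock_nested(sb);
	__wait_on_buffer(bh);		/* may schedule */
	/* Re-take the lock and restore the saved recursion depth;
	 * this is a no-op when depth == -1. */
	reiserfs_write_lock_nested(sb, depth);
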
@@ -1340,10 +1340,11 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb,
 			       "reading failed", __func__, block);
 	else {
 		if (buffer_locked(bh)) {
+			int depth;
 			PROC_INFO_INC(sb, scan_bitmap.wait);
-			reiserfs_write_unlock(sb);
+			depth = reiserfs_write_unlock_nested(sb);
 			__wait_on_buffer(bh);
-			reiserfs_write_lock(sb);
+			reiserfs_write_lock_nested(sb, depth);
 		}
 		BUG_ON(!buffer_uptodate(bh));
 		BUG_ON(atomic_read(&bh->b_count) == 0);
@@ -71,6 +71,7 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
 	char small_buf[32];	/* avoid kmalloc if we can */
 	struct reiserfs_dir_entry de;
 	int ret = 0;
+	int depth;
 
 	reiserfs_write_lock(inode->i_sb);
@@ -181,17 +182,17 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx)
 				/*
 				 * Since filldir might sleep, we can release
 				 * the write lock here for other waiters
 				 */
-				reiserfs_write_unlock(inode->i_sb);
+				depth = reiserfs_write_unlock_nested(inode->i_sb);
 				if (!dir_emit
 				    (ctx, local_buf, d_reclen, d_ino,
 				     DT_UNKNOWN)) {
-					reiserfs_write_lock(inode->i_sb);
+					reiserfs_write_lock_nested(inode->i_sb, depth);
 					if (local_buf != small_buf) {
 						kfree(local_buf);
 					}
 					goto end;
 				}
-				reiserfs_write_lock(inode->i_sb);
+				reiserfs_write_lock_nested(inode->i_sb, depth);
 				if (local_buf != small_buf) {
 					kfree(local_buf);
 				}
@@ -1022,9 +1022,9 @@ static int get_far_parent(struct tree_balance *tb,
 
 	if (buffer_locked(*pcom_father)) {
 		/* Release the write lock while the buffer is busy */
-		reiserfs_write_unlock(tb->tb_sb);
+		int depth = reiserfs_write_unlock_nested(tb->tb_sb);
 		__wait_on_buffer(*pcom_father);
-		reiserfs_write_lock(tb->tb_sb);
+		reiserfs_write_lock_nested(tb->tb_sb, depth);
 		if (FILESYSTEM_CHANGED_TB(tb)) {
 			brelse(*pcom_father);
 			return REPEAT_SEARCH;
@@ -1929,9 +1929,9 @@ static int get_direct_parent(struct tree_balance *tb, int h)
 		return REPEAT_SEARCH;
 
 	if (buffer_locked(bh)) {
-		reiserfs_write_unlock(tb->tb_sb);
+		int depth = reiserfs_write_unlock_nested(tb->tb_sb);
 		__wait_on_buffer(bh);
-		reiserfs_write_lock(tb->tb_sb);
+		reiserfs_write_lock_nested(tb->tb_sb, depth);
 		if (FILESYSTEM_CHANGED_TB(tb))
 			return REPEAT_SEARCH;
 	}
@@ -1952,6 +1952,7 @@ static int get_neighbors(struct tree_balance *tb, int h)
 	unsigned long son_number;
 	struct super_block *sb = tb->tb_sb;
 	struct buffer_head *bh;
+	int depth;
 
 	PROC_INFO_INC(sb, get_neighbors[h]);
 
@@ -1969,9 +1970,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
 		     tb->FL[h]) ? tb->lkey[h] : B_NR_ITEMS(tb->
							    FL[h]);
 		son_number = B_N_CHILD_NUM(tb->FL[h], child_position);
-		reiserfs_write_unlock(sb);
+		depth = reiserfs_write_unlock_nested(tb->tb_sb);
 		bh = sb_bread(sb, son_number);
-		reiserfs_write_lock(sb);
+		reiserfs_write_lock_nested(tb->tb_sb, depth);
 		if (!bh)
 			return IO_ERROR;
 		if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2009,9 +2010,9 @@ static int get_neighbors(struct tree_balance *tb, int h)
 		child_position =
 		    (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0;
 		son_number = B_N_CHILD_NUM(tb->FR[h], child_position);
-		reiserfs_write_unlock(sb);
+		depth = reiserfs_write_unlock_nested(tb->tb_sb);
 		bh = sb_bread(sb, son_number);
-		reiserfs_write_lock(sb);
+		reiserfs_write_lock_nested(tb->tb_sb, depth);
 		if (!bh)
 			return IO_ERROR;
 		if (FILESYSTEM_CHANGED_TB(tb)) {
@@ -2272,6 +2273,7 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
 		}
 
 		if (locked) {
+			int depth;
 #ifdef CONFIG_REISERFS_CHECK
 			repeat_counter++;
 			if ((repeat_counter % 10000) == 0) {
@@ -2286,9 +2288,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb)
 				    REPEAT_SEARCH : CARRY_ON;
 			}
 #endif
-			reiserfs_write_unlock(tb->tb_sb);
+			depth = reiserfs_write_unlock_nested(tb->tb_sb);
 			__wait_on_buffer(locked);
-			reiserfs_write_lock(tb->tb_sb);
+			reiserfs_write_lock_nested(tb->tb_sb, depth);
 			if (FILESYSTEM_CHANGED_TB(tb))
 				return REPEAT_SEARCH;
 		}
@@ -2359,9 +2361,9 @@ int fix_nodes(int op_mode, struct tree_balance *tb,
 
 	/* if it possible in indirect_to_direct conversion */
 	if (buffer_locked(tbS0)) {
-		reiserfs_write_unlock(tb->tb_sb);
+		int depth = reiserfs_write_unlock_nested(tb->tb_sb);
 		__wait_on_buffer(tbS0);
-		reiserfs_write_lock(tb->tb_sb);
+		reiserfs_write_lock_nested(tb->tb_sb, depth);
 		if (FILESYSTEM_CHANGED_TB(tb))
 			return REPEAT_SEARCH;
 	}
@@ -167,7 +167,6 @@ int reiserfs_commit_write(struct file *f, struct page *page,
 int reiserfs_unpack(struct inode *inode, struct file *filp)
 {
 	int retval = 0;
-	int depth;
 	int index;
 	struct page *page;
 	struct address_space *mapping;
@@ -183,11 +182,11 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
 		return 0;
 	}
 
-	depth = reiserfs_write_lock_once(inode->i_sb);
-
 	/* we need to make sure nobody is changing the file size beneath us */
 	reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
 
+	reiserfs_write_lock(inode->i_sb);
+
 	write_from = inode->i_size & (blocksize - 1);
 	/* if we are on a block boundary, we are already unpacked. */
 	if (write_from == 0) {
@@ -221,6 +220,6 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
 
 out:
 	mutex_unlock(&inode->i_mutex);
-	reiserfs_write_unlock_once(inode->i_sb, depth);
+	reiserfs_write_unlock(inode->i_sb);
 	return retval;
 }
@@ -48,30 +48,35 @@ void reiserfs_write_unlock(struct super_block *s)
 	}
 }
 
-/*
- * If we already own the lock, just exit and don't increase the depth.
- * Useful when we don't want to lock more than once.
- *
- * We always return the lock_depth we had before calling
- * this function.
- */
-int reiserfs_write_lock_once(struct super_block *s)
+int __must_check reiserfs_write_unlock_nested(struct super_block *s)
 {
 	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+	int depth;
 
-	if (sb_i->lock_owner != current) {
-		mutex_lock(&sb_i->lock);
-		sb_i->lock_owner = current;
-		return sb_i->lock_depth++;
-	}
+	/* this can happen when the lock isn't always held */
+	if (sb_i->lock_owner != current)
+		return -1;
+
+	depth = sb_i->lock_depth;
+
+	sb_i->lock_depth = -1;
+	sb_i->lock_owner = NULL;
+	mutex_unlock(&sb_i->lock);
 
-	return sb_i->lock_depth;
+	return depth;
 }
 
-void reiserfs_write_unlock_once(struct super_block *s, int lock_depth)
+void reiserfs_write_lock_nested(struct super_block *s, int depth)
 {
-	if (lock_depth == -1)
-		reiserfs_write_unlock(s);
+	struct reiserfs_sb_info *sb_i = REISERFS_SB(s);
+
+	/* this can happen when the lock isn't always held */
+	if (depth == -1)
+		return;
+
+	mutex_lock(&sb_i->lock);
+	sb_i->lock_owner = current;
+	sb_i->lock_depth = depth;
 }
 
 /*
@@ -82,9 +87,7 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
 {
 	struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
 
-	if (sb_i->lock_depth < 0)
-		reiserfs_panic(sb, "%s called without kernel lock held %d",
-			       caller);
+	WARN_ON(sb_i->lock_depth < 0);
 }
 
 #ifdef CONFIG_REISERFS_CHECK
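The two functions above behave like a recursive mutex whose entire recursion count can be saved and restored in one step. For experimenting with the semantics outside the kernel, here is a small userspace analogue (a pthread mutex stands in for sb_i->lock; every name in this sketch is hypothetical and only mirrors the logic of the lock.c change):

	#include <pthread.h>
	#include <stdbool.h>

	/* Hypothetical userspace model of the reiserfs write lock:
	 * a mutex plus an owner and a recursion depth. */
	struct wlock {
		pthread_mutex_t mutex;
		pthread_t owner;
		bool owned;
		int depth;
	};

	/* Fully release the lock and return the depth we held, or -1
	 * if the calling thread was not the owner. */
	static int wlock_unlock_nested(struct wlock *l)
	{
		int depth;

		if (!l->owned || !pthread_equal(l->owner, pthread_self()))
			return -1;

		depth = l->depth;
		l->depth = -1;
		l->owned = false;
		pthread_mutex_unlock(&l->mutex);
		return depth;
	}

	/* Re-acquire the lock and restore the saved depth; a depth of
	 * -1 means we held nothing before, so do nothing. */
	static void wlock_lock_nested(struct wlock *l, int depth)
	{
		if (depth == -1)
			return;

		pthread_mutex_lock(&l->mutex);
		l->owner = pthread_self();
		l->owned = true;
		l->depth = depth;
	}
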
@@ -325,7 +325,6 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
 						  unsigned int flags)
 {
 	int retval;
-	int lock_depth;
 	struct inode *inode = NULL;
 	struct reiserfs_dir_entry de;
 	INITIALIZE_PATH(path_to_entry);
@@ -333,12 +332,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry,
 	if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len)
 		return ERR_PTR(-ENAMETOOLONG);
 
-	/*
-	 * Might be called with or without the write lock, must be careful
-	 * to not recursively hold it in case we want to release the lock
-	 * before rescheduling.
-	 */
-	lock_depth = reiserfs_write_lock_once(dir->i_sb);
+	reiserfs_write_lock(dir->i_sb);
 
 	de.de_gen_number_bit_string = NULL;
 	retval =
@@ -349,7 +343,7 @@
 		inode = reiserfs_iget(dir->i_sb,
 				      (struct cpu_key *)&(de.de_dir_id));
 		if (!inode || IS_ERR(inode)) {
-			reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+			reiserfs_write_unlock(dir->i_sb);
 			return ERR_PTR(-EACCES);
 		}
@@ -358,7 +352,7 @@
 		if (IS_PRIVATE(dir))
 			inode->i_flags |= S_PRIVATE;
 	}
-	reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+	reiserfs_write_unlock(dir->i_sb);
 	if (retval == IO_ERROR) {
 		return ERR_PTR(-EIO);
 	}
@@ -727,7 +721,6 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 	struct inode *inode;
 	struct reiserfs_transaction_handle th;
 	struct reiserfs_security_handle security;
-	int lock_depth;
 	/* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */
 	int jbegin_count =
 	    JOURNAL_PER_BALANCE_CNT * 3 +
@@ -753,7 +746,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 		return retval;
 	}
 	jbegin_count += retval;
-	lock_depth = reiserfs_write_lock_once(dir->i_sb);
+	reiserfs_write_lock(dir->i_sb);
 
 	retval = journal_begin(&th, dir->i_sb, jbegin_count);
 	if (retval) {
@@ -804,7 +797,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 	d_instantiate(dentry, inode);
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
 out_failed:
-	reiserfs_write_unlock_once(dir->i_sb, lock_depth);
+	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 }
 
@@ -920,7 +913,6 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 	struct reiserfs_transaction_handle th;
 	int jbegin_count;
 	unsigned long savelink;
-	int depth;
 
 	dquot_initialize(dir);
 
@@ -934,7 +926,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 	    JOURNAL_PER_BALANCE_CNT * 2 + 2 +
 	    4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
 
-	depth = reiserfs_write_lock_once(dir->i_sb);
+	reiserfs_write_lock(dir->i_sb);
 	retval = journal_begin(&th, dir->i_sb, jbegin_count);
 	if (retval)
 		goto out_unlink;
@@ -995,7 +987,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
 	reiserfs_check_path(&path);
-	reiserfs_write_unlock_once(dir->i_sb, depth);
+	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 
 end_unlink:
@@ -1005,7 +997,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
 	if (err)
 		retval = err;
 out_unlink:
-	reiserfs_write_unlock_once(dir->i_sb, depth);
+	reiserfs_write_unlock(dir->i_sb);
 	return retval;
 }
@@ -358,12 +358,13 @@ void __reiserfs_panic(struct super_block *sb, const char *id,
 	dump_stack();
 #endif
 
 	if (sb)
-		panic(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n",
+		printk(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n",
 		      sb->s_id, id ? id : "", id ? " " : "",
 		      function, error_buf);
 	else
-		panic(KERN_WARNING "REISERFS panic: %s%s%s: %s\n",
+		printk(KERN_WARNING "REISERFS panic: %s%s%s: %s\n",
 		      id ? id : "", id ? " " : "", function, error_buf);
+	BUG();
 }
 
 void __reiserfs_error(struct super_block *sb, const char *id,
@@ -630,8 +630,8 @@ static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal
  */
 void reiserfs_write_lock(struct super_block *s);
 void reiserfs_write_unlock(struct super_block *s);
-int reiserfs_write_lock_once(struct super_block *s);
-void reiserfs_write_unlock_once(struct super_block *s, int lock_depth);
+int __must_check reiserfs_write_unlock_nested(struct super_block *s);
+void reiserfs_write_lock_nested(struct super_block *s, int depth);
 
 #ifdef CONFIG_REISERFS_CHECK
 void reiserfs_lock_check_recursive(struct super_block *s);
@@ -667,31 +667,33 @@ static inline void reiserfs_lock_check_recursive(struct super_block *s) { }
  * - The inode mutex
  */
 static inline void reiserfs_mutex_lock_safe(struct mutex *m,
-			struct super_block *s)
+					    struct super_block *s)
 {
-	reiserfs_lock_check_recursive(s);
-	reiserfs_write_unlock(s);
+	int depth;
+
+	depth = reiserfs_write_unlock_nested(s);
 	mutex_lock(m);
-	reiserfs_write_lock(s);
+	reiserfs_write_lock_nested(s, depth);
 }
 
 static inline void
 reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass,
-			struct super_block *s)
+				struct super_block *s)
 {
-	reiserfs_lock_check_recursive(s);
-	reiserfs_write_unlock(s);
+	int depth;
+
+	depth = reiserfs_write_unlock_nested(s);
 	mutex_lock_nested(m, subclass);
-	reiserfs_write_lock(s);
+	reiserfs_write_lock_nested(s, depth);
}
 
 static inline void
 reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
 {
-	reiserfs_lock_check_recursive(s);
-	reiserfs_write_unlock(s);
-	down_read(sem);
-	reiserfs_write_lock(s);
+	int depth;
+	depth = reiserfs_write_unlock_nested(s);
+	down_read(sem);
+	reiserfs_write_lock_nested(s, depth);
 }
 
 /*
@@ -701,9 +703,11 @@ reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s)
 static inline void reiserfs_cond_resched(struct super_block *s)
 {
 	if (need_resched()) {
-		reiserfs_write_unlock(s);
+		int depth;
+
+		depth = reiserfs_write_unlock_nested(s);
 		schedule();
-		reiserfs_write_lock(s);
+		reiserfs_write_lock_nested(s, depth);
 	}
 }
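The *_safe helpers above preserve the rule that the write lock is never held while blocking on one of the listed locks: they save the write-lock depth, acquire the other lock, then restore the depth. A hedged sketch of a caller (the identifiers are illustrative, not from a specific call site):

	/* Illustrative: take the inode mutex while already holding the
	 * write lock.  The helper transparently drops the write lock
	 * (saving its depth), sleeps on the mutex, then restores it,
	 * so the two locks are never acquired in the inverted order. */
	reiserfs_write_lock(inode->i_sb);
	reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
	/* both locks are held here */
	mutex_unlock(&inode->i_mutex);
	reiserfs_write_unlock(inode->i_sb);
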
@@ -34,6 +34,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 	unsigned long int block_count, free_blocks;
 	int i;
 	int copy_size;
+	int depth;
 
 	sb = SB_DISK_SUPER_BLOCK(s);
 
@@ -43,7 +44,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 	}
 
 	/* check the device size */
+	depth = reiserfs_write_unlock_nested(s);
 	bh = sb_bread(s, block_count_new - 1);
+	reiserfs_write_lock_nested(s, depth);
 	if (!bh) {
 		printk("reiserfs_resize: can\'t read last block\n");
 		return -EINVAL;
@@ -125,9 +128,12 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 	 * transaction begins, and the new bitmaps don't matter if the
 	 * transaction fails. */
 	for (i = bmap_nr; i < bmap_nr_new; i++) {
+		int depth;
 		/* don't use read_bitmap_block since it will cache
 		 * the uninitialized bitmap */
+		depth = reiserfs_write_unlock_nested(s);
 		bh = sb_bread(s, i * s->s_blocksize * 8);
+		reiserfs_write_lock_nested(s, depth);
 		if (!bh) {
 			vfree(bitmap);
 			return -EIO;
@@ -138,9 +144,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new)
 		set_buffer_uptodate(bh);
 		mark_buffer_dirty(bh);
-		reiserfs_write_unlock(s);
+		depth = reiserfs_write_unlock_nested(s);
 		sync_dirty_buffer(bh);
-		reiserfs_write_lock(s);
+		reiserfs_write_lock_nested(s, depth);
 		// update bitmap_info stuff
 		bitmap[i].free_count = sb_blocksize(sb) * 8 - 1;
 		brelse(bh);
@@ -524,14 +524,14 @@ static int is_tree_node(struct buffer_head *bh, int level)
  * the caller (search_by_key) will perform other schedule-unsafe
  * operations just after calling this function.
  *
- * @return true if we have unlocked
+ * @return depth of lock to be restored after read completes
  */
-static bool search_by_key_reada(struct super_block *s,
+static int search_by_key_reada(struct super_block *s,
 				struct buffer_head **bh,
 				b_blocknr_t *b, int num)
 {
 	int i, j;
-	bool unlocked = false;
+	int depth = -1;
 
 	for (i = 0; i < num; i++) {
 		bh[i] = sb_getblk(s, b[i]);
@@ -549,15 +549,13 @@ static int search_by_key_reada(struct super_block *s,
 		 * you have to make sure the prepared bit isn't set on this buffer
 		 */
 		if (!buffer_uptodate(bh[j])) {
-			if (!unlocked) {
-				reiserfs_write_unlock(s);
-				unlocked = true;
-			}
+			if (depth == -1)
+				depth = reiserfs_write_unlock_nested(s);
 			ll_rw_block(READA, 1, bh + j);
 		}
 		brelse(bh[j]);
 	}
-	return unlocked;
+	return depth;
 }
 
/**************************************************************************
@@ -645,26 +643,26 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
 		   have a pointer to it. */
 		if ((bh = last_element->pe_buffer =
 		     sb_getblk(sb, block_number))) {
-			bool unlocked = false;
-
-			if (!buffer_uptodate(bh) && reada_count > 1)
-				/* may unlock the write lock */
-				unlocked = search_by_key_reada(sb, reada_bh,
-						reada_blocks, reada_count);
 			/*
-			 * If we haven't already unlocked the write lock,
-			 * then we need to do that here before reading
-			 * the current block
+			 * We'll need to drop the lock if we encounter any
+			 * buffers that need to be read. If all of them are
+			 * already up to date, we don't need to drop the lock.
 			 */
-			if (!buffer_uptodate(bh) && !unlocked) {
-				reiserfs_write_unlock(sb);
-				unlocked = true;
-			}
+			int depth = -1;
+
+			if (!buffer_uptodate(bh) && reada_count > 1)
+				depth = search_by_key_reada(sb, reada_bh,
+						reada_blocks, reada_count);
+
+			if (!buffer_uptodate(bh) && depth == -1)
+				depth = reiserfs_write_unlock_nested(sb);
+
 			ll_rw_block(READ, 1, &bh);
 			wait_on_buffer(bh);
 
-			if (unlocked)
-				reiserfs_write_lock(sb);
+			if (depth != -1)
+				reiserfs_write_lock_nested(sb, depth);
+
 			if (!buffer_uptodate(bh))
 				goto io_error;
 		} else {
@@ -1059,9 +1057,7 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st
 			reiserfs_free_block(th, inode, block, 1);
 		}
 
-		reiserfs_write_unlock(sb);
-		cond_resched();
-		reiserfs_write_lock(sb);
+		reiserfs_cond_resched(sb);
 
 		if (item_moved (&s_ih, path)) {
 			need_re_search = 1;
@@ -624,7 +624,6 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags)
 	struct reiserfs_transaction_handle th;
 	int err = 0;
-	int lock_depth;
 
 	if (inode->i_sb->s_flags & MS_RDONLY) {
 		reiserfs_warning(inode->i_sb, "clm-6006",
@@ -632,7 +631,7 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags)
 			 inode->i_ino);
 		return;
 	}
-	lock_depth = reiserfs_write_lock_once(inode->i_sb);
+	reiserfs_write_lock(inode->i_sb);
 
 	/* this is really only used for atime updates, so they don't have
 	 ** to be included in O_SYNC or fsync
@@ -645,7 +644,7 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags)
 	journal_end(&th, inode->i_sb, 1);
 
 out:
-	reiserfs_write_unlock_once(inode->i_sb, lock_depth);
+	reiserfs_write_unlock(inode->i_sb);
 }
 
 static int reiserfs_show_options(struct seq_file *seq, struct dentry *root)