Commit 9ff32a07, authored by Alexander Viro, committed by Linus Torvalds

[PATCH] BKL-free ext2_get_block()

	Linus, I've got the first of the BKL-removal ext2 patches ready to
go.  It removes the BKL from ext2_get_block() and the guts of ext2_truncate().
The only place where we hold the BKL on these paths is in dquot.c - that can
probably be dealt with easily, but threading quota is a separate story.

	Inode metadata (pointers to blocks, both in the inode itself and in
indirect blocks, preallocation data and allocation goal) is protected
by an rwlock - EXT2_I(inode)->i_meta_lock.

	The next steps will involve threading the handling of group descriptors
and bitmaps - the uses of lock_super() in ext2 are going to die.  However,
that's a separate story - let's do that step-by-step.

	I suspect that the patch below will take care of almost all BKL contention
from ext2 - we still have the BKL held over directory operations, but for regular
files that's it.
parent d1adbf38
...@@ -1039,6 +1039,7 @@ int dquot_alloc_block(struct inode *inode, unsigned long number, char warn) ...@@ -1039,6 +1039,7 @@ int dquot_alloc_block(struct inode *inode, unsigned long number, char warn)
struct dquot *dquot[MAXQUOTAS]; struct dquot *dquot[MAXQUOTAS];
char warntype[MAXQUOTAS]; char warntype[MAXQUOTAS];
lock_kernel();
for (cnt = 0; cnt < MAXQUOTAS; cnt++) { for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
dquot[cnt] = NODQUOT; dquot[cnt] = NODQUOT;
warntype[cnt] = NOWARN; warntype[cnt] = NOWARN;
...@@ -1064,6 +1065,7 @@ int dquot_alloc_block(struct inode *inode, unsigned long number, char warn) ...@@ -1064,6 +1065,7 @@ int dquot_alloc_block(struct inode *inode, unsigned long number, char warn)
for (cnt = 0; cnt < MAXQUOTAS; cnt++) for (cnt = 0; cnt < MAXQUOTAS; cnt++)
if (dquot[cnt] != NODQUOT) if (dquot[cnt] != NODQUOT)
dqput(dquot[cnt]); dqput(dquot[cnt]);
unlock_kernel();
return ret; return ret;
} }
...@@ -1113,6 +1115,7 @@ void dquot_free_block(struct inode *inode, unsigned long number) ...@@ -1113,6 +1115,7 @@ void dquot_free_block(struct inode *inode, unsigned long number)
struct dquot *dquot; struct dquot *dquot;
/* NOBLOCK Start */ /* NOBLOCK Start */
lock_kernel();
for (cnt = 0; cnt < MAXQUOTAS; cnt++) { for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
dquot = dqduplicate(inode->i_dquot[cnt]); dquot = dqduplicate(inode->i_dquot[cnt]);
if (dquot == NODQUOT) if (dquot == NODQUOT)
...@@ -1121,6 +1124,7 @@ void dquot_free_block(struct inode *inode, unsigned long number) ...@@ -1121,6 +1124,7 @@ void dquot_free_block(struct inode *inode, unsigned long number)
dqput(dquot); dqput(dquot);
} }
inode->i_blocks -= number << (BLOCK_SIZE_BITS - 9); inode->i_blocks -= number << (BLOCK_SIZE_BITS - 9);
unlock_kernel();
/* NOBLOCK End */ /* NOBLOCK End */
} }
......
...@@ -504,27 +504,21 @@ int ext2_new_block (struct inode * inode, unsigned long goal, ...@@ -504,27 +504,21 @@ int ext2_new_block (struct inode * inode, unsigned long goal,
/* /*
* Do block preallocation now if required. * Do block preallocation now if required.
*/ */
/* Writer: ->i_prealloc* */ write_lock(&EXT2_I(inode)->i_meta_lock);
if (group_alloc && !*prealloc_count) { if (group_alloc && !*prealloc_count) {
unsigned long next_block = block + 1; unsigned n;
*prealloc_block = next_block; for (n = 0; n < group_alloc && ++j < group_size; n++) {
/* Writer: end */ if (ext2_set_bit (j, bh->b_data))
while (group_alloc && ++j < group_size) {
/* Writer: ->i_prealloc* */
if (*prealloc_block + *prealloc_count != next_block ||
ext2_set_bit (j, bh->b_data)) {
/* Writer: end */
break; break;
} }
(*prealloc_count)++; *prealloc_block = block + 1;
/* Writer: end */ *prealloc_count = n;
next_block++; es_alloc -= n;
es_alloc--; dq_alloc -= n;
dq_alloc--; group_alloc -= n;
group_alloc--;
}
} }
write_unlock(&EXT2_I(inode->i_meta_lock);
mark_buffer_dirty(bh); mark_buffer_dirty(bh);
if (sb->s_flags & MS_SYNCHRONOUS) { if (sb->s_flags & MS_SYNCHRONOUS) {
......
...@@ -20,6 +20,7 @@ struct ext2_inode_info { ...@@ -20,6 +20,7 @@ struct ext2_inode_info {
__u32 i_prealloc_block; __u32 i_prealloc_block;
__u32 i_prealloc_count; __u32 i_prealloc_count;
__u32 i_dir_start_lookup; __u32 i_dir_start_lookup;
rwlock_t i_meta_lock;
struct inode vfs_inode; struct inode vfs_inode;
}; };
......
...@@ -34,7 +34,6 @@ MODULE_AUTHOR("Remy Card and others"); ...@@ -34,7 +34,6 @@ MODULE_AUTHOR("Remy Card and others");
MODULE_DESCRIPTION("Second Extended Filesystem"); MODULE_DESCRIPTION("Second Extended Filesystem");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
static int ext2_update_inode(struct inode * inode, int do_sync); static int ext2_update_inode(struct inode * inode, int do_sync);
/* /*
...@@ -50,8 +49,6 @@ void ext2_put_inode (struct inode * inode) ...@@ -50,8 +49,6 @@ void ext2_put_inode (struct inode * inode)
*/ */
void ext2_delete_inode (struct inode * inode) void ext2_delete_inode (struct inode * inode)
{ {
lock_kernel();
if (is_bad_inode(inode) || if (is_bad_inode(inode) ||
inode->i_ino == EXT2_ACL_IDX_INO || inode->i_ino == EXT2_ACL_IDX_INO ||
inode->i_ino == EXT2_ACL_DATA_INO) inode->i_ino == EXT2_ACL_DATA_INO)
...@@ -59,15 +56,16 @@ void ext2_delete_inode (struct inode * inode) ...@@ -59,15 +56,16 @@ void ext2_delete_inode (struct inode * inode)
EXT2_I(inode)->i_dtime = CURRENT_TIME; EXT2_I(inode)->i_dtime = CURRENT_TIME;
mark_inode_dirty(inode); mark_inode_dirty(inode);
ext2_update_inode(inode, IS_SYNC(inode)); ext2_update_inode(inode, IS_SYNC(inode));
lock_kernel();
inode->i_size = 0; inode->i_size = 0;
if (inode->i_blocks) if (inode->i_blocks)
ext2_truncate (inode); ext2_truncate (inode);
ext2_free_inode (inode); ext2_free_inode (inode);
unlock_kernel(); unlock_kernel();
return; return;
no_delete: no_delete:
unlock_kernel();
clear_inode(inode); /* We must guarantee clearing of inode... */ clear_inode(inode); /* We must guarantee clearing of inode... */
} }
...@@ -75,17 +73,17 @@ void ext2_discard_prealloc (struct inode * inode) ...@@ -75,17 +73,17 @@ void ext2_discard_prealloc (struct inode * inode)
{ {
#ifdef EXT2_PREALLOCATE #ifdef EXT2_PREALLOCATE
struct ext2_inode_info *ei = EXT2_I(inode); struct ext2_inode_info *ei = EXT2_I(inode);
lock_kernel(); write_lock(&ei->i_meta_lock);
/* Writer: ->i_prealloc* */
if (ei->i_prealloc_count) { if (ei->i_prealloc_count) {
unsigned short total = ei->i_prealloc_count; unsigned short total = ei->i_prealloc_count;
unsigned long block = ei->i_prealloc_block; unsigned long block = ei->i_prealloc_block;
ei->i_prealloc_count = 0; ei->i_prealloc_count = 0;
ei->i_prealloc_block = 0; ei->i_prealloc_block = 0;
/* Writer: end */ write_unlock(&ei->i_meta_lock);
ext2_free_blocks (inode, block, total); ext2_free_blocks (inode, block, total);
} return;
unlock_kernel(); } else
write_unlock(&ei->i_meta_lock);
#endif #endif
} }
...@@ -99,17 +97,17 @@ static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err) ...@@ -99,17 +97,17 @@ static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err)
#ifdef EXT2_PREALLOCATE #ifdef EXT2_PREALLOCATE
struct ext2_inode_info *ei = EXT2_I(inode); struct ext2_inode_info *ei = EXT2_I(inode);
/* Writer: ->i_prealloc* */ write_lock(&ei->i_meta_lock);
if (ei->i_prealloc_count && if (ei->i_prealloc_count &&
(goal == ei->i_prealloc_block || (goal == ei->i_prealloc_block || goal + 1 == ei->i_prealloc_block))
goal + 1 == ei->i_prealloc_block))
{ {
result = ei->i_prealloc_block++; result = ei->i_prealloc_block++;
ei->i_prealloc_count--; ei->i_prealloc_count--;
/* Writer: end */ write_unlock(&ei->i_meta_lock);
ext2_debug ("preallocation hit (%lu/%lu).\n", ext2_debug ("preallocation hit (%lu/%lu).\n",
++alloc_hits, ++alloc_attempts); ++alloc_hits, ++alloc_attempts);
} else { } else {
write_unlock(&ei->i_meta_lock);
ext2_discard_prealloc (inode); ext2_discard_prealloc (inode);
ext2_debug ("preallocation miss (%lu/%lu).\n", ext2_debug ("preallocation miss (%lu/%lu).\n",
alloc_hits, ++alloc_attempts); alloc_hits, ++alloc_attempts);
...@@ -253,17 +251,18 @@ static Indirect *ext2_get_branch(struct inode *inode, ...@@ -253,17 +251,18 @@ static Indirect *ext2_get_branch(struct inode *inode,
bh = sb_bread(sb, le32_to_cpu(p->key)); bh = sb_bread(sb, le32_to_cpu(p->key));
if (!bh) if (!bh)
goto failure; goto failure;
/* Reader: pointers */ read_lock(&EXT2_I(inode)->i_meta_lock);
if (!verify_chain(chain, p)) if (!verify_chain(chain, p))
goto changed; goto changed;
add_chain(++p, bh, (u32*)bh->b_data + *++offsets); add_chain(++p, bh, (u32*)bh->b_data + *++offsets);
/* Reader: end */ read_unlock(&EXT2_I(inode)->i_meta_lock);
if (!p->key) if (!p->key)
goto no_block; goto no_block;
} }
return NULL; return NULL;
changed: changed:
read_unlock(&EXT2_I(inode)->i_meta_lock);
*err = -EAGAIN; *err = -EAGAIN;
goto no_block; goto no_block;
failure: failure:
...@@ -329,13 +328,11 @@ static inline int ext2_find_goal(struct inode *inode, ...@@ -329,13 +328,11 @@ static inline int ext2_find_goal(struct inode *inode,
unsigned long *goal) unsigned long *goal)
{ {
struct ext2_inode_info *ei = EXT2_I(inode); struct ext2_inode_info *ei = EXT2_I(inode);
/* Writer: ->i_next_alloc* */ write_lock(&ei->i_meta_lock);
if (block == ei->i_next_alloc_block + 1) { if (block == ei->i_next_alloc_block + 1) {
ei->i_next_alloc_block++; ei->i_next_alloc_block++;
ei->i_next_alloc_goal++; ei->i_next_alloc_goal++;
} }
/* Writer: end */
/* Reader: pointers, ->i_next_alloc* */
if (verify_chain(chain, partial)) { if (verify_chain(chain, partial)) {
/* /*
* try the heuristic for sequential allocation, * try the heuristic for sequential allocation,
...@@ -345,9 +342,10 @@ static inline int ext2_find_goal(struct inode *inode, ...@@ -345,9 +342,10 @@ static inline int ext2_find_goal(struct inode *inode,
*goal = ei->i_next_alloc_goal; *goal = ei->i_next_alloc_goal;
if (!*goal) if (!*goal)
*goal = ext2_find_near(inode, partial); *goal = ext2_find_near(inode, partial);
write_unlock(&ei->i_meta_lock);
return 0; return 0;
} }
/* Reader: end */ write_unlock(&ei->i_meta_lock);
return -EAGAIN; return -EAGAIN;
} }
...@@ -454,9 +452,8 @@ static inline int ext2_splice_branch(struct inode *inode, ...@@ -454,9 +452,8 @@ static inline int ext2_splice_branch(struct inode *inode,
/* Verify that place we are splicing to is still there and vacant */ /* Verify that place we are splicing to is still there and vacant */
/* Writer: pointers, ->i_next_alloc* */ write_lock(&ei->i_meta_lock);
if (!verify_chain(chain, where-1) || *where->p) if (!verify_chain(chain, where-1) || *where->p)
/* Writer: end */
goto changed; goto changed;
/* That's it */ /* That's it */
...@@ -465,7 +462,7 @@ static inline int ext2_splice_branch(struct inode *inode, ...@@ -465,7 +462,7 @@ static inline int ext2_splice_branch(struct inode *inode,
ei->i_next_alloc_block = block; ei->i_next_alloc_block = block;
ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key); ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key);
/* Writer: end */ write_unlock(&ei->i_meta_lock);
/* We are done with atomic stuff, now do the rest of housekeeping */ /* We are done with atomic stuff, now do the rest of housekeeping */
...@@ -487,6 +484,7 @@ static inline int ext2_splice_branch(struct inode *inode, ...@@ -487,6 +484,7 @@ static inline int ext2_splice_branch(struct inode *inode,
return 0; return 0;
changed: changed:
write_unlock(&ei->i_meta_lock);
for (i = 1; i < num; i++) for (i = 1; i < num; i++)
bforget(where[i].bh); bforget(where[i].bh);
for (i = 0; i < num; i++) for (i = 0; i < num; i++)
...@@ -520,7 +518,6 @@ static int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_he ...@@ -520,7 +518,6 @@ static int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_he
if (depth == 0) if (depth == 0)
goto out; goto out;
lock_kernel();
reread: reread:
partial = ext2_get_branch(inode, depth, offsets, chain, &err); partial = ext2_get_branch(inode, depth, offsets, chain, &err);
...@@ -540,7 +537,6 @@ static int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_he ...@@ -540,7 +537,6 @@ static int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_he
brelse(partial->bh); brelse(partial->bh);
partial--; partial--;
} }
unlock_kernel();
out: out:
return err; return err;
} }
...@@ -666,16 +662,17 @@ static Indirect *ext2_find_shared(struct inode *inode, ...@@ -666,16 +662,17 @@ static Indirect *ext2_find_shared(struct inode *inode,
for (k = depth; k > 1 && !offsets[k-1]; k--) for (k = depth; k > 1 && !offsets[k-1]; k--)
; ;
partial = ext2_get_branch(inode, k, offsets, chain, &err); partial = ext2_get_branch(inode, k, offsets, chain, &err);
/* Writer: pointers */
if (!partial) if (!partial)
partial = chain + k-1; partial = chain + k-1;
/* /*
* If the branch acquired continuation since we've looked at it - * If the branch acquired continuation since we've looked at it -
* fine, it should all survive and (new) top doesn't belong to us. * fine, it should all survive and (new) top doesn't belong to us.
*/ */
if (!partial->key && *partial->p) write_lock(&EXT2_I(inode)->i_meta_lock);
/* Writer: end */ if (!partial->key && *partial->p) {
write_unlock(&EXT2_I(inode)->i_meta_lock);
goto no_top; goto no_top;
}
for (p=partial; p>chain && all_zeroes((u32*)p->bh->b_data,p->p); p--) for (p=partial; p>chain && all_zeroes((u32*)p->bh->b_data,p->p); p--)
; ;
/* /*
...@@ -690,7 +687,7 @@ static Indirect *ext2_find_shared(struct inode *inode, ...@@ -690,7 +687,7 @@ static Indirect *ext2_find_shared(struct inode *inode,
*top = *p->p; *top = *p->p;
*p->p = 0; *p->p = 0;
} }
/* Writer: end */ write_unlock(&EXT2_I(inode)->i_meta_lock);
while(partial > p) while(partial > p)
{ {
...@@ -804,6 +801,7 @@ void ext2_truncate (struct inode * inode) ...@@ -804,6 +801,7 @@ void ext2_truncate (struct inode * inode)
if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return; return;
unlock_kernel();
ext2_discard_prealloc(inode); ext2_discard_prealloc(inode);
blocksize = inode->i_sb->s_blocksize; blocksize = inode->i_sb->s_blocksize;
...@@ -877,6 +875,7 @@ void ext2_truncate (struct inode * inode) ...@@ -877,6 +875,7 @@ void ext2_truncate (struct inode * inode)
ext2_sync_inode (inode); ext2_sync_inode (inode);
else else
mark_inode_dirty(inode); mark_inode_dirty(inode);
lock_kernel();
} }
void ext2_read_inode (struct inode * inode) void ext2_read_inode (struct inode * inode)
...@@ -1157,9 +1156,7 @@ static int ext2_update_inode(struct inode * inode, int do_sync) ...@@ -1157,9 +1156,7 @@ static int ext2_update_inode(struct inode * inode, int do_sync)
void ext2_write_inode (struct inode * inode, int wait) void ext2_write_inode (struct inode * inode, int wait)
{ {
lock_kernel();
ext2_update_inode (inode, wait); ext2_update_inode (inode, wait);
unlock_kernel();
} }
int ext2_sync_inode (struct inode *inode) int ext2_sync_inode (struct inode *inode)
......
...@@ -168,8 +168,10 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) ...@@ -168,8 +168,10 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) SLAB_CTOR_CONSTRUCTOR) {
rwlock_init(&ei->i_meta_lock);
inode_init_once(&ei->vfs_inode); inode_init_once(&ei->vfs_inode);
}
} }
static int init_inodecache(void) static int init_inodecache(void)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment