Commit c12b9866 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ext3: concurrent block/inode allocation

From: Alex Tomas <bzzz@tmi.comex.ru>


This patch weans ext3 off lock_super()-based protection for the inode and
block allocators.

It's basically the same as the ext2 changes.


1) each group has its own spinlock, which is used for modifications of that
   group's counters

2) sb->s_free_blocks_count is no longer used by the allocators.  ext3_statfs()
   and find_group_orlov() loop over the groups to count free blocks

3) sb->s_free_blocks_count is recalculated at mount/umount/sync_super time
   in order to check consistency and to avoid fsck warnings

4) reserved blocks are distributed over the last groups

5) ext3_new_block() first tries to allocate non-reserved blocks, and only if
   that fails does it dip into the reserved blocks

6) ext3_new_block() and ext3_free_blocks() no longer modify
   sb->s_free_blocks_count, so they do not need to call mark_buffer_dirty()
   for the superblock's buffer_head.  This should reduce I/O a bit (the
   overall scheme is sketched below).
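
For illustration, here is a minimal kernel-style sketch of points 1)-3).  It
is not the patch itself: the names bg_info, bg_add_free_blocks() and
count_free_blocks() are invented for this sketch; in the patch this role is
played by the s_bgi[] array of struct ext3_bg_info together with
ext3_count_free_blocks().

	#include <linux/spinlock.h>

	/* Invented per-group bookkeeping (the patch's struct ext3_bg_info);
	 * spin_lock_init() on each lock is omitted here. */
	struct bg_info {
		spinlock_t	lock;		/* guards free_blocks (point 1) */
		unsigned int	free_blocks;	/* this group's free-block count */
		unsigned int	reserved;	/* this group's share of the root reserve */
	};

	/* Point 1: a counter update takes only its own group's spinlock,
	 * never the filesystem-wide lock_super(). */
	static void bg_add_free_blocks(struct bg_info *bgi, unsigned int count)
	{
		spin_lock(&bgi->lock);
		bgi->free_blocks += count;
		spin_unlock(&bgi->lock);
	}

	/* Points 2)-3): there is no authoritative global free-blocks counter.
	 * When a total is wanted (statfs, mount/umount consistency checks) it
	 * is recomputed by walking the groups; under concurrent allocation the
	 * result is approximate, which the callers tolerate. */
	static unsigned long count_free_blocks(struct bg_info *bgi, int ngroups)
	{
		unsigned long sum = 0;
		int i;

		for (i = 0; i < ngroups; i++)
			sum += bgi[i].free_blocks;
		return sum;
	}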


Also fix an Orlov allocator boundary case:

In the interests of SMP scalability the ext3 free blocks and free inodes
counters are "approximate".  But there is a piece of code in the Orlov
allocator which fails due to boundary conditions on really small
filesystems.

Fix that up via a final allocation pass which simply uses first-fit for
allocation of a directory inode.
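
The fallback is nothing more than a first-fit scan over the group
descriptors.  As a standalone sketch (the helper name find_group_first_fit()
is invented; in the patch this is the fallback path inside find_group_orlov(),
reached by zeroing avefreei and retrying):

	#include <linux/ext3_fs.h>

	/* Invented standalone form of the first-fit pass: take the first
	 * group that reports any free inode at all.  Unlike the Orlov
	 * heuristics, this cannot be defeated by the approximate counters
	 * on a tiny filesystem. */
	static int find_group_first_fit(struct super_block *sb)
	{
		int ngroups = EXT3_SB(sb)->s_groups_count;
		struct ext3_group_desc *desc;
		int group;

		for (group = 0; group < ngroups; group++) {
			desc = ext3_get_group_desc(sb, group, NULL);
			if (desc && le16_to_cpu(desc->bg_free_inodes_count))
				return group;
		}
		return -1;	/* no free inodes in any group */
	}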
parent 78f2f471
fs/ext3/balloc.c
@@ -118,7 +118,6 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
printk ("ext3_free_blocks: nonexistent device");
return;
}
lock_super (sb);
es = EXT3_SB(sb)->s_es;
if (block < le32_to_cpu(es->s_first_data_block) ||
block + count < block ||
@@ -184,11 +183,6 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
if (err)
goto error_return;
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
if (err)
goto error_return;
for (i = 0; i < count; i++) {
/*
* An HJ special. This is expensive...
@@ -207,19 +201,6 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
}
}
#endif
BUFFER_TRACE(bitmap_bh, "clear bit");
if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) {
ext3_error (sb, __FUNCTION__,
"bit already cleared for block %lu",
block + i);
BUFFER_TRACE(bitmap_bh, "bit already cleared");
} else {
dquot_freed_blocks++;
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)+1);
es->s_free_blocks_count =
cpu_to_le32(le32_to_cpu(es->s_free_blocks_count)+1);
}
/* @@@ This prevents newly-allocated data from being
* freed and then reallocated within the same
* transaction.
@@ -238,12 +219,35 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
* activity on the buffer any more and so it is safe to
* reallocate it.
*/
BUFFER_TRACE(bitmap_bh, "clear in b_committed_data");
BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
J_ASSERT_BH(bitmap_bh,
bh2jh(bitmap_bh)->b_committed_data != NULL);
ext3_set_bit(bit + i, bh2jh(bitmap_bh)->b_committed_data);
ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i,
bh2jh(bitmap_bh)->b_committed_data);
/*
* We clear the bit in the bitmap after setting the committed
* data bit, because this is the reverse order to that which
* the allocator uses.
*/
BUFFER_TRACE(bitmap_bh, "clear bit");
if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
bit + i, bitmap_bh->b_data)) {
ext3_error (sb, __FUNCTION__,
"bit already cleared for block %lu",
block + i);
BUFFER_TRACE(bitmap_bh, "bit already cleared");
} else {
dquot_freed_blocks++;
}
}
spin_lock(bg_lock(sb, block_group));
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
dquot_freed_blocks);
spin_unlock(bg_lock(sb, block_group));
/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
err = ext3_journal_dirty_metadata(handle, bitmap_bh);
@@ -253,11 +257,6 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
ret = ext3_journal_dirty_metadata(handle, gd_bh);
if (!err) err = ret;
/* And the superblock */
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "dirtied superblock");
ret = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
if (!err) err = ret;
if (overflow && !err) {
block += count;
count = overflow;
@@ -267,7 +266,6 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
error_return:
brelse(bitmap_bh);
ext3_std_error(sb, err);
unlock_super(sb);
if (dquot_freed_blocks)
DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
return;
@@ -367,6 +365,98 @@ static int find_next_usable_block(int start,
return -1;
}
/*
* We think we can allocate this block in this bitmap. Try to set the bit.
* If that succeeds then check that nobody has allocated and then freed the
block since we saw that it was not marked in b_committed_data. If it _was_
* allocated and freed then clear the bit in the bitmap again and return
* zero (failure).
*/
static inline int
claim_block(spinlock_t *lock, int block, struct buffer_head *bh)
{
if (ext3_set_bit_atomic(lock, block, bh->b_data))
return 0;
if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data &&
ext3_test_bit(block, bh2jh(bh)->b_committed_data)) {
ext3_clear_bit_atomic(lock, block, bh->b_data);
return 0;
}
return 1;
}
/*
* If we failed to allocate the desired block then we may end up crossing to a
* new bitmap. In that case we must release write access to the old one via
* ext3_journal_release_buffer(), else we'll run out of credits.
*/
static int
ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
struct buffer_head *bitmap_bh, int goal, int *errp)
{
int i, fatal = 0;
int have_access = 0;
*errp = 0;
if (goal >= 0 && ext3_test_allocatable(goal, bitmap_bh))
goto got;
repeat:
goal = find_next_usable_block(goal, bitmap_bh,
EXT3_BLOCKS_PER_GROUP(sb));
if (goal < 0)
goto fail;
for (i = 0;
i < 7 && goal > 0 && ext3_test_allocatable(goal - 1, bitmap_bh);
i++, goal--);
got:
if (!have_access) {
/*
* Make sure we use undo access for the bitmap, because it is
* critical that we do the frozen_data COW on bitmap buffers in
* all cases even if the buffer is in BJ_Forget state in the
* committing transaction.
*/
BUFFER_TRACE(bitmap_bh, "get undo access for new block");
fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
if (fatal) {
*errp = fatal;
goto fail;
}
have_access = 1;
}
if (!claim_block(bg_lock(sb, group), goal, bitmap_bh)) {
/*
* The block was allocated by another thread, or it was
* allocated and then freed by another thread
*/
goal++;
if (goal >= EXT3_BLOCKS_PER_GROUP(sb))
goto fail;
goto repeat;
}
BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block");
fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
if (fatal) {
*errp = fatal;
goto fail;
}
return goal;
fail:
if (have_access) {
BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
ext3_journal_release_buffer(handle, bitmap_bh);
}
return -1;
}
/*
* ext3_new_block uses a goal block to assist allocation. If the goal is
* free, or there is a free block within 32 blocks of the goal, that block
@@ -383,10 +473,12 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
struct buffer_head *gdp_bh; /* bh2 */
int group_no; /* i */
int ret_block; /* j */
int bit; /* k */
int bgi; /* blockgroup iteration index */
int target_block; /* tmp */
int fatal = 0, err;
int performed_allocation = 0;
int free;
int use_reserve = 0;
struct super_block *sb;
struct ext3_group_desc *gdp;
struct ext3_super_block *es;
@@ -408,16 +500,7 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
return 0;
}
lock_super(sb);
es = EXT3_SB(sb)->s_es;
if (le32_to_cpu(es->s_free_blocks_count) <=
le32_to_cpu(es->s_r_blocks_count) &&
((EXT3_SB(sb)->s_resuid != current->fsuid) &&
(EXT3_SB(sb)->s_resgid == 0 ||
!in_group_p(EXT3_SB(sb)->s_resgid)) &&
!capable(CAP_SYS_RESOURCE)))
goto out;
ext3_debug("goal=%lu.\n", goal);
/*
@@ -432,40 +515,28 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
if (!gdp)
goto io_error;
if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
free = le16_to_cpu(gdp->bg_free_blocks_count);
free -= EXT3_SB(sb)->s_bgi[group_no].bg_reserved;
if (free > 0) {
ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
EXT3_BLOCKS_PER_GROUP(sb));
#ifdef EXT3FS_DEBUG
if (ret_block)
goal_attempts++;
#endif
bitmap_bh = read_block_bitmap(sb, group_no);
if (!bitmap_bh)
goto io_error;
ext3_debug("goal is at %d:%d.\n", group_no, ret_block);
if (ext3_test_allocatable(ret_block, bitmap_bh)) {
#ifdef EXT3FS_DEBUG
goal_hits++;
ext3_debug("goal bit allocated.\n");
#endif
goto got_block;
}
ret_block = find_next_usable_block(ret_block, bitmap_bh,
EXT3_BLOCKS_PER_GROUP(sb));
ret_block = ext3_try_to_allocate(sb, handle, group_no,
bitmap_bh, ret_block, &fatal);
if (fatal)
goto out;
if (ret_block >= 0)
goto search_back;
goto allocated;
}
ext3_debug("Bit not found in block group %d.\n", group_no);
/*
* Now search the rest of the groups. We assume that
* i and gdp correctly point to the last group visited.
*/
for (bit = 0; bit < EXT3_SB(sb)->s_groups_count; bit++) {
repeat:
for (bgi = 0; bgi < EXT3_SB(sb)->s_groups_count; bgi++) {
group_no++;
if (group_no >= EXT3_SB(sb)->s_groups_count)
group_no = 0;
@@ -474,57 +545,47 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
*errp = -EIO;
goto out;
}
if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
free = le16_to_cpu(gdp->bg_free_blocks_count);
if (!use_reserve)
free -= EXT3_SB(sb)->s_bgi[group_no].bg_reserved;
if (free <= 0)
continue;
brelse(bitmap_bh);
bitmap_bh = read_block_bitmap(sb, group_no);
if (!bitmap_bh)
goto io_error;
ret_block = find_next_usable_block(-1, bitmap_bh,
EXT3_BLOCKS_PER_GROUP(sb));
ret_block = ext3_try_to_allocate(sb, handle, group_no,
bitmap_bh, -1, &fatal);
if (fatal)
goto out;
if (ret_block >= 0)
goto search_back;
goto allocated;
}
if (!use_reserve &&
(EXT3_SB(sb)->s_resuid == current->fsuid ||
(EXT3_SB(sb)->s_resgid != 0 && in_group_p(EXT3_SB(sb)->s_resgid)) ||
capable(CAP_SYS_RESOURCE))) {
use_reserve = 1;
group_no = 0;
goto repeat;
}
/* No space left on the device */
*errp = -ENOSPC;
goto out;
search_back:
/*
* We have succeeded in finding a free byte in the block
* bitmap. Now search backwards up to 7 bits to find the
* start of this group of free blocks.
*/
for ( bit = 0;
bit < 7 && ret_block > 0 &&
ext3_test_allocatable(ret_block - 1, bitmap_bh);
bit++, ret_block--)
;
got_block:
allocated:
ext3_debug("using block group %d(%d)\n",
group_no, gdp->bg_free_blocks_count);
/* Make sure we use undo access for the bitmap, because it is
critical that we do the frozen_data COW on bitmap buffers in
all cases even if the buffer is in BJ_Forget state in the
committing transaction. */
BUFFER_TRACE(bitmap_bh, "get undo access for marking new block");
fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
if (fatal)
goto out;
BUFFER_TRACE(gdp_bh, "get_write_access");
fatal = ext3_journal_get_write_access(handle, gdp_bh);
if (fatal)
goto out;
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
if (fatal)
goto out;
target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb)
+ le32_to_cpu(es->s_first_data_block);
@@ -536,11 +597,6 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
"Allocating block in system zone - "
"block = %u", target_block);
/* The superblock lock should guard against anybody else beating
* us to this point! */
J_ASSERT_BH(bitmap_bh, !ext3_test_bit(ret_block, bitmap_bh->b_data));
BUFFER_TRACE(bitmap_bh, "setting bitmap bit");
ext3_set_bit(ret_block, bitmap_bh->b_data);
performed_allocation = 1;
#ifdef CONFIG_JBD_DEBUG
@@ -556,20 +612,17 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
}
}
#endif
spin_lock(bg_lock(sb, group_no));
if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data)
J_ASSERT_BH(bitmap_bh,
!ext3_test_bit(ret_block,
bh2jh(bitmap_bh)->b_committed_data));
ext3_debug("found bit %d\n", ret_block);
spin_unlock(bg_lock(sb, group_no));
/* ret_block was blockgroup-relative. Now it becomes fs-relative */
ret_block = target_block;
BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block");
err = ext3_journal_dirty_metadata(handle, bitmap_bh);
if (!fatal)
fatal = err;
if (ret_block >= le32_to_cpu(es->s_blocks_count)) {
ext3_error(sb, "ext3_new_block",
"block(%d) >= blocks count(%d) - "
@@ -586,27 +639,20 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
ext3_debug("allocating block %d. Goal hits %d of %d.\n",
ret_block, goal_hits, goal_attempts);
spin_lock(bg_lock(sb, group_no));
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
es->s_free_blocks_count =
cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - 1);
spin_unlock(bg_lock(sb, group_no));
BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
err = ext3_journal_dirty_metadata(handle, gdp_bh);
if (!fatal)
fatal = err;
BUFFER_TRACE(EXT3_SB(sb)->s_sbh,
"journal_dirty_metadata for superblock");
err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
if (!fatal)
fatal = err;
sb->s_dirt = 1;
if (fatal)
goto out;
unlock_super(sb);
*errp = 0;
brelse(bitmap_bh);
return ret_block;
@@ -618,7 +664,6 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
*errp = fatal;
ext3_std_error(sb, fatal);
}
unlock_super(sb);
/*
* Undo the block allocation
*/
@@ -631,12 +676,13 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
unsigned long ext3_count_free_blocks(struct super_block *sb)
{
unsigned long desc_count;
struct ext3_group_desc *gdp;
int i;
#ifdef EXT3FS_DEBUG
struct ext3_super_block *es;
unsigned long desc_count, bitmap_count, x;
unsigned long bitmap_count, x;
struct buffer_head *bitmap_bh = NULL;
struct ext3_group_desc *gdp;
int i;
lock_super(sb);
es = EXT3_SB(sb)->s_es;
@@ -664,7 +710,15 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
unlock_super(sb);
return bitmap_count;
#else
return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count);
desc_count = 0;
for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
gdp = ext3_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
}
return desc_count;
#endif
}
fs/ext3/ialloc.c
@@ -131,7 +131,6 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
/* Do this BEFORE marking the inode not in use or returning an error */
clear_inode (inode);
lock_super (sb);
es = EXT3_SB(sb)->s_es;
if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
ext3_error (sb, "ext3_free_inode",
@@ -150,7 +149,8 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
goto error_return;
/* Ok, now we can actually update the inode bitmaps.. */
if (!ext3_clear_bit(bit, bitmap_bh->b_data))
if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
bit, bitmap_bh->b_data))
ext3_error (sb, "ext3_free_inode",
"bit already cleared for inode %lu", ino);
else {
@@ -160,28 +160,18 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
fatal = ext3_journal_get_write_access(handle, bh2);
if (fatal) goto error_return;
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get write access");
fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
if (fatal) goto error_return;
if (gdp) {
spin_lock(&EXT3_SB(sb)->s_bgi[block_group].bg_ialloc_lock);
gdp->bg_free_inodes_count = cpu_to_le16(
le16_to_cpu(gdp->bg_free_inodes_count) + 1);
if (is_directory) {
if (is_directory)
gdp->bg_used_dirs_count = cpu_to_le16(
le16_to_cpu(gdp->bg_used_dirs_count) - 1);
EXT3_SB(sb)->s_dir_count--;
}
spin_unlock(&EXT3_SB(sb)->s_bgi[block_group].bg_ialloc_lock);
}
BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bh2);
if (!fatal) fatal = err;
es->s_free_inodes_count =
cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
BUFFER_TRACE(EXT3_SB(sb)->s_sbh,
"call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
if (!fatal) fatal = err;
}
BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bitmap_bh);
@@ -191,7 +181,6 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
error_return:
brelse(bitmap_bh);
ext3_std_error(sb, fatal);
unlock_super(sb);
}
/*
@@ -206,9 +195,8 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
*/
static int find_group_dir(struct super_block *sb, struct inode *parent)
{
struct ext3_super_block * es = EXT3_SB(sb)->s_es;
int ngroups = EXT3_SB(sb)->s_groups_count;
int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
int avefreei = ext3_count_free_inodes(sb) / ngroups;
struct ext3_group_desc *desc, *best_desc = NULL;
struct buffer_head *bh;
int group, best_group = -1;
@@ -264,10 +252,12 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
struct ext3_super_block *es = sbi->s_es;
int ngroups = sbi->s_groups_count;
int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
int avefreeb = le32_to_cpu(es->s_free_blocks_count) / ngroups;
int freei = ext3_count_free_inodes(sb);
int avefreei = freei / ngroups;
int freeb = ext3_count_free_blocks(sb);
int avefreeb = freeb / ngroups;
int blocks_per_dir;
int ndirs = sbi->s_dir_count;
int ndirs = ext3_count_dirs(sb);
int max_debt, max_dirs, min_blocks, min_inodes;
int group = -1, i;
struct ext3_group_desc *desc;
@@ -319,7 +309,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
desc = ext3_get_group_desc (sb, group, &bh);
if (!desc || !desc->bg_free_inodes_count)
continue;
if (sbi->s_debts[group] >= max_debt)
if (sbi->s_bgi[group].bg_debts >= max_debt)
continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
continue;
@@ -340,6 +330,15 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
return group;
}
if (avefreei) {
/*
* The free-inodes counter is approximate, and for really small
* filesystems the above test can fail to find any blockgroups
*/
avefreei = 0;
goto fallback;
}
return -1;
}
@@ -435,7 +434,6 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
return ERR_PTR(-ENOMEM);
ei = EXT3_I(inode);
lock_super (sb);
es = EXT3_SB(sb)->s_es;
repeat:
if (S_ISDIR(mode)) {
@@ -464,11 +462,9 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
err = ext3_journal_get_write_access(handle, bitmap_bh);
if (err) goto fail;
if (ext3_set_bit(ino, bitmap_bh->b_data)) {
ext3_error (sb, "ext3_new_inode",
"bit already set for inode %lu", ino);
if (ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
ino, bitmap_bh->b_data))
goto repeat;
}
BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bitmap_bh);
if (err) goto fail;
@@ -504,26 +500,19 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
BUFFER_TRACE(bh2, "get_write_access");
err = ext3_journal_get_write_access(handle, bh2);
if (err) goto fail;
spin_lock(&EXT3_SB(sb)->s_bgi[group].bg_ialloc_lock);
gdp->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
if (S_ISDIR(mode)) {
gdp->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
EXT3_SB(sb)->s_dir_count++;
}
spin_unlock(&EXT3_SB(sb)->s_bgi[group].bg_ialloc_lock);
BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bh2);
if (err) goto fail;
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
if (err) goto fail;
es->s_free_inodes_count =
cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
sb->s_dirt = 1;
if (err) goto fail;
inode->i_uid = current->fsuid;
if (test_opt (sb, GRPID))
@@ -576,7 +565,6 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
ei->i_state = EXT3_STATE_NEW;
unlock_super(sb);
ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
DQUOT_DROP(inode);
@@ -600,7 +588,6 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
fail:
ext3_std_error(sb, err);
out:
unlock_super(sb);
iput(inode);
ret = ERR_PTR(err);
really_out:
@@ -673,12 +660,13 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
unsigned long ext3_count_free_inodes (struct super_block * sb)
{
unsigned long desc_count;
struct ext3_group_desc *gdp;
int i;
#ifdef EXT3FS_DEBUG
struct ext3_super_block *es;
unsigned long desc_count, bitmap_count, x;
struct ext3_group_desc *gdp;
unsigned long bitmap_count, x;
struct buffer_head *bitmap_bh = NULL;
int i;
lock_super (sb);
es = EXT3_SB(sb)->s_es;
@@ -706,7 +694,14 @@ unsigned long ext3_count_free_inodes (struct super_block * sb)
unlock_super(sb);
return desc_count;
#else
return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count);
desc_count = 0;
for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
gdp = ext3_get_group_desc (sb, i, NULL);
if (!gdp)
continue;
desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
}
return desc_count;
#endif
}
fs/ext3/super.c
@@ -460,7 +460,7 @@ void ext3_put_super (struct super_block * sb)
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
kfree(sbi->s_debts);
kfree(sbi->s_bgi);
brelse(sbi->s_sbh);
/* Debugging code just in case the in-memory inode orphan list
@@ -901,6 +901,8 @@ static int ext3_check_descriptors (struct super_block * sb)
struct ext3_sb_info *sbi = EXT3_SB(sb);
unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block);
struct ext3_group_desc * gdp = NULL;
unsigned long total_free;
unsigned int reserved = le32_to_cpu(sbi->s_es->s_r_blocks_count);
int desc_block = 0;
int i;
@@ -947,6 +949,43 @@ static int ext3_check_descriptors (struct super_block * sb)
block += EXT3_BLOCKS_PER_GROUP(sb);
gdp++;
}
total_free = ext3_count_free_blocks(sb);
if (total_free != le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count)) {
printk("EXT3-fs: invalid s_free_blocks_count %u (real %lu)\n",
le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count),
total_free);
EXT3_SB(sb)->s_es->s_free_blocks_count = cpu_to_le32(total_free);
}
/* distribute reserved blocks over groups -bzzz */
for(i = sbi->s_groups_count - 1; reserved && total_free && i >= 0; i--) {
int free;
gdp = ext3_get_group_desc (sb, i, NULL);
if (!gdp) {
ext3_error (sb, "ext3_check_descriptors",
"cant get descriptor for group %d", i);
return 0;
}
free = le16_to_cpu(gdp->bg_free_blocks_count);
if (free > reserved)
free = reserved;
sbi->s_bgi[i].bg_reserved = free;
reserved -= free;
total_free -= free;
}
total_free = ext3_count_free_inodes(sb);
if (total_free != le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count)) {
printk("EXT3-fs: invalid s_free_inodes_count %u (real %lu)\n",
le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count),
total_free);
EXT3_SB(sb)->s_es->s_free_inodes_count = cpu_to_le32(total_free);
}
return 1;
}
@@ -1307,13 +1346,17 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
printk (KERN_ERR "EXT3-fs: not enough memory\n");
goto failed_mount;
}
sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
sbi->s_bgi = kmalloc(sbi->s_groups_count * sizeof(struct ext3_bg_info),
GFP_KERNEL);
if (!sbi->s_debts) {
printk ("EXT3-fs: not enough memory\n");
if (!sbi->s_bgi) {
printk("EXT3-fs: not enough memory to allocate s_bgi\n");
goto failed_mount2;
}
memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts));
memset(sbi->s_bgi, 0, sbi->s_groups_count * sizeof(struct ext3_bg_info));
for (i = 0; i < sbi->s_groups_count; i++) {
spin_lock_init(&sbi->s_bgi[i].bg_balloc_lock);
spin_lock_init(&sbi->s_bgi[i].bg_ialloc_lock);
}
for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logic_sb_block, i);
sbi->s_group_desc[i] = sb_bread(sb, block);
@@ -1329,7 +1372,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
sbi->s_gdb_count = db_count;
sbi->s_dir_count = ext3_count_dirs(sb);
/*
* set up enough so that it can read an inode
*/
@@ -1432,8 +1474,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
failed_mount3:
journal_destroy(sbi->s_journal);
failed_mount2:
if (sbi->s_debts)
kfree(sbi->s_debts);
kfree(sbi->s_bgi);
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
@@ -1702,6 +1743,8 @@ static void ext3_commit_super (struct super_block * sb,
if (!sbh)
return;
es->s_wtime = cpu_to_le32(get_seconds());
es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
BUFFER_TRACE(sbh, "marking dirty");
mark_buffer_dirty(sbh);
if (sync)
fs/jbd/journal.c
@@ -48,9 +48,7 @@ EXPORT_SYMBOL(journal_get_create_access);
EXPORT_SYMBOL(journal_get_undo_access);
EXPORT_SYMBOL(journal_dirty_data);
EXPORT_SYMBOL(journal_dirty_metadata);
#if 0
EXPORT_SYMBOL(journal_release_buffer);
#endif
EXPORT_SYMBOL(journal_forget);
#if 0
EXPORT_SYMBOL(journal_sync_buffer);
fs/jbd/transaction.c
@@ -1106,7 +1106,6 @@ int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh)
return 0;
}
#if 0
/*
* journal_release_buffer: undo a get_write_access without any buffer
* updates, if the update decided in the end that it didn't need access.
@@ -1140,7 +1139,6 @@ void journal_release_buffer (handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "exit");
unlock_journal(journal);
}
#endif
/**
* void journal_forget() - bforget() for potentially-journaled buffers.
include/linux/ext3_fs.h
@@ -344,7 +344,9 @@ struct ext3_inode {
#endif
#define ext3_set_bit ext2_set_bit
#define ext3_set_bit_atomic ext2_set_bit_atomic
#define ext3_clear_bit ext2_clear_bit
#define ext3_clear_bit_atomic ext2_clear_bit_atomic
#define ext3_test_bit ext2_test_bit
#define ext3_find_first_zero_bit ext2_find_first_zero_bit
#define ext3_find_next_zero_bit ext2_find_next_zero_bit
include/linux/ext3_fs_sb.h
@@ -21,6 +21,13 @@
#include <linux/wait.h>
#endif
struct ext3_bg_info {
u8 bg_debts;
spinlock_t bg_balloc_lock;
spinlock_t bg_ialloc_lock;
unsigned long bg_reserved;
} ____cacheline_aligned_in_smp;
/*
* third extended-fs super-block data in memory
*/
@@ -50,8 +57,7 @@ struct ext3_sb_info {
u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
unsigned long s_dir_count;
u8 *s_debts;
struct ext3_bg_info *s_bgi;
/* Journaling */
struct inode * s_journal_inode;
include/linux/ext3_jbd.h
@@ -116,6 +116,12 @@ __ext3_journal_get_write_access(const char *where,
return err;
}
static inline void
ext3_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
{
journal_release_buffer(handle, bh);
}
static inline void
ext3_journal_forget(handle_t *handle, struct buffer_head *bh)
{