Commit 5c4ad014 authored by Theodore Y. Ts'o's avatar Theodore Y. Ts'o Committed by Linus Torvalds

[PATCH] Ext3: Retry allocation after transaction commit (v2)

Here is a reworked version of my patch to ext3 to retry certain filesystem
operations after an ENOSPC error.  The ext3_should_retry_alloc() function will
not wait on the currently running transaction if there is a currently active
handle; hence this should avoid deadlocks in the Lustre use case.  The patch
is versus BK-recent.

I've also included a simple, reliable test case which demonstrates the problem
this patch is intended to fix.  (Note that BK-recent is not sufficient to
address this test case, and waiting on the commiting transaction in
ext3_new_block is also not sufficient.  Been there, tried that, didn't work.
We need to do the full-bore retry from the top level.  The
ext3_should_retry_alloc() will only wait on the committing transaction if
there is an active handle; hence Lustre will probably also need to use
ext3_should_retry_alloc() if it wants to reliably avoid this particular
problem.)

#!/bin/sh
#
#
TEST_DIR=/tmp
IMAGE=$TEST_DIR/retry.img
MNTPT=$TEST_DIR/retry.mnt
TEST_SRC=/usr/projects/e2fsprogs/e2fsprogs/build
MKE2FS_OPTS=""
IMAGE_SIZE=8192

umount $MNTPT
dd if=/dev/zero of=$IMAGE bs=4k count=$IMAGE_SIZE
mke2fs -j -F $MKE2FS_OPTS $IMAGE 

function test_log ()
{
	echo $*
	logger -p local4.notice $*
}

mkdir -p $MNTPT
mount -o loop -t ext3 $IMAGE $MNTPT
test_log Retry test: BEGIN
for i in `seq 1 3`
do
	test_log "Retry test: Loop $i"
	echo 2 > /proc/sys/fs/jbd-debug
	while ! mkdir -p $MNTPT/foo/bar
	do
		test_log "Retry test: mkdir failed"
		sleep 1
	done
	echo 0 > /proc/sys/fs/jbd-debug
	cp -r $TEST_SRC $MNTPT/foo/bar 2> /dev/null
	rm -rf $MNTPT/*
done
umount $MNTPT
test_log "Retry test: END"


akpm@osdl.org

  Rework the code to make it a formal JBD API entry point.
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent 248af7e2
......@@ -428,7 +428,9 @@ ext3_acl_chmod(struct inode *inode)
error = posix_acl_chmod_masq(clone, inode->i_mode);
if (!error) {
handle_t *handle;
int retries = 0;
retry:
handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
......@@ -437,6 +439,9 @@ ext3_acl_chmod(struct inode *inode)
}
error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone);
ext3_journal_stop(handle);
if (error == -ENOSPC &&
ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
}
out:
posix_acl_release(clone);
......@@ -516,7 +521,7 @@ ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
{
handle_t *handle;
struct posix_acl *acl;
int error;
int error, retries = 0;
if (!test_opt(inode->i_sb, POSIX_ACL))
return -EOPNOTSUPP;
......@@ -535,11 +540,14 @@ ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
} else
acl = NULL;
retry:
handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
if (IS_ERR(handle))
return PTR_ERR(handle);
error = ext3_set_acl(handle, inode, type, acl);
ext3_journal_stop(handle);
if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
release_and_out:
posix_acl_release(acl);
......
......@@ -465,6 +465,36 @@ ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
return -1;
}
static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
{
int free_blocks, root_blocks;
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
sbi->s_resuid != current->fsuid &&
(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
return 0;
}
return 1;
}
/*
* ext3_should_retry_alloc() is called when ENOSPC is returned, and if
* it is profitable to retry the operation, this function will wait
* for the current or commiting transaction to complete, and then
* return TRUE.
*/
int ext3_should_retry_alloc(struct super_block *sb, int *retries)
{
if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
return 0;
jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
}
/*
* ext3_new_block uses a goal block to assist allocation. If the goal is
* free, or there is a free block within 32 blocks of the goal, that block
......@@ -485,7 +515,7 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
int target_block; /* tmp */
int fatal = 0, err;
int performed_allocation = 0;
int free_blocks, root_blocks;
int free_blocks;
struct super_block *sb;
struct ext3_group_desc *gdp;
struct ext3_super_block *es;
......@@ -512,11 +542,7 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
es = EXT3_SB(sb)->s_es;
ext3_debug("goal=%lu.\n", goal);
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
root_blocks = le32_to_cpu(es->s_r_blocks_count);
if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
sbi->s_resuid != current->fsuid &&
(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
if (!ext3_has_free_blocks(sbi)) {
*errp = -ENOSPC;
goto out;
}
......
......@@ -1080,7 +1080,7 @@ static int ext3_prepare_write(struct file *file, struct page *page,
struct inode *inode = page->mapping->host;
int ret, needed_blocks = ext3_writepage_trans_blocks(inode);
handle_t *handle;
int tried_commit = 0;
int retries = 0;
retry:
handle = ext3_journal_start(inode, needed_blocks);
......@@ -1089,19 +1089,8 @@ static int ext3_prepare_write(struct file *file, struct page *page,
goto out;
}
ret = block_prepare_write(page, from, to, ext3_get_block);
if (ret) {
if (ret != -ENOSPC || tried_commit)
if (ret)
goto prepare_write_failed;
/*
* It could be that there _is_ free space, but it's all tied up
* in uncommitted bitmaps. So force a commit here, which makes
* those blocks allocatable and try again.
*/
tried_commit = 1;
handle->h_sync = 1;
ext3_journal_stop(handle);
goto retry;
}
if (ext3_should_journal_data(inode)) {
ret = walk_page_buffers(handle, page_buffers(page),
......@@ -1110,6 +1099,8 @@ static int ext3_prepare_write(struct file *file, struct page *page,
prepare_write_failed:
if (ret)
ext3_journal_stop(handle);
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
out:
return ret;
}
......
......@@ -1630,8 +1630,9 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
{
handle_t *handle;
struct inode * inode;
int err;
int err, retries = 0;
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2*EXT3_QUOTA_INIT_BLOCKS);
......@@ -1650,6 +1651,8 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
err = ext3_add_nondir(handle, dentry, inode);
}
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
return err;
}
......@@ -1658,11 +1661,12 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
{
handle_t *handle;
struct inode *inode;
int err;
int err, retries = 0;
if (!new_valid_dev(rdev))
return -EINVAL;
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2*EXT3_QUOTA_INIT_BLOCKS);
......@@ -1682,6 +1686,8 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
err = ext3_add_nondir(handle, dentry, inode);
}
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
return err;
}
......@@ -1691,11 +1697,12 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
struct inode * inode;
struct buffer_head * dir_block;
struct ext3_dir_entry_2 * de;
int err;
int err, retries = 0;
if (dir->i_nlink >= EXT3_LINK_MAX)
return -EMLINK;
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2*EXT3_QUOTA_INIT_BLOCKS);
......@@ -1753,6 +1760,8 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
d_instantiate(dentry, inode);
out_stop:
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
return err;
}
......@@ -2092,12 +2101,13 @@ static int ext3_symlink (struct inode * dir,
{
handle_t *handle;
struct inode * inode;
int l, err;
int l, err, retries = 0;
l = strlen(symname)+1;
if (l > dir->i_sb->s_blocksize)
return -ENAMETOOLONG;
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2*EXT3_QUOTA_INIT_BLOCKS);
......@@ -2136,6 +2146,8 @@ static int ext3_symlink (struct inode * dir,
err = ext3_add_nondir(handle, dentry, inode);
out_stop:
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
return err;
}
......@@ -2144,11 +2156,12 @@ static int ext3_link (struct dentry * old_dentry,
{
handle_t *handle;
struct inode *inode = old_dentry->d_inode;
int err;
int err, retries = 0;
if (inode->i_nlink >= EXT3_LINK_MAX)
return -EMLINK;
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
EXT3_INDEX_EXTRA_TRANS_BLOCKS);
if (IS_ERR(handle))
......@@ -2163,6 +2176,8 @@ static int ext3_link (struct dentry * old_dentry,
err = ext3_add_nondir(handle, dentry, inode);
ext3_journal_stop(handle);
if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries))
goto retry;
return err;
}
......
......@@ -875,8 +875,9 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name,
const void *value, size_t value_len, int flags)
{
handle_t *handle;
int error;
int error, retries = 0;
retry:
handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
......@@ -886,6 +887,9 @@ ext3_xattr_set(struct inode *inode, int name_index, const char *name,
error = ext3_xattr_set_handle(handle, inode, name_index, name,
value, value_len, flags);
error2 = ext3_journal_stop(handle);
if (error == -ENOSPC &&
ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
if (error == 0)
error = error2;
}
......
......@@ -73,6 +73,7 @@ EXPORT_SYMBOL(journal_ack_err);
EXPORT_SYMBOL(journal_clear_err);
EXPORT_SYMBOL(log_wait_commit);
EXPORT_SYMBOL(journal_start_commit);
EXPORT_SYMBOL(journal_force_commit_nested);
EXPORT_SYMBOL(journal_wipe);
EXPORT_SYMBOL(journal_blocks_per_page);
EXPORT_SYMBOL(journal_invalidatepage);
......@@ -464,6 +465,39 @@ int log_start_commit(journal_t *journal, tid_t tid)
return ret;
}
/*
* Force and wait upon a commit if the calling process is not within
* transaction. This is used for forcing out undo-protected data which contains
* bitmaps, when the fs is running out of space.
*
* We can only force the running transaction if we don't have an active handle;
* otherwise, we will deadlock.
*
* Returns true if a transaction was started.
*/
int journal_force_commit_nested(journal_t *journal)
{
transaction_t *transaction = NULL;
tid_t tid;
spin_lock(&journal->j_state_lock);
if (journal->j_running_transaction && !current->journal_info) {
transaction = journal->j_running_transaction;
__log_start_commit(journal, transaction->t_tid);
} else if (journal->j_committing_transaction)
transaction = journal->j_committing_transaction;
if (!transaction) {
spin_unlock(&journal->j_state_lock);
return 0; /* Nothing to retry */
}
tid = transaction->t_tid;
spin_unlock(&journal->j_state_lock);
log_wait_commit(journal, tid);
return 1;
}
/*
* Start a commit of the current running transaction (if any). Returns true
* if a transaction was started, and fills its tid in at *ptid
......
......@@ -689,6 +689,7 @@ extern void ext3_check_blocks_bitmap (struct super_block *);
extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
unsigned int block_group,
struct buffer_head ** bh);
extern int ext3_should_retry_alloc(struct super_block *sb, int *retries);
/* dir.c */
extern int ext3_check_dir_entry(const char *, struct inode *,
......
......@@ -1006,6 +1006,7 @@ int __log_space_left(journal_t *); /* Called with journal locked */
int log_start_commit(journal_t *journal, tid_t tid);
int __log_start_commit(journal_t *journal, tid_t tid);
int journal_start_commit(journal_t *journal, tid_t *tid);
int journal_force_commit_nested(journal_t *journal);
int log_wait_commit(journal_t *journal, tid_t tid);
int log_do_checkpoint(journal_t *journal);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment