Commit bb189247 authored by Jan Kara

jbd: Fix oops in journal_remove_journal_head()

journal_remove_journal_head() can oops when trying to access journal_head
returned by bh2jh(). This is caused for example by the following race:

	TASK1					TASK2
  journal_commit_transaction()
    ...
    processing t_forget list
      __journal_refile_buffer(jh);
      if (!jh->b_transaction) {
        jbd_unlock_bh_state(bh);
					journal_try_to_free_buffers()
					  journal_grab_journal_head(bh)
					  jbd_lock_bh_state(bh)
					  __journal_try_to_free_buffer()
					  journal_put_journal_head(jh)
        journal_remove_journal_head(bh);

journal_put_journal_head() in TASK2 sees that b_jcount == 0 and that the buffer
is not part of any transaction, and thus frees the journal_head before TASK1
gets to do so. Note that even the buffer_head can be released by
try_to_free_buffers() after journal_put_journal_head(), which adds an even
larger opportunity for an oops (but I didn't see this happen in reality).

Fix the problem by making transactions hold their own journal_head reference
(in b_jcount). That way we don't have to remove journal_head explicitly via
journal_remove_journal_head() and instead just remove journal_head when
b_jcount drops to zero. The result of this is that [__]journal_refile_buffer(),
[__]journal_unfile_buffer(), and __journal_remove_checkpoint() can free
journal_head which needs modification of a few callers. Also we have to be
careful because once journal_head is removed, buffer_head might be freed as
well. So we have to get our own buffer_head reference where it matters.
Signed-off-by: Jan Kara <jack@suse.cz>
parent 2c2ea945
...@@ -96,10 +96,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) ...@@ -96,10 +96,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
if (jh->b_jlist == BJ_None && !buffer_locked(bh) && if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) { !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
/*
* Get our reference so that bh cannot be freed before
* we unlock it
*/
get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list"); JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __journal_remove_checkpoint(jh) + 1; ret = __journal_remove_checkpoint(jh) + 1;
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
BUFFER_TRACE(bh, "release"); BUFFER_TRACE(bh, "release");
__brelse(bh); __brelse(bh);
} else { } else {
...@@ -221,8 +225,8 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction) ...@@ -221,8 +225,8 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
goto restart; goto restart;
} }
if (buffer_locked(bh)) {
get_bh(bh); get_bh(bh);
if (buffer_locked(bh)) {
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
wait_on_buffer(bh); wait_on_buffer(bh);
...@@ -241,7 +245,6 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction) ...@@ -241,7 +245,6 @@ static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
*/ */
released = __journal_remove_checkpoint(jh); released = __journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
__brelse(bh); __brelse(bh);
} }
...@@ -305,12 +308,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, ...@@ -305,12 +308,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
ret = 1; ret = 1;
if (unlikely(buffer_write_io_error(bh))) if (unlikely(buffer_write_io_error(bh)))
ret = -EIO; ret = -EIO;
get_bh(bh);
J_ASSERT_JH(jh, !buffer_jbddirty(bh)); J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint"); BUFFER_TRACE(bh, "remove from checkpoint");
__journal_remove_checkpoint(jh); __journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
__brelse(bh); __brelse(bh);
} else { } else {
/* /*
...@@ -526,9 +529,9 @@ int cleanup_journal_tail(journal_t *journal) ...@@ -526,9 +529,9 @@ int cleanup_journal_tail(journal_t *journal)
/* /*
* journal_clean_one_cp_list * journal_clean_one_cp_list
* *
* Find all the written-back checkpoint buffers in the given list and release them. * Find all the written-back checkpoint buffers in the given list and release
* them.
* *
* Called with the journal locked.
* Called with j_list_lock held. * Called with j_list_lock held.
* Returns number of bufers reaped (for debug) * Returns number of bufers reaped (for debug)
*/ */
...@@ -635,8 +638,8 @@ int __journal_clean_checkpoint_list(journal_t *journal) ...@@ -635,8 +638,8 @@ int __journal_clean_checkpoint_list(journal_t *journal)
* checkpoint lists. * checkpoint lists.
* *
* The function returns 1 if it frees the transaction, 0 otherwise. * The function returns 1 if it frees the transaction, 0 otherwise.
* The function can free jh and bh.
* *
* This function is called with the journal locked.
* This function is called with j_list_lock held. * This function is called with j_list_lock held.
* This function is called with jbd_lock_bh_state(jh2bh(jh)) * This function is called with jbd_lock_bh_state(jh2bh(jh))
*/ */
...@@ -655,13 +658,14 @@ int __journal_remove_checkpoint(struct journal_head *jh) ...@@ -655,13 +658,14 @@ int __journal_remove_checkpoint(struct journal_head *jh)
} }
journal = transaction->t_journal; journal = transaction->t_journal;
JBUFFER_TRACE(jh, "removing from transaction");
__buffer_unlink(jh); __buffer_unlink(jh);
jh->b_cp_transaction = NULL; jh->b_cp_transaction = NULL;
journal_put_journal_head(jh);
if (transaction->t_checkpoint_list != NULL || if (transaction->t_checkpoint_list != NULL ||
transaction->t_checkpoint_io_list != NULL) transaction->t_checkpoint_io_list != NULL)
goto out; goto out;
JBUFFER_TRACE(jh, "transaction has no more buffers");
/* /*
* There is one special case to worry about: if we have just pulled the * There is one special case to worry about: if we have just pulled the
...@@ -672,10 +676,8 @@ int __journal_remove_checkpoint(struct journal_head *jh) ...@@ -672,10 +676,8 @@ int __journal_remove_checkpoint(struct journal_head *jh)
* The locking here around t_state is a bit sleazy. * The locking here around t_state is a bit sleazy.
* See the comment at the end of journal_commit_transaction(). * See the comment at the end of journal_commit_transaction().
*/ */
if (transaction->t_state != T_FINISHED) { if (transaction->t_state != T_FINISHED)
JBUFFER_TRACE(jh, "belongs to running/committing transaction");
goto out; goto out;
}
/* OK, that was the last buffer for the transaction: we can now /* OK, that was the last buffer for the transaction: we can now
safely remove this transaction from the log */ safely remove this transaction from the log */
...@@ -687,7 +689,6 @@ int __journal_remove_checkpoint(struct journal_head *jh) ...@@ -687,7 +689,6 @@ int __journal_remove_checkpoint(struct journal_head *jh)
wake_up(&journal->j_wait_logspace); wake_up(&journal->j_wait_logspace);
ret = 1; ret = 1;
out: out:
JBUFFER_TRACE(jh, "exit");
return ret; return ret;
} }
...@@ -706,6 +707,8 @@ void __journal_insert_checkpoint(struct journal_head *jh, ...@@ -706,6 +707,8 @@ void __journal_insert_checkpoint(struct journal_head *jh,
J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
/* Get reference for checkpointing transaction */
journal_grab_journal_head(jh2bh(jh));
jh->b_cp_transaction = transaction; jh->b_cp_transaction = transaction;
if (!transaction->t_checkpoint_list) { if (!transaction->t_checkpoint_list) {
......
...@@ -258,10 +258,6 @@ static int journal_submit_data_buffers(journal_t *journal, ...@@ -258,10 +258,6 @@ static int journal_submit_data_buffers(journal_t *journal,
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
if (locked) if (locked)
unlock_buffer(bh); unlock_buffer(bh);
journal_remove_journal_head(bh);
/* One for our safety reference, other for
* journal_remove_journal_head() */
put_bh(bh);
release_data_buffer(bh); release_data_buffer(bh);
} }
...@@ -455,14 +451,9 @@ void journal_commit_transaction(journal_t *journal) ...@@ -455,14 +451,9 @@ void journal_commit_transaction(journal_t *journal)
} }
if (buffer_jbd(bh) && bh2jh(bh) == jh && if (buffer_jbd(bh) && bh2jh(bh) == jh &&
jh->b_transaction == commit_transaction && jh->b_transaction == commit_transaction &&
jh->b_jlist == BJ_Locked) { jh->b_jlist == BJ_Locked)
__journal_unfile_buffer(jh); __journal_unfile_buffer(jh);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
put_bh(bh);
} else {
jbd_unlock_bh_state(bh);
}
release_data_buffer(bh); release_data_buffer(bh);
cond_resched_lock(&journal->j_list_lock); cond_resched_lock(&journal->j_list_lock);
} }
...@@ -807,10 +798,16 @@ void journal_commit_transaction(journal_t *journal) ...@@ -807,10 +798,16 @@ void journal_commit_transaction(journal_t *journal)
while (commit_transaction->t_forget) { while (commit_transaction->t_forget) {
transaction_t *cp_transaction; transaction_t *cp_transaction;
struct buffer_head *bh; struct buffer_head *bh;
int try_to_free = 0;
jh = commit_transaction->t_forget; jh = commit_transaction->t_forget;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
bh = jh2bh(jh); bh = jh2bh(jh);
/*
* Get a reference so that bh cannot be freed before we are
* done with it.
*/
get_bh(bh);
jbd_lock_bh_state(bh); jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
jh->b_transaction == journal->j_running_transaction); jh->b_transaction == journal->j_running_transaction);
...@@ -868,28 +865,27 @@ void journal_commit_transaction(journal_t *journal) ...@@ -868,28 +865,27 @@ void journal_commit_transaction(journal_t *journal)
__journal_insert_checkpoint(jh, commit_transaction); __journal_insert_checkpoint(jh, commit_transaction);
if (is_journal_aborted(journal)) if (is_journal_aborted(journal))
clear_buffer_jbddirty(bh); clear_buffer_jbddirty(bh);
JBUFFER_TRACE(jh, "refile for checkpoint writeback");
__journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
} else { } else {
J_ASSERT_BH(bh, !buffer_dirty(bh)); J_ASSERT_BH(bh, !buffer_dirty(bh));
/* The buffer on BJ_Forget list and not jbddirty means /*
* The buffer on BJ_Forget list and not jbddirty means
* it has been freed by this transaction and hence it * it has been freed by this transaction and hence it
* could not have been reallocated until this * could not have been reallocated until this
* transaction has committed. *BUT* it could be * transaction has committed. *BUT* it could be
* reallocated once we have written all the data to * reallocated once we have written all the data to
* disk and before we process the buffer on BJ_Forget * disk and before we process the buffer on BJ_Forget
* list. */ * list.
*/
if (!jh->b_next_transaction)
try_to_free = 1;
}
JBUFFER_TRACE(jh, "refile or unfile freed buffer"); JBUFFER_TRACE(jh, "refile or unfile freed buffer");
__journal_refile_buffer(jh); __journal_refile_buffer(jh);
if (!jh->b_transaction) {
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
/* needs a brelse */ if (try_to_free)
journal_remove_journal_head(bh);
release_buffer_page(bh); release_buffer_page(bh);
} else else
jbd_unlock_bh_state(bh); __brelse(bh);
}
cond_resched_lock(&journal->j_list_lock); cond_resched_lock(&journal->j_list_lock);
} }
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
......
...@@ -1803,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh) ...@@ -1803,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh)
* When a buffer has its BH_JBD bit set it is immune from being released by * When a buffer has its BH_JBD bit set it is immune from being released by
* core kernel code, mainly via ->b_count. * core kernel code, mainly via ->b_count.
* *
* A journal_head may be detached from its buffer_head when the journal_head's * A journal_head is detached from its buffer_head when the journal_head's
* b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
* Various places in JBD call journal_remove_journal_head() to indicate that the * transaction (b_cp_transaction) hold their references to b_jcount.
* journal_head can be dropped if needed.
* *
* Various places in the kernel want to attach a journal_head to a buffer_head * Various places in the kernel want to attach a journal_head to a buffer_head
* _before_ attaching the journal_head to a transaction. To protect the * _before_ attaching the journal_head to a transaction. To protect the
...@@ -1819,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh) ...@@ -1819,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh)
* (Attach a journal_head if needed. Increments b_jcount) * (Attach a journal_head if needed. Increments b_jcount)
* struct journal_head *jh = journal_add_journal_head(bh); * struct journal_head *jh = journal_add_journal_head(bh);
* ... * ...
* (Get another reference for transaction)
* journal_grab_journal_head(bh);
* jh->b_transaction = xxx; * jh->b_transaction = xxx;
* (Put original reference)
* journal_put_journal_head(jh); * journal_put_journal_head(jh);
*
* Now, the journal_head's b_jcount is zero, but it is safe from being released
* because it has a non-zero b_transaction.
*/ */
/* /*
* Give a buffer_head a journal_head. * Give a buffer_head a journal_head.
* *
* Doesn't need the journal lock.
* May sleep. * May sleep.
*/ */
struct journal_head *journal_add_journal_head(struct buffer_head *bh) struct journal_head *journal_add_journal_head(struct buffer_head *bh)
...@@ -1893,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) ...@@ -1893,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
struct journal_head *jh = bh2jh(bh); struct journal_head *jh = bh2jh(bh);
J_ASSERT_JH(jh, jh->b_jcount >= 0); J_ASSERT_JH(jh, jh->b_jcount >= 0);
J_ASSERT_JH(jh, jh->b_transaction == NULL);
get_bh(bh); J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
if (jh->b_jcount == 0) { J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
if (jh->b_transaction == NULL &&
jh->b_next_transaction == NULL &&
jh->b_cp_transaction == NULL) {
J_ASSERT_JH(jh, jh->b_jlist == BJ_None); J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
J_ASSERT_BH(bh, buffer_jbd(bh)); J_ASSERT_BH(bh, buffer_jbd(bh));
J_ASSERT_BH(bh, jh2bh(jh) == bh); J_ASSERT_BH(bh, jh2bh(jh) == bh);
BUFFER_TRACE(bh, "remove journal_head"); BUFFER_TRACE(bh, "remove journal_head");
if (jh->b_frozen_data) { if (jh->b_frozen_data) {
printk(KERN_WARNING "%s: freeing " printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
"b_frozen_data\n",
__func__);
jbd_free(jh->b_frozen_data, bh->b_size); jbd_free(jh->b_frozen_data, bh->b_size);
} }
if (jh->b_committed_data) { if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing " printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
"b_committed_data\n",
__func__);
jbd_free(jh->b_committed_data, bh->b_size); jbd_free(jh->b_committed_data, bh->b_size);
} }
bh->b_private = NULL; bh->b_private = NULL;
jh->b_bh = NULL; /* debug, really */ jh->b_bh = NULL; /* debug, really */
clear_buffer_jbd(bh); clear_buffer_jbd(bh);
__brelse(bh);
journal_free_journal_head(jh); journal_free_journal_head(jh);
} else {
BUFFER_TRACE(bh, "journal_head was locked");
}
}
} }
/* /*
* journal_remove_journal_head(): if the buffer isn't attached to a transaction * Drop a reference on the passed journal_head. If it fell to zero then
* and has a zero b_jcount then remove and release its journal_head. If we did
* see that the buffer is not used by any transaction we also "logically"
* decrement ->b_count.
*
* We in fact take an additional increment on ->b_count as a convenience,
* because the caller usually wants to do additional things with the bh
* after calling here.
* The caller of journal_remove_journal_head() *must* run __brelse(bh) at some
* time. Once the caller has run __brelse(), the buffer is eligible for
* reaping by try_to_free_buffers().
*/
void journal_remove_journal_head(struct buffer_head *bh)
{
jbd_lock_bh_journal_head(bh);
__journal_remove_journal_head(bh);
jbd_unlock_bh_journal_head(bh);
}
/*
* Drop a reference on the passed journal_head. If it fell to zero then try to
* release the journal_head from the buffer_head. * release the journal_head from the buffer_head.
*/ */
void journal_put_journal_head(struct journal_head *jh) void journal_put_journal_head(struct journal_head *jh)
...@@ -1957,10 +1923,11 @@ void journal_put_journal_head(struct journal_head *jh) ...@@ -1957,10 +1923,11 @@ void journal_put_journal_head(struct journal_head *jh)
jbd_lock_bh_journal_head(bh); jbd_lock_bh_journal_head(bh);
J_ASSERT_JH(jh, jh->b_jcount > 0); J_ASSERT_JH(jh, jh->b_jcount > 0);
--jh->b_jcount; --jh->b_jcount;
if (!jh->b_jcount && !jh->b_transaction) { if (!jh->b_jcount) {
__journal_remove_journal_head(bh); __journal_remove_journal_head(bh);
jbd_unlock_bh_journal_head(bh);
__brelse(bh); __brelse(bh);
} } else
jbd_unlock_bh_journal_head(bh); jbd_unlock_bh_journal_head(bh);
} }
......
...@@ -696,7 +696,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -696,7 +696,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
if (!jh->b_transaction) { if (!jh->b_transaction) {
JBUFFER_TRACE(jh, "no transaction"); JBUFFER_TRACE(jh, "no transaction");
J_ASSERT_JH(jh, !jh->b_next_transaction); J_ASSERT_JH(jh, !jh->b_next_transaction);
jh->b_transaction = transaction;
JBUFFER_TRACE(jh, "file as BJ_Reserved"); JBUFFER_TRACE(jh, "file as BJ_Reserved");
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
__journal_file_buffer(jh, transaction, BJ_Reserved); __journal_file_buffer(jh, transaction, BJ_Reserved);
...@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) ...@@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
* committed and so it's safe to clear the dirty bit. * committed and so it's safe to clear the dirty bit.
*/ */
clear_buffer_dirty(jh2bh(jh)); clear_buffer_dirty(jh2bh(jh));
jh->b_transaction = transaction;
/* first access by this transaction */ /* first access by this transaction */
jh->b_modified = 0; jh->b_modified = 0;
...@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) ...@@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
ret = -EIO; ret = -EIO;
goto no_journal; goto no_journal;
} }
/* We might have slept so buffer could be refiled now */
if (jh->b_transaction != NULL) { if (jh->b_transaction != NULL &&
jh->b_transaction != handle->h_transaction) {
JBUFFER_TRACE(jh, "unfile from commit"); JBUFFER_TRACE(jh, "unfile from commit");
__journal_temp_unlink_buffer(jh); __journal_temp_unlink_buffer(jh);
/* It still points to the committing /* It still points to the committing
...@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) ...@@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh)
if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
JBUFFER_TRACE(jh, "not on correct data list: unfile"); JBUFFER_TRACE(jh, "not on correct data list: unfile");
J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
__journal_temp_unlink_buffer(jh);
jh->b_transaction = handle->h_transaction;
JBUFFER_TRACE(jh, "file as data"); JBUFFER_TRACE(jh, "file as data");
__journal_file_buffer(jh, handle->h_transaction, __journal_file_buffer(jh, handle->h_transaction,
BJ_SyncData); BJ_SyncData);
...@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) ...@@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh)
__journal_file_buffer(jh, transaction, BJ_Forget); __journal_file_buffer(jh, transaction, BJ_Forget);
} else { } else {
__journal_unfile_buffer(jh); __journal_unfile_buffer(jh);
journal_remove_journal_head(bh);
__brelse(bh);
if (!buffer_jbd(bh)) { if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
...@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh) ...@@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh)
mark_buffer_dirty(bh); /* Expose it to the VM */ mark_buffer_dirty(bh); /* Expose it to the VM */
} }
/*
* Remove buffer from all transactions.
*
* Called with bh_state lock and j_list_lock
*
* jh and bh may be already freed when this function returns.
*/
void __journal_unfile_buffer(struct journal_head *jh) void __journal_unfile_buffer(struct journal_head *jh)
{ {
__journal_temp_unlink_buffer(jh); __journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL; jh->b_transaction = NULL;
journal_put_journal_head(jh);
} }
void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) void journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
{ {
jbd_lock_bh_state(jh2bh(jh)); struct buffer_head *bh = jh2bh(jh);
/* Get reference so that buffer cannot be freed before we unlock it */
get_bh(bh);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
__journal_unfile_buffer(jh); __journal_unfile_buffer(jh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(jh2bh(jh)); jbd_unlock_bh_state(bh);
__brelse(bh);
} }
/* /*
...@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) ...@@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
/* A written-back ordered data buffer */ /* A written-back ordered data buffer */
JBUFFER_TRACE(jh, "release data"); JBUFFER_TRACE(jh, "release data");
__journal_unfile_buffer(jh); __journal_unfile_buffer(jh);
journal_remove_journal_head(bh);
__brelse(bh);
} }
} else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
/* written-back checkpointed metadata buffer */ /* written-back checkpointed metadata buffer */
if (jh->b_jlist == BJ_None) { if (jh->b_jlist == BJ_None) {
JBUFFER_TRACE(jh, "remove from checkpoint list"); JBUFFER_TRACE(jh, "remove from checkpoint list");
__journal_remove_checkpoint(jh); __journal_remove_checkpoint(jh);
journal_remove_journal_head(bh);
__brelse(bh);
} }
} }
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
...@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal, ...@@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal,
/* /*
* We take our own ref against the journal_head here to avoid * We take our own ref against the journal_head here to avoid
* having to add tons of locking around each instance of * having to add tons of locking around each instance of
* journal_remove_journal_head() and journal_put_journal_head(). * journal_put_journal_head().
*/ */
jh = journal_grab_journal_head(bh); jh = journal_grab_journal_head(bh);
if (!jh) if (!jh)
...@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) ...@@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
int may_free = 1; int may_free = 1;
struct buffer_head *bh = jh2bh(jh); struct buffer_head *bh = jh2bh(jh);
__journal_unfile_buffer(jh);
if (jh->b_cp_transaction) { if (jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "on running+cp transaction"); JBUFFER_TRACE(jh, "on running+cp transaction");
__journal_temp_unlink_buffer(jh);
/* /*
* We don't want to write the buffer anymore, clear the * We don't want to write the buffer anymore, clear the
* bit so that we don't confuse checks in * bit so that we don't confuse checks in
...@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) ...@@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
may_free = 0; may_free = 0;
} else { } else {
JBUFFER_TRACE(jh, "on running transaction"); JBUFFER_TRACE(jh, "on running transaction");
journal_remove_journal_head(bh); __journal_unfile_buffer(jh);
__brelse(bh);
} }
return may_free; return may_free;
} }
...@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh, ...@@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh,
if (jh->b_transaction) if (jh->b_transaction)
__journal_temp_unlink_buffer(jh); __journal_temp_unlink_buffer(jh);
else
journal_grab_journal_head(bh);
jh->b_transaction = transaction; jh->b_transaction = transaction;
switch (jlist) { switch (jlist) {
...@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh, ...@@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh,
* already started to be used by a subsequent transaction, refile the * already started to be used by a subsequent transaction, refile the
* buffer on that transaction's metadata list. * buffer on that transaction's metadata list.
* *
* Called under journal->j_list_lock * Called under j_list_lock
*
* Called under jbd_lock_bh_state(jh2bh(jh)) * Called under jbd_lock_bh_state(jh2bh(jh))
*
* jh and bh may be already free when this function returns
*/ */
void __journal_refile_buffer(struct journal_head *jh) void __journal_refile_buffer(struct journal_head *jh)
{ {
...@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh) ...@@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh); was_dirty = test_clear_buffer_jbddirty(bh);
__journal_temp_unlink_buffer(jh); __journal_temp_unlink_buffer(jh);
/*
* We set b_transaction here because b_next_transaction will inherit
* our jh reference and thus __journal_file_buffer() must not take a
* new one.
*/
jh->b_transaction = jh->b_next_transaction; jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL; jh->b_next_transaction = NULL;
if (buffer_freed(bh)) if (buffer_freed(bh))
...@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh) ...@@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh)
} }
/* /*
* For the unlocked version of this call, also make sure that any * __journal_refile_buffer() with necessary locking added. We take our bh
* hanging journal_head is cleaned up if necessary. * reference so that we can safely unlock bh.
* *
* __journal_refile_buffer is usually called as part of a single locked * The jh and bh may be freed by this call.
* operation on a buffer_head, in which the caller is probably going to
* be hooking the journal_head onto other lists. In that case it is up
* to the caller to remove the journal_head if necessary. For the
* unlocked journal_refile_buffer call, the caller isn't going to be
* doing anything else to the buffer so we need to do the cleanup
* ourselves to avoid a jh leak.
*
* *** The journal_head may be freed by this call! ***
*/ */
void journal_refile_buffer(journal_t *journal, struct journal_head *jh) void journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{ {
struct buffer_head *bh = jh2bh(jh); struct buffer_head *bh = jh2bh(jh);
/* Get reference so that buffer cannot be freed before we unlock it */
get_bh(bh);
jbd_lock_bh_state(bh); jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
__journal_refile_buffer(jh); __journal_refile_buffer(jh);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
__brelse(bh); __brelse(bh);
} }
...@@ -940,7 +940,6 @@ extern int journal_force_commit(journal_t *); ...@@ -940,7 +940,6 @@ extern int journal_force_commit(journal_t *);
*/ */
struct journal_head *journal_add_journal_head(struct buffer_head *bh); struct journal_head *journal_add_journal_head(struct buffer_head *bh);
struct journal_head *journal_grab_journal_head(struct buffer_head *bh); struct journal_head *journal_grab_journal_head(struct buffer_head *bh);
void journal_remove_journal_head(struct buffer_head *bh);
void journal_put_journal_head(struct journal_head *jh); void journal_put_journal_head(struct journal_head *jh);
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment