Commit e821ceb2 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] JBD: implement b_transaction locking rules

Go through all uses of b_transaction and implement the locking rules.

Fairly straightforward.
parent b07da5e5
...@@ -50,6 +50,7 @@ static inline void __buffer_unlink(struct journal_head *jh) ...@@ -50,6 +50,7 @@ static inline void __buffer_unlink(struct journal_head *jh)
* Try to release a checkpointed buffer from its transaction. * Try to release a checkpointed buffer from its transaction.
* Returns 1 if we released it. * Returns 1 if we released it.
* Requires journal_datalist_lock * Requires journal_datalist_lock
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/ */
static int __try_to_free_cp_buf(struct journal_head *jh) static int __try_to_free_cp_buf(struct journal_head *jh)
{ {
...@@ -59,10 +60,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh) ...@@ -59,10 +60,13 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
JBUFFER_TRACE(jh, "remove from checkpoint list"); JBUFFER_TRACE(jh, "remove from checkpoint list");
__journal_remove_checkpoint(jh); __journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh); journal_remove_journal_head(bh);
BUFFER_TRACE(bh, "release"); BUFFER_TRACE(bh, "release");
__brelse(bh); __brelse(bh);
ret = 1; ret = 1;
} else {
jbd_unlock_bh_state(bh);
} }
return ret; return ret;
} }
...@@ -92,6 +96,20 @@ void log_wait_for_space(journal_t *journal, int nblocks) ...@@ -92,6 +96,20 @@ void log_wait_for_space(journal_t *journal, int nblocks)
} }
} }
/*
 * We were unable to perform jbd_trylock_bh_state() inside
 * journal_datalist_lock. The caller must restart a list walk. Wait for
 * someone else to run jbd_unlock_bh_state().
 *
 * Called with journal_datalist_lock held; it is dropped here and NOT
 * retaken — the caller must re-take it and restart its walk.
 * NOTE(review): @journal is unused in this body — presumably kept for
 * symmetry with the other helpers; confirm before removing.
 */
static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
{
	/* Pin the bh first so it cannot go away once we drop the lock. */
	get_bh(bh);
	spin_unlock(&journal_datalist_lock);
	/* Sleep until the current holder releases the bh state lock. */
	jbd_lock_bh_state(bh);
	jbd_unlock_bh_state(bh);
	put_bh(bh);
}
/* /*
* Clean up a transaction's checkpoint list. * Clean up a transaction's checkpoint list.
* *
...@@ -132,11 +150,20 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) ...@@ -132,11 +150,20 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
goto out_return_1; goto out_return_1;
} }
/*
* This is foul
*/
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
goto out_return_1;
}
if (jh->b_transaction != NULL) { if (jh->b_transaction != NULL) {
transaction_t *transaction = jh->b_transaction; transaction_t *transaction = jh->b_transaction;
tid_t tid = transaction->t_tid; tid_t tid = transaction->t_tid;
spin_unlock(&journal_datalist_lock); spin_unlock(&journal_datalist_lock);
jbd_unlock_bh_state(bh);
log_start_commit(journal, transaction); log_start_commit(journal, transaction);
unlock_journal(journal); unlock_journal(journal);
log_wait_commit(journal, tid); log_wait_commit(journal, tid);
...@@ -156,11 +183,13 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction) ...@@ -156,11 +183,13 @@ static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) { if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
BUFFER_TRACE(bh, "remove from checkpoint"); BUFFER_TRACE(bh, "remove from checkpoint");
__journal_remove_checkpoint(jh); __journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
journal_remove_journal_head(bh); journal_remove_journal_head(bh);
__brelse(bh); __brelse(bh);
ret = 1; ret = 1;
} else {
jbd_unlock_bh_state(bh);
} }
jh = next_jh; jh = next_jh;
} while (jh != last_jh); } while (jh != last_jh);
...@@ -197,6 +226,7 @@ static void __flush_batch(struct buffer_head **bhs, int *batch_count) ...@@ -197,6 +226,7 @@ static void __flush_batch(struct buffer_head **bhs, int *batch_count)
* scan of the checkpoint list. * scan of the checkpoint list.
* *
* Called with journal_datalist_lock held. * Called with journal_datalist_lock held.
* Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
*/ */
static int __flush_buffer(journal_t *journal, struct journal_head *jh, static int __flush_buffer(journal_t *journal, struct journal_head *jh,
struct buffer_head **bhs, int *batch_count, struct buffer_head **bhs, int *batch_count,
...@@ -216,10 +246,11 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh, ...@@ -216,10 +246,11 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
* disk, as that would break recoverability. * disk, as that would break recoverability.
*/ */
BUFFER_TRACE(bh, "queue"); BUFFER_TRACE(bh, "queue");
atomic_inc(&bh->b_count); get_bh(bh);
J_ASSERT_BH(bh, !test_bit(BH_JWrite, &bh->b_state)); J_ASSERT_BH(bh, !buffer_jwrite(bh));
set_bit(BH_JWrite, &bh->b_state); set_buffer_jwrite(bh);
bhs[*batch_count] = bh; bhs[*batch_count] = bh;
jbd_unlock_bh_state(bh);
(*batch_count)++; (*batch_count)++;
if (*batch_count == NR_BATCH) { if (*batch_count == NR_BATCH) {
__flush_batch(bhs, batch_count); __flush_batch(bhs, batch_count);
...@@ -302,8 +333,16 @@ int log_do_checkpoint (journal_t *journal, int nblocks) ...@@ -302,8 +333,16 @@ int log_do_checkpoint (journal_t *journal, int nblocks)
last_jh = jh->b_cpprev; last_jh = jh->b_cpprev;
next_jh = jh; next_jh = jh;
do { do {
struct buffer_head *bh;
jh = next_jh; jh = next_jh;
next_jh = jh->b_cpnext; next_jh = jh->b_cpnext;
bh = jh2bh(jh);
if (!jbd_trylock_bh_state(bh)) {
jbd_sync_bh(journal, bh);
spin_lock(&journal_datalist_lock);
break;
}
retry = __flush_buffer(journal, jh, bhs, &batch_count, retry = __flush_buffer(journal, jh, bhs, &batch_count,
&drop_count); &drop_count);
} while (jh != last_jh && !retry); } while (jh != last_jh && !retry);
...@@ -439,9 +478,12 @@ int __journal_clean_checkpoint_list(journal_t *journal) ...@@ -439,9 +478,12 @@ int __journal_clean_checkpoint_list(journal_t *journal)
if (jh) { if (jh) {
struct journal_head *last_jh = jh->b_cpprev; struct journal_head *last_jh = jh->b_cpprev;
struct journal_head *next_jh = jh; struct journal_head *next_jh = jh;
do { do {
jh = next_jh; jh = next_jh;
next_jh = jh->b_cpnext; next_jh = jh->b_cpnext;
/* Use trylock because of the ranking */
if (jbd_trylock_bh_state(jh2bh(jh)))
ret += __try_to_free_cp_buf(jh); ret += __try_to_free_cp_buf(jh);
} while (jh != last_jh); } while (jh != last_jh);
} }
......
...@@ -465,9 +465,10 @@ void journal_commit_transaction(journal_t *journal) ...@@ -465,9 +465,10 @@ void journal_commit_transaction(journal_t *journal)
* akpm: these are BJ_IO, and journal_datalist_lock is not needed. * akpm: these are BJ_IO, and journal_datalist_lock is not needed.
* See __journal_try_to_free_buffer. * See __journal_try_to_free_buffer.
*/ */
wait_for_iobuf: wait_for_iobuf:
while (commit_transaction->t_iobuf_list != NULL) { while (commit_transaction->t_iobuf_list != NULL) {
struct buffer_head *bh; struct buffer_head *bh;
jh = commit_transaction->t_iobuf_list->b_tprev; jh = commit_transaction->t_iobuf_list->b_tprev;
bh = jh2bh(jh); bh = jh2bh(jh);
if (buffer_locked(bh)) { if (buffer_locked(bh)) {
...@@ -479,7 +480,7 @@ void journal_commit_transaction(journal_t *journal) ...@@ -479,7 +480,7 @@ void journal_commit_transaction(journal_t *journal)
goto wait_for_iobuf; goto wait_for_iobuf;
} }
clear_bit(BH_JWrite, &jh2bh(jh)->b_state); clear_buffer_jwrite(bh);
JBUFFER_TRACE(jh, "ph4: unfile after journal write"); JBUFFER_TRACE(jh, "ph4: unfile after journal write");
journal_unfile_buffer(jh); journal_unfile_buffer(jh);
...@@ -495,7 +496,6 @@ void journal_commit_transaction(journal_t *journal) ...@@ -495,7 +496,6 @@ void journal_commit_transaction(journal_t *journal)
* ->t_iobuf_list should contain only dummy buffer_heads * ->t_iobuf_list should contain only dummy buffer_heads
* which were created by journal_write_metadata_buffer(). * which were created by journal_write_metadata_buffer().
*/ */
bh = jh2bh(jh);
BUFFER_TRACE(bh, "dumping temporary bh"); BUFFER_TRACE(bh, "dumping temporary bh");
journal_put_journal_head(jh); journal_put_journal_head(jh);
__brelse(bh); __brelse(bh);
...@@ -637,6 +637,8 @@ void journal_commit_transaction(journal_t *journal) ...@@ -637,6 +637,8 @@ void journal_commit_transaction(journal_t *journal)
struct buffer_head *bh; struct buffer_head *bh;
jh = commit_transaction->t_forget; jh = commit_transaction->t_forget;
bh = jh2bh(jh);
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || J_ASSERT_JH(jh, jh->b_transaction == commit_transaction ||
jh->b_transaction == journal->j_running_transaction); jh->b_transaction == journal->j_running_transaction);
...@@ -650,7 +652,6 @@ void journal_commit_transaction(journal_t *journal) ...@@ -650,7 +652,6 @@ void journal_commit_transaction(journal_t *journal)
* *
* Otherwise, we can just throw away the frozen data now. * Otherwise, we can just throw away the frozen data now.
*/ */
jbd_lock_bh_state(jh2bh(jh));
if (jh->b_committed_data) { if (jh->b_committed_data) {
kfree(jh->b_committed_data); kfree(jh->b_committed_data);
jh->b_committed_data = NULL; jh->b_committed_data = NULL;
...@@ -676,7 +677,6 @@ void journal_commit_transaction(journal_t *journal) ...@@ -676,7 +677,6 @@ void journal_commit_transaction(journal_t *journal)
* by journal_forget, it may no longer be dirty and * by journal_forget, it may no longer be dirty and
* there's no point in keeping a checkpoint record for * there's no point in keeping a checkpoint record for
* it. */ * it. */
bh = jh2bh(jh);
/* A buffer which has been freed while still being /* A buffer which has been freed while still being
* journaled by a previous transaction may end up still * journaled by a previous transaction may end up still
......
...@@ -259,56 +259,6 @@ static void journal_kill_thread(journal_t *journal) ...@@ -259,56 +259,6 @@ static void journal_kill_thread(journal_t *journal)
} }
} }
#if 0
This is no longer needed - we do it in commit quite efficiently.
Note that if this function is resurrected, the loop needs to
be reorganised into the next_jh/last_jh algorithm.

/*
 * journal_clean_data_list: cleanup after data IO.
 *
 * Once the IO system has finished writing the buffers on the transaction's
 * data list, we can remove those buffers from the list. This function
 * scans the list for such buffers and removes them cleanly.
 *
 * We assume that the journal is already locked.
 * We are called with journal_datalist_lock held.
 *
 * AKPM: This function looks inefficient. Approximately O(n^2)
 * for potentially thousands of buffers. It no longer shows on profiles
 * because these buffers are mainly dropped in journal_commit_transaction().
 */

void __journal_clean_data_list(transaction_t *transaction)
{
	struct journal_head *jh, *next;

	assert_spin_locked(&journal_datalist_lock);

restart:
	jh = transaction->t_sync_datalist;
	if (!jh)
		goto out;

	do {
		next = jh->b_tnext;
		/* A buffer is done once it is neither under IO nor dirty. */
		if (!buffer_locked(jh2bh(jh)) && !buffer_dirty(jh2bh(jh))) {
			struct buffer_head *bh = jh2bh(jh);
			BUFFER_TRACE(bh, "data writeout complete: unfile");
			__journal_unfile_buffer(jh);
			jh->b_transaction = NULL;
			/* Drops the jh and the ref which the jh held on bh. */
			journal_remove_journal_head(bh);
			__brelse(bh);
			/* List was modified under us: rescan from the head
			 * (this is the O(n^2) behaviour noted above). */
			goto restart;
		}
		jh = next;
	} while (transaction->t_sync_datalist &&
			jh != transaction->t_sync_datalist);
out:
	return;
}
#endif
/* /*
* journal_write_metadata_buffer: write a metadata buffer to the journal. * journal_write_metadata_buffer: write a metadata buffer to the journal.
* *
......
...@@ -740,7 +740,7 @@ int journal_get_write_access (handle_t *handle, struct buffer_head *bh) ...@@ -740,7 +740,7 @@ int journal_get_write_access (handle_t *handle, struct buffer_head *bh)
* *
* Call this if you create a new bh. * Call this if you create a new bh.
*/ */
int journal_get_create_access (handle_t *handle, struct buffer_head *bh) int journal_get_create_access(handle_t *handle, struct buffer_head *bh)
{ {
transaction_t *transaction = handle->h_transaction; transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal; journal_t *journal = transaction->t_journal;
...@@ -755,11 +755,15 @@ int journal_get_create_access (handle_t *handle, struct buffer_head *bh) ...@@ -755,11 +755,15 @@ int journal_get_create_access (handle_t *handle, struct buffer_head *bh)
err = 0; err = 0;
JBUFFER_TRACE(jh, "entry"); JBUFFER_TRACE(jh, "entry");
/* The buffer may already belong to this transaction due to /*
* pre-zeroing in the filesystem's new_block code. It may also * The buffer may already belong to this transaction due to pre-zeroing
* be on the previous, committing transaction's lists, but it * in the filesystem's new_block code. It may also be on the previous,
* HAS to be in Forget state in that case: the transaction must * committing transaction's lists, but it HAS to be in Forget state in
* have deleted the buffer for it to be reused here. */ * that case: the transaction must have deleted the buffer for it to be
* reused here.
*/
jbd_lock_bh_state(bh);
spin_lock(&journal_datalist_lock);
J_ASSERT_JH(jh, (jh->b_transaction == transaction || J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
jh->b_transaction == NULL || jh->b_transaction == NULL ||
(jh->b_transaction == journal->j_committing_transaction && (jh->b_transaction == journal->j_committing_transaction &&
...@@ -771,7 +775,6 @@ int journal_get_create_access (handle_t *handle, struct buffer_head *bh) ...@@ -771,7 +775,6 @@ int journal_get_create_access (handle_t *handle, struct buffer_head *bh)
J_ASSERT_JH(jh, handle->h_buffer_credits > 0); J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
handle->h_buffer_credits--; handle->h_buffer_credits--;
spin_lock(&journal_datalist_lock);
if (jh->b_transaction == NULL) { if (jh->b_transaction == NULL) {
jh->b_transaction = transaction; jh->b_transaction = transaction;
JBUFFER_TRACE(jh, "file as BJ_Reserved"); JBUFFER_TRACE(jh, "file as BJ_Reserved");
...@@ -781,6 +784,7 @@ int journal_get_create_access (handle_t *handle, struct buffer_head *bh) ...@@ -781,6 +784,7 @@ int journal_get_create_access (handle_t *handle, struct buffer_head *bh)
jh->b_next_transaction = transaction; jh->b_next_transaction = transaction;
} }
spin_unlock(&journal_datalist_lock); spin_unlock(&journal_datalist_lock);
jbd_unlock_bh_state(bh);
/* /*
* akpm: I added this. ext3_alloc_branch can pick up new indirect * akpm: I added this. ext3_alloc_branch can pick up new indirect
...@@ -799,10 +803,9 @@ int journal_get_create_access (handle_t *handle, struct buffer_head *bh) ...@@ -799,10 +803,9 @@ int journal_get_create_access (handle_t *handle, struct buffer_head *bh)
return err; return err;
} }
/** /**
* int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences * int journal_get_undo_access() - Notify intent to modify metadata with
* non-rewindable consequences
* @handle: transaction * @handle: transaction
* @bh: buffer to undo * @bh: buffer to undo
* *
...@@ -932,6 +935,7 @@ int journal_dirty_data (handle_t *handle, struct buffer_head *bh) ...@@ -932,6 +935,7 @@ int journal_dirty_data (handle_t *handle, struct buffer_head *bh)
* never, ever allow this to happen: there's nothing we can do * never, ever allow this to happen: there's nothing we can do
* about it in this layer. * about it in this layer.
*/ */
jbd_lock_bh_state(bh);
spin_lock(&journal_datalist_lock); spin_lock(&journal_datalist_lock);
if (jh->b_transaction) { if (jh->b_transaction) {
JBUFFER_TRACE(jh, "has transaction"); JBUFFER_TRACE(jh, "has transaction");
...@@ -986,10 +990,12 @@ int journal_dirty_data (handle_t *handle, struct buffer_head *bh) ...@@ -986,10 +990,12 @@ int journal_dirty_data (handle_t *handle, struct buffer_head *bh)
* commit to never terminate. * commit to never terminate.
*/ */
if (buffer_dirty(bh)) { if (buffer_dirty(bh)) {
atomic_inc(&bh->b_count); get_bh(bh);
spin_unlock(&journal_datalist_lock); spin_unlock(&journal_datalist_lock);
jbd_unlock_bh_state(bh);
need_brelse = 1; need_brelse = 1;
sync_dirty_buffer(bh); sync_dirty_buffer(bh);
jbd_lock_bh_state(bh);
spin_lock(&journal_datalist_lock); spin_lock(&journal_datalist_lock);
/* The buffer may become locked again at any /* The buffer may become locked again at any
time if it is redirtied */ time if it is redirtied */
...@@ -1025,6 +1031,7 @@ int journal_dirty_data (handle_t *handle, struct buffer_head *bh) ...@@ -1025,6 +1031,7 @@ int journal_dirty_data (handle_t *handle, struct buffer_head *bh)
} }
no_journal: no_journal:
spin_unlock(&journal_datalist_lock); spin_unlock(&journal_datalist_lock);
jbd_unlock_bh_state(bh);
if (need_brelse) { if (need_brelse) {
BUFFER_TRACE(bh, "brelse"); BUFFER_TRACE(bh, "brelse");
__brelse(bh); __brelse(bh);
...@@ -1053,7 +1060,7 @@ int journal_dirty_data (handle_t *handle, struct buffer_head *bh) ...@@ -1053,7 +1060,7 @@ int journal_dirty_data (handle_t *handle, struct buffer_head *bh)
* buffer: that only gets done when the old transaction finally * buffer: that only gets done when the old transaction finally
* completes its commit. * completes its commit.
*/ */
int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh) int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{ {
transaction_t *transaction = handle->h_transaction; transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal; journal_t *journal = transaction->t_journal;
...@@ -1113,7 +1120,7 @@ int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh) ...@@ -1113,7 +1120,7 @@ int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh)
* journaling component to decide after the write access is returned * journaling component to decide after the write access is returned
* that global state has changed and the update is no longer required. */ * that global state has changed and the update is no longer required. */
void journal_release_buffer (handle_t *handle, struct buffer_head *bh) void journal_release_buffer(handle_t *handle, struct buffer_head *bh)
{ {
transaction_t *transaction = handle->h_transaction; transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal; journal_t *journal = transaction->t_journal;
...@@ -1126,6 +1133,7 @@ void journal_release_buffer (handle_t *handle, struct buffer_head *bh) ...@@ -1126,6 +1133,7 @@ void journal_release_buffer (handle_t *handle, struct buffer_head *bh)
* transaction, then it is safe to release it. In all other * transaction, then it is safe to release it. In all other
* cases, just leave the buffer as it is. */ * cases, just leave the buffer as it is. */
jbd_lock_bh_state(bh);
spin_lock(&journal_datalist_lock); spin_lock(&journal_datalist_lock);
if (jh->b_jlist == BJ_Reserved && jh->b_transaction == transaction && if (jh->b_jlist == BJ_Reserved && jh->b_transaction == transaction &&
!buffer_jbddirty(jh2bh(jh))) { !buffer_jbddirty(jh2bh(jh))) {
...@@ -1134,6 +1142,7 @@ void journal_release_buffer (handle_t *handle, struct buffer_head *bh) ...@@ -1134,6 +1142,7 @@ void journal_release_buffer (handle_t *handle, struct buffer_head *bh)
__journal_refile_buffer(jh); __journal_refile_buffer(jh);
} }
spin_unlock(&journal_datalist_lock); spin_unlock(&journal_datalist_lock);
jbd_unlock_bh_state(bh);
JBUFFER_TRACE(jh, "exit"); JBUFFER_TRACE(jh, "exit");
unlock_journal(journal); unlock_journal(journal);
...@@ -1236,66 +1245,6 @@ void journal_forget (handle_t *handle, struct buffer_head *bh) ...@@ -1236,66 +1245,6 @@ void journal_forget (handle_t *handle, struct buffer_head *bh)
return; return;
} }
#if 0 /* Unused */
/*
 * journal_sync_buffer: flush a potentially-journaled buffer to disk.
 *
 * Used for O_SYNC filesystem operations.  If the buffer is journaled,
 * we need to complete the O_SYNC by waiting for the transaction to
 * complete.  It is an error to call journal_sync_buffer before
 * journal_stop!
 */

void journal_sync_buffer(struct buffer_head *bh)
{
	transaction_t *transaction;
	journal_t *journal;
	long sequence;
	struct journal_head *jh;

	/* If the buffer isn't journaled, this is easy: just sync it to
	 * disk. */
	BUFFER_TRACE(bh, "entry");

	spin_lock(&journal_datalist_lock);
	if (!buffer_jbd(bh)) {
		/* Not under jbd control at all: nothing to wait for. */
		spin_unlock(&journal_datalist_lock);
		return;
	}
	jh = bh2jh(bh);
	if (jh->b_transaction == NULL) {
		/* If the buffer has already been journaled, then this
		 * is a noop. */
		if (jh->b_cp_transaction == NULL) {
			spin_unlock(&journal_datalist_lock);
			return;
		}
		/* On a checkpoint list only: pin it, then write it out
		 * synchronously ourselves. */
		atomic_inc(&bh->b_count);
		spin_unlock(&journal_datalist_lock);
		sync_dirty_buffer(bh);
		__brelse(bh);
		goto out;
	}

	/* Otherwise, just wait until the transaction is synced to disk. */
	transaction = jh->b_transaction;
	journal = transaction->t_journal;
	/* Snapshot the tid under the lock: the transaction may be freed
	 * once we drop it. */
	sequence = transaction->t_tid;

	spin_unlock(&journal_datalist_lock);
	jbd_debug(2, "requesting commit for jh %p\n", jh);
	log_start_commit (journal, transaction);

	/* Wait for the commit sequence to catch up with our snapshot. */
	while (tid_gt(sequence, journal->j_commit_sequence)) {
		wake_up(&journal->j_wait_done_commit);
		sleep_on(&journal->j_wait_done_commit);
	}
	JBUFFER_TRACE(jh, "exit");
out:
	return;
}
#endif
/** /**
* void journal_callback_set() - Register a callback function for this handle. * void journal_callback_set() - Register a callback function for this handle.
* @handle: handle to attach the callback to. * @handle: handle to attach the callback to.
...@@ -1584,7 +1533,7 @@ void journal_unfile_buffer(struct journal_head *jh) ...@@ -1584,7 +1533,7 @@ void journal_unfile_buffer(struct journal_head *jh)
* One could use journal_datalist_lock to get unracy access to a * One could use journal_datalist_lock to get unracy access to a
* per-journal lock. * per-journal lock.
* *
* Called with journal_datalist_lock held. * Called under jbd_lock_bh_state(bh)
* *
* Returns non-zero iff we were able to free the journal_head. * Returns non-zero iff we were able to free the journal_head.
*/ */
...@@ -1592,8 +1541,6 @@ static inline int __journal_try_to_free_buffer(struct buffer_head *bh) ...@@ -1592,8 +1541,6 @@ static inline int __journal_try_to_free_buffer(struct buffer_head *bh)
{ {
struct journal_head *jh; struct journal_head *jh;
assert_spin_locked(&journal_datalist_lock);
jh = bh2jh(bh); jh = bh2jh(bh);
if (buffer_locked(bh) || buffer_dirty(bh)) if (buffer_locked(bh) || buffer_dirty(bh))
...@@ -1602,6 +1549,7 @@ static inline int __journal_try_to_free_buffer(struct buffer_head *bh) ...@@ -1602,6 +1549,7 @@ static inline int __journal_try_to_free_buffer(struct buffer_head *bh)
if (jh->b_next_transaction != 0) if (jh->b_next_transaction != 0)
goto out; goto out;
spin_lock(&journal_datalist_lock);
if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) { if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) {
if (jh->b_jlist == BJ_SyncData) { if (jh->b_jlist == BJ_SyncData) {
/* A written-back ordered data buffer */ /* A written-back ordered data buffer */
...@@ -1620,6 +1568,7 @@ static inline int __journal_try_to_free_buffer(struct buffer_head *bh) ...@@ -1620,6 +1568,7 @@ static inline int __journal_try_to_free_buffer(struct buffer_head *bh)
__brelse(bh); __brelse(bh);
} }
} }
spin_unlock(&journal_datalist_lock);
return !buffer_jbd(bh); return !buffer_jbd(bh);
out: out:
...@@ -1672,14 +1621,14 @@ int journal_try_to_free_buffers(journal_t *journal, ...@@ -1672,14 +1621,14 @@ int journal_try_to_free_buffers(journal_t *journal,
head = page_buffers(page); head = page_buffers(page);
bh = head; bh = head;
spin_lock(&journal_datalist_lock);
do { do {
jbd_lock_bh_state(bh);
if (buffer_jbd(bh) && !__journal_try_to_free_buffer(bh)) { if (buffer_jbd(bh) && !__journal_try_to_free_buffer(bh)) {
spin_unlock(&journal_datalist_lock); jbd_unlock_bh_state(bh);
goto busy; goto busy;
} }
jbd_unlock_bh_state(bh);
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
spin_unlock(&journal_datalist_lock);
ret = try_to_free_buffers(page); ret = try_to_free_buffers(page);
busy: busy:
return ret; return ret;
...@@ -1692,28 +1641,29 @@ int journal_try_to_free_buffers(journal_t *journal, ...@@ -1692,28 +1641,29 @@ int journal_try_to_free_buffers(journal_t *journal,
* this transaction commits. If the buffer isn't on a checkpoint list, we * this transaction commits. If the buffer isn't on a checkpoint list, we
* release it. * release it.
* Returns non-zero if JBD no longer has an interest in the buffer. * Returns non-zero if JBD no longer has an interest in the buffer.
*
* Called under journal_datalist_lock.
*
* Called under jbd_lock_bh_state(bh).
*/ */
static int dispose_buffer(struct journal_head *jh, static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
transaction_t *transaction)
{ {
int may_free = 1; int may_free = 1;
struct buffer_head *bh = jh2bh(jh); struct buffer_head *bh = jh2bh(jh);
spin_lock(&journal_datalist_lock);
__journal_unfile_buffer(jh); __journal_unfile_buffer(jh);
jh->b_transaction = 0; jh->b_transaction = 0;
if (jh->b_cp_transaction) { if (jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "on running+cp transaction"); JBUFFER_TRACE(jh, "on running+cp transaction");
__journal_file_buffer(jh, transaction, BJ_Forget); __journal_file_buffer(jh, transaction, BJ_Forget);
clear_bit(BH_JBDDirty, &bh->b_state); clear_buffer_jbddirty(bh);
may_free = 0; may_free = 0;
} else { } else {
JBUFFER_TRACE(jh, "on running transaction"); JBUFFER_TRACE(jh, "on running transaction");
journal_remove_journal_head(bh); journal_remove_journal_head(bh);
__brelse(bh); __brelse(bh);
} }
spin_unlock(&journal_datalist_lock);
return may_free; return may_free;
} }
...@@ -1769,6 +1719,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) ...@@ -1769,6 +1719,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
transaction_t *transaction; transaction_t *transaction;
struct journal_head *jh; struct journal_head *jh;
int may_free = 1; int may_free = 1;
int ret;
BUFFER_TRACE(bh, "entry"); BUFFER_TRACE(bh, "entry");
...@@ -1778,8 +1729,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) ...@@ -1778,8 +1729,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
* page lock. --sct */ * page lock. --sct */
if (!buffer_jbd(bh)) if (!buffer_jbd(bh))
goto zap_buffer; goto zap_buffer_unlocked;
jbd_lock_bh_state(bh);
spin_lock(&journal_datalist_lock);
jh = bh2jh(bh); jh = bh2jh(bh);
transaction = jh->b_transaction; transaction = jh->b_transaction;
if (transaction == NULL) { if (transaction == NULL) {
...@@ -1806,8 +1759,11 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) ...@@ -1806,8 +1759,11 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
* committed, the buffer won't be needed any * committed, the buffer won't be needed any
* longer. */ * longer. */
JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
return dispose_buffer(jh, ret = __dispose_buffer(jh,
journal->j_running_transaction); journal->j_running_transaction);
spin_unlock(&journal_datalist_lock);
jbd_unlock_bh_state(bh);
return ret;
} else { } else {
/* There is no currently-running transaction. So the /* There is no currently-running transaction. So the
* orphan record which we wrote for this file must have * orphan record which we wrote for this file must have
...@@ -1815,12 +1771,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) ...@@ -1815,12 +1771,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
* the committing transaction, if it exists. */ * the committing transaction, if it exists. */
if (journal->j_committing_transaction) { if (journal->j_committing_transaction) {
JBUFFER_TRACE(jh, "give to committing trans"); JBUFFER_TRACE(jh, "give to committing trans");
return dispose_buffer(jh, ret = __dispose_buffer(jh,
journal->j_committing_transaction); journal->j_committing_transaction);
spin_unlock(&journal_datalist_lock);
jbd_unlock_bh_state(bh);
return ret;
} else { } else {
/* The orphan record's transaction has /* The orphan record's transaction has
* committed. We can cleanse this buffer */ * committed. We can cleanse this buffer */
clear_bit(BH_JBDDirty, &bh->b_state); clear_buffer_jbddirty(bh);
goto zap_buffer; goto zap_buffer;
} }
} }
...@@ -1836,6 +1795,8 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) ...@@ -1836,6 +1795,8 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
journal->j_running_transaction); journal->j_running_transaction);
jh->b_next_transaction = NULL; jh->b_next_transaction = NULL;
} }
spin_unlock(&journal_datalist_lock);
jbd_unlock_bh_state(bh);
return 0; return 0;
} else { } else {
/* Good, the buffer belongs to the running transaction. /* Good, the buffer belongs to the running transaction.
...@@ -1845,10 +1806,13 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) ...@@ -1845,10 +1806,13 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
* i_size already for this truncate so recovery will not * i_size already for this truncate so recovery will not
* expose the disk blocks we are discarding here.) */ * expose the disk blocks we are discarding here.) */
J_ASSERT_JH(jh, transaction == journal->j_running_transaction); J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
may_free = dispose_buffer(jh, transaction); may_free = __dispose_buffer(jh, transaction);
} }
zap_buffer: zap_buffer:
spin_unlock(&journal_datalist_lock);
jbd_unlock_bh_state(bh);
zap_buffer_unlocked:
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
J_ASSERT_BH(bh, !buffer_jbddirty(bh)); J_ASSERT_BH(bh, !buffer_jbddirty(bh));
clear_buffer_mapped(bh); clear_buffer_mapped(bh);
...@@ -2007,6 +1971,7 @@ void journal_file_buffer(struct journal_head *jh, ...@@ -2007,6 +1971,7 @@ void journal_file_buffer(struct journal_head *jh,
void __journal_refile_buffer(struct journal_head *jh) void __journal_refile_buffer(struct journal_head *jh)
{ {
int was_dirty; int was_dirty;
struct buffer_head *bh = jh2bh(jh);
assert_spin_locked(&journal_datalist_lock); assert_spin_locked(&journal_datalist_lock);
...@@ -2017,10 +1982,12 @@ void __journal_refile_buffer(struct journal_head *jh) ...@@ -2017,10 +1982,12 @@ void __journal_refile_buffer(struct journal_head *jh)
return; return;
} }
/* It has been modified by a later transaction: add it to the /*
* new transaction's metadata list. */ * It has been modified by a later transaction: add it to the new
* transaction's metadata list.
*/
was_dirty = test_clear_buffer_jbddirty(jh2bh(jh)); was_dirty = test_clear_buffer_jbddirty(bh);
__journal_unfile_buffer(jh); __journal_unfile_buffer(jh);
jh->b_transaction = jh->b_next_transaction; jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL; jh->b_next_transaction = NULL;
...@@ -2028,7 +1995,7 @@ void __journal_refile_buffer(struct journal_head *jh) ...@@ -2028,7 +1995,7 @@ void __journal_refile_buffer(struct journal_head *jh)
J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
if (was_dirty) if (was_dirty)
set_buffer_jbddirty(jh2bh(jh)); set_buffer_jbddirty(bh);
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment