Commit cd5f8bb0 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] jbd: do_get_write_access lock contention reduction

We're seeing heavy contention against j_list_lock on 8-way in
do_get_write_access().

We actually don't need j_list_lock in there except for one little case - the
per-bh jbd_lock_bh_state() is sufficient to protect this buffer's internal
state.

On some nice quick LVM array Ram Pai measured an overall 3x speedup from this
patch:

the script took the following time on 265mm1
 real    0m57.504s
 user    0m0.400s
 sys     7m29.867s


 and with the 2patches it took
 real 	0m19.983s
 user    0m0.438s
 sys     1m55.896s
parent 073c11c2
...@@ -522,7 +522,6 @@ static void jbd_unexpected_dirty_buffer(struct journal_head *jh) ...@@ -522,7 +522,6 @@ static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
* part of the transaction, that is). * part of the transaction, that is).
* *
*/ */
static int static int
do_get_write_access(handle_t *handle, struct journal_head *jh, do_get_write_access(handle_t *handle, struct journal_head *jh,
int force_copy, int *credits) int force_copy, int *credits)
...@@ -550,7 +549,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -550,7 +549,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
lock_buffer(bh); lock_buffer(bh);
jbd_lock_bh_state(bh); jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
/* We now hold the buffer lock so it is safe to query the buffer /* We now hold the buffer lock so it is safe to query the buffer
* state. Is the buffer dirty? * state. Is the buffer dirty?
...@@ -566,9 +564,10 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -566,9 +564,10 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
* the buffer dirtied, ugh.) */ * the buffer dirtied, ugh.) */
if (buffer_dirty(bh)) { if (buffer_dirty(bh)) {
/* First question: is this buffer already part of the /*
* current transaction or the existing committing * First question: is this buffer already part of the current
* transaction? */ * transaction or the existing committing transaction?
*/
if (jh->b_transaction) { if (jh->b_transaction) {
J_ASSERT_JH(jh, J_ASSERT_JH(jh,
jh->b_transaction == transaction || jh->b_transaction == transaction ||
...@@ -586,22 +585,23 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -586,22 +585,23 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
error = -EROFS; error = -EROFS;
if (is_handle_aborted(handle)) { if (is_handle_aborted(handle)) {
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
goto out_unlocked; goto out;
} }
error = 0; error = 0;
/* The buffer is already part of this transaction if /*
* b_transaction or b_next_transaction points to it. */ * The buffer is already part of this transaction if b_transaction or
* b_next_transaction points to it
*/
if (jh->b_transaction == transaction || if (jh->b_transaction == transaction ||
jh->b_next_transaction == transaction) jh->b_next_transaction == transaction)
goto done_locked; goto done;
/* If there is already a copy-out version of this buffer, then
* we don't need to make another one. */
/*
* If there is already a copy-out version of this buffer, then we don't
* need to make another one
*/
if (jh->b_frozen_data) { if (jh->b_frozen_data) {
JBUFFER_TRACE(jh, "has frozen data"); JBUFFER_TRACE(jh, "has frozen data");
J_ASSERT_JH(jh, jh->b_next_transaction == NULL); J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
...@@ -611,7 +611,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -611,7 +611,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
handle->h_buffer_credits--; handle->h_buffer_credits--;
if (credits) if (credits)
(*credits)++; (*credits)++;
goto done_locked; goto done;
} }
/* Is there data here we need to preserve? */ /* Is there data here we need to preserve? */
...@@ -635,7 +635,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -635,7 +635,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
wait_queue_head_t *wqh; wait_queue_head_t *wqh;
JBUFFER_TRACE(jh, "on shadow: sleep"); JBUFFER_TRACE(jh, "on shadow: sleep");
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
/* commit wakes up all shadow buffers after IO */ /* commit wakes up all shadow buffers after IO */
wqh = bh_waitq_head(jh2bh(jh)); wqh = bh_waitq_head(jh2bh(jh));
...@@ -661,7 +660,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -661,7 +660,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
JBUFFER_TRACE(jh, "generate frozen data"); JBUFFER_TRACE(jh, "generate frozen data");
if (!frozen_buffer) { if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer"); JBUFFER_TRACE(jh, "allocate memory for buffer");
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size, frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
GFP_NOFS); GFP_NOFS);
...@@ -672,12 +670,10 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -672,12 +670,10 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
JBUFFER_TRACE(jh, "oom!"); JBUFFER_TRACE(jh, "oom!");
error = -ENOMEM; error = -ENOMEM;
jbd_lock_bh_state(bh); jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock); goto done;
goto done_locked;
} }
goto repeat; goto repeat;
} }
jh->b_frozen_data = frozen_buffer; jh->b_frozen_data = frozen_buffer;
frozen_buffer = NULL; frozen_buffer = NULL;
need_copy = 1; need_copy = 1;
...@@ -690,20 +686,22 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -690,20 +686,22 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
if (credits) if (credits)
(*credits)++; (*credits)++;
/* Finally, if the buffer is not journaled right now, we need to /*
* make sure it doesn't get written to disk before the caller * Finally, if the buffer is not journaled right now, we need to make
* actually commits the new data. */ * sure it doesn't get written to disk before the caller actually
* commits the new data
*/
if (!jh->b_transaction) { if (!jh->b_transaction) {
JBUFFER_TRACE(jh, "no transaction"); JBUFFER_TRACE(jh, "no transaction");
J_ASSERT_JH(jh, !jh->b_next_transaction); J_ASSERT_JH(jh, !jh->b_next_transaction);
jh->b_transaction = transaction; jh->b_transaction = transaction;
JBUFFER_TRACE(jh, "file as BJ_Reserved"); JBUFFER_TRACE(jh, "file as BJ_Reserved");
spin_lock(&journal->j_list_lock);
__journal_file_buffer(jh, transaction, BJ_Reserved); __journal_file_buffer(jh, transaction, BJ_Reserved);
spin_unlock(&journal->j_list_lock);
} }
done_locked: done:
spin_unlock(&journal->j_list_lock);
if (need_copy) { if (need_copy) {
struct page *page; struct page *page;
int offset; int offset;
...@@ -719,11 +717,13 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -719,11 +717,13 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
} }
jbd_unlock_bh_state(bh); jbd_unlock_bh_state(bh);
/* If we are about to journal a buffer, then any revoke pending /*
on it is no longer valid. */ * If we are about to journal a buffer, then any revoke pending on it is
* no longer valid
*/
journal_cancel_revoke(handle, jh); journal_cancel_revoke(handle, jh);
out_unlocked: out:
if (frozen_buffer) if (frozen_buffer)
kfree(frozen_buffer); kfree(frozen_buffer);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment