Commit 5409c2b5 authored by Andrew Morton, committed by Arnaldo Carvalho de Melo

[PATCH] fix ext3 race with writeback

The ext3-no-steal patch has exposed a long-standing race in ext3.  It
has been there all the time in 2.4, but never triggered until some
timing change in the ext3-no-steal patch exposed it.  The race was not
present in 2.2 because 2.2's bdflush runs inside lock_kernel().

The problem is that when ext3 is shuffling a buffer between journalling
lists there is a small window where the buffer is marked BH_dirty.
Another CPU can grab it, mark it clean and write it out.  Then ext3
puts the buffer onto a list of buffers which are expected to be dirty,
and gets confused later on when the buffer turns out to be clean.

The patch from Stephen records the expected dirtiness of the buffer in
a local variable, so BH_dirty is not transiently set while ext3
shuffles.
parent d9ae0cee
...@@ -1941,6 +1941,8 @@ void __journal_file_buffer(struct journal_head *jh, ...@@ -1941,6 +1941,8 @@ void __journal_file_buffer(struct journal_head *jh,
transaction_t *transaction, int jlist) transaction_t *transaction, int jlist)
{ {
struct journal_head **list = 0; struct journal_head **list = 0;
int was_dirty = 0;
struct buffer_head *bh = jh2bh(jh);
assert_spin_locked(&journal_datalist_lock); assert_spin_locked(&journal_datalist_lock);
...@@ -1951,13 +1953,24 @@ void __journal_file_buffer(struct journal_head *jh, ...@@ -1951,13 +1953,24 @@ void __journal_file_buffer(struct journal_head *jh,
J_ASSERT_JH(jh, jh->b_transaction == transaction || J_ASSERT_JH(jh, jh->b_transaction == transaction ||
jh->b_transaction == 0); jh->b_transaction == 0);
if (jh->b_transaction) { if (jh->b_transaction && jh->b_jlist == jlist)
if (jh->b_jlist == jlist) return;
return;
/* The following list of buffer states needs to be consistent
* with __jbd_unexpected_dirty_buffer()'s handling of dirty
* state. */
if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
jlist == BJ_Shadow || jlist == BJ_Forget) {
if (test_clear_buffer_dirty(bh) ||
test_clear_buffer_jbddirty(bh))
was_dirty = 1;
}
if (jh->b_transaction)
__journal_unfile_buffer(jh); __journal_unfile_buffer(jh);
} else { else
jh->b_transaction = transaction; jh->b_transaction = transaction;
}
switch (jlist) { switch (jlist) {
case BJ_None: case BJ_None:
...@@ -1994,12 +2007,8 @@ void __journal_file_buffer(struct journal_head *jh, ...@@ -1994,12 +2007,8 @@ void __journal_file_buffer(struct journal_head *jh,
__blist_add_buffer(list, jh); __blist_add_buffer(list, jh);
jh->b_jlist = jlist; jh->b_jlist = jlist;
if (jlist == BJ_Metadata || jlist == BJ_Reserved || if (was_dirty)
jlist == BJ_Shadow || jlist == BJ_Forget) { set_buffer_jbddirty(bh);
if (test_clear_buffer_dirty(jh2bh(jh))) {
set_bit(BH_JBDDirty, &jh2bh(jh)->b_state);
}
}
} }
void journal_file_buffer(struct journal_head *jh, void journal_file_buffer(struct journal_head *jh,
......
...@@ -235,6 +235,7 @@ enum jbd_state_bits { ...@@ -235,6 +235,7 @@ enum jbd_state_bits {
BUFFER_FNS(JBD, jbd) BUFFER_FNS(JBD, jbd)
BUFFER_FNS(JBDDirty, jbddirty) BUFFER_FNS(JBDDirty, jbddirty)
TAS_BUFFER_FNS(JBDDirty, jbddirty)
static inline struct buffer_head *jh2bh(struct journal_head *jh) static inline struct buffer_head *jh2bh(struct journal_head *jh)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment