Commit ba8edd6d authored by Andrew Morton, committed by Linus Torvalds

[PATCH] JBD: implement dual revoke tables.

From: Alex Tomas <bzzz@tmi.comex.ru>

We're about to remove lock_journal(), and it is lock_journal() that keeps
the running and committing transactions' revokes separate on the single revoke table.

So implement two revoke tables and rotate them at commit time.
parent ca340395
...@@ -149,14 +149,10 @@ void journal_commit_transaction(journal_t *journal) ...@@ -149,14 +149,10 @@ void journal_commit_transaction(journal_t *journal)
jbd_debug (3, "JBD: commit phase 1\n"); jbd_debug (3, "JBD: commit phase 1\n");
journal_write_revoke_records(journal, commit_transaction);
/* /*
* Now that we have built the revoke records, we can start * Switch to a new revoke table.
* reusing the revoke list for a new running transaction. We
* can now safely start committing the old transaction: time to
* get a new running transaction for incoming filesystem updates
*/ */
journal_switch_revoke_table(journal);
spin_lock(&journal->j_state_lock); spin_lock(&journal->j_state_lock);
commit_transaction->t_state = T_FLUSH; commit_transaction->t_state = T_FLUSH;
...@@ -283,6 +279,10 @@ void journal_commit_transaction(journal_t *journal) ...@@ -283,6 +279,10 @@ void journal_commit_transaction(journal_t *journal)
sync_datalist_empty: sync_datalist_empty:
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
journal_write_revoke_records(journal, commit_transaction);
jbd_debug(3, "JBD: commit phase 2\n");
/* /*
* If we found any dirty or locked buffers, then we should have * If we found any dirty or locked buffers, then we should have
* looped back up to the write_out_data label. If there weren't * looped back up to the write_out_data label. If there weren't
......
...@@ -129,7 +129,9 @@ int insert_revoke_hash(journal_t *journal, unsigned long blocknr, tid_t seq) ...@@ -129,7 +129,9 @@ int insert_revoke_hash(journal_t *journal, unsigned long blocknr, tid_t seq)
record->sequence = seq; record->sequence = seq;
record->blocknr = blocknr; record->blocknr = blocknr;
hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
spin_lock(&journal->j_revoke_lock);
list_add(&record->hash, hash_list); list_add(&record->hash, hash_list);
spin_unlock(&journal->j_revoke_lock);
return 0; return 0;
oom: oom:
...@@ -150,12 +152,16 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, ...@@ -150,12 +152,16 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
spin_lock(&journal->j_revoke_lock);
record = (struct jbd_revoke_record_s *) hash_list->next; record = (struct jbd_revoke_record_s *) hash_list->next;
while (&(record->hash) != hash_list) { while (&(record->hash) != hash_list) {
if (record->blocknr == blocknr) if (record->blocknr == blocknr) {
spin_unlock(&journal->j_revoke_lock);
return record; return record;
}
record = (struct jbd_revoke_record_s *) record->hash.next; record = (struct jbd_revoke_record_s *) record->hash.next;
} }
spin_unlock(&journal->j_revoke_lock);
return NULL; return NULL;
} }
...@@ -192,27 +198,58 @@ int journal_init_revoke(journal_t *journal, int hash_size) ...@@ -192,27 +198,58 @@ int journal_init_revoke(journal_t *journal, int hash_size)
{ {
int shift, tmp; int shift, tmp;
J_ASSERT (journal->j_revoke == NULL); J_ASSERT (journal->j_revoke_table[0] == NULL);
journal->j_revoke = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); shift = 0;
if (!journal->j_revoke) tmp = hash_size;
while((tmp >>= 1UL) != 0UL)
shift++;
journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
if (!journal->j_revoke_table[0])
return -ENOMEM; return -ENOMEM;
journal->j_revoke = journal->j_revoke_table[0];
/* Check that the hash_size is a power of two */
J_ASSERT ((hash_size & (hash_size-1)) == 0);
journal->j_revoke->hash_size = hash_size;
journal->j_revoke->hash_shift = shift;
journal->j_revoke->hash_table =
kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
if (!journal->j_revoke->hash_table) {
kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
journal->j_revoke = NULL;
return -ENOMEM;
}
for (tmp = 0; tmp < hash_size; tmp++)
INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
if (!journal->j_revoke_table[1]) {
kfree(journal->j_revoke_table[0]->hash_table);
kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
return -ENOMEM;
}
journal->j_revoke = journal->j_revoke_table[1];
/* Check that the hash_size is a power of two */ /* Check that the hash_size is a power of two */
J_ASSERT ((hash_size & (hash_size-1)) == 0); J_ASSERT ((hash_size & (hash_size-1)) == 0);
journal->j_revoke->hash_size = hash_size; journal->j_revoke->hash_size = hash_size;
shift = 0;
tmp = hash_size;
while((tmp >>= 1UL) != 0UL)
shift++;
journal->j_revoke->hash_shift = shift; journal->j_revoke->hash_shift = shift;
journal->j_revoke->hash_table = journal->j_revoke->hash_table =
kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
if (!journal->j_revoke->hash_table) { if (!journal->j_revoke->hash_table) {
kmem_cache_free(revoke_table_cache, journal->j_revoke); kfree(journal->j_revoke_table[0]->hash_table);
kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
journal->j_revoke = NULL; journal->j_revoke = NULL;
return -ENOMEM; return -ENOMEM;
} }
...@@ -220,6 +257,8 @@ int journal_init_revoke(journal_t *journal, int hash_size) ...@@ -220,6 +257,8 @@ int journal_init_revoke(journal_t *journal, int hash_size)
for (tmp = 0; tmp < hash_size; tmp++) for (tmp = 0; tmp < hash_size; tmp++)
INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
spin_lock_init(&journal->j_revoke_lock);
return 0; return 0;
} }
...@@ -231,7 +270,20 @@ void journal_destroy_revoke(journal_t *journal) ...@@ -231,7 +270,20 @@ void journal_destroy_revoke(journal_t *journal)
struct list_head *hash_list; struct list_head *hash_list;
int i; int i;
table = journal->j_revoke; table = journal->j_revoke_table[0];
if (!table)
return;
for (i=0; i<table->hash_size; i++) {
hash_list = &table->hash_table[i];
J_ASSERT (list_empty(hash_list));
}
kfree(table->hash_table);
kmem_cache_free(revoke_table_cache, table);
journal->j_revoke = NULL;
table = journal->j_revoke_table[1];
if (!table) if (!table)
return; return;
...@@ -337,11 +389,9 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, ...@@ -337,11 +389,9 @@ int journal_revoke(handle_t *handle, unsigned long blocknr,
} }
} }
lock_journal(journal);
jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in); jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in);
err = insert_revoke_hash(journal, blocknr, err = insert_revoke_hash(journal, blocknr,
handle->h_transaction->t_tid); handle->h_transaction->t_tid);
unlock_journal(journal);
BUFFER_TRACE(bh_in, "exit"); BUFFER_TRACE(bh_in, "exit");
return err; return err;
} }
...@@ -389,7 +439,9 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) ...@@ -389,7 +439,9 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
if (record) { if (record) {
jbd_debug(4, "cancelled existing revoke on " jbd_debug(4, "cancelled existing revoke on "
"blocknr %llu\n", (u64)bh->b_blocknr); "blocknr %llu\n", (u64)bh->b_blocknr);
spin_lock(&journal->j_revoke_lock);
list_del(&record->hash); list_del(&record->hash);
spin_unlock(&journal->j_revoke_lock);
kmem_cache_free(revoke_record_cache, record); kmem_cache_free(revoke_record_cache, record);
did_revoke = 1; did_revoke = 1;
} }
...@@ -418,6 +470,22 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) ...@@ -418,6 +470,22 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
return did_revoke; return did_revoke;
} }
/*
 * journal_switch_revoke_table - make the other revoke table current.
 *
 * Points journal->j_revoke at whichever entry of j_revoke_table[] it is
 * not using right now, then reinitialises every hash bucket list head of
 * the newly selected table with INIT_LIST_HEAD().  The table is selected
 * for the next transaction: we do not want to suspend any processing
 * until all revokes are written. -bzzz
 */
void journal_switch_revoke_table(journal_t *journal)
{
	int next_idx;
	int bucket;

	/* Table 0 in use -> move to table 1; anything else -> table 0. */
	next_idx = (journal->j_revoke == journal->j_revoke_table[0]) ? 1 : 0;
	journal->j_revoke = journal->j_revoke_table[next_idx];

	for (bucket = 0; bucket < journal->j_revoke->hash_size; bucket++)
		INIT_LIST_HEAD(&journal->j_revoke->hash_table[bucket]);
}
/* /*
* Write revoke records to the journal for all entries in the current * Write revoke records to the journal for all entries in the current
...@@ -438,7 +506,10 @@ void journal_write_revoke_records(journal_t *journal, ...@@ -438,7 +506,10 @@ void journal_write_revoke_records(journal_t *journal,
descriptor = NULL; descriptor = NULL;
offset = 0; offset = 0;
count = 0; count = 0;
revoke = journal->j_revoke;
/* select revoke table for committing transaction */
revoke = journal->j_revoke == journal->j_revoke_table[0] ?
journal->j_revoke_table[1] : journal->j_revoke_table[0];
for (i = 0; i < revoke->hash_size; i++) { for (i = 0; i < revoke->hash_size; i++) {
hash_list = &revoke->hash_table[i]; hash_list = &revoke->hash_table[i];
......
...@@ -813,6 +813,7 @@ struct journal_s ...@@ -813,6 +813,7 @@ struct journal_s
*/ */
spinlock_t j_revoke_lock; spinlock_t j_revoke_lock;
struct jbd_revoke_table_s *j_revoke; struct jbd_revoke_table_s *j_revoke;
struct jbd_revoke_table_s *j_revoke_table[2];
/* /*
* An opaque pointer to fs-private information. ext3 puts its * An opaque pointer to fs-private information. ext3 puts its
...@@ -999,6 +1000,7 @@ extern int journal_set_revoke(journal_t *, unsigned long, tid_t); ...@@ -999,6 +1000,7 @@ extern int journal_set_revoke(journal_t *, unsigned long, tid_t);
extern int journal_test_revoke(journal_t *, unsigned long, tid_t); extern int journal_test_revoke(journal_t *, unsigned long, tid_t);
extern void journal_clear_revoke(journal_t *); extern void journal_clear_revoke(journal_t *);
extern void journal_brelse_array(struct buffer_head *b[], int n); extern void journal_brelse_array(struct buffer_head *b[], int n);
extern void journal_switch_revoke_table(journal_t *journal);
/* /*
* The log thread user interface: * The log thread user interface:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment