Commit 8b00e4fa authored by Andrew Morton, committed by Linus Torvalds

[PATCH] JBD commit callback capability

This is a patch which Stephen has applied to ext3's 2.4 repository.
Originally written by Andreas, generalised somewhat by Stephen.

Add jbd callback mechanism, requested for InterMezzo.  We allow the jbd's
client to request notification when a given handle's IO finally commits to
disk, so that clients can manage their own writeback state asynchronously.
parent 66c1d66f
...@@ -592,6 +592,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) ...@@ -592,6 +592,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
J_ASSERT (transaction->t_log_list == NULL); J_ASSERT (transaction->t_log_list == NULL);
J_ASSERT (transaction->t_checkpoint_list == NULL); J_ASSERT (transaction->t_checkpoint_list == NULL);
J_ASSERT (transaction->t_updates == 0); J_ASSERT (transaction->t_updates == 0);
J_ASSERT (list_empty(&transaction->t_jcb));
J_ASSERT (transaction->t_journal->j_committing_transaction != J_ASSERT (transaction->t_journal->j_committing_transaction !=
transaction); transaction);
......
...@@ -471,7 +471,7 @@ void journal_commit_transaction(journal_t *journal) ...@@ -471,7 +471,7 @@ void journal_commit_transaction(journal_t *journal)
transaction's t_log_list queue, and metadata buffers are on transaction's t_log_list queue, and metadata buffers are on
the t_iobuf_list queue. the t_iobuf_list queue.
Wait for the transactions in reverse order. That way we are Wait for the buffers in reverse order. That way we are
less likely to be woken up until all IOs have completed, and less likely to be woken up until all IOs have completed, and
so we incur less scheduling load. so we incur less scheduling load.
*/ */
...@@ -563,8 +563,10 @@ void journal_commit_transaction(journal_t *journal) ...@@ -563,8 +563,10 @@ void journal_commit_transaction(journal_t *journal)
jbd_debug(3, "JBD: commit phase 6\n"); jbd_debug(3, "JBD: commit phase 6\n");
if (is_journal_aborted(journal)) if (is_journal_aborted(journal)) {
unlock_journal(journal);
goto skip_commit; goto skip_commit;
}
/* Done it all: now write the commit record. We should have /* Done it all: now write the commit record. We should have
* cleaned up our previous buffers by now, so if we are in abort * cleaned up our previous buffers by now, so if we are in abort
...@@ -574,6 +576,7 @@ void journal_commit_transaction(journal_t *journal) ...@@ -574,6 +576,7 @@ void journal_commit_transaction(journal_t *journal)
descriptor = journal_get_descriptor_buffer(journal); descriptor = journal_get_descriptor_buffer(journal);
if (!descriptor) { if (!descriptor) {
__journal_abort_hard(journal); __journal_abort_hard(journal);
unlock_journal(journal);
goto skip_commit; goto skip_commit;
} }
...@@ -596,14 +599,32 @@ void journal_commit_transaction(journal_t *journal) ...@@ -596,14 +599,32 @@ void journal_commit_transaction(journal_t *journal)
__brelse(bh); /* One for getblk() */ __brelse(bh); /* One for getblk() */
journal_unlock_journal_head(descriptor); journal_unlock_journal_head(descriptor);
} }
lock_journal(journal);
/* End of a transaction! Finally, we can do checkpoint /* End of a transaction! Finally, we can do checkpoint
processing: any buffers committed as a result of this processing: any buffers committed as a result of this
transaction can be removed from any checkpoint list it was on transaction can be removed from any checkpoint list it was on
before. */ before. */
skip_commit: skip_commit: /* The journal should be unlocked by now. */
/* Call any callbacks that had been registered for handles in this
* transaction. It is up to the callback to free any allocated
* memory.
*/
if (!list_empty(&commit_transaction->t_jcb)) {
struct list_head *p, *n;
int error = is_journal_aborted(journal);
list_for_each_safe(p, n, &commit_transaction->t_jcb) {
struct journal_callback *jcb;
jcb = list_entry(p, struct journal_callback, jcb_list);
list_del(p);
jcb->jcb_func(jcb, error);
}
}
lock_journal(journal);
jbd_debug(3, "JBD: commit phase 7\n"); jbd_debug(3, "JBD: commit phase 7\n");
......
...@@ -58,6 +58,7 @@ EXPORT_SYMBOL(journal_sync_buffer); ...@@ -58,6 +58,7 @@ EXPORT_SYMBOL(journal_sync_buffer);
#endif #endif
EXPORT_SYMBOL(journal_flush); EXPORT_SYMBOL(journal_flush);
EXPORT_SYMBOL(journal_revoke); EXPORT_SYMBOL(journal_revoke);
EXPORT_SYMBOL(journal_callback_set);
EXPORT_SYMBOL(journal_init_dev); EXPORT_SYMBOL(journal_init_dev);
EXPORT_SYMBOL(journal_init_inode); EXPORT_SYMBOL(journal_init_inode);
......
...@@ -57,6 +57,7 @@ static transaction_t * get_transaction (journal_t * journal, int is_try) ...@@ -57,6 +57,7 @@ static transaction_t * get_transaction (journal_t * journal, int is_try)
transaction->t_state = T_RUNNING; transaction->t_state = T_RUNNING;
transaction->t_tid = journal->j_transaction_sequence++; transaction->t_tid = journal->j_transaction_sequence++;
transaction->t_expires = jiffies + journal->j_commit_interval; transaction->t_expires = jiffies + journal->j_commit_interval;
INIT_LIST_HEAD(&transaction->t_jcb);
/* Set up the commit timer for the new transaction. */ /* Set up the commit timer for the new transaction. */
J_ASSERT (!journal->j_commit_timer_active); J_ASSERT (!journal->j_commit_timer_active);
...@@ -91,6 +92,13 @@ static int start_this_handle(journal_t *journal, handle_t *handle) ...@@ -91,6 +92,13 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
int needed; int needed;
int nblocks = handle->h_buffer_credits; int nblocks = handle->h_buffer_credits;
if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
current->comm, nblocks,
journal->j_max_transaction_buffers);
return -ENOSPC;
}
jbd_debug(3, "New handle %p going live.\n", handle); jbd_debug(3, "New handle %p going live.\n", handle);
repeat: repeat:
...@@ -200,6 +208,20 @@ static int start_this_handle(journal_t *journal, handle_t *handle) ...@@ -200,6 +208,20 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
return 0; return 0;
} }
/*
 * Build a fresh handle carrying `nblocks' buffer credits.
 *
 * The handle is zero-filled, starts with a single reference and an
 * empty list of commit callbacks.  Returns NULL when the allocation
 * fails.  (A slab cache would probably suit this better.)
 */
static handle_t *new_handle(int nblocks)
{
	handle_t *h;

	h = jbd_kmalloc(sizeof(*h), GFP_NOFS);
	if (h == NULL)
		return NULL;

	/* Start from an all-zero handle, then fill in the non-zero fields. */
	memset(h, 0, sizeof(*h));
	h->h_ref = 1;
	h->h_buffer_credits = nblocks;
	INIT_LIST_HEAD(&h->h_jcb);

	return h;
}
/* /*
* Obtain a new handle. * Obtain a new handle.
* *
...@@ -227,13 +249,10 @@ handle_t *journal_start(journal_t *journal, int nblocks) ...@@ -227,13 +249,10 @@ handle_t *journal_start(journal_t *journal, int nblocks)
return handle; return handle;
} }
handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS); handle = new_handle(nblocks);
if (!handle) if (!handle)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
memset (handle, 0, sizeof (handle_t));
handle->h_buffer_credits = nblocks;
handle->h_ref = 1;
current->journal_info = handle; current->journal_info = handle;
err = start_this_handle(journal, handle); err = start_this_handle(journal, handle);
...@@ -333,13 +352,10 @@ handle_t *journal_try_start(journal_t *journal, int nblocks) ...@@ -333,13 +352,10 @@ handle_t *journal_try_start(journal_t *journal, int nblocks)
if (is_journal_aborted(journal)) if (is_journal_aborted(journal))
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS); handle = new_handle(nblocks);
if (!handle) if (!handle)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
memset (handle, 0, sizeof (handle_t));
handle->h_buffer_credits = nblocks;
handle->h_ref = 1;
current->journal_info = handle; current->journal_info = handle;
err = try_start_this_handle(journal, handle); err = try_start_this_handle(journal, handle);
...@@ -1347,6 +1363,28 @@ void journal_sync_buffer(struct buffer_head *bh) ...@@ -1347,6 +1363,28 @@ void journal_sync_buffer(struct buffer_head *bh)
} }
#endif #endif
/*
 * Attach a commit callback to a handle.
 *
 * Once the transaction this handle belongs to has been committed to
 * disk, `func' is invoked with the very same `jcb' that was passed in
 * here, together with the error status of the journal.  No ordering is
 * promised: neither between the handles of one transaction, nor between
 * several callbacks registered on a single handle.
 *
 * Storage for the journal_callback is owned by the caller.  That lets
 * the caller embed it as the first member (offset 0) of a larger
 * private structure and keep any extra per-callback data right behind
 * it, so that a single allocation covers both.
 */
void journal_callback_set(handle_t *handle,
			  void (*func)(struct journal_callback *jcb, int error),
			  struct journal_callback *jcb)
{
	/* Fill in the record first, then queue it on the handle. */
	jcb->jcb_func = func;
	list_add_tail(&jcb->jcb_list, &handle->h_jcb);
}
/* /*
* All done for a particular handle. * All done for a particular handle.
* *
...@@ -1411,6 +1449,9 @@ int journal_stop(handle_t *handle) ...@@ -1411,6 +1449,9 @@ int journal_stop(handle_t *handle)
wake_up(&journal->j_wait_transaction_locked); wake_up(&journal->j_wait_transaction_locked);
} }
/* Move callbacks from the handle to the transaction. */
list_splice(&handle->h_jcb, &transaction->t_jcb);
/* /*
* If the handle is marked SYNC, we need to set another commit * If the handle is marked SYNC, we need to set another commit
* going! We also want to force a commit if the current * going! We also want to force a commit if the current
......
...@@ -250,6 +250,13 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh) ...@@ -250,6 +250,13 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
return bh->b_private; return bh->b_private;
} }
#define HAVE_JOURNAL_CALLBACK_STATUS
/*
 * Commit-callback record supplied by a journal client through
 * journal_callback_set().  The client allocates it and is expected to
 * place it at offset 0 of its own, larger structure.
 */
struct journal_callback {
/* Queued on the handle's h_jcb list by journal_callback_set(); moved to
 * the transaction's t_jcb list in journal_stop() and unlinked by the
 * commit code just before jcb_func is called. */
struct list_head jcb_list;
/* Called from the commit path with this record and the result of
 * is_journal_aborted() as `error'.  The callback is responsible for
 * freeing any memory it allocated. */
void (*jcb_func)(struct journal_callback *jcb, int error);
/* user data goes here */
};
struct jbd_revoke_table_s; struct jbd_revoke_table_s;
/* The handle_t type represents a single atomic update being performed /* The handle_t type represents a single atomic update being performed
...@@ -280,6 +287,12 @@ struct handle_s ...@@ -280,6 +287,12 @@ struct handle_s
operations */ operations */
int h_err; int h_err;
/* List of application registered callbacks for this handle.
* The function(s) will be called after the transaction that
* this handle is part of has been committed to disk.
*/
struct list_head h_jcb;
/* Flags */ /* Flags */
unsigned int h_sync: 1; /* sync-on-close */ unsigned int h_sync: 1; /* sync-on-close */
unsigned int h_jdata: 1; /* force data journaling */ unsigned int h_jdata: 1; /* force data journaling */
...@@ -399,6 +412,10 @@ struct transaction_s ...@@ -399,6 +412,10 @@ struct transaction_s
/* How many handles used this transaction? */ /* How many handles used this transaction? */
int t_handle_count; int t_handle_count;
/* List of registered callback functions for this transaction.
* Called when the transaction is committed. */
struct list_head t_jcb;
}; };
...@@ -647,6 +664,9 @@ extern int journal_invalidatepage(journal_t *, ...@@ -647,6 +664,9 @@ extern int journal_invalidatepage(journal_t *,
extern int journal_try_to_free_buffers(journal_t *, struct page *, int); extern int journal_try_to_free_buffers(journal_t *, struct page *, int);
extern int journal_stop(handle_t *); extern int journal_stop(handle_t *);
extern int journal_flush (journal_t *); extern int journal_flush (journal_t *);
/* Register fn to be called with jcb once the handle's transaction has
 * been committed; the int argument is the journal's error status. */
extern void journal_callback_set(handle_t *handle,
void (*fn)(struct journal_callback *,int),
struct journal_callback *jcb);
extern void journal_lock_updates (journal_t *); extern void journal_lock_updates (journal_t *);
extern void journal_unlock_updates (journal_t *); extern void journal_unlock_updates (journal_t *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment