Commit 538665bc authored by inaam

branches/zip: Enabled group commit functionality with XA.

Reviewed by: Heikki
parent 57afe1d5
...@@ -1911,7 +1911,12 @@ retry: ...@@ -1911,7 +1911,12 @@ retry:
trx->mysql_log_file_name = mysql_bin_log_file_name(); trx->mysql_log_file_name = mysql_bin_log_file_name();
trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos(); trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos();
/* Don't do write + flush right now. For group commit
to work we want to do the flush after releasing the
prepare_commit_mutex. */
trx->flush_log_later = TRUE;
innobase_commit_low(trx); innobase_commit_low(trx);
trx->flush_log_later = FALSE;
if (srv_commit_concurrency > 0) { if (srv_commit_concurrency > 0) {
pthread_mutex_lock(&commit_cond_m); pthread_mutex_lock(&commit_cond_m);
...@@ -1925,6 +1930,8 @@ retry: ...@@ -1925,6 +1930,8 @@ retry:
pthread_mutex_unlock(&prepare_commit_mutex); pthread_mutex_unlock(&prepare_commit_mutex);
} }
/* Now do a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
trx->active_trans = 0; trx->active_trans = 0;
} else { } else {
...@@ -7733,32 +7740,7 @@ innobase_xa_prepare( ...@@ -7733,32 +7740,7 @@ innobase_xa_prepare(
int error = 0; int error = 0;
trx_t* trx = check_trx_exists(thd); trx_t* trx = check_trx_exists(thd);
if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
(all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
{
/* For ibbackup to work the order of transactions in binlog
and InnoDB must be the same. Consider the situation
thread1> prepare; write to binlog; ...
<context switch>
thread2> prepare; write to binlog; commit
thread1> ... commit
To ensure this will not happen we're taking the mutex on
prepare, and releasing it on commit.
Note: only do it for normal commits, done via ha_commit_trans.
If 2pc protocol is executed by external transaction
coordinator, it will be just a regular MySQL client
executing XA PREPARE and XA COMMIT commands.
In this case we cannot know how many minutes or hours
will be between XA PREPARE and XA COMMIT, and we don't want
to block for undefined period of time.
*/
pthread_mutex_lock(&prepare_commit_mutex);
trx->active_trans = 2;
}
if (!THDVAR(thd, support_xa)) { if (!THDVAR(thd, support_xa)) {
...@@ -7811,6 +7793,33 @@ innobase_xa_prepare( ...@@ -7811,6 +7793,33 @@ innobase_xa_prepare(
srv_active_wake_master_thread(); srv_active_wake_master_thread();
if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
(all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
{
/* For ibbackup to work the order of transactions in binlog
and InnoDB must be the same. Consider the situation
thread1> prepare; write to binlog; ...
<context switch>
thread2> prepare; write to binlog; commit
thread1> ... commit
To ensure this will not happen we're taking the mutex on
prepare, and releasing it on commit.
Note: only do it for normal commits, done via ha_commit_trans.
If 2pc protocol is executed by external transaction
coordinator, it will be just a regular MySQL client
executing XA PREPARE and XA COMMIT commands.
In this case we cannot know how many minutes or hours
will be between XA PREPARE and XA COMMIT, and we don't want
to block for undefined period of time.
*/
pthread_mutex_lock(&prepare_commit_mutex);
trx->active_trans = 2;
}
return(error); return(error);
} }
......
...@@ -461,10 +461,12 @@ struct trx_struct{ ...@@ -461,10 +461,12 @@ struct trx_struct{
FALSE, one can save CPU time and about FALSE, one can save CPU time and about
150 bytes in the undo log size as then 150 bytes in the undo log size as then
we skip XA steps */ we skip XA steps */
unsigned flush_log_later:1;/* when we commit the transaction unsigned flush_log_later:1;/* In 2PC, we hold the
in MySQL's binlog write, we will prepare_commit mutex across
flush the log to disk later in both phases. In that case, we
a separate call */ defer flush of the logs to disk
until after we release the
mutex. */
unsigned must_flush_log_later:1;/* this flag is set to TRUE in unsigned must_flush_log_later:1;/* this flag is set to TRUE in
trx_commit_off_kernel() if trx_commit_off_kernel() if
flush_log_later was TRUE, and there flush_log_later was TRUE, and there
......
...@@ -842,11 +842,11 @@ trx_commit_off_kernel( ...@@ -842,11 +842,11 @@ trx_commit_off_kernel(
there are > 2 users in the database. Then at least 2 users can there are > 2 users in the database. Then at least 2 users can
gather behind one doing the physical log write to disk. gather behind one doing the physical log write to disk.
If we are calling trx_commit() under MySQL's binlog mutex, we If we are calling trx_commit() under prepare_commit_mutex, we
will delay possible log write and flush to a separate function will delay possible log write and flush to a separate function
trx_commit_complete_for_mysql(), which is only called when the trx_commit_complete_for_mysql(), which is only called when the
thread has released the binlog mutex. This is to make the thread has released the mutex. This is to make the
group commit algorithm to work. Otherwise, the MySQL binlog group commit algorithm to work. Otherwise, the prepare_commit
mutex would serialize all commits and prevent a group of mutex would serialize all commits and prevent a group of
transactions from gathering. */ transactions from gathering. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment