Commit 40381aed authored by inaam

branches/zip: Undo changes committed in r1832.

These should have gone to branches/fts.

Spotted by: Marko and Ken
parent 04fb05b1
...@@ -1911,12 +1911,7 @@ retry: ...@@ -1911,12 +1911,7 @@ retry:
trx->mysql_log_file_name = mysql_bin_log_file_name(); trx->mysql_log_file_name = mysql_bin_log_file_name();
trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos(); trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos();
/* Don't do write + flush right now. For group commit
to work we want to do the flush after releasing the
prepare_commit_mutex. */
trx->flush_log_later = TRUE;
innobase_commit_low(trx); innobase_commit_low(trx);
trx->flush_log_later = FALSE;
if (srv_commit_concurrency > 0) { if (srv_commit_concurrency > 0) {
pthread_mutex_lock(&commit_cond_m); pthread_mutex_lock(&commit_cond_m);
...@@ -1930,8 +1925,6 @@ retry: ...@@ -1930,8 +1925,6 @@ retry:
pthread_mutex_unlock(&prepare_commit_mutex); pthread_mutex_unlock(&prepare_commit_mutex);
} }
/* Now do a write + flush of logs. */
trx_commit_complete_for_mysql(trx);
trx->active_trans = 0; trx->active_trans = 0;
} else { } else {
...@@ -7740,7 +7733,32 @@ innobase_xa_prepare( ...@@ -7740,7 +7733,32 @@ innobase_xa_prepare(
int error = 0; int error = 0;
trx_t* trx = check_trx_exists(thd); trx_t* trx = check_trx_exists(thd);
if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
(all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
{
/* For ibbackup to work the order of transactions in binlog
and InnoDB must be the same. Consider the situation
thread1> prepare; write to binlog; ...
<context switch>
thread2> prepare; write to binlog; commit
thread1> ... commit
To ensure this will not happen we're taking the mutex on
prepare, and releasing it on commit.
Note: only do it for normal commits, done via ha_commit_trans.
If 2pc protocol is executed by external transaction
coordinator, it will be just a regular MySQL client
executing XA PREPARE and XA COMMIT commands.
In this case we cannot know how many minutes or hours
will be between XA PREPARE and XA COMMIT, and we don't want
to block for undefined period of time.
*/
pthread_mutex_lock(&prepare_commit_mutex);
trx->active_trans = 2;
}
if (!THDVAR(thd, support_xa)) { if (!THDVAR(thd, support_xa)) {
...@@ -7793,33 +7811,6 @@ innobase_xa_prepare( ...@@ -7793,33 +7811,6 @@ innobase_xa_prepare(
srv_active_wake_master_thread(); srv_active_wake_master_thread();
if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
(all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
{
/* For ibbackup to work the order of transactions in binlog
and InnoDB must be the same. Consider the situation
thread1> prepare; write to binlog; ...
<context switch>
thread2> prepare; write to binlog; commit
thread1> ... commit
To ensure this will not happen we're taking the mutex on
prepare, and releasing it on commit.
Note: only do it for normal commits, done via ha_commit_trans.
If 2pc protocol is executed by external transaction
coordinator, it will be just a regular MySQL client
executing XA PREPARE and XA COMMIT commands.
In this case we cannot know how many minutes or hours
will be between XA PREPARE and XA COMMIT, and we don't want
to block for undefined period of time.
*/
pthread_mutex_lock(&prepare_commit_mutex);
trx->active_trans = 2;
}
return(error); return(error);
} }
......
...@@ -461,12 +461,10 @@ struct trx_struct{ ...@@ -461,12 +461,10 @@ struct trx_struct{
FALSE, one can save CPU time and about FALSE, one can save CPU time and about
150 bytes in the undo log size as then 150 bytes in the undo log size as then
we skip XA steps */ we skip XA steps */
unsigned flush_log_later:1;/* In 2PC, we hold the unsigned flush_log_later:1;/* when we commit the transaction
prepare_commit mutex across in MySQL's binlog write, we will
both phases. In that case, we flush the log to disk later in
defer flush of the logs to disk a separate call */
until after we release the
mutex. */
unsigned must_flush_log_later:1;/* this flag is set to TRUE in unsigned must_flush_log_later:1;/* this flag is set to TRUE in
trx_commit_off_kernel() if trx_commit_off_kernel() if
flush_log_later was TRUE, and there flush_log_later was TRUE, and there
......
...@@ -842,11 +842,11 @@ trx_commit_off_kernel( ...@@ -842,11 +842,11 @@ trx_commit_off_kernel(
there are > 2 users in the database. Then at least 2 users can there are > 2 users in the database. Then at least 2 users can
gather behind one doing the physical log write to disk. gather behind one doing the physical log write to disk.
If we are calling trx_commit() under prepare_commit_mutex, we If we are calling trx_commit() under MySQL's binlog mutex, we
will delay possible log write and flush to a separate function will delay possible log write and flush to a separate function
trx_commit_complete_for_mysql(), which is only called when the trx_commit_complete_for_mysql(), which is only called when the
thread has released the mutex. This is to make the thread has released the binlog mutex. This is to make the
group commit algorithm to work. Otherwise, the prepare_commit group commit algorithm to work. Otherwise, the MySQL binlog
mutex would serialize all commits and prevent a group of mutex would serialize all commits and prevent a group of
transactions from gathering. */ transactions from gathering. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment