Commit a0946fa3 authored by Alfranio Correia's avatar Alfranio Correia

BUG#50038 Deadlock on flush logs with concurrent DML and RBR

In auto-commit mode, updating both trx and non-trx tables (i.e. issuing a mixed
statement) causes the following sequence of events:

1 - "Flush trx changes" (MYSQL_BIN_LOG::write) - T1:
  1.1 - mutex_lock (&LOCK_log)
  1.2 - mutex_lock (&LOCK_prep_xids)
  1.3 - increase prepared_xids
  1.4 - mutex_unlock (&LOCK_prep_xids)
  1.5 - mutex_unlock (&LOCK_log)

2 - "Flush non-trx changes" (MYSQL_BIN_LOG::write) - T1:
  2.1 - mutex_lock (&LOCK_log)
  2.2 - mutex_unlock (&LOCK_log)

3. "unlog" - T1
  3.1 - mutex_lock (&LOCK_prep_xids)
  3.2 - decrease prepared xids
  3.3 - pthread_cond_signal(&COND_prep_xids);
  3.4 - mutex_unlock (&LOCK_prep_xids)

The "FLUSH logs" command produces the following sequence of events:

1 - "FLUSH logs" command (MYSQL_BIN_LOG::new_file_impl) - user thread:
  1.1 - mutex_lock (&LOCK_log)
  1.2 - mutex_lock (&LOCK_prep_xids)
  1.3 - while (prepared_xids)  pthread_cond_wait(..., &LOCK_prep_xids);
  1.4 - mutex_unlock (&LOCK_prep_xids)
  1.5 - mutex_unlock (&LOCK_log)

A deadlock will arise if T1 flushes the trx changes and thus increases
prepared_xids but before it is able to continue the execution and flush the
non-trx changes, an user thread calls the "FLUSH logs" command and wait that
the prepared_xids is decreased and gets to zero. However, T1 cannot proceed
with the call to "Flush non-trx changes" because it will block in the mutex
"LOCK_log" and by consequence cannot complete the execution and call the
unlog to decrease the prepared_xids.

To fix the problem, we ensure that the non-trx changes are always flushed
before the trx changes.

Note that if you call "Flush non-trx changes" and a concurrent "FLUSH logs" is
issued, the "Flush non-trx changes" may block, but a deadlock will never happen
because the prepared_xids will eventually get to zero. Bottom line, there will
not be any transaction able to increase the prepared_xids because they will
block in the mutex "LOCK_log" (MYSQL_BIN_LOG::write) and those that increased
the prepared_xids will eventually commit and decrease the prepared_xids.
parent 8e1d1e45
...@@ -123,7 +123,7 @@ Binlog_cache_disk_use 0 ...@@ -123,7 +123,7 @@ Binlog_cache_disk_use 0
create table t1 (a int) engine=innodb; create table t1 (a int) engine=innodb;
show status like "binlog_cache_use"; show status like "binlog_cache_use";
Variable_name Value Variable_name Value
Binlog_cache_use 1 Binlog_cache_use 2
show status like "binlog_cache_disk_use"; show status like "binlog_cache_disk_use";
Variable_name Value Variable_name Value
Binlog_cache_disk_use 1 Binlog_cache_disk_use 1
...@@ -132,7 +132,7 @@ delete from t1; ...@@ -132,7 +132,7 @@ delete from t1;
commit; commit;
show status like "binlog_cache_use"; show status like "binlog_cache_use";
Variable_name Value Variable_name Value
Binlog_cache_use 2 Binlog_cache_use 4
show status like "binlog_cache_disk_use"; show status like "binlog_cache_disk_use";
Variable_name Value Variable_name Value
Binlog_cache_disk_use 1 Binlog_cache_disk_use 1
......
...@@ -9,7 +9,7 @@ drop table if exists t1; ...@@ -9,7 +9,7 @@ drop table if exists t1;
create table t1 (a int) engine=innodb; create table t1 (a int) engine=innodb;
show status like "binlog_cache_use"; show status like "binlog_cache_use";
Variable_name Value Variable_name Value
Binlog_cache_use 1 Binlog_cache_use 2
show status like "binlog_cache_disk_use"; show status like "binlog_cache_disk_use";
Variable_name Value Variable_name Value
Binlog_cache_disk_use 1 Binlog_cache_disk_use 1
...@@ -18,7 +18,7 @@ delete from t1; ...@@ -18,7 +18,7 @@ delete from t1;
commit; commit;
show status like "binlog_cache_use"; show status like "binlog_cache_use";
Variable_name Value Variable_name Value
Binlog_cache_use 2 Binlog_cache_use 4
show status like "binlog_cache_disk_use"; show status like "binlog_cache_disk_use";
Variable_name Value Variable_name Value
Binlog_cache_disk_use 1 Binlog_cache_disk_use 1
......
...@@ -9,7 +9,7 @@ drop table if exists t1; ...@@ -9,7 +9,7 @@ drop table if exists t1;
create table t1 (a int) engine=innodb; create table t1 (a int) engine=innodb;
show status like "binlog_cache_use"; show status like "binlog_cache_use";
Variable_name Value Variable_name Value
Binlog_cache_use 1 Binlog_cache_use 2
show status like "binlog_cache_disk_use"; show status like "binlog_cache_disk_use";
Variable_name Value Variable_name Value
Binlog_cache_disk_use 1 Binlog_cache_disk_use 1
...@@ -18,7 +18,7 @@ delete from t1; ...@@ -18,7 +18,7 @@ delete from t1;
commit; commit;
show status like "binlog_cache_use"; show status like "binlog_cache_use";
Variable_name Value Variable_name Value
Binlog_cache_use 2 Binlog_cache_use 4
show status like "binlog_cache_disk_use"; show status like "binlog_cache_disk_use";
Variable_name Value Variable_name Value
Binlog_cache_disk_use 1 Binlog_cache_disk_use 1
......
...@@ -9,7 +9,7 @@ drop table if exists t1; ...@@ -9,7 +9,7 @@ drop table if exists t1;
create table t1 (a int) engine=innodb; create table t1 (a int) engine=innodb;
show status like "binlog_cache_use"; show status like "binlog_cache_use";
Variable_name Value Variable_name Value
Binlog_cache_use 1 Binlog_cache_use 2
show status like "binlog_cache_disk_use"; show status like "binlog_cache_disk_use";
Variable_name Value Variable_name Value
Binlog_cache_disk_use 1 Binlog_cache_disk_use 1
...@@ -18,7 +18,7 @@ delete from t1; ...@@ -18,7 +18,7 @@ delete from t1;
commit; commit;
show status like "binlog_cache_use"; show status like "binlog_cache_use";
Variable_name Value Variable_name Value
Binlog_cache_use 2 Binlog_cache_use 4
show status like "binlog_cache_disk_use"; show status like "binlog_cache_disk_use";
Variable_name Value Variable_name Value
Binlog_cache_disk_use 1 Binlog_cache_disk_use 1
......
...@@ -5958,7 +5958,8 @@ int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid) ...@@ -5958,7 +5958,8 @@ int TC_LOG_BINLOG::log_xid(THD *thd, my_xid xid)
We always commit the entire transaction when writing an XID. Also We always commit the entire transaction when writing an XID. Also
note that the return value is inverted. note that the return value is inverted.
*/ */
DBUG_RETURN(!binlog_flush_trx_cache(thd, cache_mngr, &xle)); DBUG_RETURN(!binlog_flush_stmt_cache(thd, cache_mngr) &&
!binlog_flush_trx_cache(thd, cache_mngr, &xle));
} }
void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid) void TC_LOG_BINLOG::unlog(ulong cookie, my_xid xid)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment