Commit c7e38076 authored by Andrei Elkin's avatar Andrei Elkin

MDEV-9510 Segmentation fault in binlog thread causes crash

With the combination of --log-bin and Galera, the server may crash,
reporting one of two characteristic stacks:

  /usr/sbin/mysqld(_ZN13MYSQL_BIN_LOG13mark_xid_doneEmb+0xc7)[0x7f182a8e2cb7]
  /usr/sbin/mysqld(binlog_background_thread+0x2b5)[0x7f182a8e3275]

or

  /usr/sbin/mysqld(_ZN13MYSQL_BIN_LOG21do_checkpoint_requestEm+0x9d)[0x7ff395b2dafd]
  /usr/sbin/mysqld(_ZN13MYSQL_BIN_LOG20checkpoint_and_purgeEm+0x11)[0x7ff395b2db91]
  /usr/sbin/mysqld(_ZN13MYSQL_BIN_LOG16rotate_and_purgeEb+0xc2)[0x7ff395b300b2]

The cause of the failure appears to be unmatched decrements of
  `xid_count_per_binlog::xid_count`
which can occur when a transaction is executed on a connection that has issued
`SET @@sql_log_bin=0`. In that case the xid count is not incremented, but
its decrement still runs, leaving `binlog_xid_count_list` in an inconsistent
state, which a subsequent FLUSH BINARY LOGS exposes as a crash.

*Note_1*: the regression test reuses the existing galera.sql_log_bin test,
which does not run stably under mtr with --log-bin (even in its base form).

*Note_2*: the 10.0-galera branch is free of this issue because it does not
contain the MDEV-7205 fixes.
parent aae49327
...@@ -6,6 +6,7 @@ INSERT INTO t1 VALUES (1); ...@@ -6,6 +6,7 @@ INSERT INTO t1 VALUES (1);
# Disable binary logging for current session # Disable binary logging for current session
SET SQL_LOG_BIN=OFF; SET SQL_LOG_BIN=OFF;
INSERT INTO t1 VALUES (2); INSERT INTO t1 VALUES (2);
FLUSH BINARY LOGS;
CREATE TABLE t2(c1 INT PRIMARY KEY) ENGINE=INNODB; CREATE TABLE t2(c1 INT PRIMARY KEY) ENGINE=INNODB;
INSERT INTO t2 VALUES (1); INSERT INTO t2 VALUES (1);
CREATE TABLE test.t3 AS SELECT * from t1; CREATE TABLE test.t3 AS SELECT * from t1;
......
# Test to check the behavior of galera cluster with sql_log_bin=ON|OFF & binary # Test to check the behavior of galera cluster with sql_log_bin=ON|OFF & binary
# logging is disabled. sql_bin_log should not affect galera replication. # logging is disabled. sql_bin_log should not affect galera replication.
#
# The following bugfixes are tested:
#
# MDEV-9510: Segmentation fault in binlog thread.
# A scenario that would otherwise cause a similar segfault is replayed.
# The test must pass without any crashes.
# The sequence of SQL statements is taken from the original
# sql_log_bin.test, augmented with a FLUSH BINARY LOGS, below.
--source include/galera_cluster.inc --source include/galera_cluster.inc
--source include/have_innodb.inc --source include/have_innodb.inc
...@@ -15,6 +23,10 @@ INSERT INTO t1 VALUES (1); ...@@ -15,6 +23,10 @@ INSERT INTO t1 VALUES (1);
--echo # Disable binary logging for current session --echo # Disable binary logging for current session
SET SQL_LOG_BIN=OFF; SET SQL_LOG_BIN=OFF;
INSERT INTO t1 VALUES (2); INSERT INTO t1 VALUES (2);
# MDEV-9510: without the fix, the binlog rotation triggered by the following FLUSH segfaults
FLUSH BINARY LOGS;
CREATE TABLE t2(c1 INT PRIMARY KEY) ENGINE=INNODB; CREATE TABLE t2(c1 INT PRIMARY KEY) ENGINE=INNODB;
INSERT INTO t2 VALUES (1); INSERT INTO t2 VALUES (1);
CREATE TABLE test.t3 AS SELECT * from t1; CREATE TABLE test.t3 AS SELECT * from t1;
......
...@@ -7165,8 +7165,15 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd, ...@@ -7165,8 +7165,15 @@ MYSQL_BIN_LOG::write_transaction_to_binlog(THD *thd,
mode. Also, do not write the cached updates to binlog if binary logging is mode. Also, do not write the cached updates to binlog if binary logging is
disabled (log-bin/sql_log_bin). disabled (log-bin/sql_log_bin).
*/ */
if (wsrep_emulate_bin_log || !(thd->variables.option_bits & OPTION_BIN_LOG)) if (wsrep_emulate_bin_log)
{
DBUG_RETURN(0);
}
else if (!(thd->variables.option_bits & OPTION_BIN_LOG))
{
cache_mngr->need_unlog= false;
DBUG_RETURN(0); DBUG_RETURN(0);
}
entry.thd= thd; entry.thd= thd;
entry.cache_mngr= cache_mngr; entry.cache_mngr= cache_mngr;
...@@ -9489,11 +9496,19 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all, ...@@ -9489,11 +9496,19 @@ TC_LOG_BINLOG::log_and_order(THD *thd, my_xid xid, bool all,
if (err) if (err)
DBUG_RETURN(0); DBUG_RETURN(0);
bool need_unlog= cache_mngr->need_unlog;
/*
The transaction won't need the flag anymore.
Todo/fixme: consider to move the statement into cache_mngr->reset()
relocated to the current or later point.
*/
cache_mngr->need_unlog= false;
/* /*
If using explicit user XA, we will not have XID. We must still return a If using explicit user XA, we will not have XID. We must still return a
non-zero cookie (as zero cookie signals error). non-zero cookie (as zero cookie signals error).
*/ */
if (!xid || !cache_mngr->need_unlog) if (!xid || !need_unlog)
DBUG_RETURN(BINLOG_COOKIE_DUMMY(cache_mngr->delayed_error)); DBUG_RETURN(BINLOG_COOKIE_DUMMY(cache_mngr->delayed_error));
else else
DBUG_RETURN(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id, DBUG_RETURN(BINLOG_COOKIE_MAKE(cache_mngr->binlog_id,
...@@ -9566,6 +9581,9 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint) ...@@ -9566,6 +9581,9 @@ TC_LOG_BINLOG::mark_xid_done(ulong binlog_id, bool write_checkpoint)
if (b->binlog_id == binlog_id) if (b->binlog_id == binlog_id)
{ {
--b->xid_count; --b->xid_count;
DBUG_ASSERT(b->xid_count >= 0); // catch unmatched (++) decrement
break; break;
} }
first= false; first= false;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment