Commit 8d12dd8f authored by Daniele Sciascia's avatar Daniele Sciascia Committed by Jan Lindström

MDEV-28053 Sysbench data load crashes Galera secondary node in async master slave setup

This patch fixes a problem that arises when a Galera node acts as a
replica for native replication. When parallel applying is enabled, it
is possible to end up with attempts to write binlog events with gtids
out of order. This happens because when multiple events are delivered
from the native replication stream and applied concurrently, it is
possible for them to be replicated to the Galera cluster in an order
which is different from the original order in which they were committed
in the async replication master.
To correct this behavior we now wait_for_prior_commit() before
replicating changes through Galera. As a consequence, parallel appliers
may apply events in parallel until the Galera replication step, which
is now serialized.
parent 65eea231
connection node_2;
connection node_1;
connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3;
connection node_3;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
connection node_2;
connection node_3;
DROP TABLE t1;
connection node_2;
connection node_2;
STOP SLAVE;
RESET SLAVE ALL;
connection node_3;
RESET MASTER;
# Server options for the MDEV-28053 test: start from the shared
# galera_2nodes_as_slave configuration and enable parallel async appliers.
!include ../galera_2nodes_as_slave.cnf
[mysqld]
# Use several parallel applier threads so that events from the async
# replication stream can be applied concurrently.
slave_parallel_threads=4
# Optimistic parallel replication mode.
slave_parallel_mode=optimistic
# NOTE(review): presumably enabled so that binlog writes with out-of-order
# GTIDs are reported as errors rather than silently accepted - confirm
# against the gtid_strict_mode documentation.
gtid_strict_mode=1
#
# MDEV-28053 - Sysbench data load crashes Galera secondary node in
# async master slave setup
#
# Setup: node 3 is a regular MariaDB server, nodes 1 and 2 are members
# of a Galera cluster. Node 2 connects to node 3 through async replication.
#
# Test uses multiple parallel async applier threads (see MDEV-28053.cnf)
#
# Prerequisites and cluster setup come from the shared include files.
# NOTE(review): galera_cluster.inc presumably opens the node_1 and node_2
# connections used below - confirm against the include file.
--source include/have_innodb.inc
--source include/galera_cluster.inc
# Open a connection named node_3 to the server listening on $NODE_MYPORT_3
# (user root, database test) and create the test table there.
--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3
--connection node_3
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB;
#
# Execute a few INSERTs, to simulate sysbench data load phase
#
# The query log is disabled around the loop so the 100 INSERT statements
# are not echoed into the recorded result file.
--let $counter=100
--disable_query_log
while ($counter) {
--connection node_3
# Empty VALUES(): f1 is filled in by AUTO_INCREMENT.
INSERT INTO t1 VALUES();
--dec $counter
}
--enable_query_log
# Remember the GTID of the last statement executed on the node_3 session;
# it is passed to MASTER_GTID_WAIT() on node_2 below.
# NOTE(review): the variable is declared here as `gtid` (no `$` prefix) but
# read as `$gtid`, unlike $counter and $wait_condition - confirm intended.
--let gtid = `SELECT @@last_gtid`
#
# Start async replication on node 2.
# If bug is present, expect a crash when applying
# events concurrently.
#
--connection node_2
# Query and result logs are disabled because the statements below contain a
# port number and a GTID that are not fixed values in the result file.
--disable_query_log
--disable_result_log
--eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_USER='root', MASTER_PORT=$NODE_MYPORT_3;
START SLAVE;
# Block until node_2 has applied everything up to the saved GTID
# (second argument is the timeout).
# NOTE(review): with the result log disabled the return value is not
# recorded, so a timeout would not by itself fail the test - confirm
# that this is intended.
--eval SELECT MASTER_GTID_WAIT('$gtid', 600)
--enable_result_log
--enable_query_log
#
# Cleanup
#
--connection node_3
DROP TABLE t1;
# Wait on node_2 until t1 is no longer listed in INFORMATION_SCHEMA.TABLES,
# i.e. until the DROP TABLE issued on node_3 has been applied there.
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
# NOTE(review): the session is already on node_2, so this switch looks
# redundant; it is echoed into the recorded result file, so removing it
# would require re-recording the result.
--connection node_2
# Stop replication on node_2 and discard its replication configuration.
STOP SLAVE;
RESET SLAVE ALL;
# Reset the binary log state on node_3.
--connection node_3
RESET MASTER;
......@@ -229,6 +229,10 @@ static inline int wsrep_before_prepare(THD* thd, bool all)
WSREP_DEBUG("wsrep_before_prepare: %d", wsrep_is_real(thd, all));
int ret= 0;
DBUG_ASSERT(wsrep_run_commit_hook(thd, all));
if ((ret= thd->wsrep_parallel_slave_wait_for_prior_commit()))
{
DBUG_RETURN(ret);
}
if ((ret= thd->wsrep_cs().before_prepare()) == 0)
{
DBUG_ASSERT(!thd->wsrep_trx().ws_meta().gtid().is_undefined());
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment