Commit 38839854 authored by seppo's avatar seppo Committed by Jan Lindström

MDEV-19572 async slave node fails to apply MyISAM only writes (#1418)

The problem happens when MariaDB master replicates writes for only non InnoDB
tables (e.g. writes to MyISAM table(s)). Async slave node, in Galera cluster,
can apply these writes successfully, but it will, in the end, write gtid position in
mysql.gtid_slave_pos table. mysql.gtid_slave_pos table is InnoDB engine, and
this write makes innodb handlerton part of the replicated "transaction".
Note that wsrep patch identifies that write to gtid_slave_pos should not be replicated
and skips appending wsrep keys for these writes. However, as InnoDB was present
in the transaction, and there are replication events (for MyISAM table) in transaction
cache, but there are no appended keys, wsrep raises an error, and this causes the slave
thread to stop.

The fix is simply to not treat it as an error if async slave tries to replicate a write
set with binlog events, but no keys. We just skip wsrep replication and return successfully.

This commit also contains an mtr test which forces the mysql.gtid_slave_pos table to be
of InnoDB engine, and executes a MyISAM-only write through async replication.

There is additional fix for declaring IO and background slave threads as non wsrep.
These threads should not write anything for wsrep replication, and this is just a safeguard
to make sure nothing leaks into cluster from these slave threads.
parent a51f3b09
ALTER TABLE mysql.gtid_slave_pos engine = InnoDB;
START SLAVE;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=MyISAM;
INSERT INTO t1 VALUES(1);
SELECT LENGTH(@@global.gtid_binlog_state) > 1;
LENGTH(@@global.gtid_binlog_state) > 1
1
gtid_binlog_state_equal
0
SELECT COUNT(*) = 0 FROM t1;
COUNT(*) = 0
1
gtid_binlog_state_equal
0
#cleanup
DROP TABLE t1;
reset master;
STOP SLAVE;
RESET SLAVE ALL;
reset master;
reset master;
!include ../galera_2nodes_as_slave.cnf
[mysqld]
log-bin=mysqld-bin
log-slave-updates
binlog-format=ROW
#
# Test Galera as a slave to a MariaDB master using GTIDs
#
# suite/galera/galera_2nodes_as_slave.cnf describes the setup of the nodes
# suite/galera/t/galera_as_slave_gtid.cnf has the GTID options
#
# This test will replicate writes to MyISAM table and check that slave node is able
# to apply them.
# mysql.gtid_slave_pos table should be defined as innodb engine; the original problem
# was caused by writes to mysql.gtid_slave_pos, whereas the replicated transaction contained
# no innodb writes
#
--source include/have_innodb.inc
# As node #1 is not a Galera node, we connect to node #2 in order to run include/galera_cluster.inc
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
--source include/galera_cluster.inc
--connection node_2
# make sure gtid_slave_pos is of innodb engine, mtr does not currently provide that
ALTER TABLE mysql.gtid_slave_pos engine = InnoDB;
# Query logging is switched off around CHANGE MASTER so the statement, which
# embeds $NODE_MYPORT_1, is not echoed into the recorded result.
--disable_query_log
--eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_USER='root', MASTER_PORT=$NODE_MYPORT_1;
--enable_query_log
START SLAVE;
# On node_1 (the replication master configured above): create a MyISAM table,
# write one row and remember the resulting gtid_binlog_state for later comparison.
--connection node_1
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=MyISAM;
INSERT INTO t1 VALUES(1);
SELECT LENGTH(@@global.gtid_binlog_state) > 1;
--let $gtid_binlog_state_node1 = `SELECT @@global.gtid_binlog_state;`
# On node_2 (the async slave): wait until both the table and its single row
# have arrived, i.e. the slave applied the MyISAM-only write.
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 1 FROM t1;
--source include/wait_condition.inc
# Compare node_2's gtid_binlog_state with the value captured on node_1; only the
# boolean result column is printed because the query log is disabled.
--disable_query_log
--eval SELECT '$gtid_binlog_state_node1' = @@global.gtid_binlog_state AS gtid_binlog_state_equal;
--enable_query_log
# On node_3: the test asserts that t1 holds no rows here, then repeats the
# gtid_binlog_state comparison against node_1's captured value.
# NOTE(review): presumably node_3 is the other Galera node and the MyISAM row is
# not expected to replicate into the cluster - confirm against galera_2nodes_as_slave.cnf.
--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3
SELECT COUNT(*) = 0 FROM t1;
--disable_query_log
--eval SELECT '$gtid_binlog_state_node1' = @@global.gtid_binlog_state AS gtid_binlog_state_equal;
--enable_query_log
--echo #cleanup
# Drop the table on the master, stop and remove the slave configuration on
# node_2, and reset binary log state on every node touched by the test.
--connection node_1
DROP TABLE t1;
reset master;
--connection node_2
STOP SLAVE;
RESET SLAVE ALL;
reset master;
--connection node_3
reset master;
......@@ -306,6 +306,9 @@ handle_slave_background(void *arg __attribute__((unused)))
thd->store_globals();
thd->security_ctx->skip_grants();
thd->set_command(COM_DAEMON);
#ifdef WITH_WSREP
thd->variables.wsrep_on= 0;
#endif
thd_proc_info(thd, "Loading slave GTID position from table");
if (rpl_load_gtid_slave_state(thd))
......@@ -4181,7 +4184,9 @@ pthread_handler_t handle_slave_io(void *arg)
goto err;
}
#ifdef WITH_WSREP
thd->variables.wsrep_on= 0;
#endif
if (RUN_HOOK(binlog_relay_io, thread_start, (thd, mi)))
{
mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL,
......
......@@ -478,12 +478,29 @@ wsrep_run_wsrep_commit(THD *thd, bool all)
if (WSREP_UNDEFINED_TRX_ID == thd->wsrep_ws_handle.trx_id)
{
WSREP_WARN("SQL statement was ineffective, THD: %lu, buf: %zu\n"
/*
Async replication slave may have applied some non-innodb workload,
and then has written replication "meta data" into gtid_slave_pos
innodb table. Writes to gtid_slave_pos must not be replicated,
but this activity has caused that innodb hton is registered for this
transaction, but no wsrep keys have been appended.
We enter in this code path, because IO cache has events for non-innodb
tables.
=> we should not treat it an error if trx is not introduced for provider
*/
if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL)
{
WSREP_DEBUG("skipping wsrep replication for async slave, error not raised");
DBUG_RETURN(WSREP_TRX_OK);
}
WSREP_WARN("SQL statement was ineffective thd: %lu buf: %zu\n"
"schema: %s \n"
"QUERY: %s\n"
" => Skipping replication",
thd->thread_id, data_len,
(thd->db ? thd->db : "(null)"), thd->query());
rcode = WSREP_TRX_FAIL;
}
else if (!rcode)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment