Commit 13db50fc authored by Andrei Elkin's avatar Andrei Elkin

MDEV-19376 Repl_semi_sync_master::commit_trx assertion failure: ... ||...

MDEV-19376 Repl_semi_sync_master::commit_trx assertion failure: ... || !m_active_tranxs->is_tranx_end_pos(trx_wait_binlog_name, trx_wait_binlog_pos)

The assert indicates that the current transaction got caught uncleaned from
the semisync master's cache when it is signaled to proceed upon its
ack receive.

The reason of missed cleanup turns out to be a flaw in the gtid
connect mode.
A submitted by connecting slave value of its last received event's
binlog file *name* was adopted into
{{Repl_semi_sync_master::m_reply_file_name}} as a part of semisync
initialization.

Notice that the initialization still refines the position part of the
submitted last received event's binlog coordinates.
The master side binlog filename:pos refinement is
specific to the gtid connect mode for purpose of computing the latest
binlog file to resume slave feeding from.
Effectively in the gtid connect mode the computed resumption filename:pos
may appear smaller in which case a new post-connect time committing
transaction may be logged with its filename:pos also less than the
submitted coordinates and that triggers the assert.

Fixed with making the semisync initialization to use the refined filename:pos.
It is guaranteed to be less than any new generated transaction's binlog:pos.
parent dfd2d3d8
include/master-slave.inc
[connection master]
RESET MASTER;
SET @@GLOBAL.rpl_semi_sync_master_enabled = 1;
include/stop_slave.inc
SET @@GLOBAL. rpl_semi_sync_slave_enabled = 1;
include/start_slave.inc
CREATE TABLE t1 (a INT);
INSERT INTO t1 SET a = 1;
include/save_master_gtid.inc
FLUSH LOGS;
INSERT INTO t1 SET a = 2;
include/stop_slave_sql.inc
INSERT INTO t1 SET a = 3;
include/sync_slave_io_with_master.inc
include/stop_slave_io.inc
RESET MASTER;
SET @@global.gtid_binlog_state = '0-1-2';
CHANGE MASTER TO MASTER_USE_GTID = slave_pos;
SET @@global.gtid_slave_pos = '0-1-2';
include/start_slave.inc
INSERT INTO t1 SET a = 4;
DROP TABLE t1;
SET @@GLOBAL. rpl_semi_sync_master_enabled = 0;
include/stop_slave.inc
SET @@GLOBAL. rpl_semi_sync_slave_enabled = 0;
include/start_slave.inc
include/rpl_end.inc
source include/not_embedded.inc;
source include/have_binlog_format_mixed.inc;
source include/have_semisync.inc;
source include/master-slave.inc;
#
# Semisync initialization
#
--connection master
RESET MASTER;
--let $sav_enabled_master=`SELECT @@GLOBAL.rpl_semi_sync_master_enabled`
SET @@GLOBAL.rpl_semi_sync_master_enabled = 1;
--connection slave
source include/stop_slave.inc;
--let $sav_enabled_slave=`SELECT @@GLOBAL.rpl_semi_sync_slave_enabled`
SET @@GLOBAL. rpl_semi_sync_slave_enabled = 1;
source include/start_slave.inc;
# Prove fixes to
# MDEV-19376 Assert (!m_active_tranxs->is_tranx_end_pos(trx_wait_binlog_name...)
#
#
# Run few queries to replicate/execute on slave.
# Stop the slave applier.
# Replicate/not-executed few more.
# Restart the slave.
#
--connection master
CREATE TABLE t1 (a INT);
INSERT INTO t1 SET a = 1;
--source include/save_master_gtid.inc
--let $resume_gtid = $master_pos
FLUSH LOGS;
INSERT INTO t1 SET a = 2;
--sync_slave_with_master
--connection slave
--source include/stop_slave_sql.inc
--connection master
INSERT INTO t1 SET a = 3;
# the sync connection is 'slave' by default
--source include/sync_slave_io_with_master.inc
--connection slave
--source include/stop_slave_io.inc
--connection master
RESET MASTER;
--eval SET @@global.gtid_binlog_state = '$resume_gtid'
# The resume gtid is set up to point to the very first binlog file
--connection slave
CHANGE MASTER TO MASTER_USE_GTID = slave_pos;
--eval SET @@global.gtid_slave_pos = '$resume_gtid'
# Yet the slave io first submits the last received binlog file name:pos.
--source include/start_slave.inc
# Here goes the cracker.
--connection master
INSERT INTO t1 SET a = 4;
#
# Clean up
#
--connection master
DROP TABLE t1;
--eval SET @@GLOBAL. rpl_semi_sync_master_enabled = $sav_enabled_master
--sync_slave_with_master
source include/stop_slave.inc;
--eval SET @@GLOBAL. rpl_semi_sync_slave_enabled = $sav_enabled_slave
source include/start_slave.inc;
--source include/rpl_end.inc
...@@ -2725,7 +2725,11 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, ...@@ -2725,7 +2725,11 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
run hook first when all check has been made that slave seems to run hook first when all check has been made that slave seems to
be requesting a reasonable position. i.e when transmit actually starts be requesting a reasonable position. i.e when transmit actually starts
*/ */
if (RUN_HOOK(binlog_transmit, transmit_start, (thd, flags, log_ident, pos)))
DBUG_ASSERT(pos == linfo.pos);
if (RUN_HOOK(binlog_transmit, transmit_start,
(thd, flags, linfo.log_file_name, linfo.pos)))
{ {
info->errmsg= "Failed to run hook 'transmit_start'"; info->errmsg= "Failed to run hook 'transmit_start'";
info->error= ER_UNKNOWN_ERROR; info->error= ER_UNKNOWN_ERROR;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment