Commit 90a9c4ca authored by Sujatha's avatar Sujatha

MDEV-20217: Semi_sync: Last_IO_Error: Fatal error: Failed to run 'after_queue_event' hook

Fix:
===
Implemented upstream fix.

commit 7d3d0fc3
Author: He Zhenxing <zhenxing.he@sun.com>

Backport Bug#45852 Semisynch: Last_IO_Error: Fatal error: Failed
to run 'after_queue_event' hook

Errors when sending the reply to the master should never cause the IO
thread to stop, because the master can fall back to async replication if
it does not get a reply from the slave.

The problem is fixed by deliberately ignoring the return value of
slave_reply.
parent bfbf0f22
include/master-slave.inc
[connection master]
connection slave;
include/stop_slave.inc
connection master;
call mtr.add_suppression("Timeout waiting for reply of binlog*");
set global rpl_semi_sync_master_enabled = ON;
SET @@GLOBAL.rpl_semi_sync_master_timeout=100;
create table t1 (i int);
connection slave;
set global rpl_semi_sync_slave_enabled = ON;
CALL mtr.add_suppression("Semi-sync slave net_flush*");
SET @save_debug= @@global.debug;
SET GLOBAL debug_dbug="+d,semislave_failed_net_flush";
include/start_slave.inc
connection master;
connection slave;
"Assert that the net_fulsh() reply failed is present in slave error log.
FOUND 1 /Semi-sync slave net_flush\(\) reply failed/ in mysqld.2.err
"Assert that Slave IO thread is up and running."
SHOW STATUS LIKE 'Slave_running';
Variable_name Value
Slave_running ON
Slave_IO_Running= Yes
"Clear the network failure simulation."
SET GLOBAL debug_dbug= @save_debug;
connection master;
insert into t1 values (10);
connection slave;
connection slave;
# Compare the tables on master and slave.
include/diff_tables.inc [master:t1, slave:t1]
connection master;
drop table t1;
connection slave;
set global rpl_semi_sync_slave_enabled = OFF;
connection master;
set global rpl_semi_sync_master_enabled = OFF;
SET @@GLOBAL.rpl_semi_sync_master_timeout = 10000;
include/rpl_end.inc
# ==== Purpose ====
#
# Test verifies that slave IO thread doesn't report an error, when slave fails
# to send an acknowledgment to master with semi sync replication in use.
#
# ==== Implementation ====
#
# Steps:
# 0 - Have semi synchronous replication in use.
# 1 - Enable a debug simulation point which simulates network flush failure
# at the time of slave reply operation.
# 2 - Do some operation on master and wait for it to be replicated. Master
# will time out waiting for the reply from slave.
# 3 - Check the slave error log for appropriate error message regarding
# net_flush operation failure.
# 4 - Remove the debug simulation and do some more DML operations on master
# and wait for them to be replicated.
# 5 - Slave will be able to replicate and data is consistent on both master
# and slave. Semi sync will be automatically turned on.
#
# ==== References ====
#
# MDEV-20217: Semi_sync: Last_IO_Error: Fatal error: Failed to run
# 'after_queue_event' hook
#
# Test body: run semi-sync replication with a simulated net_flush failure on
# the slave's reply path, then verify the IO thread keeps running and data
# still replicates once the simulation is removed.
#
# The debug_dbug injection used below presumably needs a debug build, hence
# have_debug.inc.
--source include/have_debug.inc
--source include/master-slave.inc
# Stop replication on the slave before touching the semi-sync settings; it is
# restarted further down once the failure simulation is in place.
--connection slave
--source include/stop_slave.inc
--connection master
# The master is expected to time out waiting for the slave's reply, so keep
# that message from being flagged in the master's error log.
call mtr.add_suppression("Timeout waiting for reply of binlog*");
# Remember the current master timeout so the cleanup section can restore it.
--let $sav_timeout_master=`SELECT @@GLOBAL.rpl_semi_sync_master_timeout`
set global rpl_semi_sync_master_enabled = ON;
# Use a small timeout value for the duration of the test.
SET @@GLOBAL.rpl_semi_sync_master_timeout=100;
create table t1 (i int);
--connection slave
set global rpl_semi_sync_slave_enabled = ON;
# The simulated failure writes a net_flush message to the slave error log;
# suppress it so it is not flagged.
CALL mtr.add_suppression("Semi-sync slave net_flush*");
# NOTE(review): the value is read from @@global.debug here but written back to
# debug_dbug when the simulation is cleared -- confirm both names refer to the
# same variable on the target server version.
SET @save_debug= @@global.debug;
# Turn on the simulation point that makes the slave's reply flush fail.
SET GLOBAL debug_dbug="+d,semislave_failed_net_flush";
--source include/start_slave.inc
--connection master
# Wait until the slave has applied the CREATE TABLE; this also switches the
# current connection to the slave.
--sync_slave_with_master
# Check error log for correct messages.
let $log_error_= `SELECT @@GLOBAL.log_error`;
if(!$log_error_)
{
# MySQL Server on windows is started with --console and thus
# does not know the location of its .err log, use default location
let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.2.err;
}
# NOTE(review): the echoed text misspells "net_flush" and has no closing
# quote. It is part of the recorded output, so correcting it requires
# re-recording the result file.
--echo "Assert that the net_fulsh() reply failed is present in slave error log.
# Search the slave error log for the message produced by the failed reply.
--let SEARCH_FILE=$log_error_
--let SEARCH_PATTERN=Semi-sync slave net_flush\(\) reply failed
--source include/search_pattern_in_file.inc
--echo "Assert that Slave IO thread is up and running."
# The failed reply must not have stopped the IO thread: print both the
# Slave_running status variable and the Slave_IO_Running field of
# SHOW SLAVE STATUS.
SHOW STATUS LIKE 'Slave_running';
let $status= query_get_value("show slave status", Slave_IO_Running, 1);
echo Slave_IO_Running= $status;
--echo "Clear the network failure simulation."
SET GLOBAL debug_dbug= @save_debug;
--connection master
# With the simulation removed, new changes must replicate normally.
insert into t1 values (10);
--sync_slave_with_master
--connection slave
--echo # Compare the tables on master and slave.
--let $diff_tables= master:t1, slave:t1
--source include/diff_tables.inc
# Cleanup: drop the table, disable semi-sync on both servers and restore the
# saved master timeout.
--connection master
drop table t1;
# After this sync the current connection is the slave, so the next statement
# runs on the slave.
--sync_slave_with_master
set global rpl_semi_sync_slave_enabled = OFF;
--connection master
set global rpl_semi_sync_master_enabled = OFF;
--eval SET @@GLOBAL.rpl_semi_sync_master_timeout = $sav_timeout_master
--source include/rpl_end.inc
...@@ -4911,13 +4911,14 @@ Stopping slave I/O thread due to out-of-memory error from master"); ...@@ -4911,13 +4911,14 @@ Stopping slave I/O thread due to out-of-memory error from master");
goto err; goto err;
} }
if (rpl_semi_sync_slave_status && (mi->semi_ack & SEMI_SYNC_NEED_ACK) && if (rpl_semi_sync_slave_status && (mi->semi_ack & SEMI_SYNC_NEED_ACK))
repl_semisync_slave.slave_reply(mi))
{ {
mi->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, NULL, /*
ER_THD(thd, ER_SLAVE_FATAL_ERROR), We deliberately ignore the error in slave_reply, such error should
"Failed to run 'after_queue_event' hook"); not cause the slave IO thread to stop, and the error messages are
goto err; already reported.
*/
(void)repl_semisync_slave.slave_reply(mi);
} }
if (mi->using_gtid == Master_info::USE_GTID_NO && if (mi->using_gtid == Master_info::USE_GTID_NO &&
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment