Commit b7968590 authored by Kristian Nielsen

MDEV-6903: gtid_slave_pos is incorrect after master crash

When a master server restarts, it logs a special restart format description
event in its binlog. When the slave sees this event, it knows it needs to roll
back any active partial transaction, in case the master crashed previously in
the middle of writing such a transaction to its binlog.

However, there was a bug where this rollback did not reset rgi->gtid_pending.
This caused @@gtid_slave_pos to be updated incorrectly with the GTID of
the partial transaction that was rolled back.

Fix this by always clearing rgi->gtid_pending in cleanup_context(), hopefully
preventing similar bugs from turning up in other special cases where a
transaction is rolled back during replication.

Thanks to Pavel Ivanov for tracking down the issue and providing a test case.
parent f3bdf9d7
......@@ -133,9 +133,17 @@ SELECT @@GLOBAL.server_id;
3
SELECT * from t1 WHERE a > 10 ORDER BY a;
a
gtid_check
Binlog pos ok
# Wait 30 seconds for SQL thread to catch up with IO thread
SELECT * from t1 WHERE a > 10 ORDER BY a;
a
gtid_check
Binlog pos ok
gtid_check
Slave pos ok
gtid_check
Current pos ok
# Repeat this with additional transactions on the master
SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
BEGIN;
......@@ -175,11 +183,21 @@ SELECT * from t1 WHERE a > 10 ORDER BY a;
a
13
14
gtid_check
Binlog pos ok
gtid_check
Current pos ok
# Wait 30 seconds for SQL thread to catch up with IO thread
SELECT * from t1 WHERE a > 10 ORDER BY a;
a
13
14
gtid_check
Binlog pos ok
gtid_check
Slave pos ok
gtid_check
Current pos ok
# Repeat this with additional transactions on the master
SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
BEGIN;
......@@ -205,5 +223,48 @@ a
14
23
24
# Repeat this with slave restart
SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
BEGIN;
INSERT INTO t1 VALUES (25);
COMMIT;
ERROR HY000: Error writing file 'master-bin' (errno: 28 "No space left on device")
SET GLOBAL debug_dbug="+d,crash_dispatch_command_before";
COMMIT;
Got one of the listed errors
# Wait 30 seconds for IO thread to connect and SQL thread to catch up
# with IO thread.
include/stop_slave.inc
gtid_check
Binlog pos ok
gtid_check
Current pos ok
INSERT INTO t1 VALUES (26);
INSERT INTO t1 VALUES (27);
SELECT * from t1 WHERE a > 10 ORDER BY a;
a
13
14
23
24
26
27
include/save_master_gtid.inc
gtid_check
Binlog pos ok
gtid_check
Slave pos ok
gtid_check
Current pos ok
include/start_slave.inc
include/sync_with_master_gtid.inc
SELECT * from t1 WHERE a > 10 ORDER BY a;
a
13
14
23
24
26
27
DROP TABLE t1;
include/rpl_end.inc
......@@ -269,6 +269,7 @@ SET GLOBAL debug_dbug="+d,crash_before_writing_xid";
--connection server_1
INSERT INTO t1 VALUES (9), (10);
--let $saved_gtid=`SELECT @@last_gtid`
--save_master_pos
--connection server_2
......@@ -333,6 +334,9 @@ EOF
SELECT @@GLOBAL.server_id;
SELECT * from t1 WHERE a > 10 ORDER BY a;
--disable_query_log
eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
--enable_query_log
--echo # Wait 30 seconds for SQL thread to catch up with IO thread
--connection server_2
......@@ -357,6 +361,11 @@ if ($read_log_pos != $exec_log_pos)
}
SELECT * from t1 WHERE a > 10 ORDER BY a;
--disable_query_log
eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
eval SELECT IF(INSTR(@@gtid_slave_pos, '$saved_gtid'), "Slave pos ok", CONCAT("Unexpected slave pos: ", @@gtid_slave_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
eval SELECT IF(INSTR(@@gtid_current_pos, '$saved_gtid'), "Current pos ok", CONCAT("Unexpected current pos: ", @@gtid_current_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
--enable_query_log
--echo # Repeat this with additional transactions on the master
......@@ -387,6 +396,7 @@ EOF
SELECT @@GLOBAL.server_id;
INSERT INTO t1 VALUES (13);
INSERT INTO t1 VALUES (14);
--let $saved_gtid=`SELECT @@last_gtid`
SELECT * from t1 WHERE a > 10 ORDER BY a;
--source include/save_master_gtid.inc
......@@ -420,6 +430,10 @@ EOF
SELECT @@GLOBAL.server_id;
SELECT * from t1 WHERE a > 10 ORDER BY a;
--disable_query_log
eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
eval SELECT IF(INSTR(@@gtid_current_pos, '$saved_gtid'), "Current pos ok", CONCAT("Unexpected current pos: ", @@gtid_current_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
--enable_query_log
--echo # Wait 30 seconds for SQL thread to catch up with IO thread
--connection server_2
......@@ -444,6 +458,11 @@ if ($read_log_pos != $exec_log_pos)
}
SELECT * from t1 WHERE a > 10 ORDER BY a;
--disable_query_log
eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
eval SELECT IF(INSTR(@@gtid_slave_pos, '$saved_gtid'), "Slave pos ok", CONCAT("Unexpected slave pos: ", @@gtid_slave_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
eval SELECT IF(INSTR(@@gtid_current_pos, '$saved_gtid'), "Current pos ok", CONCAT("Unexpected current pos: ", @@gtid_current_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
--enable_query_log
--echo # Repeat this with additional transactions on the master
......@@ -472,10 +491,91 @@ EOF
INSERT INTO t1 VALUES (23);
INSERT INTO t1 VALUES (24);
--let $saved_gtid=`SELECT @@last_gtid`
SELECT * from t1 WHERE a > 10 ORDER BY a;
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
SELECT * from t1 WHERE a > 10 ORDER BY a;
--echo # Repeat this with slave restart
--connection server_1
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
wait
EOF
SET GLOBAL debug_dbug="+d,inject_error_writing_xid";
BEGIN;
INSERT INTO t1 VALUES (25);
--error ER_ERROR_ON_WRITE
COMMIT;
SET GLOBAL debug_dbug="+d,crash_dispatch_command_before";
--error 2006,2013
COMMIT;
--source include/wait_until_disconnected.inc
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
restart
EOF
--connection server_1
--enable_reconnect
--source include/wait_until_connected_again.inc
--connection server_2
--echo # Wait 30 seconds for IO thread to connect and SQL thread to catch up
--echo # with IO thread.
# Poll until the slave IO thread is connected and the SQL thread has executed
# everything the IO thread has read. The budget is 300 iterations with a
# 0.1 second sleep each, i.e. roughly the 30 seconds announced above.
--let $wait_timeout= 300
while ($wait_timeout != 0)
{
# A processlist entry in state 'Waiting for master to send event' is used as
# the indication that the IO thread is connected.
--let $connected=`SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE State = 'Waiting for master to send event'`
if ($connected)
{
# Compare how far the IO thread has read with how far the SQL thread has
# executed.
--let $read_log_pos= query_get_value('SHOW SLAVE STATUS', Read_Master_Log_Pos, 1)
--let $exec_log_pos= query_get_value('SHOW SLAVE STATUS', Exec_Master_Log_Pos, 1)
if ($read_log_pos == $exec_log_pos)
{
# Caught up: zero the counter so the while loop terminates.
--let $wait_timeout= 0
}
if ($read_log_pos != $exec_log_pos)
{
# Not caught up yet: spend one iteration of the budget and retry.
--sleep 0.1
--dec $wait_timeout
}
}
if (!$connected)
{
# IO thread not connected yet: spend one iteration of the budget and retry.
--sleep 0.1
--dec $wait_timeout
}
}
# The loop ends with $wait_timeout == 0 both on success and on timeout, so
# re-check the condition to tell the two cases apart and abort on timeout.
if (`SELECT NOT $connected OR $read_log_pos != $exec_log_pos`)
{
--die Timeout wait for IO thread to connect and SQL thread to catch up with IO thread
}
--source include/stop_slave.inc
--connection server_1
--disable_query_log
eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
eval SELECT IF(INSTR(@@gtid_current_pos, '$saved_gtid'), "Current pos ok", CONCAT("Unexpected current pos: ", @@gtid_current_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
--enable_query_log
INSERT INTO t1 VALUES (26);
INSERT INTO t1 VALUES (27);
SELECT * from t1 WHERE a > 10 ORDER BY a;
--source include/save_master_gtid.inc
--connection server_2
--disable_query_log
eval SELECT IF(INSTR(@@gtid_binlog_pos, '$saved_gtid'), "Binlog pos ok", CONCAT("Unexpected binlog pos: ", @@gtid_binlog_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
eval SELECT IF(INSTR(@@gtid_slave_pos, '$saved_gtid'), "Slave pos ok", CONCAT("Unexpected slave pos: ", @@gtid_slave_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
eval SELECT IF(INSTR(@@gtid_current_pos, '$saved_gtid'), "Current pos ok", CONCAT("Unexpected current pos: ", @@gtid_current_pos, "; does not contain the GTID $saved_gtid.")) AS gtid_check;
--enable_query_log
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
SELECT * from t1 WHERE a > 10 ORDER BY a;
......
......@@ -1717,6 +1717,11 @@ void rpl_group_info::cleanup_context(THD *thd, bool error)
trans_rollback_stmt(thd); // if a "statement transaction"
/* trans_rollback() also resets OPTION_GTID_BEGIN */
trans_rollback(thd); // if a "real transaction"
/*
Now that we have rolled back the transaction, make sure we do not
erroneously update the GTID position.
*/
gtid_pending= false;
}
m_table_map.clear_tables();
slave_close_thread_tables(thd);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment