Commit 98ca71ab authored by Andrei's avatar Andrei Committed by Sergei Golubchik

MDEV-28461 semisync-slave server recovery fails to rollback prepared transaction

that is not in binlog.

Post-crash recovery of --rpl-semi-sync-slave-enabled server
failed to recognize a transaction in-doubt that needed rolled back.
A prepared-but-not-in-binlog transaction gets committed instead
to possibly create inconsistency with a master (e.g the way it was observed
in the bug report).

The semisync recovery is corrected now with initializing binlog coordinates
of any transaction in-doubt to the maximum offset which is
unreachable.
In effect when a prepared transaction that is not found in binlog
it will be decided to rollback because it's guaranteed to reside
in a truncated tail area of binlog.

Mtr tests are reinforced to cover the described scenario.
parent e03e7223
......@@ -4,11 +4,13 @@ RESET MASTER;
SET @@global.sync_binlog=1;
CREATE TABLE t (f INT) ENGINE=INNODB;
CREATE TABLE t2 (f INT) ENGINE=INNODB;
CREATE TABLE t4 (f INT) ENGINE=INNODB;
CREATE TABLE tm (f INT) ENGINE=Aria;
# Case A.
connect master1,localhost,root,,;
connect master2,localhost,root,,;
connect master3,localhost,root,,;
connect master4,localhost,root,,;
connection default;
INSERT INTO t VALUES (10);
INSERT INTO tm VALUES (10);
......@@ -23,14 +25,23 @@ connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash';
Before the crash
0-1-7
0-1-8
connection master4;
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
INSERT INTO t4 VALUES (13);
connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
Before the crash and never logged trx
0-1-8
connection default;
# Kill the server
disconnect master1;
disconnect master2;
disconnect master3;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-6/ in mysqld.1.err
disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-7/ in mysqld.1.err
Pre-crash binlog file content:
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
......@@ -39,6 +50,8 @@ master-bin.000001 # Query # # use `test`; CREATE TABLE t (f INT) ENGINE=INNODB
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; CREATE TABLE t2 (f INT) ENGINE=INNODB
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; CREATE TABLE t4 (f INT) ENGINE=INNODB
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `test`; CREATE TABLE tm (f INT) ENGINE=Aria
master-bin.000001 # Gtid # # BEGIN GTID #-#-#
master-bin.000001 # Query # # use `test`; INSERT INTO t VALUES (10)
......@@ -48,16 +61,20 @@ master-bin.000001 # Query # # use `test`; INSERT INTO tm VALUES (10)
master-bin.000001 # Query # # COMMIT
SELECT @@global.gtid_binlog_pos as 'After the crash';
After the crash
0-1-5
0-1-6
"One row should be present in table 't'"
SELECT * FROM t;
f
10
"No row should be present in table 't4'"
SELECT * FROM t4;
f
DELETE FROM t;
# Case B.
connect master1,localhost,root,,;
connect master2,localhost,root,,;
connect master3,localhost,root,,;
connect master4,localhost,root,,;
connection default;
INSERT INTO t VALUES (10);
INSERT INTO tm VALUES (10);
......@@ -72,14 +89,23 @@ connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash';
Before the crash
0-1-10
0-1-11
connection master4;
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
INSERT INTO t4 VALUES (13);
connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
Before the crash and never logged trx
0-1-11
connection default;
# Kill the server
disconnect master1;
disconnect master2;
disconnect master3;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-10/ in mysqld.1.err
disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-11/ in mysqld.1.err
Pre-crash binlog file content:
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
......@@ -97,11 +123,14 @@ master-bin.000002 # Query # # use `test`; DELETE FROM t2 WHERE f = 0
master-bin.000002 # Query # # COMMIT
SELECT @@global.gtid_binlog_pos as 'After the crash';
After the crash
0-1-9
0-1-10
"One row should be present in table 't'"
SELECT * FROM t;
f
10
"No row should be present in table 't4'"
SELECT * FROM t4;
f
DELETE FROM t;
# Case C.
CREATE PROCEDURE sp_blank_xa()
......@@ -114,6 +143,7 @@ END|
connect master1,localhost,root,,;
connect master2,localhost,root,,;
connect master3,localhost,root,,;
connect master4,localhost,root,,;
connection default;
INSERT INTO t VALUES (10);
INSERT INTO tm VALUES (10);
......@@ -128,14 +158,23 @@ connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash';
Before the crash
0-1-15
0-1-16
connection master4;
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
INSERT INTO t4 VALUES (13);
connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
Before the crash and never logged trx
0-1-16
connection default;
# Kill the server
disconnect master1;
disconnect master2;
disconnect master3;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-14/ in mysqld.1.err
disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-15/ in mysqld.1.err
Pre-crash binlog file content:
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
......@@ -158,11 +197,14 @@ master-bin.000003 # Query # # use `test`; INSERT INTO tm VALUES (10)
master-bin.000003 # Query # # COMMIT
SELECT @@global.gtid_binlog_pos as 'After the crash';
After the crash
0-1-13
0-1-14
"One row should be present in table 't'"
SELECT * FROM t;
f
10
"No row should be present in table 't4'"
SELECT * FROM t4;
f
DELETE FROM t;
DROP PROCEDURE sp_blank_xa;
# Case D.
......@@ -176,6 +218,7 @@ END|
connect master1,localhost,root,,;
connect master2,localhost,root,,;
connect master3,localhost,root,,;
connect master4,localhost,root,,;
connection default;
INSERT INTO t VALUES (10);
INSERT INTO tm VALUES (10);
......@@ -190,14 +233,23 @@ connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash';
Before the crash
0-1-20
0-1-21
connection master4;
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
INSERT INTO t4 VALUES (13);
connection master3;
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
Before the crash and never logged trx
0-1-21
connection default;
# Kill the server
disconnect master1;
disconnect master2;
disconnect master3;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-20/ in mysqld.1.err
disconnect master4;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
FOUND 1 /Successfully truncated.*to remove transactions starting from GTID 0-1-21/ in mysqld.1.err
Pre-crash binlog file content:
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
......@@ -226,11 +278,14 @@ master-bin.000004 # Query # # XA END X'786964',X'',1
master-bin.000004 # XA_prepare # # XA PREPARE X'786964',X'',1
SELECT @@global.gtid_binlog_pos as 'After the crash';
After the crash
0-1-19
0-1-20
"One row should be present in table 't'"
SELECT * FROM t;
f
10
"No row should be present in table 't4'"
SELECT * FROM t4;
f
DELETE FROM t;
DROP PROCEDURE sp_xa;
# Cleanup
......
......@@ -22,6 +22,11 @@ SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT
INSERT INTO ti VALUES (3, "not gonna survive");
connection default;
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
connect master3,localhost,root,,;
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master3_ready WAIT_FOR master3_go_never_arrives";
INSERT INTO ti VALUES (4, "not gonna be log therefore survive"),(5, "ditto");
connection default;
SET DEBUG_SYNC= "now WAIT_FOR master3_ready";
"List of binary logs before crash"
show binary logs;
Log_name File_size
......@@ -36,7 +41,8 @@ connection default;
# Kill the server
disconnect master1;
disconnect master2;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
disconnect master3;
# restart: --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
FOUND 1 /truncated binlog file:.*master.*000002/ in mysqld.1.err
"One record should be present in table"
SELECT * FROM ti;
......
connect(master1,localhost,root,,);
connect(master2,localhost,root,,);
connect(master3,localhost,root,,);
connect(master4,localhost,root,,);
--connection default
......@@ -22,16 +23,26 @@ SET DEBUG_SYNC= "commit_before_get_LOCK_after_binlog_sync SIGNAL master2_ready";
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash';
--connection master4
# Simulate prepared & not-logged trx; it will never recover.
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master4_ready WAIT_FOR signal_never_arrives";
--send INSERT INTO t4 VALUES (13)
--connection master3
SET DEBUG_SYNC= "now WAIT_FOR master4_ready";
SELECT @@global.gtid_binlog_pos as 'Before the crash and never logged trx';
--connection default
--source include/kill_mysqld.inc
--disconnect master1
--disconnect master2
--disconnect master3
--disconnect master4
#
# Server restart
#
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
--source include/start_mysqld.inc
# Check error log for a successful truncate message.
......@@ -49,6 +60,8 @@ SELECT @@global.gtid_binlog_pos as 'Before the crash';
SELECT @@global.gtid_binlog_pos as 'After the crash';
--echo "One row should be present in table 't'"
SELECT * FROM t;
--echo "No row should be present in table 't4'"
SELECT * FROM t4;
# prepare binlog file index for the next test
--inc $binlog_file_index
......
......@@ -25,6 +25,7 @@ RESET MASTER;
SET @@global.sync_binlog=1;
CREATE TABLE t (f INT) ENGINE=INNODB;
CREATE TABLE t2 (f INT) ENGINE=INNODB;
CREATE TABLE t4 (f INT) ENGINE=INNODB;
CREATE TABLE tm (f INT) ENGINE=Aria;
# Old (pre-crash) binlog file index initial value.
......@@ -42,14 +43,14 @@ CREATE TABLE tm (f INT) ENGINE=Aria;
# 'query1' onwards will be removed from the binary log.
# Show-binlog-events is to prove that.
--let $truncate_gtid_pos = 0-1-6
--let $truncate_gtid_pos = 0-1-7
--let $query1 = INSERT INTO t VALUES (20)
--let $query2 = DELETE FROM t2 WHERE f = 0 /* no such record */
--source binlog_truncate_active_log.inc
--echo # Case B.
# The inverted sequence ends up to truncate starting from $query2
--let $truncate_gtid_pos = 0-1-10
--let $truncate_gtid_pos = 0-1-11
--let $query1 = DELETE FROM t2 WHERE f = 0
--let $query2 = INSERT INTO t VALUES (20)
--source binlog_truncate_active_log.inc
......@@ -68,7 +69,7 @@ delimiter ;|
# The same as in A with $query2 being the zero-engine XA transaction.
# Both $query1 and $query2 are going to be truncated.
--let $truncate_gtid_pos = 0-1-14
--let $truncate_gtid_pos = 0-1-15
--let $query1 = INSERT INTO t VALUES (20)
--let $query2 = CALL sp_blank_xa
--source binlog_truncate_active_log.inc
......@@ -89,7 +90,7 @@ delimiter ;|
# The same as in B with $query1 being the prepared XA transaction.
# Truncation must occurs at $query2.
--let $truncate_gtid_pos = 0-1-20
--let $truncate_gtid_pos = 0-1-21
--let $query1 = CALL sp_xa
--let $query2 = INSERT INTO t2 VALUES (20)
--source binlog_truncate_active_log.inc
......
......@@ -40,6 +40,15 @@ SET DEBUG_SYNC= "commit_before_get_LOCK_commit_ordered SIGNAL master2_ready WAIT
--connection default
SET DEBUG_SYNC= "now WAIT_FOR master2_ready";
connect(master3,localhost,root,,);
# The 3nd trx for recovery, it won't get into binlog nor therefore recover
SET DEBUG_SYNC= "ha_commit_trans_before_log_and_order SIGNAL master3_ready WAIT_FOR master3_go_never_arrives";
--send INSERT INTO ti VALUES (4, "not gonna be log therefore survive"),(5, "ditto")
--connection default
SET DEBUG_SYNC= "now WAIT_FOR master3_ready";
--echo "List of binary logs before crash"
--source include/show_binary_logs.inc
--echo # The gtid binlog state prior the crash will be truncated at the end of the test
......@@ -49,11 +58,12 @@ SELECT @@global.gtid_binlog_state;
--source include/kill_mysqld.inc
--disconnect master1
--disconnect master2
--disconnect master3
#
# Server restart
#
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1
--let $restart_parameters= --rpl-semi-sync-slave-enabled=1 --sync-binlog=1 --log-warnings=3
--source include/start_mysqld.inc
# Check error log for a successful truncate message.
......
......@@ -963,6 +963,7 @@ typedef struct xid_t XID;
*/
typedef uint Binlog_file_id;
const Binlog_file_id MAX_binlog_id= UINT_MAX;
const my_off_t MAX_off_t = (~(my_off_t) 0);
/*
Compound binlog-id and byte offset of transaction's first event
in a sequence (e.g the recovery sequence) of binlog files.
......@@ -977,14 +978,22 @@ struct xid_recovery_member
my_xid xid;
uint in_engine_prepare; // number of engines that have xid prepared
bool decided_to_commit;
Binlog_offset binlog_coord; // semisync recovery binlog offset
/*
Semisync recovery binlog offset. It's initialized with the maximum
unreachable offset. The max value will remain for any transaction
not found in binlog to yield its rollback decision as it's guaranteed
to be within a truncated tail part of the binlog.
*/
Binlog_offset binlog_coord;
XID *full_xid; // needed by wsrep or past it recovery
decltype(::server_id) server_id; // server id of orginal server
xid_recovery_member(my_xid xid_arg, uint prepare_arg, bool decided_arg,
XID *full_xid_arg, decltype(::server_id) server_id_arg)
: xid(xid_arg), in_engine_prepare(prepare_arg),
decided_to_commit(decided_arg), full_xid(full_xid_arg) , server_id(server_id_arg) {};
decided_to_commit(decided_arg),
binlog_coord(Binlog_offset(MAX_binlog_id, MAX_off_t)),
full_xid(full_xid_arg), server_id(server_id_arg) {};
};
/* for recover() handlerton call */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment