Commit c89f769f authored by sjaakola's avatar sjaakola Committed by Julius Goryavsky

MDEV-31905 GTID inconsistency

This commit fixes a GTID inconsistency that was introduced by mariabackup SST.
The donor node now writes a new info file, donor_galera_info, which is streamed
along with the mariabackup donation to the joiner node. The donor_galera_info
file contains both the GTID and the gtid domain_id, and the joiner uses these to
initialize its GTID state.

The commit adds a new mtr test case, galera_3nodes.galera_gtid_consistency, which
exercises potentially harmful mariabackup SST scenarios. The test also includes
a scenario with IST joining.
Signed-off-by: default avatarJulius Goryavsky <julius.goryavsky@mariadb.com>
parent 569381df
...@@ -1674,6 +1674,7 @@ ibx_copy_incremental_over_full() ...@@ -1674,6 +1674,7 @@ ibx_copy_incremental_over_full()
NULL}; NULL};
const char *sup_files[] = {"xtrabackup_binlog_info", const char *sup_files[] = {"xtrabackup_binlog_info",
"xtrabackup_galera_info", "xtrabackup_galera_info",
"donor_galera_info",
"xtrabackup_slave_info", "xtrabackup_slave_info",
"xtrabackup_info", "xtrabackup_info",
"ib_lru_dump", "ib_lru_dump",
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
/* special files */ /* special files */
#define XTRABACKUP_SLAVE_INFO "xtrabackup_slave_info" #define XTRABACKUP_SLAVE_INFO "xtrabackup_slave_info"
#define XTRABACKUP_GALERA_INFO "xtrabackup_galera_info" #define XTRABACKUP_GALERA_INFO "xtrabackup_galera_info"
#define XTRABACKUP_DONOR_GALERA_INFO "donor_galera_info"
#define XTRABACKUP_BINLOG_INFO "xtrabackup_binlog_info" #define XTRABACKUP_BINLOG_INFO "xtrabackup_binlog_info"
#define XTRABACKUP_INFO "xtrabackup_info" #define XTRABACKUP_INFO "xtrabackup_info"
......
...@@ -1424,6 +1424,7 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection) ...@@ -1424,6 +1424,7 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection)
{ {
char *state_uuid = NULL, *state_uuid55 = NULL; char *state_uuid = NULL, *state_uuid55 = NULL;
char *last_committed = NULL, *last_committed55 = NULL; char *last_committed = NULL, *last_committed55 = NULL;
char *domain_id = NULL, *domain_id55 = NULL;
bool result; bool result;
mysql_variable status[] = { mysql_variable status[] = {
...@@ -1434,6 +1435,12 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection) ...@@ -1434,6 +1435,12 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection)
{NULL, NULL} {NULL, NULL}
}; };
mysql_variable value[] = {
{"Wsrep_gtid_domain_id", &domain_id},
{"wsrep_gtid_domain_id", &domain_id55},
{NULL, NULL}
};
/* When backup locks are supported by the server, we should skip /* When backup locks are supported by the server, we should skip
creating xtrabackup_galera_info file on the backup stage, because creating xtrabackup_galera_info file on the backup stage, because
wsrep_local_state_uuid and wsrep_last_committed will be inconsistent wsrep_local_state_uuid and wsrep_last_committed will be inconsistent
...@@ -1452,9 +1459,26 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection) ...@@ -1452,9 +1459,26 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection)
goto cleanup; goto cleanup;
} }
read_mysql_variables(connection, "SHOW VARIABLES LIKE 'wsrep%'", value, true);
if (domain_id == NULL && domain_id55 == NULL) {
msg("Warning: failed to get master wsrep state from SHOW VARIABLES.");
result = true;
goto cleanup;
}
result = datasink->backup_file_printf(XTRABACKUP_GALERA_INFO, result = datasink->backup_file_printf(XTRABACKUP_GALERA_INFO,
"%s:%s\n", state_uuid ? state_uuid : state_uuid55, "%s:%s %s\n", state_uuid ? state_uuid : state_uuid55,
last_committed ? last_committed : last_committed55); last_committed ? last_committed : last_committed55,
domain_id ? domain_id : domain_id55);
if (result)
{
result= datasink->backup_file_printf(XTRABACKUP_DONOR_GALERA_INFO,
"%s:%s %s\n", state_uuid ? state_uuid : state_uuid55,
last_committed ? last_committed : last_committed55,
domain_id ? domain_id : domain_id55);
}
if (result) if (result)
{ {
write_current_binlog_file(datasink, connection); write_current_binlog_file(datasink, connection);
......
...@@ -53,6 +53,7 @@ permission notice: ...@@ -53,6 +53,7 @@ permission notice:
/*! Name of file where Galera info is stored on recovery */ /*! Name of file where Galera info is stored on recovery */
#define XB_GALERA_INFO_FILENAME "xtrabackup_galera_info" #define XB_GALERA_INFO_FILENAME "xtrabackup_galera_info"
#define XB_GALERA_DONOR_INFO_FILENAME "donor_galera_info"
/*********************************************************************** /***********************************************************************
Store Galera checkpoint info in the 'xtrabackup_galera_info' file, if that Store Galera checkpoint info in the 'xtrabackup_galera_info' file, if that
...@@ -67,7 +68,7 @@ xb_write_galera_info(bool incremental_prepare) ...@@ -67,7 +68,7 @@ xb_write_galera_info(bool incremental_prepare)
long long seqno; long long seqno;
MY_STAT statinfo; MY_STAT statinfo;
/* Do not overwrite existing an existing file to be compatible with /* Do not overwrite an existing file to be compatible with
servers with older server versions */ servers with older server versions */
if (!incremental_prepare && if (!incremental_prepare &&
my_stat(XB_GALERA_INFO_FILENAME, &statinfo, MYF(0)) != NULL) { my_stat(XB_GALERA_INFO_FILENAME, &statinfo, MYF(0)) != NULL) {
...@@ -101,10 +102,11 @@ xb_write_galera_info(bool incremental_prepare) ...@@ -101,10 +102,11 @@ xb_write_galera_info(bool incremental_prepare)
seqno = wsrep_xid_seqno(&xid); seqno = wsrep_xid_seqno(&xid);
msg("mariabackup: Recovered WSREP position: %s:%lld\n", msg("mariabackup: Recovered WSREP position: %s:%lld domain_id: %lld\n",
uuid_str, (long long) seqno); uuid_str, (long long) seqno, (long long)wsrep_get_domain_id());
if (fprintf(fp, "%s:%lld", uuid_str, (long long) seqno) < 0) { if (fprintf(fp, "%s:%lld %lld", uuid_str, (long long) seqno,
(long long)wsrep_get_domain_id()) < 0) {
die( die(
"could not write to " XB_GALERA_INFO_FILENAME "could not write to " XB_GALERA_INFO_FILENAME
......
...@@ -92,6 +92,7 @@ extern struct wsrep_service_st { ...@@ -92,6 +92,7 @@ extern struct wsrep_service_st {
void (*wsrep_thd_kill_LOCK_func)(const MYSQL_THD thd); void (*wsrep_thd_kill_LOCK_func)(const MYSQL_THD thd);
void (*wsrep_thd_kill_UNLOCK_func)(const MYSQL_THD thd); void (*wsrep_thd_kill_UNLOCK_func)(const MYSQL_THD thd);
void (*wsrep_thd_set_wsrep_PA_unsafe_func)(MYSQL_THD thd); void (*wsrep_thd_set_wsrep_PA_unsafe_func)(MYSQL_THD thd);
uint32 (*wsrep_get_domain_id_func)();
} *wsrep_service; } *wsrep_service;
#define MYSQL_SERVICE_WSREP_INCLUDED #define MYSQL_SERVICE_WSREP_INCLUDED
...@@ -139,6 +140,7 @@ extern struct wsrep_service_st { ...@@ -139,6 +140,7 @@ extern struct wsrep_service_st {
#define wsrep_thd_set_ignored_error(T,V) wsrep_service->wsrep_thd_set_ignored_error_func(T,V) #define wsrep_thd_set_ignored_error(T,V) wsrep_service->wsrep_thd_set_ignored_error_func(T,V)
#define wsrep_report_bf_lock_wait(T,I) wsrep_service->wsrep_report_bf_lock_wait(T,I) #define wsrep_report_bf_lock_wait(T,I) wsrep_service->wsrep_report_bf_lock_wait(T,I)
#define wsrep_thd_set_PA_unsafe(T) wsrep_service->wsrep_thd_set_PA_unsafe_func(T) #define wsrep_thd_set_PA_unsafe(T) wsrep_service->wsrep_thd_set_PA_unsafe_func(T)
/* Takes no arguments: wsrep_get_domain_id_func has an empty parameter list
   and callers invoke this as wsrep_get_domain_id(). */
#define wsrep_get_domain_id() wsrep_service->wsrep_get_domain_id_func()
#else #else
#define MYSQL_SERVICE_WSREP_STATIC_INCLUDED #define MYSQL_SERVICE_WSREP_STATIC_INCLUDED
...@@ -241,5 +243,6 @@ extern "C" void wsrep_report_bf_lock_wait(const THD *thd, ...@@ -241,5 +243,6 @@ extern "C" void wsrep_report_bf_lock_wait(const THD *thd,
unsigned long long trx_id); unsigned long long trx_id);
/* declare parallel applying unsafety for the THD */ /* declare parallel applying unsafety for the THD */
extern "C" void wsrep_thd_set_PA_unsafe(MYSQL_THD thd); extern "C" void wsrep_thd_set_PA_unsafe(MYSQL_THD thd);
extern "C" uint32 wsrep_get_domain_id();
#endif #endif
#endif /* MYSQL_SERVICE_WSREP_INCLUDED */ #endif /* MYSQL_SERVICE_WSREP_INCLUDED */
[rsync]
wsrep-sst-method=rsync
[mariabackup]
wsrep_sst_method=mariabackup
# The goal of including this file is to enable galera_sst_method combinations
# (see include/galera_sst_method.combinations)
--source include/have_innodb.inc
...@@ -14,6 +14,7 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name; ...@@ -14,6 +14,7 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name;
@@wsrep_gtid_domain_id @@wsrep_node_name @@wsrep_gtid_domain_id @@wsrep_node_name
100 node3 100 node3
connection node_3; connection node_3;
connection node_1;
connection node_2; connection node_2;
connection node_1; connection node_1;
connection node_1; connection node_1;
......
connection node_2;
connection node_1;
connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3;
connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2;
set wsrep_sync_wait=0;
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
set wsrep_sync_wait=0;
connection node_1;
CREATE PROCEDURE insert_row (IN node varchar(10), IN repeat_count int)
BEGIN
DECLARE current_num int;
SET current_num = 0;
WHILE current_num < repeat_count do
INSERT INTO t1(node, name) VALUES (node, UUID());
SET current_num = current_num + 1;
END WHILE;
END|
CREATE TABLE t1 (id bigint not null primary key auto_increment, node VARCHAR(10), name VARCHAR(64)) ENGINE=innodb;
# node_1
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2
connection node_2;
# node_2
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2
connection node_3;
# node_3
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2
connection node_1;
CALL insert_row('node1', 500);;
connection node_2;
CALL insert_row('node2', 500);;
connection node_3;
CALL insert_row('node3', 500);;
connection node_2;
# Shutdown node_2, force SST
connection node_2b;
# Wait until node_2 leaves cluster
connection node_1b;
connection node_1;
connection node_3;
connection node_1;
CALL insert_row('node1', 500);
connection node_3;
CALL insert_row('node3', 500);
CREATE TABLE t2(i int primary key) engine=innodb;
connection node_2;
# Restart node_2
# restart
connection node_1b;
# Wait until node_2 is back in cluster
# node2 has joined
# GTID in node1
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2503
connection node_2;
# GTID in node2
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2503
connection node_3;
# GTID in node3
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2503
# Shutdown node_3
connection node_3;
SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 1';
# Wait until node_3 leaves cluster
connection node_1b;
connection node_1;
CALL insert_row('node1', 50);
CREATE TABLE t3(i int primary key) engine=innodb;
connection node_3;
# Rejoin node_3
SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 0';
connection node_1b;
# Wait until node_3 is back in cluster
# node3 has joined
connection node_1;
# GTID in node1
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
connection node_2;
# GTID in node2
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
connection node_3;
# GTID in node3
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
# One by one shutdown all nodes
connection node_3;
# shutdown node_3
connection node_2;
# wait until node_3 is out of cluster
# shutdown node_2
connection node_1;
# wait until node_2 is out of cluster
# shutdown node_1
# Bootstrap from node_1
connection node_1;
# restart: --wsrep_new_cluster
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
ANALYZE TABLE t2;
Table Op Msg_type Msg_text
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
CALL insert_row('node1', 100);;
# Restart node_2
connection node_2;
# restart
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
set wsrep_sync_wait=0;
connection node_1c;
# wait until node_1 and node_2 are in cluster
connection node_2;
ALTER TABLE t2 ADD COLUMN (k int);
CALL insert_row('node2', 100);;
# Restart node_3
connection node_3;
# restart
connection node_1c;
# wait until all nodes are back in cluster
after cluster restart
connection node_2;
connection node_1;
connection node_1;
node1 GTID
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2756
connection node_2;
node2 GTID
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2756
connection node_3;
node3 GTID
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2756
connection node_1;
table size in node1
SELECT COUNT(*) FROM t1;
COUNT(*)
2750
connection node_2;
table size in node2
SELECT COUNT(*) FROM t1;
COUNT(*)
2750
connection node_3;
table size in node3
SELECT COUNT(*) FROM t1;
COUNT(*)
2750
connection node_2;
call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node");
call mtr.add_suppression("WSREP: Sending JOIN failed:.*");
call mtr.add_suppression("Sending JOIN failed:.*");
call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*");
connection node_3;
call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node");
call mtr.add_suppression("WSREP: Sending JOIN failed:.*");
call mtr.add_suppression("Sending JOIN failed:.*");
call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*");
# cleanup
connection node_1;
DROP PROCEDURE insert_row;
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
connection node_3;
connection node_2;
disconnect node_3;
disconnect node_2b;
disconnect node_1b;
disconnect node_1c;
...@@ -4,11 +4,13 @@ ...@@ -4,11 +4,13 @@
# #
--source include/galera_cluster.inc --source include/galera_cluster.inc
--source include/have_innodb.inc --source include/galera_sst_method.inc
--source include/force_restart.inc
# #
# Initially wsrep gtid domain id is 100 # Initially wsrep gtid domain id is 100
# #
--connection node_1 --connection node_1
select @@wsrep_gtid_domain_id,@@wsrep_node_name; select @@wsrep_gtid_domain_id,@@wsrep_node_name;
...@@ -26,6 +28,10 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name; ...@@ -26,6 +28,10 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name;
--connection node_3 --connection node_3
--source include/shutdown_mysqld.inc --source include/shutdown_mysqld.inc
--connection node_1
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--connection node_2 --connection node_2
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size'; --let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc --source include/wait_condition.inc
...@@ -36,6 +42,7 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name; ...@@ -36,6 +42,7 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name;
--source include/wait_condition.inc --source include/wait_condition.inc
--source include/shutdown_mysqld.inc --source include/shutdown_mysqld.inc
--sleep 5
# #
# Bootstrap from node_1 and change wsrep_gtid_domain_id to 200 # Bootstrap from node_1 and change wsrep_gtid_domain_id to 200
...@@ -45,12 +52,11 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name; ...@@ -45,12 +52,11 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name;
--source include/start_mysqld.inc --source include/start_mysqld.inc
show variables like 'wsrep_gtid_domain_id'; show variables like 'wsrep_gtid_domain_id';
# #
# Restart node_2, expect that wsrep_gtid_domain_id has changed to 200 # Restart node_2, expect that wsrep_gtid_domain_id has changed to 200
# #
--connection node_2 --connection node_2
--let $restart_parameters = --let $restart_parameters =
--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.2.expect --let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
--source include/start_mysqld.inc --source include/start_mysqld.inc
show variables like 'wsrep_gtid_domain_id'; show variables like 'wsrep_gtid_domain_id';
......
!include ../galera_3nodes.cnf
[mysqld.1]
wsrep-node-name="node1"
wsrep_gtid_domain_id=1111
gtid_domain_id=2
server_id=10999
wsrep_sst_auth="root:"
wsrep_sst_method=mariabackup
log_slave_updates=ON
log_bin=mariadb-bin-log
binlog-format=row
wsrep-gtid-mode=ON
[mysqld.2]
wsrep-node-name="node2"
wsrep_gtid_domain_id=1112
gtid_domain_id=3
wsrep_sst_auth="root:"
wsrep_sst_method=mariabackup
log_slave_updates=ON
log_bin=mariadb-bin-log
binlog-format=row
wsrep-gtid-mode=ON
[mysqld.3]
wsrep-node-name="node3"
wsrep_gtid_domain_id=1113
gtid_domain_id=4
wsrep_sst_auth="root:"
wsrep_sst_method=mariabackup
log_slave_updates=ON
log_bin=mariadb-bin-log
binlog-format=row
wsrep-gtid-mode=ON
--source include/galera_cluster.inc
--source include/big_test.inc
--source include/force_restart.inc
#
# Testing gtid consistency in 3 node cluster when nodes drop
# and join back to cluster.
# The tests verify that wsrep_gtid_domain_id and gtid_binlog_pos
# remain the same across the cluster.
# In the configuration, nodes have different wsrep_gtid_domain_id
# but all nodes are supposed to receive effective domain id
# from the bootstrap node (node_1), and use it
#
--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3
--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2
set wsrep_sync_wait=0;
--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
set wsrep_sync_wait=0;
--connection node_1
DELIMITER |;
CREATE PROCEDURE insert_row (IN node varchar(10), IN repeat_count int)
BEGIN
DECLARE current_num int;
SET current_num = 0;
WHILE current_num < repeat_count do
INSERT INTO t1(node, name) VALUES (node, UUID());
SET current_num = current_num + 1;
END WHILE;
END|
DELIMITER ;|
CREATE TABLE t1 (id bigint not null primary key auto_increment, node VARCHAR(10), name VARCHAR(64)) ENGINE=innodb;
#
# report initial gtid positions after table t1 is created
#
--echo # node_1
show variables like '%gtid_binlog_pos%';
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'
--source include/wait_condition.inc
--echo # node_2
show variables like '%gtid_binlog_pos%';
--connection node_3
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'
--source include/wait_condition.inc
--echo # node_3
show variables like '%gtid_binlog_pos%';
###########################################
# scenario: join node 2 by SST
##########################################
#
# start concurrent insert load and stop node2 while the load is on
#
--connection node_1
--send CALL insert_row('node1', 500);
--connection node_2
--send CALL insert_row('node2', 500);
--connection node_3
--send CALL insert_row('node3', 500);
#
# stop load to node 2 and shutdown the node, force SST
#
--connection node_2
--reap
--echo # Shutdown node_2, force SST
--connection node_2b
--source include/shutdown_mysqld.inc
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat
--echo # Wait until node_2 leaves cluster
--connection node_1b
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
#
# stop the remaining load to node 1 and 3
#
--connection node_1
--reap
--connection node_3
--reap
#
# some more inserts and DDL to nodes 1 and 3
# while node 2 is absent
#
--connection node_1
CALL insert_row('node1', 500);
--connection node_3
CALL insert_row('node3', 500);
CREATE TABLE t2(i int primary key) engine=innodb;
#
# restart node 2, should join by SST
#
--connection node_2
--echo # Restart node_2
--source include/start_mysqld.inc
--connection node_1b
--echo # Wait until node_2 is back in cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--echo # node2 has joined
#
# check gtid positions in all nodes
#
--echo # GTID in node1
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_2
--echo # GTID in node2
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_3
--echo # GTID in node3
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
###########################################
# scenario: join node 3 by IST
##########################################
--echo # Shutdown node_3
--connection node_3
SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 1';
--echo # Wait until node_3 leaves cluster
--connection node_1b
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
#
# do some inserts and DDL to node 1
# while node 3 is absent
#
--connection node_1
CALL insert_row('node1', 50);
CREATE TABLE t3(i int primary key) engine=innodb;
#
# remove isolation in node 3, should join by IST
#
--connection node_3
--echo # Rejoin node_3
SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 0';
--connection node_1b
--echo # Wait until node_3 is back in cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--echo # node3 has joined
#
# check gtid positions in all nodes
#
--connection node_1
--echo # GTID in node1
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_2
--echo # GTID in node2
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_3
--echo # GTID in node3
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
###########################################
# scenario: restart full cluster
##########################################
#
# stop all nodes, one by one
#
--echo # One by one shutdown all nodes
--connection node_3
--echo # shutdown node_3
--source include/shutdown_mysqld.inc
--remove_file $MYSQLTEST_VARDIR/mysqld.3/data/grastate.dat
--connection node_2
--echo # wait until node_3 is out of cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--echo # shutdown node_2
--source include/shutdown_mysqld.inc
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat
--connection node_1
--echo # wait until node_2 is out of cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--echo # shutdown node_1
--source include/shutdown_mysqld.inc
#
# bootstrap cluster in order node1 - node2 - node3
# send some inserts and DDL after each node started
#
--sleep 5
--echo # Bootstrap from node_1
--connection node_1
--let $restart_parameters = --wsrep_new_cluster
--source include/start_mysqld.inc
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
ANALYZE TABLE t2;
--send CALL insert_row('node1', 100);
--echo # Restart node_2
--connection node_2
--let $restart_parameters =
--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
--source include/start_mysqld.inc
#
# connection node_1b may not be functional anymore, after node was
# shutdown, open node_1c for controlling node 1 state
#
--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1
set wsrep_sync_wait=0;
--connection node_1c
--echo # wait until node_1 and node_2 are in cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--connection node_2
ALTER TABLE t2 ADD COLUMN (k int);
--send CALL insert_row('node2', 100);
--echo # Restart node_3
--connection node_3
--let $restart_parameters =
--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
--source include/start_mysqld.inc
--connection node_1c
--echo # wait until all nodes are back in cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--echo after cluster restart
# stop load for nodes 1 and 2
--connection node_2
--reap
--connection node_1
--reap
#
# check gtid positions in all nodes
#
--connection node_1
--echo node1 GTID
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_2
--echo node2 GTID
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_3
--echo node3 GTID
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
#
# check table size in all nodes
#
--connection node_1
--echo table size in node1
SELECT COUNT(*) FROM t1;
--connection node_2
--echo table size in node2
SELECT COUNT(*) FROM t1;
--connection node_3
--echo table size in node3
SELECT COUNT(*) FROM t1;
#
# cleanups
#
--connection node_2
call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node");
call mtr.add_suppression("WSREP: Sending JOIN failed:.*");
call mtr.add_suppression("Sending JOIN failed:.*");
call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*");
--connection node_3
call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node");
call mtr.add_suppression("WSREP: Sending JOIN failed:.*");
call mtr.add_suppression("Sending JOIN failed:.*");
call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*");
--echo # cleanup
--connection node_1
DROP PROCEDURE insert_row;
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
# t3 is the last table dropped on node_1, so wait for t3 (not t2) to
# disappear: only then has the whole cleanup been applied on this node.
--connection node_3
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't3'
--source include/wait_condition.inc
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't3'
--source include/wait_condition.inc
--disconnect node_3
--disconnect node_2b
--disconnect node_1b
--disconnect node_1c
...@@ -104,8 +104,10 @@ fi ...@@ -104,8 +104,10 @@ fi
DATA="$WSREP_SST_OPT_DATA" DATA="$WSREP_SST_OPT_DATA"
INFO_FILE='xtrabackup_galera_info' INFO_FILE='xtrabackup_galera_info'
DONOR_INFO_FILE='donor_galera_info'
IST_FILE='xtrabackup_ist' IST_FILE='xtrabackup_ist'
MAGIC_FILE="$DATA/$INFO_FILE" MAGIC_FILE="$DATA/$INFO_FILE"
DONOR_MAGIC_FILE="$DATA/$DONOR_INFO_FILE"
INNOAPPLYLOG="$DATA/mariabackup.prepare.log" INNOAPPLYLOG="$DATA/mariabackup.prepare.log"
INNOMOVELOG="$DATA/mariabackup.move.log" INNOMOVELOG="$DATA/mariabackup.move.log"
...@@ -651,14 +653,14 @@ get_stream() ...@@ -651,14 +653,14 @@ get_stream()
if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
strmcmd="'$STREAM_BIN' -x" strmcmd="'$STREAM_BIN' -x"
else else
strmcmd="'$STREAM_BIN' -c '$INFO_FILE'" strmcmd="'$STREAM_BIN' -c '$INFO_FILE' '$DONOR_INFO_FILE'"
fi fi
else else
sfmt='tar' sfmt='tar'
if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
strmcmd='tar xfi -' strmcmd='tar xfi -'
else else
strmcmd="tar cf - '$INFO_FILE'" strmcmd="tar cf - '$INFO_FILE' '$DONOR_INFO_FILE'"
fi fi
fi fi
wsrep_log_info "Streaming with $sfmt" wsrep_log_info "Streaming with $sfmt"
...@@ -680,6 +682,7 @@ cleanup_at_exit() ...@@ -680,6 +682,7 @@ cleanup_at_exit()
if [ $estatus -ne 0 ]; then if [ $estatus -ne 0 ]; then
wsrep_log_error "Removing $MAGIC_FILE file due to signal" wsrep_log_error "Removing $MAGIC_FILE file due to signal"
[ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" || : [ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" || :
[ -f "$DONOR_MAGIC_FILE" ] && rm -f "$DONOR_MAGIC_FILE" || :
fi fi
if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
...@@ -916,6 +919,7 @@ monitor_process() ...@@ -916,6 +919,7 @@ monitor_process()
} }
[ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" [ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE"
# The [ -f ] test only passes for a regular file, so a non-recursive
# 'rm -f' is sufficient.
[ -f "$DONOR_MAGIC_FILE" ] && rm -f "$DONOR_MAGIC_FILE"
read_cnf read_cnf
setup_ports setup_ports
...@@ -1048,7 +1052,7 @@ send_magic() ...@@ -1048,7 +1052,7 @@ send_magic()
# Store donor's wsrep GTID (state ID) and wsrep_gtid_domain_id # Store donor's wsrep GTID (state ID) and wsrep_gtid_domain_id
# (separated by a space). # (separated by a space).
echo "$WSREP_SST_OPT_GTID $WSREP_SST_OPT_GTID_DOMAIN_ID" > "$MAGIC_FILE" echo "$WSREP_SST_OPT_GTID $WSREP_SST_OPT_GTID_DOMAIN_ID" > "$MAGIC_FILE"
echo "$WSREP_SST_OPT_GTID $WSREP_SST_OPT_GTID_DOMAIN_ID" > "$DONOR_MAGIC_FILE"
if [ -n "$WSREP_SST_OPT_REMOTE_PSWD" ]; then if [ -n "$WSREP_SST_OPT_REMOTE_PSWD" ]; then
# Let joiner know that we know its secret # Let joiner know that we know its secret
echo "$SECRET_TAG $WSREP_SST_OPT_REMOTE_PSWD" >> "$MAGIC_FILE" echo "$SECRET_TAG $WSREP_SST_OPT_REMOTE_PSWD" >> "$MAGIC_FILE"
...@@ -1594,9 +1598,16 @@ else # joiner ...@@ -1594,9 +1598,16 @@ else # joiner
exit 2 exit 2
fi fi
# use donor magic file, if present
# if IST was used, donor magic file was not created
# Remove special tags from the magic file, and from the output: # Remove special tags from the magic file, and from the output:
coords=$(head -n1 "$MAGIC_FILE") if [ -r "$DONOR_MAGIC_FILE" ]; then
wsrep_log_info "Galera co-ords from recovery: $coords" coords=$(head -n1 "$DONOR_MAGIC_FILE")
wsrep_log_info "Galera co-ords from donor: $coords"
else
coords=$(head -n1 "$MAGIC_FILE")
wsrep_log_info "Galera co-ords from recovery: $coords"
fi
echo "$coords" # Output : UUID:seqno wsrep_gtid_domain_id echo "$coords" # Output : UUID:seqno wsrep_gtid_domain_id
wsrep_log_info "Total time on joiner: $totime seconds" wsrep_log_info "Total time on joiner: $totime seconds"
......
...@@ -409,3 +409,8 @@ extern "C" void wsrep_thd_set_PA_unsafe(THD *thd) ...@@ -409,3 +409,8 @@ extern "C" void wsrep_thd_set_PA_unsafe(THD *thd)
WSREP_DEBUG("session does not have active transaction, can not mark as PA unsafe"); WSREP_DEBUG("session does not have active transaction, can not mark as PA unsafe");
} }
} }
/* Returns the current value of the global wsrep_gtid_domain_id variable.
   Declared extern "C" so it can be referenced by its unmangled name. */
extern "C" uint32 wsrep_get_domain_id()
{
return wsrep_gtid_domain_id;
}
...@@ -181,7 +181,8 @@ static struct wsrep_service_st wsrep_handler = { ...@@ -181,7 +181,8 @@ static struct wsrep_service_st wsrep_handler = {
wsrep_report_bf_lock_wait, wsrep_report_bf_lock_wait,
wsrep_thd_kill_LOCK, wsrep_thd_kill_LOCK,
wsrep_thd_kill_UNLOCK, wsrep_thd_kill_UNLOCK,
wsrep_thd_set_PA_unsafe wsrep_thd_set_PA_unsafe,
wsrep_get_domain_id
}; };
static struct thd_specifics_service_st thd_specifics_handler= static struct thd_specifics_service_st thd_specifics_handler=
......
...@@ -161,3 +161,5 @@ void wsrep_report_bf_lock_wait(const THD*, ...@@ -161,3 +161,5 @@ void wsrep_report_bf_lock_wait(const THD*,
void wsrep_thd_set_PA_unsafe(THD*) void wsrep_thd_set_PA_unsafe(THD*)
{} {}
/* Always reports domain ID 0.
   NOTE(review): this looks like the no-op stub variant, sitting next to the
   empty wsrep_thd_set_PA_unsafe() body -- confirm against the build setup. */
uint32 wsrep_get_domain_id()
{ return 0;}
...@@ -652,7 +652,9 @@ static void* sst_joiner_thread (void* a) ...@@ -652,7 +652,9 @@ static void* sst_joiner_thread (void* a)
else else
{ {
// Read state ID (UUID:SEQNO) followed by wsrep_gtid_domain_id (if any). // Read state ID (UUID:SEQNO) followed by wsrep_gtid_domain_id (if any).
unsigned long int domain_id= wsrep_gtid_domain_id;
const char *pos= strchr(out, ' '); const char *pos= strchr(out, ' ');
WSREP_DEBUG("SST state ID tmp=%s out=%s pos=%p", tmp, out, pos);
if (!pos) { if (!pos) {
...@@ -662,6 +664,13 @@ static void* sst_joiner_thread (void* a) ...@@ -662,6 +664,13 @@ static void* sst_joiner_thread (void* a)
WSREP_WARN("Did not find domain ID from SST script output '%s'. " WSREP_WARN("Did not find domain ID from SST script output '%s'. "
"Domain ID must be set manually to keep binlog consistent", "Domain ID must be set manually to keep binlog consistent",
out); out);
if (wsrep_gtid_domain_id)
{
WSREP_INFO("This node is configured to use wsrep_gtid_domain_id=%lu by user.",
domain_id);
wsrep_gtid_server.domain_id= (uint32)domain_id;
wsrep_gtid_domain_id= (uint32)domain_id;
}
} }
err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno); err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno);
...@@ -1688,6 +1697,8 @@ static int sst_flush_tables(THD* thd) ...@@ -1688,6 +1697,8 @@ static int sst_flush_tables(THD* thd)
char content[100]; char content[100];
snprintf(content, sizeof(content), "%s:%lld %d\n", wsrep_cluster_state_uuid, snprintf(content, sizeof(content), "%s:%lld %d\n", wsrep_cluster_state_uuid,
(long long)wsrep_locked_seqno, wsrep_gtid_server.domain_id); (long long)wsrep_locked_seqno, wsrep_gtid_server.domain_id);
WSREP_DEBUG("sst_flush_tables : %s:%lld %d", wsrep_cluster_state_uuid,
(long long)wsrep_locked_seqno, wsrep_gtid_server.domain_id);
err= sst_create_file(flush_success, content); err= sst_create_file(flush_success, content);
if (err) if (err)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment