Commit 41b435fe authored by Jan Lindström's avatar Jan Lindström Committed by Monty

MDEV-33211 : Galera SST on maria-backup causes donor node to be unresponsive

If mariabackup with backup locks is used for SST, we do not
pause and desync the Galera provider at all. In the WSREP_MODE_BF_MARIABACKUP
case, the provider is paused and desynced at the BLOCK_COMMIT phase. In
all other cases, the provider is paused and desynced at the BLOCK_DDL phase.
parent 5d4adeab
......@@ -1417,12 +1417,103 @@ write_slave_info(ds_ctxt *datasink, MYSQL *connection)
/*********************************************************************//**
Retrieves the Galera (wsrep) state of the server and saves it in the
backup. The written values are also reported through msg().

We should create the xtrabackup_galera_info file even when backup locks
are used because the donor's wsrep_gtid_domain_id is needed later in the
joiner. Note that at this stage wsrep_local_state_uuid and
wsrep_last_committed are inconsistent, but they are not used in the
joiner. The joiner will rewrite this file at the mariabackup --prepare
phase, and thus there is an extra file, donor_galera_info, holding the
same line.

The information is needed to maintain wsrep_gtid_domain_id and
gtid_binlog_pos the same across the cluster. If the joiner node has a
different wsrep_gtid_domain_id, we should still receive the effective
domain id from the donor node, and use it.

@param[in,out]  datasink    backup data sink the info files are written to
@param[in]      connection  connection to the server being backed up
@return false only if writing one of the info files failed; true otherwise,
including the cases where the server is not a Galera node or the wsrep
state could not be read (these only produce a message) */
bool
write_galera_info(ds_ctxt *datasink, MYSQL *connection)
{
  /* Every value is requested under two spellings of its name; whichever
  one the server reports is used (the non-"55" one is preferred). */
  char *state_uuid = NULL, *state_uuid55 = NULL;
  char *last_committed = NULL, *last_committed55 = NULL;
  char *domain_id = NULL, *domain_id55 = NULL;
  char *wsrep_on = NULL, *wsrep_on55 = NULL;

  /* Effective values, chosen once all lookups have succeeded. Declared
  here so that no initialization is jumped over by "goto cleanup". */
  const char *uuid = NULL;
  const char *seqno = NULL;
  const char *domain = NULL;

  bool result = true;
  uint n_values = 0;

  mysql_variable vars[] = {
    {"Wsrep_on", &wsrep_on},
    {"wsrep_on", &wsrep_on55},
    {NULL, NULL}
  };

  mysql_variable status[] = {
    {"Wsrep_local_state_uuid", &state_uuid},
    {"wsrep_local_state_uuid", &state_uuid55},
    {"Wsrep_last_committed", &last_committed},
    {"wsrep_last_committed", &last_committed55},
    {NULL, NULL}
  };

  mysql_variable value[] = {
    {"Wsrep_gtid_domain_id", &domain_id},
    {"wsrep_gtid_domain_id", &domain_id55},
    {NULL, NULL}
  };

  /* Without a wsrep_on variable there is nothing to record; this is not
  an error. */
  n_values = read_mysql_variables(connection, "SHOW VARIABLES", vars, true);

  if (n_values == 0 || (wsrep_on == NULL && wsrep_on55 == NULL))
  {
    msg("Server is not Galera node thus --galera-info does not "
        "have any effect.");
    result = true;
    goto cleanup;
  }

  read_mysql_variables(connection, "SHOW STATUS", status, true);

  if ((state_uuid == NULL && state_uuid55 == NULL)
      || (last_committed == NULL && last_committed55 == NULL))
  {
    msg("Warning: failed to get master wsrep state from SHOW STATUS.");
    result = true;
    goto cleanup;
  }

  n_values = read_mysql_variables(connection, "SHOW VARIABLES LIKE 'wsrep%'",
                                  value, true);

  if (n_values == 0 || (domain_id == NULL && domain_id55 == NULL))
  {
    msg("Warning: failed to get master wsrep state from SHOW VARIABLES.");
    result = true;
    goto cleanup;
  }

  uuid = state_uuid ? state_uuid : state_uuid55;
  seqno = last_committed ? last_committed : last_committed55;
  domain = domain_id ? domain_id : domain_id55;

  /* The same "<uuid>:<seqno> <domain id>" line goes to both files. */
  result = datasink->backup_file_printf(XTRABACKUP_GALERA_INFO,
                                        "%s:%s %s\n", uuid, seqno, domain);

  if (result)
  {
    result = datasink->backup_file_printf(XTRABACKUP_DONOR_GALERA_INFO,
                                          "%s:%s %s\n", uuid, seqno, domain);
  }

  if (result)
  {
    /* The outcome of writing the binlog file name is intentionally not
    propagated into the return value. */
    write_current_binlog_file(datasink, connection);

    msg("Writing Galera info succeeded with %s:%s %s",
        uuid, seqno, domain);
  }

cleanup:
  /* Release the values of all three queries, not only those of
  SHOW STATUS. free_mysql_variables() is already invoked on a
  never-populated array when the first check above bails out, so calling
  it for arrays that were not read is fine. */
  free_mysql_variables(vars);
  free_mysql_variables(status);
  free_mysql_variables(value);

  return(result);
}
......
......@@ -11,7 +11,7 @@ let $counter= 5000;
let $mysql_errno= 9999;
while ($mysql_errno)
{
--error 0,ER_ACCESS_DENIED_ERROR,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,ER_LOCK_WAIT_TIMEOUT,2002,2006,2013,HA_ERR_NO_ENCRYPTION
--error 0,ER_ACCESS_DENIED_ERROR,ER_SERVER_SHUTDOWN,ER_CONNECTION_KILLED,ER_LOCK_WAIT_TIMEOUT,2002,2006,2013,HA_ERR_NO_ENCRYPTION,2026
select 1;
dec $counter;
......
......@@ -12,9 +12,9 @@ connection node_1;
connection node_2;
Starting server ...
connection node_1;
# Both should return FOUND 2 as we have bootstrap and SST
FOUND 2 /Desyncing and pausing the provider/ in mysqld.1.err
FOUND 2 /Resuming and resyncing the provider/ in mysqld.1.err
# Both should return NOT FOUND as we have mariabackup with backup locks
NOT FOUND /Desyncing and pausing the provider/ in mysqld.1.err
NOT FOUND /Resuming and resyncing the provider/ in mysqld.1.err
connection node_1;
SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP";
# Restart node_2, force SST.
......@@ -25,9 +25,9 @@ connection node_2;
Starting server ...
connection node_2;
connection node_1;
# Both should return FOUND 3 as we have 1 new SST
FOUND 3 /Desyncing and pausing the provider/ in mysqld.1.err
FOUND 3 /Resuming and resyncing the provider/ in mysqld.1.err
# Both should return NOT FOUND as we have mariabackup with backup locks
NOT FOUND /Desyncing and pausing the provider/ in mysqld.1.err
NOT FOUND /Resuming and resyncing the provider/ in mysqld.1.err
SET GLOBAL wsrep_mode = "";
DROP TABLE t;
# Case 2: MariaBackup backup from node_2
......@@ -46,11 +46,13 @@ SET GLOBAL wsrep_mode = "BF_ABORT_MARIABACKUP";
SELECT @@wsrep_mode;
@@wsrep_mode
BF_ABORT_MARIABACKUP
# Both should return FOUND 1 as node should not desync
FOUND 1 /Desyncing and pausing the provider/ in mysqld.2.err
FOUND 1 /Resuming and resyncing the provider/ in mysqld.2.err
# Should return FOUND 1 because only last backup does not desync
FOUND 1 /Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used./ in mysqld.2.err
# Both should return FOUND 2 because both backups do desync but on different points
FOUND 2 /Desyncing and pausing the provider/ in mysqld.2.err
FOUND 2 /Resuming and resyncing the provider/ in mysqld.2.err
# Should return FOUND 1 as server did not desync at BLOCK_DDL
FOUND 1 /Server not desynched from group at BLOCK_DDL because WSREP_MODE_BF_MARIABACKUP is used./ in mysqld.2.err
# Should return FOUND 1 as server did desync and pause at BLOCK_COMMIT
FOUND 1 /Server desynched from group during BACKUP STAGE BLOCK_COMMIT./ in mysqld.2.err
SET GLOBAL wsrep_mode = "";
connection node_1;
DROP TABLE t;
......
......@@ -13,7 +13,7 @@
CREATE TABLE t(i INT NOT NULL PRIMARY KEY) ENGINE INNODB;
INSERT INTO t VALUES(1);
#
# In default settings donor should desync
# In default settings donor should not desync
#
--echo # Restart node_2, force SST.
--connection node_2
......@@ -37,7 +37,7 @@ let $restart_noprint=2;
--connection node_1
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err;
--echo # Both should return FOUND 2 as we have bootstrap and SST
--echo # Both should return NOT FOUND as we have mariabackup with backup locks
let SEARCH_PATTERN = Desyncing and pausing the provider;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN = Resuming and resyncing the provider;
......@@ -76,7 +76,7 @@ let $restart_noprint=2;
--connection node_1
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err;
--echo # Both should return FOUND 3 as we have 1 new SST
--echo # Both should return NOT FOUND as we have mariabackup with backup locks
let SEARCH_PATTERN = Desyncing and pausing the provider;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN = Resuming and resyncing the provider;
......@@ -117,13 +117,16 @@ let $targetdir=$MYSQLTEST_VARDIR/tmp/backup2;
--enable_result_log
let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.2.err;
--echo # Both should return FOUND 1 as node should not desync
--echo # Both should return FOUND 2 because both backups do desync but on different points
let SEARCH_PATTERN = Desyncing and pausing the provider;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN = Resuming and resyncing the provider;
--source include/search_pattern_in_file.inc
--echo # Should return FOUND 1 because only last backup does not desync
let SEARCH_PATTERN = Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used.;
--echo # Should return FOUND 1 as server did not desync at BLOCK_DDL
let SEARCH_PATTERN = Server not desynched from group at BLOCK_DDL because WSREP_MODE_BF_MARIABACKUP is used.;
--source include/search_pattern_in_file.inc
--echo # Should return FOUND 1 as server did desync and pause at BLOCK_COMMIT
let SEARCH_PATTERN = Server desynched from group during BACKUP STAGE BLOCK_COMMIT.;
--source include/search_pattern_in_file.inc
SET GLOBAL wsrep_mode = "";
......
......@@ -39,6 +39,7 @@
#ifdef WITH_WSREP
#include "wsrep_server_state.h"
#include "wsrep_mysqld.h"
#include "wsrep_sst.h"
#endif /* WITH_WSREP */
static const char *stage_names[]=
......@@ -293,29 +294,40 @@ static bool backup_block_ddl(THD *thd)
#ifdef WITH_WSREP
DBUG_ASSERT(thd->wsrep_desynced_backup_stage == false);
/*
if user is specifically choosing to allow BF aborting for BACKUP STAGE BLOCK_DDL lock
holder, then do not desync and pause the node from cluster replication.
e.g. mariabackup uses BACKUP STATE BLOCK_DDL; and will be abortable by this.
But, If node is processing as SST donor or WSREP_MODE_BF_MARIABACKUP mode is not set,
we desync the node for BACKUP STAGE because applier threads
bypass backup MDL locks (see MDL_lock::can_grant_lock)
*/
if (WSREP_NNULL(thd))
{
Wsrep_server_state &server_state= Wsrep_server_state::instance();
if (!wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP) ||
server_state.state() == Wsrep_server_state::s_donor)
/*
If user is specifically choosing to allow BF aborting for
BACKUP STAGE BLOCK_DDL lock holder, then do not desync and
pause the node from cluster replication. e.g. mariabackup
uses BACKUP STATE BLOCK_DDL; and will be abortable by this.
*/
bool mariabackup= (server_state.state() == Wsrep_server_state::s_donor
&& !strcmp(wsrep_sst_method, "mariabackup"));
bool allow_bf= wsrep_check_mode(WSREP_MODE_BF_MARIABACKUP);
bool pause_and_desync= true;
if ((allow_bf) || (mariabackup))
{
pause_and_desync= false;
}
if (pause_and_desync)
{
if (server_state.desync_and_pause().is_undefined()) {
if (server_state.desync_and_pause().is_undefined())
DBUG_RETURN(1);
}
WSREP_INFO("Server desynched from group during BACKUP STAGE BLOCK_DDL.");
DEBUG_SYNC(thd, "wsrep_backup_stage_after_desync_and_pause");
thd->wsrep_desynced_backup_stage= true;
}
else
WSREP_INFO("Server not desynched from group because WSREP_MODE_BF_MARIABACKUP used.");
{
WSREP_INFO("Server not desynched from group at BLOCK_DDL because %s is used.",
allow_bf ? "WSREP_MODE_BF_MARIABACKUP" : wsrep_sst_method);
}
}
#endif /* WITH_WSREP */
......@@ -399,6 +411,28 @@ static bool backup_block_commit(THD *thd)
}
thd->clear_error();
#ifdef WITH_WSREP
if (WSREP_NNULL(thd) && !thd->wsrep_desynced_backup_stage)
{
Wsrep_server_state &server_state= Wsrep_server_state::instance();
bool mariabackup= (server_state.state() == Wsrep_server_state::s_donor
&& !strcmp(wsrep_sst_method, "mariabackup"));
/* If this node is donor and mariabackup is not used
we desync and pause provider here if it is not yet done.
*/
if (!mariabackup)
{
if (server_state.desync_and_pause().is_undefined())
DBUG_RETURN(1);
WSREP_INFO("Server desynched from group during BACKUP STAGE BLOCK_COMMIT.");
thd->wsrep_desynced_backup_stage= true;
DEBUG_SYNC(thd, "wsrep_backup_stage_commit_after_desync_and_pause");
}
}
#endif /* WITH_WSREP */
DBUG_RETURN(0);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment