Commit 2291f8ef authored by Brandon Nesterenko's avatar Brandon Nesterenko

MDEV-25284: Assertion `info->type == READ_CACHE || info->type == WRITE_CACHE' failed

Problem:
========
This patch addresses two issues.

First, if a CHANGE MASTER command is issued and an error happens
while locating the replica’s relay logs, the logs can be put into an
invalid state where future updates fail and future CHANGE MASTER
calls crash the server. More specifically, right before a replica
purges the relay logs (part of the `CHANGE MASTER TO` logic), the
relay log is temporarily closed with state LOG_TO_BE_OPENED. If the
server errors in-between the temporary log closure and purge, i.e.
during the function find_log_pos, the log should be closed.
MDEV-25284 reveals the log is not properly closed.

Second, upon issuing a RESET SLAVE ALL command, a slave’s GTID
filters are not cleared (DO_DOMAIN_IDS, IGNORE_DOMAIN_IDS,
IGNORE_SERVER_IDS). MySQL had a similar bug report, Bug #18816897,
which fixed this issue to clear IGNORE_SERVER_IDS after issuing
RESET SLAVE ALL in version 5.7.

Solution:
=========

To fix the first problem, the CHANGE MASTER error handling logic was
extended to transition the relay log state to LOG_CLOSED from
LOG_TO_BE_OPENED.

To fix the second problem, the RESET SLAVE ALL logic is extended to
clear the domain_id filter and ignore_server_ids.

Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
parent 5f63f5dc
# This file ensures that a slave's id filtering variables (i.e. DO_DOMAIN_IDS,
# IGNORE_DOMAIN_IDS, and IGNORE_SERVER_IDS) are cleared after issuing
# `RESET SLAVE ALL`.
#
# param $_do_domain_ids Integer list of values to use for DO_DOMAIN_IDS
# param $_ignore_domain_ids Integer list of values to use for IGNORE_DOMAIN_IDS
# param $_ignore_server_ids Integer list of values to use for IGNORE_SERVER_IDS
#
# Step 1: precondition check. Read the three id-filter columns from
# SHOW SLAVE STATUS and abort if any of them is already non-empty.
--echo # Id filtering variable values should be empty initially
let $do_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1);
let $ignore_domain_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1);
let $ignore_server_ids_before= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Server_Ids, 1);
if (`SELECT "$do_domain_ids_before" != "" OR
"$ignore_domain_ids_before" != "" OR
"$ignore_server_ids_before" != ""`)
{
die("CHANGE MASTER TO id filter variables are not empty initially");
}
# Step 2: apply the caller-supplied lists ($_do_domain_ids,
# $_ignore_domain_ids, $_ignore_server_ids) in a single CHANGE MASTER TO.
--echo # Set id filtering variables
eval CHANGE MASTER TO DO_DOMAIN_IDS=$_do_domain_ids, IGNORE_DOMAIN_IDS=$_ignore_domain_ids, IGNORE_SERVER_IDS=$_ignore_server_ids, MASTER_USE_GTID=SLAVE_POS;
# Re-read the columns and echo them, so the values that took effect are
# recorded in the result file.
let $do_domain_ids_set= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1);
let $ignore_domain_ids_set= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1);
let $ignore_server_ids_set= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Server_Ids, 1);
--echo # do domain id list: $do_domain_ids_set
--echo # ignore domain id list: $ignore_domain_ids_set
--echo # ignore server id list: $ignore_server_ids_set
# Step 3: issue the statement under test.
--echo # RESET SLAVE ALL should clear values for all id filtering variables
RESET SLAVE ALL;
# Re-establish the master connection parameters so the slave can be started
# again; the port number is masked in the recorded output.
--replace_result $MASTER_MYPORT MASTER_MYPORT
eval change master to master_port=$MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
# Start and stop the slave before re-reading the columns.
# NOTE(review): presumably this confirms the slave runs with the cleared
# configuration and leaves it stopped for the next invocation - confirm.
--source include/start_slave.inc
--source include/stop_slave.inc
# Step 4: postcondition check. All three lists must be empty again.
let $do_domain_ids_cleared= query_get_value(SHOW SLAVE STATUS, Replicate_Do_Domain_Ids, 1);
let $ignore_domain_ids_cleared= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Domain_Ids, 1);
let $ignore_server_ids_cleared= query_get_value(SHOW SLAVE STATUS, Replicate_Ignore_Server_Ids, 1);
if (`SELECT "$do_domain_ids_cleared" != "" OR
"$ignore_domain_ids_cleared" != "" OR
"$ignore_server_ids_cleared" != ""`)
{
die("RESET SLAVE ALL did not clear id filtering variables");
}
include/master-slave.inc
[connection master]
#
# Failed CHANGE MASTER TO should not change relay log status
#
connection slave;
include/stop_slave.inc
SET @@debug_dbug="d,simulate_find_log_pos_error";
CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=SLAVE_POS;
ERROR HY000: Target log not found in binlog index
SET @@debug_dbug="";
include/start_slave.inc
#
# Ensure relay log can be updated after a failed CHANGE MASTER
#
FLUSH RELAY LOGS;
include/wait_for_slave_param.inc [Relay_Log_File]
#
# Slave should continue to receive data from old master after failed
# CHANGE MASTER TO
#
connection master;
CREATE TABLE t1 (a int);
insert into t1 values (1);
connection slave;
connection slave;
#
# Future CHANGE MASTER calls should succeed
#
include/stop_slave.inc
CHANGE MASTER TO MASTER_USE_GTID=SLAVE_POS;
include/start_slave.inc
########################
# Cleanup
########################
connection master;
DROP TABLE t1;
connection slave;
include/stop_slave.inc
RESET SLAVE ALL;
change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
include/start_slave.inc
include/rpl_end.inc
include/master-slave.inc
[connection master]
connection slave;
include/stop_slave.inc
#
# Category 1) DO_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
#
# Id filtering variable values should be empty initially
# Set id filtering variables
CHANGE MASTER TO DO_DOMAIN_IDS=(1), IGNORE_DOMAIN_IDS=(), IGNORE_SERVER_IDS=(3), MASTER_USE_GTID=SLAVE_POS;
# do domain id list: 1
# ignore domain id list:
# ignore server id list: 3
# RESET SLAVE ALL should clear values for all id filtering variables
RESET SLAVE ALL;
change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
include/start_slave.inc
include/stop_slave.inc
#
# Category 2) IGNORE_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
#
# Id filtering variable values should be empty initially
# Set id filtering variables
CHANGE MASTER TO DO_DOMAIN_IDS=(), IGNORE_DOMAIN_IDS=(2), IGNORE_SERVER_IDS=(3), MASTER_USE_GTID=SLAVE_POS;
# do domain id list:
# ignore domain id list: 2
# ignore server id list: 3
# RESET SLAVE ALL should clear values for all id filtering variables
RESET SLAVE ALL;
change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
include/start_slave.inc
include/stop_slave.inc
#
# Category 3) Null check - edge case with all empty lists to ensure a
# lack of specification doesn't break anything
#
# Id filtering variable values should be empty initially
# Set id filtering variables
CHANGE MASTER TO DO_DOMAIN_IDS=(), IGNORE_DOMAIN_IDS=(), IGNORE_SERVER_IDS=(), MASTER_USE_GTID=SLAVE_POS;
# do domain id list:
# ignore domain id list:
# ignore server id list:
# RESET SLAVE ALL should clear values for all id filtering variables
RESET SLAVE ALL;
change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
include/start_slave.inc
include/stop_slave.inc
############################
# Cleanup
############################
connection slave;
change master to master_port=MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
include/start_slave.inc
include/rpl_end.inc
#
# Purpose:
# This test ensures that issuing a CHANGE MASTER will not put a replica into
# an inconsistent state if the slave cannot find the log files (i.e. the call to
# find_log_pos in reset_logs fails). More specifically, right before a replica
# purges the relay logs (part of the `CHANGE MASTER TO` logic), the relay log is
# temporarily closed with state LOG_TO_BE_OPENED. If the server is issued a
# CHANGE MASTER and it errors in-between the temporary log closure and purge,
# i.e. during the function find_log_pos, the log should be closed. The bug
# reported by MDEV-25284 revealed the log is not properly closed, such that
# future relay log updates fail, and future CHANGE MASTER calls crash the
# server.
#
# Methodology:
# This test ensures that the relay log is properly closed by ensuring future
# updates and CHANGE MASTER calls succeed.
#
# References:
# MDEV-25284: Assertion `info->type == READ_CACHE ||
# info->type == WRITE_CACHE' failed
#
--source include/master-slave.inc
--source include/have_debug.inc
--echo #
--echo # Failed CHANGE MASTER TO should not change relay log status
--echo #
--connection slave
--source include/stop_slave.inc
# Enable the debug keyword that simulates the find_log_pos failure (this is
# why the test sources have_debug.inc).
SET @@debug_dbug="d,simulate_find_log_pos_error";
# The CHANGE MASTER below is expected to fail with error 1373; the result
# file records it as "Target log not found in binlog index".
error 1373;
CHANGE MASTER TO IGNORE_DOMAIN_IDS=(1), MASTER_USE_GTID=SLAVE_POS;
# Switch the fault injection off again before continuing.
SET @@debug_dbug="";
--source include/start_slave.inc
--echo #
--echo # Ensure relay log can be updated after a failed CHANGE MASTER
--echo #
FLUSH RELAY LOGS;
# Wait for SHOW SLAVE STATUS to report the relay log file expected after the
# flush.
# NOTE(review): the hard-coded file number assumes a fixed count of relay log
# rotations up to this point - confirm if the test is extended.
--let $slave_param= Relay_Log_File
--let $slave_param_value= slave-relay-bin.000003
--source include/wait_for_slave_param.inc
--echo #
--echo # Slave should continue to receive data from old master after failed
--echo # CHANGE MASTER TO
--echo #
--connection master
CREATE TABLE t1 (a int);
insert into t1 values (1);
# Capture the master's CHECKSUM TABLE output so it can be compared with the
# slave's copy once the slave has caught up.
--let $master_checksum= `CHECKSUM TABLE t1`
--sync_slave_with_master
--connection slave
if ($master_checksum != `CHECKSUM TABLE t1`)
{
die("Replica failed to pull data from primary after failed CHANGE MASTER TO");
}
--echo #
--echo # Future CHANGE MASTER calls should succeed
--echo #
--source include/stop_slave.inc
CHANGE MASTER TO MASTER_USE_GTID=SLAVE_POS;
--source include/start_slave.inc
--echo ########################
--echo # Cleanup
--echo ########################
--connection master
DROP TABLE t1;
--connection slave
--source include/stop_slave.inc
# Drop the replication configuration changed above and point the slave back at
# the master with default settings; the port is masked in the output.
RESET SLAVE ALL;
--replace_result $MASTER_MYPORT MASTER_MYPORT
eval change master to master_port=$MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
--source include/start_slave.inc
# Register a suppression for the error-log message, with query logging off so
# the call itself is not written to the result file.
# NOTE(review): presumably this message is emitted by the injected
# find_log_pos failure - confirm against the server error log.
--disable_query_log
call mtr.add_suppression("Failed to locate old binlog or relay log files");
--enable_query_log
--source include/rpl_end.inc
#
# Purpose:
# This test validates that after issuing the `RESET SLAVE ALL` command,
# any corresponding IGNORE_DOMAIN_IDS/DO_DOMAIN_IDS and IGNORE_SERVER_IDS
# values are cleared.
#
#
# Methodology:
# To ensure the filtering variables are properly cleared after issuing
# RESET SLAVE ALL, we categorize different combinations of allowable input
# into three different options, and ensure that the variables are cleared for
# each category. The categories are as follows:
# Category 1) DO_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
# Category 2) IGNORE_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
# Category 3) Null check - edge case with all empty lists to ensure a lack
# of specification doesn't break anything
#
# To specify the values, the variables are set in `CHANGE MASTER TO`. To
# ensure the slave state is correct, we test the domain/server id filtering
# variable values at the following times while testing each category.
#
# Before CHANGE MASTER TO the filtering variables are tested to all be
# empty.
#
# After CHANGE MASTER TO the variables are tested to ensure they reflect
# those set in the CHANGE MASTER command.
#
# After RESET SLAVE ALL the filtering variables are tested to all be
# empty.
#
--source include/master-slave.inc
# NOTE(review): nothing in this test sets debug_dbug, so requiring a debug
# build may be unnecessary here - confirm before removing.
--source include/have_debug.inc
--connection slave
--source include/stop_slave.inc
--echo #
--echo # Category 1) DO_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
--echo #
# The three $_ variables are the parameters read by
# include/rpl_reset_slave_all_check.inc; an empty list is written as ().
--let $_do_domain_ids= (1)
--let $_ignore_domain_ids= ()
--let $_ignore_server_ids= (3)
--source include/rpl_reset_slave_all_check.inc
--echo #
--echo # Category 2) IGNORE_DOMAIN_IDS and IGNORE_SERVER_IDS specified together
--echo #
--let $_do_domain_ids= ()
--let $_ignore_domain_ids= (2)
--let $_ignore_server_ids= (3)
--source include/rpl_reset_slave_all_check.inc
--echo #
--echo # Category 3) Null check - edge case with all empty lists to ensure a
--echo # lack of specification doesn't break anything
--echo #
--let $_do_domain_ids= ()
--let $_ignore_domain_ids= ()
--let $_ignore_server_ids= ()
--source include/rpl_reset_slave_all_check.inc
--echo ############################
--echo # Cleanup
--echo ############################
--connection slave
# The check include ends with stop_slave.inc, so the slave is stopped here.
# Re-apply the default connection settings (port masked in the output) and
# restart the slave before rpl_end.inc.
--replace_result $MASTER_MYPORT MASTER_MYPORT
eval change master to master_port=$MASTER_MYPORT, master_host='127.0.0.1', master_user='root';
--source include/start_slave.inc
--source include/rpl_end.inc
......@@ -896,6 +896,20 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
void unlock_binlog_end_pos() { mysql_mutex_unlock(&LOCK_binlog_end_pos); }
mysql_mutex_t* get_binlog_end_pos_lock() { return &LOCK_binlog_end_pos; }
/*
Ensures the log's state is either LOG_OPEN or LOG_CLOSED. If something
failed along the desired path and left the log in invalid state, i.e.
LOG_TO_BE_OPENED, forces the state to be LOG_CLOSED.
*/
void try_fix_log_state()
{
mysql_mutex_lock(get_log_lock());
/* Only change the log state if it is LOG_TO_BE_OPENED */
if (log_state == LOG_TO_BE_OPENED)
log_state= LOG_CLOSED;
mysql_mutex_unlock(get_log_lock());
}
int wait_for_update_binlog_end_pos(THD* thd, struct timespec * timeout);
/*
......
......@@ -170,6 +170,8 @@ void Master_info::clear_in_memory_info(bool all)
{
port= MYSQL_PORT;
host[0] = 0; user[0] = 0; password[0] = 0;
domain_id_filter.clear_ids();
reset_dynamic(&ignore_server_ids);
}
}
......@@ -1788,6 +1790,12 @@ void Domain_id_filter::reset_filter()
m_filter= false;
}
/**
  Reset both domain id lists (DO_DOMAIN_IDS and IGNORE_DOMAIN_IDS) held by
  this filter.
*/
void Domain_id_filter::clear_ids()
{
  /* Visit the lists in the same order as before: DO first, then IGNORE. */
  const int list_kinds[]= { DO_DOMAIN_IDS, IGNORE_DOMAIN_IDS };
  const unsigned int n_kinds= sizeof(list_kinds) / sizeof(list_kinds[0]);

  for (unsigned int i= 0; i < n_kinds; i++)
    reset_dynamic(&m_domain_ids[list_kinds[i]]);
}
/**
Update the do/ignore domain id filter lists.
......
......@@ -78,6 +78,11 @@ class Domain_id_filter
*/
void reset_filter();
/*
Clear do_ids and ignore_ids to disable domain id filtering
*/
void clear_ids();
/*
Update the do/ignore domain id filter lists.
......
......@@ -3840,6 +3840,16 @@ bool change_master(THD* thd, Master_info* mi, bool *master_info_added)
mi->unlock_slave_threads();
if (ret == FALSE)
my_ok(thd);
else
{
/*
Depending on where CHANGE MASTER failed, the logs may be waiting to be
reopened. This would break future log updates and CHANGE MASTER calls.
`try_fix_log_state()` allows the relay log to fix its state to no longer
expect to be reopened.
*/
mi->rli.relay_log.try_fix_log_state();
}
DBUG_RETURN(ret);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment