Commit e619295e authored by Sujatha Sivakumar's avatar Sujatha Sivakumar

Bug#24901077: RESET SLAVE ALL DOES NOT ALWAYS RESET SLAVE

Description:
============
If the relay log index file ends up listing relay log
files that do not exist, then RESET SLAVE ALL is not
enough to get back to a clean state.

Analysis:
=========
In the bug scenario the slave server is stopped and some of
the relay logs are deleted, but the relay log index file is
not updated.

During slave server restart, replication initialization
fails because some of the required relay logs are missing.
The user executes the RESET SLAVE/RESET SLAVE ALL command to
start a clean slave. As per the documentation, the RESET SLAVE
command clears the master info and relay log info
repositories, deletes all the relay log files, and starts a
new relay log file. But in a scenario where the slave
server's Relay_log_info object is not initialized, the slave
will not purge the existing relay logs. Hence the index file
remains in a bad state, and users will not be able to start
the slave until these files are cleared.

Fix:
===
RESET SLAVE/RESET SLAVE ALL commands should do the cleanup
even in a scenario where Relay_log_info object
initialization failed.

Backported a flag named 'error_on_rli_init_info', which is
required to identify a failure to initialize the slave's
Relay_log_info object. This flag exists in MySQL-5.6
onwards as part of the BUG#14021292 fix.

During RESET SLAVE/RESET SLAVE ALL execution this flag
indicates that Relay_log_info initialization failed.
In such a case, open the relay log index and relay log
files and do the required cleanup.
parent 9181a561
include/master-slave.inc
[connection master]
CREATE TABLE t1 (c1 INT);
INSERT INTO t1 (c1) VALUES (1);
include/stop_slave_sql.inc
FLUSH LOGS;
FLUSH LOGS;
INSERT INTO t1 (c1) VALUES (2);
include/sync_slave_io_with_master.inc
call mtr.add_suppression("File '.*slave-relay-bin.");
call mtr.add_suppression("Could not open log file");
call mtr.add_suppression("Failed to open the relay log");
call mtr.add_suppression("Failed to initialize the master info structure");
include/rpl_stop_server.inc [server_number=2]
# Removing file(s)
include/rpl_start_server.inc [server_number=2]
START SLAVE;
ERROR HY000: Could not initialize master info structure; more error messages can be found in the MySQL error log
START SLAVE;
ERROR HY000: Could not initialize master info structure; more error messages can be found in the MySQL error log
RESET SLAVE;
DROP TABLE t1;
START SLAVE UNTIL MASTER_LOG_FILE= 'MASTER_LOG_FILE', MASTER_LOG_POS= MASTER_LOG_POS;;
include/wait_for_slave_sql_to_stop.inc
include/stop_slave_io.inc
include/start_slave.inc
include/diff_tables.inc [master:t1, slave:t1]
DROP TABLE t1;
include/rpl_end.inc
###############################################################################
# Bug#24901077: RESET SLAVE ALL DOES NOT ALWAYS RESET SLAVE
#
# Problem:
# =======
# If the relay log index file ends up listing relay log files that do
# not exist, then RESET SLAVE ALL is not enough to get back to a clean
# state.
#
# Test:
# =====
# 1. Create a table, stop the slave SQL thread and generate several relay
#    logs on the slave.
# 2. Stop the slave server, remove all slave-relay-bin.0* files (but keep
#    slave-relay-bin.index) and start the slave server again.
# 3. Verify that START SLAVE fails with ER_MASTER_INFO.
# 4. Execute RESET SLAVE and verify that replication can be started again
#    and that master and slave end up with the same table contents.
###############################################################################
# Remove all slave-relay-bin.0* files (do not remove slave-relay-bin.index)
# During server restart rli initialization will fail as there are no
# relay logs. Without the fix, RESET SLAVE does not do the required cleanup
# because rli is not inited, and the subsequent START SLAVE fails.
--source include/have_binlog_format_mixed.inc
--source include/master-slave.inc
--connection master
CREATE TABLE t1 (c1 INT);
INSERT INTO t1 (c1) VALUES (1);
--sync_slave_with_master
--connection slave
# Stop only the SQL thread; the I/O thread is synced with the master below.
--source include/stop_slave_sql.inc
# Remember the slave's data directory; the relay log files are removed from
# it further down.
--let $MYSQLD_SLAVE_DATADIR= `select @@datadir`
--connection master
# Generate more relay logs on slave.
FLUSH LOGS;
FLUSH LOGS;
INSERT INTO t1 (c1) VALUES (2);
--source include/sync_slave_io_with_master.inc
# Register suppressions for the messages that the missing relay log files
# are expected to produce.
call mtr.add_suppression("File '.*slave-relay-bin.");
call mtr.add_suppression("Could not open log file");
call mtr.add_suppression("Failed to open the relay log");
call mtr.add_suppression("Failed to initialize the master info structure");
# Stop slave
--let $rpl_server_number= 2
--source include/rpl_stop_server.inc
# Delete file(s)
# NOTE(review): $remove_pattern is assigned on the line after this echo, so
# the echo runs before the assignment; the recorded result reads
# "# Removing file(s)". Moving the --let above the --echo would change the
# recorded output, so the result file would have to be re-recorded.
--echo # Removing $remove_pattern file(s)
--let $remove_pattern= slave-relay-bin.0*
--remove_files_wildcard $MYSQLD_SLAVE_DATADIR $remove_pattern
# Start slave
--let $rpl_server_number= 2
--source include/rpl_start_server.inc
# Start slave must fail because of the removed file(s).
--error ER_MASTER_INFO
START SLAVE;
# Try a second time, it must fail again.
--error ER_MASTER_INFO
START SLAVE;
# Retrieve master executed position before reset slave.
# These values are used as the UNTIL position of START SLAVE below.
--let $master_exec_file= query_get_value("SHOW SLAVE STATUS", Relay_Master_Log_File, 1)
--let $master_exec_pos= query_get_value("SHOW SLAVE STATUS", Exec_Master_Log_Pos, 1)
# Reset slave.
# Disable "Warning 1612 Being purged log ./slave-relay-bin.0* was not found"
# because it is different on Unix and Windows systems.
--disable_warnings
RESET SLAVE;
--enable_warnings
# NOTE(review): presumably t1 is dropped here so that the CREATE TABLE that
# is replicated again after RESET SLAVE does not conflict with the existing
# table -- confirm.
DROP TABLE t1;
# Replay up to the position that had been executed before RESET SLAVE.
# The file name and position are masked so that the recorded result does
# not depend on their actual values.
--replace_result $master_exec_file MASTER_LOG_FILE $master_exec_pos MASTER_LOG_POS
--eval START SLAVE UNTIL MASTER_LOG_FILE= '$master_exec_file', MASTER_LOG_POS= $master_exec_pos;
# Wait for the SQL thread to stop, then stop the I/O thread as well.
--source include/wait_for_slave_sql_to_stop.inc
--source include/stop_slave_io.inc
# Start slave.
--source include/start_slave.inc
--connection master
--sync_slave_with_master
# Check consistency.
--let $diff_tables= master:t1, slave:t1
--source include/diff_tables.inc
# Cleanup
--connection master
DROP TABLE t1;
--sync_slave_with_master
--source include/rpl_end.inc
/* Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
/* Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -551,7 +551,6 @@ void end_master_info(Master_info* mi)
if (!mi->inited)
DBUG_VOID_RETURN;
end_relay_log_info(&mi->rli);
if (mi->fd >= 0)
{
end_io_cache(&mi->file);
......
/* Copyright (c) 2006, 2013, Oracle and/or its affiliates. All rights reserved.
/* Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -41,7 +41,8 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
no_storage(FALSE), replicate_same_server_id(::replicate_same_server_id),
info_fd(-1), cur_log_fd(-1), relay_log(&sync_relaylog_period),
sync_counter(0), is_relay_log_recovery(is_slave_recovery),
save_temporary_tables(0), cur_log_old_open_count(0), group_relay_log_pos(0),
save_temporary_tables(0), cur_log_old_open_count(0),
error_on_rli_init_info(false), group_relay_log_pos(0),
event_relay_log_pos(0),
#if HAVE_purify
is_fake(FALSE),
......@@ -108,7 +109,7 @@ int init_relay_log_info(Relay_log_info* rli,
const char* info_fname)
{
char fname[FN_REFLEN+128];
int info_fd;
int info_fd= -1;
const char* msg = 0;
int error = 0;
DBUG_ENTER("init_relay_log_info");
......@@ -118,6 +119,8 @@ int init_relay_log_info(Relay_log_info* rli,
DBUG_RETURN(0);
fn_format(fname, info_fname, mysql_data_home, "", 4+32);
mysql_mutex_lock(&rli->data_lock);
if (rli->error_on_rli_init_info)
goto err;
info_fd = rli->info_fd;
rli->cur_log_fd = -1;
rli->slave_skip_counter=0;
......@@ -351,11 +354,14 @@ Failed to open the existing relay log info file '%s' (errno %d)",
goto err;
}
rli->inited= 1;
rli->error_on_rli_init_info= false;
mysql_mutex_unlock(&rli->data_lock);
DBUG_RETURN(error);
err:
sql_print_error("%s", msg);
rli->error_on_rli_init_info= true;
if (msg)
sql_print_error("%s", msg);
end_io_cache(&rli->info_file);
if (info_fd >= 0)
mysql_file_close(info_fd, MYF(0));
......@@ -942,6 +948,8 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset,
const char** errmsg)
{
int error=0;
const char *ln;
char name_buf[FN_REFLEN];
DBUG_ENTER("purge_relay_logs");
/*
......@@ -968,12 +976,34 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset,
if (!rli->inited)
{
DBUG_PRINT("info", ("rli->inited == 0"));
DBUG_RETURN(0);
}
DBUG_ASSERT(rli->slave_running == 0);
DBUG_ASSERT(rli->mi->slave_running == 0);
if (rli->error_on_rli_init_info)
{
ln= rli->relay_log.generate_name(opt_relay_logname, "-relay-bin",
1, name_buf);
if (rli->relay_log.open_index_file(opt_relaylog_index_name, ln, TRUE))
{
sql_print_error("Unable to purge relay log files. Failed to open relay "
"log index file:%s.", rli->relay_log.get_index_fname());
DBUG_RETURN(1);
}
if (rli->relay_log.open(ln, LOG_BIN, 0, SEQ_READ_APPEND, 0,
(max_relay_log_size ? max_relay_log_size :
max_binlog_size), 1, TRUE))
{
sql_print_error("Unable to purge relay log files. Failed to open relay "
"log file:%s.", rli->relay_log.get_log_fname());
DBUG_RETURN(1);
}
}
else
DBUG_RETURN(0);
}
else
{
DBUG_ASSERT(rli->slave_running == 0);
DBUG_ASSERT(rli->mi->slave_running == 0);
}
rli->slave_skip_counter=0;
mysql_mutex_lock(&rli->data_lock);
......@@ -1013,6 +1043,8 @@ int purge_relay_logs(Relay_log_info* rli, THD *thd, bool just_reset,
rli->group_relay_log_pos,
0 /* do not need data lock */, errmsg, 0);
if (!rli->inited && rli->error_on_rli_init_info)
rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
err:
#ifndef DBUG_OFF
char buf[22];
......
/* Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
/* Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -153,7 +153,14 @@ class Relay_log_info : public Slave_reporting_capability
a different log under our feet
*/
uint32 cur_log_old_open_count;
/*
If error_on_rli_init_info is true when init_relay_log_info() is
called, the previous call to init_relay_log_info() terminated with
an error; RESET SLAVE must be executed and the problem fixed manually.
*/
bool error_on_rli_init_info;
/*
Let's call a group (of events) :
- a transaction
......
/* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
/* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -881,6 +881,7 @@ void close_active_mi()
if (active_mi)
{
end_master_info(active_mi);
end_relay_log_info(&active_mi->rli);
delete active_mi;
active_mi= 0;
}
......@@ -4165,6 +4166,7 @@ void end_relay_log_info(Relay_log_info* rli)
{
DBUG_ENTER("end_relay_log_info");
rli->error_on_rli_init_info= false;
if (!rli->inited)
DBUG_VOID_RETURN;
if (rli->info_fd >= 0)
......
/* Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
/* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -1313,6 +1313,7 @@ int reset_slave(THD *thd, Master_info* mi)
// close master_info_file, relay_log_info_file, set mi->inited=rli->inited=0
end_master_info(mi);
end_relay_log_info(&mi->rli);
// and delete these two files
fn_format(fname, master_info_file, mysql_data_home, "", 4+32);
if (mysql_file_stat(key_file_master_info, fname, &stat_area, MYF(0)) &&
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment