Commit e69da6dc authored by Manish Kumar's avatar Manish Kumar

BUG#11752315 - 43460: STOP SLAVE UNABLE TO COMPLETE WHEN SLAVE THREAD IS TRYING TO RECONNECT TO

Problem : The basic problem is the way the thread sleeps in mysql-5.5 and also in mysql-5.1
          when we execute a stop slave on windows platform.
          On windows platform if the stop slave is executed after the master dies, we have 
          this long wait before the stop slave return a value. This is because there is a 
          sleep of the thread. The sleep is uninterruptable in the two above version,
          which was fixed by Davi patch for the BUG#11765860 for mysql-trunk. Backporting 
          his patch for mysql-5.5 fixes the problem. 

Solution : A new pair of mutex and condition variable is introduced to synchronize thread 
           sleep and finalization. A new mutex is required because the slave threads are 
           terminated while holding the slave thread locks (run_lock), which can not be 
           relinquished during termination as this would affect the lock order.

mysql-test/suite/rpl/r/rpl_start_stop_slave.result:
  The result file associated with the test added.
mysql-test/suite/rpl/t/rpl_start_stop_slave.test:
  A test to check the new functionality.
sql/rpl_mi.cc:
  The constructor using the new mutex and condition variables for the master_info.
sql/rpl_mi.h:
  The condition variable and mutex have been added for the master_info.
sql/rpl_rli.cc:
  The constructor using the new mutex and condition variables for the realy_log_info.
sql/rpl_rli.h:
  The condition variable and mutex have been added for the relay_log_info.
sql/slave.cc:
  Use a timed wait on a condition variable to implement a interruptible sleep. 
  The wait is registered with the THD object so that the thread will be woken 
  up if killed.
parent 31a1f8ef
include/master-slave.inc
[connection master]
set @time_before_kill := (select CURRENT_TIMESTAMP);
[Time before the query]
[Connection ID of the slave I/O thread found]
kill <connection_id>;
set @time_after_kill := (select CURRENT_TIMESTAMP);
[Time after the query]
[Killing of the slave IO thread was successful]
include/rpl_end.inc
#
#BUG#11752315 : STOP SLAVE UNABLE TO COMPLETE WHEN SLAVE THREAD IS TRYING
# TO RECONNECT TO
#
# ==== Purpose ====
#
#Tests that the slave does not go to a sleep for a long duration after the
#master is killed and we do a START_SLAVE and STOP_SLAVE.
#
# ==== Method ====
#
#This is a new functionality of having an interruptable sleep of the slave.
#We find the thread id for the slave thread. On finding the thread ID of the
#slave thread we kill the slave thread. A successful kill in less than 60 sec
#should serve the purpose of checking the functionality.
#
--source include/have_log_bin.inc
--source include/master-slave.inc
connection slave;
--let $connection_id=`SELECT id FROM information_schema.processlist where state LIKE 'Waiting for master to send event'`
set @time_before_kill := (select CURRENT_TIMESTAMP);
--echo [Time before the query]
--echo [Connection ID of the slave I/O thread found]
--replace_regex /kill [0-9]*/kill <connection_id>/
--eval kill $connection_id
set @time_after_kill := (select CURRENT_TIMESTAMP);
--echo [Time after the query]
if(`select TIMESTAMPDIFF(SECOND,@time_after_kill, @time_before_kill) > 60`)
{
--echo # assert : The difference between the timestamps 'time_after_kill' and 'time_before_kill' should be less than 60sec.
--die
}
--echo [Killing of the slave IO thread was successful]
# End of test
--source include/rpl_end.inc
......@@ -7682,8 +7682,10 @@ PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids,
key_LOCK_system_variables_hash, key_LOCK_table_share, key_LOCK_thd_data,
key_LOCK_user_conn, key_LOCK_uuid_generator, key_LOG_LOCK_log,
key_master_info_data_lock, key_master_info_run_lock,
key_master_info_sleep_lock,
key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock,
key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
key_relay_log_info_sleep_lock,
key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count,
key_PARTITION_LOCK_auto_inc;
......@@ -7729,10 +7731,12 @@ static PSI_mutex_info all_server_mutexes[]=
{ &key_LOG_LOCK_log, "LOG::LOCK_log", 0},
{ &key_master_info_data_lock, "Master_info::data_lock", 0},
{ &key_master_info_run_lock, "Master_info::run_lock", 0},
{ &key_master_info_sleep_lock, "Master_info::sleep_lock", 0},
{ &key_mutex_slave_reporting_capability_err_lock, "Slave_reporting_capability::err_lock", 0},
{ &key_relay_log_info_data_lock, "Relay_log_info::data_lock", 0},
{ &key_relay_log_info_log_space_lock, "Relay_log_info::log_space_lock", 0},
{ &key_relay_log_info_run_lock, "Relay_log_info::run_lock", 0},
{ &key_relay_log_info_sleep_lock, "Relay_log_info::sleep_lock", 0},
{ &key_structure_guard_mutex, "Query_cache::structure_guard_mutex", 0},
{ &key_TABLE_SHARE_LOCK_ha_data, "TABLE_SHARE::LOCK_ha_data", 0},
{ &key_LOCK_error_messages, "LOCK_error_messages", PSI_FLAG_GLOBAL},
......@@ -7768,8 +7772,10 @@ PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond,
key_delayed_insert_cond, key_delayed_insert_cond_client,
key_item_func_sleep_cond, key_master_info_data_cond,
key_master_info_start_cond, key_master_info_stop_cond,
key_master_info_sleep_cond,
key_relay_log_info_data_cond, key_relay_log_info_log_space_cond,
key_relay_log_info_start_cond, key_relay_log_info_stop_cond,
key_relay_log_info_sleep_cond,
key_TABLE_SHARE_cond, key_user_level_lock_cond,
key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
PSI_cond_key key_RELAYLOG_update_cond;
......@@ -7797,10 +7803,12 @@ static PSI_cond_info all_server_conds[]=
{ &key_master_info_data_cond, "Master_info::data_cond", 0},
{ &key_master_info_start_cond, "Master_info::start_cond", 0},
{ &key_master_info_stop_cond, "Master_info::stop_cond", 0},
{ &key_master_info_sleep_cond, "Master_info::sleep_cond", 0},
{ &key_relay_log_info_data_cond, "Relay_log_info::data_cond", 0},
{ &key_relay_log_info_log_space_cond, "Relay_log_info::log_space_cond", 0},
{ &key_relay_log_info_start_cond, "Relay_log_info::start_cond", 0},
{ &key_relay_log_info_stop_cond, "Relay_log_info::stop_cond", 0},
{ &key_relay_log_info_sleep_cond, "Relay_log_info::sleep_cond", 0},
{ &key_TABLE_SHARE_cond, "TABLE_SHARE::cond", 0},
{ &key_user_level_lock_cond, "User_level_lock::cond", 0},
{ &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL},
......
......@@ -244,8 +244,10 @@ extern PSI_mutex_key key_BINLOG_LOCK_index, key_BINLOG_LOCK_prep_xids,
key_LOCK_table_share, key_LOCK_thd_data,
key_LOCK_user_conn, key_LOCK_uuid_generator, key_LOG_LOCK_log,
key_master_info_data_lock, key_master_info_run_lock,
key_master_info_sleep_lock,
key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock,
key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
key_relay_log_info_sleep_lock,
key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
key_LOCK_error_messages, key_LOCK_thread_count, key_PARTITION_LOCK_auto_inc;
extern PSI_mutex_key key_RELAYLOG_LOCK_index;
......@@ -264,8 +266,10 @@ extern PSI_cond_key key_BINLOG_COND_prep_xids, key_BINLOG_update_cond,
key_delayed_insert_cond, key_delayed_insert_cond_client,
key_item_func_sleep_cond, key_master_info_data_cond,
key_master_info_start_cond, key_master_info_stop_cond,
key_master_info_sleep_cond,
key_relay_log_info_data_cond, key_relay_log_info_log_space_cond,
key_relay_log_info_start_cond, key_relay_log_info_stop_cond,
key_relay_log_info_sleep_cond,
key_TABLE_SHARE_cond, key_user_level_lock_cond,
key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
extern PSI_cond_key key_RELAYLOG_update_cond;
......
......@@ -49,9 +49,11 @@ Master_info::Master_info(bool is_slave_recovery)
bzero((char*) &file, sizeof(file));
mysql_mutex_init(key_master_info_run_lock, &run_lock, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_master_info_data_lock, &data_lock, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_master_info_sleep_lock, &sleep_lock, MY_MUTEX_INIT_FAST);
mysql_cond_init(key_master_info_data_cond, &data_cond, NULL);
mysql_cond_init(key_master_info_start_cond, &start_cond, NULL);
mysql_cond_init(key_master_info_stop_cond, &stop_cond, NULL);
mysql_cond_init(key_master_info_sleep_cond, &sleep_cond, NULL);
}
Master_info::~Master_info()
......@@ -59,9 +61,11 @@ Master_info::~Master_info()
delete_dynamic(&ignore_server_ids);
mysql_mutex_destroy(&run_lock);
mysql_mutex_destroy(&data_lock);
mysql_mutex_destroy(&sleep_lock);
mysql_cond_destroy(&data_cond);
mysql_cond_destroy(&start_cond);
mysql_cond_destroy(&stop_cond);
mysql_cond_destroy(&sleep_cond);
}
/**
......
......@@ -78,8 +78,8 @@ class Master_info : public Slave_reporting_capability
File fd; // we keep the file open, so we need to remember the file pointer
IO_CACHE file;
mysql_mutex_t data_lock, run_lock;
mysql_cond_t data_cond, start_cond, stop_cond;
mysql_mutex_t data_lock, run_lock, sleep_lock;
mysql_cond_t data_cond, start_cond, stop_cond, sleep_cond;
THD *io_thd;
MYSQL* mysql;
uint32 file_id; /* for 3.23 load data infile */
......
......@@ -75,10 +75,12 @@ Relay_log_info::Relay_log_info(bool is_slave_recovery)
&data_lock, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_relay_log_info_log_space_lock,
&log_space_lock, MY_MUTEX_INIT_FAST);
mysql_mutex_init(key_relay_log_info_sleep_lock, &sleep_lock, MY_MUTEX_INIT_FAST);
mysql_cond_init(key_relay_log_info_data_cond, &data_cond, NULL);
mysql_cond_init(key_relay_log_info_start_cond, &start_cond, NULL);
mysql_cond_init(key_relay_log_info_stop_cond, &stop_cond, NULL);
mysql_cond_init(key_relay_log_info_log_space_cond, &log_space_cond, NULL);
mysql_cond_init(key_relay_log_info_sleep_cond, &sleep_cond, NULL);
relay_log.init_pthread_objects();
DBUG_VOID_RETURN;
}
......@@ -91,10 +93,12 @@ Relay_log_info::~Relay_log_info()
mysql_mutex_destroy(&run_lock);
mysql_mutex_destroy(&data_lock);
mysql_mutex_destroy(&log_space_lock);
mysql_mutex_destroy(&sleep_lock);
mysql_cond_destroy(&data_cond);
mysql_cond_destroy(&start_cond);
mysql_cond_destroy(&stop_cond);
mysql_cond_destroy(&log_space_cond);
mysql_cond_destroy(&sleep_cond);
relay_log.cleanup();
DBUG_VOID_RETURN;
}
......
......@@ -138,15 +138,13 @@ class Relay_log_info : public Slave_reporting_capability
standard lock acquisition order to avoid deadlocks:
run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index
*/
mysql_mutex_t data_lock, run_lock;
mysql_mutex_t data_lock, run_lock, sleep_lock;
/*
start_cond is broadcast when SQL thread is started
stop_cond - when stopped
data_cond - when data protected by data_lock changes
*/
mysql_cond_t start_cond, stop_cond, data_cond;
mysql_cond_t start_cond, stop_cond, data_cond, sleep_cond;
/* parent Master_info structure */
Master_info *mi;
......
......@@ -68,8 +68,6 @@ bool use_slave_mask = 0;
MY_BITMAP slave_error_mask;
char slave_skip_error_names[SHOW_VAR_FUNC_BUFF_SIZE];
typedef bool (*CHECK_KILLED_FUNC)(THD*,void*);
char* slave_load_tmpdir = 0;
Master_info *active_mi= 0;
my_bool replicate_same_server_id;
......@@ -152,9 +150,6 @@ static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
bool suppress_warnings);
static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
bool reconnect, bool suppress_warnings);
static int safe_sleep(THD* thd, int sec, CHECK_KILLED_FUNC thread_killed,
void* thread_killed_arg);
static int get_master_version_and_clock(MYSQL* mysql, Master_info* mi);
static Log_event* next_event(Relay_log_info* rli);
static int queue_event(Master_info* mi,const char* buf,ulong event_len);
static int terminate_slave_thread(THD *thd,
......@@ -2068,35 +2063,42 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
DBUG_RETURN(0);
}
/*
Sleep for a given amount of time or until killed.
@param thd Thread context of the current thread.
@param seconds The number of seconds to sleep.
@param func Function object to check if the thread has been killed.
@param info The Rpl_info object associated with this sleep.
static int safe_sleep(THD* thd, int sec, CHECK_KILLED_FUNC thread_killed,
void* thread_killed_arg)
@retval True if the thread has been killed, false otherwise.
*/
template <typename killed_func, typename rpl_info>
static inline bool slave_sleep(THD *thd, time_t seconds,
killed_func func, rpl_info info)
{
int nap_time;
thr_alarm_t alarmed;
DBUG_ENTER("safe_sleep");
thr_alarm_init(&alarmed);
time_t start_time= my_time(0);
time_t end_time= start_time+sec;
bool ret;
struct timespec abstime;
const char *old_proc_info;
while ((nap_time= (int) (end_time - start_time)) > 0)
{
ALARM alarm_buff;
/*
The only reason we are asking for alarm is so that
we will be woken up in case of murder, so if we do not get killed,
set the alarm so it goes off after we wake up naturally
*/
thr_alarm(&alarmed, 2 * nap_time, &alarm_buff);
sleep(nap_time);
thr_end_alarm(&alarmed);
mysql_mutex_t *lock= &info->sleep_lock;
mysql_cond_t *cond= &info->sleep_cond;
if ((*thread_killed)(thd,thread_killed_arg))
DBUG_RETURN(1);
start_time= my_time(0);
/* Absolute system time at which the sleep time expires. */
set_timespec(abstime, seconds);
mysql_mutex_lock(lock);
old_proc_info= thd->enter_cond(cond, lock, thd->proc_info);
while (! (ret= func(thd, info)))
{
int error= mysql_cond_timedwait(cond, lock, &abstime);
if (error == ETIMEDOUT || error == ETIME)
break;
}
DBUG_RETURN(0);
/* Implicitly unlocks the mutex. */
thd->exit_cond(old_proc_info);
return ret;
}
......@@ -2555,8 +2557,8 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli)
exec_res= 0;
rli->cleanup_context(thd, 1);
/* chance for concurrent connection to get more locks */
safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
(CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli);
slave_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
sql_slave_killed, rli);
mysql_mutex_lock(&rli->data_lock); // because of SHOW STATUS
rli->trans_retries++;
rli->retried_trans++;
......@@ -2654,8 +2656,7 @@ static int try_to_reconnect(THD *thd, MYSQL *mysql, Master_info *mi,
{
if (*retry_count > master_retry_count)
return 1; // Don't retry forever
safe_sleep(thd, mi->connect_retry, (CHECK_KILLED_FUNC) io_slave_killed,
(void *) mi);
slave_sleep(thd, mi->connect_retry, io_slave_killed, mi);
}
if (check_io_slave_killed(thd, mi, messages[SLAVE_RECON_MSG_KILLED_WAITING]))
return 1;
......@@ -4248,8 +4249,7 @@ static int connect_to_master(THD* thd, MYSQL* mysql, Master_info* mi,
change_rpl_status(RPL_ACTIVE_SLAVE,RPL_LOST_SOLDIER);
break;
}
safe_sleep(thd,mi->connect_retry,(CHECK_KILLED_FUNC)io_slave_killed,
(void*)mi);
slave_sleep(thd,mi->connect_retry,io_slave_killed, mi);
}
if (!slave_was_killed)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment