Commit b70262d6 authored by unknown

Fix for BUG#28030: test im_instance_conf fails with an assert.

The problem was a race condition on shutdown: IM could receive a shutdown
request while a guarded mysqld was still starting. In this case the Guardian
thread tried to stop the mysqld, but could fail if the mysqld had not yet
created its pid-file. When this happened, the mysqld-monitor thread
didn't stop, so the assert in Thread_registry was triggered.

The fix is to make several attempts to stop mysqld if it is active.


server-tools/instance-manager/guardian.cc:
  Try to stop mysqld several times if it is still active.
server-tools/instance-manager/instance.cc:
  Make Instance::kill_mysqld() return operation status.
server-tools/instance-manager/instance.h:
  Make Instance::kill_mysqld() return operation status.
server-tools/instance-manager/thread_registry.cc:
  Log unregistered thread ids.
parent 361fda82
...@@ -403,6 +403,8 @@ void Guardian::init() ...@@ -403,6 +403,8 @@ void Guardian::init()
void Guardian::stop_instances() void Guardian::stop_instances()
{ {
static const int NUM_STOP_ATTEMPTS = 100;
Instance_map::Iterator instances_it(instance_map); Instance_map::Iterator instances_it(instance_map);
Instance *instance; Instance *instance;
...@@ -438,7 +440,34 @@ void Guardian::stop_instances() ...@@ -438,7 +440,34 @@ void Guardian::stop_instances()
/* Request mysqld to stop. */ /* Request mysqld to stop. */
instance->kill_mysqld(SIGTERM); bool instance_stopped= FALSE;
for (int cur_attempt= 0; cur_attempt < NUM_STOP_ATTEMPTS; ++cur_attempt)
{
if (!instance->kill_mysqld(SIGTERM))
{
instance_stopped= TRUE;
break;
}
if (!instance->is_active())
{
instance_stopped= TRUE;
break;
}
/* Sleep for 0.3 sec and check again. */
my_sleep(300000);
}
/*
Abort if we failed to stop mysqld instance. That should not happen,
but if it happened, we don't know what to do and prefer to have clear
failure with coredump.
*/
DBUG_ASSERT(instance_stopped);
instance->unlock(); instance->unlock();
} }
......
...@@ -771,7 +771,7 @@ bool Instance::stop_mysqld() ...@@ -771,7 +771,7 @@ bool Instance::stop_mysqld()
These operations should also be used in Guardian to manage instances. These operations should also be used in Guardian to manage instances.
*/ */
void Instance::kill_mysqld(int signum) bool Instance::kill_mysqld(int signum)
{ {
pid_t mysqld_pid= options.load_pid(); pid_t mysqld_pid= options.load_pid();
...@@ -780,7 +780,7 @@ void Instance::kill_mysqld(int signum) ...@@ -780,7 +780,7 @@ void Instance::kill_mysqld(int signum)
log_info("Instance '%s': no pid file to send a signal (%d).", log_info("Instance '%s': no pid file to send a signal (%d).",
(const char *) get_name()->str, (const char *) get_name()->str,
(int) signum); (int) signum);
return; return TRUE;
} }
log_info("Instance '%s': sending %d to %d...", log_info("Instance '%s': sending %d to %d...",
...@@ -792,7 +792,7 @@ void Instance::kill_mysqld(int signum) ...@@ -792,7 +792,7 @@ void Instance::kill_mysqld(int signum)
{ {
log_info("Instance '%s': kill() failed.", log_info("Instance '%s': kill() failed.",
(const char *) get_name()->str); (const char *) get_name()->str);
return; return TRUE;
} }
/* Kill suceeded */ /* Kill suceeded */
...@@ -804,6 +804,8 @@ void Instance::kill_mysqld(int signum) ...@@ -804,6 +804,8 @@ void Instance::kill_mysqld(int signum)
/* After sucessful hard kill the pidfile need to be removed */ /* After sucessful hard kill the pidfile need to be removed */
options.unlink_pidfile(); options.unlink_pidfile();
} }
return FALSE;
} }
......
...@@ -104,7 +104,7 @@ class Instance ...@@ -104,7 +104,7 @@ class Instance
bool start_mysqld(); bool start_mysqld();
bool stop_mysqld(); bool stop_mysqld();
void kill_mysqld(int signo); bool kill_mysqld(int signo);
void lock(); void lock();
void unlock(); void unlock();
......
...@@ -64,8 +64,12 @@ Thread_registry::~Thread_registry() ...@@ -64,8 +64,12 @@ Thread_registry::~Thread_registry()
/* Check that no one uses the repository. */ /* Check that no one uses the repository. */
pthread_mutex_lock(&LOCK_thread_registry); pthread_mutex_lock(&LOCK_thread_registry);
if (head.next != &head) for (Thread_info *ti= head.next; ti != &head; ti= ti->next)
log_error("Not all threads died properly\n"); {
log_error("Thread_registry: unregistered thread: %lu.",
(unsigned long) ti->thread_id);
}
/* All threads must unregister */ /* All threads must unregister */
DBUG_ASSERT(head.next == &head); DBUG_ASSERT(head.next == &head);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment