Commit 643606ca authored by anozdrin/alik@alik

Instance Manager polishing.

parent 384f0fee
...@@ -66,11 +66,11 @@ Guardian_thread::~Guardian_thread() ...@@ -66,11 +66,11 @@ Guardian_thread::~Guardian_thread()
} }
void Guardian_thread::request_shutdown(bool stop_instances_arg) void Guardian_thread::request_shutdown()
{ {
pthread_mutex_lock(&LOCK_guardian); pthread_mutex_lock(&LOCK_guardian);
/* stop instances or just clean up Guardian repository */ /* stop instances or just clean up Guardian repository */
stop_instances(stop_instances_arg); stop_instances();
shutdown_requested= TRUE; shutdown_requested= TRUE;
pthread_mutex_unlock(&LOCK_guardian); pthread_mutex_unlock(&LOCK_guardian);
} }
...@@ -118,11 +118,11 @@ void Guardian_thread::process_instance(Instance *instance, ...@@ -118,11 +118,11 @@ void Guardian_thread::process_instance(Instance *instance,
{ {
/* Pid file not created yet, don't go to STARTED state yet */ /* Pid file not created yet, don't go to STARTED state yet */
} }
else else if (current_node->state != STARTED)
{ {
/* clear status fields */ /* clear status fields */
log_info("guardian: instance %s is running, set state to STARTED", log_info("guardian: instance '%s' is running, set state to STARTED.",
instance->options.instance_name); (const char *) instance->options.instance_name);
current_node->restart_counter= 0; current_node->restart_counter= 0;
current_node->crash_moment= 0; current_node->crash_moment= 0;
current_node->state= STARTED; current_node->state= STARTED;
...@@ -132,8 +132,8 @@ void Guardian_thread::process_instance(Instance *instance, ...@@ -132,8 +132,8 @@ void Guardian_thread::process_instance(Instance *instance,
{ {
switch (current_node->state) { switch (current_node->state) {
case NOT_STARTED: case NOT_STARTED:
log_info("guardian: starting instance %s", log_info("guardian: starting instance '%s'...",
instance->options.instance_name); (const char *) instance->options.instance_name);
/* NOTE, set state to STARTING _before_ start() is called */ /* NOTE, set state to STARTING _before_ start() is called */
current_node->state= STARTING; current_node->state= STARTING;
...@@ -157,8 +157,8 @@ void Guardian_thread::process_instance(Instance *instance, ...@@ -157,8 +157,8 @@ void Guardian_thread::process_instance(Instance *instance,
if (instance->is_crashed()) if (instance->is_crashed())
{ {
instance->start(); instance->start();
log_info("guardian: starting instance %s", log_info("guardian: starting instance '%s'...",
instance->options.instance_name); (const char *) instance->options.instance_name);
} }
} }
else else
...@@ -175,8 +175,8 @@ void Guardian_thread::process_instance(Instance *instance, ...@@ -175,8 +175,8 @@ void Guardian_thread::process_instance(Instance *instance,
instance->start(); instance->start();
current_node->last_checked= current_time; current_node->last_checked= current_time;
current_node->restart_counter++; current_node->restart_counter++;
log_info("guardian: restarting instance %s", log_info("guardian: restarting instance '%s'...",
instance->options.instance_name); (const char *) instance->options.instance_name);
} }
} }
else else
...@@ -382,12 +382,11 @@ int Guardian_thread::stop_guard(Instance *instance) ...@@ -382,12 +382,11 @@ int Guardian_thread::stop_guard(Instance *instance)
SYNOPSIS SYNOPSIS
stop_instances() stop_instances()
stop_instances_arg whether we should stop instances at shutdown
DESCRIPTION DESCRIPTION
Loops through the guarded_instances list and prepares them for shutdown. Loops through the guarded_instances list and prepares them for shutdown.
If stop_instances was requested, we need to issue a stop command and change For each instance we issue a stop command and change the state
the state accordingly. Otherwise we simply delete an entry. accordingly.
NOTE NOTE
Guardian object should be locked by the calling function. Guardian object should be locked by the calling function.
...@@ -397,42 +396,29 @@ int Guardian_thread::stop_guard(Instance *instance) ...@@ -397,42 +396,29 @@ int Guardian_thread::stop_guard(Instance *instance)
1 - error occurred 1 - error occurred
*/ */
int Guardian_thread::stop_instances(bool stop_instances_arg) int Guardian_thread::stop_instances()
{ {
LIST *node; LIST *node;
node= guarded_instances; node= guarded_instances;
while (node != NULL) while (node != NULL)
{ {
if (!stop_instances_arg) GUARD_NODE *current_node= (GUARD_NODE *) node->data;
/*
If instance is running or was running (and now probably hanging),
request stop.
*/
if (current_node->instance->is_running() ||
(current_node->state == STARTED))
{ {
/* just forget about an instance */ current_node->state= STOPPING;
guarded_instances= list_delete(guarded_instances, node); current_node->last_checked= time(NULL);
/*
This should still work fine, as we have only removed the
node from the list. The pointer to the next one is still valid
*/
node= node->next;
} }
else else
{ /* otherwise remove it from the list */
GUARD_NODE *current_node= (GUARD_NODE *) node->data; guarded_instances= list_delete(guarded_instances, node);
/* /* But try to kill it anyway. Just in case */
If instance is running or was running (and now probably hanging), current_node->instance->kill_instance(SIGTERM);
request stop. node= node->next;
*/
if (current_node->instance->is_running() ||
(current_node->state == STARTED))
{
current_node->state= STOPPING;
current_node->last_checked= time(NULL);
}
else
/* otherwise remove it from the list */
guarded_instances= list_delete(guarded_instances, node);
/* But try to kill it anyway. Just in case */
current_node->instance->kill_instance(SIGTERM);
node= node->next;
}
} }
return 0; return 0;
} }
...@@ -440,7 +426,7 @@ int Guardian_thread::stop_instances(bool stop_instances_arg) ...@@ -440,7 +426,7 @@ int Guardian_thread::stop_instances(bool stop_instances_arg)
void Guardian_thread::lock() void Guardian_thread::lock()
{ {
pthread_mutex_lock(&LOCK_guardian); pthread_mutex_lock(&LOCK_guardian);
} }
......
...@@ -89,7 +89,7 @@ class Guardian_thread: public Guardian_thread_args ...@@ -89,7 +89,7 @@ class Guardian_thread: public Guardian_thread_args
/* Initialize or refresh the list of guarded instances */ /* Initialize or refresh the list of guarded instances */
int init(); int init();
/* Request guardian shutdown. Stop instances if needed */ /* Request guardian shutdown. Stop instances if needed */
void request_shutdown(bool stop_instances); void request_shutdown();
/* Start instance protection */ /* Start instance protection */
int guard(Instance *instance, bool nolock= FALSE); int guard(Instance *instance, bool nolock= FALSE);
/* Stop instance protection */ /* Stop instance protection */
...@@ -104,7 +104,7 @@ class Guardian_thread: public Guardian_thread_args ...@@ -104,7 +104,7 @@ class Guardian_thread: public Guardian_thread_args
private: private:
/* Prepares Guardian shutdown. Stops instances if needed */ /* Prepares Guardian shutdown. Stops instances if needed */
int stop_instances(bool stop_instances_arg); int stop_instances();
/* check instance state and act accordingly */ /* check instance state and act accordingly */
void process_instance(Instance *instance, GUARD_NODE *current_node, void process_instance(Instance *instance, GUARD_NODE *current_node,
LIST **guarded_instances, LIST *elem); LIST **guarded_instances, LIST *elem);
......
...@@ -156,8 +156,8 @@ static int start_process(Instance_options *instance_options, ...@@ -156,8 +156,8 @@ static int start_process(Instance_options *instance_options,
/* exec never returns */ /* exec never returns */
exit(1); exit(1);
case -1: case -1:
log_info("cannot create a new process to start instance %s", log_info("cannot create a new process to start instance '%s'.",
instance_options->instance_name); (const char *) instance_options->instance_name);
return 1; return 1;
} }
return 0; return 0;
...@@ -252,7 +252,8 @@ static void start_and_monitor_instance(Instance_options *old_instance_options, ...@@ -252,7 +252,8 @@ static void start_and_monitor_instance(Instance_options *old_instance_options,
MAX_INSTANCE_NAME_LEN - 1); MAX_INSTANCE_NAME_LEN - 1);
instance_name_len= old_instance_options->instance_name_len; instance_name_len= old_instance_options->instance_name_len;
log_info("starting instance %s", instance_name_buff); log_info("starting instance '%s'...",
(const char *) instance_name_buff);
if (start_process(old_instance_options, &process_info)) if (start_process(old_instance_options, &process_info))
{ {
...@@ -286,9 +287,9 @@ void Instance::remove_pid() ...@@ -286,9 +287,9 @@ void Instance::remove_pid()
int pid; int pid;
if ((pid= options.get_pid()) != 0) /* check the pidfile */ if ((pid= options.get_pid()) != 0) /* check the pidfile */
if (options.unlink_pidfile()) /* remove stalled pidfile */ if (options.unlink_pidfile()) /* remove stalled pidfile */
log_error("cannot remove pidfile for instance %i, this might be \ log_error("cannot remove pidfile for instance '%s', this might be \
since IM lacks permmissions or hasn't found the pidifle", since IM lacks permmissions or hasn't found the pidifle",
options.instance_name); (const char *) options.instance_name);
} }
...@@ -435,9 +436,9 @@ bool Instance::is_running() ...@@ -435,9 +436,9 @@ bool Instance::is_running()
We have successfully connected to the server using fake We have successfully connected to the server using fake
username/password. Write a warning to the logfile. username/password. Write a warning to the logfile.
*/ */
log_info("The Instance Manager was able to log into you server \ log_info("The Instance Manager was able to log into you server "
with faked compiled-in password while checking server status. \ "with faked compiled-in password while checking server status. "
Looks like something is wrong."); "Looks like something is wrong.");
pthread_mutex_unlock(&LOCK_instance); pthread_mutex_unlock(&LOCK_instance);
return_val= TRUE; /* server is alive */ return_val= TRUE; /* server is alive */
} }
...@@ -577,10 +578,10 @@ void Instance::kill_instance(int signum) ...@@ -577,10 +578,10 @@ void Instance::kill_instance(int signum)
/* Kill succeeded */ /* Kill succeeded */
if (signum == SIGKILL) /* really killed instance with SIGKILL */ if (signum == SIGKILL) /* really killed instance with SIGKILL */
{ {
log_error("The instance %s is being stopped forcibly. Normally" \ log_error("The instance '%s' is being stopped forcibly. Normally"
"it should not happen. Probably the instance has been" \ "it should not happen. Probably the instance has been"
"hanging. You should also check your IM setup", "hanging. You should also check your IM setup",
options.instance_name); (const char *) options.instance_name);
/* After successful hard kill the pidfile needs to be removed */ /* After successful hard kill the pidfile needs to be removed */
options.unlink_pidfile(); options.unlink_pidfile();
} }
......
...@@ -280,7 +280,7 @@ int Listener_thread::create_tcp_socket() ...@@ -280,7 +280,7 @@ int Listener_thread::create_tcp_socket()
FD_SET(ip_socket, &read_fds); FD_SET(ip_socket, &read_fds);
sockets[num_sockets++]= ip_socket; sockets[num_sockets++]= ip_socket;
log_info("accepting connections on ip socket"); log_info("accepting connections on ip socket (port: %d)", (int) im_port);
return 0; return 0;
} }
...@@ -334,7 +334,7 @@ create_unix_socket(struct sockaddr_un &unix_socket_address) ...@@ -334,7 +334,7 @@ create_unix_socket(struct sockaddr_un &unix_socket_address)
/* make sure that instances won't be listening our sockets */ /* make sure that instances won't be listening our sockets */
set_no_inherit(unix_socket); set_no_inherit(unix_socket);
log_info("accepting connections on unix socket %s", log_info("accepting connections on unix socket '%s'",
unix_socket_address.sun_path); unix_socket_address.sun_path);
sockets[num_sockets++]= unix_socket; sockets[num_sockets++]= unix_socket;
FD_SET(unix_socket, &read_fds); FD_SET(unix_socket, &read_fds);
......
...@@ -110,7 +110,7 @@ void stop_all(Guardian_thread *guardian, Thread_registry *registry) ...@@ -110,7 +110,7 @@ void stop_all(Guardian_thread *guardian, Thread_registry *registry)
Let guardian thread know that it should break its processing cycle, Let guardian thread know that it should break its processing cycle,
once it wakes up. once it wakes up.
*/ */
guardian->request_shutdown(true); guardian->request_shutdown();
/* wake guardian */ /* wake guardian */
pthread_cond_signal(&guardian->COND_guardian); pthread_cond_signal(&guardian->COND_guardian);
/* stop all threads */ /* stop all threads */
...@@ -282,8 +282,7 @@ void manager(const Options &options) ...@@ -282,8 +282,7 @@ void manager(const Options &options)
{ {
if (!guardian_thread.is_stopped()) if (!guardian_thread.is_stopped())
{ {
bool stop_instances= true; guardian_thread.request_shutdown();
guardian_thread.request_shutdown(stop_instances);
pthread_cond_signal(&guardian_thread.COND_guardian); pthread_cond_signal(&guardian_thread.COND_guardian);
} }
else else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment