Commit be569826 authored by Jan Lindström's avatar Jan Lindström

MDEV-15607: mysqld crashed few after node is being joined with sst

This is a typical systemd response, where it tries to shut down the
joiner (due to "timeout") before the joiner manages to complete the SST.

wsrep_sst_wait
wsrep_SE_init_wait
	While waiting for the operation to finish, use mysql_cond_timedwait
	instead of mysql_cond_wait, and if the operation has not finished,
	extend the systemd timeout (if needed).
parent c6392d52
......@@ -30,6 +30,10 @@
#include <cstdio>
#include <cstdlib>
#if MYSQL_VERSION_ID < 100200
# include <my_service_manager.h>
#endif
static char wsrep_defaults_file[FN_REFLEN * 2 + 10 + 30 +
sizeof(WSREP_SST_OPT_CONF) +
sizeof(WSREP_SST_OPT_CONF_SUFFIX) +
......@@ -186,6 +190,9 @@ bool wsrep_before_SE()
static bool sst_complete = false;
static bool sst_needed = false;
#define WSREP_EXTEND_TIMEOUT_INTERVAL 30
#define WSREP_TIMEDWAIT_SECONDS 10
void wsrep_sst_grab ()
{
WSREP_INFO("wsrep_sst_grab()");
......@@ -197,11 +204,25 @@ void wsrep_sst_grab ()
// Wait for end of SST
bool wsrep_sst_wait ()
{
if (mysql_mutex_lock (&LOCK_wsrep_sst)) abort();
struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0};
uint32 total_wtime = 0;
if (mysql_mutex_lock (&LOCK_wsrep_sst))
abort();
WSREP_INFO("Waiting for SST to complete.");
while (!sst_complete)
{
WSREP_INFO("Waiting for SST to complete.");
mysql_cond_wait (&COND_wsrep_sst, &LOCK_wsrep_sst);
mysql_cond_timedwait (&COND_wsrep_sst, &LOCK_wsrep_sst, &wtime);
if (!sst_complete)
{
total_wtime += wtime.tv_sec;
WSREP_DEBUG("Waiting for SST to complete. waited %u secs.", total_wtime);
service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
"WSREP state transfer ongoing, current seqno: %ld", local_seqno);
}
}
if (local_seqno >= 0)
......@@ -1298,10 +1319,22 @@ void wsrep_SE_init_grab()
void wsrep_SE_init_wait()
{
struct timespec wtime = {WSREP_TIMEDWAIT_SECONDS, 0};
uint32 total_wtime=0;
while (SE_initialized == false)
{
mysql_cond_wait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init);
mysql_cond_timedwait (&COND_wsrep_sst_init, &LOCK_wsrep_sst_init, &wtime);
if (!SE_initialized)
{
total_wtime += wtime.tv_sec;
WSREP_DEBUG("Waiting for SST to complete. waited %u secs.", total_wtime);
service_manager_extend_timeout(WSREP_EXTEND_TIMEOUT_INTERVAL,
"WSREP SE initialization ongoing.");
}
}
mysql_mutex_unlock (&LOCK_wsrep_sst_init);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment