Commit 26965387 authored by Sergei Golubchik's avatar Sergei Golubchik

updating @@wsrep_cluster_address deadlocks

wsrep_cluster_address_update() causes LOCK_wsrep_slave_threads
to be locked under LOCK_wsrep_cluster_config, while normally
the order should be the opposite.

Fix: don't protect @@wsrep_cluster_address value with the
LOCK_wsrep_cluster_config, LOCK_global_system_variables is enough.

Only protect wsrep reinitialization with the LOCK_wsrep_cluster_config.
And make it use a local copy of the global @@wsrep_cluster_address.

Also, introduce a helper function that checks whether
wsrep_cluster_address is set and also asserts that it can be safely
read by the caller.
parent b91e77cf
......@@ -5804,9 +5804,12 @@ int mysqld_main(int argc, char **argv)
wsrep_init_startup (false);
}
WSREP_DEBUG("Startup creating %ld applier threads running %lu",
wsrep_slave_threads - 1, wsrep_running_applier_threads);
wsrep_create_appliers(wsrep_slave_threads - 1);
if (wsrep_cluster_address_exists())
{
WSREP_DEBUG("Startup creating %ld applier threads running %lu",
wsrep_slave_threads - 1, wsrep_running_applier_threads);
wsrep_create_appliers(wsrep_slave_threads - 1);
}
}
}
......
......@@ -5445,13 +5445,12 @@ static Sys_var_charptr Sys_wsrep_cluster_name(
ON_CHECK(wsrep_cluster_name_check),
ON_UPDATE(wsrep_cluster_name_update));
static PolyLock_mutex PLock_wsrep_cluster_config(&LOCK_wsrep_cluster_config);
static Sys_var_charptr Sys_wsrep_cluster_address (
"wsrep_cluster_address", "Address to initially connect to cluster",
PREALLOCATED GLOBAL_VAR(wsrep_cluster_address),
CMD_LINE(REQUIRED_ARG),
IN_SYSTEM_CHARSET, DEFAULT(""),
&PLock_wsrep_cluster_config, NOT_IN_BINLOG,
NO_MUTEX_GUARD, NOT_IN_BINLOG,
ON_CHECK(wsrep_cluster_address_check),
ON_UPDATE(wsrep_cluster_address_update));
......@@ -5482,7 +5481,7 @@ static Sys_var_ulong Sys_wsrep_slave_threads(
"wsrep_slave_threads", "Number of slave appliers to launch",
GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1),
&PLock_wsrep_cluster_config, NOT_IN_BINLOG,
NO_MUTEX_GUARD, NOT_IN_BINLOG,
ON_CHECK(0),
ON_UPDATE(wsrep_slave_threads_update));
......
......@@ -63,7 +63,7 @@ int wsrep_check_opts()
else
{
// non-mysqldump SST requires wsrep_cluster_address on startup
if (!wsrep_cluster_address || !wsrep_cluster_address[0])
if (!wsrep_cluster_address_exists())
{
WSREP_ERROR ("%s SST method requires wsrep_cluster_address to be "
"configured on startup.", wsrep_sst_method);
......
......@@ -878,13 +878,13 @@ void wsrep_init_startup (bool sst_first)
if (!strcmp(wsrep_provider, WSREP_NONE)) return;
/* Skip replication start if no cluster address */
if (!wsrep_cluster_address || wsrep_cluster_address[0] == 0) return;
if (!wsrep_cluster_address_exists()) return;
/*
Read value of wsrep_new_cluster before wsrep_start_replication(),
the value is reset to FALSE inside wsrep_start_replication.
*/
if (!wsrep_start_replication()) unireg_abort(1);
if (!wsrep_start_replication(wsrep_cluster_address)) unireg_abort(1);
wsrep_create_rollbacker();
wsrep_create_appliers(1);
......@@ -1034,7 +1034,7 @@ void wsrep_shutdown_replication()
my_pthread_setspecific_ptr(THR_THD, NULL);
}
bool wsrep_start_replication()
bool wsrep_start_replication(const char *wsrep_cluster_address)
{
int rcode;
WSREP_DEBUG("wsrep_start_replication");
......@@ -1049,12 +1049,7 @@ bool wsrep_start_replication()
return true;
}
if (!wsrep_cluster_address || wsrep_cluster_address[0]== 0)
{
// if provider is non-trivial, but no address is specified, wait for address
WSREP_DEBUG("wsrep_start_replication exit due to empty address");
return true;
}
DBUG_ASSERT(wsrep_cluster_address[0]);
bool const bootstrap(TRUE == wsrep_new_cluster);
wsrep_new_cluster= FALSE;
......
......@@ -203,7 +203,7 @@ extern void wsrep_close_applier_threads(int count);
/* new defines */
extern void wsrep_stop_replication(THD *thd);
extern bool wsrep_start_replication();
extern bool wsrep_start_replication(const char *wsrep_cluster_address);
extern void wsrep_shutdown_replication();
extern bool wsrep_must_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ);
extern bool wsrep_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ);
......@@ -280,6 +280,13 @@ void WSREP_LOG(void (*fun)(const char* fmt, ...), const char* fmt, ...);
#define WSREP_PROVIDER_EXISTS \
(wsrep_provider && strncasecmp(wsrep_provider, WSREP_NONE, FN_REFLEN))
static inline bool wsrep_cluster_address_exists()
{
if (mysqld_server_started)
mysql_mutex_assert_owner(&LOCK_global_system_variables);
return wsrep_cluster_address && wsrep_cluster_address[0];
}
#define WSREP_QUERY(thd) (thd->query())
extern my_bool wsrep_ready_get();
......@@ -501,6 +508,7 @@ wsrep::key wsrep_prepare_key_for_toi(const char* db, const char* table,
#define wsrep_thr_deinit() do {} while(0)
#define wsrep_init_globals() do {} while(0)
#define wsrep_create_appliers(X) do {} while(0)
#define wsrep_cluster_address_exists() (false)
#endif /* WITH_WSREP */
......
......@@ -125,11 +125,7 @@ bool wsrep_create_appliers(long threads, bool mutex_protected)
return false;
}
if (!wsrep_cluster_address || wsrep_cluster_address[0]== 0)
{
WSREP_DEBUG("wsrep_create_appliers exit due to empty address");
return false;
}
DBUG_ASSERT(wsrep_cluster_address[0]);
long wsrep_threads=0;
......@@ -284,16 +280,14 @@ static void wsrep_rollback_process(THD *rollbacker,
void wsrep_create_rollbacker()
{
if (wsrep_cluster_address && wsrep_cluster_address[0] != 0)
{
Wsrep_thd_args* args(new Wsrep_thd_args(wsrep_rollback_process,
WSREP_ROLLBACKER_THREAD,
pthread_self()));
/* create rollbacker */
if (create_wsrep_THD(args, false))
WSREP_WARN("Can't create thread to manage wsrep rollback");
}
DBUG_ASSERT(wsrep_cluster_address[0]);
Wsrep_thd_args* args(new Wsrep_thd_args(wsrep_rollback_process,
WSREP_ROLLBACKER_THREAD,
pthread_self()));
/* create rollbacker */
if (create_wsrep_THD(args, false))
WSREP_WARN("Can't create thread to manage wsrep rollback");
}
/*
......
......@@ -554,30 +554,31 @@ bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type)
/* stop replication is heavy operation, and includes closing all client
connections. Closing clients may need to get LOCK_global_system_variables
at least in MariaDB.
Note: releasing LOCK_global_system_variables may cause race condition, if
there can be several concurrent clients changing wsrep_provider
*/
char *tmp= my_strdup(wsrep_cluster_address, MYF(MY_WME));
WSREP_DEBUG("wsrep_cluster_address_update: %s", wsrep_cluster_address);
mysql_mutex_unlock(&LOCK_global_system_variables);
mysql_mutex_lock(&LOCK_wsrep_cluster_config);
wsrep_stop_replication(thd);
if (wsrep_start_replication())
if (*tmp && wsrep_start_replication(tmp))
{
wsrep_create_rollbacker();
WSREP_DEBUG("Cluster address update creating %ld applier threads running %lu",
wsrep_slave_threads, wsrep_running_applier_threads);
wsrep_create_appliers(wsrep_slave_threads);
}
/* locking order to be enforced is:
1. LOCK_global_system_variables
2. LOCK_wsrep_cluster_config
=> have to juggle mutexes to comply with this
*/
mysql_mutex_unlock(&LOCK_wsrep_cluster_config);
mysql_mutex_lock(&LOCK_global_system_variables);
mysql_mutex_lock(&LOCK_wsrep_cluster_config);
if (strcmp(tmp, wsrep_cluster_address))
{
my_free((void*)wsrep_cluster_address);
wsrep_cluster_address= tmp;
}
else
my_free(tmp);
return false;
}
......@@ -674,11 +675,12 @@ static void wsrep_slave_count_change_update ()
bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type)
{
mysql_mutex_unlock(&LOCK_wsrep_cluster_config);
if (!wsrep_cluster_address_exists())
return false;
mysql_mutex_unlock(&LOCK_global_system_variables);
mysql_mutex_lock(&LOCK_wsrep_slave_threads);
mysql_mutex_lock(&LOCK_global_system_variables);
mysql_mutex_lock(&LOCK_wsrep_cluster_config);
bool res= false;
wsrep_slave_count_change_update();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment