Commit 26965387 authored by Sergei Golubchik's avatar Sergei Golubchik

updating @@wsrep_cluster_address deadlocks

wsrep_cluster_address_update() causes LOCK_wsrep_slave_threads
to be locked under LOCK_wsrep_cluster_config, while normally
the order should be the opposite.

Fix: don't protect @@wsrep_cluster_address value with the
LOCK_wsrep_cluster_config, LOCK_global_system_variables is enough.

Only protect wsrep reinitialization with the LOCK_wsrep_cluster_config.
And make it use a local copy of the global @@wsrep_cluster_address.

Also, introduce a helper function that checks whether
wsrep_cluster_address is set and also asserts that it can be safely
read by the caller.
parent b91e77cf
...@@ -5804,9 +5804,12 @@ int mysqld_main(int argc, char **argv) ...@@ -5804,9 +5804,12 @@ int mysqld_main(int argc, char **argv)
wsrep_init_startup (false); wsrep_init_startup (false);
} }
WSREP_DEBUG("Startup creating %ld applier threads running %lu", if (wsrep_cluster_address_exists())
wsrep_slave_threads - 1, wsrep_running_applier_threads); {
wsrep_create_appliers(wsrep_slave_threads - 1); WSREP_DEBUG("Startup creating %ld applier threads running %lu",
wsrep_slave_threads - 1, wsrep_running_applier_threads);
wsrep_create_appliers(wsrep_slave_threads - 1);
}
} }
} }
......
...@@ -5445,13 +5445,12 @@ static Sys_var_charptr Sys_wsrep_cluster_name( ...@@ -5445,13 +5445,12 @@ static Sys_var_charptr Sys_wsrep_cluster_name(
ON_CHECK(wsrep_cluster_name_check), ON_CHECK(wsrep_cluster_name_check),
ON_UPDATE(wsrep_cluster_name_update)); ON_UPDATE(wsrep_cluster_name_update));
static PolyLock_mutex PLock_wsrep_cluster_config(&LOCK_wsrep_cluster_config);
static Sys_var_charptr Sys_wsrep_cluster_address ( static Sys_var_charptr Sys_wsrep_cluster_address (
"wsrep_cluster_address", "Address to initially connect to cluster", "wsrep_cluster_address", "Address to initially connect to cluster",
PREALLOCATED GLOBAL_VAR(wsrep_cluster_address), PREALLOCATED GLOBAL_VAR(wsrep_cluster_address),
CMD_LINE(REQUIRED_ARG), CMD_LINE(REQUIRED_ARG),
IN_SYSTEM_CHARSET, DEFAULT(""), IN_SYSTEM_CHARSET, DEFAULT(""),
&PLock_wsrep_cluster_config, NOT_IN_BINLOG, NO_MUTEX_GUARD, NOT_IN_BINLOG,
ON_CHECK(wsrep_cluster_address_check), ON_CHECK(wsrep_cluster_address_check),
ON_UPDATE(wsrep_cluster_address_update)); ON_UPDATE(wsrep_cluster_address_update));
...@@ -5482,7 +5481,7 @@ static Sys_var_ulong Sys_wsrep_slave_threads( ...@@ -5482,7 +5481,7 @@ static Sys_var_ulong Sys_wsrep_slave_threads(
"wsrep_slave_threads", "Number of slave appliers to launch", "wsrep_slave_threads", "Number of slave appliers to launch",
GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG), GLOBAL_VAR(wsrep_slave_threads), CMD_LINE(REQUIRED_ARG),
VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1), VALID_RANGE(1, 512), DEFAULT(1), BLOCK_SIZE(1),
&PLock_wsrep_cluster_config, NOT_IN_BINLOG, NO_MUTEX_GUARD, NOT_IN_BINLOG,
ON_CHECK(0), ON_CHECK(0),
ON_UPDATE(wsrep_slave_threads_update)); ON_UPDATE(wsrep_slave_threads_update));
......
...@@ -63,7 +63,7 @@ int wsrep_check_opts() ...@@ -63,7 +63,7 @@ int wsrep_check_opts()
else else
{ {
// non-mysqldump SST requires wsrep_cluster_address on startup // non-mysqldump SST requires wsrep_cluster_address on startup
if (!wsrep_cluster_address || !wsrep_cluster_address[0]) if (!wsrep_cluster_address_exists())
{ {
WSREP_ERROR ("%s SST method requires wsrep_cluster_address to be " WSREP_ERROR ("%s SST method requires wsrep_cluster_address to be "
"configured on startup.", wsrep_sst_method); "configured on startup.", wsrep_sst_method);
......
...@@ -878,13 +878,13 @@ void wsrep_init_startup (bool sst_first) ...@@ -878,13 +878,13 @@ void wsrep_init_startup (bool sst_first)
if (!strcmp(wsrep_provider, WSREP_NONE)) return; if (!strcmp(wsrep_provider, WSREP_NONE)) return;
/* Skip replication start if no cluster address */ /* Skip replication start if no cluster address */
if (!wsrep_cluster_address || wsrep_cluster_address[0] == 0) return; if (!wsrep_cluster_address_exists()) return;
/* /*
Read value of wsrep_new_cluster before wsrep_start_replication(), Read value of wsrep_new_cluster before wsrep_start_replication(),
the value is reset to FALSE inside wsrep_start_replication. the value is reset to FALSE inside wsrep_start_replication.
*/ */
if (!wsrep_start_replication()) unireg_abort(1); if (!wsrep_start_replication(wsrep_cluster_address)) unireg_abort(1);
wsrep_create_rollbacker(); wsrep_create_rollbacker();
wsrep_create_appliers(1); wsrep_create_appliers(1);
...@@ -1034,7 +1034,7 @@ void wsrep_shutdown_replication() ...@@ -1034,7 +1034,7 @@ void wsrep_shutdown_replication()
my_pthread_setspecific_ptr(THR_THD, NULL); my_pthread_setspecific_ptr(THR_THD, NULL);
} }
bool wsrep_start_replication() bool wsrep_start_replication(const char *wsrep_cluster_address)
{ {
int rcode; int rcode;
WSREP_DEBUG("wsrep_start_replication"); WSREP_DEBUG("wsrep_start_replication");
...@@ -1049,12 +1049,7 @@ bool wsrep_start_replication() ...@@ -1049,12 +1049,7 @@ bool wsrep_start_replication()
return true; return true;
} }
if (!wsrep_cluster_address || wsrep_cluster_address[0]== 0) DBUG_ASSERT(wsrep_cluster_address[0]);
{
// if provider is non-trivial, but no address is specified, wait for address
WSREP_DEBUG("wsrep_start_replication exit due to empty address");
return true;
}
bool const bootstrap(TRUE == wsrep_new_cluster); bool const bootstrap(TRUE == wsrep_new_cluster);
wsrep_new_cluster= FALSE; wsrep_new_cluster= FALSE;
......
...@@ -203,7 +203,7 @@ extern void wsrep_close_applier_threads(int count); ...@@ -203,7 +203,7 @@ extern void wsrep_close_applier_threads(int count);
/* new defines */ /* new defines */
extern void wsrep_stop_replication(THD *thd); extern void wsrep_stop_replication(THD *thd);
extern bool wsrep_start_replication(); extern bool wsrep_start_replication(const char *wsrep_cluster_address);
extern void wsrep_shutdown_replication(); extern void wsrep_shutdown_replication();
extern bool wsrep_must_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); extern bool wsrep_must_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ);
extern bool wsrep_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); extern bool wsrep_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ);
...@@ -280,6 +280,13 @@ void WSREP_LOG(void (*fun)(const char* fmt, ...), const char* fmt, ...); ...@@ -280,6 +280,13 @@ void WSREP_LOG(void (*fun)(const char* fmt, ...), const char* fmt, ...);
#define WSREP_PROVIDER_EXISTS \ #define WSREP_PROVIDER_EXISTS \
(wsrep_provider && strncasecmp(wsrep_provider, WSREP_NONE, FN_REFLEN)) (wsrep_provider && strncasecmp(wsrep_provider, WSREP_NONE, FN_REFLEN))
static inline bool wsrep_cluster_address_exists()
{
if (mysqld_server_started)
mysql_mutex_assert_owner(&LOCK_global_system_variables);
return wsrep_cluster_address && wsrep_cluster_address[0];
}
#define WSREP_QUERY(thd) (thd->query()) #define WSREP_QUERY(thd) (thd->query())
extern my_bool wsrep_ready_get(); extern my_bool wsrep_ready_get();
...@@ -501,6 +508,7 @@ wsrep::key wsrep_prepare_key_for_toi(const char* db, const char* table, ...@@ -501,6 +508,7 @@ wsrep::key wsrep_prepare_key_for_toi(const char* db, const char* table,
#define wsrep_thr_deinit() do {} while(0) #define wsrep_thr_deinit() do {} while(0)
#define wsrep_init_globals() do {} while(0) #define wsrep_init_globals() do {} while(0)
#define wsrep_create_appliers(X) do {} while(0) #define wsrep_create_appliers(X) do {} while(0)
#define wsrep_cluster_address_exists() (false)
#endif /* WITH_WSREP */ #endif /* WITH_WSREP */
......
...@@ -125,11 +125,7 @@ bool wsrep_create_appliers(long threads, bool mutex_protected) ...@@ -125,11 +125,7 @@ bool wsrep_create_appliers(long threads, bool mutex_protected)
return false; return false;
} }
if (!wsrep_cluster_address || wsrep_cluster_address[0]== 0) DBUG_ASSERT(wsrep_cluster_address[0]);
{
WSREP_DEBUG("wsrep_create_appliers exit due to empty address");
return false;
}
long wsrep_threads=0; long wsrep_threads=0;
...@@ -284,16 +280,14 @@ static void wsrep_rollback_process(THD *rollbacker, ...@@ -284,16 +280,14 @@ static void wsrep_rollback_process(THD *rollbacker,
void wsrep_create_rollbacker() void wsrep_create_rollbacker()
{ {
if (wsrep_cluster_address && wsrep_cluster_address[0] != 0) DBUG_ASSERT(wsrep_cluster_address[0]);
{ Wsrep_thd_args* args(new Wsrep_thd_args(wsrep_rollback_process,
Wsrep_thd_args* args(new Wsrep_thd_args(wsrep_rollback_process, WSREP_ROLLBACKER_THREAD,
WSREP_ROLLBACKER_THREAD, pthread_self()));
pthread_self()));
/* create rollbacker */
/* create rollbacker */ if (create_wsrep_THD(args, false))
if (create_wsrep_THD(args, false)) WSREP_WARN("Can't create thread to manage wsrep rollback");
WSREP_WARN("Can't create thread to manage wsrep rollback");
}
} }
/* /*
......
...@@ -554,30 +554,31 @@ bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type) ...@@ -554,30 +554,31 @@ bool wsrep_cluster_address_update (sys_var *self, THD* thd, enum_var_type type)
/* stop replication is heavy operation, and includes closing all client /* stop replication is heavy operation, and includes closing all client
connections. Closing clients may need to get LOCK_global_system_variables connections. Closing clients may need to get LOCK_global_system_variables
at least in MariaDB. at least in MariaDB.
Note: releasing LOCK_global_system_variables may cause race condition, if
there can be several concurrent clients changing wsrep_provider
*/ */
char *tmp= my_strdup(wsrep_cluster_address, MYF(MY_WME));
WSREP_DEBUG("wsrep_cluster_address_update: %s", wsrep_cluster_address); WSREP_DEBUG("wsrep_cluster_address_update: %s", wsrep_cluster_address);
mysql_mutex_unlock(&LOCK_global_system_variables); mysql_mutex_unlock(&LOCK_global_system_variables);
mysql_mutex_lock(&LOCK_wsrep_cluster_config);
wsrep_stop_replication(thd); wsrep_stop_replication(thd);
if (wsrep_start_replication()) if (*tmp && wsrep_start_replication(tmp))
{ {
wsrep_create_rollbacker(); wsrep_create_rollbacker();
WSREP_DEBUG("Cluster address update creating %ld applier threads running %lu", WSREP_DEBUG("Cluster address update creating %ld applier threads running %lu",
wsrep_slave_threads, wsrep_running_applier_threads); wsrep_slave_threads, wsrep_running_applier_threads);
wsrep_create_appliers(wsrep_slave_threads); wsrep_create_appliers(wsrep_slave_threads);
} }
/* locking order to be enforced is:
1. LOCK_global_system_variables
2. LOCK_wsrep_cluster_config
=> have to juggle mutexes to comply with this
*/
mysql_mutex_unlock(&LOCK_wsrep_cluster_config); mysql_mutex_unlock(&LOCK_wsrep_cluster_config);
mysql_mutex_lock(&LOCK_global_system_variables); mysql_mutex_lock(&LOCK_global_system_variables);
mysql_mutex_lock(&LOCK_wsrep_cluster_config); if (strcmp(tmp, wsrep_cluster_address))
{
my_free((void*)wsrep_cluster_address);
wsrep_cluster_address= tmp;
}
else
my_free(tmp);
return false; return false;
} }
...@@ -674,11 +675,12 @@ static void wsrep_slave_count_change_update () ...@@ -674,11 +675,12 @@ static void wsrep_slave_count_change_update ()
bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type) bool wsrep_slave_threads_update (sys_var *self, THD* thd, enum_var_type type)
{ {
mysql_mutex_unlock(&LOCK_wsrep_cluster_config); if (!wsrep_cluster_address_exists())
return false;
mysql_mutex_unlock(&LOCK_global_system_variables); mysql_mutex_unlock(&LOCK_global_system_variables);
mysql_mutex_lock(&LOCK_wsrep_slave_threads); mysql_mutex_lock(&LOCK_wsrep_slave_threads);
mysql_mutex_lock(&LOCK_global_system_variables); mysql_mutex_lock(&LOCK_global_system_variables);
mysql_mutex_lock(&LOCK_wsrep_cluster_config);
bool res= false; bool res= false;
wsrep_slave_count_change_update(); wsrep_slave_count_change_update();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment