Commit 92fe7556 authored by unknown

Fix for BUG#2921 "Replication problem on mutex lock in mySQL-4.0.18":

re-using unused LOCK_active_mi to serialize all administrative
commands related to replication:
START SLAVE, STOP SLAVE, RESET SLAVE, CHANGE MASTER, init_slave()
(replication autostart at server startup), end_slave() (replication
autostop at server shutdown), LOAD DATA FROM MASTER.
This protects us against a handful of deadlocks (such as BUG#2921, which
occurs with two concurrent START SLAVE, but also with two concurrent STOP SLAVE).
Removing unused variables.


sql/item_func.cc:
  We don't need LOCK_active_mi just to execute MASTER_POS_WAIT().
sql/repl_failsafe.cc:
  no need for macro
sql/set_var.cc:
  no need for macro
sql/slave.cc:
  Re-using unused LOCK_active_mi to serialize all administrative
  commands related to replication:
  START SLAVE, STOP SLAVE, RESET SLAVE, CHANGE MASTER, init_slave()
  (replication autostart at server startup), end_slave() (replication
  autostop at server shutdown), LOAD DATA FROM MASTER.
  This protects us against a handful of deadlocks.
  Removing unused variables.
sql/slave.h:
  Re-using LOCK_active_mi to serialize administrative replication commands.
  Macros unneeded. Removing unneeded variables.
sql/sql_parse.cc:
  found unused variable.
  Replacing macros.
sql/sql_show.cc:
  replacing macros
parent fc192448
......@@ -1543,13 +1543,11 @@ longlong Item_master_pos_wait::val_int()
}
longlong pos = args[1]->val_int();
longlong timeout = (arg_count==3) ? args[2]->val_int() : 0 ;
LOCK_ACTIVE_MI;
if ((event_count = active_mi->rli.wait_for_pos(thd, log_name, pos, timeout)) == -2)
{
null_value = 1;
event_count=0;
}
UNLOCK_ACTIVE_MI;
return event_count;
}
......
......@@ -750,7 +750,7 @@ int load_master_data(THD* thd)
We do not want anyone messing with the slave at all for the entire
duration of the data load.
*/
LOCK_ACTIVE_MI;
pthread_mutex_lock(&LOCK_active_mi);
lock_slave_threads(active_mi);
init_thread_mask(&restart_thread_mask,active_mi,0 /*not inverse*/);
if (restart_thread_mask &&
......@@ -759,7 +759,7 @@ int load_master_data(THD* thd)
{
send_error(&thd->net,error);
unlock_slave_threads(active_mi);
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
return 1;
}
......@@ -913,7 +913,7 @@ int load_master_data(THD* thd)
{
send_error(&thd->net, 0, "Failed purging old relay logs");
unlock_slave_threads(active_mi);
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
return 1;
}
pthread_mutex_lock(&active_mi->rli.data_lock);
......@@ -934,7 +934,7 @@ int load_master_data(THD* thd)
err:
unlock_slave_threads(active_mi);
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
thd->proc_info = 0;
mc_mysql_close(&mysql); // safe to call since we always do mc_mysql_init()
......
......@@ -1271,7 +1271,7 @@ byte *sys_var_insert_id::value_ptr(THD *thd, enum_var_type type)
bool sys_var_slave_skip_counter::check(THD *thd, set_var *var)
{
int result= 0;
LOCK_ACTIVE_MI;
pthread_mutex_lock(&LOCK_active_mi);
pthread_mutex_lock(&active_mi->rli.run_lock);
if (active_mi->rli.slave_running)
{
......@@ -1279,14 +1279,14 @@ bool sys_var_slave_skip_counter::check(THD *thd, set_var *var)
result=1;
}
pthread_mutex_unlock(&active_mi->rli.run_lock);
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
return result;
}
bool sys_var_slave_skip_counter::update(THD *thd, set_var *var)
{
LOCK_ACTIVE_MI;
pthread_mutex_lock(&LOCK_active_mi);
pthread_mutex_lock(&active_mi->rli.run_lock);
/*
The following test should normally never be true as we test this
......@@ -1300,7 +1300,7 @@ bool sys_var_slave_skip_counter::update(THD *thd, set_var *var)
pthread_mutex_unlock(&active_mi->rli.data_lock);
}
pthread_mutex_unlock(&active_mi->rli.run_lock);
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
return 0;
}
......
......@@ -34,7 +34,6 @@ typedef bool (*CHECK_KILLED_FUNC)(THD*,void*);
volatile bool slave_sql_running = 0, slave_io_running = 0;
char* slave_load_tmpdir = 0;
MASTER_INFO *active_mi;
volatile int active_mi_in_use = 0;
HASH replicate_do_table, replicate_ignore_table;
DYNAMIC_ARRAY replicate_wild_do_table, replicate_wild_ignore_table;
bool do_table_inited = 0, ignore_table_inited = 0;
......@@ -114,8 +113,12 @@ int init_slave()
{
DBUG_ENTER("init_slave");
/* This is called when mysqld starts */
/*
This is called when mysqld starts, before client connections are
accepted. However, bootstrap may conflict with us if it does START SLAVE.
So it's safer to take the lock.
*/
pthread_mutex_lock(&LOCK_active_mi);
/*
TODO: re-write this to iterate through the list of files
for multi-master
......@@ -160,9 +163,11 @@ int init_slave()
goto err;
}
}
pthread_mutex_unlock(&LOCK_active_mi);
DBUG_RETURN(0);
err:
pthread_mutex_unlock(&LOCK_active_mi);
DBUG_RETURN(1);
}
......@@ -806,7 +811,14 @@ static int end_slave_on_walk(MASTER_INFO* mi, gptr /*unused*/)
void end_slave()
{
/* This is called when the server terminates, in close_connections(). */
/*
This is called when the server terminates, in close_connections().
It terminates slave threads. However, some CHANGE MASTER etc may still be
running presently. If a START SLAVE was in progress, the mutex lock below
will make us wait until slave threads have started, and START SLAVE
returns, then we terminate them here.
*/
pthread_mutex_lock(&LOCK_active_mi);
if (active_mi)
{
/*
......@@ -827,6 +839,7 @@ void end_slave()
delete active_mi;
active_mi= 0;
}
pthread_mutex_unlock(&LOCK_active_mi);
}
......
......@@ -27,12 +27,19 @@
/*
MUTEXES in replication:
LOCK_active_mi: this is meant for multimaster, when we can switch from a
master to another. It protects active_mi. We don't care of it for the moment,
as active_mi never moves (it's created at startup and deleted at shutdown, and
not changed: it always points to the same MASTER_INFO struct), because we
don't have multimaster. So for the moment, mi does not move, and mi->rli does
not either.
LOCK_active_mi: [note: this was originally meant for multimaster, to switch
from a master to another, to protect active_mi] It is used to SERIALIZE ALL
administrative commands of replication: START SLAVE, STOP SLAVE, CHANGE
MASTER, RESET SLAVE, end_slave() (when mysqld stops) [init_slave() does not
need it as it's called early]. Any of these commands holds the mutex from the
start till the end. This thus protects us against a handful of deadlocks
(consider start_slave_thread() which, when starting the I/O thread, releases
mi->run_lock, keeps rli->run_lock, and tries to re-acquire mi->run_lock).
Currently active_mi never moves (it's created at startup and deleted at
shutdown, and not changed: it always points to the same MASTER_INFO struct),
because we don't have multimaster. So for the moment, mi does not move, and
mi->rli does not either.
In MASTER_INFO: run_lock, data_lock
run_lock protects all information about the run state: slave_running, and the
......@@ -43,6 +50,9 @@
In RELAY_LOG_INFO: run_lock, data_lock
see MASTER_INFO
Order of acquisition: if you want to have LOCK_active_mi and a run_lock, you
must acquire LOCK_active_mi first.
In MYSQL_LOG: LOCK_log, LOCK_index of the binlog and the relay log
LOCK_log: when you write to it. LOCK_index: when you create/delete a binlog
(so that you have to update the .index file).
......@@ -64,19 +74,6 @@ enum enum_binlog_formats {
BINLOG_FORMAT_323_LESS_57,
BINLOG_FORMAT_323_GEQ_57 };
/*
TODO: this needs to be redone, but for now it does not matter since
we do not have multi-master yet.
*/
#define LOCK_ACTIVE_MI { pthread_mutex_lock(&LOCK_active_mi); \
++active_mi_in_use; \
pthread_mutex_unlock(&LOCK_active_mi);}
#define UNLOCK_ACTIVE_MI { pthread_mutex_lock(&LOCK_active_mi); \
--active_mi_in_use; \
pthread_mutex_unlock(&LOCK_active_mi); }
/*
st_relay_log_info contains information on the current relay log and
relay log offset, and master log name and log sequence corresponding to the
......@@ -441,7 +438,6 @@ extern "C" pthread_handler_decl(handle_slave_io,arg);
extern "C" pthread_handler_decl(handle_slave_sql,arg);
extern bool volatile abort_loop;
extern MASTER_INFO main_mi, *active_mi; /* active_mi for multi-master */
extern volatile int active_mi_in_use;
extern LIST master_list;
extern HASH replicate_do_table, replicate_ignore_table;
extern DYNAMIC_ARRAY replicate_wild_do_table, replicate_wild_ignore_table;
......
......@@ -1519,9 +1519,9 @@ mysql_execute_command(void)
{
if (check_global_access(thd, SUPER_ACL))
goto error;
LOCK_ACTIVE_MI;
pthread_mutex_lock(&LOCK_active_mi);
res = change_master(thd,active_mi);
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
break;
}
case SQLCOM_SHOW_SLAVE_STAT:
......@@ -1529,9 +1529,9 @@ mysql_execute_command(void)
/* Accept one of two privileges */
if (check_global_access(thd, SUPER_ACL | REPL_CLIENT_ACL))
goto error;
LOCK_ACTIVE_MI;
pthread_mutex_lock(&LOCK_active_mi);
res = show_master_info(thd,active_mi);
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
break;
}
case SQLCOM_SHOW_MASTER_STAT:
......@@ -1581,7 +1581,7 @@ mysql_execute_command(void)
if (error)
goto error;
}
LOCK_ACTIVE_MI;
pthread_mutex_lock(&LOCK_active_mi);
/*
fetch_master_table will send the error to the client on failure.
Give error if the table already exists.
......@@ -1591,7 +1591,7 @@ mysql_execute_command(void)
{
send_ok(&thd->net);
}
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
break;
}
#endif /* HAVE_REPLICATION */
......@@ -1702,9 +1702,9 @@ mysql_execute_command(void)
#ifdef HAVE_REPLICATION
case SQLCOM_SLAVE_START:
{
LOCK_ACTIVE_MI;
pthread_mutex_lock(&LOCK_active_mi);
start_slave(thd,active_mi,1 /* net report*/);
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
break;
}
case SQLCOM_SLAVE_STOP:
......@@ -1727,9 +1727,9 @@ mysql_execute_command(void)
break;
}
{
LOCK_ACTIVE_MI;
pthread_mutex_lock(&LOCK_active_mi);
stop_slave(thd,active_mi,1/* net report*/);
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
break;
}
#endif /* HAVE_REPLICATION */
......@@ -3638,9 +3638,9 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables)
mysql_update_log.new_file(1);
mysql_bin_log.new_file(1);
mysql_slow_log.new_file(1);
LOCK_ACTIVE_MI;
pthread_mutex_lock(&LOCK_active_mi);
rotate_relay_log(active_mi);
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
if (ha_flush_logs())
result=1;
......@@ -3685,7 +3685,7 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables)
#endif
if (options & REFRESH_SLAVE)
{
LOCK_ACTIVE_MI;
pthread_mutex_lock(&LOCK_active_mi);
if (reset_slave(thd, active_mi))
{
result=1;
......@@ -3697,7 +3697,7 @@ bool reload_acl_and_cache(THD *thd, ulong options, TABLE_LIST *tables)
*/
error_already_sent=1;
}
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
}
if (options & REFRESH_USER_RESOURCES)
reset_mqh(thd,(LEX_USER *) NULL);
......
......@@ -1270,11 +1270,11 @@ int mysqld_show(THD *thd, const char *wild, show_var_st *variables,
#ifdef HAVE_REPLICATION
case SHOW_SLAVE_RUNNING:
{
LOCK_ACTIVE_MI;
pthread_mutex_lock(&LOCK_active_mi);
net_store_data(&packet2, (active_mi->slave_running &&
active_mi->rli.slave_running)
? "ON" : "OFF");
UNLOCK_ACTIVE_MI;
pthread_mutex_unlock(&LOCK_active_mi);
break;
}
#endif
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment