Commit d7e0784c authored by unknown's avatar unknown

Fix for BUG#8325 "Deadlock in replication thread stops replication":

in slave SQL thread: if a transaction fails because of InnoDB deadlock or innodb_lock_wait_timeout exceeded,
optionally retry the transaction a certain number of times (new variable --slave_transaction_retries).


sql/mysql_priv.h:
  new var slave_transaction_retries
sql/mysqld.cc:
  new variable slave_transaction_retries. Plus fixing a typo.
sql/set_var.cc:
  new global variable slave_transaction_retries (will be one per subslave, when we have multimaster).
sql/slave.cc:
  Slave SQL thread: if a transaction fails because of InnoDB deadlock or innodb_lock_wait_timeout exceeded,
  optionally retry the transaction a certain number of times (--slave_transaction_retries).
sql/slave.h:
  new RELAY_LOG_INFO::trans_retries.
parent 3087b2f1
stop slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
create table t1 (a int not null, key(a)) engine=innodb;
create table t2 (a int not null, key(a)) engine=innodb;
create table t3 (a int) engine=innodb;
create table t4 (a int) engine=innodb;
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` int(11) NOT NULL default '0',
KEY `a` (`a`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
show create table t2;
Table Create Table
t2 CREATE TABLE `t2` (
`a` int(11) NOT NULL default '0',
KEY `a` (`a`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1
stop slave;
begin;
insert into t3 select * from t2 for update;
insert into t1 values(1);
commit;
begin;
select * from t1 for update;
a
start slave;
insert into t2 values(22);
commit;
select * from t1;
a
1
select * from t2;
a
22
show slave status;
Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master
# 127.0.0.1 root MASTER_MYPORT 1 master-bin.000001 13110 # # master-bin.000001 Yes Yes 0 0 13110 # None 0 No #
stop slave;
change master to master_log_pos=401;
begin;
select * from t2 for update;
a
22
start slave;
commit;
select * from t1;
a
1
1
select * from t2;
a
22
show slave status;
Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master
# 127.0.0.1 root MASTER_MYPORT 1 master-bin.000001 13110 # # master-bin.000001 Yes Yes 0 0 13110 # None 0 No #
set global max_relay_log_size=0;
stop slave;
change master to master_log_pos=401;
begin;
select * from t2 for update;
a
22
start slave;
commit;
select * from t1;
a
1
1
1
select * from t2;
a
22
show slave status;
Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master
# 127.0.0.1 root MASTER_MYPORT 1 master-bin.000001 13110 # # master-bin.000001 Yes Yes 0 0 13110 # None 0 No #
drop table t1,t2;
--innodb --loose-innodb_lock_wait_timeout=4 --slave-transaction-retries=2 --max-relay-log-size=4096
# See if slave restarts the transaction after failing on an InnoDB deadlock error.
# Note: testing what happens when too many retries is possible, but
# needs large waits when running with --debug, so we don't do it.
# The same way, this test may not test what is expected when run
# under Valgrind, timings are too short then (with --valgrind I
# (Guilhem) have seen the test manage to provoke lock wait timeout
# error but not deadlock error; that is ok as code deals with the two
# errors in exactly the same way.
source include/have_innodb.inc;
source include/master-slave.inc;
connection master;
create table t1 (a int not null, key(a)) engine=innodb;
create table t2 (a int not null, key(a)) engine=innodb;
create table t3 (a int) engine=innodb;
create table t4 (a int) engine=innodb;
sync_slave_with_master;
show create table t1;
show create table t2;
stop slave;
# 1) Test deadlock
connection master;
begin;
# Let's keep BEGIN and the locked statement in two different relay logs.
let $1=200;
disable_query_log;
while ($1)
{
eval insert into t3 values( $1 );
dec $1;
}
enable_query_log;
insert into t3 select * from t2 for update;
insert into t1 values(1);
commit;
save_master_pos;
connection slave;
begin;
# Let's make our transaction large so that it's slave who is chosen as
# victim
let $1=1000;
disable_query_log;
while ($1)
{
eval insert into t4 values( $1 );
dec $1;
}
enable_query_log;
select * from t1 for update;
start slave;
--sleep 3; # hope that slave is blocked now
insert into t2 values(22); # provoke deadlock, slave should be victim
commit;
sync_with_master;
select * from t1; # check that slave succeeded finally
select * from t2;
# check that no error is reported
--replace_column 1 # 8 # 9 # 23 # 33 #
--replace_result $MASTER_MYPORT MASTER_MYPORT
show slave status;
# 2) Test lock wait timeout
stop slave;
change master to master_log_pos=401; # the BEGIN log event
begin;
select * from t2 for update; # hold lock
start slave;
--sleep 10; # slave should have blocked, and be retrying
commit;
sync_with_master;
select * from t1; # check that slave succeeded finally
select * from t2;
# check that no error is reported
--replace_column 1 # 8 # 9 # 23 # 33 #
--replace_result $MASTER_MYPORT MASTER_MYPORT
show slave status;
# Now we repeat 2), but with BEGIN in the same relay log as
# COMMIT (to see if seeking into hot log is ok).
set global max_relay_log_size=0;
# This is really copy-paste of 2) of above
stop slave;
change master to master_log_pos=401;
begin;
select * from t2 for update;
start slave;
--sleep 10;
commit;
sync_with_master;
select * from t1;
select * from t2;
--replace_column 1 # 8 # 9 # 23 # 33 #
--replace_result $MASTER_MYPORT MASTER_MYPORT
show slave status;
connection master;
drop table t1,t2;
sync_slave_with_master;
...@@ -890,7 +890,7 @@ extern ulong ha_read_first_count, ha_read_last_count; ...@@ -890,7 +890,7 @@ extern ulong ha_read_first_count, ha_read_last_count;
extern ulong ha_read_rnd_count, ha_read_rnd_next_count, ha_discover_count; extern ulong ha_read_rnd_count, ha_read_rnd_next_count, ha_discover_count;
extern ulong ha_commit_count, ha_rollback_count,table_cache_size; extern ulong ha_commit_count, ha_rollback_count,table_cache_size;
extern ulong max_connections,max_connect_errors, connect_timeout; extern ulong max_connections,max_connect_errors, connect_timeout;
extern ulong slave_net_timeout; extern ulong slave_net_timeout, slave_trans_retries;
extern ulong max_user_connections; extern ulong max_user_connections;
extern ulong long_query_count, what_to_log,flush_time; extern ulong long_query_count, what_to_log,flush_time;
extern ulong query_buff_size, thread_stack,thread_stack_min; extern ulong query_buff_size, thread_stack,thread_stack_min;
......
...@@ -311,7 +311,7 @@ ulong server_id, thd_startup_options; ...@@ -311,7 +311,7 @@ ulong server_id, thd_startup_options;
ulong table_cache_size, thread_stack, thread_stack_min, what_to_log; ulong table_cache_size, thread_stack, thread_stack_min, what_to_log;
ulong query_buff_size, slow_launch_time, slave_open_temp_tables; ulong query_buff_size, slow_launch_time, slave_open_temp_tables;
ulong open_files_limit, max_binlog_size, max_relay_log_size; ulong open_files_limit, max_binlog_size, max_relay_log_size;
ulong slave_net_timeout; ulong slave_net_timeout, slave_trans_retries;
ulong thread_cache_size=0, binlog_cache_size=0, max_binlog_cache_size=0; ulong thread_cache_size=0, binlog_cache_size=0, max_binlog_cache_size=0;
ulong query_cache_size=0; ulong query_cache_size=0;
ulong com_stat[(uint) SQLCOM_END], com_other; ulong com_stat[(uint) SQLCOM_END], com_other;
...@@ -4096,7 +4096,7 @@ enum options_mysqld ...@@ -4096,7 +4096,7 @@ enum options_mysqld
OPT_QUERY_CACHE_TYPE, OPT_QUERY_CACHE_WLOCK_INVALIDATE, OPT_RECORD_BUFFER, OPT_QUERY_CACHE_TYPE, OPT_QUERY_CACHE_WLOCK_INVALIDATE, OPT_RECORD_BUFFER,
OPT_RECORD_RND_BUFFER, OPT_RELAY_LOG_SPACE_LIMIT, OPT_RELAY_LOG_PURGE, OPT_RECORD_RND_BUFFER, OPT_RELAY_LOG_SPACE_LIMIT, OPT_RELAY_LOG_PURGE,
OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME, OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME,
OPT_READONLY, OPT_DEBUGGING, OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING,
OPT_SORT_BUFFER, OPT_TABLE_CACHE, OPT_SORT_BUFFER, OPT_TABLE_CACHE,
OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE, OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE,
OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK, OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK,
...@@ -5049,7 +5049,7 @@ The minimum value for this variable is 4096.", ...@@ -5049,7 +5049,7 @@ The minimum value for this variable is 4096.",
(gptr*) &max_system_variables.max_length_for_sort_data, 0, GET_ULONG, (gptr*) &max_system_variables.max_length_for_sort_data, 0, GET_ULONG,
REQUIRED_ARG, 1024, 4, 8192*1024L, 0, 1, 0}, REQUIRED_ARG, 1024, 4, 8192*1024L, 0, 1, 0},
{"max_relay_log_size", OPT_MAX_RELAY_LOG_SIZE, {"max_relay_log_size", OPT_MAX_RELAY_LOG_SIZE,
"If non-zero: relay log will be rotated automatically when the size exceeds this value; if zero (the default): when the size exceeds max_binlog_size. 0 expected, the minimum value for this variable is 4096.", "If non-zero: relay log will be rotated automatically when the size exceeds this value; if zero (the default): when the size exceeds max_binlog_size. 0 excepted, the minimum value for this variable is 4096.",
(gptr*) &max_relay_log_size, (gptr*) &max_relay_log_size, 0, GET_ULONG, (gptr*) &max_relay_log_size, (gptr*) &max_relay_log_size, 0, GET_ULONG,
REQUIRED_ARG, 0L, 0L, 1024*1024L*1024L, 0, IO_SIZE, 0}, REQUIRED_ARG, 0L, 0L, 1024*1024L*1024L, 0, IO_SIZE, 0},
{ "max_seeks_for_key", OPT_MAX_SEEKS_FOR_KEY, { "max_seeks_for_key", OPT_MAX_SEEKS_FOR_KEY,
...@@ -5220,6 +5220,12 @@ The minimum value for this variable is 4096.", ...@@ -5220,6 +5220,12 @@ The minimum value for this variable is 4096.",
"Number of seconds to wait for more data from a master/slave connection before aborting the read.", "Number of seconds to wait for more data from a master/slave connection before aborting the read.",
(gptr*) &slave_net_timeout, (gptr*) &slave_net_timeout, 0, (gptr*) &slave_net_timeout, (gptr*) &slave_net_timeout, 0,
GET_ULONG, REQUIRED_ARG, SLAVE_NET_TIMEOUT, 1, LONG_TIMEOUT, 0, 1, 0}, GET_ULONG, REQUIRED_ARG, SLAVE_NET_TIMEOUT, 1, LONG_TIMEOUT, 0, 1, 0},
{"slave_transaction_retries", OPT_SLAVE_TRANS_RETRIES,
"Number of times the slave SQL thread will retry a transaction in case "
"it failed with a deadlock or elapsed lock wait timeout, "
"before giving up and stopping.",
(gptr*) &slave_trans_retries, (gptr*) &slave_trans_retries, 0,
GET_ULONG, REQUIRED_ARG, 0L, 0L, (longlong) ULONG_MAX, 0, 1, 0},
#endif /* HAVE_REPLICATION */ #endif /* HAVE_REPLICATION */
{"slow_launch_time", OPT_SLOW_LAUNCH_TIME, {"slow_launch_time", OPT_SLOW_LAUNCH_TIME,
"If creating the thread takes longer than this value (in seconds), the Slow_launch_threads counter will be incremented.", "If creating the thread takes longer than this value (in seconds), the Slow_launch_threads counter will be incremented.",
......
...@@ -320,6 +320,8 @@ sys_var_bool_ptr sys_slave_compressed_protocol("slave_compressed_protocol", ...@@ -320,6 +320,8 @@ sys_var_bool_ptr sys_slave_compressed_protocol("slave_compressed_protocol",
#ifdef HAVE_REPLICATION #ifdef HAVE_REPLICATION
sys_var_long_ptr sys_slave_net_timeout("slave_net_timeout", sys_var_long_ptr sys_slave_net_timeout("slave_net_timeout",
&slave_net_timeout); &slave_net_timeout);
sys_var_long_ptr sys_slave_trans_retries("slave_transaction_retries",
&slave_trans_retries);
#endif #endif
sys_var_long_ptr sys_slow_launch_time("slow_launch_time", sys_var_long_ptr sys_slow_launch_time("slow_launch_time",
&slow_launch_time); &slow_launch_time);
...@@ -604,6 +606,7 @@ sys_var *sys_variables[]= ...@@ -604,6 +606,7 @@ sys_var *sys_variables[]=
#ifdef HAVE_REPLICATION #ifdef HAVE_REPLICATION
&sys_slave_compressed_protocol, &sys_slave_compressed_protocol,
&sys_slave_net_timeout, &sys_slave_net_timeout,
&sys_slave_trans_retries,
&sys_slave_skip_counter, &sys_slave_skip_counter,
#endif #endif
&sys_slow_launch_time, &sys_slow_launch_time,
...@@ -855,6 +858,7 @@ struct show_var_st init_vars[]= { ...@@ -855,6 +858,7 @@ struct show_var_st init_vars[]= {
{"skip_show_database", (char*) &opt_skip_show_db, SHOW_BOOL}, {"skip_show_database", (char*) &opt_skip_show_db, SHOW_BOOL},
#ifdef HAVE_REPLICATION #ifdef HAVE_REPLICATION
{sys_slave_net_timeout.name,(char*) &sys_slave_net_timeout, SHOW_SYS}, {sys_slave_net_timeout.name,(char*) &sys_slave_net_timeout, SHOW_SYS},
{sys_slave_trans_retries.name,(char*) &sys_slave_trans_retries, SHOW_SYS},
#endif #endif
{sys_slow_launch_time.name, (char*) &sys_slow_launch_time, SHOW_SYS}, {sys_slow_launch_time.name, (char*) &sys_slow_launch_time, SHOW_SYS},
#ifdef HAVE_SYS_UN_H #ifdef HAVE_SYS_UN_H
......
...@@ -2959,6 +2959,62 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) ...@@ -2959,6 +2959,62 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
exec_res = ev->exec_event(rli); exec_res = ev->exec_event(rli);
DBUG_ASSERT(rli->sql_thd==thd); DBUG_ASSERT(rli->sql_thd==thd);
delete ev; delete ev;
if (slave_trans_retries)
{
if (exec_res &&
(thd->net.last_errno == ER_LOCK_DEADLOCK ||
thd->net.last_errno == ER_LOCK_WAIT_TIMEOUT) &&
!thd->is_fatal_error)
{
const char *errmsg;
/*
We were in a transaction which has been rolled back because of a
deadlock (currently, InnoDB deadlock detected by InnoDB) or lock
wait timeout (innodb_lock_wait_timeout exceeded); let's seek back to
BEGIN log event and retry it all again.
We have to not only seek but also
a) init_master_info(), to seek back to hot relay log's start for later
(for when we will come back to this hot log after re-processing the
possibly existing old logs where BEGIN is: check_binlog_magic() will
then need the cache to be at position 0 (see comments at beginning of
init_master_info()).
b) init_relay_log_pos(), because the BEGIN may be an older relay log.
*/
if (rli->trans_retries--)
{
sql_print_information("Slave SQL thread retries transaction");
if (init_master_info(rli->mi, 0, 0, 0, SLAVE_SQL))
sql_print_error("Failed to initialize the master info structure");
else if (init_relay_log_pos(rli,
rli->group_relay_log_name,
rli->group_relay_log_pos,
1, &errmsg))
sql_print_error("Error initializing relay log position: %s",
errmsg);
else
{
exec_res= 0;
sleep(2); // chance for concurrent connection to get more locks
}
}
else
sql_print_error("Slave SQL thread retried transaction %lu time(s) "
"in vain, giving up. Consider raising the value of "
"the slave_transaction_retries variable.",
slave_trans_retries);
}
if (!((thd->options & OPTION_BEGIN) && opt_using_transactions))
{
rli->trans_retries= slave_trans_retries; // restart from fresh
/*
TODO: when merged into 5.0, when slave does auto-rollback if
corrupted binlog, this should reset the retry counter too
(any rollback should). In fact it will work, as here we are just out
of a Format_description_log_event::exec_event() which rolled back.
But check repl code in 5.0 for new ha_rollback calls, just in case.
*/
}
}
return exec_res; return exec_res;
} }
else else
...@@ -3370,6 +3426,7 @@ extern "C" pthread_handler_decl(handle_slave_sql,arg) ...@@ -3370,6 +3426,7 @@ extern "C" pthread_handler_decl(handle_slave_sql,arg)
pthread_mutex_lock(&rli->log_space_lock); pthread_mutex_lock(&rli->log_space_lock);
rli->ignore_log_space_limit= 0; rli->ignore_log_space_limit= 0;
pthread_mutex_unlock(&rli->log_space_lock); pthread_mutex_unlock(&rli->log_space_lock);
rli->trans_retries= slave_trans_retries; // start from "no error"
if (init_relay_log_pos(rli, if (init_relay_log_pos(rli,
rli->group_relay_log_name, rli->group_relay_log_name,
......
...@@ -294,7 +294,9 @@ typedef struct st_relay_log_info ...@@ -294,7 +294,9 @@ typedef struct st_relay_log_info
UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1, UNTIL_LOG_NAMES_CMP_UNKNOWN= -2, UNTIL_LOG_NAMES_CMP_LESS= -1,
UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1 UNTIL_LOG_NAMES_CMP_EQUAL= 0, UNTIL_LOG_NAMES_CMP_GREATER= 1
} until_log_names_cmp_result; } until_log_names_cmp_result;
ulong trans_retries;
st_relay_log_info(); st_relay_log_info();
~st_relay_log_info(); ~st_relay_log_info();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment