Merge gbichot@bk-internal.mysql.com:/home/bk/mysql-5.0

into mysql.com:/home/mysql_src/mysql-5.0-xa sql/item_func.cc: Auto merged sql/log_event.cc: Auto merged

Merge gbichot@bk-internal.mysql.com:/home/bk/mysql-5.0
into mysql.com:/home/mysql_src/mysql-5.0-xa sql/item_func.cc: Auto merged sql/log_event.cc: Auto merged
8e3e88c9 · unknown · ad357fb1 · 973551d8 · 8e3e88c9 · 8e3e88c9
Commit 8e3e88c9 authored Mar 02, 2005 by unknown
10 changed files
--- a/mysql-test/r/rpl_deadlock.result
+++ b/mysql-test/r/rpl_deadlock.result
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+create table t1 (a int not null, key(a)) engine=innodb;
+create table t2 (a int not null, key(a)) engine=innodb;
+create table t3 (a int) engine=innodb;
+create table t4 (a int) engine=innodb;
+show create table t1;
+Table	Create Table
+t1	CREATE TABLE `t1` (
+  `a` int(11) NOT NULL default '0',
+  KEY `a` (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+show create table t2;
+Table	Create Table
+t2	CREATE TABLE `t2` (
+  `a` int(11) NOT NULL default '0',
+  KEY `a` (`a`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+stop slave;
+begin;
+insert into t3 select * from t2 for update;
+insert into t1 values(1);
+commit;
+begin;
+select * from t1 for update;
+a
+start slave;
+insert into t2 values(22);
+commit;
+select * from t1;
+a
+1
+select * from t2;
+a
+22
+show slave status;
+Slave_IO_State	Master_Host	Master_User	Master_Port	Connect_Retry	Master_Log_File	Read_Master_Log_Pos	Relay_Log_File	Relay_Log_Pos	Relay_Master_Log_File	Slave_IO_Running	Slave_SQL_Running	Replicate_Do_DB	Replicate_Ignore_DB	Replicate_Do_Table	Replicate_Ignore_Table	Replicate_Wild_Do_Table	Replicate_Wild_Ignore_Table	Last_Errno	Last_Error	Skip_Counter	Exec_Master_Log_Pos	Relay_Log_Space	Until_Condition	Until_Log_File	Until_Log_Pos	Master_SSL_Allowed	Master_SSL_CA_File	Master_SSL_CA_Path	Master_SSL_Cert	Master_SSL_Cipher	Master_SSL_Key	Seconds_Behind_Master
+#	127.0.0.1	root	MASTER_MYPORT	1	master-bin.000001	13110	#	#	master-bin.000001	Yes	Yes							0		0	13110	#	None		0	No						#
+stop slave;
+change master to master_log_pos=401;
+begin;
+select * from t2 for update;
+a
+22
+start slave;
+commit;
+select * from t1;
+a
+1
+1
+select * from t2;
+a
+22
+show slave status;
+Slave_IO_State	Master_Host	Master_User	Master_Port	Connect_Retry	Master_Log_File	Read_Master_Log_Pos	Relay_Log_File	Relay_Log_Pos	Relay_Master_Log_File	Slave_IO_Running	Slave_SQL_Running	Replicate_Do_DB	Replicate_Ignore_DB	Replicate_Do_Table	Replicate_Ignore_Table	Replicate_Wild_Do_Table	Replicate_Wild_Ignore_Table	Last_Errno	Last_Error	Skip_Counter	Exec_Master_Log_Pos	Relay_Log_Space	Until_Condition	Until_Log_File	Until_Log_Pos	Master_SSL_Allowed	Master_SSL_CA_File	Master_SSL_CA_Path	Master_SSL_Cert	Master_SSL_Cipher	Master_SSL_Key	Seconds_Behind_Master
+#	127.0.0.1	root	MASTER_MYPORT	1	master-bin.000001	13110	#	#	master-bin.000001	Yes	Yes							0		0	13110	#	None		0	No						#
+set global max_relay_log_size=0;
+stop slave;
+change master to master_log_pos=401;
+begin;
+select * from t2 for update;
+a
+22
+start slave;
+commit;
+select * from t1;
+a
+1
+1
+1
+select * from t2;
+a
+22
+show slave status;
+Slave_IO_State	Master_Host	Master_User	Master_Port	Connect_Retry	Master_Log_File	Read_Master_Log_Pos	Relay_Log_File	Relay_Log_Pos	Relay_Master_Log_File	Slave_IO_Running	Slave_SQL_Running	Replicate_Do_DB	Replicate_Ignore_DB	Replicate_Do_Table	Replicate_Ignore_Table	Replicate_Wild_Do_Table	Replicate_Wild_Ignore_Table	Last_Errno	Last_Error	Skip_Counter	Exec_Master_Log_Pos	Relay_Log_Space	Until_Condition	Until_Log_File	Until_Log_Pos	Master_SSL_Allowed	Master_SSL_CA_File	Master_SSL_CA_Path	Master_SSL_Cert	Master_SSL_Cipher	Master_SSL_Key	Seconds_Behind_Master
+#	127.0.0.1	root	MASTER_MYPORT	1	master-bin.000001	13110	#	#	master-bin.000001	Yes	Yes							0		0	13110	#	None		0	No						#
+drop table t1,t2;
--- a/mysql-test/t/rpl_deadlock-slave.opt
+++ b/mysql-test/t/rpl_deadlock-slave.opt
+--innodb --loose-innodb_lock_wait_timeout=4 --slave-transaction-retries=2 --max-relay-log-size=4096
--- a/mysql-test/t/rpl_deadlock.test
+++ b/mysql-test/t/rpl_deadlock.test
+# See if slave restarts the transaction after failing on an InnoDB deadlock error.
+
+# Note: testing what happens when too many retries is possible, but
+# needs large waits when running with --debug, so we don't do it.
+# The same way, this test may not test what is expected when run
+# under Valgrind, timings are too short then (with --valgrind I
+# (Guilhem) have seen the test manage to provoke lock wait timeout
+# error but not deadlock error; that is ok as code deals with the two
+# errors in exactly the same way.
+
+source include/have_innodb.inc;
+source include/master-slave.inc;
+
+connection master;
+create table t1 (a int not null, key(a)) engine=innodb;
+create table t2 (a int not null, key(a)) engine=innodb;
+create table t3 (a int) engine=innodb;
+create table t4 (a int) engine=innodb;
+sync_slave_with_master;
+
+show create table t1;
+show create table t2;
+stop slave;
+
+# 1) Test deadlock
+
+connection master;
+begin;
+# Let's keep BEGIN and the locked statement in two different relay logs.
+let $1=200;
+disable_query_log;
+while ($1)
+{
+ eval insert into t3 values( $1 );
+ dec $1;
+}
+enable_query_log;
+insert into t3 select * from t2 for update;
+insert into t1 values(1);
+commit;
+save_master_pos;
+
+connection slave;
+begin;
+# Let's make our transaction large so that it's slave who is chosen as
+# victim
+let $1=1000;
+disable_query_log;
+while ($1)
+{
+ eval insert into t4 values( $1 );
+ dec $1;
+}
+enable_query_log;
+select * from t1 for update;
+start slave;
+--sleep 3; # hope that slave is blocked now
+insert into t2 values(22); # provoke deadlock, slave should be victim
+commit;
+sync_with_master;
+select * from t1; # check that slave succeeded finally
+select * from t2;
+# check that no error is reported
+--replace_column 1 # 8 # 9 # 23 # 33 #
+--replace_result $MASTER_MYPORT MASTER_MYPORT
+show slave status;
+
+# 2) Test lock wait timeout
+
+stop slave;
+change master to master_log_pos=401; # the BEGIN log event
+begin;
+select * from t2 for update; # hold lock
+start slave;
+--sleep 10; # slave should have blocked, and be retrying
+commit;
+sync_with_master;
+select * from t1; # check that slave succeeded finally
+select * from t2;
+# check that no error is reported
+--replace_column 1 # 8 # 9 # 23 # 33 #
+--replace_result $MASTER_MYPORT MASTER_MYPORT
+show slave status;
+
+# Now we repeat 2), but with BEGIN in the same relay log as
+# COMMIT (to see if seeking into hot log is ok).
+
+set global max_relay_log_size=0;
+
+# This is really copy-paste of 2) of above
+stop slave;
+change master to master_log_pos=401;
+begin;
+select * from t2 for update;
+start slave;
+--sleep 10;
+commit;
+sync_with_master;
+select * from t1;
+select * from t2;
+--replace_column 1 # 8 # 9 # 23 # 33 #
+--replace_result $MASTER_MYPORT MASTER_MYPORT
+show slave status;
+
+connection master;
+drop table t1,t2;
+sync_slave_with_master;
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@@ -2856,18 +2856,6 @@ void item_user_lock_free(void)
 void item_user_lock_release(User_level_lock *ull)
 {
  ull->locked=0;
-  if (mysql_bin_log.is_open())
-  {
-    char buf[256];
-    const char *command="DO RELEASE_LOCK(\"";
-    String tmp(buf,sizeof(buf), system_charset_info);
-    tmp.copy(command, strlen(command), tmp.charset());
-    tmp.append(ull->key,ull->key_length);
-    tmp.append("\")", 2);
-    Query_log_event qev(current_thd, tmp.ptr(), tmp.length(), 0, FALSE);
-    qev.error_code=0; // this query is always safe to run on slave
-    mysql_bin_log.write(&qev);
-  }
  if (--ull->count)
    pthread_cond_signal(&ull->cond);
  else
@@ -2991,6 +2979,16 @@ longlong Item_func_get_lock::val_int()
  User_level_lock *ull;
  int error=0;

+  /*
+    In slave thread no need to get locks, everything is serialized. Anyway
+    there is no way to make GET_LOCK() work on slave like it did on master
+    (i.e. make it return exactly the same value) because we don't have the
+    same other concurrent threads environment. No matter what we return here,
+    it's not guaranteed to be same as on master.
+  */
+  if (thd->slave_thread)
+    return 1;
+
  pthread_mutex_lock(&LOCK_user_locks);

  if (!res || !res->length())

--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -1616,9 +1616,9 @@ Default database: '%s'. Query: '%s'",
    probably, so data_buf will be freed, so the thd->... listed above will be
    pointers to freed memory. 
    So we must set them to 0, so that those bad pointers values are not later
-    used. Note that "cleanup" queries (automatic DO RELEASE_LOCK() and DROP
-    TEMPORARY TABLE don't suffer from these assignments to 0 as DROP TEMPORARY
-    TABLE uses the db.table syntax).
+    used. Note that "cleanup" queries like automatic DROP TEMPORARY TABLE
+    don't suffer from these assignments to 0 as DROP TEMPORARY
+    TABLE uses the db.table syntax.
  */
  thd->db= thd->catalog= 0;	        // prevent db from being freed
  thd->query= 0;			// just to be sure
@@ -3666,8 +3666,8 @@ void Stop_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_ev

  The master stopped.
  We used to clean up all temporary tables but this is useless as, as the
-  master has shut down properly, it has written all DROP TEMPORARY TABLE and DO
-  RELEASE_LOCK (prepared statements' deletion is TODO).
+  master has shut down properly, it has written all DROP TEMPORARY TABLE
+  (prepared statements' deletion is TODO only when we binlog prep stmts).
  We used to clean up slave_load_tmpdir, but this is useless as it has been
  cleared at the end of LOAD DATA INFILE.
  So we have nothing to do here.

--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -1037,7 +1037,7 @@ extern ulong query_cache_size, query_cache_min_res_unit;
 extern ulong thd_startup_options, slow_launch_threads, slow_launch_time;
 extern ulong table_cache_size;
 extern ulong max_connections,max_connect_errors, connect_timeout;
-extern ulong slave_net_timeout;
+extern ulong slave_net_timeout, slave_trans_retries;
 extern uint max_user_connections;
 extern ulong what_to_log,flush_time;
 extern ulong query_buff_size, thread_stack,thread_stack_min;

--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -334,7 +334,7 @@ ulong server_id, thd_startup_options;
 ulong table_cache_size, thread_stack, thread_stack_min, what_to_log;
 ulong query_buff_size, slow_launch_time, slave_open_temp_tables;
 ulong open_files_limit, max_binlog_size, max_relay_log_size;
-ulong slave_net_timeout;
+ulong slave_net_timeout, slave_trans_retries;
 ulong thread_cache_size=0, binlog_cache_size=0, max_binlog_cache_size=0;
 ulong query_cache_size=0;
 ulong refresh_version, flush_version;	/* Increments on each reload */
@@ -4192,7 +4192,7 @@ enum options_mysqld
  OPT_QUERY_CACHE_TYPE, OPT_QUERY_CACHE_WLOCK_INVALIDATE, OPT_RECORD_BUFFER,
  OPT_RECORD_RND_BUFFER, OPT_RELAY_LOG_SPACE_LIMIT, OPT_RELAY_LOG_PURGE,
  OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME,
-  OPT_READONLY, OPT_DEBUGGING,
+  OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING,
  OPT_SORT_BUFFER, OPT_TABLE_CACHE,
  OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE,
  OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK,
@@ -5222,7 +5222,7 @@ The minimum value for this variable is 4096.",
    (gptr*) &max_system_variables.max_length_for_sort_data, 0, GET_ULONG,
    REQUIRED_ARG, 1024, 4, 8192*1024L, 0, 1, 0},
  {"max_relay_log_size", OPT_MAX_RELAY_LOG_SIZE,
-   "If non-zero: relay log will be rotated automatically when the size exceeds this value; if zero (the default): when the size exceeds max_binlog_size. 0 expected, the minimum value for this variable is 4096.",
+   "If non-zero: relay log will be rotated automatically when the size exceeds this value; if zero (the default): when the size exceeds max_binlog_size. 0 excepted, the minimum value for this variable is 4096.",
   (gptr*) &max_relay_log_size, (gptr*) &max_relay_log_size, 0, GET_ULONG,
   REQUIRED_ARG, 0L, 0L, 1024*1024L*1024L, 0, IO_SIZE, 0},
  { "max_seeks_for_key", OPT_MAX_SEEKS_FOR_KEY,
@@ -5408,6 +5408,12 @@ The minimum value for this variable is 4096.",
   "Number of seconds to wait for more data from a master/slave connection before aborting the read.",
   (gptr*) &slave_net_timeout, (gptr*) &slave_net_timeout, 0,
   GET_ULONG, REQUIRED_ARG, SLAVE_NET_TIMEOUT, 1, LONG_TIMEOUT, 0, 1, 0},
+  {"slave_transaction_retries", OPT_SLAVE_TRANS_RETRIES,
+   "Number of times the slave SQL thread will retry a transaction in case "
+   "it failed with a deadlock or elapsed lock wait timeout, "
+   "before giving up and stopping.",
+   (gptr*) &slave_trans_retries, (gptr*) &slave_trans_retries, 0,
+   GET_ULONG, REQUIRED_ARG, 0L, 0L, (longlong) ULONG_MAX, 0, 1, 0},
 #endif /* HAVE_REPLICATION */
  {"slow_launch_time", OPT_SLOW_LAUNCH_TIME,
   "If creating the thread takes longer than this value (in seconds), the Slow_launch_threads counter will be incremented.",
@@ -5793,7 +5799,7 @@ static void mysql_init_variables(void)
  opt_log= opt_update_log= opt_bin_log= opt_slow_log= 0;
  opt_disable_networking= opt_skip_show_db=0;
  opt_logname= opt_update_logname= opt_binlog_index_name= opt_slow_logname= 0;
-  opt_tc_log_file= "tc.log";      // no hostname in tc_log file name !
+  opt_tc_log_file= (char *)"tc.log";      // no hostname in tc_log file name !
  opt_secure_auth= 0;
  opt_bootstrap= opt_myisam_log= 0;
  mqh_used= 0;

--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -340,6 +340,8 @@ sys_var_bool_ptr	sys_slave_compressed_protocol("slave_compressed_protocol",
 #ifdef HAVE_REPLICATION
 sys_var_long_ptr	sys_slave_net_timeout("slave_net_timeout",
 					      &slave_net_timeout);
+sys_var_long_ptr	sys_slave_trans_retries("slave_transaction_retries",
+                                                &slave_trans_retries);
 #endif
 sys_var_long_ptr	sys_slow_launch_time("slow_launch_time",
 					     &slow_launch_time);
@@ -652,6 +654,7 @@ sys_var *sys_variables[]=
 #ifdef HAVE_REPLICATION
  &sys_slave_compressed_protocol,
  &sys_slave_net_timeout,
+  &sys_slave_trans_retries,
  &sys_slave_skip_counter,
 #endif
  &sys_slow_launch_time,
@@ -931,6 +934,7 @@ struct show_var_st init_vars[]= {
  {"skip_show_database",      (char*) &opt_skip_show_db,            SHOW_BOOL},
 #ifdef HAVE_REPLICATION
  {sys_slave_net_timeout.name,(char*) &sys_slave_net_timeout,	    SHOW_SYS},
+  {sys_slave_trans_retries.name,(char*) &sys_slave_trans_retries,   SHOW_SYS},
 #endif
  {sys_slow_launch_time.name, (char*) &sys_slow_launch_time,        SHOW_SYS},
 #ifdef HAVE_SYS_UN_H

--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -3228,6 +3228,53 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
      DBUG_PRINT("info", ("Deleting the event after it has been executed"));
      delete ev;
    }
+    if (slave_trans_retries)
+    {
+      if (exec_res &&
+          (thd->net.last_errno == ER_LOCK_DEADLOCK ||
+           thd->net.last_errno == ER_LOCK_WAIT_TIMEOUT) &&
+          !thd->is_fatal_error)
+      {
+        const char *errmsg;
+        /*
+          We were in a transaction which has been rolled back because of a
+          deadlock (currently, InnoDB deadlock detected by InnoDB) or lock
+          wait timeout (innodb_lock_wait_timeout exceeded); let's seek back to
+          BEGIN log event and retry it all again.
+          We have to not only seek but also
+          a) init_master_info(), to seek back to hot relay log's start for later
+          (for when we will come back to this hot log after re-processing the
+          possibly existing old logs where BEGIN is: check_binlog_magic() will
+          then need the cache to be at position 0 (see comments at beginning of
+          init_master_info()).
+          b) init_relay_log_pos(), because the BEGIN may be an older relay log.
+        */
+        if (rli->trans_retries--)
+        {
+          sql_print_information("Slave SQL thread retries transaction");
+          if (init_master_info(rli->mi, 0, 0, 0, SLAVE_SQL))
+            sql_print_error("Failed to initialize the master info structure");
+          else if (init_relay_log_pos(rli,
+                                      rli->group_relay_log_name,
+                                      rli->group_relay_log_pos,
+                                      1, &errmsg, 1))
+            sql_print_error("Error initializing relay log position: %s",
+                            errmsg);
+          else
+          {
+            exec_res= 0;
+            sleep(2); // chance for concurrent connection to get more locks
+          }
+        }
+        else
+          sql_print_error("Slave SQL thread retried transaction %lu time(s) "
+                          "in vain, giving up. Consider raising the value of "
+                          "the slave_transaction_retries variable.",
+                          slave_trans_retries);
+      }
+      if (!((thd->options & OPTION_BEGIN) && opt_using_transactions))
+        rli->trans_retries= slave_trans_retries; // restart from fresh
+    }
    return exec_res;
  }
  else
@@ -3642,6 +3689,7 @@ extern "C" pthread_handler_decl(handle_slave_sql,arg)
  pthread_mutex_lock(&rli->log_space_lock);
  rli->ignore_log_space_limit= 0;
  pthread_mutex_unlock(&rli->log_space_lock);
+  rli->trans_retries= slave_trans_retries; // start from "no error"

  if (init_relay_log_pos(rli,
 			 rli->group_relay_log_name,
@@ -4188,7 +4236,7 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
      master server shutdown. The only thing this does is cleaning. But
      cleaning is already done on a per-master-thread basis (as the master
      server is shutting down cleanly, it has written all DROP TEMPORARY TABLE
-      and DO RELEASE_LOCK; prepared statements' deletion are TODO).
+      prepared statements' deletion are TODO only when we binlog prep stmts).
      
      We don't even increment mi->master_log_pos, because we may be just after
      a Rotate event. Btw, in a few milliseconds we are going to have a Start

--- a/sql/slave.h
+++ b/sql/slave.h
@@ -293,7 +293,8 @@ typedef struct st_relay_log_info
  } until_log_names_cmp_result;

  char cached_charset[6];
-  
+  ulong trans_retries;
+
  st_relay_log_info();
  ~st_relay_log_info();