BUG#40337 Fsyncing master and relay log to disk after every event is too slow

NOTE: Backporting the patch to next-mr. The fix proposed in BUG#35542 and BUG#31665 introduces a performance issue when fsyncing the master.info, relay-log.info and relay-log.bin* after #th events. Although such a solution has been proposed to reduce the probability of corrupted files due to a slave crash, the performance penalty introduced by it has made the approach impractical for highly intensive workloads. In a nutshell, the option --sync-relay-log proposed in BUG#35542 and BUG#31665 simultaneously fsyncs master.info, relay-log.info and relay-log.bin*, and this is the main source of performance issues. This patch introduces new options that give more control to the user on what should be fsynced and how often: 1) (--sync-master-info, integer) which syncs the master.info after #th events; 2) (--sync-relay-log, integer) which syncs the relay-log.bin* after #th events; 3) (--sync-relay-log-info, integer) which syncs the relay-log.info after #th transactions. To provide both performance and increased reliability, we recommend the following setup: 1) --sync-master-info = 0, eventually the operating system will fsync it; 2) --sync-relay-log = 0, eventually the operating system will fsync it; 3) --sync-relay-log-info = 1, fsyncs it after every transaction. Notice that the previous setup does not reduce the probability of corrupted master.info and relay-log.bin*. To overcome the issue, this patch also introduces a recovery mechanism that right after restart throws away relay-log.bin* retrieved from a master and updates the master.info based on the relay-log.info: 4) (--relay-log-recovery, boolean) which enables a recovery mechanism that throws away relay-log.bin* after a crash. However, it can only recover the incorrect binlog file and position in master.info; if other information (host, port, password, etc.) is corrupted or incorrect, then this recovery mechanism will fail to work.

BUG#40337 Fsyncing master and relay log to disk after every event is too slow
NOTE: Backporting the patch to next-mr. The fix proposed in BUG#35542 and BUG#31665 introduces a performance issue when fsyncing the master.info, relay-log.info and relay-log.bin* after #th events. Although such a solution has been proposed to reduce the probability of corrupted files due to a slave crash, the performance penalty introduced by it has made the approach impractical for highly intensive workloads. In a nutshell, the option --sync-relay-log proposed in BUG#35542 and BUG#31665 simultaneously fsyncs master.info, relay-log.info and relay-log.bin*, and this is the main source of performance issues. This patch introduces new options that give more control to the user on what should be fsynced and how often: 1) (--sync-master-info, integer) which syncs the master.info after #th events; 2) (--sync-relay-log, integer) which syncs the relay-log.bin* after #th events; 3) (--sync-relay-log-info, integer) which syncs the relay-log.info after #th transactions. To provide both performance and increased reliability, we recommend the following setup: 1) --sync-master-info = 0, eventually the operating system will fsync it; 2) --sync-relay-log = 0, eventually the operating system will fsync it; 3) --sync-relay-log-info = 1, fsyncs it after every transaction. Notice that the previous setup does not reduce the probability of corrupted master.info and relay-log.bin*. To overcome the issue, this patch also introduces a recovery mechanism that right after restart throws away relay-log.bin* retrieved from a master and updates the master.info based on the relay-log.info: 4) (--relay-log-recovery, boolean) which enables a recovery mechanism that throws away relay-log.bin* after a crash. However, it can only recover the incorrect binlog file and position in master.info; if other information (host, port, password, etc.) is corrupted or incorrect, then this recovery mechanism will fail to work.
ef89b6d5 · Alfranio Correia · f758e38b · ef89b6d5 · ef89b6d5 · ef89b6d5
Commit ef89b6d5 authored Sep 29, 2009 by Alfranio Correia
15 changed files
--- a/mysql-test/suite/rpl/r/rpl_flushlog_loop.result
+++ b/mysql-test/suite/rpl/r/rpl_flushlog_loop.result
@@ -10,6 +10,7 @@ relay_log	MYSQLD_DATADIR/relay-log
 relay_log_index	
 relay_log_info_file	relay-log.info
 relay_log_purge	ON
+relay_log_recovery	OFF
 relay_log_space_limit	0
 stop slave;
 change master to master_host='127.0.0.1',master_user='root',

--- a/mysql-test/suite/rpl/r/rpl_sync.result
+++ b/mysql-test/suite/rpl/r/rpl_sync.result
+=====Configuring the enviroment=======;
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+call mtr.add_suppression('Attempting backtrace');
+call mtr.add_suppression("Recovery from master pos .* and file master-bin.000001");
+CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
+insert into t1(a) values(1);
+insert into t1(a) values(2);
+insert into t1(a) values(3);
+=====Inserting data on the master but without the SQL Thread being running=======;
+stop slave SQL_THREAD;
+insert into t1(a) values(4);
+insert into t1(a) values(5);
+insert into t1(a) values(6);
+=====Removing relay log files and crashing/recoverying the slave=======;
+stop slave IO_THREAD;
+SET SESSION debug="d,crash_before_rotate_relaylog";
+FLUSH LOGS;
+ERROR HY000: Lost connection to MySQL server during query
+=====Dumping and comparing tables=======;
+start slave;
+Comparing tables master:test.t1 and slave:test.t1
+=====Corrupting the master.info=======;
+stop slave;
+FLUSH LOGS;
+insert into t1(a) values(7);
+insert into t1(a) values(8);
+insert into t1(a) values(9);
+SET SESSION debug="d,crash_before_rotate_relaylog";
+FLUSH LOGS;
+ERROR HY000: Lost connection to MySQL server during query
+=====Dumping and comparing tables=======;
+start slave;
+Comparing tables master:test.t1 and slave:test.t1
+=====Clean up=======;
+drop table t1;
--- a/mysql-test/suite/rpl/t/rpl_sync-slave.opt
+++ b/mysql-test/suite/rpl/t/rpl_sync-slave.opt
+--sync-relay-log-info=1 --relay-log-recovery=1
--- a/mysql-test/suite/rpl/t/rpl_sync.test
+++ b/mysql-test/suite/rpl/t/rpl_sync.test
+########################################################################################
+# This test verifies the options --sync-relay-log-info and --relay-log-recovery by 
+# crashing the slave in two different situations:
+#  (case-1) - Corrupt the relay log with changes which were not processed by
+#  the SQL Thread and crashes it.
+#  (case-2) - Corrupt the master.info with wrong coordinates and crashes it.
+#
+#  Case 1:
+#    1 - Stops the SQL Thread
+#    2 - Inserts new records into the master.
+#    3 - Corrupts the relay-log.bin* which most likely has such changes.
+#    4 - Crashes the slave
+#    5 - Verifies that the slave is in sync with the master, which means that the
+#    information loss was circumvented by the recovery process.
+#
+#  Case 2:
+#    1 - Stops the SQL/IO Threads
+#    2 - Inserts new records into the master.
+#    3 - Corrupts the master.info with wrong coordinates.
+#    4 - Crashes the slave
+#    5 - Verifies that the slave is in sync with the master, which means that the
+#    information loss was circumvented by the recovery process.
+########################################################################################
+
+########################################################################################
+#                                Configuring the environment
+########################################################################################
+--echo =====Configuring the enviroment=======;
+--source include/master-slave.inc
+--source include/not_embedded.inc
+--source include/not_valgrind.inc
+--source include/have_debug.inc
+--source include/have_innodb.inc
+
+call mtr.add_suppression('Attempting backtrace');
+call mtr.add_suppression("Recovery from master pos .* and file master-bin.000001");
+CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
+
+insert into t1(a) values(1);
+insert into t1(a) values(2);
+insert into t1(a) values(3);
+
+########################################################################################
+#                             Case 1: Corrupt a relay-log.bin*
+########################################################################################
+--echo =====Inserting data on the master but without the SQL Thread being running=======;
+sync_slave_with_master;
+
+connection slave;
+let $MYSQLD_SLAVE_DATADIR= `select @@datadir`;
+--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
+--copy_file $MYSQLD_SLAVE_DATADIR/master.info $MYSQLD_SLAVE_DATADIR/master.backup
+stop slave SQL_THREAD;
+source include/wait_for_slave_sql_to_stop.inc;
+
+connection master;
+insert into t1(a) values(4);
+insert into t1(a) values(5);
+insert into t1(a) values(6);
+
+--echo =====Removing relay log files and crashing/recoverying the slave=======;
+connection slave;
+stop slave IO_THREAD;
+source include/wait_for_slave_io_to_stop.inc;
+
+let $file= query_get_value("SHOW SLAVE STATUS", Relay_Log_File, 1);
+--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
+--exec echo "failure" > $MYSQLD_SLAVE_DATADIR/$file
+
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+SET SESSION debug="d,crash_before_rotate_relaylog";
+--error 2013
+FLUSH LOGS;
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo =====Dumping and comparing tables=======;
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+sync_slave_with_master;
+
+let $diff_table_1=master:test.t1;
+let $diff_table_2=slave:test.t1;
+source include/diff_tables.inc;
+
+########################################################################################
+#                             Case 2: Corrupt a master.info
+########################################################################################
+--echo =====Corrupting the master.info=======;
+connection slave;
+stop slave;
+source include/wait_for_slave_to_stop.inc;
+
+connection master;
+FLUSH LOGS;
+
+insert into t1(a) values(7);
+insert into t1(a) values(8);
+insert into t1(a) values(9);
+
+connection slave;
+# Overwrite master.info with the copy saved at the beginning of case 1, so
+# that it holds coordinates that are stale with respect to the master.
+# The copy is done only once, in perl: the destination already exists and
+# shell tools such as "cat" are not available on every platform.
+# The variable is assigned without a leading '$' so that it is exported to
+# the environment and can be read from the perl block through %ENV.
+let MYSQLD_SLAVE_DATADIR=`select @@datadir`;
+
+--perl
+use strict;
+use warnings;
+my $src= "$ENV{'MYSQLD_SLAVE_DATADIR'}/master.backup";
+my $dst= "$ENV{'MYSQLD_SLAVE_DATADIR'}/master.info";
+open(FILE, "<", $src) or die;
+my @content= <FILE>;
+close FILE;
+open(FILE, ">", $dst) or die;
+binmode FILE;
+print FILE @content;
+close FILE;
+EOF
+
+--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
+SET SESSION debug="d,crash_before_rotate_relaylog";
+--error 2013
+FLUSH LOGS;
+
+--enable_reconnect
+--source include/wait_until_connected_again.inc
+
+--echo =====Dumping and comparing tables=======;
+start slave;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+sync_slave_with_master;
+
+let $diff_table_1=master:test.t1;
+let $diff_table_2=slave:test.t1;
+source include/diff_tables.inc;
+
+########################################################################################
+#                                      Clean up
+########################################################################################
+--echo =====Clean up=======;
+connection master;
+drop table t1;
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -1869,10 +1869,12 @@ extern ulong MYSQL_PLUGIN_IMPORT specialflag;
 #ifdef MYSQL_SERVER
 extern ulong current_pid;
 extern ulong expire_logs_days;
-extern uint sync_binlog_period, sync_relaylog_period;
+extern uint sync_binlog_period, sync_relaylog_period, 
+            sync_relayloginfo_period, sync_masterinfo_period;
 extern ulong opt_tc_log_size, tc_log_max_pages_used, tc_log_page_size;
 extern ulong tc_log_page_waits;
 extern my_bool relay_log_purge, opt_innodb_safe_binlog, opt_innodb;
+extern my_bool relay_log_recovery;
 extern uint test_flags,select_errors,ha_open_options;
 extern uint protocol_version, mysqld_port, dropping_tables;
 extern uint delay_key_write_options;

--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -477,6 +477,7 @@ extern const char *opt_ndb_distribution;
 extern enum ndb_distribution opt_ndb_distribution_id;
 #endif
 my_bool opt_readonly, use_temp_pool, relay_log_purge;
+my_bool relay_log_recovery;
 my_bool opt_sync_frm, opt_allow_suspicious_udfs;
 my_bool opt_secure_auth= 0;
 char* opt_secure_file_priv= 0;
@@ -553,7 +554,8 @@ ulong max_prepared_stmt_count;
 ulong prepared_stmt_count=0;
 ulong thread_id=1L,current_pid;
 ulong slow_launch_threads = 0;
-uint sync_binlog_period= 0, sync_relaylog_period= 0;
+uint sync_binlog_period= 0, sync_relaylog_period= 0,
+     sync_relayloginfo_period= 0, sync_masterinfo_period= 0;
 ulong expire_logs_days = 0;
 ulong rpl_recovery_rank=0;
 const char *log_output_str= "FILE";
@@ -5605,6 +5607,7 @@ enum options_mysqld
  OPT_QUERY_CACHE_TYPE, OPT_QUERY_CACHE_WLOCK_INVALIDATE, OPT_RECORD_BUFFER,
  OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT,
  OPT_RELAY_LOG_PURGE,
+  OPT_RELAY_LOG_RECOVERY,
  OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME,
  OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING,
  OPT_SORT_BUFFER, OPT_TABLE_OPEN_CACHE, OPT_TABLE_DEF_CACHE,
@@ -5669,7 +5672,9 @@ enum options_mysqld
  OPT_GENERAL_LOG_FILE,
  OPT_SLOW_QUERY_LOG_FILE,
  OPT_IGNORE_BUILTIN_INNODB,
-  OPT_SYNC_RELAY_LOG
+  OPT_SYNC_RELAY_LOG,
+  OPT_SYNC_RELAY_LOG_INFO,
+  OPT_SYNC_MASTER_INFO
 };


@@ -6889,6 +6894,13 @@ The minimum value for this variable is 4096.",
   (uchar**) &relay_log_purge,
   (uchar**) &relay_log_purge, 0, GET_BOOL, NO_ARG,
   1, 0, 1, 0, 1, 0},
+  {"relay_log_recovery", OPT_RELAY_LOG_RECOVERY,
+   "Enables automatic relay log recovery right after the database startup, "
+   "which means that the IO Thread starts re-fetching from the master " 
+   "right after the last transaction processed.",
+   (uchar**) &relay_log_recovery,
+   (uchar**) &relay_log_recovery, 0, GET_BOOL, NO_ARG,
+   0, 0, 1, 0, 1, 0},
  {"relay_log_space_limit", OPT_RELAY_LOG_SPACE_LIMIT,
   "Maximum space to use for all relay logs.",
   (uchar**) &relay_log_space_limit,
@@ -6930,6 +6942,16 @@ The minimum value for this variable is 4096.",
   "Use 0 (default) to disable synchronous flushing.",
   (uchar**) &sync_relaylog_period, (uchar**) &sync_relaylog_period, 0, GET_UINT,
   REQUIRED_ARG, 0, 0, (longlong) UINT_MAX, 0, 1, 0},
+  {"sync-relay-log-info", OPT_SYNC_RELAY_LOG_INFO,
+   "Synchronously flush relay log info to disk after #th transaction. "
+   "Use 0 (default) to disable synchronous flushing.",
+   (uchar**) &sync_relayloginfo_period, (uchar**) &sync_relayloginfo_period, 0, GET_UINT,
+   REQUIRED_ARG, 0, 0, (longlong) UINT_MAX, 0, 1, 0},
+  {"sync-master-info", OPT_SYNC_MASTER_INFO,
+   "Synchronously flush master info to disk after every #th event. "
+   "Use 0 (default) to disable synchronous flushing.",
+   (uchar**) &sync_masterinfo_period, (uchar**) &sync_masterinfo_period, 0, GET_UINT,
+   REQUIRED_ARG, 0, 0, (longlong) UINT_MAX, 0, 1, 0},
  {"sync-frm", OPT_SYNC_FRM, "Sync .frm to disk on create. Enabled by default.",
   (uchar**) &opt_sync_frm, (uchar**) &opt_sync_frm, 0, GET_BOOL, NO_ARG, 1, 0,
   0, 0, 0, 0},

--- a/sql/rpl_mi.cc
+++ b/sql/rpl_mi.cc
@@ -27,11 +27,11 @@ int init_intvar_from_file(int* var, IO_CACHE* f, int default_val);
 int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
 			  const char *default_val);

-Master_info::Master_info()
+Master_info::Master_info(bool is_slave_recovery)
  :Slave_reporting_capability("I/O"),
   ssl(0), ssl_verify_server_cert(0), fd(-1), io_thd(0), inited(0),
-   abort_slave(0),slave_running(0),
-   slave_run_id(0)
+   rli(is_slave_recovery), abort_slave(0), slave_running(0),
+   slave_run_id(0), sync_counter(0)
 {
  host[0] = 0; user[0] = 0; password[0] = 0;
  ssl_ca[0]= 0; ssl_capath[0]= 0; ssl_cert[0]= 0;
@@ -364,11 +364,6 @@ int flush_master_info(Master_info* mi, bool flush_relay_log_cache)
    IO_CACHE *log_file= mi->rli.relay_log.get_log_file();
    if (flush_io_cache(log_file))
      DBUG_RETURN(2);
-
-    /* Sync to disk if --sync-relay-log is set */
-    if (sync_relaylog_period &&
-        my_sync(log_file->file, MY_WME))
-      DBUG_RETURN(2);
  }

  /*
@@ -398,8 +393,12 @@ int flush_master_info(Master_info* mi, bool flush_relay_log_cache)
              (int)(mi->ssl), mi->ssl_ca, mi->ssl_capath, mi->ssl_cert,
              mi->ssl_cipher, mi->ssl_key, mi->ssl_verify_server_cert);
  err= flush_io_cache(file);
-  if (sync_relaylog_period && !err)
+  if (sync_masterinfo_period && !err && 
+      ++(mi->sync_counter) >= sync_masterinfo_period)
+  {
    err= my_sync(mi->fd, MYF(MY_WME));
+    mi->sync_counter= 0;
+  }
  DBUG_RETURN(-err);
 }


--- a/sql/rpl_mi.h
+++ b/sql/rpl_mi.h
@@ -58,7 +58,7 @@
 class Master_info : public Slave_reporting_capability
 {
 public:
-  Master_info();
+  Master_info(bool is_slave_recovery);
  ~Master_info();

  /* the variables below are needed because we can change masters on the fly */
@@ -100,6 +100,13 @@ class Master_info : public Slave_reporting_capability

  */
  long clock_diff_with_master;
+
+  /*
+    Keeps track of the number of events before fsyncing.
+    The option --sync-master-info determines how many
+    events should happen before fsyncing.
+  */
+  uint sync_counter;
 };

 void init_master_info_with_options(Master_info* mi);

--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -28,11 +28,11 @@ int init_intvar_from_file(int* var, IO_CACHE* f, int default_val);
 int init_strvar_from_file(char *var, int max_size, IO_CACHE *f,
 			  const char *default_val);

-
-Relay_log_info::Relay_log_info()
+Relay_log_info::Relay_log_info(bool is_slave_recovery)
  :Slave_reporting_capability("SQL"),
   no_storage(FALSE), replicate_same_server_id(::replicate_same_server_id),
   info_fd(-1), cur_log_fd(-1), relay_log(&sync_relaylog_period),
+   sync_counter(0), is_relay_log_recovery(is_slave_recovery),
   save_temporary_tables(0),
 #if HAVE_purify
   is_fake(FALSE),
@@ -259,7 +259,8 @@ Failed to open the existing relay log info file '%s' (errno %d)",
    rli->group_relay_log_pos= rli->event_relay_log_pos= relay_log_pos;
    rli->group_master_log_pos= master_log_pos;

-    if (init_relay_log_pos(rli,
+    if (!rli->is_relay_log_recovery &&
+        init_relay_log_pos(rli,
                           rli->group_relay_log_name,
                           rli->group_relay_log_pos,
                           0 /* no data lock*/,
@@ -274,6 +275,7 @@ Failed to open the existing relay log info file '%s' (errno %d)",
  }

 #ifndef DBUG_OFF
+  if (!rli->is_relay_log_recovery)
  {
    char llbuf1[22], llbuf2[22];
    DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",

--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -96,6 +96,19 @@ class Relay_log_info : public Slave_reporting_capability
  LOG_INFO linfo;
  IO_CACHE cache_buf,*cur_log;

+  /*
+    Keeps track of the number of transactions that commit
+    before fsyncing. The option --sync-relay-log-info determines 
+    how many transactions should commit before fsyncing.
+  */ 
+  uint sync_counter;
+
+  /*
+    Identifies when the recovery process is going on.
+    See sql/slave.cc:init_recovery for further details.
+  */ 
+  bool is_relay_log_recovery;
+
  /* The following variables are safe to read any time */

  /* IO_CACHE of the info file - set only during init or end */
@@ -267,7 +280,7 @@ class Relay_log_info : public Slave_reporting_capability
  char slave_patternload_file[FN_REFLEN]; 
  size_t slave_patternload_file_size;  

-  Relay_log_info();
+  Relay_log_info(bool is_slave_recovery);
  ~Relay_log_info();

  /*

--- a/sql/set_var.cc
+++ b/sql/set_var.cc
@@ -1534,19 +1534,19 @@ static bool get_unsigned(THD *thd, set_var *var, ulonglong user_max,
 }


-bool sys_var_int_ptr::check(THD *thd, set_var *var)
+bool sys_var_uint_ptr::check(THD *thd, set_var *var)
 {
-  var->save_result.ulong_value= (ulong) var->value->val_int();
+  var->save_result.ulong_value= (ulong) var->value->val_uint();
  return 0;
 }

-bool sys_var_int_ptr::update(THD *thd, set_var *var)
+bool sys_var_uint_ptr::update(THD *thd, set_var *var)
 {
  *value= (uint) var->save_result.ulong_value;
  return 0;
 }

-void sys_var_int_ptr::set_default(THD *thd, enum_var_type type)
+void sys_var_uint_ptr::set_default(THD *thd, enum_var_type type)
 {
  *value= (uint) option_limits->def_value;
 }

--- a/sql/set_var.h
+++ b/sql/set_var.h
@@ -178,10 +178,10 @@ class sys_var_long_ptr_global: public sys_var_global
 /**
   Unsigned int system variable class
 */
-class sys_var_int_ptr :public sys_var
+class sys_var_uint_ptr :public sys_var
 {
 public:
-  sys_var_int_ptr(sys_var_chain *chain, const char *name_arg, 
+  sys_var_uint_ptr(sys_var_chain *chain, const char *name_arg, 
                  uint *value_ptr_arg,
                  sys_after_update_func after_update_arg= NULL)
    :sys_var(name_arg, after_update_arg),

--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -129,6 +129,7 @@ static bool wait_for_relay_log_space(Relay_log_info* rli);
 static inline bool io_slave_killed(THD* thd,Master_info* mi);
 static inline bool sql_slave_killed(THD* thd,Relay_log_info* rli);
 static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type);
+static int init_recovery(Master_info* mi);
 static void print_slave_skip_errors(void);
 static int safe_connect(THD* thd, MYSQL* mysql, Master_info* mi);
 static int safe_reconnect(THD* thd, MYSQL* mysql, Master_info* mi,
@@ -220,6 +221,7 @@ void unlock_slave_threads(Master_info* mi)
 int init_slave()
 {
  DBUG_ENTER("init_slave");
+  int error= 0;

  /*
    This is called when mysqld starts. Before client connections are
@@ -231,7 +233,7 @@ int init_slave()
    TODO: re-write this to interate through the list of files
    for multi-master
  */
-  active_mi= new Master_info;
+  active_mi= new Master_info(relay_log_recovery);

  /*
    If --slave-skip-errors=... was not used, the string value for the
@@ -250,6 +252,7 @@ int init_slave()
  if (!active_mi)
  {
    sql_print_error("Failed to allocate memory for the master info structure");
+    error= 1;
    goto err;
  }

@@ -257,6 +260,13 @@ int init_slave()
                       !master_host, (SLAVE_IO | SLAVE_SQL)))
  {
    sql_print_error("Failed to initialize the master info structure");
+    error= 1;
+    goto err;
+  }
+
+  if (active_mi->rli.is_relay_log_recovery && init_recovery(active_mi))
+  {
+    error= 1;
    goto err;
  }

@@ -275,18 +285,89 @@ int init_slave()
                            SLAVE_IO | SLAVE_SQL))
    {
      sql_print_error("Failed to create slave threads");
+      error= 1;
      goto err;
    }
  }
-  pthread_mutex_unlock(&LOCK_active_mi);
-  DBUG_RETURN(0);

 err:
+  active_mi->rli.is_relay_log_recovery= FALSE;
  pthread_mutex_unlock(&LOCK_active_mi);
-  DBUG_RETURN(1);
+  DBUG_RETURN(error);
 }

-
+/*
+  Updates the master info based on the information stored in the
+  relay info and ignores relay logs previously retrieved by the IO
+  thread, which thus starts fetching again based on the
+  group_master_log_pos and group_master_log_name. Eventually, the old
+  relay logs will be purged by the normal purge mechanism.
+
+  In the future, we should improve this routine in order to avoid throwing
+  away logs that are safely stored in the disk. Note also that this recovery
+  routine relies on the correctness of the relay-log.info and only tolerates
+  coordinate problems in master.info.
+
+  In this function, there is no need for a mutex as the caller
+  (i.e. init_slave) already has one acquired.
+
+  Specifically, the following structures are updated:
+
+  1 - mi->master_log_pos  <-- rli->group_master_log_pos
+  2 - mi->master_log_name <-- rli->group_master_log_name
+  3 - It moves the relay log to the new relay log file, by
+      rli->group_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
+      rli->event_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
+      rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
+      rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
+
+   If there is an error, it is logged and (1) is returned; otherwise (0).
+ */
+static int init_recovery(Master_info* mi)
+{
+  const char *errmsg= 0;
+  DBUG_ENTER("init_recovery");
+  Relay_log_info *rli= &mi->rli;
+  if (rli->group_master_log_name[0])
+  {
+    /* Make the IO thread re-fetch from the coordinates in the relay info. */
+    mi->master_log_pos= max(BIN_LOG_HEADER_SIZE,
+                             rli->group_master_log_pos);
+    strmake(mi->master_log_name, rli->group_master_log_name,
+            sizeof(mi->master_log_name)-1);
+    sql_print_warning("Recovery from master pos %lu and file %s.",
+                      (ulong) mi->master_log_pos, mi->master_log_name);
+
+    /* Restart both relay log positions at the head of the current file. */
+    strmake(rli->group_relay_log_name, rli->relay_log.get_log_fname(),
+            sizeof(rli->group_relay_log_name)-1);
+    strmake(rli->event_relay_log_name, rli->relay_log.get_log_fname(),
+            sizeof(rli->event_relay_log_name)-1);
+    rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
+
+    if (init_relay_log_pos(rli, rli->group_relay_log_name,
+                           rli->group_relay_log_pos,
+                           0 /*no data lock*/, &errmsg, 0))
+    {
+      sql_print_error("Failed to open the relay log '%s': %s",
+                      rli->group_relay_log_name, errmsg ? errmsg : "unknown");
+      DBUG_RETURN(1);
+    }
+    if (flush_master_info(mi, 0))
+    {
+      sql_print_error("Failed to flush master info file");
+      DBUG_RETURN(1);
+    }
+    if (flush_relay_log_info(rli))
+    {
+      sql_print_error("Failed to flush relay info file");
+      DBUG_RETURN(1);
+    }
+  }
+
+  DBUG_RETURN(0);
+}
+ 
 /**
  Convert slave skip errors bitmap into a printable string.
 */
@@ -3959,7 +4040,14 @@ bool flush_relay_log_info(Relay_log_info* rli)
    error=1;
  if (flush_io_cache(file))
    error=1;
-
+  if (sync_relayloginfo_period &&
+      !error &&
+      ++(rli->sync_counter) >= sync_relayloginfo_period)
+  {
+    if (my_sync(rli->info_fd, MYF(MY_WME)))
+      error=1;
+    rli->sync_counter= 0;
+  }
  /* Flushing the relay log is done by the slave I/O thread */
  DBUG_RETURN(error);
 }
@@ -4366,6 +4454,8 @@ void rotate_relay_log(Master_info* mi)
  DBUG_ENTER("rotate_relay_log");
  Relay_log_info* rli= &mi->rli;

+  DBUG_EXECUTE_IF("crash_before_rotate_relaylog", abort(););
+
  /* We don't lock rli->run_lock. This would lead to deadlocks. */
  pthread_mutex_lock(&mi->run_lock);


--- a/sql/sql_binlog.cc
+++ b/sql/sql_binlog.cc
@@ -58,7 +58,7 @@ void mysql_client_binlog_statement(THD* thd)
  my_bool have_fd_event= TRUE;
  if (!thd->rli_fake)
  {
-    thd->rli_fake= new Relay_log_info;
+    thd->rli_fake= new Relay_log_info(FALSE);
 #ifdef HAVE_purify
    thd->rli_fake->is_fake= TRUE;
 #endif

--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -1769,6 +1769,16 @@ static sys_var_const    sys_relay_log_info_file(&vars, "relay_log_info_file",
                                      (uchar*) &relay_log_info_file);
 static sys_var_bool_ptr	sys_relay_log_purge(&vars, "relay_log_purge",
 					    &relay_log_purge);
+static sys_var_bool_ptr sys_relay_log_recovery(&vars, "relay_log_recovery",
+                                               &relay_log_recovery);
+static sys_var_uint_ptr sys_sync_binlog_period(&vars, "sync_binlog",
+                                              &sync_binlog_period);
+static sys_var_uint_ptr sys_sync_relaylog_period(&vars, "sync_relay_log",
+                                                &sync_relaylog_period);
+static sys_var_uint_ptr sys_sync_relayloginfo_period(&vars, "sync_relay_log_info",
+                                                    &sync_relayloginfo_period);
+static sys_var_uint_ptr sys_sync_masterinfo_period(&vars, "sync_master_info",
+                                                  &sync_masterinfo_period);
 static sys_var_const    sys_relay_log_space_limit(&vars,
                                                  "relay_log_space_limit",
                                                  OPT_GLOBAL, SHOW_LONGLONG,
@@ -1784,8 +1794,6 @@ static sys_var_const    sys_slave_skip_errors(&vars, "slave_skip_errors",
                                              (uchar*) slave_skip_error_names);
 static sys_var_long_ptr	sys_slave_trans_retries(&vars, "slave_transaction_retries",
 						&slave_trans_retries);
-static sys_var_int_ptr sys_sync_binlog_period(&vars, "sync_binlog", &sync_binlog_period);
-static sys_var_int_ptr sys_sync_relaylog_period(&vars, "sync_relay_log", &sync_relaylog_period);
 static sys_var_slave_skip_counter sys_slave_skip_counter(&vars, "sql_slave_skip_counter");