From 80649ee8746aab02a198285248668cb49ce70f33 Mon Sep 17 00:00:00 2001
From: unknown <guilhem@mysql.com>
Date: Tue, 18 Nov 2003 17:31:17 +0100
Subject: [PATCH] Fix for BUG#1870     "CHANGE MASTER makes SQL thread restart
 from coordinates of I/O thread".     So, in CHANGE MASTER:     when it seems
 reasonable that the user did not want to discontinue     its replication
 (i.e. when he is not specifying host or port or master_log_file     or
 master_log_pos; this will be documented), set the coordinates of the     I/O
 thread to those of the SQL thread. This way, the SQL thread will see     no
 discontinuity in the relay log (i.e. will skip no events), because     the
 I/O thread will fill the brand new relay log with the events which     are
 just after the position where the SQL thread had stopped     (before CHANGE
 MASTER was issued).     And a new test for this bug.

mysql-test/r/rpl_loaddata.result:
  After CHANGE MASTER, the coordinates of the I/O thread are now the last ones of the SQL thread, so the expected result is updated.
sql/sql_repl.cc:
  Fix for BUG#1870
  "CHANGE MASTER makes SQL thread restart from coordinates of I/O thread".
  So, in CHANGE MASTER:
  when it seems reasonable that the user did not want to discontinue
  its replication (i.e. when he is not specifying host or port or master_log_file
  or master_log_pos; this will be documented), set the coordinates of the
  I/O thread to those of the SQL thread. This way, the SQL thread will see
  no discontinuity in the relay log (i.e. will skip no events), because
  the I/O thread will fill the brand new relay log with the events which
  are just after the position where the SQL thread had stopped
  (before CHANGE MASTER was issued).
---
 mysql-test/r/rpl_change_master.result | 32 +++++++++++++++++
 mysql-test/r/rpl_loaddata.result      |  2 +-
 mysql-test/t/rpl_change_master.test   | 26 ++++++++++++++
 sql/sql_repl.cc                       | 49 ++++++++++++++++++++++++---
 4 files changed, 104 insertions(+), 5 deletions(-)
 create mode 100644 mysql-test/r/rpl_change_master.result
 create mode 100644 mysql-test/t/rpl_change_master.test

diff --git a/mysql-test/r/rpl_change_master.result b/mysql-test/r/rpl_change_master.result
new file mode 100644
index 0000000000..be2aec616b
--- /dev/null
+++ b/mysql-test/r/rpl_change_master.result
@@ -0,0 +1,32 @@
+slave stop;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+slave start;
+select get_lock("a",5);
+get_lock("a",5)
+1
+create table t1(n int);
+insert into t1 values(1+get_lock("a",10)*0);
+insert into t1 values(2);
+stop slave;
+select * from t1;
+n
+1
+show slave status;
+Master_Host	Master_User	Master_Port	Connect_retry	Master_Log_File	Read_Master_Log_Pos	Relay_Log_File	Relay_Log_Pos	Relay_Master_Log_File	Slave_IO_Running	Slave_SQL_Running	Replicate_do_db	Replicate_ignore_db	Last_errno	Last_error	Skip_counter	Exec_master_log_pos	Relay_log_space
+127.0.0.1	root	9306	1	master-bin.001	273	slave-relay-bin.002	255	master-bin.001	No	No			0		0	214	314
+change master to master_user='root';
+show slave status;
+Master_Host	Master_User	Master_Port	Connect_retry	Master_Log_File	Read_Master_Log_Pos	Relay_Log_File	Relay_Log_Pos	Relay_Master_Log_File	Slave_IO_Running	Slave_SQL_Running	Replicate_do_db	Replicate_ignore_db	Last_errno	Last_error	Skip_counter	Exec_master_log_pos	Relay_log_space
+127.0.0.1	root	9306	1	master-bin.001	214	slave-relay-bin.001	4	master-bin.001	No	No			0		0	214	4
+select release_lock("a");
+release_lock("a")
+1
+start slave;
+select * from t1;
+n
+1
+2
+drop table t1;
diff --git a/mysql-test/r/rpl_loaddata.result b/mysql-test/r/rpl_loaddata.result
index 8b910d0d18..268e383ce6 100644
--- a/mysql-test/r/rpl_loaddata.result
+++ b/mysql-test/r/rpl_loaddata.result
@@ -43,7 +43,7 @@ change master to master_user='test';
 change master to master_user='root';
 show slave status;
 Master_Host	Master_User	Master_Port	Connect_retry	Master_Log_File	Read_Master_Log_Pos	Relay_Log_File	Relay_Log_Pos	Relay_Master_Log_File	Slave_IO_Running	Slave_SQL_Running	Replicate_do_db	Replicate_ignore_db	Last_errno	Last_error	Skip_counter	Exec_master_log_pos	Relay_log_space
-127.0.0.1	root	MASTER_PORT	1	master-bin.001	1442	slave-relay-bin.001	4	master-bin.001	No	No			0		0	1442	4
+127.0.0.1	root	MASTER_PORT	1	master-bin.001	1419	slave-relay-bin.001	4	master-bin.001	No	No			0		0	1419	4
 set global sql_slave_skip_counter=1;
 start slave;
 set sql_log_bin=0;
diff --git a/mysql-test/t/rpl_change_master.test b/mysql-test/t/rpl_change_master.test
new file mode 100644
index 0000000000..61de22fe57
--- /dev/null
+++ b/mysql-test/t/rpl_change_master.test
@@ -0,0 +1,26 @@
+source include/master-slave.inc;
+
+connection slave;
+select get_lock("a",5);
+connection master;
+create table t1(n int);
+insert into t1 values(1+get_lock("a",10)*0);
+insert into t1 values(2);
+save_master_pos;
+connection slave;
+sleep 3; # can't sync_with_master as we should be blocked
+stop slave;
+select * from t1;
+show slave status;
+change master to master_user='root';
+show slave status;
+# Without the fix, replication would restart from after the values(2) insert; that is BUG#1870
+select release_lock("a");
+start slave;
+sync_with_master;
+select * from t1;
+connection master;
+drop table t1;
+save_master_pos;
+connection slave;
+sync_with_master;
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index 10581431c7..c95cdc1b04 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -853,8 +853,8 @@ void kill_zombie_dump_threads(uint32 slave_server_id)
 int change_master(THD* thd, MASTER_INFO* mi)
 {
   int thread_mask;
-  const char* errmsg=0;
-  bool need_relay_log_purge=1;
+  const char* errmsg= 0;
+  bool need_relay_log_purge= 1;
   DBUG_ENTER("change_master");
 
   lock_slave_threads(mi);
@@ -928,6 +928,36 @@ int change_master(THD* thd, MASTER_INFO* mi)
     mi->rli.relay_log_pos=lex_mi->relay_log_pos;
   }
 
+  /*
+    If the user specified neither host nor port nor any log name nor any log
+    pos, i.e. he specified only user/password/master_connect_retry, he probably
+    wants replication to resume from where it left off, i.e. from the
+    coordinates of the **SQL** thread (imagine the case where the I/O is ahead
+    of the SQL; restarting from the coordinates of the I/O would lose some
+    events which is probably unwanted when you are just doing minor changes
+    like changing master_connect_retry).
+    A side-effect is that if only the I/O thread was started, this thread may
+    restart from ''/4 after the CHANGE MASTER. That's a minor problem (it is a
+    much more unlikely situation than the one we are fixing here).
+    Note: coordinates of the SQL thread must be read here, before the
+    'if (need_relay_log_purge)' block which resets them.
+  */
+  if (!lex_mi->host && !lex_mi->port &&
+      !lex_mi->log_file_name && !lex_mi->pos &&
+      need_relay_log_purge)
+   {
+     /*
+       Sometimes mi->rli.master_log_pos == 0 (it happens when the SQL thread is
+       not initialized), so we use a max().
+       What happens to mi->rli.master_log_pos during the initialization stages
+       of replication is not 100% clear, so we guard against problems using
+       max().
+      */
+     mi->master_log_pos = max(BIN_LOG_HEADER_SIZE, mi->rli.master_log_pos);
+     strmake(mi->master_log_name,mi->rli.master_log_name,
+             sizeof(mi->master_log_name)-1);
+  }
+
   flush_master_info(mi);
   if (need_relay_log_purge)
   {
@@ -959,10 +989,21 @@ int change_master(THD* thd, MASTER_INFO* mi)
     }
   }
   DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos));
-  /* If changing RELAY_LOG_FILE or RELAY_LOG_POS, this will be nonsense: */
+
+  /*
+    Coordinates in rli were spoilt by the 'if (need_relay_log_purge)' block,
+    so restore them to good values. If we left them to ''/0, that would work;
+    but that would fail in the case of 2 successive CHANGE MASTER (without a
+    START SLAVE in between): because first one would set the coords in mi to
+    the good values of those in rli, then set those in rli to ''/0, then
+    second CHANGE MASTER would set the coords in mi to those of rli, i.e. to
+    ''/0: we have lost all copies of the original good coordinates.
+    That's why we always save good coords in rli.
+  */
   mi->rli.master_log_pos = mi->master_log_pos;
   strmake(mi->rli.master_log_name,mi->master_log_name,
-	  sizeof(mi->rli.master_log_name)-1);
+          sizeof(mi->rli.master_log_name)-1);
+
   if (!mi->rli.master_log_name[0]) // uninitialized case
     mi->rli.master_log_pos=0;
 
-- 
2.30.9