From 80649ee8746aab02a198285248668cb49ce70f33 Mon Sep 17 00:00:00 2001 From: unknown <guilhem@mysql.com> Date: Tue, 18 Nov 2003 17:31:17 +0100 Subject: [PATCH] Fix for BUG#1870 "CHANGE MASTER makes SQL thread restart from coordinates of I/O thread". So, in CHANGE MASTER: when it seems reasonable that the user did not want to discontinue its replication (i.e. when he is not specifying host or port or master_log_file or master_log_pos; this will be documented), set the coordinates of the I/O thread to those of the SQL thread. This way, the SQL thread will see no discontinuity in the relay log (i.e. will skip no events), because the I/O thread will fill the brand new relay log with the events which are just after the position where the SQL thread had stopped (before CHANGE MASTER was issued). And a new test for this bug. mysql-test/r/rpl_loaddata.result: Now, after CHANGE MASTER the coordinates of the I/O thread are the last ones of the SQL thread, so result update. sql/sql_repl.cc: Fix for BUG#1870 "CHANGE MASTER makes SQL thread restart from coordinates of I/O thread". So, in CHANGE MASTER: when it seems reasonable that the user did not want to discontinue its replication (i.e. when he is not specifying host or port or master_log_file or master_log_pos; this will be documented), set the coordinates of the I/O thread to those of the SQL thread. This way, the SQL thread will see no discontinuity in the relay log (i.e. will skip no events), because the I/O thread will fill the brand new relay log with the events which are just after the position where the SQL thread had stopped (before CHANGE MASTER was issued). --- mysql-test/r/rpl_change_master.result | 32 +++++++++++++++++ mysql-test/r/rpl_loaddata.result | 2 +- mysql-test/t/rpl_change_master.test | 26 ++++++++++++++ sql/sql_repl.cc | 49 ++++++++++++++++++++++++--- 4 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 mysql-test/r/rpl_change_master.result create mode 100644 mysql-test/t/rpl_change_master.test diff --git a/mysql-test/r/rpl_change_master.result b/mysql-test/r/rpl_change_master.result new file mode 100644 index 0000000000..be2aec616b --- /dev/null +++ b/mysql-test/r/rpl_change_master.result @@ -0,0 +1,32 @@ +slave stop; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +reset master; +reset slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +slave start; +select get_lock("a",5); +get_lock("a",5) +1 +create table t1(n int); +insert into t1 values(1+get_lock("a",10)*0); +insert into t1 values(2); +stop slave; +select * from t1; +n +1 +show slave status; +Master_Host Master_User Master_Port Connect_retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_do_db Replicate_ignore_db Last_errno Last_error Skip_counter Exec_master_log_pos Relay_log_space +127.0.0.1 root 9306 1 master-bin.001 273 slave-relay-bin.002 255 master-bin.001 No No 0 0 214 314 +change master to master_user='root'; +show slave status; +Master_Host Master_User Master_Port Connect_retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_do_db Replicate_ignore_db Last_errno Last_error Skip_counter Exec_master_log_pos Relay_log_space +127.0.0.1 root 9306 1 master-bin.001 214 slave-relay-bin.001 4 master-bin.001 No No 0 0 214 4 +select release_lock("a"); +release_lock("a") +1 +start slave; +select * from t1; +n +1 +2 +drop table t1; diff --git a/mysql-test/r/rpl_loaddata.result b/mysql-test/r/rpl_loaddata.result index 8b910d0d18..268e383ce6 100644 --- a/mysql-test/r/rpl_loaddata.result +++ b/mysql-test/r/rpl_loaddata.result @@ -43,7 +43,7 @@ change master to master_user='test'; change master to master_user='root'; show slave status; Master_Host Master_User Master_Port Connect_retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_do_db Replicate_ignore_db Last_errno Last_error Skip_counter Exec_master_log_pos Relay_log_space -127.0.0.1 root MASTER_PORT 1 master-bin.001 1442 slave-relay-bin.001 4 master-bin.001 No No 0 0 1442 4 +127.0.0.1 root MASTER_PORT 1 master-bin.001 1419 slave-relay-bin.001 4 master-bin.001 No No 0 0 1419 4 set global sql_slave_skip_counter=1; start slave; set sql_log_bin=0; diff --git a/mysql-test/t/rpl_change_master.test b/mysql-test/t/rpl_change_master.test new file mode 100644 index 0000000000..61de22fe57 --- /dev/null +++ b/mysql-test/t/rpl_change_master.test @@ -0,0 +1,26 @@ +source include/master-slave.inc; + +connection slave; +select get_lock("a",5); +connection master; +create table t1(n int); +insert into t1 values(1+get_lock("a",10)*0); +insert into t1 values(2); +save_master_pos; +connection slave; +sleep 3; # can't sync_with_master as we should be blocked +stop slave; +select * from t1; +show slave status; +change master to master_user='root'; +show slave status; +# Will restart from after the values(2), which is bug +select release_lock("a"); +start slave; +sync_with_master; +select * from t1; +connection master; +drop table t1; +save_master_pos; +connection slave; +sync_with_master; diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 10581431c7..c95cdc1b04 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -853,8 +853,8 @@ void kill_zombie_dump_threads(uint32 slave_server_id) int change_master(THD* thd, MASTER_INFO* mi) { int thread_mask; - const char* errmsg=0; - bool need_relay_log_purge=1; + const char* errmsg= 0; + bool need_relay_log_purge= 1; DBUG_ENTER("change_master"); lock_slave_threads(mi); @@ -928,6 +928,36 @@ int change_master(THD* thd, MASTER_INFO* mi) mi->rli.relay_log_pos=lex_mi->relay_log_pos; } + /* + If user did specify neither host nor port nor any log name nor any log + pos, i.e. he specified only user/password/master_connect_retry, he probably + wants replication to resume from where it had left, i.e. from the + coordinates of the **SQL** thread (imagine the case where the I/O is ahead + of the SQL; restarting from the coordinates of the I/O would lose some + events which is probably unwanted when you are just doing minor changes + like changing master_connect_retry). + A side-effect is that if only the I/O thread was started, this thread may + restart from ''/4 after the CHANGE MASTER. That's a minor problem (it is a + much more unlikely situation than the one we are fixing here). + Note: coordinates of the SQL thread must be read here, before the + 'if (need_relay_log_purge)' block which resets them. + */ + if (!lex_mi->host && !lex_mi->port && + !lex_mi->log_file_name && !lex_mi->pos && + need_relay_log_purge) + { + /* + Sometimes mi->rli.master_log_pos == 0 (it happens when the SQL thread is + not initialized), so we use a max(). + What happens to mi->rli.master_log_pos during the initialization stages + of replication is not 100% clear, so we guard against problems using + max(). + */ + mi->master_log_pos = max(BIN_LOG_HEADER_SIZE, mi->rli.master_log_pos); + strmake(mi->master_log_name,mi->rli.master_log_name, + sizeof(mi->master_log_name)-1); + } + flush_master_info(mi); if (need_relay_log_purge) { @@ -959,10 +989,21 @@ int change_master(THD* thd, MASTER_INFO* mi) } } DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos)); - /* If changing RELAY_LOG_FILE or RELAY_LOG_POS, this will be nonsense: */ + + /* + Coordinates in rli were spoilt by the 'if (need_relay_log_purge)' block, + so restore them to good values. If we left them to ''/0, that would work; + but that would fail in the case of 2 successive CHANGE MASTER (without a + START SLAVE in between): because first one would set the coords in mi to + the good values of those in rli, the set those in rli to ''/0, then + second CHANGE MASTER would set the coords in mi to those of rli, i.e. to + ''/0: we have lost all copies of the original good coordinates. + That's why we always save good coords in rli. + */ mi->rli.master_log_pos = mi->master_log_pos; strmake(mi->rli.master_log_name,mi->master_log_name, - sizeof(mi->rli.master_log_name)-1); + sizeof(mi->rli.master_log_name)-1); + if (!mi->rli.master_log_name[0]) // uninitialized case mi->rli.master_log_pos=0; -- 2.30.9