BUG#23171 (Illegal slave restart position):

Third patch of the bug fix, in which the code for skipping events and for executing events is factored out into three functions:
- shall_skip(), which decides whether the event shall be skipped and gives the reason for it;
- do_apply_event(), where the event is applied to the database; and
- do_update_pos(), which updates the actual relay log position and group positions.

mysql-test/r/rpl_row_tabledefs_2myisam.result: Result change.
mysql-test/r/rpl_row_tabledefs_3innodb.result: Result change.
sql/log_event.cc: Creating shall_skip(), do_update_pos(), and do_apply_event() functions for each event by factoring out the previous code. Adding debug code and fixing some error codes that were not correct.
sql/rpl_rli.cc: Renaming unsafe_to_stop_at to last_event_start_time. Adding debug code.
sql/rpl_rli.h: Renaming unsafe_to_stop_at to last_event_start_time.
sql/slave.cc: Renaming unsafe_to_stop_at to last_event_start_time.

BUG#23171 (Illegal slave restart position):
Third patch of the bug fix, in which the code for skipping events and for executing events is factored out into three functions:
- shall_skip(), which decides whether the event shall be skipped and gives the reason for it;
- do_apply_event(), where the event is applied to the database; and
- do_update_pos(), which updates the actual relay log position and group positions.

mysql-test/r/rpl_row_tabledefs_2myisam.result: Result change.
mysql-test/r/rpl_row_tabledefs_3innodb.result: Result change.
sql/log_event.cc: Creating shall_skip(), do_update_pos(), and do_apply_event() functions for each event by factoring out the previous code. Adding debug code and fixing some error codes that were not correct.
sql/rpl_rli.cc: Renaming unsafe_to_stop_at to last_event_start_time. Adding debug code.
sql/rpl_rli.h: Renaming unsafe_to_stop_at to last_event_start_time.
sql/slave.cc: Renaming unsafe_to_stop_at to last_event_start_time.
4b00b3f0 · unknown · baaa102d · 4b00b3f0 · 4b00b3f0 · 4b00b3f0
Commit 4b00b3f0 authored Jan 17, 2007 by unknown
6 changed files
--- a/mysql-test/r/rpl_row_tabledefs_2myisam.result
+++ b/mysql-test/r/rpl_row_tabledefs_2myisam.result
@@ -121,7 +121,7 @@ Replicate_Do_Table
 Replicate_Ignore_Table	
 Replicate_Wild_Do_Table	
 Replicate_Wild_Ignore_Table	
-Last_Errno	1364
+Last_Errno	1105
 Last_Error	Error in Write_rows event: error during transaction execution on table test.t1_nodef
 Skip_Counter	0
 Exec_Master_Log_Pos	#

--- a/mysql-test/r/rpl_row_tabledefs_3innodb.result
+++ b/mysql-test/r/rpl_row_tabledefs_3innodb.result
@@ -121,7 +121,7 @@ Replicate_Do_Table
 Replicate_Ignore_Table	
 Replicate_Wild_Do_Table	
 Replicate_Wild_Ignore_Table	
-Last_Errno	1364
+Last_Errno	1105
 Last_Error	Error in Write_rows event: error during transaction execution on table test.t1_nodef
 Skip_Counter	0
 Exec_Master_Log_Pos	#

--- a/sql/log_event.cc
+++ b/sql/log_event.cc
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -36,7 +36,7 @@ st_relay_log_info::st_relay_log_info()
   inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE),
   until_log_pos(0), retried_trans(0),
   tables_to_lock(0), tables_to_lock_count(0),
-   unsafe_to_stop_at(0)
+   last_event_start_time(0)
 {
  DBUG_ENTER("st_relay_log_info::st_relay_log_info");

@@ -1001,6 +1001,22 @@ bool st_relay_log_info::is_until_satisfied()
    log_pos= group_relay_log_pos;
  }

+#ifndef DBUG_OFF
+  {
+    char buf[32];
+    DBUG_PRINT("info", ("group_master_log_name='%s', group_master_log_pos=%s",
+                        group_master_log_name, llstr(group_master_log_pos, buf)));
+    DBUG_PRINT("info", ("group_relay_log_name='%s', group_relay_log_pos=%s",
+                        group_relay_log_name, llstr(group_relay_log_pos, buf)));
+    DBUG_PRINT("info", ("(%s) log_name='%s', log_pos=%s",
+                        until_condition == UNTIL_MASTER_POS ? "master" : "relay",
+                        log_name, llstr(log_pos, buf)));
+    DBUG_PRINT("info", ("(%s) until_log_name='%s', until_log_pos=%s",
+                        until_condition == UNTIL_MASTER_POS ? "master" : "relay",
+                        until_log_name, llstr(until_log_pos, buf)));
+  }
+#endif
+
  if (until_log_names_cmp_result == UNTIL_LOG_NAMES_CMP_UNKNOWN)
  {
    /*
@@ -1095,7 +1111,7 @@ void st_relay_log_info::cleanup_context(THD *thd, bool error)
  m_table_map.clear_tables();
  close_thread_tables(thd);
  clear_tables_to_lock();
-  unsafe_to_stop_at= 0;
+  last_event_start_time= 0;
  DBUG_VOID_RETURN;
 }
 #endif
--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -305,7 +305,14 @@ typedef struct st_relay_log_info
    DBUG_ASSERT(tables_to_lock == NULL && tables_to_lock_count == 0);
  }

-  time_t unsafe_to_stop_at;
+  /*
+    Used by row-based replication to detect that it should not stop at
+    this event, but give it a chance to send more events. The time
+    where the last event inside a group started is stored here. If the
+    variable is zero, we are not in a group (but may be in a
+    transaction).
+   */
+  time_t last_event_start_time;
 } RELAY_LOG_INFO;



--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -517,11 +517,11 @@ static bool sql_slave_killed(THD* thd, RELAY_LOG_INFO* rli)
      really one minute of idleness, we don't timeout if the slave SQL thread
      is actively working.
    */
-    if (!rli->unsafe_to_stop_at)
+    if (rli->last_event_start_time == 0)
      DBUG_RETURN(1);
    DBUG_PRINT("info", ("Slave SQL thread is in an unsafe situation, giving "
                        "it some grace period"));
-    if (difftime(time(0), rli->unsafe_to_stop_at) > 60)
+    if (difftime(time(0), rli->last_event_start_time) > 60)
    {
      slave_print_msg(ERROR_LEVEL, rli, 0,
                      "SQL thread had to stop in an unsafe situation, in "
@@ -1737,61 +1737,14 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
      now the relay log starts with its Format_desc, has a Rotate etc).
    */

-    DBUG_PRINT("info",("type_code=%d, server_id=%d",type_code,ev->server_id));
+    DBUG_PRINT("info",("type_code=%d (%s), server_id=%d",
+                       type_code, ev->get_type_str(), ev->server_id));

-    if ((ev->server_id == (uint32) ::server_id &&
-         !replicate_same_server_id &&
-         type_code != FORMAT_DESCRIPTION_EVENT) ||
-        (rli->slave_skip_counter &&
-         type_code != ROTATE_EVENT && type_code != STOP_EVENT &&
-         type_code != START_EVENT_V3 && type_code!= FORMAT_DESCRIPTION_EVENT))
-    {
-      DBUG_PRINT("info", ("event skipped"));
-      /*
-        We only skip the event here and do not increase the group log
-        position.  In the event that we have to restart, this means
-        that we might have to skip the event again, but that is a
-        minor issue.
-
-        If we were to increase the group log position when skipping an
-        event, it might be that we are restarting at the wrong
-        position and have events before that we should have executed,
-        so not increasing the group log position is a sure bet in this
-        case.
-
-        In this way, we just step the group log position when we
-        *know* that we are at the end of a group.
-       */
-      rli->inc_event_relay_log_pos();

-      /*
-        Protect against common user error of setting the counter to 1
-        instead of 2 while recovering from an insert which used auto_increment,
-        rand or user var.
-      */
-      if (rli->slave_skip_counter &&
-          !((type_code == INTVAR_EVENT ||
-             type_code == RAND_EVENT ||
-             type_code == USER_VAR_EVENT) &&
-            rli->slave_skip_counter == 1) &&
-          /*
-            The events from ourselves which have something to do with the relay
-            log itself must be skipped, true, but they mustn't decrement
-            rli->slave_skip_counter, because the user is supposed to not see
-            these events (they are not in the master's binlog) and if we
-            decremented, START SLAVE would for example decrement when it sees
-            the Rotate, so the event which the user probably wanted to skip
-            would not be skipped.
-          */
-          !(ev->server_id == (uint32) ::server_id &&
-            (type_code == ROTATE_EVENT || type_code == STOP_EVENT ||
-             type_code == START_EVENT_V3 || type_code == FORMAT_DESCRIPTION_EVENT)))
-        --rli->slave_skip_counter;
-      pthread_mutex_unlock(&rli->data_lock);
-      delete ev;
-      DBUG_RETURN(0);                                 // avoid infinite update loops
-    }
-    pthread_mutex_unlock(&rli->data_lock);
+    /*
+      Execute the event, but first we set some data that is needed for
+      the thread.
+    */

    thd->server_id = ev->server_id; // use the original server id for logging
    thd->set_time();                            // time the query
@@ -1799,7 +1752,8 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
    if (!ev->when)
      ev->when = time(NULL);
    ev->thd = thd; // because up to this point, ev->thd == 0
-    exec_res = ev->exec_event(rli);
+
+    exec_res= ev->exec_event(rli);
    DBUG_PRINT("info", ("exec_event result = %d", exec_res));
    DBUG_ASSERT(rli->sql_thd==thd);
    /*
@@ -2354,13 +2308,17 @@ Slave SQL thread aborted. Can't execute init_slave query");
    THD_CHECK_SENTRY(thd);
    if (exec_relay_log_event(thd,rli))
    {
+      DBUG_PRINT("info", ("exec_relay_log_event() failed"));
      // do not scare the user if SQL thread was simply killed or stopped
      if (!sql_slave_killed(thd,rli))
      {
        /*
-          retrieve as much info as possible from the thd and, error codes and warnings
-          and print this to the error log as to allow the user to locate the error
+          retrieve as much info as possible from the thd and, error
+          codes and warnings and print this to the error log as to
+          allow the user to locate the error
        */
+        DBUG_PRINT("info", ("thd->net.last_errno=%d; rli->last_slave_errno=%d",
+                            thd->net.last_errno, rli->last_slave_errno));
        if (thd->net.last_errno != 0)
        {
          if (rli->last_slave_errno == 0)
@@ -2682,6 +2640,7 @@ static int queue_binlog_ver_1_event(MASTER_INFO *mi, const char *buf,
    my_free((char*) tmp_buf, MYF(MY_ALLOW_ZERO_PTR));
    DBUG_RETURN(1);
  }
+
  pthread_mutex_lock(&mi->data_lock);
  ev->log_pos= mi->master_log_pos; /* 3.23 events don't contain log_pos */
  switch (ev->get_type_code()) {