A change of behaviour of Seconds_Behind_Master from SHOW SLAVE STATUS. It's going into 4.1

because the old behaviour was somewhat nonsensical (arguably a bug). The changes are: if the replication threads are down or disconnected, the column will be NULL; and if the master is idle, the column will no longer grow indefinitely. sql/slave.cc: mi->slave_running and rli->slave_running are now uints (this was needed only for mi, but because of how start_slave_thread() is used, both had to change). So mi->slave_running can now take 3 values: not running, running & not connected, running & connected. The last value is used for the calculation of Seconds_Behind_Master in SHOW SLAVE STATUS. Changing this column's behaviour: if the SQL or I/O thread is not running, or if the I/O thread is not connected (for example if it is reconnecting), it's NULL (to mean "unknown"). And if the master is idle, the column will not grow indefinitely like it used to (that was meaningless); this is fixed by forcing a value of 0 when the slave SQL thread has hit EOF of the relay log (which has only a limited number of caveats, explained in comments in the code). sql/slave.h: slave_running used to be bool, but we need to distinguish, for the I/O slave thread, between "running & connected" and "running & not connected" ("running" means the thread exists). sql/sql_repl.cc: we no longer need to set rli->last_master_timestamp to 0 (we used that to make Seconds_Behind_Master be NULL) in RESET SLAVE and CHANGE MASTER, as these commands imply that the slave threads are not running, and so Seconds_Behind_Master is already NULL because of that.

A change of behaviour of Seconds_Behind_Master from SHOW SLAVE STATUS. It's going into 4.1
because the old behaviour was somewhat nonsensical (arguably a bug). The changes are: if the replication threads are down or disconnected, the column will be NULL; and if the master is idle, the column will no longer grow indefinitely. sql/slave.cc: mi->slave_running and rli->slave_running are now uints (this was needed only for mi, but because of how start_slave_thread() is used, both had to change). So mi->slave_running can now take 3 values: not running, running & not connected, running & connected. The last value is used for the calculation of Seconds_Behind_Master in SHOW SLAVE STATUS. Changing this column's behaviour: if the SQL or I/O thread is not running, or if the I/O thread is not connected (for example if it is reconnecting), it's NULL (to mean "unknown"). And if the master is idle, the column will not grow indefinitely like it used to (that was meaningless); this is fixed by forcing a value of 0 when the slave SQL thread has hit EOF of the relay log (which has only a limited number of caveats, explained in comments in the code). sql/slave.h: slave_running used to be bool, but we need to distinguish, for the I/O slave thread, between "running & connected" and "running & not connected" ("running" means the thread exists). sql/sql_repl.cc: we no longer need to set rli->last_master_timestamp to 0 (we used that to make Seconds_Behind_Master be NULL) in RESET SLAVE and CHANGE MASTER, as these commands imply that the slave threads are not running, and so Seconds_Behind_Master is already NULL because of that.
b3d56a7d · unknown · 8ad9752e · b3d56a7d · b3d56a7d · b3d56a7d
Commit b3d56a7d authored Dec 16, 2004 by unknown
Hide whitespace changes
Inline Side-by-side

Showing with 74 additions and 22 deletions

sql/slave.cc sql/slave.cc +48 -13

sql/slave.h sql/slave.h +22 -5

sql/sql_repl.cc sql/sql_repl.cc +4 -4

No files found.
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -545,7 +545,7 @@ int terminate_slave_threads(MASTER_INFO* mi,int thread_mask,bool skip_lock)
 int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock,
 			   pthread_mutex_t *cond_lock,
 			   pthread_cond_t* term_cond,
-			   volatile bool* slave_running)
+			   volatile uint *slave_running)
 {
  if (term_lock)
  {
@@ -583,7 +583,7 @@ int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock,
 int start_slave_thread(pthread_handler h_func, pthread_mutex_t *start_lock,
 		       pthread_mutex_t *cond_lock,
 		       pthread_cond_t *start_cond,
-		       volatile bool *slave_running,
+		       volatile uint *slave_running,
 		       volatile ulong *slave_run_id,
 		       MASTER_INFO* mi,
                       bool high_priority)
@@ -963,7 +963,7 @@ void end_slave()
 static bool io_slave_killed(THD* thd, MASTER_INFO* mi)
 {
  DBUG_ASSERT(mi->io_thd == thd);
-  DBUG_ASSERT(mi->slave_running == 1); // tracking buffer overrun
+  DBUG_ASSERT(mi->slave_running); // tracking buffer overrun
  return mi->abort_slave || abort_loop || thd->killed;
 }

@@ -1767,19 +1767,13 @@ void init_master_info_with_options(MASTER_INFO* mi)
    strmake(mi->ssl_key, master_ssl_key, sizeof(mi->ssl_key)-1);
 }

-static void clear_slave_error(RELAY_LOG_INFO* rli)
+void clear_slave_error(RELAY_LOG_INFO* rli)
 {
  /* Clear the errors displayed by SHOW SLAVE STATUS */
  rli->last_slave_error[0]= 0;
  rli->last_slave_errno= 0;
 }

-void clear_slave_error_timestamp(RELAY_LOG_INFO* rli)
-{
-  rli->last_master_timestamp= 0;
-  clear_slave_error(rli);
-}
-
 /*
    Reset UNTIL condition for RELAY_LOG_INFO
   SYNOPSYS
@@ -2166,6 +2160,11 @@ int show_master_info(THD* thd, MASTER_INFO* mi)
    String *packet= &thd->packet;
    protocol->prepare_for_resend();
  
+    /*
+      TODO: we read slave_running without run_lock, whereas these variables
+      are updated under run_lock and not data_lock. In 5.0 we should lock
+      run_lock on top of data_lock (with good order).
+    */
    pthread_mutex_lock(&mi->data_lock);
    pthread_mutex_lock(&mi->rli.data_lock);

@@ -2226,7 +2225,12 @@ int show_master_info(THD* thd, MASTER_INFO* mi)
    protocol->store(mi->ssl_cipher, &my_charset_bin);
    protocol->store(mi->ssl_key, &my_charset_bin);

-    if (mi->rli.last_master_timestamp)
+    /*
+      Seconds_Behind_Master: if SQL thread is running and I/O thread is
+      connected, we can compute it otherwise show NULL (i.e. unknown).
+    */
+    if ((mi->slave_running == MYSQL_SLAVE_RUN_CONNECT) &&
+        mi->rli.slave_running)
    {
      long tmp= (long)((time_t)time((time_t*) 0)
                               - mi->rli.last_master_timestamp)
@@ -2246,9 +2250,13 @@ int show_master_info(THD* thd, MASTER_INFO* mi)
        slave is 2. At SHOW SLAVE STATUS time, assume that the difference
        between timestamp of slave and rli->last_master_timestamp is 0
        (i.e. they are in the same second), then we get 0-(2-1)=-1 as a result.
-        This confuses users, so we don't go below 0.
+        This confuses users, so we don't go below 0: hence the max().
+
+        last_master_timestamp == 0 (an "impossible" timestamp 1970) is a
+        special marker to say "consider we have caught up".
      */
-      protocol->store((longlong)(max(0, tmp)));
+      protocol->store((longlong)(mi->rli.last_master_timestamp ? max(0, tmp)
+                                 : 0));
    }
    else
      protocol->store_null();
@@ -3041,6 +3049,8 @@ extern "C" pthread_handler_decl(handle_slave_io,arg)

 connected:

+  // TODO: the assignment below should be under mutex (5.0)
+  mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
  thd->slave_net = &mysql->net;
  thd->proc_info = "Checking master version";
  if (get_master_version_and_clock(mysql, mi))
@@ -3072,6 +3082,7 @@ dump");
 	goto err;
      }
 	  
+      mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
      thd->proc_info= "Waiting to reconnect after a failed binlog dump request";
 #ifdef SIGNAL_WITH_VIO_CLOSE
      thd->clear_active_vio();
@@ -3148,6 +3159,7 @@ max_allowed_packet",
 			  mysql_error(mysql));
 	  goto err;
 	}
+        mi->slave_running= MYSQL_SLAVE_RUN_NOT_CONNECT;
 	thd->proc_info = "Waiting to reconnect after a failed master event read";
 #ifdef SIGNAL_WITH_VIO_CLOSE
        thd->clear_active_vio();
@@ -3323,6 +3335,14 @@ extern "C" pthread_handler_decl(handle_slave_sql,arg)
  pthread_mutex_lock(&LOCK_thread_count);
  threads.append(thd);
  pthread_mutex_unlock(&LOCK_thread_count);
+  /*
+    We are going to set slave_running to 1. Assuming slave I/O thread is
+    alive and connected, this is going to make Seconds_Behind_Master be 0
+    i.e. "caught up". Even if we're just at start of thread. Well it's ok, at
+    the moment we start we can think we are caught up, and the next second we
+    start receiving data so we realize we are not caught up and
+    Seconds_Behind_Master grows. No big deal.
+  */
  rli->slave_running = 1;
  rli->abort_slave = 0;
  pthread_mutex_unlock(&rli->run_lock);
@@ -4211,10 +4231,25 @@ Before assert, my_b_tell(cur_log)=%s  rli->event_relay_log_pos=%s",
        */
        pthread_mutex_unlock(&rli->log_space_lock);
        pthread_cond_broadcast(&rli->log_space_cond);
+        /*
+          We say in Seconds_Behind_Master that we have "caught up". Note that
+          for example if network link is broken but I/O slave thread hasn't
+          noticed it (slave_net_timeout not elapsed), then we'll say "caught
+          up" whereas we're not really caught up. Fixing that would require
+          internally cutting timeout in smaller pieces in network read, no
+          thanks. Another example: SQL has caught up on I/O, now I/O has read
+          a new event and is queuing it; the false "0" will exist until SQL
+          finishes executing the new event; it will look abnormal only if
+          the events have old timestamps (then you get "many", 0, "many").
+          Transient phases like this can't really be fixed.
+        */
+        time_t save_timestamp= rli->last_master_timestamp;
+        rli->last_master_timestamp= 0;
        // Note that wait_for_update unlocks lock_log !
        rli->relay_log.wait_for_update(rli->sql_thd, 1);
        // re-acquire data lock since we released it earlier
        pthread_mutex_lock(&rli->data_lock);
+        rli->last_master_timestamp= save_timestamp;
 	continue;
      }
      /*

--- a/sql/slave.h
+++ b/sql/slave.h
@@ -98,6 +98,21 @@ enum enum_binlog_formats {
  BINLOG_FORMAT_323_LESS_57, 
  BINLOG_FORMAT_323_GEQ_57 };

+/*
+  3 possible values for MASTER_INFO::slave_running and
+  RELAY_LOG_INFO::slave_running.
+  The values 0,1,2 are very important: to keep the diff small, I didn't
+  substitute places where we use 0/1 with the newly defined symbols. So don't change
+  these values.
+  The same way, code is assuming that in RELAY_LOG_INFO we use only values
+  0/1.
+  I started with using an enum, but
+  enum_variable=1; is not legal so would have required many line changes.
+*/
+#define MYSQL_SLAVE_NOT_RUN         0
+#define MYSQL_SLAVE_RUN_NOT_CONNECT 1
+#define MYSQL_SLAVE_RUN_CONNECT     2
+
 /****************************************************************************

  Replication SQL Thread
@@ -251,7 +266,8 @@ typedef struct st_relay_log_info

  /* if not set, the value of other members of the structure are undefined */
  bool inited;
-  volatile bool abort_slave, slave_running;
+  volatile bool abort_slave;
+  volatile uint slave_running;

  /* 
     Condition and its parameters from START SLAVE UNTIL clause.
@@ -385,7 +401,8 @@ typedef struct st_master_info
 #endif
  bool inited;
  enum enum_binlog_formats old_format;
-  volatile bool abort_slave, slave_running;
+  volatile bool abort_slave;
+  volatile uint slave_running;
  volatile ulong slave_run_id;
  /* 
     The difference in seconds between the clock of the master and the clock of
@@ -464,7 +481,7 @@ int terminate_slave_threads(MASTER_INFO* mi, int thread_mask,
 int terminate_slave_thread(THD* thd, pthread_mutex_t* term_mutex,
 			   pthread_mutex_t* cond_lock,
 			   pthread_cond_t* term_cond,
-			   volatile bool* slave_running);
+			   volatile uint* slave_running);
 int start_slave_threads(bool need_slave_mutex, bool wait_for_start,
 			MASTER_INFO* mi, const char* master_info_fname,
 			const char* slave_info_fname, int thread_mask);
@@ -477,7 +494,7 @@ int start_slave_threads(bool need_slave_mutex, bool wait_for_start,
 int start_slave_thread(pthread_handler h_func, pthread_mutex_t* start_lock,
 		       pthread_mutex_t *cond_lock,
 		       pthread_cond_t* start_cond,
-		       volatile bool *slave_running,
+		       volatile uint *slave_running,
 		       volatile ulong *slave_run_id,
 		       MASTER_INFO* mi,
                       bool high_priority);
@@ -519,7 +536,7 @@ void slave_print_error(RELAY_LOG_INFO* rli, int err_code, const char* msg, ...);
 void end_slave(); /* clean up */
 void init_master_info_with_options(MASTER_INFO* mi);
 void clear_until_condition(RELAY_LOG_INFO* rli);
-void clear_slave_error_timestamp(RELAY_LOG_INFO* rli);
+void clear_slave_error(RELAY_LOG_INFO* rli);
 int init_master_info(MASTER_INFO* mi, const char* master_info_fname,
 		     const char* slave_info_fname,
 		     bool abort_if_no_master_info_file,

--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -880,10 +880,10 @@ int reset_slave(THD *thd, MASTER_INFO* mi)
  */
  init_master_info_with_options(mi);
  /* 
-     Reset errors, and master timestamp (the idea is that we forget about the
+     Reset errors (the idea is that we forget about the
     old master).
  */
-  clear_slave_error_timestamp(&mi->rli);
+  clear_slave_error(&mi->rli);
  clear_until_condition(&mi->rli);
  
  // close master_info_file, relay_log_info_file, set mi->inited=rli->inited=0
@@ -1143,8 +1143,8 @@ int change_master(THD* thd, MASTER_INFO* mi)

  pthread_mutex_lock(&mi->rli.data_lock);
  mi->rli.abort_pos_wait++; /* for MASTER_POS_WAIT() to abort */
-  /* Clear the errors, for a clean start, and master timestamp */
-  clear_slave_error_timestamp(&mi->rli);
+  /* Clear the errors, for a clean start */
+  clear_slave_error(&mi->rli);
  clear_until_condition(&mi->rli);
  /*
    If we don't write new coordinates to disk now, then old will remain in