Merge bk-internal.mysql.com:/home/bk/mysql-4.0

into narttu.mysql.fi:/my/mysql-4.0

Merge bk-internal.mysql.com:/home/bk/mysql-4.0
into narttu.mysql.fi:/my/mysql-4.0
f5a134ba · monty@narttu.mysql.fi · 2c1f1206 · 73fe41f9 · f5a134ba · f5a134ba
Commit f5a134ba authored Aug 25, 2003 by monty@narttu.mysql.fi
6 changed files
--- a/innobase/os/os0file.c
+++ b/innobase/os/os0file.c
@@ -226,13 +226,8 @@ os_file_get_last_error(void)
  "InnoDB: the directory. It may also be you have created a subdirectory\n"
  "InnoDB: of the same name as a data file.\n"); 
 		} else {
-			 if (strerror((int)err) != NULL) {
-				fprintf(stderr,
-  "InnoDB: Error number %lu means '%s'.\n", err, strerror((int)err));
-			 }
-
 			 fprintf(stderr,
-  "InnoDB: See also section 13.2 at http://www.innodb.com/ibman.html\n"
+  "InnoDB: See section 13.2 at http://www.innodb.com/ibman.html\n"
  "InnoDB: about operating system error numbers.\n");
 		}
 	}

--- a/sql/log.cc
+++ b/sql/log.cc
@@ -1526,6 +1526,9 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length,
  SYNOPSIS
    wait_for_update()
    thd			Thread variable
+    master_or_slave     If 0, the caller is the Binlog_dump thread from master;
+                        if 1, the caller is the SQL thread from the slave. This
+                        influences only thd->proc_info.

  NOTES
    One must have a lock on LOCK_log before calling this function.
@@ -1538,11 +1541,15 @@ bool MYSQL_LOG::write(THD *thd,const char *query, uint query_length,
 */


-void MYSQL_LOG:: wait_for_update(THD* thd)
+void MYSQL_LOG:: wait_for_update(THD* thd, bool master_or_slave)
 {
  safe_mutex_assert_owner(&LOCK_log);
  const char* old_msg = thd->enter_cond(&update_cond, &LOCK_log,
-					"Slave: waiting for binlog update");
+                                        master_or_slave ?
+                                        "Has read all relay log; waiting for \
+the I/O slave thread to update it" : 
+                                        "Has sent all binlog to slave; \
+waiting for binlog to be updated"); 
  pthread_cond_wait(&update_cond, &LOCK_log);
  pthread_mutex_unlock(&LOCK_log);		// See NOTES
  thd->exit_cond(old_msg);

--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -2066,9 +2066,6 @@ Fatal error running LOAD DATA INFILE on table '%s'. Default database: '%s'",

  TODO
    - Remove all active user locks
-    - If we have an active transaction at this point, the master died
-      in the middle while writing the transaction to the binary log.
-      In this case we should stop the slave.
 */

 int Start_log_event::exec_event(struct st_relay_log_info* rli)
@@ -2096,8 +2093,10 @@ int Start_log_event::exec_event(struct st_relay_log_info* rli)
    break;
 case BINLOG_FORMAT_323_GEQ_57 : 
    /* Can distinguish, based on the value of 'created' */
-    if (created) /* this was generated at master startup*/
-      close_temporary_tables(thd);
+    if (!created) 
+      break;
+    /* otherwise this was generated at master startup*/  
+    close_temporary_tables(thd);
    break;
  default :
    /* this case is impossible */
@@ -2154,10 +2153,28 @@ int Stop_log_event::exec_event(struct st_relay_log_info* rli)
    We can't rotate the slave as this will cause infinitive rotations
    in a A -> B -> A setup.

+  NOTES
+    As a transaction NEVER spans 2 or more binlogs:
+    if we have an active transaction at this point, the master died while
+    writing the transaction to the binary log, i.e. while flushing the binlog
+    cache to the binlog. As the write was started, the transaction had been
+    committed on the master, so we lack the information to replay this
+    transaction on the slave; all we can do is stop with error.
+    If we didn't detect it, then positions would start to become garbage (as we
+    are incrementing rli->relay_log_pos whereas we are in a transaction: the new
+    rli->relay_log_pos will be
+    relay_log_pos of the BEGIN + size of the Rotate event = garbage.
+
+    Since MySQL 4.0.14, the master ALWAYS sends a Rotate event when it starts
+    sending the next binlog, so we are sure to receive a Rotate event just
+    after the end of the "dead master"'s binlog; so this exec_event() is the
+    right place to catch the problem. If we waited until
+    Start_log_event::exec_event() it would be too late: rli->relay_log_pos would
+    already be garbage.
+
  RETURN VALUES
    0	ok
- */
-  
+*/

 int Rotate_log_event::exec_event(struct st_relay_log_info* rli)
 {
@@ -2165,6 +2182,18 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli)
  DBUG_ENTER("Rotate_log_event::exec_event");

  pthread_mutex_lock(&rli->data_lock);
+
+  if (rli->inside_transaction)
+  {
+    slave_print_error(rli, 0,
+                      "there is an unfinished transaction in the relay log \
+(could find neither COMMIT nor ROLLBACK in the relay log); it could be that \
+the master died while writing the transaction to its binary log. Now the slave \
+is rolling back the transaction.");
+    pthread_mutex_unlock(&rli->data_lock);
+    DBUG_RETURN(1);
+  }
+
  memcpy(log_name, new_log_ident, ident_len+1);
  rli->master_log_pos = pos;
  rli->relay_log_pos += get_event_len();

--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -1443,7 +1443,8 @@ static bool wait_for_relay_log_space(RELAY_LOG_INFO* rli)
  pthread_mutex_lock(&rli->log_space_lock);
  const char* save_proc_info= thd->enter_cond(&rli->log_space_cond,
                                              &rli->log_space_lock, 
-                                              "Waiting for relay log space to free");
+                                              "Waiting for the SQL slave \
+thread to free enough relay log space");
  while (rli->log_space_limit < rli->log_space_total &&
 	 !(slave_killed=io_slave_killed(thd,mi)) &&
         !rli->ignore_log_space_limit)
@@ -1925,7 +1926,8 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
    
    DBUG_PRINT("info",("Waiting for master update"));
    const char* msg = thd->enter_cond(&data_cond, &data_lock,
-                                      "Waiting for master update");
+                                      "Waiting for the SQL slave thread to \
+advance position");
    /*
      We are going to pthread_cond_(timed)wait(); if the SQL thread stops it
      will wake us up.
@@ -1988,7 +1990,14 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
  thd->master_access= ~0;
  thd->priv_user = 0;
  thd->slave_thread = 1;
-  thd->options = (((opt_log_slave_updates) ? OPTION_BIN_LOG:0) | OPTION_AUTO_IS_NULL) ;
+  thd->options = ((opt_log_slave_updates) ? OPTION_BIN_LOG:0) |
+    OPTION_AUTO_IS_NULL |
+    /* 
+       It's nonsense to constrain the slave threads with max_join_size; if a
+       query succeeded on master, we HAVE to execute it.
+    */
+    OPTION_BIG_SELECTS ; 
+    
  thd->client_capabilities = CLIENT_LOCAL_FILES;
  thd->real_id=pthread_self();
  pthread_mutex_lock(&LOCK_thread_count);
@@ -2008,11 +2017,8 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
  VOID(pthread_sigmask(SIG_UNBLOCK,&set,&thd->block_signals));
 #endif

-  if (thd->variables.max_join_size == HA_POS_ERROR)
-    thd->options |= OPTION_BIG_SELECTS;
-
  if (thd_type == SLAVE_THD_SQL)
-    thd->proc_info= "Waiting for the next event in slave queue";
+    thd->proc_info= "Waiting for the next event in relay log";
  else
    thd->proc_info= "Waiting for master update";
  thd->version=refresh_version;
@@ -2260,7 +2266,7 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
  }
  else
  {
-    sql_print_error("\
+    slave_print_error(rli, 0, "\
 Could not parse relay log event entry. The possible reasons are: the master's \
 binary log is corrupted (you can check this by running 'mysqlbinlog' on the \
 binary log), the slave's relay log is corrupted (you can check this by running \
@@ -2334,7 +2340,7 @@ extern "C" pthread_handler_decl(handle_slave_io,arg)
  }
  

-  thd->proc_info = "connecting to master";
+  thd->proc_info = "Connecting to master";
  // we can get killed during safe_connect
  if (!safe_connect(thd, mysql, mi))
    sql_print_error("Slave I/O thread: connected to master '%s@%s:%d',\
@@ -2381,7 +2387,7 @@ dump");
 	goto err;
      }
 	  
-      thd->proc_info = "Waiiting to reconnect after a failed dump request";
+      thd->proc_info= "Waiting to reconnect after a failed binlog dump request";
      mc_end_server(mysql);
      /*
 	First time retry immediately, assuming that we can recover
@@ -2402,7 +2408,7 @@ dump");
 	goto err;
      }

-      thd->proc_info = "Reconnecting after a failed dump request";
+      thd->proc_info = "Reconnecting after a failed binlog dump request";
      if (!suppress_warnings)
 	sql_print_error("Slave I/O thread: failed dump request, \
 reconnecting to try again, log '%s' at postion %s", IO_RPL_LOG_NAME,
@@ -2421,7 +2427,13 @@ after reconnect");
    while (!io_slave_killed(thd,mi))
    {
      bool suppress_warnings= 0;    
-      thd->proc_info = "Reading master update";
+      /* 
+         We say "waiting" because read_event() will wait if there's nothing to
+         read. But if there's something to read, it will not wait. The important
+         thing is to not confuse users by saying "reading" whereas we're in fact
+         receiving nothing.
+      */
+      thd->proc_info = "Waiting for master to send event";
      ulong event_len = read_event(mysql, mi, &suppress_warnings);
      if (io_slave_killed(thd,mi))
      {
@@ -2448,7 +2460,8 @@ max_allowed_packet",
 			  mc_mysql_error(mysql));
 	  goto err;
 	}
-	thd->proc_info = "Waiting to reconnect after a failed read";
+	thd->proc_info = "Waiting to reconnect after a failed master event \
+read";
 	mc_end_server(mysql);
 	if (retry_count++)
 	{
@@ -2464,7 +2477,7 @@ max_allowed_packet",
 reconnect after a failed read");
 	  goto err;
 	}
-	thd->proc_info = "Reconnecting after a failed read";
+	thd->proc_info = "Reconnecting after a failed master event read";
 	if (!suppress_warnings)
 	  sql_print_error("Slave I/O thread: Failed reading log event, \
 reconnecting to retry, log '%s' position %s", IO_RPL_LOG_NAME,
@@ -2481,7 +2494,7 @@ reconnect done to recover from failed read");
      } // if (event_len == packet_error)
 	  
      retry_count=0;			// ok event, reset retry counter
-      thd->proc_info = "Queueing event from master";
+      thd->proc_info = "Queueing master event to the relay log";
      if (queue_event(mi,(const char*)mysql->net.read_pos + 1,
 		      event_len))
      {
@@ -2663,7 +2676,7 @@ log '%s' at position %s, relay log '%s' position: %s", RPL_LOG_NAME,

  while (!sql_slave_killed(thd,rli))
  {
-    thd->proc_info = "Processing master log event"; 
+    thd->proc_info = "Reading event from the relay log"; 
    DBUG_ASSERT(rli->sql_thd == thd);
    THD_CHECK_SENTRY(thd);
    if (exec_relay_log_event(thd,rli))
@@ -2695,6 +2708,12 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
  DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
  /* When master_pos_wait() wakes up it will check this and terminate */
  rli->slave_running= 0; 
+  /* 
+     Going out of the transaction. Necessary to mark it, in case the user
+     restarts replication from a non-transactional statement (with CHANGE
+     MASTER).
+  */
+  rli->inside_transaction= 0;
  /* Wake up master_pos_wait() */
  pthread_mutex_unlock(&rli->data_lock);
  DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
@@ -3386,7 +3405,7 @@ rli->relay_log_pos=%s rli->pending=%lu",
        pthread_mutex_unlock(&rli->log_space_lock);
        pthread_cond_broadcast(&rli->log_space_cond);
        // Note that wait_for_update unlocks lock_log !
-        rli->relay_log.wait_for_update(rli->sql_thd);
+        rli->relay_log.wait_for_update(rli->sql_thd, 1);
        // re-acquire data lock since we released it earlier
        pthread_mutex_lock(&rli->data_lock);
 	continue;

--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -123,7 +123,7 @@ class MYSQL_LOG
  }
  void set_max_size(ulong max_size_arg);
  void signal_update() { pthread_cond_broadcast(&update_cond);}
-  void wait_for_update(THD* thd);
+  void wait_for_update(THD* thd, bool master_or_slave);
  void set_need_start_event() { need_start_event = 1; }
  void init(enum_log_type log_type_arg,
 	    enum cache_type io_cache_type_arg,

--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -532,7 +532,7 @@ Increase max_allowed_packet on master";
 	  if (!thd->killed)
 	  {
 	    /* Note that the following call unlocks lock_log */
-	    mysql_bin_log.wait_for_update(thd);
+	    mysql_bin_log.wait_for_update(thd, 0);
 	  }
 	  else
 	    pthread_mutex_unlock(log_lock);
@@ -547,7 +547,7 @@ Increase max_allowed_packet on master";

 	if (read_packet)
 	{
-	  thd->proc_info = "sending update to slave";
+	  thd->proc_info = "Sending binlog event to slave";
 	  if (my_net_write(net, (char*)packet->ptr(), packet->length()) )
 	  {
 	    errmsg = "Failed on my_net_write()";
@@ -584,7 +584,7 @@ Increase max_allowed_packet on master";
    {
      bool loop_breaker = 0;
      // need this to break out of the for loop from switch
-      thd->proc_info = "switching to next log";
+      thd->proc_info = "Finished reading one binlog; switching to next binlog";
      switch (mysql_bin_log.find_next_log(&linfo, 1)) {
      case LOG_INFO_EOF:
 	loop_breaker = (flags & BINLOG_DUMP_NON_BLOCK);
@@ -623,14 +623,14 @@ Increase max_allowed_packet on master";
  (void)my_close(file, MYF(MY_WME));

  send_eof(&thd->net);
-  thd->proc_info = "waiting to finalize termination";
+  thd->proc_info = "Waiting to finalize termination";
  pthread_mutex_lock(&LOCK_thread_count);
  thd->current_linfo = 0;
  pthread_mutex_unlock(&LOCK_thread_count);
  DBUG_VOID_RETURN;

 err:
-  thd->proc_info = "waiting to finalize termination";
+  thd->proc_info = "Waiting to finalize termination";
  end_io_cache(&log);
  /*
    Exclude  iteration through thread list
@@ -866,7 +866,7 @@ int change_master(THD* thd, MASTER_INFO* mi)
    DBUG_RETURN(1);
  }

-  thd->proc_info = "changing master";
+  thd->proc_info = "Changing master";
  LEX_MASTER_INFO* lex_mi = &thd->lex.mi;
  // TODO: see if needs re-write
  if (init_master_info(mi, master_info_file, relay_log_info_file, 0))
@@ -932,7 +932,7 @@ int change_master(THD* thd, MASTER_INFO* mi)
  if (need_relay_log_purge)
  {
    mi->rli.skip_log_purge= 0;
-    thd->proc_info="purging old relay logs";
+    thd->proc_info="Purging old relay logs";
    if (purge_relay_logs(&mi->rli, thd,
 			 0 /* not only reset, but also reinit */,
 			 &errmsg))