diff --git a/client/Makefile.am b/client/Makefile.am
index 2c54ec459890fed1268409be34fa7bf77dc24984..612a5f01f8d90fe2583b0779ef84e2d549da49af 100644
--- a/client/Makefile.am
+++ b/client/Makefile.am
@@ -39,7 +39,7 @@ mysqlbinlog_SOURCES =   mysqlbinlog.cc ../mysys/mf_tempdir.c
 mysqlbinlog_DEPENDENCIES=   	$(LIBRARIES) $(pkglib_LTLIBRARIES)
 mysqlmanagerc_SOURCES =   mysqlmanagerc.c 
 mysqlmanagerc_DEPENDENCIES=   	$(LIBRARIES) $(pkglib_LTLIBRARIES)
-sql_src=log_event.h log_event.cc
+sql_src=log_event.h mysql_priv.h log_event.cc
 
 # Fix for mit-threads
 DEFS =			-DUNDEF_THREADS_HACK
diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc
index 35f0db76ad67f05eed533712b1a994a0e954caa0..90a1526c2c7353e8eb51705afd1da9ed304fedc6 100644
--- a/client/mysqlbinlog.cc
+++ b/client/mysqlbinlog.cc
@@ -14,12 +14,28 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
+/* 
+
+   TODO: print the catalog (some USE catalog.db ????).
+
+   Standalone program to read a MySQL binary log (or relay log);
+   can read files produced by 3.23, 4.x, 5.0 servers. 
+
+   Can read binlogs from 3.23/4.x/5.0 and relay logs from 4.x/5.0.
+   Should be able to read any file of these categories, even with --position.
+   An important fact: the Format_desc event of the log is at most the 3rd event
+   of the log; if it is the 3rd then there is this combination:
+   Format_desc_of_slave, Rotate_of_master, Format_desc_of_master.
+*/
+
 #define MYSQL_CLIENT
 #undef MYSQL_SERVER
 #include "client_priv.h"
 #include <time.h>
 #include <assert.h>
 #include "log_event.h"
+/* That one is necessary for defines of OPTION_NO_FOREIGN_KEY_CHECKS etc */
+#include "mysql_priv.h" 
 
 #define BIN_LOG_HEADER_SIZE	4
 #define PROBE_HEADER_LEN	(EVENT_LEN_OFFSET+4)
@@ -481,21 +497,26 @@ static int check_master_version(MYSQL* mysql)
 }
 
 
+/*
+  TODO fix this for new format (like local log); this will be done when 4.0 is
+  merged here (Victor's fixes are needed to make dump_remote_log_entries()
+  work).
+*/
+
 static void dump_remote_log_entries(const char* logname)
 {
   char buf[128];
-  char last_db[FN_REFLEN+1] = "";
+  LAST_EVENT_INFO last_event_info;
   uint len;
   NET* net = &mysql->net;
   int old_format;
   old_format = check_master_version(mysql);
 
   if (!position)
-    position = BIN_LOG_HEADER_SIZE; // protect the innocent from spam
+    position = BIN_LOG_HEADER_SIZE;
   if (position < BIN_LOG_HEADER_SIZE)
   {
     position = BIN_LOG_HEADER_SIZE;
-    // warn the guity
     sql_print_error("Warning: The position in the binary log can't be less than %d.\nStarting from position %d\n", BIN_LOG_HEADER_SIZE, BIN_LOG_HEADER_SIZE);
   }
   int4store(buf, position);
@@ -517,10 +538,11 @@ static void dump_remote_log_entries(const char* logname)
     DBUG_PRINT("info",( "len= %u, net->read_pos[5] = %d\n",
 			len, net->read_pos[5]));
     Log_event *ev = Log_event::read_log_event((const char*) net->read_pos + 1 ,
-					      len - 1, &error, old_format);
+					      len - 1, &error, 0);
+    //TODO this ,0) : we need to store the description_event like for local_log
     if (ev)
     {
-      ev->print(result_file, short_form, last_db);
+      ev->print(result_file, short_form, &last_event_info);
       if (ev->get_type_code() == LOAD_EVENT)
 	dump_remote_file(net, ((Load_log_event*)ev)->fname);
       delete ev;
@@ -531,29 +553,98 @@ static void dump_remote_log_entries(const char* logname)
 }
 
 
-static int check_header(IO_CACHE* file)
+static void check_header(IO_CACHE* file, 
+                        Format_description_log_event **description_event) 
 {
   byte header[BIN_LOG_HEADER_SIZE];
   byte buf[PROBE_HEADER_LEN];
-  int old_format=0;
 
+  *description_event= new Format_description_log_event(3);
+  my_off_t tmp_pos;
   my_off_t pos = my_b_tell(file);
   my_b_seek(file, (my_off_t)0);
   if (my_b_read(file, header, sizeof(header)))
     die("Failed reading header;  Probably an empty file");
   if (memcmp(header, BINLOG_MAGIC, sizeof(header)))
     die("File is not a binary log file");
-  if (!my_b_read(file, buf, sizeof(buf)))
+
+  /*
+    Imagine we are running with --position=1000. We still need to know the
+    binlog format's. So we still need to find, if there is one, the Format_desc
+    event, or to know if this is a 3.23 binlog. So we need to first read the
+    first events of the log, those around offset 4.
+    Even if we are reading a 3.23 binlog from the start (no --position): we need
+    to know the header length (which is 13 in 3.23, 19 in 4.x) to be able to
+    successfully print the first event (Start_log_event_v3). So even in this
+    case, we need to "probe" the first bytes of the log *before* we do a real
+    read_log_event(). Because read_log_event() needs to know the header's length
+    to work fine.
+  */
+  for(;;)
   {
-    if (buf[4] == START_EVENT)
+    tmp_pos= my_b_tell(file); /* should be 4 the first time */
+    if (my_b_read(file, buf, sizeof(buf)))
     {
-      uint event_len;
-      event_len = uint4korr(buf + EVENT_LEN_OFFSET);
-      old_format = (event_len < (LOG_EVENT_HEADER_LEN + START_HEADER_LEN));
+      if (file->error)
+        die("\
+Could not read entry at offset %lu : Error in log format or read error",
+            tmp_pos); 
+      /*
+        Otherwise this is just EOF : this log currently contains 0-2 events.
+        Maybe it's going to be filled in the next milliseconds; then we are
+        going to have a problem if this a 3.23 log (imagine we are locally
+        reading a 3.23 binlog which is being written presently): we won't know
+        it in read_log_event() and will fail().
+        Similar problems could happen with hot relay logs if --position is used
+        (but a --position which is posterior to the current size of the log).
+        These are rare problems anyway (reading a hot log + when we read the
+        first events there are not all there yet + when we read a bit later
+        there are more events + using a strange --position).
+      */
+      break;
+    }
+    else
+    {
+      DBUG_PRINT("info",("buf[4]=%d", buf[4]));
+      /* always test for a Start_v3, even if no --position */
+      if (buf[4] == START_EVENT_V3)       /* This is 3.23 or 4.x */
+      {
+        if (uint4korr(buf + EVENT_LEN_OFFSET) < 
+            (LOG_EVENT_MINIMAL_HEADER_LEN + START_V3_HEADER_LEN))
+        {
+          /* This is 3.23 (format 1) */
+          delete *description_event;
+          *description_event= new Format_description_log_event(1);
+        }
+        break;
+      }
+      else if (tmp_pos>=position)
+        break;
+      else if (buf[4] == FORMAT_DESCRIPTION_EVENT) /* This is 5.0 */
+      {
+        my_b_seek(file, tmp_pos); /* seek back to event's start */
+        if (!(*description_event= (Format_description_log_event*) 
+              Log_event::read_log_event(file, *description_event)))
+          /* EOF can't be hit here normally, so it's a real error */
+          die("Could not read a Format_description_log_event event \
+at offset %lu ; this could be a log format error or read error",
+              tmp_pos); 
+        DBUG_PRINT("info",("Setting description_event"));
+      }
+      else if (buf[4] == ROTATE_EVENT)
+      {
+        my_b_seek(file, tmp_pos); /* seek back to event's start */
+        if (!Log_event::read_log_event(file, *description_event))
+          /* EOF can't be hit here normally, so it's a real error */
+          die("Could not read a Rotate_log_event event \
+at offset %lu ; this could be a log format error or read error",
+              tmp_pos); 
+      }
+      else
+        break;
     }
   }
   my_b_seek(file, pos);
-  return old_format;
 }
 
 
@@ -562,11 +653,15 @@ static void dump_local_log_entries(const char* logname)
   File fd = -1;
   IO_CACHE cache,*file= &cache;
   ulonglong rec_count = 0;
-  char last_db[FN_REFLEN+1];
+  LAST_EVENT_INFO last_event_info;
   byte tmp_buff[BIN_LOG_HEADER_SIZE];
-  bool old_format = 0;
-
-  last_db[0]=0;
+  /* 
+     check_header() will set the pointer below.
+     Why do we need here a pointer on an event instead of an event ?
+     This is because the event will be created (alloced) in read_log_event()
+     (which returns a pointer) in check_header().
+  */
+  Format_description_log_event* description_event; 
 
   if (logname && logname[0] != '-')
   {
@@ -575,14 +670,14 @@ static void dump_local_log_entries(const char* logname)
     if (init_io_cache(file, fd, 0, READ_CACHE, (my_off_t) position, 0,
 		      MYF(MY_WME | MY_NABP)))
       exit(1);
-    old_format = check_header(file);
+    check_header(file, &description_event);
   }
-  else
+  else // reading from stdin; TODO: check that it works
   {
     if (init_io_cache(file, fileno(result_file), 0, READ_CACHE, (my_off_t) 0,
 		      0, MYF(MY_WME | MY_NABP | MY_DONT_CHECK_FILESIZE)))
       exit(1);
-    old_format = check_header(file);
+    check_header(file, &description_event);
     if (position)
     {
       /* skip 'position' characters from stdout */
@@ -599,6 +694,9 @@ static void dump_local_log_entries(const char* logname)
     file->seek_not_done=0;
   }
 
+  if (!description_event->is_valid())
+    die("Invalid Format_description log event; could be out of memory");
+
   if (!position)
     my_b_read(file, tmp_buff, BIN_LOG_HEADER_SIZE); // Skip header
   for (;;)
@@ -606,7 +704,7 @@ static void dump_local_log_entries(const char* logname)
     char llbuff[21];
     my_off_t old_off = my_b_tell(file);
 
-    Log_event* ev = Log_event::read_log_event(file, old_format);
+    Log_event* ev = Log_event::read_log_event(file, description_event);
     if (!ev)
     {
       if (file->error)
@@ -633,7 +731,7 @@ Could not read entry at offset %s : Error in log format or read error",
             continue; // next
           }
         }
-	ev->print(result_file, short_form, last_db);
+	ev->print(result_file, short_form, &last_event_info);
         break;
       case CREATE_FILE_EVENT:
       {
@@ -661,18 +759,18 @@ Could not read entry at offset %s : Error in log format or read error",
 	  filename and use LOCAL), prepared in the 'case EXEC_LOAD_EVENT'
 	  below.
         */
-	ce->print(result_file, short_form, last_db, true);
+	ce->print(result_file, short_form, &last_event_info, true);
 	load_processor.process(ce);
 	ev= 0;
 	break;
       }
       case APPEND_BLOCK_EVENT:
-	ev->print(result_file, short_form, last_db);
+	ev->print(result_file, short_form, &last_event_info);
 	load_processor.process((Append_block_log_event*)ev);
 	break;
       case EXEC_LOAD_EVENT:
       {
-	ev->print(result_file, short_form, last_db);
+	ev->print(result_file, short_form, &last_event_info);
 	Execute_load_log_event *exv= (Execute_load_log_event*)ev;
 	Create_file_log_event *ce= load_processor.grab_event(exv->file_id);
         /*
@@ -682,7 +780,7 @@ Could not read entry at offset %s : Error in log format or read error",
         */
         if (ce)
         {
-          ce->print(result_file, short_form, last_db,true);
+          ce->print(result_file, short_form, &last_event_info,true);
           my_free((char*)ce->fname,MYF(MY_WME));
           delete ce;
         }
@@ -691,17 +789,23 @@ Could not read entry at offset %s : Error in log format or read error",
 Create_file event for file_id: %u\n",exv->file_id);
 	break;
       }
+      case FORMAT_DESCRIPTION_EVENT:
+        delete description_event;
+        description_event= (Format_description_log_event*) ev;
+ 	ev->print(result_file, short_form, &last_event_info);
+        break;
       default:
-	ev->print(result_file, short_form, last_db);
+	ev->print(result_file, short_form, &last_event_info);
       }
     }
     rec_count++;
-    if (ev)
-      delete ev;
+    if (ev && ev->get_type_code()!=FORMAT_DESCRIPTION_EVENT)
+      delete ev; /* otherwise, deleted in the end */
   }
   if (fd >= 0)
     my_close(fd, MYF(MY_WME));
   end_io_cache(file);
+  delete description_event;
 }
 
 
diff --git a/include/my_sys.h b/include/my_sys.h
index 59d5767d204b7fddae874e2fe65a2c0eb9198b21..bfdb360c1ed45d59167dc9cf5b15cb1eca083ff1 100644
--- a/include/my_sys.h
+++ b/include/my_sys.h
@@ -498,6 +498,7 @@ typedef int (*qsort2_cmp)(const void *, const void *, const void *);
 
 /* tell write offset in the SEQ_APPEND cache */
 my_off_t my_b_append_tell(IO_CACHE* info);
+my_off_t my_b_safe_tell(IO_CACHE* info); /* picks the correct tell() */
 
 #define my_b_bytes_in_cache(info) (uint) (*(info)->current_end - \
 					  *(info)->current_pos)
diff --git a/mysql-test/r/rpl_session_var.result b/mysql-test/r/rpl_session_var.result
new file mode 100644
index 0000000000000000000000000000000000000000..d1ba04f36ec204abba00a3294bc4c03186fff4e1
--- /dev/null
+++ b/mysql-test/r/rpl_session_var.result
@@ -0,0 +1,43 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+drop table if exists t1;
+Warnings:
+Note	1051	Unknown table 't1'
+create table t1(a varchar(10),b int);
+set @@session.sql_mode=pipes_as_concat;
+insert into t1 values('My'||'SQL', 1);
+set @@session.sql_mode=default;
+insert into t1 values('My'||'SQL', 2);
+select * from t1 where b<3 order by a;
+a	b
+0	2
+MySQL	1
+select * from t1 where b<3 order by a;
+a	b
+0	2
+MySQL	1
+set @@session.sql_mode=ignore_space;
+insert into t1 values(password          ('MySQL'), 3);
+set @@session.sql_mode=ansi_quotes;
+create table "t2" ("a" int);
+drop table t1, t2;
+set @@session.sql_mode=default;
+create table t1(a int auto_increment primary key);
+create table t2(b int, a int);
+set @@session.sql_auto_is_null=1;
+insert into t1 values(null);
+insert into t2 select 1,a from t1 where a is null;
+set @@session.sql_auto_is_null=0;
+insert into t1 values(null);
+insert into t2 select 2,a from t1 where a is null;
+select * from t2 order by b;
+b	a
+1	1
+select * from t2 order by b;
+b	a
+1	1
+drop table t1,t2;
diff --git a/mysql-test/t/rpl_session_var.test b/mysql-test/t/rpl_session_var.test
new file mode 100644
index 0000000000000000000000000000000000000000..39d3128d2605b9ef3c244a709aeafaa8e690bc39
--- /dev/null
+++ b/mysql-test/t/rpl_session_var.test
@@ -0,0 +1,42 @@
+# Replication of session variables.
+# FOREIGN_KEY_CHECKS is tested in rpl_insert_id.test
+
+source include/master-slave.inc;
+drop table if exists t1;
+create table t1(a varchar(10),b int);
+set @@session.sql_mode=pipes_as_concat;
+insert into t1 values('My'||'SQL', 1);
+set @@session.sql_mode=default;
+insert into t1 values('My'||'SQL', 2);
+select * from t1 where b<3 order by a;
+save_master_pos;
+connection slave;
+sync_with_master;
+select * from t1 where b<3 order by a;
+connection master;
+# if the slave does the next sync_with_master fine, then it means it accepts the
+# two lines of ANSI syntax below, which is what we want to check.
+set @@session.sql_mode=ignore_space;
+insert into t1 values(password          ('MySQL'), 3);
+set @@session.sql_mode=ansi_quotes;
+create table "t2" ("a" int);
+drop table t1, t2;
+set @@session.sql_mode=default;
+create table t1(a int auto_increment primary key);
+create table t2(b int, a int);
+set @@session.sql_auto_is_null=1;
+insert into t1 values(null);
+insert into t2 select 1,a from t1 where a is null;
+set @@session.sql_auto_is_null=0;
+insert into t1 values(null);
+insert into t2 select 2,a from t1 where a is null;
+select * from t2 order by b;
+save_master_pos;
+connection slave;
+sync_with_master;
+select * from t2 order by b;
+connection master;
+drop table t1,t2;
+save_master_pos;
+connection slave;
+sync_with_master;
diff --git a/mysys/mf_iocache2.c b/mysys/mf_iocache2.c
index bce08b9795bcb9391d1623b147398b3ce65240a9..232097d355b21375d85bbfe8142511a1fb459761 100644
--- a/mysys/mf_iocache2.c
+++ b/mysys/mf_iocache2.c
@@ -66,6 +66,13 @@ my_off_t my_b_append_tell(IO_CACHE* info)
   return res;
 }
 
+my_off_t my_b_safe_tell(IO_CACHE *info)
+{
+  if (unlikely(info->type == SEQ_READ_APPEND))
+    return my_b_append_tell(info);
+  return my_b_tell(info);
+}
+
 /*
   Make next read happen at the given position
   For write cache, make next write happen at the given position
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc
index cbbb0d96d22c4266c61beb0c4e8b45bfa8ab80ed..6bf746c73e63ab69b4e0660ac3c6d1d6920dfcde 100644
--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -987,10 +987,19 @@ innobase_commit_low(
 
                 trx->mysql_master_log_file_name
                                         = active_mi->rli.group_master_log_name;
+                /*
+                  Guilhem to Heikki: in 5.0 we don't need to do a computation
+                  (old_pos+len) to get the end_pos, because we already have the
+                  end_pos under hand in the replication code
+                  (Query_log_event::exec_event()).
+                  I tested the code change below (simulated a crash with kill
+                  -9) and got the good (binlog, position) displayed by InnoDB at
+                  crash recovery, so this code change is ok.
+                */
                 trx->mysql_master_log_pos = ((ib_longlong)
-                                         (active_mi->rli.group_master_log_pos +
-                                          active_mi->rli.event_len
-                                          ));
+                                             (active_mi->rli.future_group_master_log_pos 
+                                              ));
+                
         }
 #endif /* HAVE_REPLICATION */
 
diff --git a/sql/log.cc b/sql/log.cc
index 95ee4928971b45a07c6df78ed77383809dfbcb10..e73e009aaaa0fe0250a81fe11a0a0b11e22db6ca 100644
--- a/sql/log.cc
+++ b/sql/log.cc
@@ -84,7 +84,8 @@ static int find_uniq_filename(char *name)
 MYSQL_LOG::MYSQL_LOG()
   :bytes_written(0), last_time(0), query_start(0), name(0),
    file_id(1), open_count(1), log_type(LOG_CLOSED), write_error(0), inited(0),
-   need_start_event(1)
+   need_start_event(1), description_event_for_exec(0),
+   description_event_for_queue(0)
 {
   /*
     We don't want to initialize LOCK_Log here as such initialization depends on
@@ -111,6 +112,8 @@ void MYSQL_LOG::cleanup()
   {
     inited= 0;
     close(LOG_CLOSE_INDEX);
+    delete description_event_for_queue;
+    delete description_event_for_exec;
     (void) pthread_mutex_destroy(&LOCK_log);
     (void) pthread_mutex_destroy(&LOCK_index);
     (void) pthread_cond_destroy(&update_cond);
@@ -179,7 +182,8 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
 		     const char *new_name, const char *index_file_name_arg,
 		     enum cache_type io_cache_type_arg,
 		     bool no_auto_events_arg,
-                     ulong max_size_arg)
+                     ulong max_size_arg,
+                     bool null_created_arg)
 {
   char buff[512];
   File file= -1, index_file_nr= -1;
@@ -272,8 +276,8 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
       if (my_b_safe_write(&log_file, (byte*) BINLOG_MAGIC,
 			  BIN_LOG_HEADER_SIZE))
         goto err;
-      bytes_written += BIN_LOG_HEADER_SIZE;
-      write_file_name_to_index_file=1;
+      bytes_written+= BIN_LOG_HEADER_SIZE;
+      write_file_name_to_index_file= 1;
     }
 
     if (!my_b_inited(&index_file))
@@ -302,10 +306,42 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
     }
     if (need_start_event && !no_auto_events)
     {
-      need_start_event=0;
-      Start_log_event s;
+      /*
+        In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event
+        even if this is not the very first binlog.
+      */
+      Format_description_log_event s(BINLOG_VERSION);
+      if (!s.is_valid())
+        goto err;
       s.set_log_pos(this);
-      s.write(&log_file);
+      if (null_created_arg)
+        s.created= 0;
+      if (s.write(&log_file))
+        goto err;
+      bytes_written+= s.get_event_len();
+    }
+    if (description_event_for_queue &&
+        description_event_for_queue->binlog_version>=4)
+    {
+      /*
+        This is a relay log written to by the I/O slave thread.
+        Write the event so that others can later know the format of this relay
+        log.
+        Note that this event is very close to the original event from the
+        master (it has binlog version of the master, event types of the
+        master), so this is suitable to parse the next relay log's event. It
+        has been produced by
+        Format_description_log_event::Format_description_log_event(char*
+        buf,).
+        Why don't we want to write the description_event_for_queue if this event
+        is for format<4 (3.23 or 4.x): this is because in that case, the
+        description_event_for_queue describes the data received from the master,
+        but not the data written to the relay log (*conversion*), which is in
+        format 4 (slave's).
+      */
+      if (description_event_for_queue->write(&log_file))
+        goto err;
+      bytes_written+= description_event_for_queue->get_event_len();
     }
     if (flush_io_cache(&log_file))
       goto err;
@@ -596,7 +632,7 @@ bool MYSQL_LOG::reset_logs(THD* thd)
   if (!thd->slave_thread)
     need_start_event=1;
   open(save_name, save_log_type, 0, index_file_name,
-       io_cache_type, no_auto_events, max_size);
+       io_cache_type, no_auto_events, max_size, 0);
   my_free((gptr) save_name, MYF(0));
 
 err:  
@@ -986,8 +1022,17 @@ void MYSQL_LOG::new_file(bool need_lock)
      Note that at this point, log_type != LOG_CLOSED (important for is_open()).
   */
 
+  /* 
+     new_file() is only used for rotation (in FLUSH LOGS or because size >
+     max_binlog_size or max_relay_log_size). 
+     If this is a binary log, the Format_description_log_event at the beginning of
+     the new file should have created=0 (to distinguish with the
+     Format_description_log_event written at server startup, which should
+     trigger temp tables deletion on slaves.
+  */ 
+
   open(old_name, save_log_type, new_name_ptr, index_file_name, io_cache_type,
-       no_auto_events, max_size);
+       no_auto_events, max_size, 1);
   my_free(old_name,MYF(0));
 
 end:
@@ -1282,6 +1327,12 @@ bool MYSQL_LOG::write(Log_event* event_info)
       }
 #endif
 
+#if MYSQL_VERSION_ID < 50000
+      /*
+        In 5.0 this is not needed anymore as we store the value of
+        FOREIGN_KEY_CHECKS in a binary way in the Query event's header.
+        The code below was enabled in 4.0 and 4.1.
+      */
       /*
 	If the user has set FOREIGN_KEY_CHECKS=0 we wrap every SQL
 	command in the binlog inside:
@@ -1297,6 +1348,7 @@ bool MYSQL_LOG::write(Log_event* event_info)
 	if (e.write(file))
 	  goto err;
       }
+#endif
     }
 
     /* Write the SQL command */
@@ -1307,6 +1359,7 @@ bool MYSQL_LOG::write(Log_event* event_info)
 
     /* Write log events to reset the 'run environment' of the SQL command */
 
+#if MYSQL_VERSION_ID < 50000
     if (thd && thd->options & OPTION_NO_FOREIGN_KEY_CHECKS)
     {
       Query_log_event e(thd, "SET FOREIGN_KEY_CHECKS=1", 24, 0);
@@ -1314,6 +1367,7 @@ bool MYSQL_LOG::write(Log_event* event_info)
       if (e.write(file))
 	goto err;
     }
+#endif
 
     /*
       Tell for transactional table handlers up to which position in the
@@ -1720,6 +1774,7 @@ void MYSQL_LOG::close(uint exiting)
       Stop_log_event s;
       s.set_log_pos(this);
       s.write(&log_file);
+      bytes_written+= s.get_event_len();
       signal_update();
     }
 #endif /* HAVE_REPLICATION */
diff --git a/sql/log_event.cc b/sql/log_event.cc
index 8c93a22cf6e44e2707e37290ff4c9386eba26365..018366507e6828ea0dbe122480e34cdb5b0f8dbf 100644
--- a/sql/log_event.cc
+++ b/sql/log_event.cc
@@ -192,9 +192,25 @@ static inline int read_str(char * &buf, char *buf_end, char * &str,
   return 0;
 }
 
+/*
+  Prints a "session_var=value" string. Used by mysqlbinlog to print some SET
+  commands just before it prints a query.
+*/
+
+static void print_set_option(FILE* file, uint32 bits_changed, uint32 option,
+                             uint32 flags, const char* name, bool* need_comma) 
+{
+  if (bits_changed & option)
+  {
+    if (*need_comma)
+      fprintf(file,", ");
+    fprintf(file,"%s=%d", name, (bool)(flags & option));
+    *need_comma= 1;
+  }
+}
 
 /**************************************************************************
-	Log_event methods
+	Log_event methods (= the parent class of all events)
 **************************************************************************/
 
 /*
@@ -204,7 +220,7 @@ static inline int read_str(char * &buf, char *buf_end, char * &str,
 const char* Log_event::get_type_str()
 {
   switch(get_type_code()) {
-  case START_EVENT:  return "Start";
+  case START_EVENT_V3:  return "Start_v3";
   case STOP_EVENT:   return "Stop";
   case QUERY_EVENT:  return "Query";
   case ROTATE_EVENT: return "Rotate";
@@ -218,6 +234,7 @@ const char* Log_event::get_type_str()
   case EXEC_LOAD_EVENT: return "Exec_load";
   case RAND_EVENT: return "RAND";
   case USER_VAR_EVENT: return "User var";
+  case FORMAT_DESCRIPTION_EVENT: return "Format_desc";
   default: return "Unknown";				/* impossible */ 
   }
 }
@@ -254,24 +271,66 @@ Log_event::Log_event()
   Log_event::Log_event()
 */
 
-Log_event::Log_event(const char* buf, bool old_format)
+Log_event::Log_event(const char* buf,
+                     const Format_description_log_event* description_event) 
   :temp_buf(0), cached_event_len(0), cache_stmt(0)
 {
+#ifndef MYSQL_CLIENT
+  thd = 0;
+#endif  
   when = uint4korr(buf);
   server_id = uint4korr(buf + SERVER_ID_OFFSET);
-  if (old_format)
+  if (description_event->binlog_version==1)
   {
-    log_pos=0;
-    flags=0;
+    log_pos= 0;
+    flags= 0;
+    return;
   }
-  else
+  /* 4.0 or newer */
+  log_pos= uint4korr(buf + LOG_POS_OFFSET);
+  /*
+    If the log is 4.0 (so here it can only be a 4.0 relay log read by the SQL
+    thread or a 4.0 master binlog read by the I/O thread), log_pos is the
+    beginning of the event: we transform it into the end of the event, which is
+    more useful.
+    But how do you know that the log is 4.0: you know it if description_event is
+    version 3 *and* you are not reading a Format_desc (remember that mysqlbinlog
+    starts by assuming that 5.0 logs are in 4.0 format, until it finds a
+    Format_desc).
+  */
+  if (description_event->binlog_version==3 &&
+      buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT &&
+      /*
+        If log_pos=0, don't change it. log_pos==0 is a marker to mean
+        "don't change rli->group_master_log_pos" (see
+        inc_group_relay_log_pos()). As it is unreal log_pos, adding the event
+        len's is nonsense. For example, a fake Rotate event should 
+        not have its log_pos (which is 0) changed or it will modify
+        Exec_master_log_pos in SHOW SLAVE STATUS, displaying a nonsense value of
+        (a non-zero offset which does not exist in the master's binlog, so which
+        will cause problems if the user uses this value in CHANGE MASTER).
+      */
+      log_pos)
+    log_pos+= uint4korr(buf + EVENT_LEN_OFFSET);
+  flags= uint2korr(buf + FLAGS_OFFSET);
+  if ((buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) ||
+      (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT))
   {
-    log_pos = uint4korr(buf + LOG_POS_OFFSET);
-    flags = uint2korr(buf + FLAGS_OFFSET);
+    /*
+      These events always have a header which stops here (i.e. their header is
+      FROZEN).
+    */
+    /*
+      Initialization to zero of all other Log_event members as they're not
+      specified. Currently there are no such members; in the future there will
+      be an event UID (but Format_description and Rotate don't need this UID, as
+      they are not propagated through --log-slave-updates (remember the UID is
+      used to not play a query twice when you have two masters which are slaves
+      of a 3rd master). Then we are done.
+    */
+    return;
   }
-#ifndef MYSQL_CLIENT
-  thd = 0;
-#endif  
+  /* otherwise, go on with reading the header from buf (nothing now) */
 }
 
 #ifndef MYSQL_CLIENT
@@ -322,10 +381,10 @@ int Log_event::exec_event(struct st_relay_log_info* rli)
       has already been updated.
     */
     if ((thd->options & OPTION_BEGIN) && opt_using_transactions)
-      rli->inc_event_relay_log_pos(get_event_len());
+      rli->inc_event_relay_log_pos();
     else
     {
-      rli->inc_group_relay_log_pos(get_event_len(),log_pos);
+      rli->inc_group_relay_log_pos(log_pos);
       flush_relay_log_info(rli);
       /* 
          Note that Rotate_log_event::exec_event() does not call this function,
@@ -387,7 +446,7 @@ void Log_event::init_show_field_list(List<Item>* field_list)
   field_list->push_back(new Item_empty_string("Event_type", 20));
   field_list->push_back(new Item_return_int("Server_id", 10,
 					    MYSQL_TYPE_LONG));
-  field_list->push_back(new Item_return_int("Orig_log_pos", 11,
+  field_list->push_back(new Item_return_int("End_log_pos", 11,
 					    MYSQL_TYPE_LONGLONG));
   field_list->push_back(new Item_empty_string("Info", 20));
 }
@@ -410,6 +469,12 @@ int Log_event::write(IO_CACHE* file)
 
 int Log_event::write_header(IO_CACHE* file)
 {
+  /*
+    Header will be of size LOG_EVENT_HEADER_LEN for all events, except for
+    FORMAT_DESCRIPTION_EVENT and ROTATE_EVENT, where it will be
+    LOG_EVENT_MINIMAL_HEADER_LEN (remember these 2 have a frozen header, because
+    we read them before knowing the format).
+  */
   char buf[LOG_EVENT_HEADER_LEN];
   char* pos = buf;
   int4store(pos, (ulong) when); // timestamp
@@ -417,19 +482,45 @@ int Log_event::write_header(IO_CACHE* file)
   *pos++ = get_type_code(); // event type code
   int4store(pos, server_id);
   pos += 4;
-  long tmp=get_data_size() + LOG_EVENT_HEADER_LEN;
+  long tmp; // total length of the event
+  switch (get_type_code())
+    {
+    case FORMAT_DESCRIPTION_EVENT:
+    case ROTATE_EVENT:
+      tmp= get_data_size() + LOG_EVENT_MINIMAL_HEADER_LEN;
+      break;
+    default:
+      tmp= get_data_size() + LOG_EVENT_HEADER_LEN;
+      break;
+    }
   int4store(pos, tmp);
   pos += 4;
   int4store(pos, log_pos);
   pos += 4;
   int2store(pos, flags);
   pos += 2;
+  switch (get_type_code())
+    {
+    case FORMAT_DESCRIPTION_EVENT:
+    case ROTATE_EVENT:
+      break;
+    default:
+      /*
+        Other data to print in the header (nothing now); in that case increment
+        pos.
+      */
+      break;
+    }
   return (my_b_safe_write(file, (byte*) buf, (uint) (pos - buf)));
 }
 
 
 /*
   Log_event::read_log_event()
+
+  This needn't be format-tolerant, because we only read
+  LOG_EVENT_MINIMAL_HEADER_LEN (we just want to read the event's length).
+
 */
 
 #ifndef MYSQL_CLIENT
@@ -438,7 +529,7 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet,
 {
   ulong data_len;
   int result=0;
-  char buf[LOG_EVENT_HEADER_LEN];
+  char buf[LOG_EVENT_MINIMAL_HEADER_LEN];
   DBUG_ENTER("read_log_event");
 
   if (log_lock)
@@ -458,24 +549,25 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet,
     goto end;
   }
   data_len= uint4korr(buf + EVENT_LEN_OFFSET);
-  if (data_len < LOG_EVENT_HEADER_LEN ||
+  if (data_len < LOG_EVENT_MINIMAL_HEADER_LEN ||
       data_len > current_thd->variables.max_allowed_packet)
   {
     DBUG_PRINT("error",("data_len: %ld", data_len));
-    result= ((data_len < LOG_EVENT_HEADER_LEN) ? LOG_READ_BOGUS :
+    result= ((data_len < LOG_EVENT_MINIMAL_HEADER_LEN) ? LOG_READ_BOGUS :
 	     LOG_READ_TOO_LARGE);
     goto end;
   }
   packet->append(buf, sizeof(buf));
-  data_len-= LOG_EVENT_HEADER_LEN;
+  data_len-= LOG_EVENT_MINIMAL_HEADER_LEN;
   if (data_len)
   {
     if (packet->append(file, data_len))
     {
       /*
-	Here we should never hit EOF in a non-error condition.
+	Here if we hit EOF it's really an error: as data_len is >=0
+        there's supposed to be more bytes available. 
 	EOF means we are reading the event partially, which should
-	never happen.
+	never happen: either we read badly or the binlog is truncated.
       */
       result= file->error >= 0 ? LOG_READ_TRUNC: LOG_READ_IO;
       /* Implicit goto end; */
@@ -502,24 +594,42 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet,
   Log_event::read_log_event()
 
   NOTE:
-    Allocates memory;  The caller is responsible for clean-up
+    Allocates memory;  The caller is responsible for clean-up.
 */
 
 #ifndef MYSQL_CLIENT
 Log_event* Log_event::read_log_event(IO_CACHE* file,
 				     pthread_mutex_t* log_lock,
-				     bool old_format)
+                                     const Format_description_log_event *description_event)
 #else
-Log_event* Log_event::read_log_event(IO_CACHE* file, bool old_format)
+Log_event* Log_event::read_log_event(IO_CACHE* file,
+                                     const Format_description_log_event *description_event)
 #endif  
 {
-  char head[LOG_EVENT_HEADER_LEN];
-  uint header_size= old_format ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
+  DBUG_ASSERT(description_event);
+  char head[LOG_EVENT_MINIMAL_HEADER_LEN];
+  /*
+    First we only want to read at most LOG_EVENT_MINIMAL_HEADER_LEN, just to
+    check the event for sanity and to know its length; no need to really parse
+    it. We say "at most" because this could be a 3.23 master, which has header
+    of 13 bytes, whereas LOG_EVENT_MINIMAL_HEADER_LEN is 19 bytes (it's "minimal"
+    over the set {MySQL >=4.0}).
+  */
+  uint header_size= min(description_event->common_header_len,
+                        LOG_EVENT_MINIMAL_HEADER_LEN);
 
   LOCK_MUTEX;
+  DBUG_PRINT("info", ("my_b_tell=%lu", my_b_tell(file)));
   if (my_b_read(file, (byte *) head, header_size))
   {
+    DBUG_PRINT("info", ("Log_event::read_log_event(IO_CACHE*,Format_desc*) \
+failed my_b_read"));
     UNLOCK_MUTEX;
+    /*
+      No error here; it could be that we are at the file's end. However if the
+      next my_b_read() fails (below), it will be an error as we were able to
+      read the first bytes.
+    */
     return 0;
   }
 
@@ -553,7 +663,8 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, bool old_format)
     error = "read error";
     goto err;
   }
-  if ((res = read_log_event(buf, data_len, &error, old_format)))
+  if ((res= read_log_event(buf, data_len, &error,
+                           description_event))) 
     res->register_temp_buf(buf);
 
 err:
@@ -580,87 +691,105 @@ Error in Log_event::read_log_event(): '%s', data_len: %d, event_type: %d",
 
 /*
   Log_event::read_log_event()
+  Binlog format tolerance is in (buf, event_len, description_event)
+  constructors.
 */
 
-Log_event* Log_event::read_log_event(const char* buf, int event_len,
-				     const char **error, bool old_format)
+Log_event* Log_event::read_log_event(const char* buf, uint event_len,
+				     const char **error,
+                                     const Format_description_log_event *description_event)
 {
+  DBUG_ENTER("Log_event::read_log_event(char*,...)");
+  DBUG_ASSERT(description_event);
+  DBUG_PRINT("info", ("binlog_version=%d", description_event->binlog_version));
   if (event_len < EVENT_LEN_OFFSET ||
       (uint) event_len != uint4korr(buf+EVENT_LEN_OFFSET))
   {
     *error="Sanity check failed";		// Needed to free buffer
-    return NULL; // general sanity check - will fail on a partial read
+    DBUG_RETURN(NULL); // general sanity check - will fail on a partial read
   }
   
   Log_event* ev = NULL;
   
   switch(buf[EVENT_TYPE_OFFSET]) {
   case QUERY_EVENT:
-    ev  = new Query_log_event(buf, event_len, old_format);
+    ev  = new Query_log_event(buf, event_len, description_event);
     break;
   case LOAD_EVENT:
-    ev = new Create_file_log_event(buf, event_len, old_format);
+    ev = new Create_file_log_event(buf, event_len, description_event);
     break;
   case NEW_LOAD_EVENT:
-    ev = new Load_log_event(buf, event_len, old_format);
+    ev = new Load_log_event(buf, event_len, description_event);
     break;
   case ROTATE_EVENT:
-    ev = new Rotate_log_event(buf, event_len, old_format);
+    ev = new Rotate_log_event(buf, event_len, description_event);
     break;
 #ifdef HAVE_REPLICATION
-  case SLAVE_EVENT:
+  case SLAVE_EVENT: /* can never happen (unused event) */
     ev = new Slave_log_event(buf, event_len);
     break;
 #endif /* HAVE_REPLICATION */
   case CREATE_FILE_EVENT:
-    ev = new Create_file_log_event(buf, event_len, old_format);
+    ev = new Create_file_log_event(buf, event_len, description_event);
     break;
   case APPEND_BLOCK_EVENT:
-    ev = new Append_block_log_event(buf, event_len);
+    ev = new Append_block_log_event(buf, event_len, description_event);
     break;
   case DELETE_FILE_EVENT:
-    ev = new Delete_file_log_event(buf, event_len);
+    ev = new Delete_file_log_event(buf, event_len, description_event);
     break;
   case EXEC_LOAD_EVENT:
-    ev = new Execute_load_log_event(buf, event_len);
+    ev = new Execute_load_log_event(buf, event_len, description_event);
     break;
-  case START_EVENT:
-    ev = new Start_log_event(buf, old_format);
+  case START_EVENT_V3: /* this is sent only by MySQL <=4.x */
+    ev = new Start_log_event_v3(buf, description_event);
     break;
 #ifdef HAVE_REPLICATION
   case STOP_EVENT:
-    ev = new Stop_log_event(buf, old_format);
+    ev = new Stop_log_event(buf, description_event);
     break;
 #endif /* HAVE_REPLICATION */
   case INTVAR_EVENT:
-    ev = new Intvar_log_event(buf, old_format);
+    ev = new Intvar_log_event(buf, description_event);
     break;
   case RAND_EVENT:
-    ev = new Rand_log_event(buf, old_format);
+    ev = new Rand_log_event(buf, description_event);
     break;
   case USER_VAR_EVENT:
-    ev = new User_var_log_event(buf, old_format);
+    ev = new User_var_log_event(buf, description_event);
+    break;
+  case FORMAT_DESCRIPTION_EVENT:
+    ev = new Format_description_log_event(buf, event_len, description_event); 
     break;
   default:
     break;
   }
+  /*
+    is_valid() are small event-specific sanity tests which are important; for
+    example there are some my_malloc() in constructors
+    (e.g. Query_log_event::Query_log_event(char*...)); when these my_malloc()
+    fail we can't return an error out of the constructor (because constructor is
+    "void") ; so instead we leave the pointer we wanted to allocate
+    (e.g. 'query') to 0 and we test it in is_valid(). Same for
+    Format_description_log_event, member 'post_header_len'. 
+  */
   if (!ev || !ev->is_valid())
   {
     delete ev;
 #ifdef MYSQL_CLIENT
-    if (!force_opt)
+    if (!force_opt) /* then mysqlbinlog dies */
     {
       *error= "Found invalid event in binary log";
-      return 0;
+      DBUG_RETURN(0);
     }
-    ev= new Unknown_log_event(buf, old_format);
+    ev= new Unknown_log_event(buf, description_event);
 #else
     *error= "Found invalid event in binary log";
-    return 0;
+    DBUG_RETURN(0);
 #endif
   }
   ev->cached_event_len = event_len;
-  return ev;  
+  DBUG_RETURN(ev);  
 }
 
 #ifdef MYSQL_CLIENT
@@ -674,7 +803,7 @@ void Log_event::print_header(FILE* file)
   char llbuff[22];
   fputc('#', file);
   print_timestamp(file);
-  fprintf(file, " server id %d  log_pos %s ", server_id,
+  fprintf(file, " server id %d  end_log_pos %s ", server_id,
 	  llstr(log_pos,llbuff)); 
 }
 
@@ -708,13 +837,30 @@ void Log_event::print_timestamp(FILE* file, time_t* ts)
 
 /*
   Log_event::set_log_pos()
+  Only used when we are writing an event which we created, to the BINlog. That
+  is, when we have parsed and executed a query; we then want to set the event's
+  log_pos to what it is going to be in the binlog after we write it. Note that
+  this is the position of the END of the event.
 */
 
 #ifndef MYSQL_CLIENT
 void Log_event::set_log_pos(MYSQL_LOG* log)
 {
+  /*
+    Note that with a SEQ_READ_APPEND cache, my_b_tell() does not work well.
+    So this will give slightly wrong positions for the Format_desc/Rotate/Stop
+    events which the slave writes to its relay log. For example, the initial
+    Format_desc will have end_log_pos=91 instead of 95. Because after writing
+    the first 4 bytes of the relay log, my_b_tell() still reports 0. Because
+    my_b_append() does not update the counter which my_b_tell() later uses (one
+    should probably use my_b_append_tell() to work around this).
+    To get right positions even when writing to the relay log, we use the (new)
+    my_b_safe_tell().
+    Note that this raises a question on the correctness of all these
+    DBUG_ASSERT(my_b_tell()=rli->event_relay_log_pos).
+  */
   if (!log_pos)
-    log_pos = my_b_tell(&log->log_file);
+    log_pos = my_b_safe_tell(&log->log_file)+get_event_len();
 }
 #endif /* !MYSQL_CLIENT */
 
@@ -727,10 +873,15 @@ void Log_event::set_log_pos(MYSQL_LOG* log)
 
 /*
   Query_log_event::pack_info()
+  This (which is used only for SHOW BINLOG EVENTS) could be updated to
+  print SET @@session_var=. But this is not urgent, as SHOW BINLOG EVENTS is
+  only an information, it does not produce suitable queries to replay (for
+  example it does not print LOAD DATA INFILE).
 */
 
 void Query_log_event::pack_info(Protocol *protocol)
 {
+  // TODO: show the catalog ??
   char *buf, *pos;
   if (!(buf= my_malloc(9 + db_len + q_len, MYF(MY_WME))))
     return;
@@ -768,7 +919,7 @@ int Query_log_event::write(IO_CACHE* file)
 
 int Query_log_event::write_data(IO_CACHE* file)
 {
-  char buf[QUERY_HEADER_LEN]; 
+  char buf[QUERY_HEADER_LEN+1+4+1+8+1+1+catalog_len]; 
 
   if (!query)
     return -1;
@@ -814,10 +965,46 @@ int Query_log_event::write_data(IO_CACHE* file)
   int4store(buf + Q_EXEC_TIME_OFFSET, exec_time);
   buf[Q_DB_LEN_OFFSET] = (char) db_len;
   int2store(buf + Q_ERR_CODE_OFFSET, error_code);
+  int2store(buf + Q_STATUS_VARS_LEN_OFFSET, status_vars_len);
 
-  return (my_b_safe_write(file, (byte*) buf, QUERY_HEADER_LEN) ||
-	  my_b_safe_write(file, (db) ? (byte*) db : (byte*)"", db_len + 1) ||
-	  my_b_safe_write(file, (byte*) query, q_len)) ? -1 : 0;
+  /*
+    You MUST always write status vars in increasing order of code. This
+    guarantees that a slightly older slave will be able to parse those he
+    knows.
+  */
+  char* start= buf+QUERY_HEADER_LEN;
+  if (flags2_inited)
+  {
+    *(start++)= Q_FLAGS2_CODE;
+    int4store(start, flags2);
+    start+= 4;
+  }
+  if (sql_mode_inited)
+  {
+    *(start++)= Q_SQL_MODE_CODE;
+    int8store(start, sql_mode);
+    start+= 8;
+  }
+  if (catalog)
+  {
+    *(start++)= Q_CATALOG_CODE;
+    *(start++)= catalog_len;
+    bmove(start, catalog, catalog_len);
+    start+= catalog_len;
+  }
+  /*
+    Here there could be code like
+    if (command-line-option-which-says-"log_this_variable")
+    {
+    *(start++)= Q_THIS_VARIABLE_CODE;
+    int4store(start, this_variable);
+    start+= 4;
+    }
+  */
+  
+  return (my_b_safe_write(file, (byte*) buf, (start-buf)) ||
+  	  my_b_safe_write(file, (db) ? (byte*) db : (byte*)"", db_len + 1) ||
+  	  my_b_safe_write(file, (byte*) query, q_len)) ? -1 : 0;
 }
 
 
@@ -830,68 +1017,177 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg,
 				 ulong query_length, bool using_trans)
   :Log_event(thd_arg, !thd_arg->tmp_table_used ?
 	     0 : LOG_EVENT_THREAD_SPECIFIC_F, using_trans),
-   data_buf(0), query(query_arg),
+   data_buf(0), query(query_arg), catalog(thd_arg->catalog),
    db(thd_arg->db), q_len((uint32) query_length),
-  error_code(thd_arg->killed != THD::NOT_KILLED ? thd->killed_errno() : thd_arg->net.last_errno),
+   error_code(thd_arg->killed != THD::NOT_KILLED ? thd->killed_errno() : thd_arg->net.last_errno),
    thread_id(thd_arg->thread_id),
    /* save the original thread id; we already know the server id */
-   slave_proxy_id(thd_arg->variables.pseudo_thread_id)
+   slave_proxy_id(thd_arg->variables.pseudo_thread_id),
+   flags2_inited(1), sql_mode_inited(1), flags2(0), sql_mode(0)
 {
   time_t end_time;
   time(&end_time);
   exec_time = (ulong) (end_time  - thd->start_time);
+  catalog_len = (catalog) ? (uint32) strlen(catalog) : 0;
+  status_vars_len= 1+4+1+8+1+1+catalog_len;
   db_len = (db) ? (uint32) strlen(db) : 0;
+  /*
+    If we don't use flags2 for anything else than options contained in
+    thd->options, it would be more efficient to flags2=thd_arg->options
+    (OPTIONS_WRITTEN_TO_BINLOG would be used only at reading time).
+    But it's likely that we don't want to use 32 bits for 3 bits; in the future
+    we will probably want to reclaim the 29 bits. So we need the &.
+  */
+  flags2= thd_arg->options & OPTIONS_WRITTEN_TO_BIN_LOG;
+  sql_mode= thd_arg->variables.sql_mode;
+  DBUG_PRINT("info",("Query_log_event has flags2=%lu sql_mode=%lu",flags2,sql_mode));
 }
 #endif /* MYSQL_CLIENT */
 
 
 /*
   Query_log_event::Query_log_event()
+  This is used by the SQL slave thread to prepare the event before execution.
 */
 
-Query_log_event::Query_log_event(const char* buf, int event_len,
-				 bool old_format)
-  :Log_event(buf, old_format),data_buf(0), query(NULL), db(NULL)
+Query_log_event::Query_log_event(const char* buf, uint event_len,
+                                 const Format_description_log_event *description_event)
+  :Log_event(buf, description_event), data_buf(0), query(NULL),
+   catalog(NULL), db(NULL), catalog_len(0), status_vars_len(0),
+   flags2_inited(0), sql_mode_inited(0)
 {
   ulong data_len;
-  if (old_format)
-  {
-    if ((uint)event_len < OLD_HEADER_LEN + QUERY_HEADER_LEN)
-      return;				
-    data_len = event_len - (QUERY_HEADER_LEN + OLD_HEADER_LEN);
-    buf += OLD_HEADER_LEN;
-  }
-  else
-  {
-    if ((uint)event_len < QUERY_EVENT_OVERHEAD)
-      return;				
-    data_len = event_len - QUERY_EVENT_OVERHEAD;
-    buf += LOG_EVENT_HEADER_LEN;
-  }
-
+  uint8 common_header_len, post_header_len;
+  DBUG_ENTER("Query_log_event::Query_log_event(char*,...)");
+  common_header_len= description_event->common_header_len;
+  post_header_len= description_event->post_header_len[QUERY_EVENT-1]; 
+  DBUG_PRINT("info",("event_len=%ld, common_header_len=%d, post_header_len=%d",
+                     event_len, common_header_len, post_header_len));
+  
+  /*
+    We test if the event's length is sensible, and if so we compute data_len.
+    We cannot rely on QUERY_HEADER_LEN here as it would not be format-tolerant.
+    We use QUERY_HEADER_MINIMAL_LEN which is the same for 3.23, 4.0 & 5.0.
+  */
+  if (event_len < (uint)(common_header_len + post_header_len))
+    return;				
+  data_len = event_len - (common_header_len + post_header_len);
+  buf+= common_header_len;
+  
+  slave_proxy_id= thread_id = uint4korr(buf + Q_THREAD_ID_OFFSET);
   exec_time = uint4korr(buf + Q_EXEC_TIME_OFFSET);
+  db_len = (uint)buf[Q_DB_LEN_OFFSET];
   error_code = uint2korr(buf + Q_ERR_CODE_OFFSET);
 
-  if (!(data_buf = (char*) my_malloc(data_len + 1, MYF(MY_WME))))
+  /*
+    5.0 format starts here.
+    Depending on the format, we may or not have affected/warnings etc
+    The remnent post-header to be parsed has length:
+  */
+  uint32 tmp= post_header_len - QUERY_HEADER_MINIMAL_LEN; 
+  if (tmp)
+  {
+    status_vars_len= uint2korr(buf + Q_STATUS_VARS_LEN_OFFSET);
+    DBUG_PRINT("info", ("Query_log_event has status_vars_len=%d",
+                        status_vars_len));
+    tmp-= 2;
+  }
+  /* we have parsed everything we know in the post header */
+#ifndef DBUG_OFF
+  if (tmp) /* this is probably a master newer than us */
+    DBUG_PRINT("info", ("Query_log_event has longer post header than we know\
+  (%d more bytes)", tmp));
+#endif
+  
+  /*
+    Suppose you have a 4.0 master with --ansi and a 5.0 slave with --ansi.
+    The slave sets flags2 to 0 (because that's a 4.0 event);  if we simply use
+    this value of 0, we will cancel --ansi on the slave, which is
+    unwanted. In this example 0 means "unset", not really "set to 0".
+    Fortunately we have flags2_inited==0 to distinguish between "unset" and
+    "set to 0". See below.
+  */
+  
+  /* variable-part: the status vars; only in MySQL 5.0  */
+  
+  const char* start= buf+post_header_len;
+  const char* end= start+status_vars_len;
+  /*
+    The place from which we will start string duplication.
+  */
+  const char* start_dup= end;
+  
+  for (const char* pos=start;pos<end;)
+  {
+    switch (*pos)
+    {
+    case Q_FLAGS2_CODE:
+      flags2_inited= 1;
+      flags2= uint4korr(++pos);
+      DBUG_PRINT("info",("In Query_log_event, read flags2=%lu", flags2));
+      pos+= 4;
+      break;
+    case Q_SQL_MODE_CODE:
+      sql_mode_inited= 1;
+      sql_mode= uint8korr(++pos);
+      DBUG_PRINT("info",("In Query_log_event, read sql_mode=%lu", sql_mode));
+      pos+= 8;
+      break;
+    case Q_CATALOG_CODE:
+      catalog_len= *(++pos);
+      /*
+        Now 'pos' points to beginning of catalog - 1.
+        The catalog must be included in the string which we will duplicate
+        later. If string status vars having a smaller code had been seen before
+        and so marked to-be-duplicated, start_dup would be != end and we would
+        not need (and want) to change start_dup (because this would cut the
+        previously marked status vars).
+      */
+      if (start_dup==end)
+        start_dup= ++pos;
+      pos+= catalog_len;
+      break;
+    default:
+      /* That's why you must write status vars in growing order of code */
+      DBUG_PRINT("info",("Query_log_event has unknown status vars (first has\
+ code=%d), skipping the rest of them", *pos));
+      pos= end;
+    }
+  }
+  
+  /* A 2nd variable part; this is common to all versions */ 
+  
+  data_len-= start_dup-start; /* cut not-to-be-duplicated head */
+  if (!(data_buf = (char*) my_strdup_with_length(start_dup,
+                                                 data_len,
+                                                 MYF(MY_WME))))
     return;
-
-  memcpy(data_buf, buf + Q_DATA_OFFSET, data_len);
-  slave_proxy_id= thread_id= uint4korr(buf + Q_THREAD_ID_OFFSET);
-  db = data_buf;
-  db_len = (uint)buf[Q_DB_LEN_OFFSET];
-  query=data_buf + db_len + 1;
-  q_len = data_len - 1 - db_len;
-  *((char*)query+q_len) = 0;
+  
+  const char* tmp_buf= data_buf;
+  /* Now set event's pointers to point to bits of the new string */
+  if (catalog_len)
+  {
+    catalog= tmp_buf;
+    tmp_buf+= end-start_dup; /* "seek" to db */
+  }
+  db= tmp_buf;
+  query= tmp_buf + db_len + 1;
+  q_len = data_buf + data_len - query;
+  /* This is used to detect wrong parsing. Could be removed in the future. */
+  DBUG_PRINT("info", ("catalog_len=%d catalog='%s' db='%s' q_len=%d",
+                      catalog_len, catalog, db, q_len));
+  DBUG_VOID_RETURN;
 }
 
-
 /*
   Query_log_event::print()
 */
 
 #ifdef MYSQL_CLIENT
-void Query_log_event::print(FILE* file, bool short_form, char* last_db)
+void Query_log_event::print(FILE* file, bool short_form,
+                            LAST_EVENT_INFO* last_event_info)
 {
+  // TODO: print the catalog ??
   char buff[40],*end;				// Enough for SET TIMESTAMP
   if (!short_form)
   {
@@ -902,10 +1198,10 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db)
 
   bool same_db = 0;
 
-  if (db && last_db)
+  if (db && last_event_info->db)
   {
-    if (!(same_db = !memcmp(last_db, db, db_len + 1)))
-      memcpy(last_db, db, db_len + 1);
+    if (!(same_db = !memcmp(last_event_info->db, db, db_len + 1)))
+      memcpy(last_event_info->db, db, db_len + 1);
   }
   
   if (db && db[0] && !same_db)
@@ -916,6 +1212,67 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db)
   my_fwrite(file, (byte*) buff, (uint) (end-buff),MYF(MY_NABP | MY_WME));
   if (flags & LOG_EVENT_THREAD_SPECIFIC_F)
     fprintf(file,"SET @@session.pseudo_thread_id=%lu;\n",(ulong)thread_id);
+  /*
+    Now the session variables;
+    it's more efficient to pass SQL_MODE as a number instead of a
+    comma-separated list.
+    FOREIGN_KEY_CHECKS, SQL_AUTO_IS_NULL, UNIQUE_CHECKS are session-only
+    variables (they have no global version; they're not listed in sql_class.h),
+    The tests below work for pure binlogs or pure relay logs. Won't work for
+    mixed relay logs but we don't create mixed relay logs (that is, there is no
+    relay log with a format change except within the 3 first events, which
+    mysqlbinlog handles gracefully). So this code should always be good.
+  */
+
+  uint32 tmp;
+
+  if (likely(flags2_inited)) /* likely as this will mainly read 5.0 logs */
+  {
+    /* tmp is a bitmask of bits which have changed. */
+    if (likely(last_event_info->flags2_inited)) 
+      /* All bits which have changed */
+      tmp= (last_event_info->flags2) ^ flags2;
+    else /* that's the first Query event we read */
+    {
+      last_event_info->flags2_inited= 1;
+      tmp= ~((uint32)0); /* all bits have changed */
+    }
+
+    if (unlikely(tmp)) /* some bits have changed */
+    {
+      bool need_comma= 0;
+      fprintf(file, "SET ");
+      print_set_option(file, tmp, OPTION_NO_FOREIGN_KEY_CHECKS, ~flags2,
+                   "@@session.foreign_key_checks", &need_comma);
+      print_set_option(file, tmp, OPTION_AUTO_IS_NULL, flags2,
+                   "@@session.sql_auto_is_null", &need_comma);
+      print_set_option(file, tmp, OPTION_RELAXED_UNIQUE_CHECKS, ~flags2,
+                   "@@session.unique_checks", &need_comma);
+      fprintf(file,";\n");
+      last_event_info->flags2= flags2;
+    }
+  }
+
+  /*
+    If flags2_inited==0, this is an event from 3.23 or 4.0; nothing to print
+    (remember we don't produce mixed relay logs so there cannot be 5.0 events
+    before that one so there is nothing to reset).
+  */
+
+  if (likely(sql_mode_inited))
+  {
+    if (unlikely(!last_event_info->sql_mode_inited)) /* first Query event */
+    {
+      last_event_info->sql_mode_inited= 1;
+      last_event_info->sql_mode= ~sql_mode; // force a difference to force write
+    }
+    if (unlikely(last_event_info->sql_mode != sql_mode))
+    {
+      fprintf(file,"SET @@session.sql_mode=%lu;\n",(ulong)sql_mode);
+      last_event_info->sql_mode= sql_mode;
+    }
+  }
+
   my_fwrite(file, (byte*) query, q_len, MYF(MY_NABP | MY_WME));
   fprintf(file, ";\n");
 }
@@ -930,6 +1287,13 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db)
 int Query_log_event::exec_event(struct st_relay_log_info* rli)
 {
   int expected_error,actual_error= 0;
+  /*
+    Colleagues: please never free(thd->catalog) in MySQL. This would lead to
+    bugs as here thd->catalog is a part of an alloced block, not an entire
+    alloced block (see Query_log_event::exec_event()). Same for thd->db.
+    Thank you.
+  */
+  thd->catalog= (char*) catalog;
   thd->db= (char*) rewrite_db(db);
 
   /*
@@ -938,10 +1302,11 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
     When InnoDB will want to store, the positions in rli won't have
     been updated yet, so group_master_log_* will point to old BEGIN
     and event_master_log* will point to the beginning of current COMMIT.
-    So the position to store is event_master_log_pos + event_len
-    since we must store the pos of the END of the current log event (COMMIT).
+    But log_pos of the COMMIT Query event is what we want, i.e. the pos of the
+    END of the current log event (COMMIT). We save it in rli so that InnoDB can
+    access it.
   */
-  rli->event_len= get_event_len();
+  rli->future_group_master_log_pos= log_pos;
 
   if (db_ok(thd->db, replicate_do_db, replicate_ignore_db))
   {
@@ -955,9 +1320,34 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
     thd->clear_error();
     thd->variables.pseudo_thread_id= thread_id;		// for temp tables
 
+    if (flags2_inited)
+      /*
+        all bits of thd->options which are 1 in OPTIONS_WRITTEN_TO_BIN_LOG must
+        take their value from flags2.
+      */
+      thd->options= flags2|(thd->options & ~(ulong)OPTIONS_WRITTEN_TO_BIN_LOG);
+    /*
+      else, we are in a 3.23/4.0 binlog; we previously received a
+      Rotate_log_event which reset thd->options and sql_mode, so nothing to do.
+    */
+    
+    /*
+      We do not replicate IGNORE_DIR_IN_CREATE. That is, if the master is a
+      slave which runs with SQL_MODE=IGNORE_DIR_IN_CREATE, this should not
+      force us to ignore the dir too. Imagine you are a ring of machines, and
+      one has a disk problem so that you temporarily need IGNORE_DIR_IN_CREATE
+      on this machine; you don't want it to propagate elsewhere (you don't want
+      all slaves to start ignoring the dirs).
+    */
+    if (sql_mode_inited)
+      thd->variables.sql_mode= (thd->variables.sql_mode&MODE_NO_DIR_IN_CREATE)|
+        (sql_mode & ~(uint32)MODE_NO_DIR_IN_CREATE);
+    
     /*
       Sanity check to make sure the master did not get a really bad
-      error on the query.
+      error on the query (for example, if the query on the master was killed, we
+      don't want to play it on the slave, because we don't know how much of it
+      we have to play).
     */
     if (ignored_error_code((expected_error = error_code)) ||
 	!check_expected_error(thd,rli,expected_error))
@@ -1014,12 +1404,33 @@ Default database: '%s'",
                           print_slave_db_safe(db));
         thd->query_error= 1;
       }
+      /*
+        TODO: compare the values of "affected rows" around here. Something
+        like:
+        if ((uint32) affected_in_event != (uint32) affected_on_slave)
+        {
+        sql_print_error("Slave: did not get the expected number of affected \
+        rows running query from master - expected %d, got %d (this numbers \
+        should have matched modulo 4294967296).", 0, ...);
+        thd->query_error = 1;
+        }
+        We may also want an option to tell the slave to ignore "affected"
+        mismatch. This mismatch could be implemented with a new ER_ code, and
+        to ignore it you would use --slave-skip-errors...
+        
+        To do the comparison we need to know the value of "affected" which the
+        above mysql_parse() computed. And we need to know the value of
+        "affected" in the master's binlog. Both will be implemented later. The
+        important thing is that we now have the format ready to log the values
+        of "affected" in the binlog. So we can release 5.0.0 before effectively
+        logging "affected" and effectively comparing it.
+      */
     } 
-    /* 
-       End of sanity check. If the test was wrong, the query got a really bad
-       error on the master, which could be inconsistent, abort and tell DBA to
-       check/fix it. check_expected_error() already printed the message to
-       stderr and rli, and set thd->query_error to 1.
+    /*
+      End of sanity check. If the test was false, the query got a really bad
+      error on the master, which could be inconsistent. check_expected_error()
+      already printed the message to stderr and rli, to tell the DBA to fix
+      things, and it has set thd->query_error to 1.
     */
   } /* End of if (db_ok(... */
 
@@ -1039,15 +1450,23 @@ Default database: '%s'",
 
 
 /**************************************************************************
-	Start_log_event methods
+	Start_log_event_v3 methods
 **************************************************************************/
 
+#ifndef MYSQL_CLIENT
+Start_log_event_v3::Start_log_event_v3() :Log_event(), binlog_version(BINLOG_VERSION)
+{
+  created= when;
+  memcpy(server_version, ::server_version, ST_SERVER_VER_LEN);
+}
+#endif
+
 /*
-  Start_log_event::pack_info()
+  Start_log_event_v3::pack_info()
 */
 
 #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
-void Start_log_event::pack_info(Protocol *protocol)
+void Start_log_event_v3::pack_info(Protocol *protocol)
 {
   char buf[12 + ST_SERVER_VER_LEN + 14 + 22], *pos;
   pos= strmov(buf, "Server ver: ");
@@ -1060,35 +1479,43 @@ void Start_log_event::pack_info(Protocol *protocol)
 
 
 /*
-  Start_log_event::print()
+  Start_log_event_v3::print()
 */
 
 #ifdef MYSQL_CLIENT
-void Start_log_event::print(FILE* file, bool short_form, char* last_db)
+void Start_log_event_v3::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
 {
-  if (short_form)
-    return;
-
-  print_header(file);
-  fprintf(file, "\tStart: binlog v %d, server v %s created ", binlog_version,
-	  server_version);
-  print_timestamp(file);
-  if (created)
-    fprintf(file," at startup");
-  fputc('\n', file);
+  if (!short_form)
+  {
+    print_header(file);
+    fprintf(file, "\tStart: binlog v %d, server v %s created ", binlog_version,
+            server_version);
+    print_timestamp(file);
+    if (created)
+      fprintf(file," at startup");
+    fputc('\n', file);
+  }
+#ifdef WHEN_WE_HAVE_THE_RESET_CONNECTION_SQL_COMMAND
+  /*
+    This is for mysqlbinlog: like in replication, we want to delete the stale
+    tmp files left by an unclean shutdown of mysqld (temporary tables). Probably
+    this can be done with RESET CONNECTION (syntax to be defined).
+  */
+  fprintf(file,"RESET CONNECTION;\n");
+#endif
   fflush(file);
 }
 #endif /* MYSQL_CLIENT */
 
 /*
-  Start_log_event::Start_log_event()
+  Start_log_event_v3::Start_log_event_v3()
 */
 
-Start_log_event::Start_log_event(const char* buf,
-				 bool old_format)
-  :Log_event(buf, old_format)
+Start_log_event_v3::Start_log_event_v3(const char* buf,
+                                       const Format_description_log_event* description_event)
+  :Log_event(buf, description_event)
 {
-  buf += (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
+  buf += description_event->common_header_len;
   binlog_version = uint2korr(buf+ST_BINLOG_VER_OFFSET);
   memcpy(server_version, buf+ST_SERVER_VER_OFFSET,
 	 ST_SERVER_VER_LEN);
@@ -1097,12 +1524,12 @@ Start_log_event::Start_log_event(const char* buf,
 
 
 /*
-  Start_log_event::write_data()
+  Start_log_event_v3::write_data()
 */
 
-int Start_log_event::write_data(IO_CACHE* file)
+int Start_log_event_v3::write_data(IO_CACHE* file)
 {
-  char buff[START_HEADER_LEN];
+  char buff[START_V3_HEADER_LEN];
   int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version);
   memcpy(buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN);
   int4store(buff + ST_CREATED_OFFSET,created);
@@ -1110,7 +1537,7 @@ int Start_log_event::write_data(IO_CACHE* file)
 }
 
 /*
-  Start_log_event::exec_event()
+  Start_log_event_v3::exec_event()
 
   The master started
 
@@ -1129,18 +1556,24 @@ int Start_log_event::write_data(IO_CACHE* file)
 */
 
 #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
-int Start_log_event::exec_event(struct st_relay_log_info* rli)
+int Start_log_event_v3::exec_event(struct st_relay_log_info* rli)
 {
-  DBUG_ENTER("Start_log_event::exec_event");
-
-  switch (rli->mi->old_format) {
-  case BINLOG_FORMAT_CURRENT:
-    /* 
-       This is 4.x, so a Start_log_event is only at master startup,
-       so we are sure the master has restarted and cleared his temp tables.
+  DBUG_ENTER("Start_log_event_v3::exec_event");
+  switch (rli->relay_log.description_event_for_exec->binlog_version)
+  {
+  case 3:
+  case 4:
+    /*
+      This can either be 4.x (then a Start_log_event_v3 is only at master
+      startup so we are sure the master has restarted and cleared his temp
+      tables; the event always has 'created'>0) or 5.0 (then we have to test
+      'created').
     */
-    close_temporary_tables(thd);
-    cleanup_load_tmpdir();
+    if (created)
+    {
+      close_temporary_tables(thd);
+      cleanup_load_tmpdir();
+    }
     /*
       As a transaction NEVER spans on 2 or more binlogs:
       if we have an active transaction at this point, the master died while
@@ -1163,33 +1596,264 @@ binary log.");
        Now the older formats; in that case load_tmpdir is cleaned up by the I/O
        thread.
     */
-  case BINLOG_FORMAT_323_LESS_57:
+  case 1:
+    if (strncmp(rli->relay_log.description_event_for_exec->server_version, 
+                "3.23.57",7) >= 0 && created)
+    {
+      /*
+        Can distinguish, based on the value of 'created': this event was
+        generated at master startup.
+      */
+      close_temporary_tables(thd);
+    }
     /*
-      Cannot distinguish a Start_log_event generated at master startup and
-      one generated by master FLUSH LOGS, so cannot be sure temp tables
-      have to be dropped. So do nothing.
+      Otherwise, can't distinguish a Start_log_event generated at master startup
+      and one generated by master FLUSH LOGS, so cannot be sure temp tables have
+      to be dropped. So do nothing.
     */
     break;
-  case BINLOG_FORMAT_323_GEQ_57:
-    /*
-      Can distinguish, based on the value of 'created',
-      which was generated at master startup.
-    */
-    if (created)
-      close_temporary_tables(thd);
-    break;
   default:
     /* this case is impossible */
-    return 1;
+    DBUG_RETURN(1);
   }
-
   DBUG_RETURN(Log_event::exec_event(rli));
 }
 #endif /* defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) */
 
-/**************************************************************************
-	Load_log_event methods
-**************************************************************************/
+/***************************************************************************
+       Format_description_log_event methods
+****************************************************************************/
+
+/*
+  Format_description_log_event 1st ctor.
+
+  SYNOPSIS
+    Format_description_log_event::Format_description_log_event
+      binlog_version          	  the binlog version for which we want to build
+                                  an event. Can be 1 (=MySQL 3.23), 3 (=4.0.x
+                                  x>=2 and 4.1) or 4 (MySQL 5.0). Note that the
+                                  old 4.0 (binlog version 2) is not supported;
+                                  it should not be used for replication with
+                                  5.0.
+
+  DESCRIPTION
+    Ctor. Can be used to create the event to write to the binary log (when the
+    server starts or when FLUSH LOGS), or to create artificial events to parse
+    binlogs from MySQL 3.23 or 4.x.
+    When in a client, only the 2nd use is possible.
+
+  TODO
+    Update this code with the new event for LOAD DATA, once they are pushed (in
+    4.1 or 5.0). If it's in 5.0, only the "case 4" block should be updated.
+
+*/
+
+Format_description_log_event::Format_description_log_event(uint8 binlog_ver,
+                                                           const char* server_ver) 
+  : Start_log_event_v3()
+{
+  created= when;
+  binlog_version= binlog_ver;
+  switch(binlog_ver)
+  {
+  case 4: /* MySQL 5.0 */
+    memcpy(server_version, ::server_version, ST_SERVER_VER_LEN);
+    common_header_len= LOG_EVENT_HEADER_LEN;
+    number_of_event_types= LOG_EVENT_TYPES;
+    /* we'll catch my_malloc() error in is_valid() */
+    post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8),
+                                       MYF(0)); 
+    /*
+      This long list of assignments is not beautiful, but I see no way to
+      make it nicer, as the right members are #defines, not array members, so
+      it's impossible to write a loop.
+    */
+    if (post_header_len)
+    {
+      post_header_len[START_EVENT_V3-1]= START_V3_HEADER_LEN;
+      post_header_len[QUERY_EVENT-1]= QUERY_HEADER_LEN;
+      post_header_len[STOP_EVENT-1]= 0;
+      post_header_len[ROTATE_EVENT-1]= ROTATE_HEADER_LEN;
+      post_header_len[INTVAR_EVENT-1]= 0;
+      post_header_len[LOAD_EVENT-1]= LOAD_HEADER_LEN;
+      post_header_len[SLAVE_EVENT-1]= 0;
+      post_header_len[CREATE_FILE_EVENT-1]= CREATE_FILE_HEADER_LEN;
+      post_header_len[APPEND_BLOCK_EVENT-1]= APPEND_BLOCK_HEADER_LEN;
+      post_header_len[EXEC_LOAD_EVENT-1]= EXEC_LOAD_HEADER_LEN;
+      post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN;
+      post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1];
+      post_header_len[RAND_EVENT-1]= 0;
+      post_header_len[USER_VAR_EVENT-1]= 0;
+      post_header_len[FORMAT_DESCRIPTION_EVENT-1]= FORMAT_DESCRIPTION_HEADER_LEN;
+    }
+    break;
+
+  case 1: /* 3.23 */
+  case 3: /* 4.0.x x>=2 */
+    /*
+      We build an artificial (i.e. not sent by the master) event, which
+      describes what those old master versions send.
+    */
+    if (binlog_ver==1)
+      strmov(server_version, server_ver ? server_ver : "3.23");
+    else
+      strmov(server_version, server_ver ? server_ver : "4.0");
+    common_header_len= binlog_ver==1 ? OLD_HEADER_LEN :
+      LOG_EVENT_MINIMAL_HEADER_LEN;  
+    /*
+      The first new event in binlog version 4 is Format_desc. So any event type
+      after that does not exist in older versions. We use the events known by
+      version 3, even if version 1 had only a subset of them (this is not a
+      problem: it uses a few bytes for nothing but unifies code; it does not
+      make the slave detect less corruptions).
+    */
+    number_of_event_types= FORMAT_DESCRIPTION_EVENT - 1;
+    post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8),
+                                       MYF(0)); 
+    if (post_header_len)
+    {
+      post_header_len[START_EVENT_V3-1]= START_V3_HEADER_LEN;
+      post_header_len[QUERY_EVENT-1]= QUERY_HEADER_MINIMAL_LEN;
+      post_header_len[STOP_EVENT-1]= 0;
+      post_header_len[ROTATE_EVENT-1]= (binlog_ver==1) ? 0 : ROTATE_HEADER_LEN;
+      post_header_len[INTVAR_EVENT-1]= 0;
+      post_header_len[LOAD_EVENT-1]= LOAD_HEADER_LEN;
+      post_header_len[SLAVE_EVENT-1]= 0;
+      post_header_len[CREATE_FILE_EVENT-1]= CREATE_FILE_HEADER_LEN;
+      post_header_len[APPEND_BLOCK_EVENT-1]= APPEND_BLOCK_HEADER_LEN;
+      post_header_len[EXEC_LOAD_EVENT-1]= EXEC_LOAD_HEADER_LEN;
+      post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN;
+      post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1];
+      post_header_len[RAND_EVENT-1]= 0;
+      post_header_len[USER_VAR_EVENT-1]= 0;
+    }
+    break;
+  default: /* Includes binlog version 2 i.e. 4.0.x x<=1 */
+    post_header_len= 0; /* will make is_valid() fail */
+    break;
+  }
+}
+
+Format_description_log_event::Format_description_log_event(const char* buf,
+                                                           uint event_len,
+                                                           const
+                                                           Format_description_log_event*
+                                                           description_event) 
+  /*
+    The problem with this constructor is that the fixed header may have a length
+    different from this version, but we don't know this length as we have not 
+    read the Format_description_log_event which says it, yet. This length is in
+    the post-header of the event, but we don't know where the post-header
+    starts. 
+    So this type of event HAS to:
+    - either have the header's length at the beginning (in the header, at a
+    fixed position which will never be changed), not in the post-header. That
+    would make the header be "shifted" compared to other events.
+    - or have a header of size LOG_EVENT_MINIMAL_HEADER_LEN (19), in all future
+    versions, so that we know for sure.
+    I (Guilhem) chose the 2nd solution. Rotate has the same constraint (because
+    it is sent before Format_description_log_event).
+  */
+ 
+  :Start_log_event_v3(buf, description_event)
+{
+  DBUG_ENTER("Format_description_log_event::Format_description_log_event(char*,...)");
+  buf+= LOG_EVENT_MINIMAL_HEADER_LEN;
+  if ((common_header_len=buf[ST_COMMON_HEADER_LEN_OFFSET]) < OLD_HEADER_LEN)
+    DBUG_VOID_RETURN; /* sanity check */
+  number_of_event_types=
+    event_len-(LOG_EVENT_MINIMAL_HEADER_LEN+ST_COMMON_HEADER_LEN_OFFSET+1);
+  DBUG_PRINT("info", ("common_header_len=%d number_of_event_types=%d",
+                      common_header_len, number_of_event_types)); 
+  /* If alloc fails, we'll detect it in is_valid() */
+  post_header_len= (uint8*) my_memdup(buf+ST_COMMON_HEADER_LEN_OFFSET+1,
+                                      number_of_event_types*
+                                      sizeof(*post_header_len),
+                                      MYF(0));  
+  DBUG_VOID_RETURN;
+}
+
+int Format_description_log_event::write_data(IO_CACHE* file)
+{
+  /*
+    We don't call Start_log_event_v3::write_data() because this would make 2
+    my_b_safe_write().
+  */
+  char buff[FORMAT_DESCRIPTION_HEADER_LEN];
+  int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version);
+  memcpy(buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN);
+  int4store(buff + ST_CREATED_OFFSET,created);
+  buff[ST_COMMON_HEADER_LEN_OFFSET]= LOG_EVENT_HEADER_LEN;    
+  memcpy(buff+ST_COMMON_HEADER_LEN_OFFSET+1, (byte*) post_header_len,
+         LOG_EVENT_TYPES);
+  return (my_b_safe_write(file, (byte*) buff, sizeof(buff)) ? -1 : 0);
+}
+  
+/*
+  SYNOPSIS
+    Format_description_log_event::exec_event()
+  
+  IMPLEMENTATION
+    Save the information which describes the binlog's format, to be able to
+    read all coming events.
+    Call Start_log_event_v3::exec_event().
+*/
+
+#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
+int Format_description_log_event::exec_event(struct st_relay_log_info* rli)
+{
+  DBUG_ENTER("Format_description_log_event::exec_event");
+  
+  /* save the information describing this binlog */  
+  delete rli->relay_log.description_event_for_exec;
+  rli->relay_log.description_event_for_exec= this;
+
+  /*
+    If this event comes from ourselves, there is no cleaning task to perform,
+    we don't call Start_log_event_v3::exec_event() (this was just to update the
+    log's description event).
+  */
+  if (server_id == (uint32) ::server_id)
+  {
+    /*
+      Do not modify rli->group_master_log_pos, as this event did not exist on
+      the master. That is, just update the *relay log* coordinates; this is done
+      by passing log_pos=0 to inc_group_relay_log_pos, like we do in
+      Stop_log_event::exec_event().
+    */
+    rli->inc_group_relay_log_pos(0);
+    flush_relay_log_info(rli);
+    DBUG_RETURN(0);
+  }
+
+  /*
+    If the event was not requested by the slave i.e. the master sent it while
+    the slave asked for a position >4, the event will make
+    rli->group_master_log_pos advance. Say that the slave asked for position
+    1000, and the Format_desc event's end is 95. Then in the beginning of
+    replication rli->group_master_log_pos will be 0, then 95, then jump to first
+    really asked event (which is >95). So this is ok.
+  */
+  DBUG_RETURN(Start_log_event_v3::exec_event(rli));
+}
+#endif
+
+  /**************************************************************************
+  	Load_log_event methods
+   General note about Load_log_event: the binlogging of LOAD DATA INFILE is
+   going to be changed in 5.0 (or maybe in 4.1; not decided yet).
+   However, the 5.0 slave could still have to read such events (from a 4.x
+   master), convert them (which just means maybe expand the header, when 5.0
+   servers have a UID in events) (remember that whatever is after the header
+   will be like in 4.x, as this event's format is not modified in 5.0 as we
+   will use new types of events to log the new LOAD DATA INFILE features).
+   To be able to read/convert, we just need to not assume that the common
+   header is of length LOG_EVENT_HEADER_LEN (we must use the description
+   event).
+   Note that I (Guilhem) manually tested replication of a big LOAD DATA INFILE
+   between 3.23 and 5.0, and between 4.0 and 5.0, and it works fine (and the
+   positions displayed in SHOW SLAVE STATUS then are fine too).
+  **************************************************************************/
 
 /*
   Load_log_event::pack_info()
@@ -1441,15 +2105,24 @@ Load_log_event::Load_log_event(THD *thd_arg, sql_exchange *ex,
     constructed event.
 */
 
-Load_log_event::Load_log_event(const char *buf, int event_len,
-			       bool old_format)
-  :Log_event(buf, old_format),num_fields(0),fields(0),
+Load_log_event::Load_log_event(const char *buf, uint event_len,
+                               const Format_description_log_event *description_event)
+  :Log_event(buf, description_event),num_fields(0),fields(0),
    field_lens(0),field_block_len(0),
    table_name(0),db(0),fname(0)
 {
   if (!event_len) // derived class, will call copy_log_event() itself
     return;
-  copy_log_event(buf, event_len, old_format);
+  /*
+    I (Guilhem) manually tested replication of LOAD DATA INFILE for 3.23->5.0,
+    4.0->5.0 and 5.0->5.0 and it works.
+  */
+  copy_log_event(buf, event_len,
+                 ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
+                  LOAD_HEADER_LEN + 
+                  description_event->common_header_len :
+                  LOAD_HEADER_LEN + LOG_EVENT_HEADER_LEN),
+                 description_event);
 }
 
 
@@ -1458,12 +2131,13 @@ Load_log_event::Load_log_event(const char *buf, int event_len,
 */
 
 int Load_log_event::copy_log_event(const char *buf, ulong event_len,
-				   bool old_format)
+                                   int body_offset,
+                                   const Format_description_log_event *description_event)
 {
   uint data_len;
   char* buf_end = (char*)buf + event_len;
-  uint header_len= old_format ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
-  const char* data_head = buf + header_len;
+  /* this is the beginning of the post-header */
+  const char* data_head = buf + description_event->common_header_len;
   slave_proxy_id= thread_id= uint4korr(data_head + L_THREAD_ID_OFFSET);
   exec_time = uint4korr(data_head + L_EXEC_TIME_OFFSET);
   skip_lines = uint4korr(data_head + L_SKIP_LINES_OFFSET);
@@ -1471,10 +2145,6 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
   db_len = (uint)data_head[L_DB_LEN_OFFSET];
   num_fields = uint4korr(data_head + L_NUM_FIELDS_OFFSET);
 	  
-  int body_offset = ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
-		     LOAD_HEADER_LEN + header_len :
-		     get_data_body_offset());
-  
   if ((int) event_len < body_offset)
     return 1;
   /*
@@ -1498,6 +2168,12 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
   fname = db + db_len + 1;
   fname_len = strlen(fname);
   // null termination is accomplished by the caller doing buf[event_len]=0
+
+  /*
+    In 5.0 this event will have the same format, as we are planning to log LOAD
+    DATA INFILE in a completely different way (as a plain-text query) since 4.1
+    or 5.0 (Dmitri's WL#874)
+  */
   return 0;
 }
 
@@ -1507,13 +2183,13 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
 */
 
 #ifdef MYSQL_CLIENT
-void Load_log_event::print(FILE* file, bool short_form, char* last_db)
+void Load_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
 {
-  print(file, short_form, last_db, 0);
+  print(file, short_form, last_event_info, 0);
 }
 
 
-void Load_log_event::print(FILE* file, bool short_form, char* last_db,
+void Load_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info,
 			   bool commented)
 {
   if (!short_form)
@@ -1524,10 +2200,10 @@ void Load_log_event::print(FILE* file, bool short_form, char* last_db,
   }
 
   bool same_db = 0;
-  if (db && last_db)
+  if (db && last_event_info->db)
   {
-    if (!(same_db = !memcmp(last_db, db, db_len + 1)))
-      memcpy(last_db, db, db_len + 1);
+    if (!(same_db = !memcmp(last_event_info->db, db, db_len + 1)))
+      memcpy(last_event_info->db, db, db_len + 1);
   }
   
   if (db && db[0] && !same_db)
@@ -1839,17 +2515,17 @@ void Rotate_log_event::pack_info(Protocol *protocol)
 */
 
 #ifdef MYSQL_CLIENT
-void Rotate_log_event::print(FILE* file, bool short_form, char* last_db)
+void Rotate_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
 {
   char buf[22];
+
   if (short_form)
     return;
-
   print_header(file);
   fprintf(file, "\tRotate to ");
   if (new_log_ident)
     my_fwrite(file, (byte*) new_log_ident, (uint)ident_len, 
-	      MYF(MY_NABP | MY_WME));
+              MYF(MY_NABP | MY_WME));
   fprintf(file, "  pos: %s", llstr(pos, buf));
   fputc('\n', file);
   fflush(file);
@@ -1861,28 +2537,21 @@ void Rotate_log_event::print(FILE* file, bool short_form, char* last_db)
   Rotate_log_event::Rotate_log_event()
 */
 
-Rotate_log_event::Rotate_log_event(const char* buf, int event_len,
-				   bool old_format)
-  :Log_event(buf, old_format),new_log_ident(NULL),alloced(0)
+Rotate_log_event::Rotate_log_event(const char* buf, uint event_len,
+                                   const Format_description_log_event* description_event)
+  :Log_event(buf, description_event) ,new_log_ident(NULL),alloced(0)
 {
   // The caller will ensure that event_len is what we have at EVENT_LEN_OFFSET
-  int header_size = (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
+  uint8 header_size= description_event->common_header_len;
+  uint8 post_header_len= description_event->post_header_len[ROTATE_EVENT-1];
   uint ident_offset;
   if (event_len < header_size)
     return;
   buf += header_size;
-  if (old_format)
-  {
-    ident_len = (uint)(event_len - OLD_HEADER_LEN);
-    pos = 4;
-    ident_offset = 0;
-  }
-  else
-  {
-    ident_len = (uint)(event_len - ROTATE_EVENT_OVERHEAD);
-    pos = uint8korr(buf + R_POS_OFFSET);
-    ident_offset = ROTATE_HEADER_LEN;
-  }
+  pos = post_header_len ? uint8korr(buf + R_POS_OFFSET) : 4;
+  ident_len = (uint)(event_len -
+                     (header_size+post_header_len)); 
+  ident_offset = post_header_len; 
   set_if_smaller(ident_len,FN_REFLEN-1);
   if (!(new_log_ident= my_strdup_with_length((byte*) buf +
 					     ident_offset,
@@ -1909,14 +2578,15 @@ int Rotate_log_event::write_data(IO_CACHE* file)
 /*
   Rotate_log_event::exec_event()
 
-  Got a rotate log even from the master
+  Got a rotate log event from the master
 
   IMPLEMENTATION
     This is mainly used so that we can later figure out the logname and
     position for the master.
 
-    We can't rotate the slave as this will cause infinitive rotations
+    We can't rotate the slave's BINlog as this will cause infinitive rotations
     in a A -> B -> A setup.
+    The NOTES below is a wrong comment which will disappear when 4.1 is merged.
 
   RETURN VALUES
     0	ok
@@ -1928,7 +2598,7 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli)
   DBUG_ENTER("Rotate_log_event::exec_event");
 
   pthread_mutex_lock(&rli->data_lock);
-  rli->event_relay_log_pos += get_event_len();
+  rli->event_relay_log_pos= my_b_tell(rli->cur_log);
   /*
     If we are in a transaction: the only normal case is when the I/O thread was
     copying a big transaction, then it was stopped and restarted: we have this
@@ -1945,10 +2615,21 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli)
   {
     memcpy(rli->group_master_log_name, new_log_ident, ident_len+1);
     rli->notify_group_master_log_name_update();
-    rli->group_master_log_pos = pos;
-    rli->group_relay_log_pos = rli->event_relay_log_pos;
-    DBUG_PRINT("info", ("group_master_log_pos: %lu",
+    rli->group_master_log_pos= pos;
+    rli->group_relay_log_pos= rli->event_relay_log_pos;
+    DBUG_PRINT("info", ("group_master_log_name: '%s' group_master_log_pos:\
+%lu",
+                        rli->group_master_log_name,
                         (ulong) rli->group_master_log_pos));
+    /*
+      Reset thd->options and sql_mode, because this could be the signal of a
+      master's downgrade from 5.0 to 4.0.
+      However, no need to reset description_event_for_exec: indeed, if the next
+      master is 5.0 (even 5.0.1) we will soon get a Format_desc; if the next
+      master is 4.0 then the events are in the slave's format (conversion).
+    */
+    set_slave_thread_options(thd);
+    thd->variables.sql_mode= global_system_variables.sql_mode;
   }
   pthread_mutex_unlock(&rli->data_lock);
   pthread_cond_broadcast(&rli->data_cond);
@@ -1982,12 +2663,13 @@ void Intvar_log_event::pack_info(Protocol *protocol)
   Intvar_log_event::Intvar_log_event()
 */
 
-Intvar_log_event::Intvar_log_event(const char* buf, bool old_format)
-  :Log_event(buf, old_format)
+Intvar_log_event::Intvar_log_event(const char* buf,
+                                   const Format_description_log_event* description_event)
+  :Log_event(buf, description_event)
 {
-  buf += (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
-  type = buf[I_TYPE_OFFSET];
-  val = uint8korr(buf+I_VAL_OFFSET);
+  buf+= description_event->common_header_len;
+  type= buf[I_TYPE_OFFSET];
+  val= uint8korr(buf+I_VAL_OFFSET);
 }
 
 
@@ -2023,7 +2705,7 @@ int Intvar_log_event::write_data(IO_CACHE* file)
 */
 
 #ifdef MYSQL_CLIENT
-void Intvar_log_event::print(FILE* file, bool short_form, char* last_db)
+void Intvar_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
 {
   char llbuff[22];
   const char *msg;
@@ -2066,7 +2748,7 @@ int Intvar_log_event::exec_event(struct st_relay_log_info* rli)
     thd->next_insert_id = val;
     break;
   }
-  rli->inc_event_relay_log_pos(get_event_len());
+  rli->inc_event_relay_log_pos();
   return 0;
 }
 #endif
@@ -2089,12 +2771,13 @@ void Rand_log_event::pack_info(Protocol *protocol)
 #endif
 
 
-Rand_log_event::Rand_log_event(const char* buf, bool old_format)
-  :Log_event(buf, old_format)
+Rand_log_event::Rand_log_event(const char* buf,
+                               const Format_description_log_event* description_event)
+  :Log_event(buf, description_event)
 {
-  buf += (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
-  seed1 = uint8korr(buf+RAND_SEED1_OFFSET);
-  seed2 = uint8korr(buf+RAND_SEED2_OFFSET);
+  buf+= description_event->common_header_len;
+  seed1= uint8korr(buf+RAND_SEED1_OFFSET);
+  seed2= uint8korr(buf+RAND_SEED2_OFFSET);
 }
 
 
@@ -2108,7 +2791,7 @@ int Rand_log_event::write_data(IO_CACHE* file)
 
 
 #ifdef MYSQL_CLIENT
-void Rand_log_event::print(FILE* file, bool short_form, char* last_db)
+void Rand_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
 {
   char llbuff[22],llbuff2[22];
   if (!short_form)
@@ -2128,7 +2811,7 @@ int Rand_log_event::exec_event(struct st_relay_log_info* rli)
 {
   thd->rand.seed1= (ulong) seed1;
   thd->rand.seed2= (ulong) seed2;
-  rli->inc_event_relay_log_pos(get_event_len());
+  rli->inc_event_relay_log_pos();
   return 0;
 }
 #endif /* !MYSQL_CLIENT */
@@ -2192,10 +2875,11 @@ void User_var_log_event::pack_info(Protocol* protocol)
 #endif /* !MYSQL_CLIENT */
 
 
-User_var_log_event::User_var_log_event(const char* buf, bool old_format)
-  :Log_event(buf, old_format)
+User_var_log_event::User_var_log_event(const char* buf,
+                                       const Format_description_log_event* description_event)
+  :Log_event(buf, description_event)
 {
-  buf+= (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
+  buf+= description_event->common_header_len;
   name_len= uint4korr(buf);
   name= (char *) buf + UV_NAME_LEN_SIZE;
   buf+= UV_NAME_LEN_SIZE + name_len;
@@ -2269,7 +2953,7 @@ int User_var_log_event::write_data(IO_CACHE* file)
 */
 
 #ifdef MYSQL_CLIENT
-void User_var_log_event::print(FILE* file, bool short_form, char* last_db)
+void User_var_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
 {
   if (!short_form)
   {
@@ -2359,7 +3043,7 @@ int User_var_log_event::exec_event(struct st_relay_log_info* rli)
   e.update_hash(val, val_len, type, charset, DERIVATION_NONE);
   free_root(&thd->mem_root,0);
 
-  rli->inc_event_relay_log_pos(get_event_len());
+  rli->inc_event_relay_log_pos();
   return 0;
 }
 #endif /* !MYSQL_CLIENT */
@@ -2371,7 +3055,7 @@ int User_var_log_event::exec_event(struct st_relay_log_info* rli)
 
 #ifdef HAVE_REPLICATION
 #ifdef MYSQL_CLIENT
-void Unknown_log_event::print(FILE* file, bool short_form, char* last_db)
+void Unknown_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
 {
   if (short_form)
     return;
@@ -2401,7 +3085,7 @@ void Slave_log_event::pack_info(Protocol *protocol)
 #ifndef MYSQL_CLIENT
 Slave_log_event::Slave_log_event(THD* thd_arg,
 				 struct st_relay_log_info* rli)
-  :Log_event(thd_arg, 0, 0), mem_pool(0), master_host(0)
+  :Log_event(thd_arg, 0, 0) , mem_pool(0), master_host(0)
 {
   DBUG_ENTER("Slave_log_event");
   if (!rli->inited)				// QQ When can this happen ?
@@ -2442,7 +3126,7 @@ Slave_log_event::~Slave_log_event()
 
 
 #ifdef MYSQL_CLIENT
-void Slave_log_event::print(FILE* file, bool short_form, char* last_db)
+void Slave_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
 {
   char llbuff[22];
   if (short_form)
@@ -2488,12 +3172,13 @@ void Slave_log_event::init_from_mem_pool(int data_size)
 }
 
 
-Slave_log_event::Slave_log_event(const char* buf, int event_len)
-  :Log_event(buf,0),mem_pool(0),master_host(0)
+/* This code is not used, so has not been updated to be format-tolerant */
+Slave_log_event::Slave_log_event(const char* buf, uint event_len)
+  :Log_event(buf,0) /*unused event*/ ,mem_pool(0),master_host(0)
 {
-  event_len -= LOG_EVENT_HEADER_LEN;
-  if (event_len < 0)
+  if (event_len < LOG_EVENT_HEADER_LEN)
     return;
+  event_len -= LOG_EVENT_HEADER_LEN;
   if (!(mem_pool = (char*) my_malloc(event_len + 1, MYF(MY_WME))))
     return;
   memcpy(mem_pool, buf + LOG_EVENT_HEADER_LEN, event_len);
@@ -2521,7 +3206,7 @@ int Slave_log_event::exec_event(struct st_relay_log_info* rli)
 */
 
 #ifdef MYSQL_CLIENT
-void Stop_log_event::print(FILE* file, bool short_form, char* last_db)
+void Stop_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
 {
   if (short_form)
     return;
@@ -2543,7 +3228,7 @@ void Stop_log_event::print(FILE* file, bool short_form, char* last_db)
   We used to clean up slave_load_tmpdir, but this is useless as it has been
   cleared at the end of LOAD DATA INFILE.
   So we have nothing to do here.
-  The place were we must do this cleaning is in Start_log_event::exec_event(),
+  The place were we must do this cleaning is in Start_log_event_v3::exec_event(),
   not here. Because if we come here, the master was sane.
 */
 
@@ -2557,7 +3242,7 @@ int Stop_log_event::exec_event(struct st_relay_log_info* rli)
     could give false triggers in MASTER_POS_WAIT() that we have reached
     the target position when in fact we have not.
   */
-  rli->inc_group_relay_log_pos(get_event_len(), 0);
+  rli->inc_group_relay_log_pos(0);
   flush_relay_log_info(rli);
   return 0;
 }
@@ -2636,20 +3321,40 @@ int Create_file_log_event::write_base(IO_CACHE* file)
   Create_file_log_event ctor
 */
 
-Create_file_log_event::Create_file_log_event(const char* buf, int len,
-					     bool old_format)
-  :Load_log_event(buf,0,old_format),fake_base(0),block(0),inited_from_old(0)
-{
-  int block_offset;
-  if (copy_log_event(buf,len,old_format))
+Create_file_log_event::Create_file_log_event(const char* buf, uint len,
+                                             const Format_description_log_event* description_event)
+  :Load_log_event(buf,0,description_event),fake_base(0),block(0),inited_from_old(0)
+{
+  uint block_offset;
+  uint header_len= description_event->common_header_len;
+  uint8 load_header_len= description_event->post_header_len[LOAD_EVENT-1];
+  uint8 create_file_header_len= description_event->post_header_len[CREATE_FILE_EVENT-1];
+  if (copy_log_event(buf,len,
+                     ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
+                      load_header_len + header_len :
+                      (fake_base ? (header_len+load_header_len) :
+                       (header_len+load_header_len) +
+                       create_file_header_len)),
+                     description_event))
     return;
-  if (!old_format)
+  if (description_event->binlog_version!=1)
   {
-    file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN +
-			+ LOAD_HEADER_LEN + CF_FILE_ID_OFFSET);
+    file_id= uint4korr(buf + 
+                       header_len +
+		       load_header_len + CF_FILE_ID_OFFSET);
     // + 1 for \0 terminating fname  
-    block_offset = (LOG_EVENT_HEADER_LEN + Load_log_event::get_data_size() +
-		    CREATE_FILE_HEADER_LEN + 1);
+    /*
+      Note that it's ok to use get_data_size() below, because it is computed
+      with values we have already read from this event (because we called
+      copy_log_event()); we are not using slave's format info to decode master's
+      format, we are really using master's format info.
+      Anyway, both formats should be identical (except the common_header_len) as
+      these Load events are not changed between 4.0 and 5.0 (as logging of LOAD
+      DATA INFILE does not use Load_log_event in 5.0).
+    */
+    block_offset= description_event->common_header_len +
+      Load_log_event::get_data_size() +
+      create_file_header_len + 1;
     if (len < block_offset)
       return;
     block = (char*)buf + block_offset;
@@ -2669,18 +3374,18 @@ Create_file_log_event::Create_file_log_event(const char* buf, int len,
 
 #ifdef MYSQL_CLIENT
 void Create_file_log_event::print(FILE* file, bool short_form, 
-				  char* last_db, bool enable_local)
+				  LAST_EVENT_INFO* last_event_info, bool enable_local)
 {
   if (short_form)
   {
     if (enable_local && check_fname_outside_temp_buf())
-      Load_log_event::print(file, 1, last_db);
+      Load_log_event::print(file, 1, last_event_info);
     return;
   }
 
   if (enable_local)
   {
-    Load_log_event::print(file, 1, last_db, !check_fname_outside_temp_buf());
+    Load_log_event::print(file, 1, last_event_info, !check_fname_outside_temp_buf());
     /* 
        That one is for "file_id: etc" below: in mysqlbinlog we want the #, in
        SHOW BINLOG EVENTS we don't.
@@ -2693,9 +3398,9 @@ void Create_file_log_event::print(FILE* file, bool short_form,
 
 
 void Create_file_log_event::print(FILE* file, bool short_form,
-				  char* last_db)
+				  LAST_EVENT_INFO* last_event_info)
 {
-  print(file,short_form,last_db,0);
+  print(file,short_form,last_event_info,0);
 }
 #endif /* MYSQL_CLIENT */
 
@@ -2809,14 +3514,19 @@ Append_block_log_event::Append_block_log_event(THD* thd_arg, const char* db_arg,
   Append_block_log_event ctor
 */
 
-Append_block_log_event::Append_block_log_event(const char* buf, int len)
-  :Log_event(buf, 0),block(0)
+Append_block_log_event::Append_block_log_event(const char* buf, uint len,
+                                               const Format_description_log_event* description_event)
+  :Log_event(buf, description_event),block(0)
 {
-  if ((uint)len < APPEND_BLOCK_EVENT_OVERHEAD)
+  uint8 common_header_len= description_event->common_header_len; 
+  uint8 append_block_header_len=
+    description_event->post_header_len[APPEND_BLOCK_EVENT-1];
+  uint total_header_len= common_header_len+append_block_header_len;
+  if (len < total_header_len)
     return;
-  file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + AB_FILE_ID_OFFSET);
-  block = (char*)buf + APPEND_BLOCK_EVENT_OVERHEAD;
-  block_len = len - APPEND_BLOCK_EVENT_OVERHEAD;
+  file_id= uint4korr(buf + common_header_len + AB_FILE_ID_OFFSET);
+  block= (char*)buf + total_header_len;
+  block_len= len - total_header_len;
 }
 
 
@@ -2839,7 +3549,7 @@ int Append_block_log_event::write_data(IO_CACHE* file)
 
 #ifdef MYSQL_CLIENT  
 void Append_block_log_event::print(FILE* file, bool short_form,
-				   char* last_db)
+				   LAST_EVENT_INFO* last_event_info)
 {
   if (short_form)
     return;
@@ -2921,12 +3631,15 @@ Delete_file_log_event::Delete_file_log_event(THD *thd_arg, const char* db_arg,
   Delete_file_log_event ctor
 */
 
-Delete_file_log_event::Delete_file_log_event(const char* buf, int len)
-  :Log_event(buf, 0),file_id(0)
+Delete_file_log_event::Delete_file_log_event(const char* buf, uint len,
+                                             const Format_description_log_event* description_event)
+  :Log_event(buf, description_event),file_id(0)
 {
-  if ((uint)len < DELETE_FILE_EVENT_OVERHEAD)
+  uint8 common_header_len= description_event->common_header_len;
+  uint8 delete_file_header_len= description_event->post_header_len[DELETE_FILE_EVENT-1];
+  if (len < (uint)(common_header_len + delete_file_header_len))
     return;
-  file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + AB_FILE_ID_OFFSET);
+  file_id= uint4korr(buf + common_header_len + DF_FILE_ID_OFFSET);
 }
 
 
@@ -2948,7 +3661,7 @@ int Delete_file_log_event::write_data(IO_CACHE* file)
 
 #ifdef MYSQL_CLIENT  
 void Delete_file_log_event::print(FILE* file, bool short_form,
-				  char* last_db)
+				  LAST_EVENT_INFO* last_event_info)
 {
   if (short_form)
     return;
@@ -3011,12 +3724,15 @@ Execute_load_log_event::Execute_load_log_event(THD *thd_arg, const char* db_arg,
   Execute_load_log_event ctor
 */
 
-Execute_load_log_event::Execute_load_log_event(const char* buf, int len)
-  :Log_event(buf, 0), file_id(0)
+Execute_load_log_event::Execute_load_log_event(const char* buf, uint len,
+                                               const Format_description_log_event* description_event)
+  :Log_event(buf, description_event), file_id(0)
 {
-  if ((uint)len < EXEC_LOAD_EVENT_OVERHEAD)
+  uint8 common_header_len= description_event->common_header_len;
+  uint8 exec_load_header_len= description_event->post_header_len[EXEC_LOAD_EVENT-1];
+  if (len < (uint)(common_header_len+exec_load_header_len))
     return;
-  file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + EL_FILE_ID_OFFSET);
+  file_id= uint4korr(buf + common_header_len + EL_FILE_ID_OFFSET);
 }
 
 
@@ -3038,7 +3754,7 @@ int Execute_load_log_event::write_data(IO_CACHE* file)
 
 #ifdef MYSQL_CLIENT  
 void Execute_load_log_event::print(FILE* file, bool short_form,
-				   char* last_db)
+				   LAST_EVENT_INFO* last_event_info)
 {
   if (short_form)
     return;
@@ -3085,8 +3801,8 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli)
     goto err;
   }
   if (!(lev = (Load_log_event*)Log_event::read_log_event(&file,
-							 (pthread_mutex_t*)0,
-							 (bool)0)) ||
+                                                         (pthread_mutex_t*)0,
+                                                         rli->relay_log.description_event_for_exec)) ||
       lev->get_type_code() != NEW_LOAD_EVENT)
   {
     slave_print_error(rli,0, "Error in Exec_load event: file '%s' appears corrupted", fname);
diff --git a/sql/log_event.h b/sql/log_event.h
index 6cc8a7ca06d50b511877f835ce4ba1d13e4ee4a6..58691c6a24fbe86fc8602ac933c9202866fbfd78 100644
--- a/sql/log_event.h
+++ b/sql/log_event.h
@@ -34,13 +34,43 @@
 #define LOG_READ_TOO_LARGE -7
 
 #define LOG_EVENT_OFFSET 4
-
-#define BINLOG_VERSION    3
+ 
+/* 
+   3 is MySQL 4.x; 4 is MySQL 5.0.0.
+   Compared to version 3, version 4 has:
+   - a different Start_log_event, which includes info about the binary log
+   (sizes of headers); this info is included for better compatibility if the
+   master's MySQL version is different from the slave's.
+   - all events have a unique ID (the triplet (server_id, timestamp at server
+   start, other) to be sure an event is not executed more than once in a
+   multimaster setup, example:
+                M1
+              /   \
+             v     v
+             M2    M3
+             \     /
+              v   v
+                S
+   if a query is run on M1, it will arrive twice on S, so we need that S
+   remembers the last unique ID it has processed, to compare and know if the
+   event should be skipped or not. Example of ID: we already have the server id
+   (4 bytes), plus:
+   timestamp_when_the_master_started (4 bytes), a counter (a sequence number
+   which increments every time we write an event to the binlog) (3 bytes).
+   Q: how do we handle when the counter is overflowed and restarts from 0 ?
+   
+   - Query and Load (Create or Execute) events may have a more precise timestamp
+   (with microseconds), number of matched/affected/warnings rows 
+   and fields of session variables: SQL_MODE,
+   FOREIGN_KEY_CHECKS, UNIQUE_CHECKS, SQL_AUTO_IS_NULL, the collations and
+   charsets, the PASSWORD() version (old/new/...). 
+*/
+#define BINLOG_VERSION    4
 
 /*
  We could have used SERVER_VERSION_LENGTH, but this introduces an
  obscure dependency - if somebody decided to change SERVER_VERSION_LENGTH
- this would have broken the replication protocol
+ this would break the replication protocol
 */
 #define ST_SERVER_VER_LEN 50
 
@@ -49,6 +79,12 @@
   TERMINATED etc).
 */
 
+/*
+  These are flags and structs to handle all the LOAD DATA INFILE options (LINES
+  TERMINATED etc).
+  DUMPFILE_FLAG is probably useless (DUMPFILE is a clause of SELECT, not of LOAD
+  DATA).
+*/
 #define DUMPFILE_FLAG		0x1
 #define OPT_ENCLOSED_FLAG	0x2
 #define REPLACE_FLAG		0x4
@@ -136,16 +172,28 @@ struct sql_ex_info
 
 #define LOG_EVENT_HEADER_LEN 19     /* the fixed header length */
 #define OLD_HEADER_LEN       13     /* the fixed header length in 3.23 */
-
+/* 
+   Fixed header length, where 4.x and 5.0 agree. That is, 5.0 may have a longer
+   header (it will for sure when we have the unique event's ID), but at least
+   the first 19 bytes are the same in 4.x and 5.0. So when we have the unique
+   event's ID, LOG_EVENT_HEADER_LEN will be something like 26, but
+   LOG_EVENT_MINIMAL_HEADER_LEN will remain 19.
+*/
+#define LOG_EVENT_MINIMAL_HEADER_LEN 19     
+ 
 /* event-specific post-header sizes */
-#define QUERY_HEADER_LEN     (4 + 4 + 1 + 2)
+// where 3.23, 4.x and 5.0 agree
+#define QUERY_HEADER_MINIMAL_LEN     (4 + 4 + 1 + 2)
+// where 5.0 differs: 2 for len of N-bytes vars.
+#define QUERY_HEADER_LEN     (QUERY_HEADER_MINIMAL_LEN + 2) 
 #define LOAD_HEADER_LEN      (4 + 4 + 4 + 1 +1 + 4)
-#define START_HEADER_LEN     (2 + ST_SERVER_VER_LEN + 4)
-#define ROTATE_HEADER_LEN    8
+#define START_V3_HEADER_LEN     (2 + ST_SERVER_VER_LEN + 4)
+#define ROTATE_HEADER_LEN    8 // this is FROZEN (the Rotate post-header is frozen)
 #define CREATE_FILE_HEADER_LEN 4
 #define APPEND_BLOCK_HEADER_LEN 4
 #define EXEC_LOAD_HEADER_LEN   4
 #define DELETE_FILE_HEADER_LEN 4
+#define FORMAT_DESCRIPTION_HEADER_LEN (START_V3_HEADER_LEN+1+LOG_EVENT_TYPES)
 
 /* 
    Event header offsets; 
@@ -158,11 +206,12 @@ struct sql_ex_info
 #define LOG_POS_OFFSET       13
 #define FLAGS_OFFSET         17
 
-/* start event post-header */
+/* start event post-header (for v3 and v4) */
 
 #define ST_BINLOG_VER_OFFSET  0
 #define ST_SERVER_VER_OFFSET  2
 #define ST_CREATED_OFFSET     (ST_SERVER_VER_OFFSET + ST_SERVER_VER_LEN)
+#define ST_COMMON_HEADER_LEN_OFFSET (ST_CREATED_OFFSET + 4)
 
 /* slave event post-header (this event is never written) */
 
@@ -176,7 +225,13 @@ struct sql_ex_info
 #define Q_EXEC_TIME_OFFSET	4
 #define Q_DB_LEN_OFFSET		8
 #define Q_ERR_CODE_OFFSET	9
+#define Q_STATUS_VARS_LEN_OFFSET 11
 #define Q_DATA_OFFSET		QUERY_HEADER_LEN
+/* these are codes, not offsets; not more than 256 values (1 byte). */
+#define Q_FLAGS2_CODE           0
+#define Q_SQL_MODE_CODE         1
+#define Q_CATALOG_CODE          2
+
 
 /* Intvar event post-header */
 
@@ -228,16 +283,6 @@ struct sql_ex_info
 /* DF = "Delete File" */
 #define DF_FILE_ID_OFFSET  0
 
-#define QUERY_EVENT_OVERHEAD	(LOG_EVENT_HEADER_LEN+QUERY_HEADER_LEN)
-#define QUERY_DATA_OFFSET	(LOG_EVENT_HEADER_LEN+QUERY_HEADER_LEN)
-#define ROTATE_EVENT_OVERHEAD	(LOG_EVENT_HEADER_LEN+ROTATE_HEADER_LEN)
-#define LOAD_EVENT_OVERHEAD	(LOG_EVENT_HEADER_LEN+LOAD_HEADER_LEN)
-#define CREATE_FILE_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+\
- +LOAD_HEADER_LEN+CREATE_FILE_HEADER_LEN)
-#define DELETE_FILE_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+DELETE_FILE_HEADER_LEN)
-#define EXEC_LOAD_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+EXEC_LOAD_HEADER_LEN)
-#define APPEND_BLOCK_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+APPEND_BLOCK_HEADER_LEN)
-
 /* 4 bytes which all binlogs should begin with */
 #define BINLOG_MAGIC        "\xfe\x62\x69\x6e"
 
@@ -264,15 +309,54 @@ struct sql_ex_info
 */
 #define LOG_EVENT_THREAD_SPECIFIC_F 0x4 
 
+/* 
+   OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be written
+   to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written into the
+   Format_description_log_event, so that if later we don't want to replicate a
+   variable we did replicate, or the contrary, it's doable. But it should not be
+   too hard to decide once for all of what we replicate and what we don't, among
+   the fixed 32 bits of thd->options.
+   I (Guilhem) have read through every option's usage, and it looks like
+   OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEYS are the only ones which alter 
+   how the query modifies the table. It's good to replicate
+   OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the slave may insert data
+   slower than the master, in InnoDB.
+   OPTION_BIG_SELECTS is not needed (the slave thread runs with
+   max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed either, as
+   the manual says (because a too big in-memory temp table is automatically
+   written to disk).
+*/ 
+#define OPTIONS_WRITTEN_TO_BIN_LOG (OPTION_AUTO_IS_NULL | \
+OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS)
+
 enum Log_event_type
 {
-  UNKNOWN_EVENT= 0, START_EVENT= 1, QUERY_EVENT= 2, STOP_EVENT= 3,
-  ROTATE_EVENT= 4, INTVAR_EVENT= 5, LOAD_EVENT=6, SLAVE_EVENT= 7, 
-  CREATE_FILE_EVENT= 8, APPEND_BLOCK_EVENT= 9, EXEC_LOAD_EVENT= 10,
-  DELETE_FILE_EVENT= 11, NEW_LOAD_EVENT= 12, RAND_EVENT= 13,
-  USER_VAR_EVENT= 14
+  /*
+    Every time you update this enum (when you add a type), you have to
+    update the code of Format_description_log_event::Format_description_log_event().
+    Make sure you always insert new types ***BEFORE*** ENUM_END_EVENT.
+  */
+  UNKNOWN_EVENT= 0, START_EVENT_V3, QUERY_EVENT, STOP_EVENT, ROTATE_EVENT,
+  INTVAR_EVENT, LOAD_EVENT, SLAVE_EVENT, CREATE_FILE_EVENT,
+  APPEND_BLOCK_EVENT, EXEC_LOAD_EVENT, DELETE_FILE_EVENT,
+  /*
+    NEW_LOAD_EVENT is like LOAD_EVENT except that it has a longer sql_ex,
+    allowing multibyte TERMINATED BY etc; both types share the same class
+    (Load_log_event)
+  */
+  NEW_LOAD_EVENT, 
+  RAND_EVENT, USER_VAR_EVENT,
+  FORMAT_DESCRIPTION_EVENT, 
+  ENUM_END_EVENT /* end marker */
 };
 
+/* 
+   The number of types we handle in Format_description_log_event (UNKNOWN_EVENT
+   is not to be handled, it does not exist in binlogs, it does not have a
+   format).
+*/
+#define LOG_EVENT_TYPES (ENUM_END_EVENT-1)
+
 enum Int_event_type
 {
   INVALID_INT_EVENT = 0, LAST_INSERT_ID_EVENT = 1, INSERT_ID_EVENT = 2
@@ -285,8 +369,33 @@ class MYSQL_LOG;
 class THD;
 #endif
 
+class Format_description_log_event;
+
 struct st_relay_log_info;
 
+#ifdef MYSQL_CLIENT
+/*
+  A structure for mysqlbinlog to remember the last db, flags2, sql_mode etc; it
+  is passed to events' print() methods, so that they print only the necessary
+  USE and SET commands.
+*/
+typedef struct st_last_event_info
+{
+  // TODO: have the last catalog here ??
+  char db[FN_REFLEN+1]; // TODO: make this a LEX_STRING when thd->db is
+  bool flags2_inited;
+  uint32 flags2;
+  bool sql_mode_inited;
+  ulonglong sql_mode;
+  st_last_event_info()
+    : flags2_inited(0), flags2(0), sql_mode_inited(0), sql_mode(0) 
+    {
+      db[0]= 0; /* initially, the db is unknown */
+    }
+} LAST_EVENT_INFO;
+#endif
+
+
 /*****************************************************************************
 
   Log_event class
@@ -337,21 +446,26 @@ class Log_event
   uint16 flags;
 
   bool cache_stmt;
+
 #ifndef MYSQL_CLIENT
   THD* thd;
 
-  Log_event(THD* thd_arg, uint16 flags_arg, bool cache_stmt);
   Log_event();
+  Log_event(THD* thd_arg, uint16 flags_arg, bool cache_stmt);
   /*
     read_log_event() functions read an event from a binlog or relay log; used by
     SHOW BINLOG EVENTS, the binlog_dump thread on the master (reads master's
     binlog), the slave IO thread (reads the event sent by binlog_dump), the
     slave SQL thread (reads the event from the relay log).
+    If mutex is 0, the read will proceed without mutex.
+    We need the description_event to be able to parse the event (to know the
+    post-header's size); in fact in read_log_event we detect the event's type,
+    then call the specific event's constructor and pass description_event as an
+    argument.
   */
-  // if mutex is 0, the read will proceed without mutex
   static Log_event* read_log_event(IO_CACHE* file,
 				   pthread_mutex_t* log_lock,
-				   bool old_format);
+                                   const Format_description_log_event *description_event);
   static int read_log_event(IO_CACHE* file, String* packet,
 			    pthread_mutex_t* log_lock);
   /* set_log_pos() is used to fill log_pos with tell(log). */
@@ -379,10 +493,12 @@ class Log_event
     return thd ? thd->db : 0;
   }
 #else
+  Log_event() : temp_buf(0) {}
  // avoid having to link mysqlbinlog against libpthread
-  static Log_event* read_log_event(IO_CACHE* file, bool old_format);
+  static Log_event* read_log_event(IO_CACHE* file,
+                                   const Format_description_log_event *description_event);
   /* print*() functions are used by mysqlbinlog */
-  virtual void print(FILE* file, bool short_form = 0, char* last_db = 0) = 0;
+  virtual void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0) = 0;
   void print_timestamp(FILE* file, time_t *ts = 0);
   void print_header(FILE* file);
 #endif  
@@ -405,9 +521,9 @@ class Log_event
   virtual int write_data_body(IO_CACHE* file __attribute__((unused)))
   { return 0; }
   virtual Log_event_type get_type_code() = 0;
-  virtual bool is_valid() = 0;
+  virtual const bool is_valid() = 0;
   inline bool get_cache_stmt() { return cache_stmt; }
-  Log_event(const char* buf, bool old_format);
+  Log_event(const char* buf, const Format_description_log_event* description_event);
   virtual ~Log_event() { free_temp_buf();}
   void register_temp_buf(char* buf) { temp_buf = buf; }
   void free_temp_buf()
@@ -419,18 +535,37 @@ class Log_event
     }
   }
   virtual int get_data_size() { return 0;}
-  virtual int get_data_body_offset() { return 0; }
   int get_event_len()
   {
-    return (cached_event_len ? cached_event_len :
-	    (cached_event_len = LOG_EVENT_HEADER_LEN + get_data_size()));
+    /*
+      We don't re-use the cached event's length anymore (we did in 4.x) because
+      this leads to nasty problems: when the 5.0 slave reads an event from a 4.0
+      master, it caches the event's length, then this event is converted before
+      it goes into the relay log, so it would be written to the relay log with
+      its old length, which is garbage.
+    */
+    return (cached_event_len=(LOG_EVENT_HEADER_LEN + get_data_size()));
   }
-  static Log_event* read_log_event(const char* buf, int event_len,
-				   const char **error, bool old_format);
+  static Log_event* read_log_event(const char* buf, uint event_len,
+				   const char **error,
+                                   const Format_description_log_event
+                                   *description_event); 
   /* returns the human readable name of the event's type */
   const char* get_type_str();
 };
 
+/* 
+   One class for each type of event.
+   Two constructors for each class:
+   - one to create the event for logging (when the server acts as a master),
+   called after an update to the database is done,
+   which accepts parameters like the query, the database, the options for LOAD
+   DATA INFILE...
+   - one to create the event from a packet (when the server acts as a slave),
+   called before reproducing the update, which accepts parameters (like a
+   buffer). Used to read from the master, from the relay log, and in
+   mysqlbinlog. This constructor must be format-tolerant.
+*/
 
 /*****************************************************************************
 
@@ -445,6 +580,7 @@ class Query_log_event: public Log_event
   char* data_buf;
 public:
   const char* query;
+  const char* catalog;
   const char* db;
   /*
     If we already know the length of the query string
@@ -462,6 +598,52 @@ class Query_log_event: public Log_event
      BUG#1686).
   */
   ulong slave_proxy_id;
+  
+  /* 
+     Binlog format 3 and 4 start to differ (as far as class members are
+     concerned) from here. 
+  */
+  
+  uint32 catalog_len;
+  
+  /*
+    We want to be able to store a variable number of N-bit status vars:
+    (generally N=32; but N=64 for SQL_MODE) a user may want to log the number of
+    affected rows (for debugging) while another does not want to lose 4 bytes in
+    this.
+    The storage on disk is the following:
+    status_vars_len is part of the post-header,
+    status_vars are in the variable-length part, after the post-header, before
+    the db & query.
+    status_vars on disk is a sequence of pairs (code, value) where 'code' means
+    'sql_mode', 'affected' etc. Sometimes 'value' must be a short string, so its
+    first byte is its length. For now the order of status vars is:
+    flags2 - sql_mode - catalog.
+    We should add the same thing to Load_log_event, but in fact
+    LOAD DATA INFILE is going to be logged with a new type of event (logging of
+    the plain text query), so Load_log_event would be frozen, so no need. The
+    new way of logging LOAD DATA INFILE would use a derived class of
+    Query_log_event, so automatically benefit from the work already done for
+    status variables in Query_log_event.
+ */
+  uint16 status_vars_len;
+  
+  /*
+    'flags2' is a second set of flags (on top of those in Log_event), for
+    session variables. These are thd->options which is & against a mask
+    (OPTIONS_WRITTEN_TO_BINLOG).
+    flags2_inited helps make a difference between flags2==0 (3.23 or 4.x
+    master, we don't know flags2, so use the slave server's global options) and
+    flags2==0 (5.0 master, we know this has a meaning of flags all down which
+    must influence the query).
+  */
+  bool flags2_inited; 
+  bool sql_mode_inited;
+  
+  uint32 flags2; 
+  /* In connections sql_mode is 32 bits now but will be 64 bits soon */
+  ulonglong sql_mode;
+  
 #ifndef MYSQL_CLIENT
 
   Query_log_event(THD* thd_arg, const char* query_arg, ulong query_length,
@@ -472,10 +654,11 @@ class Query_log_event: public Log_event
   int exec_event(struct st_relay_log_info* rli);
 #endif /* HAVE_REPLICATION */
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
 #endif
 
-  Query_log_event(const char* buf, int event_len, bool old_format);
+  Query_log_event(const char* buf, uint event_len,
+                  const Format_description_log_event *description_event);
   ~Query_log_event()
   {
     if (data_buf)
@@ -486,14 +669,11 @@ class Query_log_event: public Log_event
   Log_event_type get_type_code() { return QUERY_EVENT; }
   int write(IO_CACHE* file);
   int write_data(IO_CACHE* file); // returns 0 on success, -1 on error
-  bool is_valid() { return query != 0; }
+  const bool is_valid() { return query != 0; }
   int get_data_size()
   {
-    return (q_len + db_len + 2
-	    + 4	// thread_id
-	    + 4	// exec_time
-	    + 2	// error_code
-	    );
+    /* Note that the "1" below is the db's length. */
+    return (q_len + db_len + 1 + status_vars_len + QUERY_HEADER_LEN); 
   }
 };
 
@@ -504,6 +684,7 @@ class Query_log_event: public Log_event
   Slave Log Event class
   Note that this class is currently not used at all; no code writes a
   Slave_log_event (though some code in repl_failsafe.cc reads Slave_log_event).
+  So it's not a problem if this code is not maintained.
 
  ****************************************************************************/
 class Slave_log_event: public Log_event
@@ -524,13 +705,13 @@ class Slave_log_event: public Log_event
   void pack_info(Protocol* protocol);
   int exec_event(struct st_relay_log_info* rli);
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
 #endif  
 
-  Slave_log_event(const char* buf, int event_len);
+  Slave_log_event(const char* buf, uint event_len);
   ~Slave_log_event();
   int get_data_size();
-  bool is_valid() { return master_host != 0; }
+  const bool is_valid() { return master_host != 0; }
   Log_event_type get_type_code() { return SLAVE_EVENT; }
   int write_data(IO_CACHE* file );
 };
@@ -546,12 +727,18 @@ class Slave_log_event: public Log_event
 class Load_log_event: public Log_event
 {
 protected:
-  int copy_log_event(const char *buf, ulong event_len, bool old_format);
+  int copy_log_event(const char *buf, ulong event_len, 
+                     int body_offset, const Format_description_log_event* description_event);
 
 public:
   ulong thread_id;
   ulong slave_proxy_id;
   uint32 table_name_len;
+  /*
+    No need to have a catalog, as these events can only come from 4.x.
+    TODO: this may become false if Dmitri pushes his new LOAD DATA INFILE in
+    5.0 only (not in 4.x).
+  */
   uint32 db_len;
   uint32 fname_len;
   uint32 num_fields;
@@ -597,11 +784,18 @@ class Load_log_event: public Log_event
 		 bool use_rli_only_for_errors);
 #endif /* HAVE_REPLICATION */
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
-  void print(FILE* file, bool short_form, char* last_db, bool commented);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info = 0);
+  void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool commented);
 #endif
 
-  Load_log_event(const char* buf, int event_len, bool old_format);
+  /*
+    Note that for all the events related to LOAD DATA (Load_log_event,
+    Create_file/Append/Exec/Delete, we pass description_event; however as
+    logging of LOAD DATA is going to be changed in 4.1 or 5.0, this is only used
+    for the common_header_len (post_header_len will not be changed).
+  */
+  Load_log_event(const char* buf, uint event_len,
+                 const Format_description_log_event* description_event); 
   ~Load_log_event()
   {}
   Log_event_type get_type_code()
@@ -610,27 +804,31 @@ class Load_log_event: public Log_event
   }
   int write_data_header(IO_CACHE* file); 
   int write_data_body(IO_CACHE* file); 
-  bool is_valid() { return table_name != 0; }
+  const bool is_valid() { return table_name != 0; }
   int get_data_size()
   {
-    return (table_name_len + 2 + db_len + 2 + fname_len
-	    + 4 // thread_id
-	    + 4 // exec_time
-	    + 4 // skip_lines
-	    + 4 // field block len
+    return (table_name_len + db_len + 2 + fname_len
+	    + LOAD_HEADER_LEN
 	    + sql_ex.data_size() + field_block_len + num_fields);
   }
-  int get_data_body_offset() { return LOAD_EVENT_OVERHEAD; }
 };
 
 extern char server_version[SERVER_VERSION_LENGTH];
 
 /*****************************************************************************
 
-  Start Log Event class
+  Start Log Event_v3 class
 
+  Start_log_event_v3 is the Start_log_event of binlog format 3 (MySQL 3.23 and
+  4.x).
+  Format_description_log_event derives from Start_log_event_v3; it is the
+  Start_log_event of binlog format 4 (MySQL 5.0), that is, the event that
+  describes the other events' header/postheader lengths. This event is sent by
+  MySQL 5.0 whenever it starts sending a new binlog if the requested position
+  is >4 (otherwise if ==4 the event will be sent naturally).
+ 
  ****************************************************************************/
-class Start_log_event: public Log_event
+class Start_log_event_v3: public Log_event
 {
 public:
   /* 
@@ -658,27 +856,81 @@ class Start_log_event: public Log_event
   char server_version[ST_SERVER_VER_LEN];
 
 #ifndef MYSQL_CLIENT
-  Start_log_event() :Log_event(), binlog_version(BINLOG_VERSION)
-  {
-    created = (time_t) when;
-    memcpy(server_version, ::server_version, ST_SERVER_VER_LEN);
-  }
+  Start_log_event_v3();
 #ifdef HAVE_REPLICATION
   void pack_info(Protocol* protocol);
   int exec_event(struct st_relay_log_info* rli);
 #endif /* HAVE_REPLICATION */
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
+  Start_log_event_v3() {}
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
 #endif  
 
-  Start_log_event(const char* buf, bool old_format);
-  ~Start_log_event() {}
-  Log_event_type get_type_code() { return START_EVENT;}
+  Start_log_event_v3(const char* buf,
+                     const Format_description_log_event* description_event);
+  ~Start_log_event_v3() {}
+  Log_event_type get_type_code() { return START_EVENT_V3;}
   int write_data(IO_CACHE* file);
-  bool is_valid() { return 1; }
+  const bool is_valid() { return 1; }
   int get_data_size()
   {
-    return START_HEADER_LEN;
+    return START_V3_HEADER_LEN; //no variable-sized part
+  }
+};
+
+/* 
+   For binlog version 4.
+   This event is saved by threads which read it, as they need it for future
+   use (to decode the ordinary events).
+*/
+
+class Format_description_log_event: public Start_log_event_v3
+{
+public:
+  /* 
+     The size of the fixed header which _all_ events have
+     (for binlogs written by this version, this is equal to
+     LOG_EVENT_HEADER_LEN), except FORMAT_DESCRIPTION_EVENT and ROTATE_EVENT
+     (those have a header of size LOG_EVENT_MINIMAL_HEADER_LEN).
+  */
+  uint8 common_header_len;
+  uint8 number_of_event_types;
+  /* The list of post-headers' lengthes */
+  uint8 *post_header_len;
+
+  Format_description_log_event(uint8 binlog_ver, const char* server_ver=0);
+
+#ifndef MYSQL_CLIENT
+#ifdef HAVE_REPLICATION
+  int exec_event(struct st_relay_log_info* rli);
+#endif /* HAVE_REPLICATION */
+#endif  
+  
+  Format_description_log_event(const char* buf, uint event_len,
+                               const Format_description_log_event* description_event);
+  ~Format_description_log_event() { my_free((gptr)post_header_len, MYF(0)); }
+  Log_event_type get_type_code() { return FORMAT_DESCRIPTION_EVENT;}
+  int write_data(IO_CACHE* file);
+  const bool is_valid() 
+    { 
+      return ((common_header_len >= ((binlog_version==1) ? OLD_HEADER_LEN :
+                                     LOG_EVENT_MINIMAL_HEADER_LEN)) && 
+              (post_header_len != NULL)); 
+    }
+  int get_event_len()
+  {
+    int i= LOG_EVENT_MINIMAL_HEADER_LEN + get_data_size();
+    DBUG_PRINT("info",("event_len=%d",i));
+    return i;
+  }
+  int get_data_size()
+  {
+    /*
+      The vector of post-header lengths is considered as part of the
+      post-header, because in a given version it never changes (contrary to the
+      query in a Query_log_event).
+    */
+    return FORMAT_DESCRIPTION_HEADER_LEN;
   }
 };
 
@@ -705,23 +957,26 @@ class Intvar_log_event: public Log_event
   int exec_event(struct st_relay_log_info* rli);
 #endif /* HAVE_REPLICATION */
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
 #endif  
 
-  Intvar_log_event(const char* buf, bool old_format);
+  Intvar_log_event(const char* buf, const Format_description_log_event* description_event);
   ~Intvar_log_event() {}
   Log_event_type get_type_code() { return INTVAR_EVENT;}
   const char* get_var_type_name();
   int get_data_size() { return  9; /* sizeof(type) + sizeof(val) */;}
   int write_data(IO_CACHE* file);
-  bool is_valid() { return 1; }
+  const bool is_valid() { return 1; }
 };
 
 /*****************************************************************************
 
   Rand Log Event class
 
-  Logs random seed used by the next RAND(), and by PASSWORD() in 4.1.
+  Logs random seed used by the next RAND(), and by PASSWORD() in 4.1.0. 
+  4.1.1 does not need it (it's repeatable again) so this event needn't be
+  written in 4.1.1 for PASSWORD() (but the fact that it is written is just a
+  waste, it does not cause bugs).
 
  ****************************************************************************/
 class Rand_log_event: public Log_event
@@ -739,15 +994,15 @@ class Rand_log_event: public Log_event
   int exec_event(struct st_relay_log_info* rli);
 #endif /* HAVE_REPLICATION */
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
 #endif
 
-  Rand_log_event(const char* buf, bool old_format);
+  Rand_log_event(const char* buf, const Format_description_log_event* description_event);
   ~Rand_log_event() {}
   Log_event_type get_type_code() { return RAND_EVENT;}
   int get_data_size() { return 16; /* sizeof(ulonglong) * 2*/ }
   int write_data(IO_CACHE* file);
-  bool is_valid() { return 1; }
+  const bool is_valid() { return 1; }
 };
 
 /*****************************************************************************
@@ -757,6 +1012,9 @@ class Rand_log_event: public Log_event
   Every time a query uses the value of a user variable, a User_var_log_event is
   written before the Query_log_event, to set the user variable.
 
+  Every time a query uses the value of a user variable, a User_var_log_event is
+  written before the Query_log_event, to set the user variable.
+
  ****************************************************************************/
 class User_var_log_event: public Log_event
 {
@@ -778,10 +1036,10 @@ class User_var_log_event: public Log_event
   void pack_info(Protocol* protocol);
   int exec_event(struct st_relay_log_info* rli);
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
 #endif
 
-  User_var_log_event(const char* buf, bool old_format);
+  User_var_log_event(const char* buf, const Format_description_log_event* description_event);
   ~User_var_log_event() {}
   Log_event_type get_type_code() { return USER_VAR_EVENT;}
   int get_data_size()
@@ -791,7 +1049,7 @@ class User_var_log_event: public Log_event
 	UV_CHARSET_NUMBER_SIZE + UV_VAL_LEN_SIZE + val_len);
     }
   int write_data(IO_CACHE* file);
-  bool is_valid() { return 1; }
+  const bool is_valid() { return 1; }
 };
 
 /*****************************************************************************
@@ -809,15 +1067,15 @@ class Stop_log_event: public Log_event
   {}
   int exec_event(struct st_relay_log_info* rli);
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
 #endif  
 
-  Stop_log_event(const char* buf, bool old_format):
-    Log_event(buf, old_format)
+  Stop_log_event(const char* buf, const Format_description_log_event* description_event):
+    Log_event(buf, description_event)
   {}
   ~Stop_log_event() {}
   Log_event_type get_type_code() { return STOP_EVENT;}
-  bool is_valid() { return 1; }
+  const bool is_valid() { return 1; }
 };
 
 #endif /* HAVE_REPLICATION */
@@ -850,18 +1108,23 @@ class Rotate_log_event: public Log_event
   int exec_event(struct st_relay_log_info* rli);
 #endif /* HAVE_REPLICATION */
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
 #endif
 
-  Rotate_log_event(const char* buf, int event_len, bool old_format);
+  Rotate_log_event(const char* buf, uint event_len,
+                   const Format_description_log_event* description_event);
   ~Rotate_log_event()
   {
     if (alloced)
       my_free((gptr) new_log_ident, MYF(0));
   }
   Log_event_type get_type_code() { return ROTATE_EVENT;}
+  int get_event_len()
+  {
+    return (LOG_EVENT_MINIMAL_HEADER_LEN + get_data_size());
+  }
   int get_data_size() { return  ident_len + ROTATE_HEADER_LEN;}
-  bool is_valid() { return new_log_ident != 0; }
+  const bool is_valid() { return new_log_ident != 0; }
   int write_data(IO_CACHE* file);
 };
 
@@ -899,11 +1162,12 @@ class Create_file_log_event: public Load_log_event
   int exec_event(struct st_relay_log_info* rli);
 #endif /* HAVE_REPLICATION */
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
-  void print(FILE* file, bool short_form, char* last_db, bool enable_local);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
+  void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool enable_local);
 #endif  
   
-  Create_file_log_event(const char* buf, int event_len, bool old_format);
+  Create_file_log_event(const char* buf, uint event_len,
+                        const Format_description_log_event* description_event);
   ~Create_file_log_event() {}
 
   Log_event_type get_type_code()
@@ -916,12 +1180,7 @@ class Create_file_log_event: public Load_log_event
 	    Load_log_event::get_data_size() +
 	    4 + 1 + block_len);
   }
-  int get_data_body_offset()
-  {
-    return (fake_base ? LOAD_EVENT_OVERHEAD:
-	    LOAD_EVENT_OVERHEAD + CREATE_FILE_HEADER_LEN);
-  }
-  bool is_valid() { return inited_from_old || block != 0; }
+  const bool is_valid() { return inited_from_old || block != 0; }
   int write_data_header(IO_CACHE* file);
   int write_data_body(IO_CACHE* file);
   /*
@@ -963,14 +1222,15 @@ class Append_block_log_event: public Log_event
   void pack_info(Protocol* protocol);
 #endif /* HAVE_REPLICATION */
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
 #endif
   
-  Append_block_log_event(const char* buf, int event_len);
+  Append_block_log_event(const char* buf, uint event_len, 
+                         const Format_description_log_event* description_event);
   ~Append_block_log_event() {}
   Log_event_type get_type_code() { return APPEND_BLOCK_EVENT;}
   int get_data_size() { return  block_len + APPEND_BLOCK_HEADER_LEN ;}
-  bool is_valid() { return block != 0; }
+  const bool is_valid() { return block != 0; }
   int write_data(IO_CACHE* file);
   const char* get_db() { return db; }
 };
@@ -993,15 +1253,16 @@ class Delete_file_log_event: public Log_event
   int exec_event(struct st_relay_log_info* rli);
 #endif /* HAVE_REPLICATION */
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
-  void print(FILE* file, bool short_form, char* last_db, bool enable_local);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
+  void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool enable_local);
 #endif  
   
-  Delete_file_log_event(const char* buf, int event_len);
+  Delete_file_log_event(const char* buf, uint event_len,
+                        const Format_description_log_event* description_event);
   ~Delete_file_log_event() {}
   Log_event_type get_type_code() { return DELETE_FILE_EVENT;}
   int get_data_size() { return DELETE_FILE_HEADER_LEN ;}
-  bool is_valid() { return file_id != 0; }
+  const bool is_valid() { return file_id != 0; }
   int write_data(IO_CACHE* file);
   const char* get_db() { return db; }
 };
@@ -1024,14 +1285,15 @@ class Execute_load_log_event: public Log_event
   int exec_event(struct st_relay_log_info* rli);
 #endif /* HAVE_REPLICATION */
 #else
-  void print(FILE* file, bool short_form = 0, char* last_db = 0);
+  void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
 #endif  
   
-  Execute_load_log_event(const char* buf, int event_len);
+  Execute_load_log_event(const char* buf, uint event_len, 
+                         const Format_description_log_event* description_event);
   ~Execute_load_log_event() {}
   Log_event_type get_type_code() { return EXEC_LOAD_EVENT;}
   int get_data_size() { return  EXEC_LOAD_HEADER_LEN ;}
-  bool is_valid() { return file_id != 0; }
+  const bool is_valid() { return file_id != 0; }
   int write_data(IO_CACHE* file);
   const char* get_db() { return db; }
 };
@@ -1040,13 +1302,18 @@ class Execute_load_log_event: public Log_event
 class Unknown_log_event: public Log_event
 {
 public:
-  Unknown_log_event(const char* buf, bool old_format):
-    Log_event(buf, old_format)
+  /*
+    Even if this is an unknown event, we still pass description_event to
+    Log_event's ctor, this way we can extract maximum information from the
+    event's header (the unique ID for example).
+  */
+  Unknown_log_event(const char* buf, const Format_description_log_event* description_event):
+    Log_event(buf, description_event)
   {}
   ~Unknown_log_event() {}
-  void print(FILE* file, bool short_form= 0, char* last_db= 0);
+  void print(FILE* file, bool short_form= 0, LAST_EVENT_INFO* last_event_info= 0);
   Log_event_type get_type_code() { return UNKNOWN_EVENT;}
-  bool is_valid() { return 1; }
+  const bool is_valid() { return 1; }
 };
 #endif  
 
diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h
index c963e601d2ed65c5c3d1aed44d59009486057a59..39b8fbcb1a98c6b4e473f1d1baca42c92f1a0ac1 100644
--- a/sql/mysql_priv.h
+++ b/sql/mysql_priv.h
@@ -14,6 +14,15 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
+/*
+  Mostly this file is used in the server. But a little part of it is used in
+  mysqlbinlog too (definition of SELECT_DISTINCT and others).
+  The consequence is that 90% of the file is wrapped in #ifndef MYSQL_CLIENT,
+  except the part which must be in the server and in the client.
+*/
+
+#ifndef MYSQL_CLIENT
+
 #include <my_global.h>
 #include <assert.h>
 #include <mysql_version.h>
@@ -176,7 +185,15 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
 #define TEST_NO_STACKTRACE	512
 #define TEST_SIGINT		1024	/* Allow sigint on threads */
 
-/* options for select set by the yacc parser (stored in lex->options) */
+#endif
+
+/* 
+   This is included in the server and in the client.
+   Options for select set by the yacc parser (stored in lex->options).
+   None of the 32 defines below should have its value changed, or this will
+   break replication.
+*/
+
 #define SELECT_DISTINCT		(1L << 0)
 #define SELECT_STRAIGHT_JOIN	(1L << 1)
 #define SELECT_DESCRIBE		(1L << 2)
@@ -214,6 +231,9 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
 #define OPTION_RELAXED_UNIQUE_CHECKS    (1L << 27)
 #define SELECT_NO_UNLOCK                (1L << 28)
 
+/* The rest of the file is included in the server only */
+#ifndef MYSQL_CLIENT
+
 /* options for UNION set by the yacc parser (stored in unit->union_option) */
 #define UNION_ALL               1
 
@@ -1102,3 +1122,5 @@ inline void setup_table_map(TABLE *table, TABLE_LIST *table_list, uint tablenr)
   table->map= (table_map) 1 << tablenr;
   table->force_index= table_list->force_index;
 }
+
+#endif /* MYSQL_CLIENT */
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index 7b3d8411e32324d51e461ada24549393f85cef0e..6c321972c4b8f6aa14d8b911960c51c6b2582982 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -2023,7 +2023,7 @@ bool open_log(MYSQL_LOG *log, const char *hostname,
   }
   return log->open(opt_name, type, 0, index_file_name,
 		   (read_append) ? SEQ_READ_APPEND : WRITE_CACHE,
-		   no_auto_events, max_size);
+		   no_auto_events, max_size, 0);
 }
 
 
diff --git a/sql/slave.cc b/sql/slave.cc
index 3f8a3c3761823c90ad5ba554814eabf90839fbd7..bc9471a5368a85e0cb6aa3182285e499a8db528c 100644
--- a/sql/slave.cc
+++ b/sql/slave.cc
@@ -73,8 +73,6 @@ static int safe_sleep(THD* thd, int sec, CHECK_KILLED_FUNC thread_killed,
 static int request_table_dump(MYSQL* mysql, const char* db, const char* table);
 static int create_table_from_dump(THD* thd, MYSQL *mysql, const char* db,
 				  const char* table_name, bool overwrite);
-static int check_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi);
-
 
 /*
   Find out which replications threads are running
@@ -215,6 +213,12 @@ static byte* get_table_key(TABLE_RULE_ENT* e, uint* len,
     pos			Position in relay log file 
     need_data_lock	Set to 1 if this functions should do mutex locks
     errmsg		Store pointer to error message here
+    look_for_description_event 
+                        1 if we should look for such an event. We only need
+                        this when the SQL thread starts and opens an existing
+                        relay log and has to execute it (possibly from an offset
+                        >4); then we need to read the first event of the relay
+                        log to be able to parse the events we have to execute.
 
   DESCRIPTION
   - Close old open relay log files.
@@ -232,15 +236,35 @@ static byte* get_table_key(TABLE_RULE_ENT* e, uint* len,
 
 int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
 		       ulonglong pos, bool need_data_lock,
-		       const char** errmsg)
+		       const char** errmsg,
+                       bool look_for_description_event)
 {
   DBUG_ENTER("init_relay_log_pos");
+  DBUG_PRINT("info", ("pos=%lu", pos));
 
   *errmsg=0;
   pthread_mutex_t *log_lock=rli->relay_log.get_log_lock();
   
   if (need_data_lock)
     pthread_mutex_lock(&rli->data_lock);
+
+  /*
+    Slave threads are not the only users of init_relay_log_pos(). CHANGE MASTER
+    is, too, and init_slave() too; these 2 functions allocate a description
+    event in init_relay_log_pos, which is not freed by the terminating SQL slave
+    thread as that thread is not started by these functions. So we have to free
+    the description_event here, in case, so that there is no memory leak in
+    running, say, CHANGE MASTER.
+  */
+  delete rli->relay_log.description_event_for_exec;
+  /*
+    By default the relay log is in binlog format 3 (4.0).
+    Even if format is 4, this will work enough to read the first event
+    (Format_desc) (remember that format 4 is just lenghtened compared to format
+    3; format 3 is a prefix of format 4). 
+  */
+  rli->relay_log.description_event_for_exec= new
+    Format_description_log_event(3);
   
   pthread_mutex_lock(log_lock);
   
@@ -280,8 +304,8 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
       In this case, we will use the same IO_CACHE pointer to
       read data as the IO thread is using to write data.
     */
-    if (my_b_tell((rli->cur_log=rli->relay_log.get_log_file())) == 0 &&
-	check_binlog_magic(rli->cur_log,errmsg))
+    my_b_seek((rli->cur_log=rli->relay_log.get_log_file()), (off_t)0);
+    if (check_binlog_magic(rli->cur_log,errmsg))
       goto err;
     rli->cur_log_old_open_count=rli->relay_log.get_open_count();
   }
@@ -295,8 +319,85 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
       goto err;
     rli->cur_log = &rli->cache_buf;
   }
-  if (pos >= BIN_LOG_HEADER_SIZE)
+  /*
+    In all cases, check_binlog_magic() has been called so we're at offset 4 for
+    sure.
+  */
+  if (pos > BIN_LOG_HEADER_SIZE) /* If pos<=4, we stay at 4 */
+  {
+    Log_event* ev;
+    while (look_for_description_event) 
+    {
+      /*
+        Read the possible Format_description_log_event; if position was 4, no need, it will
+        be read naturally.
+      */
+      DBUG_PRINT("info",("looking for a Format_description_log_event"));
+
+      if (my_b_tell(rli->cur_log) >= pos)
+        break;
+
+      /*
+        Because of we have rli->data_lock and log_lock, we can safely read an
+        event
+      */
+      if (!(ev=Log_event::read_log_event(rli->cur_log,0,
+                                         rli->relay_log.description_event_for_exec)))
+      {
+        DBUG_PRINT("info",("could not read event, rli->cur_log->error=%d",
+                           rli->cur_log->error));
+        if (rli->cur_log->error) /* not EOF */
+        {
+          *errmsg= "I/O error reading event at position 4";
+          goto err;
+        }
+        break;
+      }
+      else if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
+      {
+        DBUG_PRINT("info",("found Format_description_log_event"));
+        delete rli->relay_log.description_event_for_exec;
+        rli->relay_log.description_event_for_exec= (Format_description_log_event*) ev;
+        /*
+          As ev was returned by read_log_event, it has passed is_valid(), so
+          my_malloc() in ctor worked, no need to check again.
+        */
+        /*
+          Ok, we found a Format_description event. But it is not sure that this
+          describes the whole relay log; indeed, one can have this sequence
+          (starting from position 4):
+          Format_desc (of slave)
+          Rotate (of master)
+          Format_desc (of slave)
+          So the Format_desc which really describes the rest of the relay log is
+          the 3rd event (it can't be further than that, because we rotate the
+          relay log when we queue a Rotate event from the master).
+          But what describes the Rotate is the first Format_desc.
+          So what we do is:
+          go on searching for Format_description events, until you exceed the
+          position (argument 'pos') or until you find another event than Rotate
+          or Format_desc.
+        */
+      }
+      else 
+      {
+        DBUG_PRINT("info",("found event of another type=%d",
+                           ev->get_type_code()));
+        look_for_description_event= (ev->get_type_code() == ROTATE_EVENT);
+        delete ev;
+      }
+    }
     my_b_seek(rli->cur_log,(off_t)pos);
+#ifndef DBUG_OFF
+  {
+    char llbuf1[22], llbuf2[22];
+    DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
+                        llstr(my_b_tell(rli->cur_log),llbuf1), 
+                        llstr(rli->event_relay_log_pos,llbuf2)));
+  }
+#endif
+
+  }
 
 err:
   /*
@@ -311,6 +412,8 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
 
   if (need_data_lock)
     pthread_mutex_unlock(&rli->data_lock);
+  if (!rli->relay_log.description_event_for_exec->is_valid() && !*errmsg)
+    *errmsg= "Invalid Format_description log event; could be out of memory";
 
   DBUG_RETURN ((*errmsg) ? 1 : 0);
 }
@@ -428,13 +531,15 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset,
 	  sizeof(rli->group_relay_log_name)-1);
   strmake(rli->event_relay_log_name, rli->relay_log.get_log_fname(),
  	  sizeof(rli->event_relay_log_name)-1);
-  // Just first log with magic number and nothing else
-  rli->log_space_total= BIN_LOG_HEADER_SIZE;
   rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
-  rli->relay_log.reset_bytes_written();
+  if (count_relay_log_space(rli))
+  {
+    *errmsg= "Error counting relay log space";
+    goto err;
+  }
   if (!just_reset)
     error= init_relay_log_pos(rli, rli->group_relay_log_name, rli->group_relay_log_pos,
-  			      0 /* do not need data lock */, errmsg);
+  			      0 /* do not need data lock */, errmsg, 0);
   
 err:
 #ifndef DBUG_OFF
@@ -693,6 +798,10 @@ static TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len)
     different results. Note also the order of precedence of the do/ignore 
     rules (see code below). For that reason, users should not set conflicting 
     rules because they may get unpredicted results.
+    Thought which arose from a question of a big customer "I want to include all
+    tables like "abc.%" except the "%.EFG"". This can't be done now. If we
+    supported Perl regexps we could do it with this pattern: /^abc\.(?!EFG)/
+    (I could not find an equivalent in the regex library MySQL uses).
 
   RETURN VALUES
     0           should not be logged/replicated
@@ -1087,30 +1196,74 @@ static int init_intvar_from_file(int* var, IO_CACHE* f, int default_val)
   return 1;
 }
 
+/*
+  Note that we rely on the master's version (3.23, 4.0.14 etc) instead of
+  relying on the binlog's version. This is not perfect: imagine an upgrade
+  of the master without waiting that all slaves are in sync with the master;
+  then a slave could be fooled about the binlog's format. This is what happens
+  when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0
+  slaves are fooled. So we do this only to distinguish between 3.23 and more
+  recent masters (it's too late to change things for 3.23).
+  
+  RETURNS
+  0       ok
+  1       error
+*/
 
 static int get_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi)
 {
   const char* errmsg= 0;
   
-  /*
-    Note the following switch will bug when we have MySQL branch 30 ;)
-  */
-  switch (*mysql->server_version) {
-  case '3':
-    mi->old_format = 
-      (strncmp(mysql->server_version, "3.23.57", 7) < 0) /* < .57 */ ?
-      BINLOG_FORMAT_323_LESS_57 : 
-      BINLOG_FORMAT_323_GEQ_57 ;
-    break;
-  case '4':
-  case '5':
-    mi->old_format = BINLOG_FORMAT_CURRENT;
-    break;
-  default:
+  if (!my_isdigit(&my_charset_bin,*mysql->server_version))
     errmsg = "Master reported unrecognized MySQL version";
-    break;
+  else
+  {
+    /*
+      Note the following switch will bug when we have MySQL branch 30 ;)
+    */
+    switch (*mysql->server_version) 
+    {
+    case '0':
+    case '1':
+    case '2':
+      errmsg = "Master reported unrecognized MySQL version";
+      break;
+    case '3':
+      mi->rli.relay_log.description_event_for_queue= new
+        Format_description_log_event(1, mysql->server_version); 
+      break;
+    case '4':
+      mi->rli.relay_log.description_event_for_queue= new
+        Format_description_log_event(3, mysql->server_version); 
+      break;
+    default: 
+      /*
+        Master is MySQL >=5.0. Give a default Format_desc event, so that we can
+        take the early steps (like tests for "is this a 3.23 master") which we
+        have to take before we receive the real master's Format_desc which will
+        override this one. Note that the Format_desc we create below is garbage
+        (it has the format of the *slave*); it's only good to help know if the
+        master is 3.23, 4.0, etc.
+      */
+      mi->rli.relay_log.description_event_for_queue= new
+        Format_description_log_event(4, mysql->server_version); 
+      break;
+    }
   }
-
+  
+  /* 
+     This does not mean that a 5.0 slave will be able to read a 6.0 master; but
+     as we don't know yet, we don't want to forbid this for now. If a 5.0 slave
+     can't read a 6.0 master, this will show up when the slave can't read some
+     events sent by the master, and there will be error messages.
+  */
+  
+  if (errmsg)
+  {
+    sql_print_error(errmsg);
+    return 1;
+  }
+  
   MYSQL_RES *master_clock_res;
   MYSQL_ROW master_clock_row;
   time_t slave_clock;
@@ -1385,7 +1538,7 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname)
   if (open_log(&rli->relay_log, glob_hostname, opt_relay_logname,
 	       "-relay-bin", opt_relaylog_index_name,
 	       LOG_BIN, 1 /* read_append cache */,
-	       1 /* no auto events */,
+	       0 /* starting from 5.0 we want relay logs to have auto events */,
                max_relay_log_size ? max_relay_log_size : max_binlog_size))
   {
     sql_print_error("Failed in open_log() called from init_relay_log_info()");
@@ -1419,7 +1572,7 @@ file '%s', errno %d)", fname, my_errno);
 
     /* Init relay log with first entry in the relay index file */
     if (init_relay_log_pos(rli,NullS,BIN_LOG_HEADER_SIZE,0 /* no data lock */,
-			   &msg))
+			   &msg, 0))
     {
       sql_print_error("Failed to open the relay log 'FIRST' (relay_log_pos 4)");
       goto err;
@@ -1484,7 +1637,7 @@ Failed to open the existing relay log info file '%s' (errno %d)",
 			   rli->group_relay_log_name,
 			   rli->group_relay_log_pos,
 			   0 /* no data lock*/,
-			   &msg))
+			   &msg, 0))
     {
       char llbuf[22];
       sql_print_error("Failed to open the relay log '%s' (relay_log_pos %s)",
@@ -1493,8 +1646,18 @@ Failed to open the existing relay log info file '%s' (errno %d)",
       goto err;
     }
   }
-  DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
-  DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
+
+#ifndef DBUG_OFF
+  {
+    char llbuf1[22], llbuf2[22];
+    DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
+                        llstr(my_b_tell(rli->cur_log),llbuf1), 
+                        llstr(rli->event_relay_log_pos,llbuf2)));
+    DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
+    DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
+  }
+#endif
+
   /*
     Now change the cache from READ to WRITE - must do this
     before flush_relay_log_info
@@ -2251,14 +2414,18 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
     goto err;
   }    
 
+  int cmp_result;
+
   /* The "compare and wait" main loop */
   while (!thd->killed &&
          init_abort_pos_wait == abort_pos_wait &&
          slave_running)
   {
-    bool pos_reached;
-    int cmp_result= 0;
-    DBUG_ASSERT(*group_master_log_name || group_master_log_pos == 0);
+    /*
+      If we are after RESET SLAVE, and the SQL slave thread has not processed
+      any event yet, it could be that group_master_log_name is "". In that case,
+      just wait for more events (as there is no sensible comparison to do).
+    */
     if (*group_master_log_name)
     {
       char *basename= group_master_log_name + dirname_length(group_master_log_name);
@@ -2280,13 +2447,12 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
       if (group_master_log_name_extension < log_name_extension)
         cmp_result = -1 ;
       else
-        cmp_result= (group_master_log_name_extension > log_name_extension) ? 1 : 0 ;
+        cmp_result= (group_master_log_name_extension > log_name_extension) ? 
+          1 : 0 ;
+      if (((!cmp_result && group_master_log_pos >= (ulonglong)log_pos) ||
+           cmp_result > 0) || thd->killed)
+        break;
     }
-    pos_reached = ((!cmp_result && group_master_log_pos >= (ulonglong)log_pos) ||
-                   cmp_result > 0);
-    if (pos_reached || thd->killed)
-      break;
-
     //wait for master update, with optional timeout.
     
     DBUG_PRINT("info",("Waiting for master update"));
@@ -2343,6 +2509,11 @@ improper_arguments: %d  timed_out: %d",
   DBUG_RETURN( error ? error : event_count );
 }
 
+void set_slave_thread_options(THD* thd)
+{
+  thd->options = ((opt_log_slave_updates) ? OPTION_BIN_LOG:0) |
+    OPTION_AUTO_IS_NULL;
+}
 
 /*
   init_slave_thread()
@@ -2359,8 +2530,7 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
   thd->master_access= ~0;
   thd->priv_user = 0;
   thd->slave_thread = 1;
-  thd->options = ((opt_log_slave_updates) ? OPTION_BIN_LOG:0) |
-    OPTION_AUTO_IS_NULL;
+  set_slave_thread_options(thd);
   /* 
      It's nonsense to constrain the slave threads with max_join_size; if a
      query succeeded on master, we HAVE to execute it.
@@ -2623,13 +2793,14 @@ bool st_relay_log_info::is_until_satisfied()
   
   if (until_log_names_cmp_result == UNTIL_LOG_NAMES_CMP_UNKNOWN)
   {
-    /* 
-       We have no cached comaprison results so we should compare log names
-       and cache result
+    /*
+      We have no cached comparison results so we should compare log names
+      and cache result.
+      If we are after RESET SLAVE, and the SQL slave thread has not processed
+      any event yet, it could be that group_master_log_name is "". In that case,
+      just wait for more events (as there is no sensible comparison to do).
     */
 
-    DBUG_ASSERT(*log_name || log_pos == 0);
-    
     if (*log_name)
     {
       const char *basename= log_name + dirname_length(log_name);
@@ -2704,28 +2875,45 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
     int exec_res;
 
     /*
-      Skip queries originating from this server or number of
-      queries specified by the user in slave_skip_counter
-      We can't however skip event's that has something to do with the
+      Queries originating from this server must be skipped.
+      Low-level events (Format_desc, Rotate, Stop) from this server
+      must also be skipped. But for those we don't want to modify
+      group_master_log_pos, because these events did not exist on the master.
+      Format_desc is not completely skipped.
+      Skip queries specified by the user in slave_skip_counter.
+      We can't however skip events that has something to do with the
       log files themselves.
+      Filtering on own server id is extremely important, to ignore execution of
+      events created by the creation/rotation of the relay log (remember that
+      now the relay log starts with its Format_desc, has a Rotate etc).
     */
-
-    if (ev->server_id == (uint32) ::server_id ||
-	(rli->slave_skip_counter && type_code != ROTATE_EVENT))
+    
+    DBUG_PRINT("info",("type_code=%d, server_id=%d",type_code,ev->server_id));
+    
+    if ((ev->server_id == (uint32) ::server_id &&
+         type_code!= FORMAT_DESCRIPTION_EVENT) ||
+ 	(rli->slave_skip_counter && 
+         type_code != ROTATE_EVENT && type_code != STOP_EVENT &&
+         type_code != START_EVENT_V3 && type_code!= FORMAT_DESCRIPTION_EVENT))
     {
-      /* TODO: I/O thread should not even log events with the same server id */
-      rli->inc_group_relay_log_pos(ev->get_event_len(),
-		   type_code != STOP_EVENT ? ev->log_pos : LL(0),
-		   1/* skip lock*/);
+      DBUG_PRINT("info", ("event skipped"));
+      rli->inc_group_relay_log_pos((type_code == ROTATE_EVENT || 
+                                    type_code == STOP_EVENT ||
+                                    type_code == FORMAT_DESCRIPTION_EVENT) ?
+                                   LL(0) : ev->log_pos,
+                                   1/* skip lock*/);
       flush_relay_log_info(rli);
-
+      
       /*
-	Protect against common user error of setting the counter to 1
-	instead of 2 while recovering from an failed auto-increment insert
+ 	Protect against common user error of setting the counter to 1
+ 	instead of 2 while recovering from an insert which used auto_increment,
+ 	rand or user var.
       */
       if (rli->slave_skip_counter && 
-	  !((type_code == INTVAR_EVENT || type_code == STOP_EVENT) &&
-	    rli->slave_skip_counter == 1))
+ 	  !((type_code == INTVAR_EVENT ||
+             type_code == RAND_EVENT || 
+             type_code == USER_VAR_EVENT) &&
+ 	    rli->slave_skip_counter == 1))
         --rli->slave_skip_counter;
       pthread_mutex_unlock(&rli->data_lock);
       delete ev;     
@@ -2741,7 +2929,16 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
     ev->thd = thd;
     exec_res = ev->exec_event(rli);
     DBUG_ASSERT(rli->sql_thd==thd);
-    delete ev;
+    /* 
+       Format_description_log_event should not be deleted because it will be
+       used to read info about the relay log's format; it will be deleted when
+       the SQL thread does not need it, i.e. when this thread terminates.
+    */
+    if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
+    {
+      DBUG_PRINT("info", ("Deleting the event after it has been executed"));
+      delete ev;
+    }
     return exec_res;
   }
   else
@@ -2842,7 +3039,8 @@ extern "C" pthread_handler_decl(handle_slave_io,arg)
   thd->proc_info = "Checking master version";
   if (get_master_version_and_clock(mysql, mi))
     goto err;
-  if (!mi->old_format)
+
+  if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1)
   {
     /*
       Register ourselves with the master.
@@ -3043,6 +3241,9 @@ log space");
   pthread_mutex_lock(&mi->run_lock);
   mi->slave_running = 0;
   mi->io_thd = 0;
+  /* Forget the relay log's format */
+  delete mi->rli.relay_log.description_event_for_queue;
+  mi->rli.relay_log.description_event_for_queue= 0;
   // TODO: make rpl_status part of MASTER_INFO
   change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE);
   mi->abort_slave = 0; // TODO: check if this is needed
@@ -3137,15 +3338,38 @@ extern "C" pthread_handler_decl(handle_slave_sql,arg)
   if (init_relay_log_pos(rli,
 			 rli->group_relay_log_name,
 			 rli->group_relay_log_pos,
-			 1 /*need data lock*/, &errmsg))
+			 1 /*need data lock*/, &errmsg,
+                         1 /*look for a description_event*/))
   {
     sql_print_error("Error initializing relay log position: %s",
 		    errmsg);
     goto err;
   }
   THD_CHECK_SENTRY(thd);
-  DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
-  DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
+#ifndef DBUG_OFF
+  {
+    char llbuf1[22], llbuf2[22];
+    DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
+                        llstr(my_b_tell(rli->cur_log),llbuf1), 
+                        llstr(rli->event_relay_log_pos,llbuf2)));
+    DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
+    /*
+      Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the
+      correct position when it's called just after my_b_seek() (the questionable
+      stuff is those "seek is done on next read" comments in the my_b_seek()
+      source code).
+      The crude reality is that this assertion randomly fails whereas
+      replication seems to work fine. And there is no easy explanation why it
+      fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of
+      init_relay_log_pos() called above). Maybe the assertion would be
+      meaningful if we held rli->data_lock between the my_b_seek() and the
+      DBUG_ASSERT().
+    */
+#ifdef SHOULD_BE_CHECKED
+    DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
+#endif
+  }
+#endif
   DBUG_ASSERT(rli->sql_thd == thd);
 
   DBUG_PRINT("master_info",("log_file_name: %s  position: %s",
@@ -3205,11 +3429,9 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
   DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
   /* When master_pos_wait() wakes up it will check this and terminate */
   rli->slave_running= 0; 
-  /* 
-     Going out of the transaction. Necessary to mark it, in case the user
-     restarts replication from a non-transactional statement (with CHANGE
-     MASTER).
-  */
+  /* Forget the relay log's format */
+  delete rli->relay_log.description_event_for_exec;
+  rli->relay_log.description_event_for_exec= 0;
   /* Wake up master_pos_wait() */
   pthread_mutex_unlock(&rli->data_lock);
   DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
@@ -3299,7 +3521,7 @@ static int process_io_create_file(MASTER_INFO* mi, Create_file_log_event* cev)
       {
 	net_write_command(net, 0, "", 0, "", 0);/* 3.23 master wants it */
 	Execute_load_log_event xev(thd,0,0);
-	xev.log_pos = mi->master_log_pos;
+	xev.log_pos = cev->log_pos;
 	if (unlikely(mi->rli.relay_log.append(&xev)))
 	{
 	  sql_print_error("Slave I/O: error writing Exec_load event to \
@@ -3313,7 +3535,6 @@ relay log");
       {
 	cev->block = (char*)net->read_pos;
 	cev->block_len = num_bytes;
-	cev->log_pos = mi->master_log_pos;
 	if (unlikely(mi->rli.relay_log.append(cev)))
 	{
 	  sql_print_error("Slave I/O: error writing Create_file event to \
@@ -3327,7 +3548,7 @@ relay log");
       {
 	aev.block = (char*)net->read_pos;
 	aev.block_len = num_bytes;
-	aev.log_pos = mi->master_log_pos;
+	aev.log_pos = cev->log_pos;
 	if (unlikely(mi->rli.relay_log.append(&aev)))
 	{
 	  sql_print_error("Slave I/O: error writing Append_block event to \
@@ -3355,6 +3576,7 @@ relay log");
   DESCRIPTION
     Updates the master info with the place in the next binary
     log where we should start reading.
+    Rotate the relay log to avoid mixed-format relay logs.
 
   NOTES
     We assume we already locked mi->data_lock
@@ -3386,21 +3608,30 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev)
   if (disconnect_slave_event_count)
     events_till_disconnect++;
 #endif
+  /*
+    If description_event_for_queue is format <4, there is conversion in the
+    relay log to the slave's format (4). And Rotate can mean upgrade or
+    nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so
+    no need to reset description_event_for_queue now. And if it's nothing (same
+    master version as before), no need (still using the slave's format).
+  */
+  if (mi->rli.relay_log.description_event_for_queue->binlog_version >= 4)
+  {
+    delete mi->rli.relay_log.description_event_for_queue;
+    /* start from format 3 (MySQL 4.0) again */
+    mi->rli.relay_log.description_event_for_queue= new
+      Format_description_log_event(3);
+  }
+
+  rotate_relay_log(mi); /* will take the right mutexes */
   DBUG_RETURN(0);
 }
 
-
 /*
-  queue_old_event()
-
-  Writes a 3.23 event to the relay log.
-
-  TODO: 
-    Test this code before release - it has to be tested on a separate
-    setup with 3.23 master 
+  Reads a 3.23 event and converts it to the slave's format. This code was copied
+  from MySQL 4.0.
 */
-
-static int queue_old_event(MASTER_INFO *mi, const char *buf,
+static int queue_binlog_ver_1_event(MASTER_INFO *mi, const char *buf,
 			   ulong event_len)
 {
   const char *errmsg = 0;
@@ -3408,7 +3639,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
   bool ignore_event= 0;
   char *tmp_buf = 0;
   RELAY_LOG_INFO *rli= &mi->rli;
-  DBUG_ENTER("queue_old_event");
+  DBUG_ENTER("queue_binlog_ver_1_event");
 
   /*
     If we get Load event, we need to pass a non-reusable buffer
@@ -3432,7 +3663,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
     connected to the master).
   */
   Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg,
-					    1 /*old format*/ );
+                                            mi->rli.relay_log.description_event_for_queue);
   if (unlikely(!ev))
   {
     sql_print_error("Read invalid event from master: '%s',\
@@ -3442,7 +3673,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
     DBUG_RETURN(1);
   }
   pthread_mutex_lock(&mi->data_lock);
-  ev->log_pos = mi->master_log_pos;
+  ev->log_pos= mi->master_log_pos; /* 3.23 events don't contain log_pos */
   switch (ev->get_type_code()) {
   case STOP_EVENT:
     ignore_event= 1;
@@ -3467,9 +3698,11 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
   {
     /* We come here when and only when tmp_buf != 0 */
     DBUG_ASSERT(tmp_buf);
+    inc_pos=event_len;
+    ev->log_pos+= inc_pos;
     int error = process_io_create_file(mi,(Create_file_log_event*)ev);
     delete ev;
-    mi->master_log_pos += event_len;
+    mi->master_log_pos += inc_pos;
     DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos));
     pthread_mutex_unlock(&mi->data_lock);
     my_free((char*)tmp_buf, MYF(0));
@@ -3481,6 +3714,12 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
   }
   if (likely(!ignore_event))
   {
+    if (ev->log_pos) 
+      /* 
+         Don't do it for fake Rotate events (see comment in
+      Log_event::Log_event(const char* buf...) in log_event.cc).
+      */
+      ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */
     if (unlikely(rli->relay_log.append(ev)))
     {
       delete ev;
@@ -3496,10 +3735,98 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
   DBUG_RETURN(0);
 }
 
+/*
+  Reads a 4.0 event and converts it to the slave's format. This code was copied
+  from queue_binlog_ver_1_event(), with some affordable simplifications.
+*/
+static int queue_binlog_ver_3_event(MASTER_INFO *mi, const char *buf,
+			   ulong event_len)
+{
+  const char *errmsg = 0;
+  ulong inc_pos;
+  char *tmp_buf = 0;
+  RELAY_LOG_INFO *rli= &mi->rli;
+  DBUG_ENTER("queue_binlog_ver_3_event");
+
+  /* read_log_event() will adjust log_pos to be end_log_pos */
+  Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg,
+                                            mi->rli.relay_log.description_event_for_queue);
+  if (unlikely(!ev))
+  {
+    sql_print_error("Read invalid event from master: '%s',\
+ master could be corrupt but a more likely cause of this is a bug",
+		    errmsg);
+    my_free((char*) tmp_buf, MYF(MY_ALLOW_ZERO_PTR));
+    DBUG_RETURN(1);
+  }
+  pthread_mutex_lock(&mi->data_lock);
+  switch (ev->get_type_code()) {
+  case STOP_EVENT:
+    goto err;
+  case ROTATE_EVENT:
+    if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
+    {
+      delete ev;
+      pthread_mutex_unlock(&mi->data_lock);
+      DBUG_RETURN(1);
+    }
+    inc_pos= 0;
+    break;
+  default:
+    inc_pos= event_len;
+    break;
+  }
+  if (unlikely(rli->relay_log.append(ev)))
+  {
+    delete ev;
+    pthread_mutex_unlock(&mi->data_lock);
+    DBUG_RETURN(1);
+  }
+  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
+  delete ev;
+  mi->master_log_pos+= inc_pos;
+err:
+  DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos));
+  pthread_mutex_unlock(&mi->data_lock);
+  DBUG_RETURN(0);
+}
+
+/*
+  queue_old_event()
+
+  Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0
+  (exactly, slave's) format. To do the conversion, we create a 5.0 event from
+  the 3.23/4.0 bytes, then write this event to the relay log.
+
+  TODO: 
+    Test this code before release - it has to be tested on a separate
+    setup with 3.23 master or 4.0 master
+*/
+
+static int queue_old_event(MASTER_INFO *mi, const char *buf,
+			   ulong event_len)
+{
+  switch (mi->rli.relay_log.description_event_for_queue->binlog_version)
+  {
+  case 1:
+      return queue_binlog_ver_1_event(mi,buf,event_len);
+  case 3:
+      return queue_binlog_ver_3_event(mi,buf,event_len);
+  default: /* unsupported format; eg version 2 */
+    DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()",
+                       mi->rli.relay_log.description_event_for_queue->binlog_version));  
+    return 1;
+  }
+}
 
 /*
   queue_event()
 
+  If the event is 3.23/4.0, passes it to queue_old_event() which will convert
+  it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is
+  no format conversion, it's pure read/write of bytes.
+  So a 5.0.0 slave's relay log can contain events in the slave's format or in
+  any >=5.0.0 format.
 */
 
 int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
@@ -3509,7 +3836,8 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
   RELAY_LOG_INFO *rli= &mi->rli;
   DBUG_ENTER("queue_event");
 
-  if (mi->old_format)
+  if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 &&
+      buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
     DBUG_RETURN(queue_old_event(mi,buf,event_len));
 
   pthread_mutex_lock(&mi->data_lock);
@@ -3536,7 +3864,7 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
     goto err;
   case ROTATE_EVENT:
   {
-    Rotate_log_event rev(buf,event_len,0);
+    Rotate_log_event rev(buf,event_len,mi->rli.relay_log.description_event_for_queue); 
     if (unlikely(process_io_rotate(mi,&rev)))
     {
       error= 1;
@@ -3549,6 +3877,47 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
     inc_pos= 0;
     break;
   }
+  case FORMAT_DESCRIPTION_EVENT:
+  {
+    /*
+      Create an event, and save it (when we rotate the relay log, we will have
+      to write this event again).
+    */
+    /*
+      We are the only thread which reads/writes description_event_for_queue. The
+      relay_log struct does not move (though some members of it can change), so
+      we needn't any lock (no rli->data_lock, no log lock).
+    */
+    Format_description_log_event* tmp= mi->rli.relay_log.description_event_for_queue;
+    const char* errmsg;
+    if (!(mi->rli.relay_log.description_event_for_queue= (Format_description_log_event*)
+          Log_event::read_log_event(buf, event_len, &errmsg,
+                                    mi->rli.relay_log.description_event_for_queue))) 
+    {
+      delete tmp;
+      error= 2;
+      goto err;
+    }
+    delete tmp;
+    /*
+      Set 'created' to 0, so that in next relay logs this event does not trigger
+      cleaning actions on the slave in Format_description_log_event::exec_event().
+    */
+    mi->rli.relay_log.description_event_for_queue->created= 0;
+    /* 
+       Though this does some conversion to the slave's format, this will
+       preserve the master's binlog format version, and number of event types. 
+    */
+    /* 
+       If the event was not requested by the slave (the slave did not ask for
+       it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos 
+    */
+    inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
+    DBUG_PRINT("info",("binlog format is now %d",
+                       mi->rli.relay_log.description_event_for_queue->binlog_version));  
+
+  }
+  break;
   default:
     inc_pos= event_len;
     break;
@@ -3574,20 +3943,29 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
       We still want to increment, so that we won't re-read this event from the
       master if the slave IO thread is now stopped/restarted (more efficient if
       the events we are ignoring are big LOAD DATA INFILE).
+      But events which were generated by this slave and which do not exist in
+      the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment
+      mi->master_log_pos.
     */
-    mi->master_log_pos+= inc_pos;
+    if (buf[EVENT_TYPE_OFFSET]!=FORMAT_DESCRIPTION_EVENT &&
+        buf[EVENT_TYPE_OFFSET]!=ROTATE_EVENT &&
+        buf[EVENT_TYPE_OFFSET]!=STOP_EVENT)
+      mi->master_log_pos+= inc_pos;
     DBUG_PRINT("info", ("master_log_pos: %d, event originating from the same server, ignored", (ulong) mi->master_log_pos));
   }  
   else /* write the event to the relay log */
-    if (likely(!(error= rli->relay_log.appendv(buf,event_len,0))))
+    if (likely(!(rli->relay_log.appendv(buf,event_len,0))))
     {
       mi->master_log_pos+= inc_pos;
       DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos));
       rli->relay_log.harvest_bytes_written(&rli->log_space_total);
     }
+    else
+      error=3;
 
 err:
   pthread_mutex_unlock(&mi->data_lock);
+  DBUG_PRINT("info", ("error=%d", error));
   DBUG_RETURN(error);
 }
 
@@ -3612,6 +3990,7 @@ void end_relay_log_info(RELAY_LOG_INFO* rli)
   }
   rli->inited = 0;
   rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
+  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
   /*
     Delete the slave's temporary tables from memory.
     In the future there will be other actions than this, to ensure persistance
@@ -3832,6 +4211,7 @@ static IO_CACHE *reopen_relay_log(RELAY_LOG_INFO *rli, const char **errmsg)
     relay_log_pos	Current log pos
     pending		Number of bytes already processed from the event
   */
+  rli->event_relay_log_pos= max(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE);
   my_b_seek(cur_log,rli->event_relay_log_pos);
   DBUG_RETURN(cur_log);
 }
@@ -3890,28 +4270,40 @@ Log_event* next_event(RELAY_LOG_INFO* rli)
 	hot_log=0;				// Using old binary log
       }
     }
+
 #ifndef DBUG_OFF
     {
+      /* This is an assertion which sometimes fails, let's try to track it */
       char llbuf1[22], llbuf2[22];
-      DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
-      /*
-	The next assertion sometimes (very rarely) fails, let's try to track
-	it
-      */
-      DBUG_PRINT("info", ("\
-Before assert, my_b_tell(cur_log)=%s  rli->event_relay_log_pos=%s",
+      DBUG_PRINT("info", ("my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s",
                           llstr(my_b_tell(cur_log),llbuf1), 
-                          llstr(rli->group_relay_log_pos,llbuf2)));
-       DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos);
+                          llstr(rli->event_relay_log_pos,llbuf2)));
+      DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
+      DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos);
     }
 #endif
     /*
       Relay log is always in new format - if the master is 3.23, the
-      I/O thread will convert the format for us
+      I/O thread will convert the format for us.
+      A problem: the description event may be in a previous relay log. So if the
+      slave has been shutdown meanwhile, we would have to look in old relay
+      logs, which may even have been deleted. So we need to write this
+      description event at the beginning of the relay log.
+      When the relay log is created when the I/O thread starts, easy: the master
+      will send the description event and we will queue it.
+      But if the relay log is created by new_file(): then the solution is:
+      MYSQL_LOG::open() will write the buffered description event.
     */
-    if ((ev=Log_event::read_log_event(cur_log,0,(bool)0 /* new format */)))
+    if ((ev=Log_event::read_log_event(cur_log,0,
+                                      rli->relay_log.description_event_for_exec)))
+      
     {
       DBUG_ASSERT(thd==rli->sql_thd);
+      /*
+        read it while we have a lock, to avoid a mutex lock in
+        inc_event_relay_log_pos()
+      */
+      rli->future_event_relay_log_pos= my_b_tell(cur_log);
       if (hot_log)
 	pthread_mutex_unlock(log_lock);
       DBUG_RETURN(ev);
@@ -4106,8 +4498,9 @@ void rotate_relay_log(MASTER_INFO* mi)
   DBUG_ENTER("rotate_relay_log");
   RELAY_LOG_INFO* rli= &mi->rli;
 
-  lock_slave_threads(mi);
-  pthread_mutex_lock(&rli->data_lock);
+  /* We don't lock rli->run_lock. This would lead to deadlocks. */
+  pthread_mutex_lock(&mi->run_lock);
+
   /* 
      We need to test inited because otherwise, new_file() will attempt to lock
      LOCK_log, which may not be inited (if we're not a slave).
@@ -4136,8 +4529,7 @@ void rotate_relay_log(MASTER_INFO* mi)
   */
   rli->relay_log.harvest_bytes_written(&rli->log_space_total);
 end:
-  pthread_mutex_unlock(&rli->data_lock);
-  unlock_slave_threads(mi);
+  pthread_mutex_unlock(&mi->run_lock);
   DBUG_VOID_RETURN;
 }
 
diff --git a/sql/slave.h b/sql/slave.h
index e42b93a47ef3d9d1b9784b0830e36dc2d5b12272..3313f587b3dcefef20816eb864b2ef3b2a5e3aaf 100644
--- a/sql/slave.h
+++ b/sql/slave.h
@@ -67,11 +67,6 @@ extern my_bool opt_log_slave_updates;
 extern ulonglong relay_log_space_limit;
 struct st_master_info;
 
-enum enum_binlog_formats {
-  BINLOG_FORMAT_CURRENT=0, /* 0 is important for easy 'if (mi->old_format)' */
-  BINLOG_FORMAT_323_LESS_57, 
-  BINLOG_FORMAT_323_GEQ_57 };
-
 /*
   TODO: this needs to be redone, but for now it does not matter since
   we do not have multi-master yet.
@@ -186,6 +181,8 @@ typedef struct st_relay_log_info
   ulonglong group_relay_log_pos;
   char event_relay_log_name[FN_REFLEN];
   ulonglong event_relay_log_pos;
+  ulonglong future_event_relay_log_pos;
+
   /* 
      Original log name and position of the group we're currently executing
      (whose coordinates are group_relay_log_name/pos in the relay log)
@@ -207,11 +204,13 @@ typedef struct st_relay_log_info
 
   /*
     InnoDB internally stores the master log position it has processed
-    so far; the position to store is really the sum of 
-    pos + pending + event_len here since we must store the pos of the
-    END of the current log event
+    so far; when the InnoDB code to store this position is called, we have not
+    updated rli->group_master_log_pos yet. So the position is the event's
+    log_pos (the position of the end of the event); we save it in the variable
+    below. It's the *coming* group_master_log_pos (the one which will be
+    group_master_log_pos in the coming milliseconds).
   */
-  int event_len;
+  ulonglong future_group_master_log_pos;
 
   time_t last_master_timestamp; 
 
@@ -285,16 +284,17 @@ typedef struct st_relay_log_info
       until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
   }
   
-  inline void inc_event_relay_log_pos(ulonglong val)
+  inline void inc_event_relay_log_pos()
   {
-    event_relay_log_pos+= val;
+    event_relay_log_pos= future_event_relay_log_pos;
   }
 
-  void inc_group_relay_log_pos(ulonglong val, ulonglong log_pos, bool skip_lock=0)
+  void inc_group_relay_log_pos(ulonglong log_pos,
+                               bool skip_lock=0)  
   {
     if (!skip_lock)
       pthread_mutex_lock(&data_lock);
-    inc_event_relay_log_pos(val);
+    inc_event_relay_log_pos();
     group_relay_log_pos= event_relay_log_pos;
     strmake(group_relay_log_name,event_relay_log_name,
             sizeof(group_relay_log_name)-1);
@@ -311,8 +311,31 @@ typedef struct st_relay_log_info
       not advance as it should on the non-transactional slave (it advances by
       big leaps, whereas it should advance by small leaps).
     */
-    if (log_pos) // 3.23 binlogs don't have log_posx
-      group_master_log_pos= log_pos+ val;
+    /*
+      In 4.x we used the event's len to compute the positions here. This is
+      wrong if the event was 3.23/4.0 and has been converted to 5.0, because
+      then the event's len is not what is was in the master's binlog, so this
+      will make a wrong group_master_log_pos (yes it's a bug in 3.23->4.0
+      replication: Exec_master_log_pos is wrong). Only way to solve this is to
+      have the original offset of the end of the event the relay log. This is
+      what we do in 5.0: log_pos has become "end_log_pos" (because the real use
+      of log_pos in 4.0 was to compute the end_log_pos; so better to store
+      end_log_pos instead of begin_log_pos.
+      If we had not done this fix here, the problem would also have appeared
+      when the slave and master are 5.0 but with different event length (for
+      example the slave is more recent than the master and features the event
+      UID). It would give false MASTER_POS_WAIT, false Exec_master_log_pos in
+      SHOW SLAVE STATUS, and so the user would do some CHANGE MASTER using this
+      value which would lead to badly broken replication.
+      Even the relay_log_pos will be corrupted in this case, because the len is
+      the relay log is not "val".
+      With the end_log_pos solution, we avoid computations involving lengthes.
+    */
+    DBUG_PRINT("info", ("log_pos=%lld group_master_log_pos=%lld",
+                        log_pos,group_master_log_pos));
+    if (log_pos) // some events (like fake Rotate) don't have log_pos
+      // when we are here, log_pos is the end of the event
+      group_master_log_pos= log_pos;
     pthread_cond_broadcast(&data_cond);
     if (!skip_lock)
       pthread_mutex_unlock(&data_lock);
@@ -389,7 +412,6 @@ typedef struct st_master_info
   int events_till_abort;
 #endif
   bool inited;
-  enum enum_binlog_formats old_format;
   volatile bool abort_slave, slave_running;
   volatile ulong slave_run_id;
   /* 
@@ -404,7 +426,7 @@ typedef struct st_master_info
   long clock_diff_with_master; 
   
   st_master_info()
-    :ssl(0), fd(-1),  io_thd(0), inited(0), old_format(BINLOG_FORMAT_CURRENT),
+    :ssl(0), fd(-1),  io_thd(0), inited(0),
      abort_slave(0),slave_running(0), slave_run_id(0)
   {
     host[0] = 0; user[0] = 0; password[0] = 0;
@@ -535,10 +557,12 @@ void lock_slave_threads(MASTER_INFO* mi);
 void unlock_slave_threads(MASTER_INFO* mi);
 void init_thread_mask(int* mask,MASTER_INFO* mi,bool inverse);
 int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,ulonglong pos,
-		       bool need_data_lock, const char** errmsg);
+		       bool need_data_lock, const char** errmsg,
+                       bool look_for_description_event);
 
 int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset,
 		     const char** errmsg);
+void set_slave_thread_options(THD* thd);
 void rotate_relay_log(MASTER_INFO* mi);
 
 extern "C" pthread_handler_decl(handle_slave_io,arg);
diff --git a/sql/sql_class.cc b/sql/sql_class.cc
index d1ebcdbd15e6ff86cf0bbff624b01b8a6ae34a09..b9051f88fbd54731d68981146f36e5673e23d54f 100644
--- a/sql/sql_class.cc
+++ b/sql/sql_class.cc
@@ -92,6 +92,7 @@ THD::THD():user_time(0), is_fatal_error(0),
 	   global_read_lock(0), bootstrap(0), spcont(NULL)
 {
   host= user= priv_user= db= ip= 0;
+  catalog= (char*)"std"; // the only catalog we have for now
   host_or_ip= "connecting host";
   locked=some_tables_deleted=no_errors=password= 0;
   query_start_used= 0;
diff --git a/sql/sql_class.h b/sql/sql_class.h
index 8263789a2a2a1b33ac314745ecc0183cc97370ad..f57272b6c6673c2ba7770f0e5ba21a1965a03720 100644
--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -26,6 +26,7 @@
 class Query_log_event;
 class Load_log_event;
 class Slave_log_event;
+class Format_description_log_event;
 class sp_rcontext;
 class sp_cache;
 
@@ -99,7 +100,14 @@ class MYSQL_LOG
   enum cache_type io_cache_type;
   bool write_error, inited;
   bool need_start_event;
-  bool no_auto_events;				// For relay binlog
+  /*
+    no_auto_events means we don't want any of these automatic events :
+    Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't want
+    a Rotate_log event to be written to the relay log. When we start a relay log
+    etc. So in 4.x this is 1 for relay logs, 0 for binlogs.
+    In 5.0 it's 0 for relay logs too!
+  */
+  bool no_auto_events;			     
   /* 
      The max size before rotation (usable only if log_type == LOG_BIN: binary
      logs and relay logs).
@@ -116,6 +124,18 @@ class MYSQL_LOG
 public:
   MYSQL_LOG();
   ~MYSQL_LOG();
+
+  /* 
+     These describe the log's format. This is used only for relay logs.
+     _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's
+     necessary to have 2 distinct objects, because the I/O thread may be reading
+     events in a different format from what the SQL thread is reading (consider
+     the case of a master which has been upgraded from 5.0 to 5.1 without doing
+     RESET MASTER, or from 4.x to 5.0).
+  */
+  Format_description_log_event *description_event_for_exec,
+    *description_event_for_queue;
+
   void reset_bytes_written()
   {
     bytes_written = 0;
@@ -144,7 +164,8 @@ class MYSQL_LOG
   bool open(const char *log_name,enum_log_type log_type,
 	    const char *new_name, const char *index_file_name_arg,
 	    enum cache_type io_cache_type_arg,
-	    bool no_auto_events_arg, ulong max_size);
+	    bool no_auto_events_arg, ulong max_size,
+            bool null_created);
   void new_file(bool need_lock= 1);
   bool write(THD *thd, enum enum_server_command command,
 	     const char *format,...);
@@ -590,9 +611,10 @@ class THD :public ilink,
      the connection
     priv_user - The user privilege we are using. May be '' for anonymous user.
     db - currently selected database
+    catalog - currently selected catalog
     ip - client IP
    */
-  char	  *host,*user,*priv_user,*db,*ip;
+  char	  *host,*user,*priv_user,*db,*catalog,*ip;
   char	  priv_host[MAX_HOSTNAME];
   /* remote (peer) port */
   uint16 peer_port;
diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc
index c0ee42771289cbb33fd31b8c007d189fa0ef9eb9..d6d523891838f6edc83dcc9a77dabfe4afe88821 100644
--- a/sql/sql_repl.cc
+++ b/sql/sql_repl.cc
@@ -48,16 +48,34 @@ int check_binlog_magic(IO_CACHE* log, const char** errmsg)
   return 0;
 }
 
+ /* 
+    fake_rotate_event() builds a fake (=which does not exist physically in any
+    binlog) Rotate event, which contains the name of the binlog we are going to
+    send to the slave (because the slave may not know it if it just asked for
+    MASTER_LOG_FILE='', MASTER_LOG_POS=4).
+    < 4.0.14, fake_rotate_event() was called only if the requested pos was
+    4. After this version we always call it, so that a 3.23.58 slave can rely on
+    it to detect if the master is 4.0 (and stop) (the _fake_ Rotate event has
+    zeros in the good positions which, by chance, make it possible for the 3.23
+    slave to detect that this event is unexpected) (this is luck which happens
+    because the master and slave disagree on the size of the header of
+    Log_event).
+ 
+    Relying on the event length of the Rotate event instead of these well-placed
+    zeros was not possible as Rotate events have a variable-length part. 
+*/
+
 static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
-                             ulonglong position, const char**errmsg)
+                             ulonglong position, const char** errmsg)
 {
+  DBUG_ENTER("fake_rotate_event");
   char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN];
-  memset(header, 0, 4); // when does not matter
+  memset(header, 0, 4); // 'when' (the timestamp) does not matter, is set to 0
   header[EVENT_TYPE_OFFSET] = ROTATE_EVENT;
 
   char* p = log_file_name+dirname_length(log_file_name);
   uint ident_len = (uint) strlen(p);
-  ulong event_len = ident_len + ROTATE_EVENT_OVERHEAD;
+  ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN;
   int4store(header + SERVER_ID_OFFSET, server_id);
   int4store(header + EVENT_LEN_OFFSET, event_len);
   int2store(header + FLAGS_OFFSET, 0);
@@ -72,9 +90,9 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
   if (my_net_write(net, (char*)packet->ptr(), packet->length()))
   {
     *errmsg = "failed on my_net_write()";
-    return -1;
+    DBUG_RETURN(-1);
   }
-  return 0;
+  DBUG_RETURN(0);
 }
 
 static int send_file(THD *thd)
@@ -310,6 +328,36 @@ int purge_master_logs_before_date(THD* thd, time_t purge_time)
   return purge_error_message(thd ,res);
 }
 
+int test_for_non_eof_log_read_errors(int error, const char *errmsg)
+{
+  if (error == LOG_READ_EOF) 
+    return 0;
+  my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
+  switch (error) {
+  case LOG_READ_BOGUS:
+    errmsg = "bogus data in log event";
+    break;
+  case LOG_READ_TOO_LARGE:
+    errmsg = "log event entry exceeded max_allowed_packet; \
+Increase max_allowed_packet on master";
+    break;
+  case LOG_READ_IO:
+    errmsg = "I/O error reading log event";
+    break;
+  case LOG_READ_MEM:
+    errmsg = "memory allocation failed reading log event";
+    break;
+  case LOG_READ_TRUNC:
+    errmsg = "binlog truncated in the middle of event";
+    break;
+  default:
+    errmsg = "unknown error reading log event on the master";
+    break;
+  }
+  return error;
+}
+
+
 /*
   TODO: Clean up loop to only have one call to send_file()
 */
@@ -326,6 +374,7 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
   int error;
   const char *errmsg = "Unknown error";
   NET* net = &thd->net;
+  pthread_mutex_t *log_lock;
 #ifndef DBUG_OFF
   int left_events = max_binlog_dump_events;
 #endif
@@ -385,18 +434,25 @@ impossible position";
     goto err;
   }
 
-  my_b_seek(&log, pos);				// Seek will done on next read
   /*
     We need to start a packet with something other than 255
-    to distiquish it from error
+    to distinguish it from error
   */
-  packet->set("\0", 1, &my_charset_bin);
+  packet->set("\0", 1, &my_charset_bin); /* This is the start of a new packet */
 
   /*
+    Tell the client about the log name with a fake Rotate event;
+    this is needed even if we also send a Format_description_log_event just
+    after, because that event does not contain the binlog's name.
+    Note that as this Rotate event is sent before Format_description_log_event,
+    the slave cannot have any info to understand this event's format, so the
+    header len of Rotate_log_event is FROZEN
+    (so in 5.0 it will have a header shorter than other events except
+    FORMAT_DESCRIPTION_EVENT). 
     Before 4.0.14 we called fake_rotate_event below only if 
     (pos == BIN_LOG_HEADER_SIZE), because if this is false then the slave
     already knows the binlog's name.
-    Now we always call fake_rotate_event; if the slave already knew the log's
+    Since, we always call fake_rotate_event; if the slave already knew the log's
     name (ex: CHANGE MASTER TO MASTER_LOG_FILE=...) this is useless but does
     not harm much. It is nice for 3.23 (>=.58) slaves which test Rotate events
     to see if the master is 4.0 (then they choose to stop because they can't
@@ -413,15 +469,72 @@ impossible position";
   */
   if (fake_rotate_event(net, packet, log_file_name, pos, &errmsg))
   {
+    /* 
+       This error code is not perfect, as fake_rotate_event() does not read
+       anything from the binlog; if it fails it's because of an error in
+       my_net_write(), fortunately it will say it in errmsg. 
+    */
     my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
     goto err;
   }
   packet->set("\0", 1, &my_charset_bin);
 
+  /*
+    We can set log_lock now, it does not move (it's a member of mysql_bin_log,
+    and it's already inited, and it will be destroyed only at shutdown).
+  */
+  log_lock = mysql_bin_log.get_log_lock();  
+  if (pos > BIN_LOG_HEADER_SIZE)
+   {
+     /* Try to find a Format_description_log_event at the beginning of the binlog */
+     if (!(error = Log_event::read_log_event(&log, packet, log_lock)))
+     {
+       /*
+         The packet has offsets equal to the normal offsets in a binlog event
+         +1 (the first character is \0).
+       */
+       DBUG_PRINT("info",
+                  ("Looked for a Format_description_log_event, found event type %d",
+                   (*packet)[EVENT_TYPE_OFFSET+1]));
+       if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
+       {
+         /*
+           mark that this event with "log_pos=0", so the slave
+           should not increment master's binlog position
+           (rli->group_master_log_pos)
+         */
+         int4store(packet->c_ptr() +LOG_POS_OFFSET+1,0);
+         /* send it */
+         if (my_net_write(net, (char*)packet->ptr(), packet->length()))
+         {
+           errmsg = "Failed on my_net_write()";
+           my_errno= ER_UNKNOWN_ERROR;
+           goto err;
+         }
+         /*
+           No need to save this event. We are only doing simple reads (no real
+           parsing of the events) so we don't need it. And so we don't need the
+           artificial Format_description_log_event of 3.23&4.x.
+         */
+       }
+     }
+     else
+       if (test_for_non_eof_log_read_errors(error, errmsg))
+         goto err;
+     /* 
+        else: it's EOF, nothing to do, go on reading next events, the
+        Format_description_log_event will be found naturally if it is written.
+     */
+     /* reset the packet as we wrote to it in any case */
+     packet->set("\0", 1, &my_charset_bin);
+   } /* end of if (pos > BIN_LOG_HEADER_SIZE); if false, the Format_description_log_event
+        event will be found naturally. */
+  
+  /* seek to the requested position, to start the requested dump */
+  my_b_seek(&log, pos);			// Seek will done on next read
+
   while (!net->error && net->vio != 0 && !thd->killed)
   {
-    pthread_mutex_t *log_lock = mysql_bin_log.get_log_lock();
-
     while (!(error = Log_event::read_log_event(&log, packet, log_lock)))
     {
 #ifndef DBUG_OFF
@@ -433,7 +546,7 @@ impossible position";
 	goto err;
       }
 #endif
-      if (my_net_write(net, (char*)packet->ptr(), packet->length()) )
+      if (my_net_write(net, (char*)packet->ptr(), packet->length()))
       {
 	errmsg = "Failed on my_net_write()";
 	my_errno= ER_UNKNOWN_ERROR;
@@ -454,34 +567,14 @@ impossible position";
     }
     /*
       TODO: now that we are logging the offset, check to make sure
-      the recorded offset and the actual match
+      the recorded offset and the actual match.
+      Guilhem 2003-06: this is not true if this master is a slave <4.0.15
+      running with --log-slave-updates, because then log_pos may be the offset
+      in the-master-of-this-master's binlog.
     */
-    if (error != LOG_READ_EOF)
-    {
-      my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
-      switch (error) {
-      case LOG_READ_BOGUS:
-	errmsg = "bogus data in log event";
-	break;
-      case LOG_READ_TOO_LARGE:
-	errmsg = "log event entry exceeded max_allowed_packet; \
-Increase max_allowed_packet on master";
-	break;
-      case LOG_READ_IO:
-	errmsg = "I/O error reading log event";
-	break;
-      case LOG_READ_MEM:
-	errmsg = "memory allocation failed reading log event";
-	break;
-      case LOG_READ_TRUNC:
-	errmsg = "binlog truncated in the middle of event";
-	break;
-      default:
-	errmsg = "unknown error reading log event on the master";
-	break;
-      }
+
+    if (test_for_non_eof_log_read_errors(error, errmsg))
       goto err;
-    }
 
     if (!(flags & BINLOG_DUMP_NON_BLOCK) &&
        mysql_bin_log.is_active(log_file_name))
@@ -615,8 +708,13 @@ Increase max_allowed_packet on master";
       (void) my_close(file, MYF(MY_WME));
 
       /*
-        Even if the previous log contained a Rotate_log_event, we still fake
-        one.
+        Call fake_rotate_event() in case the previous log (the one which we have
+        just finished reading) did not contain a Rotate event (for example (I
+        don't know any other example) the previous log was the last one before
+        the master was shutdown & restarted).
+        This way we tell the slave about the new log's name and position.
+        If the binlog is 5.0, the next event we are going to read and send is
+        Format_description_log_event.
       */
       if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0 ||
 	  fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE, &errmsg))
@@ -1112,7 +1210,7 @@ int change_master(THD* thd, MASTER_INFO* mi)
 			   mi->rli.group_relay_log_name,
 			   mi->rli.group_relay_log_pos,
 			   0 /*no data lock*/,
-			   &msg))
+			   &msg, 0))
     {
       net_printf(thd,0,"Failed initializing relay log position: %s",msg);
       unlock_slave_threads(mi);
@@ -1197,6 +1295,8 @@ int show_binlog_events(THD* thd)
   const char *errmsg = 0;
   IO_CACHE log;
   File file = -1;
+  Format_description_log_event *description_event= new
+    Format_description_log_event(3); /* MySQL 4.0 by default */ 
 
   Log_event::init_show_field_list(&field_list);
   if (protocol-> send_fields(&field_list, 1))
@@ -1235,10 +1335,35 @@ int show_binlog_events(THD* thd)
       goto err;
 
     pthread_mutex_lock(log_lock);
+
+    /* 
+       open_binlog() sought to position 4.
+       Read the first event in case it's a Format_description_log_event, to know the
+       format. If there's no such event, we are 3.23 or 4.x. This code, like
+       before, can't read 3.23 binlogs.
+       This code will fail on a mixed relay log (one which has Format_desc then
+       Rotate then Format_desc).
+    */
+    
+    ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event);
+    if (ev)
+    {
+      if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
+        description_event= (Format_description_log_event*) ev;
+      else
+        delete ev;
+    }
+
     my_b_seek(&log, pos);
 
+    if (!description_event->is_valid())
+    {
+      errmsg="Invalid Format_description event; could be out of memory";
+      goto err;
+    }
+    
     for (event_count = 0;
-	 (ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,0)); )
+	 (ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event)); )
     {
       if (event_count >= limit_start &&
 	  ev->net_send(protocol, linfo.log_file_name, pos))
@@ -1267,6 +1392,7 @@ int show_binlog_events(THD* thd)
   }
 
 err:
+  delete description_event;
   if (file >= 0)
   {
     end_io_cache(&log);