diff --git a/client/Makefile.am b/client/Makefile.am index 2c54ec459890fed1268409be34fa7bf77dc24984..612a5f01f8d90fe2583b0779ef84e2d549da49af 100644 --- a/client/Makefile.am +++ b/client/Makefile.am @@ -39,7 +39,7 @@ mysqlbinlog_SOURCES = mysqlbinlog.cc ../mysys/mf_tempdir.c mysqlbinlog_DEPENDENCIES= $(LIBRARIES) $(pkglib_LTLIBRARIES) mysqlmanagerc_SOURCES = mysqlmanagerc.c mysqlmanagerc_DEPENDENCIES= $(LIBRARIES) $(pkglib_LTLIBRARIES) -sql_src=log_event.h log_event.cc +sql_src=log_event.h mysql_priv.h log_event.cc # Fix for mit-threads DEFS = -DUNDEF_THREADS_HACK diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc index 35f0db76ad67f05eed533712b1a994a0e954caa0..90a1526c2c7353e8eb51705afd1da9ed304fedc6 100644 --- a/client/mysqlbinlog.cc +++ b/client/mysqlbinlog.cc @@ -14,12 +14,28 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + + TODO: print the catalog (some USE catalog.db ????). + + Standalone program to read a MySQL binary log (or relay log); + can read files produced by 3.23, 4.x, 5.0 servers. + + Can read binlogs from 3.23/4.x/5.0 and relay logs from 4.x/5.0. + Should be able to read any file of these categories, even with --position. + An important fact: the Format_desc event of the log is at most the 3rd event + of the log; if it is the 3rd then there is this combination: + Format_desc_of_slave, Rotate_of_master, Format_desc_of_master. +*/ + #define MYSQL_CLIENT #undef MYSQL_SERVER #include "client_priv.h" #include <time.h> #include <assert.h> #include "log_event.h" +/* That one is necessary for defines of OPTION_NO_FOREIGN_KEY_CHECKS etc */ +#include "mysql_priv.h" #define BIN_LOG_HEADER_SIZE 4 #define PROBE_HEADER_LEN (EVENT_LEN_OFFSET+4) @@ -481,21 +497,26 @@ static int check_master_version(MYSQL* mysql) } +/* + TODO fix this for new format (like local log); this will be done when 4.0 is + merged here (Victor's fixes are needed to make dump_remote_log_entries() + work). +*/ + static void dump_remote_log_entries(const char* logname) { char buf[128]; - char last_db[FN_REFLEN+1] = ""; + LAST_EVENT_INFO last_event_info; uint len; NET* net = &mysql->net; int old_format; old_format = check_master_version(mysql); if (!position) - position = BIN_LOG_HEADER_SIZE; // protect the innocent from spam + position = BIN_LOG_HEADER_SIZE; if (position < BIN_LOG_HEADER_SIZE) { position = BIN_LOG_HEADER_SIZE; - // warn the guity sql_print_error("Warning: The position in the binary log can't be less than %d.\nStarting from position %d\n", BIN_LOG_HEADER_SIZE, BIN_LOG_HEADER_SIZE); } int4store(buf, position); @@ -517,10 +538,11 @@ static void dump_remote_log_entries(const char* logname) DBUG_PRINT("info",( "len= %u, net->read_pos[5] = %d\n", len, net->read_pos[5])); Log_event *ev = Log_event::read_log_event((const char*) net->read_pos + 1 , - len - 1, &error, old_format); + len - 1, &error, 0); + //TODO this ,0) : we need to store the description_event like for local_log if (ev) { - ev->print(result_file, short_form, last_db); + ev->print(result_file, short_form, &last_event_info); if (ev->get_type_code() == LOAD_EVENT) dump_remote_file(net, ((Load_log_event*)ev)->fname); delete ev; @@ -531,29 +553,98 @@ static void dump_remote_log_entries(const char* logname) } -static int check_header(IO_CACHE* file) +static void check_header(IO_CACHE* file, + Format_description_log_event **description_event) { byte header[BIN_LOG_HEADER_SIZE]; byte buf[PROBE_HEADER_LEN]; - int old_format=0; + *description_event= new Format_description_log_event(3); + my_off_t tmp_pos; my_off_t pos = my_b_tell(file); my_b_seek(file, (my_off_t)0); if (my_b_read(file, header, sizeof(header))) die("Failed reading header; Probably an empty file"); if (memcmp(header, BINLOG_MAGIC, sizeof(header))) die("File is not a binary log file"); - if (!my_b_read(file, buf, sizeof(buf))) + + /* + Imagine we are running with --position=1000. We still need to know the + binlog format's. So we still need to find, if there is one, the Format_desc + event, or to know if this is a 3.23 binlog. So we need to first read the + first events of the log, those around offset 4. + Even if we are reading a 3.23 binlog from the start (no --position): we need + to know the header length (which is 13 in 3.23, 19 in 4.x) to be able to + successfully print the first event (Start_log_event_v3). So even in this + case, we need to "probe" the first bytes of the log *before* we do a real + read_log_event(). Because read_log_event() needs to know the header's length + to work fine. + */ + for(;;) { - if (buf[4] == START_EVENT) + tmp_pos= my_b_tell(file); /* should be 4 the first time */ + if (my_b_read(file, buf, sizeof(buf))) { - uint event_len; - event_len = uint4korr(buf + EVENT_LEN_OFFSET); - old_format = (event_len < (LOG_EVENT_HEADER_LEN + START_HEADER_LEN)); + if (file->error) + die("\ +Could not read entry at offset %lu : Error in log format or read error", + tmp_pos); + /* + Otherwise this is just EOF : this log currently contains 0-2 events. + Maybe it's going to be filled in the next milliseconds; then we are + going to have a problem if this a 3.23 log (imagine we are locally + reading a 3.23 binlog which is being written presently): we won't know + it in read_log_event() and will fail(). + Similar problems could happen with hot relay logs if --position is used + (but a --position which is posterior to the current size of the log). + These are rare problems anyway (reading a hot log + when we read the + first events there are not all there yet + when we read a bit later + there are more events + using a strange --position). + */ + break; + } + else + { + DBUG_PRINT("info",("buf[4]=%d", buf[4])); + /* always test for a Start_v3, even if no --position */ + if (buf[4] == START_EVENT_V3) /* This is 3.23 or 4.x */ + { + if (uint4korr(buf + EVENT_LEN_OFFSET) < + (LOG_EVENT_MINIMAL_HEADER_LEN + START_V3_HEADER_LEN)) + { + /* This is 3.23 (format 1) */ + delete *description_event; + *description_event= new Format_description_log_event(1); + } + break; + } + else if (tmp_pos>=position) + break; + else if (buf[4] == FORMAT_DESCRIPTION_EVENT) /* This is 5.0 */ + { + my_b_seek(file, tmp_pos); /* seek back to event's start */ + if (!(*description_event= (Format_description_log_event*) + Log_event::read_log_event(file, *description_event))) + /* EOF can't be hit here normally, so it's a real error */ + die("Could not read a Format_description_log_event event \ +at offset %lu ; this could be a log format error or read error", + tmp_pos); + DBUG_PRINT("info",("Setting description_event")); + } + else if (buf[4] == ROTATE_EVENT) + { + my_b_seek(file, tmp_pos); /* seek back to event's start */ + if (!Log_event::read_log_event(file, *description_event)) + /* EOF can't be hit here normally, so it's a real error */ + die("Could not read a Rotate_log_event event \ +at offset %lu ; this could be a log format error or read error", + tmp_pos); + } + else + break; } } my_b_seek(file, pos); - return old_format; } @@ -562,11 +653,15 @@ static void dump_local_log_entries(const char* logname) File fd = -1; IO_CACHE cache,*file= &cache; ulonglong rec_count = 0; - char last_db[FN_REFLEN+1]; + LAST_EVENT_INFO last_event_info; byte tmp_buff[BIN_LOG_HEADER_SIZE]; - bool old_format = 0; - - last_db[0]=0; + /* + check_header() will set the pointer below. + Why do we need here a pointer on an event instead of an event ? + This is because the event will be created (alloced) in read_log_event() + (which returns a pointer) in check_header(). + */ + Format_description_log_event* description_event; if (logname && logname[0] != '-') { @@ -575,14 +670,14 @@ static void dump_local_log_entries(const char* logname) if (init_io_cache(file, fd, 0, READ_CACHE, (my_off_t) position, 0, MYF(MY_WME | MY_NABP))) exit(1); - old_format = check_header(file); + check_header(file, &description_event); } - else + else // reading from stdin; TODO: check that it works { if (init_io_cache(file, fileno(result_file), 0, READ_CACHE, (my_off_t) 0, 0, MYF(MY_WME | MY_NABP | MY_DONT_CHECK_FILESIZE))) exit(1); - old_format = check_header(file); + check_header(file, &description_event); if (position) { /* skip 'position' characters from stdout */ @@ -599,6 +694,9 @@ static void dump_local_log_entries(const char* logname) file->seek_not_done=0; } + if (!description_event->is_valid()) + die("Invalid Format_description log event; could be out of memory"); + if (!position) my_b_read(file, tmp_buff, BIN_LOG_HEADER_SIZE); // Skip header for (;;) @@ -606,7 +704,7 @@ static void dump_local_log_entries(const char* logname) char llbuff[21]; my_off_t old_off = my_b_tell(file); - Log_event* ev = Log_event::read_log_event(file, old_format); + Log_event* ev = Log_event::read_log_event(file, description_event); if (!ev) { if (file->error) @@ -633,7 +731,7 @@ Could not read entry at offset %s : Error in log format or read error", continue; // next } } - ev->print(result_file, short_form, last_db); + ev->print(result_file, short_form, &last_event_info); break; case CREATE_FILE_EVENT: { @@ -661,18 +759,18 @@ Could not read entry at offset %s : Error in log format or read error", filename and use LOCAL), prepared in the 'case EXEC_LOAD_EVENT' below. */ - ce->print(result_file, short_form, last_db, true); + ce->print(result_file, short_form, &last_event_info, true); load_processor.process(ce); ev= 0; break; } case APPEND_BLOCK_EVENT: - ev->print(result_file, short_form, last_db); + ev->print(result_file, short_form, &last_event_info); load_processor.process((Append_block_log_event*)ev); break; case EXEC_LOAD_EVENT: { - ev->print(result_file, short_form, last_db); + ev->print(result_file, short_form, &last_event_info); Execute_load_log_event *exv= (Execute_load_log_event*)ev; Create_file_log_event *ce= load_processor.grab_event(exv->file_id); /* @@ -682,7 +780,7 @@ Could not read entry at offset %s : Error in log format or read error", */ if (ce) { - ce->print(result_file, short_form, last_db,true); + ce->print(result_file, short_form, &last_event_info,true); my_free((char*)ce->fname,MYF(MY_WME)); delete ce; } @@ -691,17 +789,23 @@ Could not read entry at offset %s : Error in log format or read error", Create_file event for file_id: %u\n",exv->file_id); break; } + case FORMAT_DESCRIPTION_EVENT: + delete description_event; + description_event= (Format_description_log_event*) ev; + ev->print(result_file, short_form, &last_event_info); + break; default: - ev->print(result_file, short_form, last_db); + ev->print(result_file, short_form, &last_event_info); } } rec_count++; - if (ev) - delete ev; + if (ev && ev->get_type_code()!=FORMAT_DESCRIPTION_EVENT) + delete ev; /* otherwise, deleted in the end */ } if (fd >= 0) my_close(fd, MYF(MY_WME)); end_io_cache(file); + delete description_event; } diff --git a/include/my_sys.h b/include/my_sys.h index 59d5767d204b7fddae874e2fe65a2c0eb9198b21..bfdb360c1ed45d59167dc9cf5b15cb1eca083ff1 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -498,6 +498,7 @@ typedef int (*qsort2_cmp)(const void *, const void *, const void *); /* tell write offset in the SEQ_APPEND cache */ my_off_t my_b_append_tell(IO_CACHE* info); +my_off_t my_b_safe_tell(IO_CACHE* info); /* picks the correct tell() */ #define my_b_bytes_in_cache(info) (uint) (*(info)->current_end - \ *(info)->current_pos) diff --git a/mysql-test/r/rpl_session_var.result b/mysql-test/r/rpl_session_var.result new file mode 100644 index 0000000000000000000000000000000000000000..d1ba04f36ec204abba00a3294bc4c03186fff4e1 --- /dev/null +++ b/mysql-test/r/rpl_session_var.result @@ -0,0 +1,43 @@ +stop slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +reset master; +reset slave; +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; +start slave; +drop table if exists t1; +Warnings: +Note 1051 Unknown table 't1' +create table t1(a varchar(10),b int); +set @@session.sql_mode=pipes_as_concat; +insert into t1 values('My'||'SQL', 1); +set @@session.sql_mode=default; +insert into t1 values('My'||'SQL', 2); +select * from t1 where b<3 order by a; +a b +0 2 +MySQL 1 +select * from t1 where b<3 order by a; +a b +0 2 +MySQL 1 +set @@session.sql_mode=ignore_space; +insert into t1 values(password ('MySQL'), 3); +set @@session.sql_mode=ansi_quotes; +create table "t2" ("a" int); +drop table t1, t2; +set @@session.sql_mode=default; +create table t1(a int auto_increment primary key); +create table t2(b int, a int); +set @@session.sql_auto_is_null=1; +insert into t1 values(null); +insert into t2 select 1,a from t1 where a is null; +set @@session.sql_auto_is_null=0; +insert into t1 values(null); +insert into t2 select 2,a from t1 where a is null; +select * from t2 order by b; +b a +1 1 +select * from t2 order by b; +b a +1 1 +drop table t1,t2; diff --git a/mysql-test/t/rpl_session_var.test b/mysql-test/t/rpl_session_var.test new file mode 100644 index 0000000000000000000000000000000000000000..39d3128d2605b9ef3c244a709aeafaa8e690bc39 --- /dev/null +++ b/mysql-test/t/rpl_session_var.test @@ -0,0 +1,42 @@ +# Replication of session variables. +# FOREIGN_KEY_CHECKS is tested in rpl_insert_id.test + +source include/master-slave.inc; +drop table if exists t1; +create table t1(a varchar(10),b int); +set @@session.sql_mode=pipes_as_concat; +insert into t1 values('My'||'SQL', 1); +set @@session.sql_mode=default; +insert into t1 values('My'||'SQL', 2); +select * from t1 where b<3 order by a; +save_master_pos; +connection slave; +sync_with_master; +select * from t1 where b<3 order by a; +connection master; +# if the slave does the next sync_with_master fine, then it means it accepts the +# two lines of ANSI syntax below, which is what we want to check. +set @@session.sql_mode=ignore_space; +insert into t1 values(password ('MySQL'), 3); +set @@session.sql_mode=ansi_quotes; +create table "t2" ("a" int); +drop table t1, t2; +set @@session.sql_mode=default; +create table t1(a int auto_increment primary key); +create table t2(b int, a int); +set @@session.sql_auto_is_null=1; +insert into t1 values(null); +insert into t2 select 1,a from t1 where a is null; +set @@session.sql_auto_is_null=0; +insert into t1 values(null); +insert into t2 select 2,a from t1 where a is null; +select * from t2 order by b; +save_master_pos; +connection slave; +sync_with_master; +select * from t2 order by b; +connection master; +drop table t1,t2; +save_master_pos; +connection slave; +sync_with_master; diff --git a/mysys/mf_iocache2.c b/mysys/mf_iocache2.c index bce08b9795bcb9391d1623b147398b3ce65240a9..232097d355b21375d85bbfe8142511a1fb459761 100644 --- a/mysys/mf_iocache2.c +++ b/mysys/mf_iocache2.c @@ -66,6 +66,13 @@ my_off_t my_b_append_tell(IO_CACHE* info) return res; } +my_off_t my_b_safe_tell(IO_CACHE *info) +{ + if (unlikely(info->type == SEQ_READ_APPEND)) + return my_b_append_tell(info); + return my_b_tell(info); +} + /* Make next read happen at the given position For write cache, make next write happen at the given position diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index cbbb0d96d22c4266c61beb0c4e8b45bfa8ab80ed..6bf746c73e63ab69b4e0660ac3c6d1d6920dfcde 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -987,10 +987,19 @@ innobase_commit_low( trx->mysql_master_log_file_name = active_mi->rli.group_master_log_name; + /* + Guilhem to Heikki: in 5.0 we don't need to do a computation + (old_pos+len) to get the end_pos, because we already have the + end_pos under hand in the replication code + (Query_log_event::exec_event()). + I tested the code change below (simulated a crash with kill + -9) and got the good (binlog, position) displayed by InnoDB at + crash recovery, so this code change is ok. + */ trx->mysql_master_log_pos = ((ib_longlong) - (active_mi->rli.group_master_log_pos + - active_mi->rli.event_len - )); + (active_mi->rli.future_group_master_log_pos + )); + } #endif /* HAVE_REPLICATION */ diff --git a/sql/log.cc b/sql/log.cc index 95ee4928971b45a07c6df78ed77383809dfbcb10..e73e009aaaa0fe0250a81fe11a0a0b11e22db6ca 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -84,7 +84,8 @@ static int find_uniq_filename(char *name) MYSQL_LOG::MYSQL_LOG() :bytes_written(0), last_time(0), query_start(0), name(0), file_id(1), open_count(1), log_type(LOG_CLOSED), write_error(0), inited(0), - need_start_event(1) + need_start_event(1), description_event_for_exec(0), + description_event_for_queue(0) { /* We don't want to initialize LOCK_Log here as such initialization depends on @@ -111,6 +112,8 @@ void MYSQL_LOG::cleanup() { inited= 0; close(LOG_CLOSE_INDEX); + delete description_event_for_queue; + delete description_event_for_exec; (void) pthread_mutex_destroy(&LOCK_log); (void) pthread_mutex_destroy(&LOCK_index); (void) pthread_cond_destroy(&update_cond); @@ -179,7 +182,8 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, const char *new_name, const char *index_file_name_arg, enum cache_type io_cache_type_arg, bool no_auto_events_arg, - ulong max_size_arg) + ulong max_size_arg, + bool null_created_arg) { char buff[512]; File file= -1, index_file_nr= -1; @@ -272,8 +276,8 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, if (my_b_safe_write(&log_file, (byte*) BINLOG_MAGIC, BIN_LOG_HEADER_SIZE)) goto err; - bytes_written += BIN_LOG_HEADER_SIZE; - write_file_name_to_index_file=1; + bytes_written+= BIN_LOG_HEADER_SIZE; + write_file_name_to_index_file= 1; } if (!my_b_inited(&index_file)) @@ -302,10 +306,42 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, } if (need_start_event && !no_auto_events) { - need_start_event=0; - Start_log_event s; + /* + In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event + even if this is not the very first binlog. + */ + Format_description_log_event s(BINLOG_VERSION); + if (!s.is_valid()) + goto err; s.set_log_pos(this); - s.write(&log_file); + if (null_created_arg) + s.created= 0; + if (s.write(&log_file)) + goto err; + bytes_written+= s.get_event_len(); + } + if (description_event_for_queue && + description_event_for_queue->binlog_version>=4) + { + /* + This is a relay log written to by the I/O slave thread. + Write the event so that others can later know the format of this relay + log. + Note that this event is very close to the original event from the + master (it has binlog version of the master, event types of the + master), so this is suitable to parse the next relay log's event. It + has been produced by + Format_description_log_event::Format_description_log_event(char* + buf,). + Why don't we want to write the description_event_for_queue if this event + is for format<4 (3.23 or 4.x): this is because in that case, the + description_event_for_queue describes the data received from the master, + but not the data written to the relay log (*conversion*), which is in + format 4 (slave's). + */ + if (description_event_for_queue->write(&log_file)) + goto err; + bytes_written+= description_event_for_queue->get_event_len(); } if (flush_io_cache(&log_file)) goto err; @@ -596,7 +632,7 @@ bool MYSQL_LOG::reset_logs(THD* thd) if (!thd->slave_thread) need_start_event=1; open(save_name, save_log_type, 0, index_file_name, - io_cache_type, no_auto_events, max_size); + io_cache_type, no_auto_events, max_size, 0); my_free((gptr) save_name, MYF(0)); err: @@ -986,8 +1022,17 @@ void MYSQL_LOG::new_file(bool need_lock) Note that at this point, log_type != LOG_CLOSED (important for is_open()). */ + /* + new_file() is only used for rotation (in FLUSH LOGS or because size > + max_binlog_size or max_relay_log_size). + If this is a binary log, the Format_description_log_event at the beginning of + the new file should have created=0 (to distinguish with the + Format_description_log_event written at server startup, which should + trigger temp tables deletion on slaves. + */ + open(old_name, save_log_type, new_name_ptr, index_file_name, io_cache_type, - no_auto_events, max_size); + no_auto_events, max_size, 1); my_free(old_name,MYF(0)); end: @@ -1282,6 +1327,12 @@ bool MYSQL_LOG::write(Log_event* event_info) } #endif +#if MYSQL_VERSION_ID < 50000 + /* + In 5.0 this is not needed anymore as we store the value of + FOREIGN_KEY_CHECKS in a binary way in the Query event's header. + The code below was enabled in 4.0 and 4.1. + */ /* If the user has set FOREIGN_KEY_CHECKS=0 we wrap every SQL command in the binlog inside: @@ -1297,6 +1348,7 @@ bool MYSQL_LOG::write(Log_event* event_info) if (e.write(file)) goto err; } +#endif } /* Write the SQL command */ @@ -1307,6 +1359,7 @@ bool MYSQL_LOG::write(Log_event* event_info) /* Write log events to reset the 'run environment' of the SQL command */ +#if MYSQL_VERSION_ID < 50000 if (thd && thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) { Query_log_event e(thd, "SET FOREIGN_KEY_CHECKS=1", 24, 0); @@ -1314,6 +1367,7 @@ bool MYSQL_LOG::write(Log_event* event_info) if (e.write(file)) goto err; } +#endif /* Tell for transactional table handlers up to which position in the @@ -1720,6 +1774,7 @@ void MYSQL_LOG::close(uint exiting) Stop_log_event s; s.set_log_pos(this); s.write(&log_file); + bytes_written+= s.get_event_len(); signal_update(); } #endif /* HAVE_REPLICATION */ diff --git a/sql/log_event.cc b/sql/log_event.cc index 8c93a22cf6e44e2707e37290ff4c9386eba26365..018366507e6828ea0dbe122480e34cdb5b0f8dbf 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -192,9 +192,25 @@ static inline int read_str(char * &buf, char *buf_end, char * &str, return 0; } +/* + Prints a "session_var=value" string. Used by mysqlbinlog to print some SET + commands just before it prints a query. +*/ + +static void print_set_option(FILE* file, uint32 bits_changed, uint32 option, + uint32 flags, const char* name, bool* need_comma) +{ + if (bits_changed & option) + { + if (*need_comma) + fprintf(file,", "); + fprintf(file,"%s=%d", name, (bool)(flags & option)); + *need_comma= 1; + } +} /************************************************************************** - Log_event methods + Log_event methods (= the parent class of all events) **************************************************************************/ /* @@ -204,7 +220,7 @@ static inline int read_str(char * &buf, char *buf_end, char * &str, const char* Log_event::get_type_str() { switch(get_type_code()) { - case START_EVENT: return "Start"; + case START_EVENT_V3: return "Start_v3"; case STOP_EVENT: return "Stop"; case QUERY_EVENT: return "Query"; case ROTATE_EVENT: return "Rotate"; @@ -218,6 +234,7 @@ const char* Log_event::get_type_str() case EXEC_LOAD_EVENT: return "Exec_load"; case RAND_EVENT: return "RAND"; case USER_VAR_EVENT: return "User var"; + case FORMAT_DESCRIPTION_EVENT: return "Format_desc"; default: return "Unknown"; /* impossible */ } } @@ -254,24 +271,66 @@ Log_event::Log_event() Log_event::Log_event() */ -Log_event::Log_event(const char* buf, bool old_format) +Log_event::Log_event(const char* buf, + const Format_description_log_event* description_event) :temp_buf(0), cached_event_len(0), cache_stmt(0) { +#ifndef MYSQL_CLIENT + thd = 0; +#endif when = uint4korr(buf); server_id = uint4korr(buf + SERVER_ID_OFFSET); - if (old_format) + if (description_event->binlog_version==1) { - log_pos=0; - flags=0; + log_pos= 0; + flags= 0; + return; } - else + /* 4.0 or newer */ + log_pos= uint4korr(buf + LOG_POS_OFFSET); + /* + If the log is 4.0 (so here it can only be a 4.0 relay log read by the SQL + thread or a 4.0 master binlog read by the I/O thread), log_pos is the + beginning of the event: we transform it into the end of the event, which is + more useful. + But how do you know that the log is 4.0: you know it if description_event is + version 3 *and* you are not reading a Format_desc (remember that mysqlbinlog + starts by assuming that 5.0 logs are in 4.0 format, until it finds a + Format_desc). + */ + if (description_event->binlog_version==3 && + buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && + /* + If log_pos=0, don't change it. log_pos==0 is a marker to mean + "don't change rli->group_master_log_pos" (see + inc_group_relay_log_pos()). As it is unreal log_pos, adding the event + len's is nonsense. For example, a fake Rotate event should + not have its log_pos (which is 0) changed or it will modify + Exec_master_log_pos in SHOW SLAVE STATUS, displaying a nonsense value of + (a non-zero offset which does not exist in the master's binlog, so which + will cause problems if the user uses this value in CHANGE MASTER). + */ + log_pos) + log_pos+= uint4korr(buf + EVENT_LEN_OFFSET); + flags= uint2korr(buf + FLAGS_OFFSET); + if ((buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) || + (buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT)) { - log_pos = uint4korr(buf + LOG_POS_OFFSET); - flags = uint2korr(buf + FLAGS_OFFSET); + /* + These events always have a header which stops here (i.e. their header is + FROZEN). + */ + /* + Initialization to zero of all other Log_event members as they're not + specified. Currently there are no such members; in the future there will + be an event UID (but Format_description and Rotate don't need this UID, as + they are not propagated through --log-slave-updates (remember the UID is + used to not play a query twice when you have two masters which are slaves + of a 3rd master). Then we are done. + */ + return; } -#ifndef MYSQL_CLIENT - thd = 0; -#endif + /* otherwise, go on with reading the header from buf (nothing now) */ } #ifndef MYSQL_CLIENT @@ -322,10 +381,10 @@ int Log_event::exec_event(struct st_relay_log_info* rli) has already been updated. */ if ((thd->options & OPTION_BEGIN) && opt_using_transactions) - rli->inc_event_relay_log_pos(get_event_len()); + rli->inc_event_relay_log_pos(); else { - rli->inc_group_relay_log_pos(get_event_len(),log_pos); + rli->inc_group_relay_log_pos(log_pos); flush_relay_log_info(rli); /* Note that Rotate_log_event::exec_event() does not call this function, @@ -387,7 +446,7 @@ void Log_event::init_show_field_list(List<Item>* field_list) field_list->push_back(new Item_empty_string("Event_type", 20)); field_list->push_back(new Item_return_int("Server_id", 10, MYSQL_TYPE_LONG)); - field_list->push_back(new Item_return_int("Orig_log_pos", 11, + field_list->push_back(new Item_return_int("End_log_pos", 11, MYSQL_TYPE_LONGLONG)); field_list->push_back(new Item_empty_string("Info", 20)); } @@ -410,6 +469,12 @@ int Log_event::write(IO_CACHE* file) int Log_event::write_header(IO_CACHE* file) { + /* + Header will be of size LOG_EVENT_HEADER_LEN for all events, except for + FORMAT_DESCRIPTION_EVENT and ROTATE_EVENT, where it will be + LOG_EVENT_MINIMAL_HEADER_LEN (remember these 2 have a frozen header, because + we read them before knowing the format). + */ char buf[LOG_EVENT_HEADER_LEN]; char* pos = buf; int4store(pos, (ulong) when); // timestamp @@ -417,19 +482,45 @@ int Log_event::write_header(IO_CACHE* file) *pos++ = get_type_code(); // event type code int4store(pos, server_id); pos += 4; - long tmp=get_data_size() + LOG_EVENT_HEADER_LEN; + long tmp; // total length of the event + switch (get_type_code()) + { + case FORMAT_DESCRIPTION_EVENT: + case ROTATE_EVENT: + tmp= get_data_size() + LOG_EVENT_MINIMAL_HEADER_LEN; + break; + default: + tmp= get_data_size() + LOG_EVENT_HEADER_LEN; + break; + } int4store(pos, tmp); pos += 4; int4store(pos, log_pos); pos += 4; int2store(pos, flags); pos += 2; + switch (get_type_code()) + { + case FORMAT_DESCRIPTION_EVENT: + case ROTATE_EVENT: + break; + default: + /* + Other data to print in the header (nothing now); in that case increment + pos. + */ + break; + } return (my_b_safe_write(file, (byte*) buf, (uint) (pos - buf))); } /* Log_event::read_log_event() + + This needn't be format-tolerant, because we only read + LOG_EVENT_MINIMAL_HEADER_LEN (we just want to read the event's length). + */ #ifndef MYSQL_CLIENT @@ -438,7 +529,7 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet, { ulong data_len; int result=0; - char buf[LOG_EVENT_HEADER_LEN]; + char buf[LOG_EVENT_MINIMAL_HEADER_LEN]; DBUG_ENTER("read_log_event"); if (log_lock) @@ -458,24 +549,25 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet, goto end; } data_len= uint4korr(buf + EVENT_LEN_OFFSET); - if (data_len < LOG_EVENT_HEADER_LEN || + if (data_len < LOG_EVENT_MINIMAL_HEADER_LEN || data_len > current_thd->variables.max_allowed_packet) { DBUG_PRINT("error",("data_len: %ld", data_len)); - result= ((data_len < LOG_EVENT_HEADER_LEN) ? LOG_READ_BOGUS : + result= ((data_len < LOG_EVENT_MINIMAL_HEADER_LEN) ? LOG_READ_BOGUS : LOG_READ_TOO_LARGE); goto end; } packet->append(buf, sizeof(buf)); - data_len-= LOG_EVENT_HEADER_LEN; + data_len-= LOG_EVENT_MINIMAL_HEADER_LEN; if (data_len) { if (packet->append(file, data_len)) { /* - Here we should never hit EOF in a non-error condition. + Here if we hit EOF it's really an error: as data_len is >=0 + there's supposed to be more bytes available. EOF means we are reading the event partially, which should - never happen. + never happen: either we read badly or the binlog is truncated. */ result= file->error >= 0 ? LOG_READ_TRUNC: LOG_READ_IO; /* Implicit goto end; */ @@ -502,24 +594,42 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet, Log_event::read_log_event() NOTE: - Allocates memory; The caller is responsible for clean-up + Allocates memory; The caller is responsible for clean-up. */ #ifndef MYSQL_CLIENT Log_event* Log_event::read_log_event(IO_CACHE* file, pthread_mutex_t* log_lock, - bool old_format) + const Format_description_log_event *description_event) #else -Log_event* Log_event::read_log_event(IO_CACHE* file, bool old_format) +Log_event* Log_event::read_log_event(IO_CACHE* file, + const Format_description_log_event *description_event) #endif { - char head[LOG_EVENT_HEADER_LEN]; - uint header_size= old_format ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; + DBUG_ASSERT(description_event); + char head[LOG_EVENT_MINIMAL_HEADER_LEN]; + /* + First we only want to read at most LOG_EVENT_MINIMAL_HEADER_LEN, just to + check the event for sanity and to know its length; no need to really parse + it. We say "at most" because this could be a 3.23 master, which has header + of 13 bytes, whereas LOG_EVENT_MINIMAL_HEADER_LEN is 19 bytes (it's "minimal" + over the set {MySQL >=4.0}). + */ + uint header_size= min(description_event->common_header_len, + LOG_EVENT_MINIMAL_HEADER_LEN); LOCK_MUTEX; + DBUG_PRINT("info", ("my_b_tell=%lu", my_b_tell(file))); if (my_b_read(file, (byte *) head, header_size)) { + DBUG_PRINT("info", ("Log_event::read_log_event(IO_CACHE*,Format_desc*) \ +failed my_b_read")); UNLOCK_MUTEX; + /* + No error here; it could be that we are at the file's end. However if the + next my_b_read() fails (below), it will be an error as we were able to + read the first bytes. + */ return 0; } @@ -553,7 +663,8 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, bool old_format) error = "read error"; goto err; } - if ((res = read_log_event(buf, data_len, &error, old_format))) + if ((res= read_log_event(buf, data_len, &error, + description_event))) res->register_temp_buf(buf); err: @@ -580,87 +691,105 @@ Error in Log_event::read_log_event(): '%s', data_len: %d, event_type: %d", /* Log_event::read_log_event() + Binlog format tolerance is in (buf, event_len, description_event) + constructors. */ -Log_event* Log_event::read_log_event(const char* buf, int event_len, - const char **error, bool old_format) +Log_event* Log_event::read_log_event(const char* buf, uint event_len, + const char **error, + const Format_description_log_event *description_event) { + DBUG_ENTER("Log_event::read_log_event(char*,...)"); + DBUG_ASSERT(description_event); + DBUG_PRINT("info", ("binlog_version=%d", description_event->binlog_version)); if (event_len < EVENT_LEN_OFFSET || (uint) event_len != uint4korr(buf+EVENT_LEN_OFFSET)) { *error="Sanity check failed"; // Needed to free buffer - return NULL; // general sanity check - will fail on a partial read + DBUG_RETURN(NULL); // general sanity check - will fail on a partial read } Log_event* ev = NULL; switch(buf[EVENT_TYPE_OFFSET]) { case QUERY_EVENT: - ev = new Query_log_event(buf, event_len, old_format); + ev = new Query_log_event(buf, event_len, description_event); break; case LOAD_EVENT: - ev = new Create_file_log_event(buf, event_len, old_format); + ev = new Create_file_log_event(buf, event_len, description_event); break; case NEW_LOAD_EVENT: - ev = new Load_log_event(buf, event_len, old_format); + ev = new Load_log_event(buf, event_len, description_event); break; case ROTATE_EVENT: - ev = new Rotate_log_event(buf, event_len, old_format); + ev = new Rotate_log_event(buf, event_len, description_event); break; #ifdef HAVE_REPLICATION - case SLAVE_EVENT: + case SLAVE_EVENT: /* can never happen (unused event) */ ev = new Slave_log_event(buf, event_len); break; #endif /* HAVE_REPLICATION */ case CREATE_FILE_EVENT: - ev = new Create_file_log_event(buf, event_len, old_format); + ev = new Create_file_log_event(buf, event_len, description_event); break; case APPEND_BLOCK_EVENT: - ev = new Append_block_log_event(buf, event_len); + ev = new Append_block_log_event(buf, event_len, description_event); break; case DELETE_FILE_EVENT: - ev = new Delete_file_log_event(buf, event_len); + ev = new Delete_file_log_event(buf, event_len, description_event); break; case EXEC_LOAD_EVENT: - ev = new Execute_load_log_event(buf, event_len); + ev = new Execute_load_log_event(buf, event_len, description_event); break; - case START_EVENT: - ev = new Start_log_event(buf, old_format); + case START_EVENT_V3: /* this is sent only by MySQL <=4.x */ + ev = new Start_log_event_v3(buf, description_event); break; #ifdef HAVE_REPLICATION case STOP_EVENT: - ev = new Stop_log_event(buf, old_format); + ev = new Stop_log_event(buf, description_event); break; #endif /* HAVE_REPLICATION */ case INTVAR_EVENT: - ev = new Intvar_log_event(buf, old_format); + ev = new Intvar_log_event(buf, description_event); break; case RAND_EVENT: - ev = new Rand_log_event(buf, old_format); + ev = new Rand_log_event(buf, description_event); break; case USER_VAR_EVENT: - ev = new User_var_log_event(buf, old_format); + ev = new User_var_log_event(buf, description_event); + break; + case FORMAT_DESCRIPTION_EVENT: + ev = new Format_description_log_event(buf, event_len, description_event); break; default: break; } + /* + is_valid() are small event-specific sanity tests which are important; for + example there are some my_malloc() in constructors + (e.g. Query_log_event::Query_log_event(char*...)); when these my_malloc() + fail we can't return an error out of the constructor (because constructor is + "void") ; so instead we leave the pointer we wanted to allocate + (e.g. 'query') to 0 and we test it in is_valid(). Same for + Format_description_log_event, member 'post_header_len'. + */ if (!ev || !ev->is_valid()) { delete ev; #ifdef MYSQL_CLIENT - if (!force_opt) + if (!force_opt) /* then mysqlbinlog dies */ { *error= "Found invalid event in binary log"; - return 0; + DBUG_RETURN(0); } - ev= new Unknown_log_event(buf, old_format); + ev= new Unknown_log_event(buf, description_event); #else *error= "Found invalid event in binary log"; - return 0; + DBUG_RETURN(0); #endif } ev->cached_event_len = event_len; - return ev; + DBUG_RETURN(ev); } #ifdef MYSQL_CLIENT @@ -674,7 +803,7 @@ void Log_event::print_header(FILE* file) char llbuff[22]; fputc('#', file); print_timestamp(file); - fprintf(file, " server id %d log_pos %s ", server_id, + fprintf(file, " server id %d end_log_pos %s ", server_id, llstr(log_pos,llbuff)); } @@ -708,13 +837,30 @@ void Log_event::print_timestamp(FILE* file, time_t* ts) /* Log_event::set_log_pos() + Only used when we are writing an event which we created, to the BINlog. That + is, when we have parsed and executed a query; we then want to set the event's + log_pos to what it is going to be in the binlog after we write it. Note that + this is the position of the END of the event. */ #ifndef MYSQL_CLIENT void Log_event::set_log_pos(MYSQL_LOG* log) { + /* + Note that with a SEQ_READ_APPEND cache, my_b_tell() does not work well. + So this will give slightly wrong positions for the Format_desc/Rotate/Stop + events which the slave writes to its relay log. For example, the initial + Format_desc will have end_log_pos=91 instead of 95. Because after writing + the first 4 bytes of the relay log, my_b_tell() still reports 0. Because + my_b_append() does not update the counter which my_b_tell() later uses (one + should probably use my_b_append_tell() to work around this). + To get right positions even when writing to the relay log, we use the (new) + my_b_safe_tell(). + Note that this raises a question on the correctness of all these + DBUG_ASSERT(my_b_tell()=rli->event_relay_log_pos). + */ if (!log_pos) - log_pos = my_b_tell(&log->log_file); + log_pos = my_b_safe_tell(&log->log_file)+get_event_len(); } #endif /* !MYSQL_CLIENT */ @@ -727,10 +873,15 @@ void Log_event::set_log_pos(MYSQL_LOG* log) /* Query_log_event::pack_info() + This (which is used only for SHOW BINLOG EVENTS) could be updated to + print SET @@session_var=. But this is not urgent, as SHOW BINLOG EVENTS is + only an information, it does not produce suitable queries to replay (for + example it does not print LOAD DATA INFILE). */ void Query_log_event::pack_info(Protocol *protocol) { + // TODO: show the catalog ?? char *buf, *pos; if (!(buf= my_malloc(9 + db_len + q_len, MYF(MY_WME)))) return; @@ -768,7 +919,7 @@ int Query_log_event::write(IO_CACHE* file) int Query_log_event::write_data(IO_CACHE* file) { - char buf[QUERY_HEADER_LEN]; + char buf[QUERY_HEADER_LEN+1+4+1+8+1+1+catalog_len]; if (!query) return -1; @@ -814,10 +965,46 @@ int Query_log_event::write_data(IO_CACHE* file) int4store(buf + Q_EXEC_TIME_OFFSET, exec_time); buf[Q_DB_LEN_OFFSET] = (char) db_len; int2store(buf + Q_ERR_CODE_OFFSET, error_code); + int2store(buf + Q_STATUS_VARS_LEN_OFFSET, status_vars_len); - return (my_b_safe_write(file, (byte*) buf, QUERY_HEADER_LEN) || - my_b_safe_write(file, (db) ? (byte*) db : (byte*)"", db_len + 1) || - my_b_safe_write(file, (byte*) query, q_len)) ? -1 : 0; + /* + You MUST always write status vars in increasing order of code. This + guarantees that a slightly older slave will be able to parse those he + knows. + */ + char* start= buf+QUERY_HEADER_LEN; + if (flags2_inited) + { + *(start++)= Q_FLAGS2_CODE; + int4store(start, flags2); + start+= 4; + } + if (sql_mode_inited) + { + *(start++)= Q_SQL_MODE_CODE; + int8store(start, sql_mode); + start+= 8; + } + if (catalog) + { + *(start++)= Q_CATALOG_CODE; + *(start++)= catalog_len; + bmove(start, catalog, catalog_len); + start+= catalog_len; + } + /* + Here there could be code like + if (command-line-option-which-says-"log_this_variable") + { + *(start++)= Q_THIS_VARIABLE_CODE; + int4store(start, this_variable); + start+= 4; + } + */ + + return (my_b_safe_write(file, (byte*) buf, (start-buf)) || + my_b_safe_write(file, (db) ? (byte*) db : (byte*)"", db_len + 1) || + my_b_safe_write(file, (byte*) query, q_len)) ? -1 : 0; } @@ -830,68 +1017,177 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg, ulong query_length, bool using_trans) :Log_event(thd_arg, !thd_arg->tmp_table_used ? 0 : LOG_EVENT_THREAD_SPECIFIC_F, using_trans), - data_buf(0), query(query_arg), + data_buf(0), query(query_arg), catalog(thd_arg->catalog), db(thd_arg->db), q_len((uint32) query_length), - error_code(thd_arg->killed != THD::NOT_KILLED ? thd->killed_errno() : thd_arg->net.last_errno), + error_code(thd_arg->killed != THD::NOT_KILLED ? thd->killed_errno() : thd_arg->net.last_errno), thread_id(thd_arg->thread_id), /* save the original thread id; we already know the server id */ - slave_proxy_id(thd_arg->variables.pseudo_thread_id) + slave_proxy_id(thd_arg->variables.pseudo_thread_id), + flags2_inited(1), sql_mode_inited(1), flags2(0), sql_mode(0) { time_t end_time; time(&end_time); exec_time = (ulong) (end_time - thd->start_time); + catalog_len = (catalog) ? (uint32) strlen(catalog) : 0; + status_vars_len= 1+4+1+8+1+1+catalog_len; db_len = (db) ? (uint32) strlen(db) : 0; + /* + If we don't use flags2 for anything else than options contained in + thd->options, it would be more efficient to flags2=thd_arg->options + (OPTIONS_WRITTEN_TO_BINLOG would be used only at reading time). + But it's likely that we don't want to use 32 bits for 3 bits; in the future + we will probably want to reclaim the 29 bits. So we need the &. + */ + flags2= thd_arg->options & OPTIONS_WRITTEN_TO_BIN_LOG; + sql_mode= thd_arg->variables.sql_mode; + DBUG_PRINT("info",("Query_log_event has flags2=%lu sql_mode=%lu",flags2,sql_mode)); } #endif /* MYSQL_CLIENT */ /* Query_log_event::Query_log_event() + This is used by the SQL slave thread to prepare the event before execution. */ -Query_log_event::Query_log_event(const char* buf, int event_len, - bool old_format) - :Log_event(buf, old_format),data_buf(0), query(NULL), db(NULL) +Query_log_event::Query_log_event(const char* buf, uint event_len, + const Format_description_log_event *description_event) + :Log_event(buf, description_event), data_buf(0), query(NULL), + catalog(NULL), db(NULL), catalog_len(0), status_vars_len(0), + flags2_inited(0), sql_mode_inited(0) { ulong data_len; - if (old_format) - { - if ((uint)event_len < OLD_HEADER_LEN + QUERY_HEADER_LEN) - return; - data_len = event_len - (QUERY_HEADER_LEN + OLD_HEADER_LEN); - buf += OLD_HEADER_LEN; - } - else - { - if ((uint)event_len < QUERY_EVENT_OVERHEAD) - return; - data_len = event_len - QUERY_EVENT_OVERHEAD; - buf += LOG_EVENT_HEADER_LEN; - } - + uint8 common_header_len, post_header_len; + DBUG_ENTER("Query_log_event::Query_log_event(char*,...)"); + common_header_len= description_event->common_header_len; + post_header_len= description_event->post_header_len[QUERY_EVENT-1]; + DBUG_PRINT("info",("event_len=%ld, common_header_len=%d, post_header_len=%d", + event_len, common_header_len, post_header_len)); + + /* + We test if the event's length is sensible, and if so we compute data_len. + We cannot rely on QUERY_HEADER_LEN here as it would not be format-tolerant. + We use QUERY_HEADER_MINIMAL_LEN which is the same for 3.23, 4.0 & 5.0. + */ + if (event_len < (uint)(common_header_len + post_header_len)) + return; + data_len = event_len - (common_header_len + post_header_len); + buf+= common_header_len; + + slave_proxy_id= thread_id = uint4korr(buf + Q_THREAD_ID_OFFSET); exec_time = uint4korr(buf + Q_EXEC_TIME_OFFSET); + db_len = (uint)buf[Q_DB_LEN_OFFSET]; error_code = uint2korr(buf + Q_ERR_CODE_OFFSET); - if (!(data_buf = (char*) my_malloc(data_len + 1, MYF(MY_WME)))) + /* + 5.0 format starts here. + Depending on the format, we may or not have affected/warnings etc + The remnent post-header to be parsed has length: + */ + uint32 tmp= post_header_len - QUERY_HEADER_MINIMAL_LEN; + if (tmp) + { + status_vars_len= uint2korr(buf + Q_STATUS_VARS_LEN_OFFSET); + DBUG_PRINT("info", ("Query_log_event has status_vars_len=%d", + status_vars_len)); + tmp-= 2; + } + /* we have parsed everything we know in the post header */ +#ifndef DBUG_OFF + if (tmp) /* this is probably a master newer than us */ + DBUG_PRINT("info", ("Query_log_event has longer post header than we know\ + (%d more bytes)", tmp)); +#endif + + /* + Suppose you have a 4.0 master with --ansi and a 5.0 slave with --ansi. + The slave sets flags2 to 0 (because that's a 4.0 event); if we simply use + this value of 0, we will cancel --ansi on the slave, which is + unwanted. In this example 0 means "unset", not really "set to 0". + Fortunately we have flags2_inited==0 to distinguish between "unset" and + "set to 0". See below. + */ + + /* variable-part: the status vars; only in MySQL 5.0 */ + + const char* start= buf+post_header_len; + const char* end= start+status_vars_len; + /* + The place from which we will start string duplication. + */ + const char* start_dup= end; + + for (const char* pos=start;pos<end;) + { + switch (*pos) + { + case Q_FLAGS2_CODE: + flags2_inited= 1; + flags2= uint4korr(++pos); + DBUG_PRINT("info",("In Query_log_event, read flags2=%lu", flags2)); + pos+= 4; + break; + case Q_SQL_MODE_CODE: + sql_mode_inited= 1; + sql_mode= uint8korr(++pos); + DBUG_PRINT("info",("In Query_log_event, read sql_mode=%lu", sql_mode)); + pos+= 8; + break; + case Q_CATALOG_CODE: + catalog_len= *(++pos); + /* + Now 'pos' points to beginning of catalog - 1. + The catalog must be included in the string which we will duplicate + later. If string status vars having a smaller code had been seen before + and so marked to-be-duplicated, start_dup would be != end and we would + not need (and want) to change start_dup (because this would cut the + previously marked status vars). + */ + if (start_dup==end) + start_dup= ++pos; + pos+= catalog_len; + break; + default: + /* That's why you must write status vars in growing order of code */ + DBUG_PRINT("info",("Query_log_event has unknown status vars (first has\ + code=%d), skipping the rest of them", *pos)); + pos= end; + } + } + + /* A 2nd variable part; this is common to all versions */ + + data_len-= start_dup-start; /* cut not-to-be-duplicated head */ + if (!(data_buf = (char*) my_strdup_with_length(start_dup, + data_len, + MYF(MY_WME)))) return; - - memcpy(data_buf, buf + Q_DATA_OFFSET, data_len); - slave_proxy_id= thread_id= uint4korr(buf + Q_THREAD_ID_OFFSET); - db = data_buf; - db_len = (uint)buf[Q_DB_LEN_OFFSET]; - query=data_buf + db_len + 1; - q_len = data_len - 1 - db_len; - *((char*)query+q_len) = 0; + + const char* tmp_buf= data_buf; + /* Now set event's pointers to point to bits of the new string */ + if (catalog_len) + { + catalog= tmp_buf; + tmp_buf+= end-start_dup; /* "seek" to db */ + } + db= tmp_buf; + query= tmp_buf + db_len + 1; + q_len = data_buf + data_len - query; + /* This is used to detect wrong parsing. Could be removed in the future. */ + DBUG_PRINT("info", ("catalog_len=%d catalog='%s' db='%s' q_len=%d", + catalog_len, catalog, db, q_len)); + DBUG_VOID_RETURN; } - /* Query_log_event::print() */ #ifdef MYSQL_CLIENT -void Query_log_event::print(FILE* file, bool short_form, char* last_db) +void Query_log_event::print(FILE* file, bool short_form, + LAST_EVENT_INFO* last_event_info) { + // TODO: print the catalog ?? char buff[40],*end; // Enough for SET TIMESTAMP if (!short_form) { @@ -902,10 +1198,10 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db) bool same_db = 0; - if (db && last_db) + if (db && last_event_info->db) { - if (!(same_db = !memcmp(last_db, db, db_len + 1))) - memcpy(last_db, db, db_len + 1); + if (!(same_db = !memcmp(last_event_info->db, db, db_len + 1))) + memcpy(last_event_info->db, db, db_len + 1); } if (db && db[0] && !same_db) @@ -916,6 +1212,67 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db) my_fwrite(file, (byte*) buff, (uint) (end-buff),MYF(MY_NABP | MY_WME)); if (flags & LOG_EVENT_THREAD_SPECIFIC_F) fprintf(file,"SET @@session.pseudo_thread_id=%lu;\n",(ulong)thread_id); + /* + Now the session variables; + it's more efficient to pass SQL_MODE as a number instead of a + comma-separated list. + FOREIGN_KEY_CHECKS, SQL_AUTO_IS_NULL, UNIQUE_CHECKS are session-only + variables (they have no global version; they're not listed in sql_class.h), + The tests below work for pure binlogs or pure relay logs. Won't work for + mixed relay logs but we don't create mixed relay logs (that is, there is no + relay log with a format change except within the 3 first events, which + mysqlbinlog handles gracefully). So this code should always be good. + */ + + uint32 tmp; + + if (likely(flags2_inited)) /* likely as this will mainly read 5.0 logs */ + { + /* tmp is a bitmask of bits which have changed. */ + if (likely(last_event_info->flags2_inited)) + /* All bits which have changed */ + tmp= (last_event_info->flags2) ^ flags2; + else /* that's the first Query event we read */ + { + last_event_info->flags2_inited= 1; + tmp= ~((uint32)0); /* all bits have changed */ + } + + if (unlikely(tmp)) /* some bits have changed */ + { + bool need_comma= 0; + fprintf(file, "SET "); + print_set_option(file, tmp, OPTION_NO_FOREIGN_KEY_CHECKS, ~flags2, + "@@session.foreign_key_checks", &need_comma); + print_set_option(file, tmp, OPTION_AUTO_IS_NULL, flags2, + "@@session.sql_auto_is_null", &need_comma); + print_set_option(file, tmp, OPTION_RELAXED_UNIQUE_CHECKS, ~flags2, + "@@session.unique_checks", &need_comma); + fprintf(file,";\n"); + last_event_info->flags2= flags2; + } + } + + /* + If flags2_inited==0, this is an event from 3.23 or 4.0; nothing to print + (remember we don't produce mixed relay logs so there cannot be 5.0 events + before that one so there is nothing to reset). + */ + + if (likely(sql_mode_inited)) + { + if (unlikely(!last_event_info->sql_mode_inited)) /* first Query event */ + { + last_event_info->sql_mode_inited= 1; + last_event_info->sql_mode= ~sql_mode; // force a difference to force write + } + if (unlikely(last_event_info->sql_mode != sql_mode)) + { + fprintf(file,"SET @@session.sql_mode=%lu;\n",(ulong)sql_mode); + last_event_info->sql_mode= sql_mode; + } + } + my_fwrite(file, (byte*) query, q_len, MYF(MY_NABP | MY_WME)); fprintf(file, ";\n"); } @@ -930,6 +1287,13 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db) int Query_log_event::exec_event(struct st_relay_log_info* rli) { int expected_error,actual_error= 0; + /* + Colleagues: please never free(thd->catalog) in MySQL. This would lead to + bugs as here thd->catalog is a part of an alloced block, not an entire + alloced block (see Query_log_event::exec_event()). Same for thd->db. + Thank you. + */ + thd->catalog= (char*) catalog; thd->db= (char*) rewrite_db(db); /* @@ -938,10 +1302,11 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli) When InnoDB will want to store, the positions in rli won't have been updated yet, so group_master_log_* will point to old BEGIN and event_master_log* will point to the beginning of current COMMIT. - So the position to store is event_master_log_pos + event_len - since we must store the pos of the END of the current log event (COMMIT). + But log_pos of the COMMIT Query event is what we want, i.e. the pos of the + END of the current log event (COMMIT). We save it in rli so that InnoDB can + access it. */ - rli->event_len= get_event_len(); + rli->future_group_master_log_pos= log_pos; if (db_ok(thd->db, replicate_do_db, replicate_ignore_db)) { @@ -955,9 +1320,34 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli) thd->clear_error(); thd->variables.pseudo_thread_id= thread_id; // for temp tables + if (flags2_inited) + /* + all bits of thd->options which are 1 in OPTIONS_WRITTEN_TO_BIN_LOG must + take their value from flags2. + */ + thd->options= flags2|(thd->options & ~(ulong)OPTIONS_WRITTEN_TO_BIN_LOG); + /* + else, we are in a 3.23/4.0 binlog; we previously received a + Rotate_log_event which reset thd->options and sql_mode, so nothing to do. + */ + + /* + We do not replicate IGNORE_DIR_IN_CREATE. That is, if the master is a + slave which runs with SQL_MODE=IGNORE_DIR_IN_CREATE, this should not + force us to ignore the dir too. Imagine you are a ring of machines, and + one has a disk problem so that you temporarily need IGNORE_DIR_IN_CREATE + on this machine; you don't want it to propagate elsewhere (you don't want + all slaves to start ignoring the dirs). + */ + if (sql_mode_inited) + thd->variables.sql_mode= (thd->variables.sql_mode&MODE_NO_DIR_IN_CREATE)| + (sql_mode & ~(uint32)MODE_NO_DIR_IN_CREATE); + /* Sanity check to make sure the master did not get a really bad - error on the query. + error on the query (for example, if the query on the master was killed, we + don't want to play it on the slave, because we don't know how much of it + we have to play). */ if (ignored_error_code((expected_error = error_code)) || !check_expected_error(thd,rli,expected_error)) @@ -1014,12 +1404,33 @@ Default database: '%s'", print_slave_db_safe(db)); thd->query_error= 1; } + /* + TODO: compare the values of "affected rows" around here. Something + like: + if ((uint32) affected_in_event != (uint32) affected_on_slave) + { + sql_print_error("Slave: did not get the expected number of affected \ + rows running query from master - expected %d, got %d (this numbers \ + should have matched modulo 4294967296).", 0, ...); + thd->query_error = 1; + } + We may also want an option to tell the slave to ignore "affected" + mismatch. This mismatch could be implemented with a new ER_ code, and + to ignore it you would use --slave-skip-errors... + + To do the comparison we need to know the value of "affected" which the + above mysql_parse() computed. And we need to know the value of + "affected" in the master's binlog. Both will be implemented later. The + important thing is that we now have the format ready to log the values + of "affected" in the binlog. So we can release 5.0.0 before effectively + logging "affected" and effectively comparing it. + */ } - /* - End of sanity check. If the test was wrong, the query got a really bad - error on the master, which could be inconsistent, abort and tell DBA to - check/fix it. check_expected_error() already printed the message to - stderr and rli, and set thd->query_error to 1. + /* + End of sanity check. If the test was false, the query got a really bad + error on the master, which could be inconsistent. check_expected_error() + already printed the message to stderr and rli, to tell the DBA to fix + things, and it has set thd->query_error to 1. */ } /* End of if (db_ok(... */ @@ -1039,15 +1450,23 @@ Default database: '%s'", /************************************************************************** - Start_log_event methods + Start_log_event_v3 methods **************************************************************************/ +#ifndef MYSQL_CLIENT +Start_log_event_v3::Start_log_event_v3() :Log_event(), binlog_version(BINLOG_VERSION) +{ + created= when; + memcpy(server_version, ::server_version, ST_SERVER_VER_LEN); +} +#endif + /* - Start_log_event::pack_info() + Start_log_event_v3::pack_info() */ #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -void Start_log_event::pack_info(Protocol *protocol) +void Start_log_event_v3::pack_info(Protocol *protocol) { char buf[12 + ST_SERVER_VER_LEN + 14 + 22], *pos; pos= strmov(buf, "Server ver: "); @@ -1060,35 +1479,43 @@ void Start_log_event::pack_info(Protocol *protocol) /* - Start_log_event::print() + Start_log_event_v3::print() */ #ifdef MYSQL_CLIENT -void Start_log_event::print(FILE* file, bool short_form, char* last_db) +void Start_log_event_v3::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { - if (short_form) - return; - - print_header(file); - fprintf(file, "\tStart: binlog v %d, server v %s created ", binlog_version, - server_version); - print_timestamp(file); - if (created) - fprintf(file," at startup"); - fputc('\n', file); + if (!short_form) + { + print_header(file); + fprintf(file, "\tStart: binlog v %d, server v %s created ", binlog_version, + server_version); + print_timestamp(file); + if (created) + fprintf(file," at startup"); + fputc('\n', file); + } +#ifdef WHEN_WE_HAVE_THE_RESET_CONNECTION_SQL_COMMAND + /* + This is for mysqlbinlog: like in replication, we want to delete the stale + tmp files left by an unclean shutdown of mysqld (temporary tables). Probably + this can be done with RESET CONNECTION (syntax to be defined). + */ + fprintf(file,"RESET CONNECTION;\n"); +#endif fflush(file); } #endif /* MYSQL_CLIENT */ /* - Start_log_event::Start_log_event() + Start_log_event_v3::Start_log_event_v3() */ -Start_log_event::Start_log_event(const char* buf, - bool old_format) - :Log_event(buf, old_format) +Start_log_event_v3::Start_log_event_v3(const char* buf, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) { - buf += (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; + buf += description_event->common_header_len; binlog_version = uint2korr(buf+ST_BINLOG_VER_OFFSET); memcpy(server_version, buf+ST_SERVER_VER_OFFSET, ST_SERVER_VER_LEN); @@ -1097,12 +1524,12 @@ Start_log_event::Start_log_event(const char* buf, /* - Start_log_event::write_data() + Start_log_event_v3::write_data() */ -int Start_log_event::write_data(IO_CACHE* file) +int Start_log_event_v3::write_data(IO_CACHE* file) { - char buff[START_HEADER_LEN]; + char buff[START_V3_HEADER_LEN]; int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version); memcpy(buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN); int4store(buff + ST_CREATED_OFFSET,created); @@ -1110,7 +1537,7 @@ int Start_log_event::write_data(IO_CACHE* file) } /* - Start_log_event::exec_event() + Start_log_event_v3::exec_event() The master started @@ -1129,18 +1556,24 @@ int Start_log_event::write_data(IO_CACHE* file) */ #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) -int Start_log_event::exec_event(struct st_relay_log_info* rli) +int Start_log_event_v3::exec_event(struct st_relay_log_info* rli) { - DBUG_ENTER("Start_log_event::exec_event"); - - switch (rli->mi->old_format) { - case BINLOG_FORMAT_CURRENT: - /* - This is 4.x, so a Start_log_event is only at master startup, - so we are sure the master has restarted and cleared his temp tables. + DBUG_ENTER("Start_log_event_v3::exec_event"); + switch (rli->relay_log.description_event_for_exec->binlog_version) + { + case 3: + case 4: + /* + This can either be 4.x (then a Start_log_event_v3 is only at master + startup so we are sure the master has restarted and cleared his temp + tables; the event always has 'created'>0) or 5.0 (then we have to test + 'created'). */ - close_temporary_tables(thd); - cleanup_load_tmpdir(); + if (created) + { + close_temporary_tables(thd); + cleanup_load_tmpdir(); + } /* As a transaction NEVER spans on 2 or more binlogs: if we have an active transaction at this point, the master died while @@ -1163,33 +1596,264 @@ binary log."); Now the older formats; in that case load_tmpdir is cleaned up by the I/O thread. */ - case BINLOG_FORMAT_323_LESS_57: + case 1: + if (strncmp(rli->relay_log.description_event_for_exec->server_version, + "3.23.57",7) >= 0 && created) + { + /* + Can distinguish, based on the value of 'created': this event was + generated at master startup. + */ + close_temporary_tables(thd); + } /* - Cannot distinguish a Start_log_event generated at master startup and - one generated by master FLUSH LOGS, so cannot be sure temp tables - have to be dropped. So do nothing. + Otherwise, can't distinguish a Start_log_event generated at master startup + and one generated by master FLUSH LOGS, so cannot be sure temp tables have + to be dropped. So do nothing. */ break; - case BINLOG_FORMAT_323_GEQ_57: - /* - Can distinguish, based on the value of 'created', - which was generated at master startup. - */ - if (created) - close_temporary_tables(thd); - break; default: /* this case is impossible */ - return 1; + DBUG_RETURN(1); } - DBUG_RETURN(Log_event::exec_event(rli)); } #endif /* defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) */ -/************************************************************************** - Load_log_event methods -**************************************************************************/ +/*************************************************************************** + Format_description_log_event methods +****************************************************************************/ + +/* + Format_description_log_event 1st ctor. + + SYNOPSIS + Format_description_log_event::Format_description_log_event + binlog_version the binlog version for which we want to build + an event. Can be 1 (=MySQL 3.23), 3 (=4.0.x + x>=2 and 4.1) or 4 (MySQL 5.0). Note that the + old 4.0 (binlog version 2) is not supported; + it should not be used for replication with + 5.0. + + DESCRIPTION + Ctor. Can be used to create the event to write to the binary log (when the + server starts or when FLUSH LOGS), or to create artificial events to parse + binlogs from MySQL 3.23 or 4.x. + When in a client, only the 2nd use is possible. + + TODO + Update this code with the new event for LOAD DATA, once they are pushed (in + 4.1 or 5.0). If it's in 5.0, only the "case 4" block should be updated. + +*/ + +Format_description_log_event::Format_description_log_event(uint8 binlog_ver, + const char* server_ver) + : Start_log_event_v3() +{ + created= when; + binlog_version= binlog_ver; + switch(binlog_ver) + { + case 4: /* MySQL 5.0 */ + memcpy(server_version, ::server_version, ST_SERVER_VER_LEN); + common_header_len= LOG_EVENT_HEADER_LEN; + number_of_event_types= LOG_EVENT_TYPES; + /* we'll catch my_malloc() error in is_valid() */ + post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8), + MYF(0)); + /* + This long list of assignments is not beautiful, but I see no way to + make it nicer, as the right members are #defines, not array members, so + it's impossible to write a loop. + */ + if (post_header_len) + { + post_header_len[START_EVENT_V3-1]= START_V3_HEADER_LEN; + post_header_len[QUERY_EVENT-1]= QUERY_HEADER_LEN; + post_header_len[STOP_EVENT-1]= 0; + post_header_len[ROTATE_EVENT-1]= ROTATE_HEADER_LEN; + post_header_len[INTVAR_EVENT-1]= 0; + post_header_len[LOAD_EVENT-1]= LOAD_HEADER_LEN; + post_header_len[SLAVE_EVENT-1]= 0; + post_header_len[CREATE_FILE_EVENT-1]= CREATE_FILE_HEADER_LEN; + post_header_len[APPEND_BLOCK_EVENT-1]= APPEND_BLOCK_HEADER_LEN; + post_header_len[EXEC_LOAD_EVENT-1]= EXEC_LOAD_HEADER_LEN; + post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN; + post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1]; + post_header_len[RAND_EVENT-1]= 0; + post_header_len[USER_VAR_EVENT-1]= 0; + post_header_len[FORMAT_DESCRIPTION_EVENT-1]= FORMAT_DESCRIPTION_HEADER_LEN; + } + break; + + case 1: /* 3.23 */ + case 3: /* 4.0.x x>=2 */ + /* + We build an artificial (i.e. not sent by the master) event, which + describes what those old master versions send. + */ + if (binlog_ver==1) + strmov(server_version, server_ver ? server_ver : "3.23"); + else + strmov(server_version, server_ver ? server_ver : "4.0"); + common_header_len= binlog_ver==1 ? OLD_HEADER_LEN : + LOG_EVENT_MINIMAL_HEADER_LEN; + /* + The first new event in binlog version 4 is Format_desc. So any event type + after that does not exist in older versions. We use the events known by + version 3, even if version 1 had only a subset of them (this is not a + problem: it uses a few bytes for nothing but unifies code; it does not + make the slave detect less corruptions). + */ + number_of_event_types= FORMAT_DESCRIPTION_EVENT - 1; + post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8), + MYF(0)); + if (post_header_len) + { + post_header_len[START_EVENT_V3-1]= START_V3_HEADER_LEN; + post_header_len[QUERY_EVENT-1]= QUERY_HEADER_MINIMAL_LEN; + post_header_len[STOP_EVENT-1]= 0; + post_header_len[ROTATE_EVENT-1]= (binlog_ver==1) ? 0 : ROTATE_HEADER_LEN; + post_header_len[INTVAR_EVENT-1]= 0; + post_header_len[LOAD_EVENT-1]= LOAD_HEADER_LEN; + post_header_len[SLAVE_EVENT-1]= 0; + post_header_len[CREATE_FILE_EVENT-1]= CREATE_FILE_HEADER_LEN; + post_header_len[APPEND_BLOCK_EVENT-1]= APPEND_BLOCK_HEADER_LEN; + post_header_len[EXEC_LOAD_EVENT-1]= EXEC_LOAD_HEADER_LEN; + post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN; + post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1]; + post_header_len[RAND_EVENT-1]= 0; + post_header_len[USER_VAR_EVENT-1]= 0; + } + break; + default: /* Includes binlog version 2 i.e. 4.0.x x<=1 */ + post_header_len= 0; /* will make is_valid() fail */ + break; + } +} + +Format_description_log_event::Format_description_log_event(const char* buf, + uint event_len, + const + Format_description_log_event* + description_event) + /* + The problem with this constructor is that the fixed header may have a length + different from this version, but we don't know this length as we have not + read the Format_description_log_event which says it, yet. This length is in + the post-header of the event, but we don't know where the post-header + starts. + So this type of event HAS to: + - either have the header's length at the beginning (in the header, at a + fixed position which will never be changed), not in the post-header. That + would make the header be "shifted" compared to other events. + - or have a header of size LOG_EVENT_MINIMAL_HEADER_LEN (19), in all future + versions, so that we know for sure. + I (Guilhem) chose the 2nd solution. Rotate has the same constraint (because + it is sent before Format_description_log_event). + */ + + :Start_log_event_v3(buf, description_event) +{ + DBUG_ENTER("Format_description_log_event::Format_description_log_event(char*,...)"); + buf+= LOG_EVENT_MINIMAL_HEADER_LEN; + if ((common_header_len=buf[ST_COMMON_HEADER_LEN_OFFSET]) < OLD_HEADER_LEN) + DBUG_VOID_RETURN; /* sanity check */ + number_of_event_types= + event_len-(LOG_EVENT_MINIMAL_HEADER_LEN+ST_COMMON_HEADER_LEN_OFFSET+1); + DBUG_PRINT("info", ("common_header_len=%d number_of_event_types=%d", + common_header_len, number_of_event_types)); + /* If alloc fails, we'll detect it in is_valid() */ + post_header_len= (uint8*) my_memdup(buf+ST_COMMON_HEADER_LEN_OFFSET+1, + number_of_event_types* + sizeof(*post_header_len), + MYF(0)); + DBUG_VOID_RETURN; +} + +int Format_description_log_event::write_data(IO_CACHE* file) +{ + /* + We don't call Start_log_event_v3::write_data() because this would make 2 + my_b_safe_write(). + */ + char buff[FORMAT_DESCRIPTION_HEADER_LEN]; + int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version); + memcpy(buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN); + int4store(buff + ST_CREATED_OFFSET,created); + buff[ST_COMMON_HEADER_LEN_OFFSET]= LOG_EVENT_HEADER_LEN; + memcpy(buff+ST_COMMON_HEADER_LEN_OFFSET+1, (byte*) post_header_len, + LOG_EVENT_TYPES); + return (my_b_safe_write(file, (byte*) buff, sizeof(buff)) ? -1 : 0); +} + +/* + SYNOPSIS + Format_description_log_event::exec_event() + + IMPLEMENTATION + Save the information which describes the binlog's format, to be able to + read all coming events. + Call Start_log_event_v3::exec_event(). +*/ + +#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) +int Format_description_log_event::exec_event(struct st_relay_log_info* rli) +{ + DBUG_ENTER("Format_description_log_event::exec_event"); + + /* save the information describing this binlog */ + delete rli->relay_log.description_event_for_exec; + rli->relay_log.description_event_for_exec= this; + + /* + If this event comes from ourselves, there is no cleaning task to perform, + we don't call Start_log_event_v3::exec_event() (this was just to update the + log's description event). + */ + if (server_id == (uint32) ::server_id) + { + /* + Do not modify rli->group_master_log_pos, as this event did not exist on + the master. That is, just update the *relay log* coordinates; this is done + by passing log_pos=0 to inc_group_relay_log_pos, like we do in + Stop_log_event::exec_event(). + */ + rli->inc_group_relay_log_pos(0); + flush_relay_log_info(rli); + DBUG_RETURN(0); + } + + /* + If the event was not requested by the slave i.e. the master sent it while + the slave asked for a position >4, the event will make + rli->group_master_log_pos advance. Say that the slave asked for position + 1000, and the Format_desc event's end is 95. Then in the beginning of + replication rli->group_master_log_pos will be 0, then 95, then jump to first + really asked event (which is >95). So this is ok. + */ + DBUG_RETURN(Start_log_event_v3::exec_event(rli)); +} +#endif + + /************************************************************************** + Load_log_event methods + General note about Load_log_event: the binlogging of LOAD DATA INFILE is + going to be changed in 5.0 (or maybe in 4.1; not decided yet). + However, the 5.0 slave could still have to read such events (from a 4.x + master), convert them (which just means maybe expand the header, when 5.0 + servers have a UID in events) (remember that whatever is after the header + will be like in 4.x, as this event's format is not modified in 5.0 as we + will use new types of events to log the new LOAD DATA INFILE features). + To be able to read/convert, we just need to not assume that the common + header is of length LOG_EVENT_HEADER_LEN (we must use the description + event). + Note that I (Guilhem) manually tested replication of a big LOAD DATA INFILE + between 3.23 and 5.0, and between 4.0 and 5.0, and it works fine (and the + positions displayed in SHOW SLAVE STATUS then are fine too). + **************************************************************************/ /* Load_log_event::pack_info() @@ -1441,15 +2105,24 @@ Load_log_event::Load_log_event(THD *thd_arg, sql_exchange *ex, constructed event. */ -Load_log_event::Load_log_event(const char *buf, int event_len, - bool old_format) - :Log_event(buf, old_format),num_fields(0),fields(0), +Load_log_event::Load_log_event(const char *buf, uint event_len, + const Format_description_log_event *description_event) + :Log_event(buf, description_event),num_fields(0),fields(0), field_lens(0),field_block_len(0), table_name(0),db(0),fname(0) { if (!event_len) // derived class, will call copy_log_event() itself return; - copy_log_event(buf, event_len, old_format); + /* + I (Guilhem) manually tested replication of LOAD DATA INFILE for 3.23->5.0, + 4.0->5.0 and 5.0->5.0 and it works. + */ + copy_log_event(buf, event_len, + ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ? + LOAD_HEADER_LEN + + description_event->common_header_len : + LOAD_HEADER_LEN + LOG_EVENT_HEADER_LEN), + description_event); } @@ -1458,12 +2131,13 @@ Load_log_event::Load_log_event(const char *buf, int event_len, */ int Load_log_event::copy_log_event(const char *buf, ulong event_len, - bool old_format) + int body_offset, + const Format_description_log_event *description_event) { uint data_len; char* buf_end = (char*)buf + event_len; - uint header_len= old_format ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; - const char* data_head = buf + header_len; + /* this is the beginning of the post-header */ + const char* data_head = buf + description_event->common_header_len; slave_proxy_id= thread_id= uint4korr(data_head + L_THREAD_ID_OFFSET); exec_time = uint4korr(data_head + L_EXEC_TIME_OFFSET); skip_lines = uint4korr(data_head + L_SKIP_LINES_OFFSET); @@ -1471,10 +2145,6 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len, db_len = (uint)data_head[L_DB_LEN_OFFSET]; num_fields = uint4korr(data_head + L_NUM_FIELDS_OFFSET); - int body_offset = ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ? - LOAD_HEADER_LEN + header_len : - get_data_body_offset()); - if ((int) event_len < body_offset) return 1; /* @@ -1498,6 +2168,12 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len, fname = db + db_len + 1; fname_len = strlen(fname); // null termination is accomplished by the caller doing buf[event_len]=0 + + /* + In 5.0 this event will have the same format, as we are planning to log LOAD + DATA INFILE in a completely different way (as a plain-text query) since 4.1 + or 5.0 (Dmitri's WL#874) + */ return 0; } @@ -1507,13 +2183,13 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len, */ #ifdef MYSQL_CLIENT -void Load_log_event::print(FILE* file, bool short_form, char* last_db) +void Load_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { - print(file, short_form, last_db, 0); + print(file, short_form, last_event_info, 0); } -void Load_log_event::print(FILE* file, bool short_form, char* last_db, +void Load_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool commented) { if (!short_form) @@ -1524,10 +2200,10 @@ void Load_log_event::print(FILE* file, bool short_form, char* last_db, } bool same_db = 0; - if (db && last_db) + if (db && last_event_info->db) { - if (!(same_db = !memcmp(last_db, db, db_len + 1))) - memcpy(last_db, db, db_len + 1); + if (!(same_db = !memcmp(last_event_info->db, db, db_len + 1))) + memcpy(last_event_info->db, db, db_len + 1); } if (db && db[0] && !same_db) @@ -1839,17 +2515,17 @@ void Rotate_log_event::pack_info(Protocol *protocol) */ #ifdef MYSQL_CLIENT -void Rotate_log_event::print(FILE* file, bool short_form, char* last_db) +void Rotate_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { char buf[22]; + if (short_form) return; - print_header(file); fprintf(file, "\tRotate to "); if (new_log_ident) my_fwrite(file, (byte*) new_log_ident, (uint)ident_len, - MYF(MY_NABP | MY_WME)); + MYF(MY_NABP | MY_WME)); fprintf(file, " pos: %s", llstr(pos, buf)); fputc('\n', file); fflush(file); @@ -1861,28 +2537,21 @@ void Rotate_log_event::print(FILE* file, bool short_form, char* last_db) Rotate_log_event::Rotate_log_event() */ -Rotate_log_event::Rotate_log_event(const char* buf, int event_len, - bool old_format) - :Log_event(buf, old_format),new_log_ident(NULL),alloced(0) +Rotate_log_event::Rotate_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) ,new_log_ident(NULL),alloced(0) { // The caller will ensure that event_len is what we have at EVENT_LEN_OFFSET - int header_size = (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; + uint8 header_size= description_event->common_header_len; + uint8 post_header_len= description_event->post_header_len[ROTATE_EVENT-1]; uint ident_offset; if (event_len < header_size) return; buf += header_size; - if (old_format) - { - ident_len = (uint)(event_len - OLD_HEADER_LEN); - pos = 4; - ident_offset = 0; - } - else - { - ident_len = (uint)(event_len - ROTATE_EVENT_OVERHEAD); - pos = uint8korr(buf + R_POS_OFFSET); - ident_offset = ROTATE_HEADER_LEN; - } + pos = post_header_len ? uint8korr(buf + R_POS_OFFSET) : 4; + ident_len = (uint)(event_len - + (header_size+post_header_len)); + ident_offset = post_header_len; set_if_smaller(ident_len,FN_REFLEN-1); if (!(new_log_ident= my_strdup_with_length((byte*) buf + ident_offset, @@ -1909,14 +2578,15 @@ int Rotate_log_event::write_data(IO_CACHE* file) /* Rotate_log_event::exec_event() - Got a rotate log even from the master + Got a rotate log event from the master IMPLEMENTATION This is mainly used so that we can later figure out the logname and position for the master. - We can't rotate the slave as this will cause infinitive rotations + We can't rotate the slave's BINlog as this will cause infinitive rotations in a A -> B -> A setup. + The NOTES below is a wrong comment which will disappear when 4.1 is merged. RETURN VALUES 0 ok @@ -1928,7 +2598,7 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli) DBUG_ENTER("Rotate_log_event::exec_event"); pthread_mutex_lock(&rli->data_lock); - rli->event_relay_log_pos += get_event_len(); + rli->event_relay_log_pos= my_b_tell(rli->cur_log); /* If we are in a transaction: the only normal case is when the I/O thread was copying a big transaction, then it was stopped and restarted: we have this @@ -1945,10 +2615,21 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli) { memcpy(rli->group_master_log_name, new_log_ident, ident_len+1); rli->notify_group_master_log_name_update(); - rli->group_master_log_pos = pos; - rli->group_relay_log_pos = rli->event_relay_log_pos; - DBUG_PRINT("info", ("group_master_log_pos: %lu", + rli->group_master_log_pos= pos; + rli->group_relay_log_pos= rli->event_relay_log_pos; + DBUG_PRINT("info", ("group_master_log_name: '%s' group_master_log_pos:\ +%lu", + rli->group_master_log_name, (ulong) rli->group_master_log_pos)); + /* + Reset thd->options and sql_mode, because this could be the signal of a + master's downgrade from 5.0 to 4.0. + However, no need to reset description_event_for_exec: indeed, if the next + master is 5.0 (even 5.0.1) we will soon get a Format_desc; if the next + master is 4.0 then the events are in the slave's format (conversion). + */ + set_slave_thread_options(thd); + thd->variables.sql_mode= global_system_variables.sql_mode; } pthread_mutex_unlock(&rli->data_lock); pthread_cond_broadcast(&rli->data_cond); @@ -1982,12 +2663,13 @@ void Intvar_log_event::pack_info(Protocol *protocol) Intvar_log_event::Intvar_log_event() */ -Intvar_log_event::Intvar_log_event(const char* buf, bool old_format) - :Log_event(buf, old_format) +Intvar_log_event::Intvar_log_event(const char* buf, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) { - buf += (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; - type = buf[I_TYPE_OFFSET]; - val = uint8korr(buf+I_VAL_OFFSET); + buf+= description_event->common_header_len; + type= buf[I_TYPE_OFFSET]; + val= uint8korr(buf+I_VAL_OFFSET); } @@ -2023,7 +2705,7 @@ int Intvar_log_event::write_data(IO_CACHE* file) */ #ifdef MYSQL_CLIENT -void Intvar_log_event::print(FILE* file, bool short_form, char* last_db) +void Intvar_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { char llbuff[22]; const char *msg; @@ -2066,7 +2748,7 @@ int Intvar_log_event::exec_event(struct st_relay_log_info* rli) thd->next_insert_id = val; break; } - rli->inc_event_relay_log_pos(get_event_len()); + rli->inc_event_relay_log_pos(); return 0; } #endif @@ -2089,12 +2771,13 @@ void Rand_log_event::pack_info(Protocol *protocol) #endif -Rand_log_event::Rand_log_event(const char* buf, bool old_format) - :Log_event(buf, old_format) +Rand_log_event::Rand_log_event(const char* buf, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) { - buf += (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; - seed1 = uint8korr(buf+RAND_SEED1_OFFSET); - seed2 = uint8korr(buf+RAND_SEED2_OFFSET); + buf+= description_event->common_header_len; + seed1= uint8korr(buf+RAND_SEED1_OFFSET); + seed2= uint8korr(buf+RAND_SEED2_OFFSET); } @@ -2108,7 +2791,7 @@ int Rand_log_event::write_data(IO_CACHE* file) #ifdef MYSQL_CLIENT -void Rand_log_event::print(FILE* file, bool short_form, char* last_db) +void Rand_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { char llbuff[22],llbuff2[22]; if (!short_form) @@ -2128,7 +2811,7 @@ int Rand_log_event::exec_event(struct st_relay_log_info* rli) { thd->rand.seed1= (ulong) seed1; thd->rand.seed2= (ulong) seed2; - rli->inc_event_relay_log_pos(get_event_len()); + rli->inc_event_relay_log_pos(); return 0; } #endif /* !MYSQL_CLIENT */ @@ -2192,10 +2875,11 @@ void User_var_log_event::pack_info(Protocol* protocol) #endif /* !MYSQL_CLIENT */ -User_var_log_event::User_var_log_event(const char* buf, bool old_format) - :Log_event(buf, old_format) +User_var_log_event::User_var_log_event(const char* buf, + const Format_description_log_event* description_event) + :Log_event(buf, description_event) { - buf+= (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; + buf+= description_event->common_header_len; name_len= uint4korr(buf); name= (char *) buf + UV_NAME_LEN_SIZE; buf+= UV_NAME_LEN_SIZE + name_len; @@ -2269,7 +2953,7 @@ int User_var_log_event::write_data(IO_CACHE* file) */ #ifdef MYSQL_CLIENT -void User_var_log_event::print(FILE* file, bool short_form, char* last_db) +void User_var_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { if (!short_form) { @@ -2359,7 +3043,7 @@ int User_var_log_event::exec_event(struct st_relay_log_info* rli) e.update_hash(val, val_len, type, charset, DERIVATION_NONE); free_root(&thd->mem_root,0); - rli->inc_event_relay_log_pos(get_event_len()); + rli->inc_event_relay_log_pos(); return 0; } #endif /* !MYSQL_CLIENT */ @@ -2371,7 +3055,7 @@ int User_var_log_event::exec_event(struct st_relay_log_info* rli) #ifdef HAVE_REPLICATION #ifdef MYSQL_CLIENT -void Unknown_log_event::print(FILE* file, bool short_form, char* last_db) +void Unknown_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { if (short_form) return; @@ -2401,7 +3085,7 @@ void Slave_log_event::pack_info(Protocol *protocol) #ifndef MYSQL_CLIENT Slave_log_event::Slave_log_event(THD* thd_arg, struct st_relay_log_info* rli) - :Log_event(thd_arg, 0, 0), mem_pool(0), master_host(0) + :Log_event(thd_arg, 0, 0) , mem_pool(0), master_host(0) { DBUG_ENTER("Slave_log_event"); if (!rli->inited) // QQ When can this happen ? @@ -2442,7 +3126,7 @@ Slave_log_event::~Slave_log_event() #ifdef MYSQL_CLIENT -void Slave_log_event::print(FILE* file, bool short_form, char* last_db) +void Slave_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { char llbuff[22]; if (short_form) @@ -2488,12 +3172,13 @@ void Slave_log_event::init_from_mem_pool(int data_size) } -Slave_log_event::Slave_log_event(const char* buf, int event_len) - :Log_event(buf,0),mem_pool(0),master_host(0) +/* This code is not used, so has not been updated to be format-tolerant */ +Slave_log_event::Slave_log_event(const char* buf, uint event_len) + :Log_event(buf,0) /*unused event*/ ,mem_pool(0),master_host(0) { - event_len -= LOG_EVENT_HEADER_LEN; - if (event_len < 0) + if (event_len < LOG_EVENT_HEADER_LEN) return; + event_len -= LOG_EVENT_HEADER_LEN; if (!(mem_pool = (char*) my_malloc(event_len + 1, MYF(MY_WME)))) return; memcpy(mem_pool, buf + LOG_EVENT_HEADER_LEN, event_len); @@ -2521,7 +3206,7 @@ int Slave_log_event::exec_event(struct st_relay_log_info* rli) */ #ifdef MYSQL_CLIENT -void Stop_log_event::print(FILE* file, bool short_form, char* last_db) +void Stop_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info) { if (short_form) return; @@ -2543,7 +3228,7 @@ void Stop_log_event::print(FILE* file, bool short_form, char* last_db) We used to clean up slave_load_tmpdir, but this is useless as it has been cleared at the end of LOAD DATA INFILE. So we have nothing to do here. - The place were we must do this cleaning is in Start_log_event::exec_event(), + The place were we must do this cleaning is in Start_log_event_v3::exec_event(), not here. Because if we come here, the master was sane. */ @@ -2557,7 +3242,7 @@ int Stop_log_event::exec_event(struct st_relay_log_info* rli) could give false triggers in MASTER_POS_WAIT() that we have reached the target position when in fact we have not. */ - rli->inc_group_relay_log_pos(get_event_len(), 0); + rli->inc_group_relay_log_pos(0); flush_relay_log_info(rli); return 0; } @@ -2636,20 +3321,40 @@ int Create_file_log_event::write_base(IO_CACHE* file) Create_file_log_event ctor */ -Create_file_log_event::Create_file_log_event(const char* buf, int len, - bool old_format) - :Load_log_event(buf,0,old_format),fake_base(0),block(0),inited_from_old(0) -{ - int block_offset; - if (copy_log_event(buf,len,old_format)) +Create_file_log_event::Create_file_log_event(const char* buf, uint len, + const Format_description_log_event* description_event) + :Load_log_event(buf,0,description_event),fake_base(0),block(0),inited_from_old(0) +{ + uint block_offset; + uint header_len= description_event->common_header_len; + uint8 load_header_len= description_event->post_header_len[LOAD_EVENT-1]; + uint8 create_file_header_len= description_event->post_header_len[CREATE_FILE_EVENT-1]; + if (copy_log_event(buf,len, + ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ? + load_header_len + header_len : + (fake_base ? (header_len+load_header_len) : + (header_len+load_header_len) + + create_file_header_len)), + description_event)) return; - if (!old_format) + if (description_event->binlog_version!=1) { - file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + - + LOAD_HEADER_LEN + CF_FILE_ID_OFFSET); + file_id= uint4korr(buf + + header_len + + load_header_len + CF_FILE_ID_OFFSET); // + 1 for \0 terminating fname - block_offset = (LOG_EVENT_HEADER_LEN + Load_log_event::get_data_size() + - CREATE_FILE_HEADER_LEN + 1); + /* + Note that it's ok to use get_data_size() below, because it is computed + with values we have already read from this event (because we called + copy_log_event()); we are not using slave's format info to decode master's + format, we are really using master's format info. + Anyway, both formats should be identical (except the common_header_len) as + these Load events are not changed between 4.0 and 5.0 (as logging of LOAD + DATA INFILE does not use Load_log_event in 5.0). + */ + block_offset= description_event->common_header_len + + Load_log_event::get_data_size() + + create_file_header_len + 1; if (len < block_offset) return; block = (char*)buf + block_offset; @@ -2669,18 +3374,18 @@ Create_file_log_event::Create_file_log_event(const char* buf, int len, #ifdef MYSQL_CLIENT void Create_file_log_event::print(FILE* file, bool short_form, - char* last_db, bool enable_local) + LAST_EVENT_INFO* last_event_info, bool enable_local) { if (short_form) { if (enable_local && check_fname_outside_temp_buf()) - Load_log_event::print(file, 1, last_db); + Load_log_event::print(file, 1, last_event_info); return; } if (enable_local) { - Load_log_event::print(file, 1, last_db, !check_fname_outside_temp_buf()); + Load_log_event::print(file, 1, last_event_info, !check_fname_outside_temp_buf()); /* That one is for "file_id: etc" below: in mysqlbinlog we want the #, in SHOW BINLOG EVENTS we don't. @@ -2693,9 +3398,9 @@ void Create_file_log_event::print(FILE* file, bool short_form, void Create_file_log_event::print(FILE* file, bool short_form, - char* last_db) + LAST_EVENT_INFO* last_event_info) { - print(file,short_form,last_db,0); + print(file,short_form,last_event_info,0); } #endif /* MYSQL_CLIENT */ @@ -2809,14 +3514,19 @@ Append_block_log_event::Append_block_log_event(THD* thd_arg, const char* db_arg, Append_block_log_event ctor */ -Append_block_log_event::Append_block_log_event(const char* buf, int len) - :Log_event(buf, 0),block(0) +Append_block_log_event::Append_block_log_event(const char* buf, uint len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event),block(0) { - if ((uint)len < APPEND_BLOCK_EVENT_OVERHEAD) + uint8 common_header_len= description_event->common_header_len; + uint8 append_block_header_len= + description_event->post_header_len[APPEND_BLOCK_EVENT-1]; + uint total_header_len= common_header_len+append_block_header_len; + if (len < total_header_len) return; - file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + AB_FILE_ID_OFFSET); - block = (char*)buf + APPEND_BLOCK_EVENT_OVERHEAD; - block_len = len - APPEND_BLOCK_EVENT_OVERHEAD; + file_id= uint4korr(buf + common_header_len + AB_FILE_ID_OFFSET); + block= (char*)buf + total_header_len; + block_len= len - total_header_len; } @@ -2839,7 +3549,7 @@ int Append_block_log_event::write_data(IO_CACHE* file) #ifdef MYSQL_CLIENT void Append_block_log_event::print(FILE* file, bool short_form, - char* last_db) + LAST_EVENT_INFO* last_event_info) { if (short_form) return; @@ -2921,12 +3631,15 @@ Delete_file_log_event::Delete_file_log_event(THD *thd_arg, const char* db_arg, Delete_file_log_event ctor */ -Delete_file_log_event::Delete_file_log_event(const char* buf, int len) - :Log_event(buf, 0),file_id(0) +Delete_file_log_event::Delete_file_log_event(const char* buf, uint len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event),file_id(0) { - if ((uint)len < DELETE_FILE_EVENT_OVERHEAD) + uint8 common_header_len= description_event->common_header_len; + uint8 delete_file_header_len= description_event->post_header_len[DELETE_FILE_EVENT-1]; + if (len < (uint)(common_header_len + delete_file_header_len)) return; - file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + AB_FILE_ID_OFFSET); + file_id= uint4korr(buf + common_header_len + DF_FILE_ID_OFFSET); } @@ -2948,7 +3661,7 @@ int Delete_file_log_event::write_data(IO_CACHE* file) #ifdef MYSQL_CLIENT void Delete_file_log_event::print(FILE* file, bool short_form, - char* last_db) + LAST_EVENT_INFO* last_event_info) { if (short_form) return; @@ -3011,12 +3724,15 @@ Execute_load_log_event::Execute_load_log_event(THD *thd_arg, const char* db_arg, Execute_load_log_event ctor */ -Execute_load_log_event::Execute_load_log_event(const char* buf, int len) - :Log_event(buf, 0), file_id(0) +Execute_load_log_event::Execute_load_log_event(const char* buf, uint len, + const Format_description_log_event* description_event) + :Log_event(buf, description_event), file_id(0) { - if ((uint)len < EXEC_LOAD_EVENT_OVERHEAD) + uint8 common_header_len= description_event->common_header_len; + uint8 exec_load_header_len= description_event->post_header_len[EXEC_LOAD_EVENT-1]; + if (len < (uint)(common_header_len+exec_load_header_len)) return; - file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + EL_FILE_ID_OFFSET); + file_id= uint4korr(buf + common_header_len + EL_FILE_ID_OFFSET); } @@ -3038,7 +3754,7 @@ int Execute_load_log_event::write_data(IO_CACHE* file) #ifdef MYSQL_CLIENT void Execute_load_log_event::print(FILE* file, bool short_form, - char* last_db) + LAST_EVENT_INFO* last_event_info) { if (short_form) return; @@ -3085,8 +3801,8 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli) goto err; } if (!(lev = (Load_log_event*)Log_event::read_log_event(&file, - (pthread_mutex_t*)0, - (bool)0)) || + (pthread_mutex_t*)0, + rli->relay_log.description_event_for_exec)) || lev->get_type_code() != NEW_LOAD_EVENT) { slave_print_error(rli,0, "Error in Exec_load event: file '%s' appears corrupted", fname); diff --git a/sql/log_event.h b/sql/log_event.h index 6cc8a7ca06d50b511877f835ce4ba1d13e4ee4a6..58691c6a24fbe86fc8602ac933c9202866fbfd78 100644 --- a/sql/log_event.h +++ b/sql/log_event.h @@ -34,13 +34,43 @@ #define LOG_READ_TOO_LARGE -7 #define LOG_EVENT_OFFSET 4 - -#define BINLOG_VERSION 3 + +/* + 3 is MySQL 4.x; 4 is MySQL 5.0.0. + Compared to version 3, version 4 has: + - a different Start_log_event, which includes info about the binary log + (sizes of headers); this info is included for better compatibility if the + master's MySQL version is different from the slave's. + - all events have a unique ID (the triplet (server_id, timestamp at server + start, other) to be sure an event is not executed more than once in a + multimaster setup, example: + M1 + / \ + v v + M2 M3 + \ / + v v + S + if a query is run on M1, it will arrive twice on S, so we need that S + remembers the last unique ID it has processed, to compare and know if the + event should be skipped or not. Example of ID: we already have the server id + (4 bytes), plus: + timestamp_when_the_master_started (4 bytes), a counter (a sequence number + which increments every time we write an event to the binlog) (3 bytes). + Q: how do we handle when the counter is overflowed and restarts from 0 ? + + - Query and Load (Create or Execute) events may have a more precise timestamp + (with microseconds), number of matched/affected/warnings rows + and fields of session variables: SQL_MODE, + FOREIGN_KEY_CHECKS, UNIQUE_CHECKS, SQL_AUTO_IS_NULL, the collations and + charsets, the PASSWORD() version (old/new/...). +*/ +#define BINLOG_VERSION 4 /* We could have used SERVER_VERSION_LENGTH, but this introduces an obscure dependency - if somebody decided to change SERVER_VERSION_LENGTH - this would have broken the replication protocol + this would break the replication protocol */ #define ST_SERVER_VER_LEN 50 @@ -49,6 +79,12 @@ TERMINATED etc). */ +/* + These are flags and structs to handle all the LOAD DATA INFILE options (LINES + TERMINATED etc). + DUMPFILE_FLAG is probably useless (DUMPFILE is a clause of SELECT, not of LOAD + DATA). +*/ #define DUMPFILE_FLAG 0x1 #define OPT_ENCLOSED_FLAG 0x2 #define REPLACE_FLAG 0x4 @@ -136,16 +172,28 @@ struct sql_ex_info #define LOG_EVENT_HEADER_LEN 19 /* the fixed header length */ #define OLD_HEADER_LEN 13 /* the fixed header length in 3.23 */ - +/* + Fixed header length, where 4.x and 5.0 agree. That is, 5.0 may have a longer + header (it will for sure when we have the unique event's ID), but at least + the first 19 bytes are the same in 4.x and 5.0. So when we have the unique + event's ID, LOG_EVENT_HEADER_LEN will be something like 26, but + LOG_EVENT_MINIMAL_HEADER_LEN will remain 19. +*/ +#define LOG_EVENT_MINIMAL_HEADER_LEN 19 + /* event-specific post-header sizes */ -#define QUERY_HEADER_LEN (4 + 4 + 1 + 2) +// where 3.23, 4.x and 5.0 agree +#define QUERY_HEADER_MINIMAL_LEN (4 + 4 + 1 + 2) +// where 5.0 differs: 2 for len of N-bytes vars. +#define QUERY_HEADER_LEN (QUERY_HEADER_MINIMAL_LEN + 2) #define LOAD_HEADER_LEN (4 + 4 + 4 + 1 +1 + 4) -#define START_HEADER_LEN (2 + ST_SERVER_VER_LEN + 4) -#define ROTATE_HEADER_LEN 8 +#define START_V3_HEADER_LEN (2 + ST_SERVER_VER_LEN + 4) +#define ROTATE_HEADER_LEN 8 // this is FROZEN (the Rotate post-header is frozen) #define CREATE_FILE_HEADER_LEN 4 #define APPEND_BLOCK_HEADER_LEN 4 #define EXEC_LOAD_HEADER_LEN 4 #define DELETE_FILE_HEADER_LEN 4 +#define FORMAT_DESCRIPTION_HEADER_LEN (START_V3_HEADER_LEN+1+LOG_EVENT_TYPES) /* Event header offsets; @@ -158,11 +206,12 @@ struct sql_ex_info #define LOG_POS_OFFSET 13 #define FLAGS_OFFSET 17 -/* start event post-header */ +/* start event post-header (for v3 and v4) */ #define ST_BINLOG_VER_OFFSET 0 #define ST_SERVER_VER_OFFSET 2 #define ST_CREATED_OFFSET (ST_SERVER_VER_OFFSET + ST_SERVER_VER_LEN) +#define ST_COMMON_HEADER_LEN_OFFSET (ST_CREATED_OFFSET + 4) /* slave event post-header (this event is never written) */ @@ -176,7 +225,13 @@ struct sql_ex_info #define Q_EXEC_TIME_OFFSET 4 #define Q_DB_LEN_OFFSET 8 #define Q_ERR_CODE_OFFSET 9 +#define Q_STATUS_VARS_LEN_OFFSET 11 #define Q_DATA_OFFSET QUERY_HEADER_LEN +/* these are codes, not offsets; not more than 256 values (1 byte). */ +#define Q_FLAGS2_CODE 0 +#define Q_SQL_MODE_CODE 1 +#define Q_CATALOG_CODE 2 + /* Intvar event post-header */ @@ -228,16 +283,6 @@ struct sql_ex_info /* DF = "Delete File" */ #define DF_FILE_ID_OFFSET 0 -#define QUERY_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+QUERY_HEADER_LEN) -#define QUERY_DATA_OFFSET (LOG_EVENT_HEADER_LEN+QUERY_HEADER_LEN) -#define ROTATE_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+ROTATE_HEADER_LEN) -#define LOAD_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+LOAD_HEADER_LEN) -#define CREATE_FILE_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+\ - +LOAD_HEADER_LEN+CREATE_FILE_HEADER_LEN) -#define DELETE_FILE_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+DELETE_FILE_HEADER_LEN) -#define EXEC_LOAD_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+EXEC_LOAD_HEADER_LEN) -#define APPEND_BLOCK_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+APPEND_BLOCK_HEADER_LEN) - /* 4 bytes which all binlogs should begin with */ #define BINLOG_MAGIC "\xfe\x62\x69\x6e" @@ -264,15 +309,54 @@ struct sql_ex_info */ #define LOG_EVENT_THREAD_SPECIFIC_F 0x4 +/* + OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be written + to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written into the + Format_description_log_event, so that if later we don't want to replicate a + variable we did replicate, or the contrary, it's doable. But it should not be + too hard to decide once for all of what we replicate and what we don't, among + the fixed 32 bits of thd->options. + I (Guilhem) have read through every option's usage, and it looks like + OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEYS are the only ones which alter + how the query modifies the table. It's good to replicate + OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the slave may insert data + slower than the master, in InnoDB. + OPTION_BIG_SELECTS is not needed (the slave thread runs with + max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed either, as + the manual says (because a too big in-memory temp table is automatically + written to disk). +*/ +#define OPTIONS_WRITTEN_TO_BIN_LOG (OPTION_AUTO_IS_NULL | \ +OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS) + enum Log_event_type { - UNKNOWN_EVENT= 0, START_EVENT= 1, QUERY_EVENT= 2, STOP_EVENT= 3, - ROTATE_EVENT= 4, INTVAR_EVENT= 5, LOAD_EVENT=6, SLAVE_EVENT= 7, - CREATE_FILE_EVENT= 8, APPEND_BLOCK_EVENT= 9, EXEC_LOAD_EVENT= 10, - DELETE_FILE_EVENT= 11, NEW_LOAD_EVENT= 12, RAND_EVENT= 13, - USER_VAR_EVENT= 14 + /* + Every time you update this enum (when you add a type), you have to + update the code of Format_description_log_event::Format_description_log_event(). + Make sure you always insert new types ***BEFORE*** ENUM_END_EVENT. + */ + UNKNOWN_EVENT= 0, START_EVENT_V3, QUERY_EVENT, STOP_EVENT, ROTATE_EVENT, + INTVAR_EVENT, LOAD_EVENT, SLAVE_EVENT, CREATE_FILE_EVENT, + APPEND_BLOCK_EVENT, EXEC_LOAD_EVENT, DELETE_FILE_EVENT, + /* + NEW_LOAD_EVENT is like LOAD_EVENT except that it has a longer sql_ex, + allowing multibyte TERMINATED BY etc; both types share the same class + (Load_log_event) + */ + NEW_LOAD_EVENT, + RAND_EVENT, USER_VAR_EVENT, + FORMAT_DESCRIPTION_EVENT, + ENUM_END_EVENT /* end marker */ }; +/* + The number of types we handle in Format_description_log_event (UNKNOWN_EVENT + is not to be handled, it does not exist in binlogs, it does not have a + format). +*/ +#define LOG_EVENT_TYPES (ENUM_END_EVENT-1) + enum Int_event_type { INVALID_INT_EVENT = 0, LAST_INSERT_ID_EVENT = 1, INSERT_ID_EVENT = 2 @@ -285,8 +369,33 @@ class MYSQL_LOG; class THD; #endif +class Format_description_log_event; + struct st_relay_log_info; +#ifdef MYSQL_CLIENT +/* + A structure for mysqlbinlog to remember the last db, flags2, sql_mode etc; it + is passed to events' print() methods, so that they print only the necessary + USE and SET commands. +*/ +typedef struct st_last_event_info +{ + // TODO: have the last catalog here ?? + char db[FN_REFLEN+1]; // TODO: make this a LEX_STRING when thd->db is + bool flags2_inited; + uint32 flags2; + bool sql_mode_inited; + ulonglong sql_mode; + st_last_event_info() + : flags2_inited(0), flags2(0), sql_mode_inited(0), sql_mode(0) + { + db[0]= 0; /* initially, the db is unknown */ + } +} LAST_EVENT_INFO; +#endif + + /***************************************************************************** Log_event class @@ -337,21 +446,26 @@ class Log_event uint16 flags; bool cache_stmt; + #ifndef MYSQL_CLIENT THD* thd; - Log_event(THD* thd_arg, uint16 flags_arg, bool cache_stmt); Log_event(); + Log_event(THD* thd_arg, uint16 flags_arg, bool cache_stmt); /* read_log_event() functions read an event from a binlog or relay log; used by SHOW BINLOG EVENTS, the binlog_dump thread on the master (reads master's binlog), the slave IO thread (reads the event sent by binlog_dump), the slave SQL thread (reads the event from the relay log). + If mutex is 0, the read will proceed without mutex. + We need the description_event to be able to parse the event (to know the + post-header's size); in fact in read_log_event we detect the event's type, + then call the specific event's constructor and pass description_event as an + argument. */ - // if mutex is 0, the read will proceed without mutex static Log_event* read_log_event(IO_CACHE* file, pthread_mutex_t* log_lock, - bool old_format); + const Format_description_log_event *description_event); static int read_log_event(IO_CACHE* file, String* packet, pthread_mutex_t* log_lock); /* set_log_pos() is used to fill log_pos with tell(log). */ @@ -379,10 +493,12 @@ class Log_event return thd ? thd->db : 0; } #else + Log_event() : temp_buf(0) {} // avoid having to link mysqlbinlog against libpthread - static Log_event* read_log_event(IO_CACHE* file, bool old_format); + static Log_event* read_log_event(IO_CACHE* file, + const Format_description_log_event *description_event); /* print*() functions are used by mysqlbinlog */ - virtual void print(FILE* file, bool short_form = 0, char* last_db = 0) = 0; + virtual void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0) = 0; void print_timestamp(FILE* file, time_t *ts = 0); void print_header(FILE* file); #endif @@ -405,9 +521,9 @@ class Log_event virtual int write_data_body(IO_CACHE* file __attribute__((unused))) { return 0; } virtual Log_event_type get_type_code() = 0; - virtual bool is_valid() = 0; + virtual const bool is_valid() = 0; inline bool get_cache_stmt() { return cache_stmt; } - Log_event(const char* buf, bool old_format); + Log_event(const char* buf, const Format_description_log_event* description_event); virtual ~Log_event() { free_temp_buf();} void register_temp_buf(char* buf) { temp_buf = buf; } void free_temp_buf() @@ -419,18 +535,37 @@ class Log_event } } virtual int get_data_size() { return 0;} - virtual int get_data_body_offset() { return 0; } int get_event_len() { - return (cached_event_len ? cached_event_len : - (cached_event_len = LOG_EVENT_HEADER_LEN + get_data_size())); + /* + We don't re-use the cached event's length anymore (we did in 4.x) because + this leads to nasty problems: when the 5.0 slave reads an event from a 4.0 + master, it caches the event's length, then this event is converted before + it goes into the relay log, so it would be written to the relay log with + its old length, which is garbage. + */ + return (cached_event_len=(LOG_EVENT_HEADER_LEN + get_data_size())); } - static Log_event* read_log_event(const char* buf, int event_len, - const char **error, bool old_format); + static Log_event* read_log_event(const char* buf, uint event_len, + const char **error, + const Format_description_log_event + *description_event); /* returns the human readable name of the event's type */ const char* get_type_str(); }; +/* + One class for each type of event. + Two constructors for each class: + - one to create the event for logging (when the server acts as a master), + called after an update to the database is done, + which accepts parameters like the query, the database, the options for LOAD + DATA INFILE... + - one to create the event from a packet (when the server acts as a slave), + called before reproducing the update, which accepts parameters (like a + buffer). Used to read from the master, from the relay log, and in + mysqlbinlog. This constructor must be format-tolerant. +*/ /***************************************************************************** @@ -445,6 +580,7 @@ class Query_log_event: public Log_event char* data_buf; public: const char* query; + const char* catalog; const char* db; /* If we already know the length of the query string @@ -462,6 +598,52 @@ class Query_log_event: public Log_event BUG#1686). */ ulong slave_proxy_id; + + /* + Binlog format 3 and 4 start to differ (as far as class members are + concerned) from here. + */ + + uint32 catalog_len; + + /* + We want to be able to store a variable number of N-bit status vars: + (generally N=32; but N=64 for SQL_MODE) a user may want to log the number of + affected rows (for debugging) while another does not want to lose 4 bytes in + this. + The storage on disk is the following: + status_vars_len is part of the post-header, + status_vars are in the variable-length part, after the post-header, before + the db & query. + status_vars on disk is a sequence of pairs (code, value) where 'code' means + 'sql_mode', 'affected' etc. Sometimes 'value' must be a short string, so its + first byte is its length. For now the order of status vars is: + flags2 - sql_mode - catalog. + We should add the same thing to Load_log_event, but in fact + LOAD DATA INFILE is going to be logged with a new type of event (logging of + the plain text query), so Load_log_event would be frozen, so no need. The + new way of logging LOAD DATA INFILE would use a derived class of + Query_log_event, so automatically benefit from the work already done for + status variables in Query_log_event. + */ + uint16 status_vars_len; + + /* + 'flags2' is a second set of flags (on top of those in Log_event), for + session variables. These are thd->options which is & against a mask + (OPTIONS_WRITTEN_TO_BINLOG). + flags2_inited helps make a difference between flags2==0 (3.23 or 4.x + master, we don't know flags2, so use the slave server's global options) and + flags2==0 (5.0 master, we know this has a meaning of flags all down which + must influence the query). + */ + bool flags2_inited; + bool sql_mode_inited; + + uint32 flags2; + /* In connections sql_mode is 32 bits now but will be 64 bits soon */ + ulonglong sql_mode; + #ifndef MYSQL_CLIENT Query_log_event(THD* thd_arg, const char* query_arg, ulong query_length, @@ -472,10 +654,11 @@ class Query_log_event: public Log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Query_log_event(const char* buf, int event_len, bool old_format); + Query_log_event(const char* buf, uint event_len, + const Format_description_log_event *description_event); ~Query_log_event() { if (data_buf) @@ -486,14 +669,11 @@ class Query_log_event: public Log_event Log_event_type get_type_code() { return QUERY_EVENT; } int write(IO_CACHE* file); int write_data(IO_CACHE* file); // returns 0 on success, -1 on error - bool is_valid() { return query != 0; } + const bool is_valid() { return query != 0; } int get_data_size() { - return (q_len + db_len + 2 - + 4 // thread_id - + 4 // exec_time - + 2 // error_code - ); + /* Note that the "1" below is the db's length. */ + return (q_len + db_len + 1 + status_vars_len + QUERY_HEADER_LEN); } }; @@ -504,6 +684,7 @@ class Query_log_event: public Log_event Slave Log Event class Note that this class is currently not used at all; no code writes a Slave_log_event (though some code in repl_failsafe.cc reads Slave_log_event). + So it's not a problem if this code is not maintained. ****************************************************************************/ class Slave_log_event: public Log_event @@ -524,13 +705,13 @@ class Slave_log_event: public Log_event void pack_info(Protocol* protocol); int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Slave_log_event(const char* buf, int event_len); + Slave_log_event(const char* buf, uint event_len); ~Slave_log_event(); int get_data_size(); - bool is_valid() { return master_host != 0; } + const bool is_valid() { return master_host != 0; } Log_event_type get_type_code() { return SLAVE_EVENT; } int write_data(IO_CACHE* file ); }; @@ -546,12 +727,18 @@ class Slave_log_event: public Log_event class Load_log_event: public Log_event { protected: - int copy_log_event(const char *buf, ulong event_len, bool old_format); + int copy_log_event(const char *buf, ulong event_len, + int body_offset, const Format_description_log_event* description_event); public: ulong thread_id; ulong slave_proxy_id; uint32 table_name_len; + /* + No need to have a catalog, as these events can only come from 4.x. + TODO: this may become false if Dmitri pushes his new LOAD DATA INFILE in + 5.0 only (not in 4.x). + */ uint32 db_len; uint32 fname_len; uint32 num_fields; @@ -597,11 +784,18 @@ class Load_log_event: public Log_event bool use_rli_only_for_errors); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); - void print(FILE* file, bool short_form, char* last_db, bool commented); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info = 0); + void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool commented); #endif - Load_log_event(const char* buf, int event_len, bool old_format); + /* + Note that for all the events related to LOAD DATA (Load_log_event, + Create_file/Append/Exec/Delete, we pass description_event; however as + logging of LOAD DATA is going to be changed in 4.1 or 5.0, this is only used + for the common_header_len (post_header_len will not be changed). + */ + Load_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Load_log_event() {} Log_event_type get_type_code() @@ -610,27 +804,31 @@ class Load_log_event: public Log_event } int write_data_header(IO_CACHE* file); int write_data_body(IO_CACHE* file); - bool is_valid() { return table_name != 0; } + const bool is_valid() { return table_name != 0; } int get_data_size() { - return (table_name_len + 2 + db_len + 2 + fname_len - + 4 // thread_id - + 4 // exec_time - + 4 // skip_lines - + 4 // field block len + return (table_name_len + db_len + 2 + fname_len + + LOAD_HEADER_LEN + sql_ex.data_size() + field_block_len + num_fields); } - int get_data_body_offset() { return LOAD_EVENT_OVERHEAD; } }; extern char server_version[SERVER_VERSION_LENGTH]; /***************************************************************************** - Start Log Event class + Start Log Event_v3 class + Start_log_event_v3 is the Start_log_event of binlog format 3 (MySQL 3.23 and + 4.x). + Format_description_log_event derives from Start_log_event_v3; it is the + Start_log_event of binlog format 4 (MySQL 5.0), that is, the event that + describes the other events' header/postheader lengths. This event is sent by + MySQL 5.0 whenever it starts sending a new binlog if the requested position + is >4 (otherwise if ==4 the event will be sent naturally). + ****************************************************************************/ -class Start_log_event: public Log_event +class Start_log_event_v3: public Log_event { public: /* @@ -658,27 +856,81 @@ class Start_log_event: public Log_event char server_version[ST_SERVER_VER_LEN]; #ifndef MYSQL_CLIENT - Start_log_event() :Log_event(), binlog_version(BINLOG_VERSION) - { - created = (time_t) when; - memcpy(server_version, ::server_version, ST_SERVER_VER_LEN); - } + Start_log_event_v3(); #ifdef HAVE_REPLICATION void pack_info(Protocol* protocol); int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + Start_log_event_v3() {} + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Start_log_event(const char* buf, bool old_format); - ~Start_log_event() {} - Log_event_type get_type_code() { return START_EVENT;} + Start_log_event_v3(const char* buf, + const Format_description_log_event* description_event); + ~Start_log_event_v3() {} + Log_event_type get_type_code() { return START_EVENT_V3;} int write_data(IO_CACHE* file); - bool is_valid() { return 1; } + const bool is_valid() { return 1; } int get_data_size() { - return START_HEADER_LEN; + return START_V3_HEADER_LEN; //no variable-sized part + } +}; + +/* + For binlog version 4. + This event is saved by threads which read it, as they need it for future + use (to decode the ordinary events). +*/ + +class Format_description_log_event: public Start_log_event_v3 +{ +public: + /* + The size of the fixed header which _all_ events have + (for binlogs written by this version, this is equal to + LOG_EVENT_HEADER_LEN), except FORMAT_DESCRIPTION_EVENT and ROTATE_EVENT + (those have a header of size LOG_EVENT_MINIMAL_HEADER_LEN). + */ + uint8 common_header_len; + uint8 number_of_event_types; + /* The list of post-headers' lengthes */ + uint8 *post_header_len; + + Format_description_log_event(uint8 binlog_ver, const char* server_ver=0); + +#ifndef MYSQL_CLIENT +#ifdef HAVE_REPLICATION + int exec_event(struct st_relay_log_info* rli); +#endif /* HAVE_REPLICATION */ +#endif + + Format_description_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); + ~Format_description_log_event() { my_free((gptr)post_header_len, MYF(0)); } + Log_event_type get_type_code() { return FORMAT_DESCRIPTION_EVENT;} + int write_data(IO_CACHE* file); + const bool is_valid() + { + return ((common_header_len >= ((binlog_version==1) ? OLD_HEADER_LEN : + LOG_EVENT_MINIMAL_HEADER_LEN)) && + (post_header_len != NULL)); + } + int get_event_len() + { + int i= LOG_EVENT_MINIMAL_HEADER_LEN + get_data_size(); + DBUG_PRINT("info",("event_len=%d",i)); + return i; + } + int get_data_size() + { + /* + The vector of post-header lengths is considered as part of the + post-header, because in a given version it never changes (contrary to the + query in a Query_log_event). + */ + return FORMAT_DESCRIPTION_HEADER_LEN; } }; @@ -705,23 +957,26 @@ class Intvar_log_event: public Log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Intvar_log_event(const char* buf, bool old_format); + Intvar_log_event(const char* buf, const Format_description_log_event* description_event); ~Intvar_log_event() {} Log_event_type get_type_code() { return INTVAR_EVENT;} const char* get_var_type_name(); int get_data_size() { return 9; /* sizeof(type) + sizeof(val) */;} int write_data(IO_CACHE* file); - bool is_valid() { return 1; } + const bool is_valid() { return 1; } }; /***************************************************************************** Rand Log Event class - Logs random seed used by the next RAND(), and by PASSWORD() in 4.1. + Logs random seed used by the next RAND(), and by PASSWORD() in 4.1.0. + 4.1.1 does not need it (it's repeatable again) so this event needn't be + written in 4.1.1 for PASSWORD() (but the fact that it is written is just a + waste, it does not cause bugs). ****************************************************************************/ class Rand_log_event: public Log_event @@ -739,15 +994,15 @@ class Rand_log_event: public Log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Rand_log_event(const char* buf, bool old_format); + Rand_log_event(const char* buf, const Format_description_log_event* description_event); ~Rand_log_event() {} Log_event_type get_type_code() { return RAND_EVENT;} int get_data_size() { return 16; /* sizeof(ulonglong) * 2*/ } int write_data(IO_CACHE* file); - bool is_valid() { return 1; } + const bool is_valid() { return 1; } }; /***************************************************************************** @@ -757,6 +1012,9 @@ class Rand_log_event: public Log_event Every time a query uses the value of a user variable, a User_var_log_event is written before the Query_log_event, to set the user variable. + Every time a query uses the value of a user variable, a User_var_log_event is + written before the Query_log_event, to set the user variable. + ****************************************************************************/ class User_var_log_event: public Log_event { @@ -778,10 +1036,10 @@ class User_var_log_event: public Log_event void pack_info(Protocol* protocol); int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - User_var_log_event(const char* buf, bool old_format); + User_var_log_event(const char* buf, const Format_description_log_event* description_event); ~User_var_log_event() {} Log_event_type get_type_code() { return USER_VAR_EVENT;} int get_data_size() @@ -791,7 +1049,7 @@ class User_var_log_event: public Log_event UV_CHARSET_NUMBER_SIZE + UV_VAL_LEN_SIZE + val_len); } int write_data(IO_CACHE* file); - bool is_valid() { return 1; } + const bool is_valid() { return 1; } }; /***************************************************************************** @@ -809,15 +1067,15 @@ class Stop_log_event: public Log_event {} int exec_event(struct st_relay_log_info* rli); #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Stop_log_event(const char* buf, bool old_format): - Log_event(buf, old_format) + Stop_log_event(const char* buf, const Format_description_log_event* description_event): + Log_event(buf, description_event) {} ~Stop_log_event() {} Log_event_type get_type_code() { return STOP_EVENT;} - bool is_valid() { return 1; } + const bool is_valid() { return 1; } }; #endif /* HAVE_REPLICATION */ @@ -850,18 +1108,23 @@ class Rotate_log_event: public Log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Rotate_log_event(const char* buf, int event_len, bool old_format); + Rotate_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Rotate_log_event() { if (alloced) my_free((gptr) new_log_ident, MYF(0)); } Log_event_type get_type_code() { return ROTATE_EVENT;} + int get_event_len() + { + return (LOG_EVENT_MINIMAL_HEADER_LEN + get_data_size()); + } int get_data_size() { return ident_len + ROTATE_HEADER_LEN;} - bool is_valid() { return new_log_ident != 0; } + const bool is_valid() { return new_log_ident != 0; } int write_data(IO_CACHE* file); }; @@ -899,11 +1162,12 @@ class Create_file_log_event: public Load_log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); - void print(FILE* file, bool short_form, char* last_db, bool enable_local); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); + void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool enable_local); #endif - Create_file_log_event(const char* buf, int event_len, bool old_format); + Create_file_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Create_file_log_event() {} Log_event_type get_type_code() @@ -916,12 +1180,7 @@ class Create_file_log_event: public Load_log_event Load_log_event::get_data_size() + 4 + 1 + block_len); } - int get_data_body_offset() - { - return (fake_base ? LOAD_EVENT_OVERHEAD: - LOAD_EVENT_OVERHEAD + CREATE_FILE_HEADER_LEN); - } - bool is_valid() { return inited_from_old || block != 0; } + const bool is_valid() { return inited_from_old || block != 0; } int write_data_header(IO_CACHE* file); int write_data_body(IO_CACHE* file); /* @@ -963,14 +1222,15 @@ class Append_block_log_event: public Log_event void pack_info(Protocol* protocol); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Append_block_log_event(const char* buf, int event_len); + Append_block_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Append_block_log_event() {} Log_event_type get_type_code() { return APPEND_BLOCK_EVENT;} int get_data_size() { return block_len + APPEND_BLOCK_HEADER_LEN ;} - bool is_valid() { return block != 0; } + const bool is_valid() { return block != 0; } int write_data(IO_CACHE* file); const char* get_db() { return db; } }; @@ -993,15 +1253,16 @@ class Delete_file_log_event: public Log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); - void print(FILE* file, bool short_form, char* last_db, bool enable_local); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); + void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool enable_local); #endif - Delete_file_log_event(const char* buf, int event_len); + Delete_file_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Delete_file_log_event() {} Log_event_type get_type_code() { return DELETE_FILE_EVENT;} int get_data_size() { return DELETE_FILE_HEADER_LEN ;} - bool is_valid() { return file_id != 0; } + const bool is_valid() { return file_id != 0; } int write_data(IO_CACHE* file); const char* get_db() { return db; } }; @@ -1024,14 +1285,15 @@ class Execute_load_log_event: public Log_event int exec_event(struct st_relay_log_info* rli); #endif /* HAVE_REPLICATION */ #else - void print(FILE* file, bool short_form = 0, char* last_db = 0); + void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0); #endif - Execute_load_log_event(const char* buf, int event_len); + Execute_load_log_event(const char* buf, uint event_len, + const Format_description_log_event* description_event); ~Execute_load_log_event() {} Log_event_type get_type_code() { return EXEC_LOAD_EVENT;} int get_data_size() { return EXEC_LOAD_HEADER_LEN ;} - bool is_valid() { return file_id != 0; } + const bool is_valid() { return file_id != 0; } int write_data(IO_CACHE* file); const char* get_db() { return db; } }; @@ -1040,13 +1302,18 @@ class Execute_load_log_event: public Log_event class Unknown_log_event: public Log_event { public: - Unknown_log_event(const char* buf, bool old_format): - Log_event(buf, old_format) + /* + Even if this is an unknown event, we still pass description_event to + Log_event's ctor, this way we can extract maximum information from the + event's header (the unique ID for example). + */ + Unknown_log_event(const char* buf, const Format_description_log_event* description_event): + Log_event(buf, description_event) {} ~Unknown_log_event() {} - void print(FILE* file, bool short_form= 0, char* last_db= 0); + void print(FILE* file, bool short_form= 0, LAST_EVENT_INFO* last_event_info= 0); Log_event_type get_type_code() { return UNKNOWN_EVENT;} - bool is_valid() { return 1; } + const bool is_valid() { return 1; } }; #endif diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index c963e601d2ed65c5c3d1aed44d59009486057a59..39b8fbcb1a98c6b4e473f1d1baca42c92f1a0ac1 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -14,6 +14,15 @@ along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + Mostly this file is used in the server. But a little part of it is used in + mysqlbinlog too (definition of SELECT_DISTINCT and others). + The consequence is that 90% of the file is wrapped in #ifndef MYSQL_CLIENT, + except the part which must be in the server and in the client. +*/ + +#ifndef MYSQL_CLIENT + #include <my_global.h> #include <assert.h> #include <mysql_version.h> @@ -176,7 +185,15 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset; #define TEST_NO_STACKTRACE 512 #define TEST_SIGINT 1024 /* Allow sigint on threads */ -/* options for select set by the yacc parser (stored in lex->options) */ +#endif + +/* + This is included in the server and in the client. + Options for select set by the yacc parser (stored in lex->options). + None of the 32 defines below should have its value changed, or this will + break replication. +*/ + #define SELECT_DISTINCT (1L << 0) #define SELECT_STRAIGHT_JOIN (1L << 1) #define SELECT_DESCRIBE (1L << 2) @@ -214,6 +231,9 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset; #define OPTION_RELAXED_UNIQUE_CHECKS (1L << 27) #define SELECT_NO_UNLOCK (1L << 28) +/* The rest of the file is included in the server only */ +#ifndef MYSQL_CLIENT + /* options for UNION set by the yacc parser (stored in unit->union_option) */ #define UNION_ALL 1 @@ -1102,3 +1122,5 @@ inline void setup_table_map(TABLE *table, TABLE_LIST *table_list, uint tablenr) table->map= (table_map) 1 << tablenr; table->force_index= table_list->force_index; } + +#endif /* MYSQL_CLIENT */ diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 7b3d8411e32324d51e461ada24549393f85cef0e..6c321972c4b8f6aa14d8b911960c51c6b2582982 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -2023,7 +2023,7 @@ bool open_log(MYSQL_LOG *log, const char *hostname, } return log->open(opt_name, type, 0, index_file_name, (read_append) ? SEQ_READ_APPEND : WRITE_CACHE, - no_auto_events, max_size); + no_auto_events, max_size, 0); } diff --git a/sql/slave.cc b/sql/slave.cc index 3f8a3c3761823c90ad5ba554814eabf90839fbd7..bc9471a5368a85e0cb6aa3182285e499a8db528c 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -73,8 +73,6 @@ static int safe_sleep(THD* thd, int sec, CHECK_KILLED_FUNC thread_killed, static int request_table_dump(MYSQL* mysql, const char* db, const char* table); static int create_table_from_dump(THD* thd, MYSQL *mysql, const char* db, const char* table_name, bool overwrite); -static int check_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi); - /* Find out which replications threads are running @@ -215,6 +213,12 @@ static byte* get_table_key(TABLE_RULE_ENT* e, uint* len, pos Position in relay log file need_data_lock Set to 1 if this functions should do mutex locks errmsg Store pointer to error message here + look_for_description_event + 1 if we should look for such an event. We only need + this when the SQL thread starts and opens an existing + relay log and has to execute it (possibly from an offset + >4); then we need to read the first event of the relay + log to be able to parse the events we have to execute. DESCRIPTION - Close old open relay log files. @@ -232,15 +236,35 @@ static byte* get_table_key(TABLE_RULE_ENT* e, uint* len, int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log, ulonglong pos, bool need_data_lock, - const char** errmsg) + const char** errmsg, + bool look_for_description_event) { DBUG_ENTER("init_relay_log_pos"); + DBUG_PRINT("info", ("pos=%lu", pos)); *errmsg=0; pthread_mutex_t *log_lock=rli->relay_log.get_log_lock(); if (need_data_lock) pthread_mutex_lock(&rli->data_lock); + + /* + Slave threads are not the only users of init_relay_log_pos(). CHANGE MASTER + is, too, and init_slave() too; these 2 functions allocate a description + event in init_relay_log_pos, which is not freed by the terminating SQL slave + thread as that thread is not started by these functions. So we have to free + the description_event here, in case, so that there is no memory leak in + running, say, CHANGE MASTER. + */ + delete rli->relay_log.description_event_for_exec; + /* + By default the relay log is in binlog format 3 (4.0). + Even if format is 4, this will work enough to read the first event + (Format_desc) (remember that format 4 is just lenghtened compared to format + 3; format 3 is a prefix of format 4). + */ + rli->relay_log.description_event_for_exec= new + Format_description_log_event(3); pthread_mutex_lock(log_lock); @@ -280,8 +304,8 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log, In this case, we will use the same IO_CACHE pointer to read data as the IO thread is using to write data. */ - if (my_b_tell((rli->cur_log=rli->relay_log.get_log_file())) == 0 && - check_binlog_magic(rli->cur_log,errmsg)) + my_b_seek((rli->cur_log=rli->relay_log.get_log_file()), (off_t)0); + if (check_binlog_magic(rli->cur_log,errmsg)) goto err; rli->cur_log_old_open_count=rli->relay_log.get_open_count(); } @@ -295,8 +319,85 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log, goto err; rli->cur_log = &rli->cache_buf; } - if (pos >= BIN_LOG_HEADER_SIZE) + /* + In all cases, check_binlog_magic() has been called so we're at offset 4 for + sure. + */ + if (pos > BIN_LOG_HEADER_SIZE) /* If pos<=4, we stay at 4 */ + { + Log_event* ev; + while (look_for_description_event) + { + /* + Read the possible Format_description_log_event; if position was 4, no need, it will + be read naturally. + */ + DBUG_PRINT("info",("looking for a Format_description_log_event")); + + if (my_b_tell(rli->cur_log) >= pos) + break; + + /* + Because of we have rli->data_lock and log_lock, we can safely read an + event + */ + if (!(ev=Log_event::read_log_event(rli->cur_log,0, + rli->relay_log.description_event_for_exec))) + { + DBUG_PRINT("info",("could not read event, rli->cur_log->error=%d", + rli->cur_log->error)); + if (rli->cur_log->error) /* not EOF */ + { + *errmsg= "I/O error reading event at position 4"; + goto err; + } + break; + } + else if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT) + { + DBUG_PRINT("info",("found Format_description_log_event")); + delete rli->relay_log.description_event_for_exec; + rli->relay_log.description_event_for_exec= (Format_description_log_event*) ev; + /* + As ev was returned by read_log_event, it has passed is_valid(), so + my_malloc() in ctor worked, no need to check again. + */ + /* + Ok, we found a Format_description event. But it is not sure that this + describes the whole relay log; indeed, one can have this sequence + (starting from position 4): + Format_desc (of slave) + Rotate (of master) + Format_desc (of slave) + So the Format_desc which really describes the rest of the relay log is + the 3rd event (it can't be further than that, because we rotate the + relay log when we queue a Rotate event from the master). + But what describes the Rotate is the first Format_desc. + So what we do is: + go on searching for Format_description events, until you exceed the + position (argument 'pos') or until you find another event than Rotate + or Format_desc. + */ + } + else + { + DBUG_PRINT("info",("found event of another type=%d", + ev->get_type_code())); + look_for_description_event= (ev->get_type_code() == ROTATE_EVENT); + delete ev; + } + } my_b_seek(rli->cur_log,(off_t)pos); +#ifndef DBUG_OFF + { + char llbuf1[22], llbuf2[22]; + DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s", + llstr(my_b_tell(rli->cur_log),llbuf1), + llstr(rli->event_relay_log_pos,llbuf2))); + } +#endif + + } err: /* @@ -311,6 +412,8 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log, if (need_data_lock) pthread_mutex_unlock(&rli->data_lock); + if (!rli->relay_log.description_event_for_exec->is_valid() && !*errmsg) + *errmsg= "Invalid Format_description log event; could be out of memory"; DBUG_RETURN ((*errmsg) ? 1 : 0); } @@ -428,13 +531,15 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset, sizeof(rli->group_relay_log_name)-1); strmake(rli->event_relay_log_name, rli->relay_log.get_log_fname(), sizeof(rli->event_relay_log_name)-1); - // Just first log with magic number and nothing else - rli->log_space_total= BIN_LOG_HEADER_SIZE; rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE; - rli->relay_log.reset_bytes_written(); + if (count_relay_log_space(rli)) + { + *errmsg= "Error counting relay log space"; + goto err; + } if (!just_reset) error= init_relay_log_pos(rli, rli->group_relay_log_name, rli->group_relay_log_pos, - 0 /* do not need data lock */, errmsg); + 0 /* do not need data lock */, errmsg, 0); err: #ifndef DBUG_OFF @@ -693,6 +798,10 @@ static TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len) different results. Note also the order of precedence of the do/ignore rules (see code below). For that reason, users should not set conflicting rules because they may get unpredicted results. + Thought which arose from a question of a big customer "I want to include all + tables like "abc.%" except the "%.EFG"". This can't be done now. If we + supported Perl regexps we could do it with this pattern: /^abc\.(?!EFG)/ + (I could not find an equivalent in the regex library MySQL uses). RETURN VALUES 0 should not be logged/replicated @@ -1087,30 +1196,74 @@ static int init_intvar_from_file(int* var, IO_CACHE* f, int default_val) return 1; } +/* + Note that we rely on the master's version (3.23, 4.0.14 etc) instead of + relying on the binlog's version. This is not perfect: imagine an upgrade + of the master without waiting that all slaves are in sync with the master; + then a slave could be fooled about the binlog's format. This is what happens + when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0 + slaves are fooled. So we do this only to distinguish between 3.23 and more + recent masters (it's too late to change things for 3.23). + + RETURNS + 0 ok + 1 error +*/ static int get_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi) { const char* errmsg= 0; - /* - Note the following switch will bug when we have MySQL branch 30 ;) - */ - switch (*mysql->server_version) { - case '3': - mi->old_format = - (strncmp(mysql->server_version, "3.23.57", 7) < 0) /* < .57 */ ? - BINLOG_FORMAT_323_LESS_57 : - BINLOG_FORMAT_323_GEQ_57 ; - break; - case '4': - case '5': - mi->old_format = BINLOG_FORMAT_CURRENT; - break; - default: + if (!my_isdigit(&my_charset_bin,*mysql->server_version)) errmsg = "Master reported unrecognized MySQL version"; - break; + else + { + /* + Note the following switch will bug when we have MySQL branch 30 ;) + */ + switch (*mysql->server_version) + { + case '0': + case '1': + case '2': + errmsg = "Master reported unrecognized MySQL version"; + break; + case '3': + mi->rli.relay_log.description_event_for_queue= new + Format_description_log_event(1, mysql->server_version); + break; + case '4': + mi->rli.relay_log.description_event_for_queue= new + Format_description_log_event(3, mysql->server_version); + break; + default: + /* + Master is MySQL >=5.0. Give a default Format_desc event, so that we can + take the early steps (like tests for "is this a 3.23 master") which we + have to take before we receive the real master's Format_desc which will + override this one. Note that the Format_desc we create below is garbage + (it has the format of the *slave*); it's only good to help know if the + master is 3.23, 4.0, etc. + */ + mi->rli.relay_log.description_event_for_queue= new + Format_description_log_event(4, mysql->server_version); + break; + } } - + + /* + This does not mean that a 5.0 slave will be able to read a 6.0 master; but + as we don't know yet, we don't want to forbid this for now. If a 5.0 slave + can't read a 6.0 master, this will show up when the slave can't read some + events sent by the master, and there will be error messages. + */ + + if (errmsg) + { + sql_print_error(errmsg); + return 1; + } + MYSQL_RES *master_clock_res; MYSQL_ROW master_clock_row; time_t slave_clock; @@ -1385,7 +1538,7 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) if (open_log(&rli->relay_log, glob_hostname, opt_relay_logname, "-relay-bin", opt_relaylog_index_name, LOG_BIN, 1 /* read_append cache */, - 1 /* no auto events */, + 0 /* starting from 5.0 we want relay logs to have auto events */, max_relay_log_size ? max_relay_log_size : max_binlog_size)) { sql_print_error("Failed in open_log() called from init_relay_log_info()"); @@ -1419,7 +1572,7 @@ file '%s', errno %d)", fname, my_errno); /* Init relay log with first entry in the relay index file */ if (init_relay_log_pos(rli,NullS,BIN_LOG_HEADER_SIZE,0 /* no data lock */, - &msg)) + &msg, 0)) { sql_print_error("Failed to open the relay log 'FIRST' (relay_log_pos 4)"); goto err; @@ -1484,7 +1637,7 @@ Failed to open the existing relay log info file '%s' (errno %d)", rli->group_relay_log_name, rli->group_relay_log_pos, 0 /* no data lock*/, - &msg)) + &msg, 0)) { char llbuf[22]; sql_print_error("Failed to open the relay log '%s' (relay_log_pos %s)", @@ -1493,8 +1646,18 @@ Failed to open the existing relay log info file '%s' (errno %d)", goto err; } } - DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE); - DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos); + +#ifndef DBUG_OFF + { + char llbuf1[22], llbuf2[22]; + DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s", + llstr(my_b_tell(rli->cur_log),llbuf1), + llstr(rli->event_relay_log_pos,llbuf2))); + DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE); + DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos); + } +#endif + /* Now change the cache from READ to WRITE - must do this before flush_relay_log_info @@ -2251,14 +2414,18 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, goto err; } + int cmp_result; + /* The "compare and wait" main loop */ while (!thd->killed && init_abort_pos_wait == abort_pos_wait && slave_running) { - bool pos_reached; - int cmp_result= 0; - DBUG_ASSERT(*group_master_log_name || group_master_log_pos == 0); + /* + If we are after RESET SLAVE, and the SQL slave thread has not processed + any event yet, it could be that group_master_log_name is "". In that case, + just wait for more events (as there is no sensible comparison to do). + */ if (*group_master_log_name) { char *basename= group_master_log_name + dirname_length(group_master_log_name); @@ -2280,13 +2447,12 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, if (group_master_log_name_extension < log_name_extension) cmp_result = -1 ; else - cmp_result= (group_master_log_name_extension > log_name_extension) ? 1 : 0 ; + cmp_result= (group_master_log_name_extension > log_name_extension) ? + 1 : 0 ; + if (((!cmp_result && group_master_log_pos >= (ulonglong)log_pos) || + cmp_result > 0) || thd->killed) + break; } - pos_reached = ((!cmp_result && group_master_log_pos >= (ulonglong)log_pos) || - cmp_result > 0); - if (pos_reached || thd->killed) - break; - //wait for master update, with optional timeout. DBUG_PRINT("info",("Waiting for master update")); @@ -2343,6 +2509,11 @@ improper_arguments: %d timed_out: %d", DBUG_RETURN( error ? error : event_count ); } +void set_slave_thread_options(THD* thd) +{ + thd->options = ((opt_log_slave_updates) ? OPTION_BIN_LOG:0) | + OPTION_AUTO_IS_NULL; +} /* init_slave_thread() @@ -2359,8 +2530,7 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type) thd->master_access= ~0; thd->priv_user = 0; thd->slave_thread = 1; - thd->options = ((opt_log_slave_updates) ? OPTION_BIN_LOG:0) | - OPTION_AUTO_IS_NULL; + set_slave_thread_options(thd); /* It's nonsense to constrain the slave threads with max_join_size; if a query succeeded on master, we HAVE to execute it. @@ -2623,13 +2793,14 @@ bool st_relay_log_info::is_until_satisfied() if (until_log_names_cmp_result == UNTIL_LOG_NAMES_CMP_UNKNOWN) { - /* - We have no cached comaprison results so we should compare log names - and cache result + /* + We have no cached comparison results so we should compare log names + and cache result. + If we are after RESET SLAVE, and the SQL slave thread has not processed + any event yet, it could be that group_master_log_name is "". In that case, + just wait for more events (as there is no sensible comparison to do). */ - DBUG_ASSERT(*log_name || log_pos == 0); - if (*log_name) { const char *basename= log_name + dirname_length(log_name); @@ -2704,28 +2875,45 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) int exec_res; /* - Skip queries originating from this server or number of - queries specified by the user in slave_skip_counter - We can't however skip event's that has something to do with the + Queries originating from this server must be skipped. + Low-level events (Format_desc, Rotate, Stop) from this server + must also be skipped. But for those we don't want to modify + group_master_log_pos, because these events did not exist on the master. + Format_desc is not completely skipped. + Skip queries specified by the user in slave_skip_counter. + We can't however skip events that has something to do with the log files themselves. + Filtering on own server id is extremely important, to ignore execution of + events created by the creation/rotation of the relay log (remember that + now the relay log starts with its Format_desc, has a Rotate etc). */ - - if (ev->server_id == (uint32) ::server_id || - (rli->slave_skip_counter && type_code != ROTATE_EVENT)) + + DBUG_PRINT("info",("type_code=%d, server_id=%d",type_code,ev->server_id)); + + if ((ev->server_id == (uint32) ::server_id && + type_code!= FORMAT_DESCRIPTION_EVENT) || + (rli->slave_skip_counter && + type_code != ROTATE_EVENT && type_code != STOP_EVENT && + type_code != START_EVENT_V3 && type_code!= FORMAT_DESCRIPTION_EVENT)) { - /* TODO: I/O thread should not even log events with the same server id */ - rli->inc_group_relay_log_pos(ev->get_event_len(), - type_code != STOP_EVENT ? ev->log_pos : LL(0), - 1/* skip lock*/); + DBUG_PRINT("info", ("event skipped")); + rli->inc_group_relay_log_pos((type_code == ROTATE_EVENT || + type_code == STOP_EVENT || + type_code == FORMAT_DESCRIPTION_EVENT) ? + LL(0) : ev->log_pos, + 1/* skip lock*/); flush_relay_log_info(rli); - + /* - Protect against common user error of setting the counter to 1 - instead of 2 while recovering from an failed auto-increment insert + Protect against common user error of setting the counter to 1 + instead of 2 while recovering from an insert which used auto_increment, + rand or user var. */ if (rli->slave_skip_counter && - !((type_code == INTVAR_EVENT || type_code == STOP_EVENT) && - rli->slave_skip_counter == 1)) + !((type_code == INTVAR_EVENT || + type_code == RAND_EVENT || + type_code == USER_VAR_EVENT) && + rli->slave_skip_counter == 1)) --rli->slave_skip_counter; pthread_mutex_unlock(&rli->data_lock); delete ev; @@ -2741,7 +2929,16 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli) ev->thd = thd; exec_res = ev->exec_event(rli); DBUG_ASSERT(rli->sql_thd==thd); - delete ev; + /* + Format_description_log_event should not be deleted because it will be + used to read info about the relay log's format; it will be deleted when + the SQL thread does not need it, i.e. when this thread terminates. + */ + if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT) + { + DBUG_PRINT("info", ("Deleting the event after it has been executed")); + delete ev; + } return exec_res; } else @@ -2842,7 +3039,8 @@ extern "C" pthread_handler_decl(handle_slave_io,arg) thd->proc_info = "Checking master version"; if (get_master_version_and_clock(mysql, mi)) goto err; - if (!mi->old_format) + + if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1) { /* Register ourselves with the master. @@ -3043,6 +3241,9 @@ log space"); pthread_mutex_lock(&mi->run_lock); mi->slave_running = 0; mi->io_thd = 0; + /* Forget the relay log's format */ + delete mi->rli.relay_log.description_event_for_queue; + mi->rli.relay_log.description_event_for_queue= 0; // TODO: make rpl_status part of MASTER_INFO change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE); mi->abort_slave = 0; // TODO: check if this is needed @@ -3137,15 +3338,38 @@ extern "C" pthread_handler_decl(handle_slave_sql,arg) if (init_relay_log_pos(rli, rli->group_relay_log_name, rli->group_relay_log_pos, - 1 /*need data lock*/, &errmsg)) + 1 /*need data lock*/, &errmsg, + 1 /*look for a description_event*/)) { sql_print_error("Error initializing relay log position: %s", errmsg); goto err; } THD_CHECK_SENTRY(thd); - DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE); - DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos); +#ifndef DBUG_OFF + { + char llbuf1[22], llbuf2[22]; + DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s", + llstr(my_b_tell(rli->cur_log),llbuf1), + llstr(rli->event_relay_log_pos,llbuf2))); + DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE); + /* + Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the + correct position when it's called just after my_b_seek() (the questionable + stuff is those "seek is done on next read" comments in the my_b_seek() + source code). + The crude reality is that this assertion randomly fails whereas + replication seems to work fine. And there is no easy explanation why it + fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of + init_relay_log_pos() called above). Maybe the assertion would be + meaningful if we held rli->data_lock between the my_b_seek() and the + DBUG_ASSERT(). + */ +#ifdef SHOULD_BE_CHECKED + DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos); +#endif + } +#endif DBUG_ASSERT(rli->sql_thd == thd); DBUG_PRINT("master_info",("log_file_name: %s position: %s", @@ -3205,11 +3429,9 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \ DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun /* When master_pos_wait() wakes up it will check this and terminate */ rli->slave_running= 0; - /* - Going out of the transaction. Necessary to mark it, in case the user - restarts replication from a non-transactional statement (with CHANGE - MASTER). - */ + /* Forget the relay log's format */ + delete rli->relay_log.description_event_for_exec; + rli->relay_log.description_event_for_exec= 0; /* Wake up master_pos_wait() */ pthread_mutex_unlock(&rli->data_lock); DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions")); @@ -3299,7 +3521,7 @@ static int process_io_create_file(MASTER_INFO* mi, Create_file_log_event* cev) { net_write_command(net, 0, "", 0, "", 0);/* 3.23 master wants it */ Execute_load_log_event xev(thd,0,0); - xev.log_pos = mi->master_log_pos; + xev.log_pos = cev->log_pos; if (unlikely(mi->rli.relay_log.append(&xev))) { sql_print_error("Slave I/O: error writing Exec_load event to \ @@ -3313,7 +3535,6 @@ relay log"); { cev->block = (char*)net->read_pos; cev->block_len = num_bytes; - cev->log_pos = mi->master_log_pos; if (unlikely(mi->rli.relay_log.append(cev))) { sql_print_error("Slave I/O: error writing Create_file event to \ @@ -3327,7 +3548,7 @@ relay log"); { aev.block = (char*)net->read_pos; aev.block_len = num_bytes; - aev.log_pos = mi->master_log_pos; + aev.log_pos = cev->log_pos; if (unlikely(mi->rli.relay_log.append(&aev))) { sql_print_error("Slave I/O: error writing Append_block event to \ @@ -3355,6 +3576,7 @@ relay log"); DESCRIPTION Updates the master info with the place in the next binary log where we should start reading. + Rotate the relay log to avoid mixed-format relay logs. NOTES We assume we already locked mi->data_lock @@ -3386,21 +3608,30 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev) if (disconnect_slave_event_count) events_till_disconnect++; #endif + /* + If description_event_for_queue is format <4, there is conversion in the + relay log to the slave's format (4). And Rotate can mean upgrade or + nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so + no need to reset description_event_for_queue now. And if it's nothing (same + master version as before), no need (still using the slave's format). + */ + if (mi->rli.relay_log.description_event_for_queue->binlog_version >= 4) + { + delete mi->rli.relay_log.description_event_for_queue; + /* start from format 3 (MySQL 4.0) again */ + mi->rli.relay_log.description_event_for_queue= new + Format_description_log_event(3); + } + + rotate_relay_log(mi); /* will take the right mutexes */ DBUG_RETURN(0); } - /* - queue_old_event() - - Writes a 3.23 event to the relay log. - - TODO: - Test this code before release - it has to be tested on a separate - setup with 3.23 master + Reads a 3.23 event and converts it to the slave's format. This code was copied + from MySQL 4.0. */ - -static int queue_old_event(MASTER_INFO *mi, const char *buf, +static int queue_binlog_ver_1_event(MASTER_INFO *mi, const char *buf, ulong event_len) { const char *errmsg = 0; @@ -3408,7 +3639,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, bool ignore_event= 0; char *tmp_buf = 0; RELAY_LOG_INFO *rli= &mi->rli; - DBUG_ENTER("queue_old_event"); + DBUG_ENTER("queue_binlog_ver_1_event"); /* If we get Load event, we need to pass a non-reusable buffer @@ -3432,7 +3663,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, connected to the master). */ Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg, - 1 /*old format*/ ); + mi->rli.relay_log.description_event_for_queue); if (unlikely(!ev)) { sql_print_error("Read invalid event from master: '%s',\ @@ -3442,7 +3673,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, DBUG_RETURN(1); } pthread_mutex_lock(&mi->data_lock); - ev->log_pos = mi->master_log_pos; + ev->log_pos= mi->master_log_pos; /* 3.23 events don't contain log_pos */ switch (ev->get_type_code()) { case STOP_EVENT: ignore_event= 1; @@ -3467,9 +3698,11 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, { /* We come here when and only when tmp_buf != 0 */ DBUG_ASSERT(tmp_buf); + inc_pos=event_len; + ev->log_pos+= inc_pos; int error = process_io_create_file(mi,(Create_file_log_event*)ev); delete ev; - mi->master_log_pos += event_len; + mi->master_log_pos += inc_pos; DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos)); pthread_mutex_unlock(&mi->data_lock); my_free((char*)tmp_buf, MYF(0)); @@ -3481,6 +3714,12 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, } if (likely(!ignore_event)) { + if (ev->log_pos) + /* + Don't do it for fake Rotate events (see comment in + Log_event::Log_event(const char* buf...) in log_event.cc). + */ + ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */ if (unlikely(rli->relay_log.append(ev))) { delete ev; @@ -3496,10 +3735,98 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf, DBUG_RETURN(0); } +/* + Reads a 4.0 event and converts it to the slave's format. This code was copied + from queue_binlog_ver_1_event(), with some affordable simplifications. +*/ +static int queue_binlog_ver_3_event(MASTER_INFO *mi, const char *buf, + ulong event_len) +{ + const char *errmsg = 0; + ulong inc_pos; + char *tmp_buf = 0; + RELAY_LOG_INFO *rli= &mi->rli; + DBUG_ENTER("queue_binlog_ver_3_event"); + + /* read_log_event() will adjust log_pos to be end_log_pos */ + Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg, + mi->rli.relay_log.description_event_for_queue); + if (unlikely(!ev)) + { + sql_print_error("Read invalid event from master: '%s',\ + master could be corrupt but a more likely cause of this is a bug", + errmsg); + my_free((char*) tmp_buf, MYF(MY_ALLOW_ZERO_PTR)); + DBUG_RETURN(1); + } + pthread_mutex_lock(&mi->data_lock); + switch (ev->get_type_code()) { + case STOP_EVENT: + goto err; + case ROTATE_EVENT: + if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev))) + { + delete ev; + pthread_mutex_unlock(&mi->data_lock); + DBUG_RETURN(1); + } + inc_pos= 0; + break; + default: + inc_pos= event_len; + break; + } + if (unlikely(rli->relay_log.append(ev))) + { + delete ev; + pthread_mutex_unlock(&mi->data_lock); + DBUG_RETURN(1); + } + rli->relay_log.harvest_bytes_written(&rli->log_space_total); + delete ev; + mi->master_log_pos+= inc_pos; +err: + DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos)); + pthread_mutex_unlock(&mi->data_lock); + DBUG_RETURN(0); +} + +/* + queue_old_event() + + Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0 + (exactly, slave's) format. To do the conversion, we create a 5.0 event from + the 3.23/4.0 bytes, then write this event to the relay log. + + TODO: + Test this code before release - it has to be tested on a separate + setup with 3.23 master or 4.0 master +*/ + +static int queue_old_event(MASTER_INFO *mi, const char *buf, + ulong event_len) +{ + switch (mi->rli.relay_log.description_event_for_queue->binlog_version) + { + case 1: + return queue_binlog_ver_1_event(mi,buf,event_len); + case 3: + return queue_binlog_ver_3_event(mi,buf,event_len); + default: /* unsupported format; eg version 2 */ + DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()", + mi->rli.relay_log.description_event_for_queue->binlog_version)); + return 1; + } +} /* queue_event() + If the event is 3.23/4.0, passes it to queue_old_event() which will convert + it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is + no format conversion, it's pure read/write of bytes. + So a 5.0.0 slave's relay log can contain events in the slave's format or in + any >=5.0.0 format. */ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) @@ -3509,7 +3836,8 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) RELAY_LOG_INFO *rli= &mi->rli; DBUG_ENTER("queue_event"); - if (mi->old_format) + if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 && + buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */) DBUG_RETURN(queue_old_event(mi,buf,event_len)); pthread_mutex_lock(&mi->data_lock); @@ -3536,7 +3864,7 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) goto err; case ROTATE_EVENT: { - Rotate_log_event rev(buf,event_len,0); + Rotate_log_event rev(buf,event_len,mi->rli.relay_log.description_event_for_queue); if (unlikely(process_io_rotate(mi,&rev))) { error= 1; @@ -3549,6 +3877,47 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) inc_pos= 0; break; } + case FORMAT_DESCRIPTION_EVENT: + { + /* + Create an event, and save it (when we rotate the relay log, we will have + to write this event again). + */ + /* + We are the only thread which reads/writes description_event_for_queue. The + relay_log struct does not move (though some members of it can change), so + we needn't any lock (no rli->data_lock, no log lock). + */ + Format_description_log_event* tmp= mi->rli.relay_log.description_event_for_queue; + const char* errmsg; + if (!(mi->rli.relay_log.description_event_for_queue= (Format_description_log_event*) + Log_event::read_log_event(buf, event_len, &errmsg, + mi->rli.relay_log.description_event_for_queue))) + { + delete tmp; + error= 2; + goto err; + } + delete tmp; + /* + Set 'created' to 0, so that in next relay logs this event does not trigger + cleaning actions on the slave in Format_description_log_event::exec_event(). + */ + mi->rli.relay_log.description_event_for_queue->created= 0; + /* + Though this does some conversion to the slave's format, this will + preserve the master's binlog format version, and number of event types. + */ + /* + If the event was not requested by the slave (the slave did not ask for + it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos + */ + inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0; + DBUG_PRINT("info",("binlog format is now %d", + mi->rli.relay_log.description_event_for_queue->binlog_version)); + + } + break; default: inc_pos= event_len; break; @@ -3574,20 +3943,29 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len) We still want to increment, so that we won't re-read this event from the master if the slave IO thread is now stopped/restarted (more efficient if the events we are ignoring are big LOAD DATA INFILE). + But events which were generated by this slave and which do not exist in + the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment + mi->master_log_pos. */ - mi->master_log_pos+= inc_pos; + if (buf[EVENT_TYPE_OFFSET]!=FORMAT_DESCRIPTION_EVENT && + buf[EVENT_TYPE_OFFSET]!=ROTATE_EVENT && + buf[EVENT_TYPE_OFFSET]!=STOP_EVENT) + mi->master_log_pos+= inc_pos; DBUG_PRINT("info", ("master_log_pos: %d, event originating from the same server, ignored", (ulong) mi->master_log_pos)); } else /* write the event to the relay log */ - if (likely(!(error= rli->relay_log.appendv(buf,event_len,0)))) + if (likely(!(rli->relay_log.appendv(buf,event_len,0)))) { mi->master_log_pos+= inc_pos; DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos)); rli->relay_log.harvest_bytes_written(&rli->log_space_total); } + else + error=3; err: pthread_mutex_unlock(&mi->data_lock); + DBUG_PRINT("info", ("error=%d", error)); DBUG_RETURN(error); } @@ -3612,6 +3990,7 @@ void end_relay_log_info(RELAY_LOG_INFO* rli) } rli->inited = 0; rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT); + rli->relay_log.harvest_bytes_written(&rli->log_space_total); /* Delete the slave's temporary tables from memory. In the future there will be other actions than this, to ensure persistance @@ -3832,6 +4211,7 @@ static IO_CACHE *reopen_relay_log(RELAY_LOG_INFO *rli, const char **errmsg) relay_log_pos Current log pos pending Number of bytes already processed from the event */ + rli->event_relay_log_pos= max(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE); my_b_seek(cur_log,rli->event_relay_log_pos); DBUG_RETURN(cur_log); } @@ -3890,28 +4270,40 @@ Log_event* next_event(RELAY_LOG_INFO* rli) hot_log=0; // Using old binary log } } + #ifndef DBUG_OFF { + /* This is an assertion which sometimes fails, let's try to track it */ char llbuf1[22], llbuf2[22]; - DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE); - /* - The next assertion sometimes (very rarely) fails, let's try to track - it - */ - DBUG_PRINT("info", ("\ -Before assert, my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s", + DBUG_PRINT("info", ("my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s", llstr(my_b_tell(cur_log),llbuf1), - llstr(rli->group_relay_log_pos,llbuf2))); - DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos); + llstr(rli->event_relay_log_pos,llbuf2))); + DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE); + DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos); } #endif /* Relay log is always in new format - if the master is 3.23, the - I/O thread will convert the format for us + I/O thread will convert the format for us. + A problem: the description event may be in a previous relay log. So if the + slave has been shutdown meanwhile, we would have to look in old relay + logs, which may even have been deleted. So we need to write this + description event at the beginning of the relay log. + When the relay log is created when the I/O thread starts, easy: the master + will send the description event and we will queue it. + But if the relay log is created by new_file(): then the solution is: + MYSQL_LOG::open() will write the buffered description event. */ - if ((ev=Log_event::read_log_event(cur_log,0,(bool)0 /* new format */))) + if ((ev=Log_event::read_log_event(cur_log,0, + rli->relay_log.description_event_for_exec))) + { DBUG_ASSERT(thd==rli->sql_thd); + /* + read it while we have a lock, to avoid a mutex lock in + inc_event_relay_log_pos() + */ + rli->future_event_relay_log_pos= my_b_tell(cur_log); if (hot_log) pthread_mutex_unlock(log_lock); DBUG_RETURN(ev); @@ -4106,8 +4498,9 @@ void rotate_relay_log(MASTER_INFO* mi) DBUG_ENTER("rotate_relay_log"); RELAY_LOG_INFO* rli= &mi->rli; - lock_slave_threads(mi); - pthread_mutex_lock(&rli->data_lock); + /* We don't lock rli->run_lock. This would lead to deadlocks. */ + pthread_mutex_lock(&mi->run_lock); + /* We need to test inited because otherwise, new_file() will attempt to lock LOCK_log, which may not be inited (if we're not a slave). @@ -4136,8 +4529,7 @@ void rotate_relay_log(MASTER_INFO* mi) */ rli->relay_log.harvest_bytes_written(&rli->log_space_total); end: - pthread_mutex_unlock(&rli->data_lock); - unlock_slave_threads(mi); + pthread_mutex_unlock(&mi->run_lock); DBUG_VOID_RETURN; } diff --git a/sql/slave.h b/sql/slave.h index e42b93a47ef3d9d1b9784b0830e36dc2d5b12272..3313f587b3dcefef20816eb864b2ef3b2a5e3aaf 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -67,11 +67,6 @@ extern my_bool opt_log_slave_updates; extern ulonglong relay_log_space_limit; struct st_master_info; -enum enum_binlog_formats { - BINLOG_FORMAT_CURRENT=0, /* 0 is important for easy 'if (mi->old_format)' */ - BINLOG_FORMAT_323_LESS_57, - BINLOG_FORMAT_323_GEQ_57 }; - /* TODO: this needs to be redone, but for now it does not matter since we do not have multi-master yet. @@ -186,6 +181,8 @@ typedef struct st_relay_log_info ulonglong group_relay_log_pos; char event_relay_log_name[FN_REFLEN]; ulonglong event_relay_log_pos; + ulonglong future_event_relay_log_pos; + /* Original log name and position of the group we're currently executing (whose coordinates are group_relay_log_name/pos in the relay log) @@ -207,11 +204,13 @@ typedef struct st_relay_log_info /* InnoDB internally stores the master log position it has processed - so far; the position to store is really the sum of - pos + pending + event_len here since we must store the pos of the - END of the current log event + so far; when the InnoDB code to store this position is called, we have not + updated rli->group_master_log_pos yet. So the position is the event's + log_pos (the position of the end of the event); we save it in the variable + below. It's the *coming* group_master_log_pos (the one which will be + group_master_log_pos in the coming milliseconds). */ - int event_len; + ulonglong future_group_master_log_pos; time_t last_master_timestamp; @@ -285,16 +284,17 @@ typedef struct st_relay_log_info until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN; } - inline void inc_event_relay_log_pos(ulonglong val) + inline void inc_event_relay_log_pos() { - event_relay_log_pos+= val; + event_relay_log_pos= future_event_relay_log_pos; } - void inc_group_relay_log_pos(ulonglong val, ulonglong log_pos, bool skip_lock=0) + void inc_group_relay_log_pos(ulonglong log_pos, + bool skip_lock=0) { if (!skip_lock) pthread_mutex_lock(&data_lock); - inc_event_relay_log_pos(val); + inc_event_relay_log_pos(); group_relay_log_pos= event_relay_log_pos; strmake(group_relay_log_name,event_relay_log_name, sizeof(group_relay_log_name)-1); @@ -311,8 +311,31 @@ typedef struct st_relay_log_info not advance as it should on the non-transactional slave (it advances by big leaps, whereas it should advance by small leaps). */ - if (log_pos) // 3.23 binlogs don't have log_posx - group_master_log_pos= log_pos+ val; + /* + In 4.x we used the event's len to compute the positions here. This is + wrong if the event was 3.23/4.0 and has been converted to 5.0, because + then the event's len is not what is was in the master's binlog, so this + will make a wrong group_master_log_pos (yes it's a bug in 3.23->4.0 + replication: Exec_master_log_pos is wrong). Only way to solve this is to + have the original offset of the end of the event the relay log. This is + what we do in 5.0: log_pos has become "end_log_pos" (because the real use + of log_pos in 4.0 was to compute the end_log_pos; so better to store + end_log_pos instead of begin_log_pos. + If we had not done this fix here, the problem would also have appeared + when the slave and master are 5.0 but with different event length (for + example the slave is more recent than the master and features the event + UID). It would give false MASTER_POS_WAIT, false Exec_master_log_pos in + SHOW SLAVE STATUS, and so the user would do some CHANGE MASTER using this + value which would lead to badly broken replication. + Even the relay_log_pos will be corrupted in this case, because the len is + the relay log is not "val". + With the end_log_pos solution, we avoid computations involving lengthes. + */ + DBUG_PRINT("info", ("log_pos=%lld group_master_log_pos=%lld", + log_pos,group_master_log_pos)); + if (log_pos) // some events (like fake Rotate) don't have log_pos + // when we are here, log_pos is the end of the event + group_master_log_pos= log_pos; pthread_cond_broadcast(&data_cond); if (!skip_lock) pthread_mutex_unlock(&data_lock); @@ -389,7 +412,6 @@ typedef struct st_master_info int events_till_abort; #endif bool inited; - enum enum_binlog_formats old_format; volatile bool abort_slave, slave_running; volatile ulong slave_run_id; /* @@ -404,7 +426,7 @@ typedef struct st_master_info long clock_diff_with_master; st_master_info() - :ssl(0), fd(-1), io_thd(0), inited(0), old_format(BINLOG_FORMAT_CURRENT), + :ssl(0), fd(-1), io_thd(0), inited(0), abort_slave(0),slave_running(0), slave_run_id(0) { host[0] = 0; user[0] = 0; password[0] = 0; @@ -535,10 +557,12 @@ void lock_slave_threads(MASTER_INFO* mi); void unlock_slave_threads(MASTER_INFO* mi); void init_thread_mask(int* mask,MASTER_INFO* mi,bool inverse); int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,ulonglong pos, - bool need_data_lock, const char** errmsg); + bool need_data_lock, const char** errmsg, + bool look_for_description_event); int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset, const char** errmsg); +void set_slave_thread_options(THD* thd); void rotate_relay_log(MASTER_INFO* mi); extern "C" pthread_handler_decl(handle_slave_io,arg); diff --git a/sql/sql_class.cc b/sql/sql_class.cc index d1ebcdbd15e6ff86cf0bbff624b01b8a6ae34a09..b9051f88fbd54731d68981146f36e5673e23d54f 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -92,6 +92,7 @@ THD::THD():user_time(0), is_fatal_error(0), global_read_lock(0), bootstrap(0), spcont(NULL) { host= user= priv_user= db= ip= 0; + catalog= (char*)"std"; // the only catalog we have for now host_or_ip= "connecting host"; locked=some_tables_deleted=no_errors=password= 0; query_start_used= 0; diff --git a/sql/sql_class.h b/sql/sql_class.h index 8263789a2a2a1b33ac314745ecc0183cc97370ad..f57272b6c6673c2ba7770f0e5ba21a1965a03720 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -26,6 +26,7 @@ class Query_log_event; class Load_log_event; class Slave_log_event; +class Format_description_log_event; class sp_rcontext; class sp_cache; @@ -99,7 +100,14 @@ class MYSQL_LOG enum cache_type io_cache_type; bool write_error, inited; bool need_start_event; - bool no_auto_events; // For relay binlog + /* + no_auto_events means we don't want any of these automatic events : + Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't want + a Rotate_log event to be written to the relay log. When we start a relay log + etc. So in 4.x this is 1 for relay logs, 0 for binlogs. + In 5.0 it's 0 for relay logs too! + */ + bool no_auto_events; /* The max size before rotation (usable only if log_type == LOG_BIN: binary logs and relay logs). @@ -116,6 +124,18 @@ class MYSQL_LOG public: MYSQL_LOG(); ~MYSQL_LOG(); + + /* + These describe the log's format. This is used only for relay logs. + _for_exec is used by the SQL thread, _for_queue by the I/O thread. It's + necessary to have 2 distinct objects, because the I/O thread may be reading + events in a different format from what the SQL thread is reading (consider + the case of a master which has been upgraded from 5.0 to 5.1 without doing + RESET MASTER, or from 4.x to 5.0). + */ + Format_description_log_event *description_event_for_exec, + *description_event_for_queue; + void reset_bytes_written() { bytes_written = 0; @@ -144,7 +164,8 @@ class MYSQL_LOG bool open(const char *log_name,enum_log_type log_type, const char *new_name, const char *index_file_name_arg, enum cache_type io_cache_type_arg, - bool no_auto_events_arg, ulong max_size); + bool no_auto_events_arg, ulong max_size, + bool null_created); void new_file(bool need_lock= 1); bool write(THD *thd, enum enum_server_command command, const char *format,...); @@ -590,9 +611,10 @@ class THD :public ilink, the connection priv_user - The user privilege we are using. May be '' for anonymous user. db - currently selected database + catalog - currently selected catalog ip - client IP */ - char *host,*user,*priv_user,*db,*ip; + char *host,*user,*priv_user,*db,*catalog,*ip; char priv_host[MAX_HOSTNAME]; /* remote (peer) port */ uint16 peer_port; diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index c0ee42771289cbb33fd31b8c007d189fa0ef9eb9..d6d523891838f6edc83dcc9a77dabfe4afe88821 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -48,16 +48,34 @@ int check_binlog_magic(IO_CACHE* log, const char** errmsg) return 0; } + /* + fake_rotate_event() builds a fake (=which does not exist physically in any + binlog) Rotate event, which contains the name of the binlog we are going to + send to the slave (because the slave may not know it if it just asked for + MASTER_LOG_FILE='', MASTER_LOG_POS=4). + < 4.0.14, fake_rotate_event() was called only if the requested pos was + 4. After this version we always call it, so that a 3.23.58 slave can rely on + it to detect if the master is 4.0 (and stop) (the _fake_ Rotate event has + zeros in the good positions which, by chance, make it possible for the 3.23 + slave to detect that this event is unexpected) (this is luck which happens + because the master and slave disagree on the size of the header of + Log_event). + + Relying on the event length of the Rotate event instead of these well-placed + zeros was not possible as Rotate events have a variable-length part. +*/ + static int fake_rotate_event(NET* net, String* packet, char* log_file_name, - ulonglong position, const char**errmsg) + ulonglong position, const char** errmsg) { + DBUG_ENTER("fake_rotate_event"); char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN]; - memset(header, 0, 4); // when does not matter + memset(header, 0, 4); // 'when' (the timestamp) does not matter, is set to 0 header[EVENT_TYPE_OFFSET] = ROTATE_EVENT; char* p = log_file_name+dirname_length(log_file_name); uint ident_len = (uint) strlen(p); - ulong event_len = ident_len + ROTATE_EVENT_OVERHEAD; + ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN; int4store(header + SERVER_ID_OFFSET, server_id); int4store(header + EVENT_LEN_OFFSET, event_len); int2store(header + FLAGS_OFFSET, 0); @@ -72,9 +90,9 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name, if (my_net_write(net, (char*)packet->ptr(), packet->length())) { *errmsg = "failed on my_net_write()"; - return -1; + DBUG_RETURN(-1); } - return 0; + DBUG_RETURN(0); } static int send_file(THD *thd) @@ -310,6 +328,36 @@ int purge_master_logs_before_date(THD* thd, time_t purge_time) return purge_error_message(thd ,res); } +int test_for_non_eof_log_read_errors(int error, const char *errmsg) +{ + if (error == LOG_READ_EOF) + return 0; + my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; + switch (error) { + case LOG_READ_BOGUS: + errmsg = "bogus data in log event"; + break; + case LOG_READ_TOO_LARGE: + errmsg = "log event entry exceeded max_allowed_packet; \ +Increase max_allowed_packet on master"; + break; + case LOG_READ_IO: + errmsg = "I/O error reading log event"; + break; + case LOG_READ_MEM: + errmsg = "memory allocation failed reading log event"; + break; + case LOG_READ_TRUNC: + errmsg = "binlog truncated in the middle of event"; + break; + default: + errmsg = "unknown error reading log event on the master"; + break; + } + return error; +} + + /* TODO: Clean up loop to only have one call to send_file() */ @@ -326,6 +374,7 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos, int error; const char *errmsg = "Unknown error"; NET* net = &thd->net; + pthread_mutex_t *log_lock; #ifndef DBUG_OFF int left_events = max_binlog_dump_events; #endif @@ -385,18 +434,25 @@ impossible position"; goto err; } - my_b_seek(&log, pos); // Seek will done on next read /* We need to start a packet with something other than 255 - to distiquish it from error + to distinguish it from error */ - packet->set("\0", 1, &my_charset_bin); + packet->set("\0", 1, &my_charset_bin); /* This is the start of a new packet */ /* + Tell the client about the log name with a fake Rotate event; + this is needed even if we also send a Format_description_log_event just + after, because that event does not contain the binlog's name. + Note that as this Rotate event is sent before Format_description_log_event, + the slave cannot have any info to understand this event's format, so the + header len of Rotate_log_event is FROZEN + (so in 5.0 it will have a header shorter than other events except + FORMAT_DESCRIPTION_EVENT). Before 4.0.14 we called fake_rotate_event below only if (pos == BIN_LOG_HEADER_SIZE), because if this is false then the slave already knows the binlog's name. - Now we always call fake_rotate_event; if the slave already knew the log's + Since, we always call fake_rotate_event; if the slave already knew the log's name (ex: CHANGE MASTER TO MASTER_LOG_FILE=...) this is useless but does not harm much. It is nice for 3.23 (>=.58) slaves which test Rotate events to see if the master is 4.0 (then they choose to stop because they can't @@ -413,15 +469,72 @@ impossible position"; */ if (fake_rotate_event(net, packet, log_file_name, pos, &errmsg)) { + /* + This error code is not perfect, as fake_rotate_event() does not read + anything from the binlog; if it fails it's because of an error in + my_net_write(), fortunately it will say it in errmsg. + */ my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; goto err; } packet->set("\0", 1, &my_charset_bin); + /* + We can set log_lock now, it does not move (it's a member of mysql_bin_log, + and it's already inited, and it will be destroyed only at shutdown). + */ + log_lock = mysql_bin_log.get_log_lock(); + if (pos > BIN_LOG_HEADER_SIZE) + { + /* Try to find a Format_description_log_event at the beginning of the binlog */ + if (!(error = Log_event::read_log_event(&log, packet, log_lock))) + { + /* + The packet has offsets equal to the normal offsets in a binlog event + +1 (the first character is \0). + */ + DBUG_PRINT("info", + ("Looked for a Format_description_log_event, found event type %d", + (*packet)[EVENT_TYPE_OFFSET+1])); + if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT) + { + /* + mark that this event with "log_pos=0", so the slave + should not increment master's binlog position + (rli->group_master_log_pos) + */ + int4store(packet->c_ptr() +LOG_POS_OFFSET+1,0); + /* send it */ + if (my_net_write(net, (char*)packet->ptr(), packet->length())) + { + errmsg = "Failed on my_net_write()"; + my_errno= ER_UNKNOWN_ERROR; + goto err; + } + /* + No need to save this event. We are only doing simple reads (no real + parsing of the events) so we don't need it. And so we don't need the + artificial Format_description_log_event of 3.23&4.x. + */ + } + } + else + if (test_for_non_eof_log_read_errors(error, errmsg)) + goto err; + /* + else: it's EOF, nothing to do, go on reading next events, the + Format_description_log_event will be found naturally if it is written. + */ + /* reset the packet as we wrote to it in any case */ + packet->set("\0", 1, &my_charset_bin); + } /* end of if (pos > BIN_LOG_HEADER_SIZE); if false, the Format_description_log_event + event will be found naturally. */ + + /* seek to the requested position, to start the requested dump */ + my_b_seek(&log, pos); // Seek will done on next read + while (!net->error && net->vio != 0 && !thd->killed) { - pthread_mutex_t *log_lock = mysql_bin_log.get_log_lock(); - while (!(error = Log_event::read_log_event(&log, packet, log_lock))) { #ifndef DBUG_OFF @@ -433,7 +546,7 @@ impossible position"; goto err; } #endif - if (my_net_write(net, (char*)packet->ptr(), packet->length()) ) + if (my_net_write(net, (char*)packet->ptr(), packet->length())) { errmsg = "Failed on my_net_write()"; my_errno= ER_UNKNOWN_ERROR; @@ -454,34 +567,14 @@ impossible position"; } /* TODO: now that we are logging the offset, check to make sure - the recorded offset and the actual match + the recorded offset and the actual match. + Guilhem 2003-06: this is not true if this master is a slave <4.0.15 + running with --log-slave-updates, because then log_pos may be the offset + in the-master-of-this-master's binlog. */ - if (error != LOG_READ_EOF) - { - my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG; - switch (error) { - case LOG_READ_BOGUS: - errmsg = "bogus data in log event"; - break; - case LOG_READ_TOO_LARGE: - errmsg = "log event entry exceeded max_allowed_packet; \ -Increase max_allowed_packet on master"; - break; - case LOG_READ_IO: - errmsg = "I/O error reading log event"; - break; - case LOG_READ_MEM: - errmsg = "memory allocation failed reading log event"; - break; - case LOG_READ_TRUNC: - errmsg = "binlog truncated in the middle of event"; - break; - default: - errmsg = "unknown error reading log event on the master"; - break; - } + + if (test_for_non_eof_log_read_errors(error, errmsg)) goto err; - } if (!(flags & BINLOG_DUMP_NON_BLOCK) && mysql_bin_log.is_active(log_file_name)) @@ -615,8 +708,13 @@ Increase max_allowed_packet on master"; (void) my_close(file, MYF(MY_WME)); /* - Even if the previous log contained a Rotate_log_event, we still fake - one. + Call fake_rotate_event() in case the previous log (the one which we have + just finished reading) did not contain a Rotate event (for example (I + don't know any other example) the previous log was the last one before + the master was shutdown & restarted). + This way we tell the slave about the new log's name and position. + If the binlog is 5.0, the next event we are going to read and send is + Format_description_log_event. */ if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0 || fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE, &errmsg)) @@ -1112,7 +1210,7 @@ int change_master(THD* thd, MASTER_INFO* mi) mi->rli.group_relay_log_name, mi->rli.group_relay_log_pos, 0 /*no data lock*/, - &msg)) + &msg, 0)) { net_printf(thd,0,"Failed initializing relay log position: %s",msg); unlock_slave_threads(mi); @@ -1197,6 +1295,8 @@ int show_binlog_events(THD* thd) const char *errmsg = 0; IO_CACHE log; File file = -1; + Format_description_log_event *description_event= new + Format_description_log_event(3); /* MySQL 4.0 by default */ Log_event::init_show_field_list(&field_list); if (protocol-> send_fields(&field_list, 1)) @@ -1235,10 +1335,35 @@ int show_binlog_events(THD* thd) goto err; pthread_mutex_lock(log_lock); + + /* + open_binlog() sought to position 4. + Read the first event in case it's a Format_description_log_event, to know the + format. If there's no such event, we are 3.23 or 4.x. This code, like + before, can't read 3.23 binlogs. + This code will fail on a mixed relay log (one which has Format_desc then + Rotate then Format_desc). + */ + + ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event); + if (ev) + { + if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT) + description_event= (Format_description_log_event*) ev; + else + delete ev; + } + my_b_seek(&log, pos); + if (!description_event->is_valid()) + { + errmsg="Invalid Format_description event; could be out of memory"; + goto err; + } + for (event_count = 0; - (ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,0)); ) + (ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event)); ) { if (event_count >= limit_start && ev->net_send(protocol, linfo.log_file_name, pos)) @@ -1267,6 +1392,7 @@ int show_binlog_events(THD* thd) } err: + delete description_event; if (file >= 0) { end_io_cache(&log);