Commit 66a32e89 authored by guilhem@gbichot2's avatar guilhem@gbichot2

This will be pushed only after I fix the testsuite.

This is the main commit for Worklog tasks:
 * A more dynamic binlog format which allows small changes (1064)
 * Log session variables in Query_log_event (1063)
In the text below, "5.0" means 5.0.0.
MySQL 5.0 is able to replicate FOREIGN_KEY_CHECKS, UNIQUE_KEY_CHECKS (for speed),
SQL_AUTO_IS_NULL, SQL_MODE. Not charsets (WL#1062), not some vars (I can only think
of SQL_SELECT_LIMIT, which deserves a special treatment). Note that this
works for queries, except LOAD DATA INFILE (for this it would have to wait
for Dmitri's push of WL#874, which in turn waits for the present push, so...
the deadlock must be broken!). Note that when Dmitri pushes WL#874 in 5.0.1,
5.0.0 won't be able to replicate a LOAD DATA INFILE from 5.0.1.
Apart from that, the new binlog format is designed so that it can tolerate
a little variation in the events (so that a 5.0.0 slave could replicate a
5.0.1 master, except for LOAD DATA INFILE unfortunately); that is, when I
later add replication of charsets it should break nothing. And when I later
add a UID to every event, it should break nothing.
The main change brought by this patch is a new type of event, Format_description_log_event,
which describes some lengths in other event types. This event is needed for
the master/slave/mysqlbinlog to understand a 5.0 log. Thanks to this event,
we can later add more bytes to the header of every event without breaking compatibility.
Inside Query_log_event, we have some additional dynamic format, as every Query_log_event
can have a different number of status variables, stored as pairs (code, value); that's
how SQL_MODE and session variables and catalog are stored. Like this, we can later
add count of affected rows, charsets... and we can have options --don't-log-count-affected-rows
if we want.
MySQL 5.0 is able to run on 4.x relay logs, 4.x binlogs.
Upgrading a 4.x master to 5.0 is ok (no need to delete binlogs),
upgrading a 4.x slave to 5.0 is ok (no need to delete relay logs);
so both can be "hot" upgrades.
Upgrading a 3.23 master to 5.0 requires as much as upgrading it to 4.0.
3.23 and 4.x can't be slaves of 5.0.
So downgrading from 5.0 to 4.x may be complicated.
Log_event::log_pos is now the position of the end of the event, which is
more useful than the position of the beginning. We take care about compatibility
with <5.0 (in which log_pos is the beginning).
I added a short test for replication of SQL_MODE and some other variables.
TODO:
- after committing this, merge the latest 5.0 into it
- fix all tests
- update the manual with upgrade notes.
parent 7a1fddaa
......@@ -39,7 +39,7 @@ mysqlbinlog_SOURCES = mysqlbinlog.cc ../mysys/mf_tempdir.c
mysqlbinlog_DEPENDENCIES= $(LIBRARIES) $(pkglib_LTLIBRARIES)
mysqlmanagerc_SOURCES = mysqlmanagerc.c
mysqlmanagerc_DEPENDENCIES= $(LIBRARIES) $(pkglib_LTLIBRARIES)
sql_src=log_event.h log_event.cc
sql_src=log_event.h mysql_priv.h log_event.cc
# Fix for mit-threads
DEFS = -DUNDEF_THREADS_HACK
......
......@@ -14,12 +14,28 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
TODO: print the catalog (some USE catalog.db ????).
Standalone program to read a MySQL binary log (or relay log);
can read files produced by 3.23, 4.x, 5.0 servers.
Can read binlogs from 3.23/4.x/5.0 and relay logs from 4.x/5.0.
Should be able to read any file of these categories, even with --position.
An important fact: the Format_desc event of the log is at most the 3rd event
of the log; if it is the 3rd then there is this combination:
Format_desc_of_slave, Rotate_of_master, Format_desc_of_master.
*/
#define MYSQL_CLIENT
#undef MYSQL_SERVER
#include "client_priv.h"
#include <time.h>
#include <assert.h>
#include "log_event.h"
/* That one is necessary for defines of OPTION_NO_FOREIGN_KEY_CHECKS etc */
#include "mysql_priv.h"
#define BIN_LOG_HEADER_SIZE 4
#define PROBE_HEADER_LEN (EVENT_LEN_OFFSET+4)
......@@ -481,21 +497,26 @@ static int check_master_version(MYSQL* mysql)
}
/*
TODO fix this for new format (like local log); this will be done when 4.0 is
merged here (Victor's fixes are needed to make dump_remote_log_entries()
work).
*/
static void dump_remote_log_entries(const char* logname)
{
char buf[128];
char last_db[FN_REFLEN+1] = "";
LAST_EVENT_INFO last_event_info;
uint len;
NET* net = &mysql->net;
int old_format;
old_format = check_master_version(mysql);
if (!position)
position = BIN_LOG_HEADER_SIZE; // protect the innocent from spam
position = BIN_LOG_HEADER_SIZE;
if (position < BIN_LOG_HEADER_SIZE)
{
position = BIN_LOG_HEADER_SIZE;
// warn the guilty
sql_print_error("Warning: The position in the binary log can't be less than %d.\nStarting from position %d\n", BIN_LOG_HEADER_SIZE, BIN_LOG_HEADER_SIZE);
}
int4store(buf, position);
......@@ -517,10 +538,11 @@ static void dump_remote_log_entries(const char* logname)
DBUG_PRINT("info",( "len= %u, net->read_pos[5] = %d\n",
len, net->read_pos[5]));
Log_event *ev = Log_event::read_log_event((const char*) net->read_pos + 1 ,
len - 1, &error, old_format);
len - 1, &error, 0);
//TODO this ,0) : we need to store the description_event like for local_log
if (ev)
{
ev->print(result_file, short_form, last_db);
ev->print(result_file, short_form, &last_event_info);
if (ev->get_type_code() == LOAD_EVENT)
dump_remote_file(net, ((Load_log_event*)ev)->fname);
delete ev;
......@@ -531,29 +553,98 @@ static void dump_remote_log_entries(const char* logname)
}
static int check_header(IO_CACHE* file)
static void check_header(IO_CACHE* file,
Format_description_log_event **description_event)
{
byte header[BIN_LOG_HEADER_SIZE];
byte buf[PROBE_HEADER_LEN];
int old_format=0;
*description_event= new Format_description_log_event(3);
my_off_t tmp_pos;
my_off_t pos = my_b_tell(file);
my_b_seek(file, (my_off_t)0);
if (my_b_read(file, header, sizeof(header)))
die("Failed reading header; Probably an empty file");
if (memcmp(header, BINLOG_MAGIC, sizeof(header)))
die("File is not a binary log file");
if (!my_b_read(file, buf, sizeof(buf)))
/*
Imagine we are running with --position=1000. We still need to know the
binlog's format. So we still need to find, if there is one, the Format_desc
event, or to know if this is a 3.23 binlog. So we need to first read the
first events of the log, those around offset 4.
Even if we are reading a 3.23 binlog from the start (no --position): we need
to know the header length (which is 13 in 3.23, 19 in 4.x) to be able to
successfully print the first event (Start_log_event_v3). So even in this
case, we need to "probe" the first bytes of the log *before* we do a real
read_log_event(). Because read_log_event() needs to know the header's length
to work fine.
*/
for(;;)
{
tmp_pos= my_b_tell(file); /* should be 4 the first time */
if (my_b_read(file, buf, sizeof(buf)))
{
if (file->error)
die("\
Could not read entry at offset %lu : Error in log format or read error",
tmp_pos);
/*
Otherwise this is just EOF : this log currently contains 0-2 events.
Maybe it's going to be filled in the next milliseconds; then we are
going to have a problem if this is a 3.23 log (imagine we are locally
reading a 3.23 binlog which is being written presently): we won't know
it in read_log_event() and will fail.
Similar problems could happen with hot relay logs if --position is used
(but a --position which is posterior to the current size of the log).
These are rare problems anyway (reading a hot log + when we read the
first events they are not all there yet + when we read a bit later
there are more events + using a strange --position).
*/
break;
}
else
{
if (buf[4] == START_EVENT)
DBUG_PRINT("info",("buf[4]=%d", buf[4]));
/* always test for a Start_v3, even if no --position */
if (buf[4] == START_EVENT_V3) /* This is 3.23 or 4.x */
{
uint event_len;
event_len = uint4korr(buf + EVENT_LEN_OFFSET);
old_format = (event_len < (LOG_EVENT_HEADER_LEN + START_HEADER_LEN));
if (uint4korr(buf + EVENT_LEN_OFFSET) <
(LOG_EVENT_MINIMAL_HEADER_LEN + START_V3_HEADER_LEN))
{
/* This is 3.23 (format 1) */
delete *description_event;
*description_event= new Format_description_log_event(1);
}
break;
}
else if (tmp_pos>=position)
break;
else if (buf[4] == FORMAT_DESCRIPTION_EVENT) /* This is 5.0 */
{
my_b_seek(file, tmp_pos); /* seek back to event's start */
if (!(*description_event= (Format_description_log_event*)
Log_event::read_log_event(file, *description_event)))
/* EOF can't be hit here normally, so it's a real error */
die("Could not read a Format_description_log_event event \
at offset %lu ; this could be a log format error or read error",
tmp_pos);
DBUG_PRINT("info",("Setting description_event"));
}
else if (buf[4] == ROTATE_EVENT)
{
my_b_seek(file, tmp_pos); /* seek back to event's start */
if (!Log_event::read_log_event(file, *description_event))
/* EOF can't be hit here normally, so it's a real error */
die("Could not read a Rotate_log_event event \
at offset %lu ; this could be a log format error or read error",
tmp_pos);
}
else
break;
}
}
my_b_seek(file, pos);
return old_format;
}
......@@ -562,11 +653,15 @@ static void dump_local_log_entries(const char* logname)
File fd = -1;
IO_CACHE cache,*file= &cache;
ulonglong rec_count = 0;
char last_db[FN_REFLEN+1];
LAST_EVENT_INFO last_event_info;
byte tmp_buff[BIN_LOG_HEADER_SIZE];
bool old_format = 0;
last_db[0]=0;
/*
check_header() will set the pointer below.
Why do we need here a pointer on an event instead of an event ?
This is because the event will be created (alloced) in read_log_event()
(which returns a pointer) in check_header().
*/
Format_description_log_event* description_event;
if (logname && logname[0] != '-')
{
......@@ -575,14 +670,14 @@ static void dump_local_log_entries(const char* logname)
if (init_io_cache(file, fd, 0, READ_CACHE, (my_off_t) position, 0,
MYF(MY_WME | MY_NABP)))
exit(1);
old_format = check_header(file);
check_header(file, &description_event);
}
else
else // reading from stdin; TODO: check that it works
{
if (init_io_cache(file, fileno(result_file), 0, READ_CACHE, (my_off_t) 0,
0, MYF(MY_WME | MY_NABP | MY_DONT_CHECK_FILESIZE)))
exit(1);
old_format = check_header(file);
check_header(file, &description_event);
if (position)
{
/* skip 'position' characters from stdout */
......@@ -599,6 +694,9 @@ static void dump_local_log_entries(const char* logname)
file->seek_not_done=0;
}
if (!description_event->is_valid())
die("Invalid Format_description log event; could be out of memory");
if (!position)
my_b_read(file, tmp_buff, BIN_LOG_HEADER_SIZE); // Skip header
for (;;)
......@@ -606,7 +704,7 @@ static void dump_local_log_entries(const char* logname)
char llbuff[21];
my_off_t old_off = my_b_tell(file);
Log_event* ev = Log_event::read_log_event(file, old_format);
Log_event* ev = Log_event::read_log_event(file, description_event);
if (!ev)
{
if (file->error)
......@@ -633,7 +731,7 @@ Could not read entry at offset %s : Error in log format or read error",
continue; // next
}
}
ev->print(result_file, short_form, last_db);
ev->print(result_file, short_form, &last_event_info);
break;
case CREATE_FILE_EVENT:
{
......@@ -661,18 +759,18 @@ Could not read entry at offset %s : Error in log format or read error",
filename and use LOCAL), prepared in the 'case EXEC_LOAD_EVENT'
below.
*/
ce->print(result_file, short_form, last_db, true);
ce->print(result_file, short_form, &last_event_info, true);
load_processor.process(ce);
ev= 0;
break;
}
case APPEND_BLOCK_EVENT:
ev->print(result_file, short_form, last_db);
ev->print(result_file, short_form, &last_event_info);
load_processor.process((Append_block_log_event*)ev);
break;
case EXEC_LOAD_EVENT:
{
ev->print(result_file, short_form, last_db);
ev->print(result_file, short_form, &last_event_info);
Execute_load_log_event *exv= (Execute_load_log_event*)ev;
Create_file_log_event *ce= load_processor.grab_event(exv->file_id);
/*
......@@ -682,7 +780,7 @@ Could not read entry at offset %s : Error in log format or read error",
*/
if (ce)
{
ce->print(result_file, short_form, last_db,true);
ce->print(result_file, short_form, &last_event_info,true);
my_free((char*)ce->fname,MYF(MY_WME));
delete ce;
}
......@@ -691,17 +789,23 @@ Could not read entry at offset %s : Error in log format or read error",
Create_file event for file_id: %u\n",exv->file_id);
break;
}
case FORMAT_DESCRIPTION_EVENT:
delete description_event;
description_event= (Format_description_log_event*) ev;
ev->print(result_file, short_form, &last_event_info);
break;
default:
ev->print(result_file, short_form, last_db);
ev->print(result_file, short_form, &last_event_info);
}
}
rec_count++;
if (ev)
delete ev;
if (ev && ev->get_type_code()!=FORMAT_DESCRIPTION_EVENT)
delete ev; /* otherwise, deleted in the end */
}
if (fd >= 0)
my_close(fd, MYF(MY_WME));
end_io_cache(file);
delete description_event;
}
......
......@@ -498,6 +498,7 @@ typedef int (*qsort2_cmp)(const void *, const void *, const void *);
/* tell write offset in the SEQ_APPEND cache */
my_off_t my_b_append_tell(IO_CACHE* info);
my_off_t my_b_safe_tell(IO_CACHE* info); /* picks the correct tell() */
#define my_b_bytes_in_cache(info) (uint) (*(info)->current_end - \
*(info)->current_pos)
......
stop slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
drop table if exists t1;
Warnings:
Note 1051 Unknown table 't1'
create table t1(a varchar(10),b int);
set @@session.sql_mode=pipes_as_concat;
insert into t1 values('My'||'SQL', 1);
set @@session.sql_mode=default;
insert into t1 values('My'||'SQL', 2);
select * from t1 where b<3 order by a;
a b
0 2
MySQL 1
select * from t1 where b<3 order by a;
a b
0 2
MySQL 1
set @@session.sql_mode=ignore_space;
insert into t1 values(password ('MySQL'), 3);
set @@session.sql_mode=ansi_quotes;
create table "t2" ("a" int);
drop table t1, t2;
set @@session.sql_mode=default;
create table t1(a int auto_increment primary key);
create table t2(b int, a int);
set @@session.sql_auto_is_null=1;
insert into t1 values(null);
insert into t2 select 1,a from t1 where a is null;
set @@session.sql_auto_is_null=0;
insert into t1 values(null);
insert into t2 select 2,a from t1 where a is null;
select * from t2 order by b;
b a
1 1
select * from t2 order by b;
b a
1 1
drop table t1,t2;
# Replication of session variables.
# FOREIGN_KEY_CHECKS is tested in rpl_insert_id.test
source include/master-slave.inc;
drop table if exists t1;
create table t1(a varchar(10),b int);
set @@session.sql_mode=pipes_as_concat;
insert into t1 values('My'||'SQL', 1);
set @@session.sql_mode=default;
insert into t1 values('My'||'SQL', 2);
select * from t1 where b<3 order by a;
save_master_pos;
connection slave;
sync_with_master;
select * from t1 where b<3 order by a;
connection master;
# if the slave does the next sync_with_master fine, then it means it accepts the
# two lines of ANSI syntax below, which is what we want to check.
set @@session.sql_mode=ignore_space;
insert into t1 values(password ('MySQL'), 3);
set @@session.sql_mode=ansi_quotes;
create table "t2" ("a" int);
drop table t1, t2;
set @@session.sql_mode=default;
create table t1(a int auto_increment primary key);
create table t2(b int, a int);
set @@session.sql_auto_is_null=1;
insert into t1 values(null);
insert into t2 select 1,a from t1 where a is null;
set @@session.sql_auto_is_null=0;
insert into t1 values(null);
insert into t2 select 2,a from t1 where a is null;
select * from t2 order by b;
save_master_pos;
connection slave;
sync_with_master;
select * from t2 order by b;
connection master;
drop table t1,t2;
save_master_pos;
connection slave;
sync_with_master;
......@@ -66,6 +66,13 @@ my_off_t my_b_append_tell(IO_CACHE* info)
return res;
}
/*
  Returns the result of the tell routine that matches the cache type:
  my_b_append_tell() when the cache is of type SEQ_READ_APPEND,
  my_b_tell() for every other cache type.
*/
my_off_t my_b_safe_tell(IO_CACHE *info)
{
  return unlikely(info->type == SEQ_READ_APPEND) ?
         my_b_append_tell(info) :
         my_b_tell(info);
}
/*
Make next read happen at the given position
For write cache, make next write happen at the given position
......
......@@ -987,10 +987,19 @@ innobase_commit_low(
trx->mysql_master_log_file_name
= active_mi->rli.group_master_log_name;
/*
Guilhem to Heikki: in 5.0 we don't need to do a computation
(old_pos+len) to get the end_pos, because we already have the
end_pos under hand in the replication code
(Query_log_event::exec_event()).
I tested the code change below (simulated a crash with kill
-9) and got the good (binlog, position) displayed by InnoDB at
crash recovery, so this code change is ok.
*/
trx->mysql_master_log_pos = ((ib_longlong)
(active_mi->rli.group_master_log_pos +
active_mi->rli.event_len
(active_mi->rli.future_group_master_log_pos
));
}
#endif /* HAVE_REPLICATION */
......
......@@ -84,7 +84,8 @@ static int find_uniq_filename(char *name)
MYSQL_LOG::MYSQL_LOG()
:bytes_written(0), last_time(0), query_start(0), name(0),
file_id(1), open_count(1), log_type(LOG_CLOSED), write_error(0), inited(0),
need_start_event(1)
need_start_event(1), description_event_for_exec(0),
description_event_for_queue(0)
{
/*
We don't want to initialize LOCK_Log here as such initialization depends on
......@@ -111,6 +112,8 @@ void MYSQL_LOG::cleanup()
{
inited= 0;
close(LOG_CLOSE_INDEX);
delete description_event_for_queue;
delete description_event_for_exec;
(void) pthread_mutex_destroy(&LOCK_log);
(void) pthread_mutex_destroy(&LOCK_index);
(void) pthread_cond_destroy(&update_cond);
......@@ -179,7 +182,8 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
const char *new_name, const char *index_file_name_arg,
enum cache_type io_cache_type_arg,
bool no_auto_events_arg,
ulong max_size_arg)
ulong max_size_arg,
bool null_created_arg)
{
char buff[512];
File file= -1, index_file_nr= -1;
......@@ -272,8 +276,8 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
if (my_b_safe_write(&log_file, (byte*) BINLOG_MAGIC,
BIN_LOG_HEADER_SIZE))
goto err;
bytes_written += BIN_LOG_HEADER_SIZE;
write_file_name_to_index_file=1;
bytes_written+= BIN_LOG_HEADER_SIZE;
write_file_name_to_index_file= 1;
}
if (!my_b_inited(&index_file))
......@@ -302,10 +306,42 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
}
if (need_start_event && !no_auto_events)
{
need_start_event=0;
Start_log_event s;
/*
In 4.x we set need_start_event=0 here, but in 5.0 we want a Start event
even if this is not the very first binlog.
*/
Format_description_log_event s(BINLOG_VERSION);
if (!s.is_valid())
goto err;
s.set_log_pos(this);
s.write(&log_file);
if (null_created_arg)
s.created= 0;
if (s.write(&log_file))
goto err;
bytes_written+= s.get_event_len();
}
if (description_event_for_queue &&
description_event_for_queue->binlog_version>=4)
{
/*
This is a relay log written to by the I/O slave thread.
Write the event so that others can later know the format of this relay
log.
Note that this event is very close to the original event from the
master (it has binlog version of the master, event types of the
master), so this is suitable to parse the next relay log's event. It
has been produced by
Format_description_log_event::Format_description_log_event(char*
buf,).
Why don't we want to write the description_event_for_queue if this event
is for format<4 (3.23 or 4.x): this is because in that case, the
description_event_for_queue describes the data received from the master,
but not the data written to the relay log (*conversion*), which is in
format 4 (slave's).
*/
if (description_event_for_queue->write(&log_file))
goto err;
bytes_written+= description_event_for_queue->get_event_len();
}
if (flush_io_cache(&log_file))
goto err;
......@@ -596,7 +632,7 @@ bool MYSQL_LOG::reset_logs(THD* thd)
if (!thd->slave_thread)
need_start_event=1;
open(save_name, save_log_type, 0, index_file_name,
io_cache_type, no_auto_events, max_size);
io_cache_type, no_auto_events, max_size, 0);
my_free((gptr) save_name, MYF(0));
err:
......@@ -986,8 +1022,17 @@ void MYSQL_LOG::new_file(bool need_lock)
Note that at this point, log_type != LOG_CLOSED (important for is_open()).
*/
/*
new_file() is only used for rotation (in FLUSH LOGS or because size >
max_binlog_size or max_relay_log_size).
If this is a binary log, the Format_description_log_event at the beginning of
the new file should have created=0 (to distinguish it from the
Format_description_log_event written at server startup, which should
trigger temp tables deletion on slaves).
*/
open(old_name, save_log_type, new_name_ptr, index_file_name, io_cache_type,
no_auto_events, max_size);
no_auto_events, max_size, 1);
my_free(old_name,MYF(0));
end:
......@@ -1282,6 +1327,12 @@ bool MYSQL_LOG::write(Log_event* event_info)
}
#endif
#if MYSQL_VERSION_ID < 50000
/*
In 5.0 this is not needed anymore as we store the value of
FOREIGN_KEY_CHECKS in a binary way in the Query event's header.
The code below was enabled in 4.0 and 4.1.
*/
/*
If the user has set FOREIGN_KEY_CHECKS=0 we wrap every SQL
command in the binlog inside:
......@@ -1297,6 +1348,7 @@ bool MYSQL_LOG::write(Log_event* event_info)
if (e.write(file))
goto err;
}
#endif
}
/* Write the SQL command */
......@@ -1307,6 +1359,7 @@ bool MYSQL_LOG::write(Log_event* event_info)
/* Write log events to reset the 'run environment' of the SQL command */
#if MYSQL_VERSION_ID < 50000
if (thd && thd->options & OPTION_NO_FOREIGN_KEY_CHECKS)
{
Query_log_event e(thd, "SET FOREIGN_KEY_CHECKS=1", 24, 0);
......@@ -1314,6 +1367,7 @@ bool MYSQL_LOG::write(Log_event* event_info)
if (e.write(file))
goto err;
}
#endif
/*
Tell for transactional table handlers up to which position in the
......@@ -1720,6 +1774,7 @@ void MYSQL_LOG::close(uint exiting)
Stop_log_event s;
s.set_log_pos(this);
s.write(&log_file);
bytes_written+= s.get_event_len();
signal_update();
}
#endif /* HAVE_REPLICATION */
......
......@@ -192,9 +192,25 @@ static inline int read_str(char * &buf, char *buf_end, char * &str,
return 0;
}
/*
  Prints one "session_var=value" item; mysqlbinlog uses this to build the
  SET commands it prints just before a query.

  Nothing is printed unless 'option' is one of the bits in 'bits_changed'.
  The value printed is 1 when 'option' is set in 'flags' and 0 otherwise.
  When *need_comma is true a ", " separator is printed first; *need_comma is
  set to true once an item has been printed, so that the next item gets
  separated from this one.
*/
static void print_set_option(FILE* file, uint32 bits_changed, uint32 option,
                             uint32 flags, const char* name, bool* need_comma)
{
  if (!(bits_changed & option))
    return;                         /* this option did not change */

  if (*need_comma)
    fputs(", ", file);
  fprintf(file,"%s=%d", name, (flags & option) != 0);
  *need_comma= true;
}
/**************************************************************************
Log_event methods
Log_event methods (= the parent class of all events)
**************************************************************************/
/*
......@@ -204,7 +220,7 @@ static inline int read_str(char * &buf, char *buf_end, char * &str,
const char* Log_event::get_type_str()
{
switch(get_type_code()) {
case START_EVENT: return "Start";
case START_EVENT_V3: return "Start_v3";
case STOP_EVENT: return "Stop";
case QUERY_EVENT: return "Query";
case ROTATE_EVENT: return "Rotate";
......@@ -218,6 +234,7 @@ const char* Log_event::get_type_str()
case EXEC_LOAD_EVENT: return "Exec_load";
case RAND_EVENT: return "RAND";
case USER_VAR_EVENT: return "User var";
case FORMAT_DESCRIPTION_EVENT: return "Format_desc";
default: return "Unknown"; /* impossible */
}
}
......@@ -254,24 +271,66 @@ Log_event::Log_event()
Log_event::Log_event()
*/
Log_event::Log_event(const char* buf, bool old_format)
Log_event::Log_event(const char* buf,
const Format_description_log_event* description_event)
:temp_buf(0), cached_event_len(0), cache_stmt(0)
{
#ifndef MYSQL_CLIENT
thd = 0;
#endif
when = uint4korr(buf);
server_id = uint4korr(buf + SERVER_ID_OFFSET);
if (old_format)
if (description_event->binlog_version==1)
{
log_pos=0;
flags=0;
log_pos= 0;
flags= 0;
return;
}
else
/* 4.0 or newer */
log_pos= uint4korr(buf + LOG_POS_OFFSET);
/*
If the log is 4.0 (so here it can only be a 4.0 relay log read by the SQL
thread or a 4.0 master binlog read by the I/O thread), log_pos is the
beginning of the event: we transform it into the end of the event, which is
more useful.
But how do you know that the log is 4.0: you know it if description_event is
version 3 *and* you are not reading a Format_desc (remember that mysqlbinlog
starts by assuming that 5.0 logs are in 4.0 format, until it finds a
Format_desc).
*/
if (description_event->binlog_version==3 &&
buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT &&
/*
If log_pos=0, don't change it. log_pos==0 is a marker to mean
"don't change rli->group_master_log_pos" (see
inc_group_relay_log_pos()). As it is not a real log_pos, adding the event's
length to it is nonsense. For example, a fake Rotate event should
not have its log_pos (which is 0) changed or it will modify
Exec_master_log_pos in SHOW SLAVE STATUS, displaying a nonsense value
(a non-zero offset which does not exist in the master's binlog, and which
will cause problems if the user uses this value in CHANGE MASTER).
*/
log_pos)
log_pos+= uint4korr(buf + EVENT_LEN_OFFSET);
flags= uint2korr(buf + FLAGS_OFFSET);
if ((buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT) ||
(buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT))
{
log_pos = uint4korr(buf + LOG_POS_OFFSET);
flags = uint2korr(buf + FLAGS_OFFSET);
/*
These events always have a header which stops here (i.e. their header is
FROZEN).
*/
/*
Initialization to zero of all other Log_event members as they're not
specified. Currently there are no such members; in the future there will
be an event UID (but Format_description and Rotate don't need this UID, as
they are not propagated through --log-slave-updates (remember the UID is
used to not play a query twice when you have two masters which are slaves
of a 3rd master)). Then we are done.
*/
return;
}
#ifndef MYSQL_CLIENT
thd = 0;
#endif
/* otherwise, go on with reading the header from buf (nothing now) */
}
#ifndef MYSQL_CLIENT
......@@ -322,10 +381,10 @@ int Log_event::exec_event(struct st_relay_log_info* rli)
has already been updated.
*/
if ((thd->options & OPTION_BEGIN) && opt_using_transactions)
rli->inc_event_relay_log_pos(get_event_len());
rli->inc_event_relay_log_pos();
else
{
rli->inc_group_relay_log_pos(get_event_len(),log_pos);
rli->inc_group_relay_log_pos(log_pos);
flush_relay_log_info(rli);
/*
Note that Rotate_log_event::exec_event() does not call this function,
......@@ -387,7 +446,7 @@ void Log_event::init_show_field_list(List<Item>* field_list)
field_list->push_back(new Item_empty_string("Event_type", 20));
field_list->push_back(new Item_return_int("Server_id", 10,
MYSQL_TYPE_LONG));
field_list->push_back(new Item_return_int("Orig_log_pos", 11,
field_list->push_back(new Item_return_int("End_log_pos", 11,
MYSQL_TYPE_LONGLONG));
field_list->push_back(new Item_empty_string("Info", 20));
}
......@@ -410,6 +469,12 @@ int Log_event::write(IO_CACHE* file)
int Log_event::write_header(IO_CACHE* file)
{
/*
Header will be of size LOG_EVENT_HEADER_LEN for all events, except for
FORMAT_DESCRIPTION_EVENT and ROTATE_EVENT, where it will be
LOG_EVENT_MINIMAL_HEADER_LEN (remember these 2 have a frozen header, because
we read them before knowing the format).
*/
char buf[LOG_EVENT_HEADER_LEN];
char* pos = buf;
int4store(pos, (ulong) when); // timestamp
......@@ -417,19 +482,45 @@ int Log_event::write_header(IO_CACHE* file)
*pos++ = get_type_code(); // event type code
int4store(pos, server_id);
pos += 4;
long tmp=get_data_size() + LOG_EVENT_HEADER_LEN;
long tmp; // total length of the event
switch (get_type_code())
{
case FORMAT_DESCRIPTION_EVENT:
case ROTATE_EVENT:
tmp= get_data_size() + LOG_EVENT_MINIMAL_HEADER_LEN;
break;
default:
tmp= get_data_size() + LOG_EVENT_HEADER_LEN;
break;
}
int4store(pos, tmp);
pos += 4;
int4store(pos, log_pos);
pos += 4;
int2store(pos, flags);
pos += 2;
switch (get_type_code())
{
case FORMAT_DESCRIPTION_EVENT:
case ROTATE_EVENT:
break;
default:
/*
Other data to print in the header (nothing now); in that case increment
pos.
*/
break;
}
return (my_b_safe_write(file, (byte*) buf, (uint) (pos - buf)));
}
/*
Log_event::read_log_event()
This needn't be format-tolerant, because we only read
LOG_EVENT_MINIMAL_HEADER_LEN (we just want to read the event's length).
*/
#ifndef MYSQL_CLIENT
......@@ -438,7 +529,7 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet,
{
ulong data_len;
int result=0;
char buf[LOG_EVENT_HEADER_LEN];
char buf[LOG_EVENT_MINIMAL_HEADER_LEN];
DBUG_ENTER("read_log_event");
if (log_lock)
......@@ -458,24 +549,25 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet,
goto end;
}
data_len= uint4korr(buf + EVENT_LEN_OFFSET);
if (data_len < LOG_EVENT_HEADER_LEN ||
if (data_len < LOG_EVENT_MINIMAL_HEADER_LEN ||
data_len > current_thd->variables.max_allowed_packet)
{
DBUG_PRINT("error",("data_len: %ld", data_len));
result= ((data_len < LOG_EVENT_HEADER_LEN) ? LOG_READ_BOGUS :
result= ((data_len < LOG_EVENT_MINIMAL_HEADER_LEN) ? LOG_READ_BOGUS :
LOG_READ_TOO_LARGE);
goto end;
}
packet->append(buf, sizeof(buf));
data_len-= LOG_EVENT_HEADER_LEN;
data_len-= LOG_EVENT_MINIMAL_HEADER_LEN;
if (data_len)
{
if (packet->append(file, data_len))
{
/*
Here we should never hit EOF in a non-error condition.
Here if we hit EOF it's really an error: as data_len is >=0
there's supposed to be more bytes available.
EOF means we are reading the event partially, which should
never happen.
never happen: either we read badly or the binlog is truncated.
*/
result= file->error >= 0 ? LOG_READ_TRUNC: LOG_READ_IO;
/* Implicit goto end; */
......@@ -502,24 +594,42 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet,
Log_event::read_log_event()
NOTE:
Allocates memory; The caller is responsible for clean-up
Allocates memory; The caller is responsible for clean-up.
*/
#ifndef MYSQL_CLIENT
Log_event* Log_event::read_log_event(IO_CACHE* file,
pthread_mutex_t* log_lock,
bool old_format)
const Format_description_log_event *description_event)
#else
Log_event* Log_event::read_log_event(IO_CACHE* file, bool old_format)
Log_event* Log_event::read_log_event(IO_CACHE* file,
const Format_description_log_event *description_event)
#endif
{
char head[LOG_EVENT_HEADER_LEN];
uint header_size= old_format ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
DBUG_ASSERT(description_event);
char head[LOG_EVENT_MINIMAL_HEADER_LEN];
/*
First we only want to read at most LOG_EVENT_MINIMAL_HEADER_LEN, just to
check the event for sanity and to know its length; no need to really parse
it. We say "at most" because this could be a 3.23 master, which has header
of 13 bytes, whereas LOG_EVENT_MINIMAL_HEADER_LEN is 19 bytes (it's "minimal"
over the set {MySQL >=4.0}).
*/
uint header_size= min(description_event->common_header_len,
LOG_EVENT_MINIMAL_HEADER_LEN);
LOCK_MUTEX;
DBUG_PRINT("info", ("my_b_tell=%lu", my_b_tell(file)));
if (my_b_read(file, (byte *) head, header_size))
{
DBUG_PRINT("info", ("Log_event::read_log_event(IO_CACHE*,Format_desc*) \
failed my_b_read"));
UNLOCK_MUTEX;
/*
No error here; it could be that we are at the file's end. However if the
next my_b_read() fails (below), it will be an error as we were able to
read the first bytes.
*/
return 0;
}
......@@ -553,7 +663,8 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, bool old_format)
error = "read error";
goto err;
}
if ((res = read_log_event(buf, data_len, &error, old_format)))
if ((res= read_log_event(buf, data_len, &error,
description_event)))
res->register_temp_buf(buf);
err:
......@@ -580,87 +691,105 @@ Error in Log_event::read_log_event(): '%s', data_len: %d, event_type: %d",
/*
Log_event::read_log_event()
Builds a Log_event object of the right subclass from an event image held in
memory.
buf                event image
event_len          length of the image; it must match the length stored in
                   the image at EVENT_LEN_OFFSET
error              out: set to a static message when the event is rejected
description_event  describes the format of the binlog the event comes from;
                   it is handed to the event constructors
Returns the new event, or NULL/0 with *error set.
Binlog format tolerance is in (buf, event_len, description_event)
constructors.
NOTE(review): this listing carries both the old_format-based lines and the
description_event-based lines which replace them, one after the other.
*/
Log_event* Log_event::read_log_event(const char* buf, int event_len,
const char **error, bool old_format)
Log_event* Log_event::read_log_event(const char* buf, uint event_len,
const char **error,
const Format_description_log_event *description_event)
{
DBUG_ENTER("Log_event::read_log_event(char*,...)");
DBUG_ASSERT(description_event);
DBUG_PRINT("info", ("binlog_version=%d", description_event->binlog_version));
/*
The length passed by the caller must agree with the length recorded in the
event itself.
NOTE(review): the first test only guarantees EVENT_LEN_OFFSET bytes, while
uint4korr() reads 4 bytes starting at that offset - confirm that callers
never pass a shorter buffer.
*/
if (event_len < EVENT_LEN_OFFSET ||
(uint) event_len != uint4korr(buf+EVENT_LEN_OFFSET))
{
*error="Sanity check failed"; // Needed to free buffer
return NULL; // general sanity check - will fail on a partial read
DBUG_RETURN(NULL); // general sanity check - will fail on a partial read
}
Log_event* ev = NULL;
/* Dispatch on the type byte; unknown types leave ev at NULL */
switch(buf[EVENT_TYPE_OFFSET]) {
case QUERY_EVENT:
ev = new Query_log_event(buf, event_len, old_format);
ev = new Query_log_event(buf, event_len, description_event);
break;
case LOAD_EVENT:
ev = new Create_file_log_event(buf, event_len, old_format);
ev = new Create_file_log_event(buf, event_len, description_event);
break;
case NEW_LOAD_EVENT:
ev = new Load_log_event(buf, event_len, old_format);
ev = new Load_log_event(buf, event_len, description_event);
break;
case ROTATE_EVENT:
ev = new Rotate_log_event(buf, event_len, old_format);
ev = new Rotate_log_event(buf, event_len, description_event);
break;
#ifdef HAVE_REPLICATION
case SLAVE_EVENT:
case SLAVE_EVENT: /* can never happen (unused event) */
ev = new Slave_log_event(buf, event_len);
break;
#endif /* HAVE_REPLICATION */
case CREATE_FILE_EVENT:
ev = new Create_file_log_event(buf, event_len, old_format);
ev = new Create_file_log_event(buf, event_len, description_event);
break;
case APPEND_BLOCK_EVENT:
ev = new Append_block_log_event(buf, event_len);
ev = new Append_block_log_event(buf, event_len, description_event);
break;
case DELETE_FILE_EVENT:
ev = new Delete_file_log_event(buf, event_len);
ev = new Delete_file_log_event(buf, event_len, description_event);
break;
case EXEC_LOAD_EVENT:
ev = new Execute_load_log_event(buf, event_len);
ev = new Execute_load_log_event(buf, event_len, description_event);
break;
case START_EVENT:
ev = new Start_log_event(buf, old_format);
case START_EVENT_V3: /* this is sent only by MySQL <=4.x */
ev = new Start_log_event_v3(buf, description_event);
break;
#ifdef HAVE_REPLICATION
case STOP_EVENT:
ev = new Stop_log_event(buf, old_format);
ev = new Stop_log_event(buf, description_event);
break;
#endif /* HAVE_REPLICATION */
case INTVAR_EVENT:
ev = new Intvar_log_event(buf, old_format);
ev = new Intvar_log_event(buf, description_event);
break;
case RAND_EVENT:
ev = new Rand_log_event(buf, old_format);
ev = new Rand_log_event(buf, description_event);
break;
case USER_VAR_EVENT:
ev = new User_var_log_event(buf, old_format);
ev = new User_var_log_event(buf, description_event);
break;
case FORMAT_DESCRIPTION_EVENT:
ev = new Format_description_log_event(buf, event_len, description_event);
break;
default:
break;
}
/*
is_valid() performs small event-specific sanity tests which are important.
For example there are some my_malloc() calls in constructors
(e.g. Query_log_event::Query_log_event(char*...)); when such a my_malloc()
fails we cannot return an error out of the constructor (a constructor
returns nothing), so instead we leave the pointer we wanted to allocate
(e.g. 'query') at 0 and we test it in is_valid(). Same for
Format_description_log_event, member 'post_header_len'.
*/
if (!ev || !ev->is_valid())
{
delete ev;
#ifdef MYSQL_CLIENT
/*
In the client, force_opt decides whether an invalid event is an error or
is replaced by an Unknown_log_event so that reading can go on.
*/
if (!force_opt)
if (!force_opt) /* then mysqlbinlog dies */
{
*error= "Found invalid event in binary log";
return 0;
DBUG_RETURN(0);
}
ev= new Unknown_log_event(buf, old_format);
ev= new Unknown_log_event(buf, description_event);
#else
*error= "Found invalid event in binary log";
return 0;
DBUG_RETURN(0);
#endif
}
/* Remember the on-disk length of the event we have just built */
ev->cached_event_len = event_len;
return ev;
DBUG_RETURN(ev);
}
#ifdef MYSQL_CLIENT
......@@ -674,7 +803,7 @@ void Log_event::print_header(FILE* file)
char llbuff[22];
fputc('#', file);
print_timestamp(file);
fprintf(file, " server id %d log_pos %s ", server_id,
fprintf(file, " server id %d end_log_pos %s ", server_id,
llstr(log_pos,llbuff));
}
......@@ -708,13 +837,30 @@ void Log_event::print_timestamp(FILE* file, time_t* ts)
/*
Log_event::set_log_pos()
Only used when we are writing an event which we created, to the BINlog. That
is, when we have parsed and executed a query; we then want to set the event's
log_pos to what it is going to be in the binlog after we write it. Note that
this is the position of the END of the event.
*/
#ifndef MYSQL_CLIENT
void Log_event::set_log_pos(MYSQL_LOG* log)
{
/*
Note that with a SEQ_READ_APPEND cache, my_b_tell() does not work well.
So it would give slightly wrong positions for the Format_desc/Rotate/Stop
events which the slave writes to its relay log. For example, the initial
Format_desc would have end_log_pos=91 instead of 95: after writing the
first 4 bytes of the relay log, my_b_tell() still reports 0, because
my_b_append() does not update the counter which my_b_tell() later uses (one
should probably use my_b_append_tell() to work around this).
To get right positions even when writing to the relay log, we use the (new)
my_b_safe_tell().
Note that this raises a question on the correctness of all these
DBUG_ASSERT(my_b_tell()=rli->event_relay_log_pos).
*/
/*
Only fill in log_pos when it has not been set yet (it is still 0).
The value stored is the current position in the log file plus the length of
this event, i.e. the position of the END of the event.
NOTE(review): this listing shows both the earlier my_b_tell() assignment and
the my_b_safe_tell() assignment which, per the comment above, replaces it.
*/
if (!log_pos)
log_pos = my_b_tell(&log->log_file);
log_pos = my_b_safe_tell(&log->log_file)+get_event_len();
}
#endif /* !MYSQL_CLIENT */
......@@ -727,10 +873,15 @@ void Log_event::set_log_pos(MYSQL_LOG* log)
/*
Query_log_event::pack_info()
This (which is used only for SHOW BINLOG EVENTS) could be updated to
print SET @@session_var=. But this is not urgent, as SHOW BINLOG EVENTS is
only informational: it does not produce queries suitable for replay (for
example, it does not print LOAD DATA INFILE).
*/
void Query_log_event::pack_info(Protocol *protocol)
{
// TODO: show the catalog ??
char *buf, *pos;
if (!(buf= my_malloc(9 + db_len + q_len, MYF(MY_WME))))
return;
......@@ -768,7 +919,7 @@ int Query_log_event::write(IO_CACHE* file)
int Query_log_event::write_data(IO_CACHE* file)
{
char buf[QUERY_HEADER_LEN];
char buf[QUERY_HEADER_LEN+1+4+1+8+1+1+catalog_len];
if (!query)
return -1;
......@@ -814,8 +965,44 @@ int Query_log_event::write_data(IO_CACHE* file)
int4store(buf + Q_EXEC_TIME_OFFSET, exec_time);
buf[Q_DB_LEN_OFFSET] = (char) db_len;
int2store(buf + Q_ERR_CODE_OFFSET, error_code);
int2store(buf + Q_STATUS_VARS_LEN_OFFSET, status_vars_len);
return (my_b_safe_write(file, (byte*) buf, QUERY_HEADER_LEN) ||
/*
You MUST always write status vars in increasing order of code. This
guarantees that a slightly older slave will be able to parse those he
knows.
*/
char* start= buf+QUERY_HEADER_LEN;
if (flags2_inited)
{
*(start++)= Q_FLAGS2_CODE;
int4store(start, flags2);
start+= 4;
}
if (sql_mode_inited)
{
*(start++)= Q_SQL_MODE_CODE;
int8store(start, sql_mode);
start+= 8;
}
if (catalog)
{
*(start++)= Q_CATALOG_CODE;
*(start++)= catalog_len;
bmove(start, catalog, catalog_len);
start+= catalog_len;
}
/*
Here there could be code like
if (command-line-option-which-says-"log_this_variable")
{
*(start++)= Q_THIS_VARIABLE_CODE;
int4store(start, this_variable);
start+= 4;
}
*/
return (my_b_safe_write(file, (byte*) buf, (start-buf)) ||
my_b_safe_write(file, (db) ? (byte*) db : (byte*)"", db_len + 1) ||
my_b_safe_write(file, (byte*) query, q_len)) ? -1 : 0;
}
......@@ -830,68 +1017,177 @@ Query_log_event::Query_log_event(THD* thd_arg, const char* query_arg,
ulong query_length, bool using_trans)
:Log_event(thd_arg, !thd_arg->tmp_table_used ?
0 : LOG_EVENT_THREAD_SPECIFIC_F, using_trans),
data_buf(0), query(query_arg),
data_buf(0), query(query_arg), catalog(thd_arg->catalog),
db(thd_arg->db), q_len((uint32) query_length),
error_code(thd_arg->killed != THD::NOT_KILLED ? thd->killed_errno() : thd_arg->net.last_errno),
thread_id(thd_arg->thread_id),
/* save the original thread id; we already know the server id */
slave_proxy_id(thd_arg->variables.pseudo_thread_id)
slave_proxy_id(thd_arg->variables.pseudo_thread_id),
flags2_inited(1), sql_mode_inited(1), flags2(0), sql_mode(0)
{
time_t end_time;
time(&end_time);
exec_time = (ulong) (end_time - thd->start_time);
catalog_len = (catalog) ? (uint32) strlen(catalog) : 0;
status_vars_len= 1+4+1+8+1+1+catalog_len;
db_len = (db) ? (uint32) strlen(db) : 0;
/*
If we don't use flags2 for anything else than options contained in
thd->options, it would be more efficient to flags2=thd_arg->options
(OPTIONS_WRITTEN_TO_BINLOG would be used only at reading time).
But it's likely that we don't want to use 32 bits for 3 bits; in the future
we will probably want to reclaim the 29 bits. So we need the &.
*/
flags2= thd_arg->options & OPTIONS_WRITTEN_TO_BIN_LOG;
sql_mode= thd_arg->variables.sql_mode;
DBUG_PRINT("info",("Query_log_event has flags2=%lu sql_mode=%lu",flags2,sql_mode));
}
#endif /* MYSQL_CLIENT */
/*
Query_log_event::Query_log_event()
Builds a Query_log_event from an event image held in memory.
This is used by the SQL slave thread to prepare the event before execution.
buf                event image, starting at the common header
event_len          length of the image
description_event  gives the common header length and the Query post-header
                   length of the binlog the event comes from
On any failure the constructor returns early and leaves 'query' at NULL.
NOTE(review): this listing carries both the old_format-based lines and the
description_event-based lines which replace them, interleaved.
*/
Query_log_event::Query_log_event(const char* buf, int event_len,
bool old_format)
:Log_event(buf, old_format),data_buf(0), query(NULL), db(NULL)
Query_log_event::Query_log_event(const char* buf, uint event_len,
const Format_description_log_event *description_event)
:Log_event(buf, description_event), data_buf(0), query(NULL),
catalog(NULL), db(NULL), catalog_len(0), status_vars_len(0),
flags2_inited(0), sql_mode_inited(0)
{
ulong data_len;
if (old_format)
{
if ((uint)event_len < OLD_HEADER_LEN + QUERY_HEADER_LEN)
uint8 common_header_len, post_header_len;
DBUG_ENTER("Query_log_event::Query_log_event(char*,...)");
common_header_len= description_event->common_header_len;
/* post_header_len[] is indexed by (event type - 1) */
post_header_len= description_event->post_header_len[QUERY_EVENT-1];
DBUG_PRINT("info",("event_len=%ld, common_header_len=%d, post_header_len=%d",
event_len, common_header_len, post_header_len));
/*
We test if the event's length is sensible, and if so we compute data_len.
We cannot rely on QUERY_HEADER_LEN here as it would not be format-tolerant.
We use QUERY_HEADER_MINIMAL_LEN which is the same for 3.23, 4.0 & 5.0.
*/
if (event_len < (uint)(common_header_len + post_header_len))
return;
data_len = event_len - (QUERY_HEADER_LEN + OLD_HEADER_LEN);
buf += OLD_HEADER_LEN;
data_len = event_len - (common_header_len + post_header_len);
/* From here on, buf points to the post-header */
buf+= common_header_len;
slave_proxy_id= thread_id = uint4korr(buf + Q_THREAD_ID_OFFSET);
exec_time = uint4korr(buf + Q_EXEC_TIME_OFFSET);
db_len = (uint)buf[Q_DB_LEN_OFFSET];
error_code = uint2korr(buf + Q_ERR_CODE_OFFSET);
/*
5.0 format starts here.
Depending on the format, we may or may not have affected/warnings etc.
The remaining post-header to be parsed has length:
*/
uint32 tmp= post_header_len - QUERY_HEADER_MINIMAL_LEN;
if (tmp)
{
/* The first 2 bytes of the 5.0 part give the length of the status vars */
status_vars_len= uint2korr(buf + Q_STATUS_VARS_LEN_OFFSET);
DBUG_PRINT("info", ("Query_log_event has status_vars_len=%d",
status_vars_len));
tmp-= 2;
}
else
/* we have parsed everything we know in the post header */
#ifndef DBUG_OFF
if (tmp) /* this is probably a master newer than us */
DBUG_PRINT("info", ("Query_log_event has longer post header than we know\
(%d more bytes)", tmp));
#endif
/*
Suppose you have a 4.0 master with --ansi and a 5.0 slave with --ansi.
The slave sets flags2 to 0 (because that's a 4.0 event); if we simply use
this value of 0, we will cancel --ansi on the slave, which is
unwanted. In this example 0 means "unset", not really "set to 0".
Fortunately we have flags2_inited==0 to distinguish between "unset" and
"set to 0". See below.
*/
/* variable-part: the status vars; only in MySQL 5.0 */
const char* start= buf+post_header_len;
const char* end= start+status_vars_len;
/*
NOTE(review): status_vars_len and catalog_len are read from the event; no
test here checks that 'end', or the reads done inside the loop below, stay
within event_len - confirm that corrupted events are rejected elsewhere.
*/
/*
The place from which we will start string duplication.
*/
const char* start_dup= end;
/* Each status var is a 1-byte code followed by a value of code-specific size */
for (const char* pos=start;pos<end;)
{
if ((uint)event_len < QUERY_EVENT_OVERHEAD)
return;
data_len = event_len - QUERY_EVENT_OVERHEAD;
buf += LOG_EVENT_HEADER_LEN;
switch (*pos)
{
case Q_FLAGS2_CODE:
flags2_inited= 1;
flags2= uint4korr(++pos);
DBUG_PRINT("info",("In Query_log_event, read flags2=%lu", flags2));
pos+= 4;
break;
case Q_SQL_MODE_CODE:
sql_mode_inited= 1;
sql_mode= uint8korr(++pos);
DBUG_PRINT("info",("In Query_log_event, read sql_mode=%lu", sql_mode));
pos+= 8;
break;
case Q_CATALOG_CODE:
catalog_len= *(++pos);
/*
Now 'pos' points to beginning of catalog - 1.
The catalog must be included in the string which we will duplicate
later. If string status vars having a smaller code had been seen before
and so marked to-be-duplicated, start_dup would be != end and we would
not need (and want) to change start_dup (because this would cut the
previously marked status vars).
*/
if (start_dup==end)
start_dup= ++pos;
pos+= catalog_len;
break;
default:
/* That's why you must write status vars in growing order of code */
DBUG_PRINT("info",("Query_log_event has unknown status vars (first has\
code=%d), skipping the rest of them", *pos));
pos= end;
}
}
exec_time = uint4korr(buf + Q_EXEC_TIME_OFFSET);
error_code = uint2korr(buf + Q_ERR_CODE_OFFSET);
/* A 2nd variable part; this is common to all versions */
if (!(data_buf = (char*) my_malloc(data_len + 1, MYF(MY_WME))))
data_len-= start_dup-start; /* cut not-to-be-duplicated head */
/* One allocation holds the catalog (if any), the db and the query */
if (!(data_buf = (char*) my_strdup_with_length(start_dup,
data_len,
MYF(MY_WME))))
return;
memcpy(data_buf, buf + Q_DATA_OFFSET, data_len);
slave_proxy_id= thread_id= uint4korr(buf + Q_THREAD_ID_OFFSET);
db = data_buf;
db_len = (uint)buf[Q_DB_LEN_OFFSET];
query=data_buf + db_len + 1;
q_len = data_len - 1 - db_len;
*((char*)query+q_len) = 0;
const char* tmp_buf= data_buf;
/* Now set event's pointers to point to bits of the new string */
if (catalog_len)
{
catalog= tmp_buf;
tmp_buf+= end-start_dup; /* "seek" to db */
}
db= tmp_buf;
/* The query starts right after the db name and its 1-byte terminator */
query= tmp_buf + db_len + 1;
q_len = data_buf + data_len - query;
/* This is used to detect wrong parsing. Could be removed in the future. */
DBUG_PRINT("info", ("catalog_len=%d catalog='%s' db='%s' q_len=%d",
catalog_len, catalog, db, q_len));
DBUG_VOID_RETURN;
}
/*
Query_log_event::print()
*/
#ifdef MYSQL_CLIENT
void Query_log_event::print(FILE* file, bool short_form, char* last_db)
void Query_log_event::print(FILE* file, bool short_form,
LAST_EVENT_INFO* last_event_info)
{
// TODO: print the catalog ??
char buff[40],*end; // Enough for SET TIMESTAMP
if (!short_form)
{
......@@ -902,10 +1198,10 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db)
bool same_db = 0;
if (db && last_db)
if (db && last_event_info->db)
{
if (!(same_db = !memcmp(last_db, db, db_len + 1)))
memcpy(last_db, db, db_len + 1);
if (!(same_db = !memcmp(last_event_info->db, db, db_len + 1)))
memcpy(last_event_info->db, db, db_len + 1);
}
if (db && db[0] && !same_db)
......@@ -916,6 +1212,67 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db)
my_fwrite(file, (byte*) buff, (uint) (end-buff),MYF(MY_NABP | MY_WME));
if (flags & LOG_EVENT_THREAD_SPECIFIC_F)
fprintf(file,"SET @@session.pseudo_thread_id=%lu;\n",(ulong)thread_id);
/*
Now the session variables;
it's more efficient to pass SQL_MODE as a number instead of a
comma-separated list.
FOREIGN_KEY_CHECKS, SQL_AUTO_IS_NULL, UNIQUE_CHECKS are session-only
variables (they have no global version; they're not listed in sql_class.h),
The tests below work for pure binlogs or pure relay logs. Won't work for
mixed relay logs but we don't create mixed relay logs (that is, there is no
relay log with a format change except within the 3 first events, which
mysqlbinlog handles gracefully). So this code should always be good.
*/
uint32 tmp;
if (likely(flags2_inited)) /* likely as this will mainly read 5.0 logs */
{
/* tmp is a bitmask of bits which have changed. */
if (likely(last_event_info->flags2_inited))
/* All bits which have changed */
tmp= (last_event_info->flags2) ^ flags2;
else /* that's the first Query event we read */
{
last_event_info->flags2_inited= 1;
tmp= ~((uint32)0); /* all bits have changed */
}
if (unlikely(tmp)) /* some bits have changed */
{
bool need_comma= 0;
fprintf(file, "SET ");
print_set_option(file, tmp, OPTION_NO_FOREIGN_KEY_CHECKS, ~flags2,
"@@session.foreign_key_checks", &need_comma);
print_set_option(file, tmp, OPTION_AUTO_IS_NULL, flags2,
"@@session.sql_auto_is_null", &need_comma);
print_set_option(file, tmp, OPTION_RELAXED_UNIQUE_CHECKS, ~flags2,
"@@session.unique_checks", &need_comma);
fprintf(file,";\n");
last_event_info->flags2= flags2;
}
}
/*
If flags2_inited==0, this is an event from 3.23 or 4.0; nothing to print
(remember we don't produce mixed relay logs so there cannot be 5.0 events
before that one so there is nothing to reset).
*/
if (likely(sql_mode_inited))
{
if (unlikely(!last_event_info->sql_mode_inited)) /* first Query event */
{
last_event_info->sql_mode_inited= 1;
last_event_info->sql_mode= ~sql_mode; // force a difference to force write
}
if (unlikely(last_event_info->sql_mode != sql_mode))
{
fprintf(file,"SET @@session.sql_mode=%lu;\n",(ulong)sql_mode);
last_event_info->sql_mode= sql_mode;
}
}
my_fwrite(file, (byte*) query, q_len, MYF(MY_NABP | MY_WME));
fprintf(file, ";\n");
}
......@@ -930,6 +1287,13 @@ void Query_log_event::print(FILE* file, bool short_form, char* last_db)
int Query_log_event::exec_event(struct st_relay_log_info* rli)
{
int expected_error,actual_error= 0;
/*
Colleagues: please never free(thd->catalog) in MySQL. This would lead to
bugs as here thd->catalog is a part of an alloced block, not an entire
alloced block (see Query_log_event::exec_event()). Same for thd->db.
Thank you.
*/
thd->catalog= (char*) catalog;
thd->db= (char*) rewrite_db(db);
/*
......@@ -938,10 +1302,11 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
When InnoDB will want to store, the positions in rli won't have
been updated yet, so group_master_log_* will point to old BEGIN
and event_master_log* will point to the beginning of current COMMIT.
So the position to store is event_master_log_pos + event_len
since we must store the pos of the END of the current log event (COMMIT).
But log_pos of the COMMIT Query event is what we want, i.e. the pos of the
END of the current log event (COMMIT). We save it in rli so that InnoDB can
access it.
*/
rli->event_len= get_event_len();
rli->future_group_master_log_pos= log_pos;
if (db_ok(thd->db, replicate_do_db, replicate_ignore_db))
{
......@@ -955,9 +1320,34 @@ int Query_log_event::exec_event(struct st_relay_log_info* rli)
thd->clear_error();
thd->variables.pseudo_thread_id= thread_id; // for temp tables
if (flags2_inited)
/*
all bits of thd->options which are 1 in OPTIONS_WRITTEN_TO_BIN_LOG must
take their value from flags2.
*/
thd->options= flags2|(thd->options & ~(ulong)OPTIONS_WRITTEN_TO_BIN_LOG);
/*
else, we are in a 3.23/4.0 binlog; we previously received a
Rotate_log_event which reset thd->options and sql_mode, so nothing to do.
*/
/*
We do not replicate IGNORE_DIR_IN_CREATE. That is, if the master is a
slave which runs with SQL_MODE=IGNORE_DIR_IN_CREATE, this should not
force us to ignore the dir too. Imagine you are a ring of machines, and
one has a disk problem so that you temporarily need IGNORE_DIR_IN_CREATE
on this machine; you don't want it to propagate elsewhere (you don't want
all slaves to start ignoring the dirs).
*/
if (sql_mode_inited)
thd->variables.sql_mode= (thd->variables.sql_mode&MODE_NO_DIR_IN_CREATE)|
(sql_mode & ~(uint32)MODE_NO_DIR_IN_CREATE);
/*
Sanity check to make sure the master did not get a really bad
error on the query.
error on the query (for example, if the query on the master was killed, we
don't want to play it on the slave, because we don't know how much of it
we have to play).
*/
if (ignored_error_code((expected_error = error_code)) ||
!check_expected_error(thd,rli,expected_error))
......@@ -1014,12 +1404,33 @@ Default database: '%s'",
print_slave_db_safe(db));
thd->query_error= 1;
}
/*
TODO: compare the values of "affected rows" around here. Something
like:
if ((uint32) affected_in_event != (uint32) affected_on_slave)
{
sql_print_error("Slave: did not get the expected number of affected \
rows running query from master - expected %d, got %d (this numbers \
should have matched modulo 4294967296).", 0, ...);
thd->query_error = 1;
}
We may also want an option to tell the slave to ignore "affected"
mismatch. This mismatch could be implemented with a new ER_ code, and
to ignore it you would use --slave-skip-errors...
To do the comparison we need to know the value of "affected" which the
above mysql_parse() computed. And we need to know the value of
"affected" in the master's binlog. Both will be implemented later. The
important thing is that we now have the format ready to log the values
of "affected" in the binlog. So we can release 5.0.0 before effectively
logging "affected" and effectively comparing it.
*/
}
/*
End of sanity check. If the test was wrong, the query got a really bad
error on the master, which could be inconsistent, abort and tell DBA to
check/fix it. check_expected_error() already printed the message to
stderr and rli, and set thd->query_error to 1.
End of sanity check. If the test was false, the query got a really bad
error on the master, which could be inconsistent. check_expected_error()
already printed the message to stderr and rli, to tell the DBA to fix
things, and it has set thd->query_error to 1.
*/
} /* End of if (db_ok(... */
......@@ -1039,15 +1450,23 @@ Default database: '%s'",
/**************************************************************************
Start_log_event methods
Start_log_event_v3 methods
**************************************************************************/
#ifndef MYSQL_CLIENT
/*
  Start_log_event_v3 default ctor.

  Builds a start event describing the running server: it records the
  compiled-in BINLOG_VERSION, the global ::server_version string and, as
  creation time, the 'when' value set up by Log_event().
*/
Start_log_event_v3::Start_log_event_v3()
  :Log_event(),
   binlog_version(BINLOG_VERSION)
{
  /* Fixed-size copy: the version field is always ST_SERVER_VER_LEN bytes */
  memcpy(server_version, ::server_version, ST_SERVER_VER_LEN);
  created= when;
}
#endif
/*
Start_log_event::pack_info()
Start_log_event_v3::pack_info()
*/
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
void Start_log_event::pack_info(Protocol *protocol)
void Start_log_event_v3::pack_info(Protocol *protocol)
{
char buf[12 + ST_SERVER_VER_LEN + 14 + 22], *pos;
pos= strmov(buf, "Server ver: ");
......@@ -1060,15 +1479,14 @@ void Start_log_event::pack_info(Protocol *protocol)
/*
Start_log_event::print()
Start_log_event_v3::print()
*/
#ifdef MYSQL_CLIENT
void Start_log_event::print(FILE* file, bool short_form, char* last_db)
void Start_log_event_v3::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
{
if (short_form)
return;
if (!short_form)
{
print_header(file);
fprintf(file, "\tStart: binlog v %d, server v %s created ", binlog_version,
server_version);
......@@ -1076,19 +1494,28 @@ void Start_log_event::print(FILE* file, bool short_form, char* last_db)
if (created)
fprintf(file," at startup");
fputc('\n', file);
}
#ifdef WHEN_WE_HAVE_THE_RESET_CONNECTION_SQL_COMMAND
/*
This is for mysqlbinlog: like in replication, we want to delete the stale
tmp files left by an unclean shutdown of mysqld (temporary tables). Probably
this can be done with RESET CONNECTION (syntax to be defined).
*/
fprintf(file,"RESET CONNECTION;\n");
#endif
fflush(file);
}
#endif /* MYSQL_CLIENT */
/*
Start_log_event::Start_log_event()
Start_log_event_v3::Start_log_event_v3()
*/
Start_log_event::Start_log_event(const char* buf,
bool old_format)
:Log_event(buf, old_format)
Start_log_event_v3::Start_log_event_v3(const char* buf,
const Format_description_log_event* description_event)
:Log_event(buf, description_event)
{
buf += (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
buf += description_event->common_header_len;
binlog_version = uint2korr(buf+ST_BINLOG_VER_OFFSET);
memcpy(server_version, buf+ST_SERVER_VER_OFFSET,
ST_SERVER_VER_LEN);
......@@ -1097,12 +1524,12 @@ Start_log_event::Start_log_event(const char* buf,
/*
Start_log_event::write_data()
Start_log_event_v3::write_data()
*/
int Start_log_event::write_data(IO_CACHE* file)
int Start_log_event_v3::write_data(IO_CACHE* file)
{
char buff[START_HEADER_LEN];
char buff[START_V3_HEADER_LEN];
int2store(buff + ST_BINLOG_VER_OFFSET,binlog_version);
memcpy(buff + ST_SERVER_VER_OFFSET,server_version,ST_SERVER_VER_LEN);
int4store(buff + ST_CREATED_OFFSET,created);
......@@ -1110,7 +1537,7 @@ int Start_log_event::write_data(IO_CACHE* file)
}
/*
Start_log_event::exec_event()
Start_log_event_v3::exec_event()
The master started
......@@ -1129,18 +1556,24 @@ int Start_log_event::write_data(IO_CACHE* file)
*/
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
int Start_log_event::exec_event(struct st_relay_log_info* rli)
int Start_log_event_v3::exec_event(struct st_relay_log_info* rli)
{
DBUG_ENTER("Start_log_event::exec_event");
switch (rli->mi->old_format) {
case BINLOG_FORMAT_CURRENT:
DBUG_ENTER("Start_log_event_v3::exec_event");
switch (rli->relay_log.description_event_for_exec->binlog_version)
{
case 3:
case 4:
/*
This is 4.x, so a Start_log_event is only at master startup,
so we are sure the master has restarted and cleared his temp tables.
This can either be 4.x (then a Start_log_event_v3 is only at master
startup so we are sure the master has restarted and cleared his temp
tables; the event always has 'created'>0) or 5.0 (then we have to test
'created').
*/
if (created)
{
close_temporary_tables(thd);
cleanup_load_tmpdir();
}
/*
As a transaction NEVER spans on 2 or more binlogs:
if we have an active transaction at this point, the master died while
......@@ -1163,33 +1596,264 @@ binary log.");
Now the older formats; in that case load_tmpdir is cleaned up by the I/O
thread.
*/
case BINLOG_FORMAT_323_LESS_57:
case 1:
if (strncmp(rli->relay_log.description_event_for_exec->server_version,
"3.23.57",7) >= 0 && created)
{
/*
Cannot distinguish a Start_log_event generated at master startup and
one generated by master FLUSH LOGS, so cannot be sure temp tables
have to be dropped. So do nothing.
Can distinguish, based on the value of 'created': this event was
generated at master startup.
*/
break;
case BINLOG_FORMAT_323_GEQ_57:
close_temporary_tables(thd);
}
/*
Can distinguish, based on the value of 'created',
which was generated at master startup.
Otherwise, can't distinguish a Start_log_event generated at master startup
and one generated by master FLUSH LOGS, so cannot be sure temp tables have
to be dropped. So do nothing.
*/
if (created)
close_temporary_tables(thd);
break;
default:
/* this case is impossible */
return 1;
DBUG_RETURN(1);
}
DBUG_RETURN(Log_event::exec_event(rli));
}
#endif /* defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT) */
/**************************************************************************
/***************************************************************************
Format_description_log_event methods
****************************************************************************/
/*
  Format_description_log_event 1st ctor.

  SYNOPSIS
    Format_description_log_event::Format_description_log_event
      binlog_ver       the binlog version for which we want to build
                       an event. Can be 1 (=MySQL 3.23), 3 (=4.0.x
                       x>=2 and 4.1) or 4 (MySQL 5.0). Note that the
                       old 4.0 (binlog version 2) is not supported;
                       it should not be used for replication with
                       5.0.
      server_ver       version string to store in an artificial event for
                       binlog version 1 or 3; if NULL, "3.23" or "4.0" is
                       used. Not used for binlog version 4, which stores
                       ::server_version. A string which does not fit in
                       ST_SERVER_VER_LEN bytes (terminator included) is
                       truncated.

  DESCRIPTION
    Ctor. Can be used to create the event to write to the binary log (when the
    server starts or when FLUSH LOGS), or to create artificial events to parse
    binlogs from MySQL 3.23 or 4.x.
    When in a client, only the 2nd use is possible.

    For an unsupported binlog_ver, or if the allocation of the post-header
    length table fails, post_header_len is left at 0, which makes is_valid()
    fail.

  TODO
    Update this code with the new event for LOAD DATA, once they are pushed (in
    4.1 or 5.0). If it's in 5.0, only the "case 4" block should be updated.
*/

Format_description_log_event::Format_description_log_event(uint8 binlog_ver,
                                                           const char* server_ver)
  : Start_log_event_v3()
{
  created= when;
  binlog_version= binlog_ver;
  switch(binlog_ver)
  {
  case 4: /* MySQL 5.0 */
    memcpy(server_version, ::server_version, ST_SERVER_VER_LEN);
    common_header_len= LOG_EVENT_HEADER_LEN;
    number_of_event_types= LOG_EVENT_TYPES;
    /* we'll catch my_malloc() error in is_valid() */
    post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8),
                                       MYF(0));
    /*
      This long list of assignments is not beautiful, but I see no way to
      make it nicer, as the right members are #defines, not array members, so
      it's impossible to write a loop.
    */
    if (post_header_len)
    {
      post_header_len[START_EVENT_V3-1]= START_V3_HEADER_LEN;
      post_header_len[QUERY_EVENT-1]= QUERY_HEADER_LEN;
      post_header_len[STOP_EVENT-1]= 0;
      post_header_len[ROTATE_EVENT-1]= ROTATE_HEADER_LEN;
      post_header_len[INTVAR_EVENT-1]= 0;
      post_header_len[LOAD_EVENT-1]= LOAD_HEADER_LEN;
      post_header_len[SLAVE_EVENT-1]= 0;
      post_header_len[CREATE_FILE_EVENT-1]= CREATE_FILE_HEADER_LEN;
      post_header_len[APPEND_BLOCK_EVENT-1]= APPEND_BLOCK_HEADER_LEN;
      post_header_len[EXEC_LOAD_EVENT-1]= EXEC_LOAD_HEADER_LEN;
      post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN;
      post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1];
      post_header_len[RAND_EVENT-1]= 0;
      post_header_len[USER_VAR_EVENT-1]= 0;
      post_header_len[FORMAT_DESCRIPTION_EVENT-1]= FORMAT_DESCRIPTION_HEADER_LEN;
    }
    break;

  case 1: /* 3.23 */
  case 3: /* 4.0.x x>=2 */
  {
    /*
      We build an artificial (i.e. not sent by the master) event, which
      describes what those old master versions send.
    */
    const char *version_str= server_ver;
    if (!version_str)
      version_str= (binlog_ver==1) ? "3.23" : "4.0";
    /*
      server_ver is supplied by the caller and may have any length, whereas
      server_version is handled as an ST_SERVER_VER_LEN-byte field (see the
      memcpy() in "case 4"). Copy at most ST_SERVER_VER_LEN-1 characters and
      always terminate, so that an over-long string cannot overrun the field.
    */
    uint version_len= (uint) strlen(version_str);
    if (version_len > (uint) (ST_SERVER_VER_LEN-1))
      version_len= (uint) (ST_SERVER_VER_LEN-1);
    memcpy(server_version, version_str, version_len);
    server_version[version_len]= 0;

    common_header_len= binlog_ver==1 ? OLD_HEADER_LEN :
      LOG_EVENT_MINIMAL_HEADER_LEN;
    /*
      The first new event in binlog version 4 is Format_desc. So any event type
      after that does not exist in older versions. We use the events known by
      version 3, even if version 1 had only a subset of them (this is not a
      problem: it uses a few bytes for nothing but unifies code; it does not
      make the slave detect less corruptions).
    */
    number_of_event_types= FORMAT_DESCRIPTION_EVENT - 1;
    post_header_len=(uint8*) my_malloc(number_of_event_types*sizeof(uint8),
                                       MYF(0));
    if (post_header_len)
    {
      post_header_len[START_EVENT_V3-1]= START_V3_HEADER_LEN;
      post_header_len[QUERY_EVENT-1]= QUERY_HEADER_MINIMAL_LEN;
      post_header_len[STOP_EVENT-1]= 0;
      post_header_len[ROTATE_EVENT-1]= (binlog_ver==1) ? 0 : ROTATE_HEADER_LEN;
      post_header_len[INTVAR_EVENT-1]= 0;
      post_header_len[LOAD_EVENT-1]= LOAD_HEADER_LEN;
      post_header_len[SLAVE_EVENT-1]= 0;
      post_header_len[CREATE_FILE_EVENT-1]= CREATE_FILE_HEADER_LEN;
      post_header_len[APPEND_BLOCK_EVENT-1]= APPEND_BLOCK_HEADER_LEN;
      post_header_len[EXEC_LOAD_EVENT-1]= EXEC_LOAD_HEADER_LEN;
      post_header_len[DELETE_FILE_EVENT-1]= DELETE_FILE_HEADER_LEN;
      post_header_len[NEW_LOAD_EVENT-1]= post_header_len[LOAD_EVENT-1];
      post_header_len[RAND_EVENT-1]= 0;
      post_header_len[USER_VAR_EVENT-1]= 0;
    }
    break;
  }

  default: /* Includes binlog version 2 i.e. 4.0.x x<=1 */
    /*
      Give every member a defined value, so that an invalid object never
      carries indeterminate lengths.
    */
    common_header_len= 0;
    number_of_event_types= 0;
    post_header_len= 0; /* will make is_valid() fail */
    break;
  }
}
Format_description_log_event::Format_description_log_event(const char* buf,
                                                           uint event_len,
                                                           const
                                                           Format_description_log_event*
                                                           description_event)
  /*
    Builds a Format_description_log_event from an event image held in memory.
      buf                event image, starting at the common header
      event_len          length of the image
      description_event  format description passed on to the
                         Start_log_event_v3 ctor

    The number of event types is not stored in the event: it is deduced from
    event_len, as everything after the common_header_len byte is the table of
    post-header lengths (one byte per event type).

    On any failure post_header_len is left at 0, which makes is_valid() fail.

    The problem with this constructor is that the fixed header may have a length
    different from this version, but we don't know this length as we have not
    read the Format_description_log_event which says it, yet. This length is in
    the post-header of the event, but we don't know where the post-header
    starts.
    So this type of event HAS to:
    - either have the header's length at the beginning (in the header, at a
    fixed position which will never be changed), not in the post-header. That
    would make the header be "shifted" compared to other events.
    - or have a header of size LOG_EVENT_MINIMAL_HEADER_LEN (19), in all future
    versions, so that we know for sure.
    I (Guilhem) chose the 2nd solution. Rotate has the same constraint (because
    it is sent before Format_description_log_event).
  */
  :Start_log_event_v3(buf, description_event)
{
  DBUG_ENTER("Format_description_log_event::Format_description_log_event(char*,...)");
  /*
    Put the object in the "invalid" state first: the early returns below must
    not leave post_header_len (which is_valid() tests) indeterminate.
  */
  post_header_len= 0;
  number_of_event_types= 0;
  common_header_len= 0;
  /*
    The event must at least hold the header, the Start_v3 fields and the
    common_header_len byte; otherwise the read below would go past the event
    and the unsigned subtraction computing number_of_event_types would wrap.
  */
  if (event_len <
      (uint) (LOG_EVENT_MINIMAL_HEADER_LEN + ST_COMMON_HEADER_LEN_OFFSET + 1))
    DBUG_VOID_RETURN;
  buf+= LOG_EVENT_MINIMAL_HEADER_LEN;
  if ((common_header_len=buf[ST_COMMON_HEADER_LEN_OFFSET]) < OLD_HEADER_LEN)
    DBUG_VOID_RETURN; /* sanity check */
  number_of_event_types=
    event_len-(LOG_EVENT_MINIMAL_HEADER_LEN+ST_COMMON_HEADER_LEN_OFFSET+1);
  DBUG_PRINT("info", ("common_header_len=%d number_of_event_types=%d",
                      common_header_len, number_of_event_types));
  /* If alloc fails, we'll detect it in is_valid() */
  post_header_len= (uint8*) my_memdup(buf+ST_COMMON_HEADER_LEN_OFFSET+1,
                                      number_of_event_types*
                                      sizeof(*post_header_len),
                                      MYF(0));
  DBUG_VOID_RETURN;
}
/*
  Format_description_log_event::write_data()

  Serializes the post-header of this event (binlog version, server version,
  creation time, common header length, then one post-header length per event
  type) into a local buffer and writes it to 'file' in a single call.

  RETURN VALUES
    0   ok
    -1  my_b_safe_write() reported an error
*/
int Format_description_log_event::write_data(IO_CACHE* file)
{
  /*
    Start_log_event_v3::write_data() is deliberately not called: doing so
    would result in two my_b_safe_write() calls instead of one.
  */
  char post_header[FORMAT_DESCRIPTION_HEADER_LEN];
  char *types_start= post_header + ST_COMMON_HEADER_LEN_OFFSET + 1;

  /* Part shared with the v3 start event */
  int2store(post_header + ST_BINLOG_VER_OFFSET, binlog_version);
  memcpy(post_header + ST_SERVER_VER_OFFSET, server_version,
         ST_SERVER_VER_LEN);
  int4store(post_header + ST_CREATED_OFFSET, created);

  /* Part specific to the format description event */
  post_header[ST_COMMON_HEADER_LEN_OFFSET]= LOG_EVENT_HEADER_LEN;
  memcpy(types_start, (byte*) post_header_len, LOG_EVENT_TYPES);

  if (my_b_safe_write(file, (byte*) post_header, sizeof(post_header)))
    return -1;
  return 0;
}
/*
SYNOPSIS
Format_description_log_event::exec_event()
IMPLEMENTATION
Save the information which describes the binlog's format, to be able to
read all coming events.
Call Start_log_event_v3::exec_event().
*/
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
int Format_description_log_event::exec_event(struct st_relay_log_info* rli)
{
  DBUG_ENTER("Format_description_log_event::exec_event");

  /*
    Replace the description event used for execution with this one; the
    previous one is destroyed here.
  */
  delete rli->relay_log.description_event_for_exec;
  rli->relay_log.description_event_for_exec= this;

  if (server_id != (uint32) ::server_id)
  {
    /*
      The event comes from another server.
      If the event was not requested by the slave i.e. the master sent it
      while the slave asked for a position >4, the event will make
      rli->group_master_log_pos advance. Say that the slave asked for position
      1000, and the Format_desc event's end is 95. Then in the beginning of
      replication rli->group_master_log_pos will be 0, then 95, then jump to
      first really asked event (which is >95). So this is ok.
    */
    DBUG_RETURN(Start_log_event_v3::exec_event(rli));
  }

  /*
    The event comes from ourselves: there is no cleaning task to perform, so
    Start_log_event_v3::exec_event() is not called (the event only served to
    update the log's description event).
    Do not modify rli->group_master_log_pos, as this event did not exist on
    the master. That is, just update the *relay log* coordinates; this is done
    by passing log_pos=0 to inc_group_relay_log_pos, like we do in
    Stop_log_event::exec_event().
  */
  rli->inc_group_relay_log_pos(0);
  flush_relay_log_info(rli);
  DBUG_RETURN(0);
}
#endif
/**************************************************************************
Load_log_event methods
**************************************************************************/
General note about Load_log_event: the binlogging of LOAD DATA INFILE is
going to be changed in 5.0 (or maybe in 4.1; not decided yet).
However, the 5.0 slave could still have to read such events (from a 4.x
master), convert them (which just means maybe expand the header, when 5.0
servers have a UID in events) (remember that whatever is after the header
will be like in 4.x, as this event's format is not modified in 5.0 as we
will use new types of events to log the new LOAD DATA INFILE features).
To be able to read/convert, we just need to not assume that the common
header is of length LOG_EVENT_HEADER_LEN (we must use the description
event).
Note that I (Guilhem) manually tested replication of a big LOAD DATA INFILE
between 3.23 and 5.0, and between 4.0 and 5.0, and it works fine (and the
positions displayed in SHOW SLAVE STATUS then are fine too).
**************************************************************************/
/*
Load_log_event::pack_info()
......@@ -1441,15 +2105,24 @@ Load_log_event::Load_log_event(THD *thd_arg, sql_exchange *ex,
constructed event.
*/
Load_log_event::Load_log_event(const char *buf, int event_len,
bool old_format)
:Log_event(buf, old_format),num_fields(0),fields(0),
Load_log_event::Load_log_event(const char *buf, uint event_len,
const Format_description_log_event *description_event)
:Log_event(buf, description_event),num_fields(0),fields(0),
field_lens(0),field_block_len(0),
table_name(0),db(0),fname(0)
{
if (!event_len) // derived class, will call copy_log_event() itself
return;
copy_log_event(buf, event_len, old_format);
/*
I (Guilhem) manually tested replication of LOAD DATA INFILE for 3.23->5.0,
4.0->5.0 and 5.0->5.0 and it works.
*/
copy_log_event(buf, event_len,
((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
LOAD_HEADER_LEN +
description_event->common_header_len :
LOAD_HEADER_LEN + LOG_EVENT_HEADER_LEN),
description_event);
}
......@@ -1458,12 +2131,13 @@ Load_log_event::Load_log_event(const char *buf, int event_len,
*/
int Load_log_event::copy_log_event(const char *buf, ulong event_len,
bool old_format)
int body_offset,
const Format_description_log_event *description_event)
{
uint data_len;
char* buf_end = (char*)buf + event_len;
uint header_len= old_format ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
const char* data_head = buf + header_len;
/* this is the beginning of the post-header */
const char* data_head = buf + description_event->common_header_len;
slave_proxy_id= thread_id= uint4korr(data_head + L_THREAD_ID_OFFSET);
exec_time = uint4korr(data_head + L_EXEC_TIME_OFFSET);
skip_lines = uint4korr(data_head + L_SKIP_LINES_OFFSET);
......@@ -1471,10 +2145,6 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
db_len = (uint)data_head[L_DB_LEN_OFFSET];
num_fields = uint4korr(data_head + L_NUM_FIELDS_OFFSET);
int body_offset = ((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
LOAD_HEADER_LEN + header_len :
get_data_body_offset());
if ((int) event_len < body_offset)
return 1;
/*
......@@ -1498,6 +2168,12 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
fname = db + db_len + 1;
fname_len = strlen(fname);
// null termination is accomplished by the caller doing buf[event_len]=0
/*
In 5.0 this event will have the same format, as we are planning to log LOAD
DATA INFILE in a completely different way (as a plain-text query) since 4.1
or 5.0 (Dmitri's WL#874)
*/
return 0;
}
......@@ -1507,13 +2183,13 @@ int Load_log_event::copy_log_event(const char *buf, ulong event_len,
*/
#ifdef MYSQL_CLIENT
void Load_log_event::print(FILE* file, bool short_form, char* last_db)
void Load_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
{
print(file, short_form, last_db, 0);
print(file, short_form, last_event_info, 0);
}
void Load_log_event::print(FILE* file, bool short_form, char* last_db,
void Load_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info,
bool commented)
{
if (!short_form)
......@@ -1524,10 +2200,10 @@ void Load_log_event::print(FILE* file, bool short_form, char* last_db,
}
bool same_db = 0;
if (db && last_db)
if (db && last_event_info->db)
{
if (!(same_db = !memcmp(last_db, db, db_len + 1)))
memcpy(last_db, db, db_len + 1);
if (!(same_db = !memcmp(last_event_info->db, db, db_len + 1)))
memcpy(last_event_info->db, db, db_len + 1);
}
if (db && db[0] && !same_db)
......@@ -1839,12 +2515,12 @@ void Rotate_log_event::pack_info(Protocol *protocol)
*/
#ifdef MYSQL_CLIENT
void Rotate_log_event::print(FILE* file, bool short_form, char* last_db)
void Rotate_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
{
char buf[22];
if (short_form)
return;
print_header(file);
fprintf(file, "\tRotate to ");
if (new_log_ident)
......@@ -1861,28 +2537,21 @@ void Rotate_log_event::print(FILE* file, bool short_form, char* last_db)
Rotate_log_event::Rotate_log_event()
*/
Rotate_log_event::Rotate_log_event(const char* buf, int event_len,
bool old_format)
:Log_event(buf, old_format),new_log_ident(NULL),alloced(0)
Rotate_log_event::Rotate_log_event(const char* buf, uint event_len,
const Format_description_log_event* description_event)
:Log_event(buf, description_event) ,new_log_ident(NULL),alloced(0)
{
// The caller will ensure that event_len is what we have at EVENT_LEN_OFFSET
int header_size = (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
uint8 header_size= description_event->common_header_len;
uint8 post_header_len= description_event->post_header_len[ROTATE_EVENT-1];
uint ident_offset;
if (event_len < header_size)
return;
buf += header_size;
if (old_format)
{
ident_len = (uint)(event_len - OLD_HEADER_LEN);
pos = 4;
ident_offset = 0;
}
else
{
ident_len = (uint)(event_len - ROTATE_EVENT_OVERHEAD);
pos = uint8korr(buf + R_POS_OFFSET);
ident_offset = ROTATE_HEADER_LEN;
}
pos = post_header_len ? uint8korr(buf + R_POS_OFFSET) : 4;
ident_len = (uint)(event_len -
(header_size+post_header_len));
ident_offset = post_header_len;
set_if_smaller(ident_len,FN_REFLEN-1);
if (!(new_log_ident= my_strdup_with_length((byte*) buf +
ident_offset,
......@@ -1909,14 +2578,15 @@ int Rotate_log_event::write_data(IO_CACHE* file)
/*
Rotate_log_event::exec_event()
Got a rotate log even from the master
Got a rotate log event from the master
IMPLEMENTATION
This is mainly used so that we can later figure out the logname and
position for the master.
We can't rotate the slave as this will cause infinitive rotations
We can't rotate the slave's BINlog as this will cause infinite rotations
in an A -> B -> A setup.
The NOTES section below is an incorrect comment which will disappear when 4.1 is merged.
RETURN VALUES
0 ok
......@@ -1928,7 +2598,7 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli)
DBUG_ENTER("Rotate_log_event::exec_event");
pthread_mutex_lock(&rli->data_lock);
rli->event_relay_log_pos += get_event_len();
rli->event_relay_log_pos= my_b_tell(rli->cur_log);
/*
If we are in a transaction: the only normal case is when the I/O thread was
copying a big transaction, then it was stopped and restarted: we have this
......@@ -1945,10 +2615,21 @@ int Rotate_log_event::exec_event(struct st_relay_log_info* rli)
{
memcpy(rli->group_master_log_name, new_log_ident, ident_len+1);
rli->notify_group_master_log_name_update();
rli->group_master_log_pos = pos;
rli->group_relay_log_pos = rli->event_relay_log_pos;
DBUG_PRINT("info", ("group_master_log_pos: %lu",
rli->group_master_log_pos= pos;
rli->group_relay_log_pos= rli->event_relay_log_pos;
DBUG_PRINT("info", ("group_master_log_name: '%s' group_master_log_pos:\
%lu",
rli->group_master_log_name,
(ulong) rli->group_master_log_pos));
/*
Reset thd->options and sql_mode, because this could be the signal of a
master's downgrade from 5.0 to 4.0.
However, no need to reset description_event_for_exec: indeed, if the next
master is 5.0 (even 5.0.1) we will soon get a Format_desc; if the next
master is 4.0 then the events are in the slave's format (conversion).
*/
set_slave_thread_options(thd);
thd->variables.sql_mode= global_system_variables.sql_mode;
}
pthread_mutex_unlock(&rli->data_lock);
pthread_cond_broadcast(&rli->data_cond);
......@@ -1982,12 +2663,13 @@ void Intvar_log_event::pack_info(Protocol *protocol)
Intvar_log_event::Intvar_log_event()
*/
Intvar_log_event::Intvar_log_event(const char* buf, bool old_format)
:Log_event(buf, old_format)
Intvar_log_event::Intvar_log_event(const char* buf,
const Format_description_log_event* description_event)
:Log_event(buf, description_event)
{
buf += (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
type = buf[I_TYPE_OFFSET];
val = uint8korr(buf+I_VAL_OFFSET);
buf+= description_event->common_header_len;
type= buf[I_TYPE_OFFSET];
val= uint8korr(buf+I_VAL_OFFSET);
}
......@@ -2023,7 +2705,7 @@ int Intvar_log_event::write_data(IO_CACHE* file)
*/
#ifdef MYSQL_CLIENT
void Intvar_log_event::print(FILE* file, bool short_form, char* last_db)
void Intvar_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
{
char llbuff[22];
const char *msg;
......@@ -2066,7 +2748,7 @@ int Intvar_log_event::exec_event(struct st_relay_log_info* rli)
thd->next_insert_id = val;
break;
}
rli->inc_event_relay_log_pos(get_event_len());
rli->inc_event_relay_log_pos();
return 0;
}
#endif
......@@ -2089,12 +2771,13 @@ void Rand_log_event::pack_info(Protocol *protocol)
#endif
Rand_log_event::Rand_log_event(const char* buf, bool old_format)
:Log_event(buf, old_format)
Rand_log_event::Rand_log_event(const char* buf,
const Format_description_log_event* description_event)
:Log_event(buf, description_event)
{
buf += (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
seed1 = uint8korr(buf+RAND_SEED1_OFFSET);
seed2 = uint8korr(buf+RAND_SEED2_OFFSET);
buf+= description_event->common_header_len;
seed1= uint8korr(buf+RAND_SEED1_OFFSET);
seed2= uint8korr(buf+RAND_SEED2_OFFSET);
}
......@@ -2108,7 +2791,7 @@ int Rand_log_event::write_data(IO_CACHE* file)
#ifdef MYSQL_CLIENT
void Rand_log_event::print(FILE* file, bool short_form, char* last_db)
void Rand_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
{
char llbuff[22],llbuff2[22];
if (!short_form)
......@@ -2128,7 +2811,7 @@ int Rand_log_event::exec_event(struct st_relay_log_info* rli)
{
thd->rand.seed1= (ulong) seed1;
thd->rand.seed2= (ulong) seed2;
rli->inc_event_relay_log_pos(get_event_len());
rli->inc_event_relay_log_pos();
return 0;
}
#endif /* !MYSQL_CLIENT */
......@@ -2192,10 +2875,11 @@ void User_var_log_event::pack_info(Protocol* protocol)
#endif /* !MYSQL_CLIENT */
User_var_log_event::User_var_log_event(const char* buf, bool old_format)
:Log_event(buf, old_format)
User_var_log_event::User_var_log_event(const char* buf,
const Format_description_log_event* description_event)
:Log_event(buf, description_event)
{
buf+= (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN;
buf+= description_event->common_header_len;
name_len= uint4korr(buf);
name= (char *) buf + UV_NAME_LEN_SIZE;
buf+= UV_NAME_LEN_SIZE + name_len;
......@@ -2269,7 +2953,7 @@ int User_var_log_event::write_data(IO_CACHE* file)
*/
#ifdef MYSQL_CLIENT
void User_var_log_event::print(FILE* file, bool short_form, char* last_db)
void User_var_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
{
if (!short_form)
{
......@@ -2359,7 +3043,7 @@ int User_var_log_event::exec_event(struct st_relay_log_info* rli)
e.update_hash(val, val_len, type, charset, DERIVATION_NONE);
free_root(&thd->mem_root,0);
rli->inc_event_relay_log_pos(get_event_len());
rli->inc_event_relay_log_pos();
return 0;
}
#endif /* !MYSQL_CLIENT */
......@@ -2371,7 +3055,7 @@ int User_var_log_event::exec_event(struct st_relay_log_info* rli)
#ifdef HAVE_REPLICATION
#ifdef MYSQL_CLIENT
void Unknown_log_event::print(FILE* file, bool short_form, char* last_db)
void Unknown_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
{
if (short_form)
return;
......@@ -2401,7 +3085,7 @@ void Slave_log_event::pack_info(Protocol *protocol)
#ifndef MYSQL_CLIENT
Slave_log_event::Slave_log_event(THD* thd_arg,
struct st_relay_log_info* rli)
:Log_event(thd_arg, 0, 0), mem_pool(0), master_host(0)
:Log_event(thd_arg, 0, 0) , mem_pool(0), master_host(0)
{
DBUG_ENTER("Slave_log_event");
if (!rli->inited) // QQ When can this happen ?
......@@ -2442,7 +3126,7 @@ Slave_log_event::~Slave_log_event()
#ifdef MYSQL_CLIENT
void Slave_log_event::print(FILE* file, bool short_form, char* last_db)
void Slave_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
{
char llbuff[22];
if (short_form)
......@@ -2488,12 +3172,13 @@ void Slave_log_event::init_from_mem_pool(int data_size)
}
Slave_log_event::Slave_log_event(const char* buf, int event_len)
:Log_event(buf,0),mem_pool(0),master_host(0)
/* This code is not used, so has not been updated to be format-tolerant */
Slave_log_event::Slave_log_event(const char* buf, uint event_len)
:Log_event(buf,0) /*unused event*/ ,mem_pool(0),master_host(0)
{
event_len -= LOG_EVENT_HEADER_LEN;
if (event_len < 0)
if (event_len < LOG_EVENT_HEADER_LEN)
return;
event_len -= LOG_EVENT_HEADER_LEN;
if (!(mem_pool = (char*) my_malloc(event_len + 1, MYF(MY_WME))))
return;
memcpy(mem_pool, buf + LOG_EVENT_HEADER_LEN, event_len);
......@@ -2521,7 +3206,7 @@ int Slave_log_event::exec_event(struct st_relay_log_info* rli)
*/
#ifdef MYSQL_CLIENT
void Stop_log_event::print(FILE* file, bool short_form, char* last_db)
void Stop_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info)
{
if (short_form)
return;
......@@ -2543,7 +3228,7 @@ void Stop_log_event::print(FILE* file, bool short_form, char* last_db)
We used to clean up slave_load_tmpdir, but this is useless as it has been
cleared at the end of LOAD DATA INFILE.
So we have nothing to do here.
The place were we must do this cleaning is in Start_log_event::exec_event(),
The place where we must do this cleaning is in Start_log_event_v3::exec_event(),
not here. Because if we come here, the master was sane.
*/
......@@ -2557,7 +3242,7 @@ int Stop_log_event::exec_event(struct st_relay_log_info* rli)
could give false triggers in MASTER_POS_WAIT() that we have reached
the target position when in fact we have not.
*/
rli->inc_group_relay_log_pos(get_event_len(), 0);
rli->inc_group_relay_log_pos(0);
flush_relay_log_info(rli);
return 0;
}
......@@ -2636,20 +3321,40 @@ int Create_file_log_event::write_base(IO_CACHE* file)
Create_file_log_event ctor
*/
Create_file_log_event::Create_file_log_event(const char* buf, int len,
bool old_format)
:Load_log_event(buf,0,old_format),fake_base(0),block(0),inited_from_old(0)
{
int block_offset;
if (copy_log_event(buf,len,old_format))
Create_file_log_event::Create_file_log_event(const char* buf, uint len,
const Format_description_log_event* description_event)
:Load_log_event(buf,0,description_event),fake_base(0),block(0),inited_from_old(0)
{
uint block_offset;
uint header_len= description_event->common_header_len;
uint8 load_header_len= description_event->post_header_len[LOAD_EVENT-1];
uint8 create_file_header_len= description_event->post_header_len[CREATE_FILE_EVENT-1];
if (copy_log_event(buf,len,
((buf[EVENT_TYPE_OFFSET] == LOAD_EVENT) ?
load_header_len + header_len :
(fake_base ? (header_len+load_header_len) :
(header_len+load_header_len) +
create_file_header_len)),
description_event))
return;
if (!old_format)
if (description_event->binlog_version!=1)
{
file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN +
+ LOAD_HEADER_LEN + CF_FILE_ID_OFFSET);
file_id= uint4korr(buf +
header_len +
load_header_len + CF_FILE_ID_OFFSET);
// + 1 for \0 terminating fname
block_offset = (LOG_EVENT_HEADER_LEN + Load_log_event::get_data_size() +
CREATE_FILE_HEADER_LEN + 1);
/*
Note that it's ok to use get_data_size() below, because it is computed
with values we have already read from this event (because we called
copy_log_event()); we are not using slave's format info to decode master's
format, we are really using master's format info.
Anyway, both formats should be identical (except the common_header_len) as
these Load events are not changed between 4.0 and 5.0 (as logging of LOAD
DATA INFILE does not use Load_log_event in 5.0).
*/
block_offset= description_event->common_header_len +
Load_log_event::get_data_size() +
create_file_header_len + 1;
if (len < block_offset)
return;
block = (char*)buf + block_offset;
......@@ -2669,18 +3374,18 @@ Create_file_log_event::Create_file_log_event(const char* buf, int len,
#ifdef MYSQL_CLIENT
void Create_file_log_event::print(FILE* file, bool short_form,
char* last_db, bool enable_local)
LAST_EVENT_INFO* last_event_info, bool enable_local)
{
if (short_form)
{
if (enable_local && check_fname_outside_temp_buf())
Load_log_event::print(file, 1, last_db);
Load_log_event::print(file, 1, last_event_info);
return;
}
if (enable_local)
{
Load_log_event::print(file, 1, last_db, !check_fname_outside_temp_buf());
Load_log_event::print(file, 1, last_event_info, !check_fname_outside_temp_buf());
/*
That one is for "file_id: etc" below: in mysqlbinlog we want the #, in
SHOW BINLOG EVENTS we don't.
......@@ -2693,9 +3398,9 @@ void Create_file_log_event::print(FILE* file, bool short_form,
void Create_file_log_event::print(FILE* file, bool short_form,
char* last_db)
LAST_EVENT_INFO* last_event_info)
{
print(file,short_form,last_db,0);
print(file,short_form,last_event_info,0);
}
#endif /* MYSQL_CLIENT */
......@@ -2809,14 +3514,19 @@ Append_block_log_event::Append_block_log_event(THD* thd_arg, const char* db_arg,
Append_block_log_event ctor
*/
Append_block_log_event::Append_block_log_event(const char* buf, int len)
:Log_event(buf, 0),block(0)
Append_block_log_event::Append_block_log_event(const char* buf, uint len,
const Format_description_log_event* description_event)
:Log_event(buf, description_event),block(0)
{
if ((uint)len < APPEND_BLOCK_EVENT_OVERHEAD)
uint8 common_header_len= description_event->common_header_len;
uint8 append_block_header_len=
description_event->post_header_len[APPEND_BLOCK_EVENT-1];
uint total_header_len= common_header_len+append_block_header_len;
if (len < total_header_len)
return;
file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + AB_FILE_ID_OFFSET);
block = (char*)buf + APPEND_BLOCK_EVENT_OVERHEAD;
block_len = len - APPEND_BLOCK_EVENT_OVERHEAD;
file_id= uint4korr(buf + common_header_len + AB_FILE_ID_OFFSET);
block= (char*)buf + total_header_len;
block_len= len - total_header_len;
}
......@@ -2839,7 +3549,7 @@ int Append_block_log_event::write_data(IO_CACHE* file)
#ifdef MYSQL_CLIENT
void Append_block_log_event::print(FILE* file, bool short_form,
char* last_db)
LAST_EVENT_INFO* last_event_info)
{
if (short_form)
return;
......@@ -2921,12 +3631,15 @@ Delete_file_log_event::Delete_file_log_event(THD *thd_arg, const char* db_arg,
Delete_file_log_event ctor
*/
Delete_file_log_event::Delete_file_log_event(const char* buf, int len)
:Log_event(buf, 0),file_id(0)
Delete_file_log_event::Delete_file_log_event(const char* buf, uint len,
const Format_description_log_event* description_event)
:Log_event(buf, description_event),file_id(0)
{
if ((uint)len < DELETE_FILE_EVENT_OVERHEAD)
uint8 common_header_len= description_event->common_header_len;
uint8 delete_file_header_len= description_event->post_header_len[DELETE_FILE_EVENT-1];
if (len < (uint)(common_header_len + delete_file_header_len))
return;
file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + AB_FILE_ID_OFFSET);
file_id= uint4korr(buf + common_header_len + DF_FILE_ID_OFFSET);
}
......@@ -2948,7 +3661,7 @@ int Delete_file_log_event::write_data(IO_CACHE* file)
#ifdef MYSQL_CLIENT
void Delete_file_log_event::print(FILE* file, bool short_form,
char* last_db)
LAST_EVENT_INFO* last_event_info)
{
if (short_form)
return;
......@@ -3011,12 +3724,15 @@ Execute_load_log_event::Execute_load_log_event(THD *thd_arg, const char* db_arg,
Execute_load_log_event ctor
*/
Execute_load_log_event::Execute_load_log_event(const char* buf, int len)
:Log_event(buf, 0), file_id(0)
Execute_load_log_event::Execute_load_log_event(const char* buf, uint len,
const Format_description_log_event* description_event)
:Log_event(buf, description_event), file_id(0)
{
if ((uint)len < EXEC_LOAD_EVENT_OVERHEAD)
uint8 common_header_len= description_event->common_header_len;
uint8 exec_load_header_len= description_event->post_header_len[EXEC_LOAD_EVENT-1];
if (len < (uint)(common_header_len+exec_load_header_len))
return;
file_id = uint4korr(buf + LOG_EVENT_HEADER_LEN + EL_FILE_ID_OFFSET);
file_id= uint4korr(buf + common_header_len + EL_FILE_ID_OFFSET);
}
......@@ -3038,7 +3754,7 @@ int Execute_load_log_event::write_data(IO_CACHE* file)
#ifdef MYSQL_CLIENT
void Execute_load_log_event::print(FILE* file, bool short_form,
char* last_db)
LAST_EVENT_INFO* last_event_info)
{
if (short_form)
return;
......@@ -3086,7 +3802,7 @@ int Execute_load_log_event::exec_event(struct st_relay_log_info* rli)
}
if (!(lev = (Load_log_event*)Log_event::read_log_event(&file,
(pthread_mutex_t*)0,
(bool)0)) ||
rli->relay_log.description_event_for_exec)) ||
lev->get_type_code() != NEW_LOAD_EVENT)
{
slave_print_error(rli,0, "Error in Exec_load event: file '%s' appears corrupted", fname);
......
......@@ -35,12 +35,42 @@
#define LOG_EVENT_OFFSET 4
#define BINLOG_VERSION 3
/*
3 is MySQL 4.x; 4 is MySQL 5.0.0.
Compared to version 3, version 4 has:
- a different Start_log_event, which includes info about the binary log
(sizes of headers); this info is included for better compatibility if the
master's MySQL version is different from the slave's.
- all events have a unique ID (the triplet (server_id, timestamp at server
start, other) to be sure an event is not executed more than once in a
multimaster setup, example:
M1
/ \
v v
M2 M3
\ /
v v
S
if a query is run on M1, it will arrive twice on S, so we need that S
remembers the last unique ID it has processed, to compare and know if the
event should be skipped or not. Example of ID: we already have the server id
(4 bytes), plus:
timestamp_when_the_master_started (4 bytes), a counter (a sequence number
which increments every time we write an event to the binlog) (3 bytes).
Q: how do we handle when the counter is overflowed and restarts from 0 ?
- Query and Load (Create or Execute) events may have a more precise timestamp
(with microseconds), number of matched/affected/warnings rows
and fields of session variables: SQL_MODE,
FOREIGN_KEY_CHECKS, UNIQUE_CHECKS, SQL_AUTO_IS_NULL, the collations and
charsets, the PASSWORD() version (old/new/...).
*/
#define BINLOG_VERSION 4
/*
We could have used SERVER_VERSION_LENGTH, but this introduces an
obscure dependency - if somebody decided to change SERVER_VERSION_LENGTH
this would have broken the replication protocol
this would break the replication protocol
*/
#define ST_SERVER_VER_LEN 50
......@@ -49,6 +79,12 @@
TERMINATED etc).
*/
/*
These are flags and structs to handle all the LOAD DATA INFILE options (LINES
TERMINATED etc).
DUMPFILE_FLAG is probably useless (DUMPFILE is a clause of SELECT, not of LOAD
DATA).
*/
#define DUMPFILE_FLAG 0x1
#define OPT_ENCLOSED_FLAG 0x2
#define REPLACE_FLAG 0x4
......@@ -136,16 +172,28 @@ struct sql_ex_info
#define LOG_EVENT_HEADER_LEN 19 /* the fixed header length */
#define OLD_HEADER_LEN 13 /* the fixed header length in 3.23 */
/*
Fixed header length, where 4.x and 5.0 agree. That is, 5.0 may have a longer
header (it will for sure when we have the unique event's ID), but at least
the first 19 bytes are the same in 4.x and 5.0. So when we have the unique
event's ID, LOG_EVENT_HEADER_LEN will be something like 26, but
LOG_EVENT_MINIMAL_HEADER_LEN will remain 19.
*/
#define LOG_EVENT_MINIMAL_HEADER_LEN 19
/* event-specific post-header sizes */
#define QUERY_HEADER_LEN (4 + 4 + 1 + 2)
// where 3.23, 4.x and 5.0 agree
#define QUERY_HEADER_MINIMAL_LEN (4 + 4 + 1 + 2)
// where 5.0 differs: 2 for len of N-bytes vars.
#define QUERY_HEADER_LEN (QUERY_HEADER_MINIMAL_LEN + 2)
#define LOAD_HEADER_LEN (4 + 4 + 4 + 1 +1 + 4)
#define START_HEADER_LEN (2 + ST_SERVER_VER_LEN + 4)
#define ROTATE_HEADER_LEN 8
#define START_V3_HEADER_LEN (2 + ST_SERVER_VER_LEN + 4)
#define ROTATE_HEADER_LEN 8 // this is FROZEN (the Rotate post-header is frozen)
#define CREATE_FILE_HEADER_LEN 4
#define APPEND_BLOCK_HEADER_LEN 4
#define EXEC_LOAD_HEADER_LEN 4
#define DELETE_FILE_HEADER_LEN 4
#define FORMAT_DESCRIPTION_HEADER_LEN (START_V3_HEADER_LEN+1+LOG_EVENT_TYPES)
/*
Event header offsets;
......@@ -158,11 +206,12 @@ struct sql_ex_info
#define LOG_POS_OFFSET 13
#define FLAGS_OFFSET 17
/* start event post-header */
/* start event post-header (for v3 and v4) */
#define ST_BINLOG_VER_OFFSET 0
#define ST_SERVER_VER_OFFSET 2
#define ST_CREATED_OFFSET (ST_SERVER_VER_OFFSET + ST_SERVER_VER_LEN)
#define ST_COMMON_HEADER_LEN_OFFSET (ST_CREATED_OFFSET + 4)
/* slave event post-header (this event is never written) */
......@@ -176,7 +225,13 @@ struct sql_ex_info
#define Q_EXEC_TIME_OFFSET 4
#define Q_DB_LEN_OFFSET 8
#define Q_ERR_CODE_OFFSET 9
#define Q_STATUS_VARS_LEN_OFFSET 11
#define Q_DATA_OFFSET QUERY_HEADER_LEN
/* these are codes, not offsets; not more than 256 values (1 byte). */
#define Q_FLAGS2_CODE 0
#define Q_SQL_MODE_CODE 1
#define Q_CATALOG_CODE 2
/* Intvar event post-header */
......@@ -228,16 +283,6 @@ struct sql_ex_info
/* DF = "Delete File" */
#define DF_FILE_ID_OFFSET 0
#define QUERY_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+QUERY_HEADER_LEN)
#define QUERY_DATA_OFFSET (LOG_EVENT_HEADER_LEN+QUERY_HEADER_LEN)
#define ROTATE_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+ROTATE_HEADER_LEN)
#define LOAD_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+LOAD_HEADER_LEN)
#define CREATE_FILE_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+\
+LOAD_HEADER_LEN+CREATE_FILE_HEADER_LEN)
#define DELETE_FILE_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+DELETE_FILE_HEADER_LEN)
#define EXEC_LOAD_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+EXEC_LOAD_HEADER_LEN)
#define APPEND_BLOCK_EVENT_OVERHEAD (LOG_EVENT_HEADER_LEN+APPEND_BLOCK_HEADER_LEN)
/* 4 bytes which all binlogs should begin with */
#define BINLOG_MAGIC "\xfe\x62\x69\x6e"
......@@ -264,15 +309,54 @@ struct sql_ex_info
*/
#define LOG_EVENT_THREAD_SPECIFIC_F 0x4
/*
OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be written
to the binlog. OPTIONS_WRITTEN_TO_BIN_LOG could be written into the
Format_description_log_event, so that if later we don't want to replicate a
variable we did replicate, or the contrary, it's doable. But it should not be
too hard to decide once and for all what we replicate and what we don't, among
the fixed 32 bits of thd->options.
I (Guilhem) have read through every option's usage, and it looks like
OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEY_CHECKS are the only ones which
alter how the query modifies the table. It's good to replicate
OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the slave may insert data
slower than the master, in InnoDB.
OPTION_BIG_SELECTS is not needed (the slave thread runs with
max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed either, as
the manual says (because a too big in-memory temp table is automatically
written to disk).
*/
#define OPTIONS_WRITTEN_TO_BIN_LOG (OPTION_AUTO_IS_NULL | \
OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS)
enum Log_event_type
{
UNKNOWN_EVENT= 0, START_EVENT= 1, QUERY_EVENT= 2, STOP_EVENT= 3,
ROTATE_EVENT= 4, INTVAR_EVENT= 5, LOAD_EVENT=6, SLAVE_EVENT= 7,
CREATE_FILE_EVENT= 8, APPEND_BLOCK_EVENT= 9, EXEC_LOAD_EVENT= 10,
DELETE_FILE_EVENT= 11, NEW_LOAD_EVENT= 12, RAND_EVENT= 13,
USER_VAR_EVENT= 14
/*
Every time you update this enum (when you add a type), you have to
update the code of Format_description_log_event::Format_description_log_event().
Make sure you always insert new types ***BEFORE*** ENUM_END_EVENT.
*/
UNKNOWN_EVENT= 0, START_EVENT_V3, QUERY_EVENT, STOP_EVENT, ROTATE_EVENT,
INTVAR_EVENT, LOAD_EVENT, SLAVE_EVENT, CREATE_FILE_EVENT,
APPEND_BLOCK_EVENT, EXEC_LOAD_EVENT, DELETE_FILE_EVENT,
/*
NEW_LOAD_EVENT is like LOAD_EVENT except that it has a longer sql_ex,
allowing multibyte TERMINATED BY etc; both types share the same class
(Load_log_event)
*/
NEW_LOAD_EVENT,
RAND_EVENT, USER_VAR_EVENT,
FORMAT_DESCRIPTION_EVENT,
ENUM_END_EVENT /* end marker */
};
/*
The number of types we handle in Format_description_log_event (UNKNOWN_EVENT
is not to be handled, it does not exist in binlogs, it does not have a
format).
*/
#define LOG_EVENT_TYPES (ENUM_END_EVENT-1)
enum Int_event_type
{
INVALID_INT_EVENT = 0, LAST_INSERT_ID_EVENT = 1, INSERT_ID_EVENT = 2
......@@ -285,8 +369,33 @@ class MYSQL_LOG;
class THD;
#endif
class Format_description_log_event;
struct st_relay_log_info;
#ifdef MYSQL_CLIENT
/*
A structure for mysqlbinlog to remember the last db, flags2, sql_mode etc; it
is passed to events' print() methods, so that they print only the necessary
USE and SET commands.
*/
typedef struct st_last_event_info
{
// TODO: have the last catalog here ??
char db[FN_REFLEN+1]; // TODO: make this a LEX_STRING when thd->db is
bool flags2_inited;
uint32 flags2;
bool sql_mode_inited;
ulonglong sql_mode;
st_last_event_info()
: flags2_inited(0), flags2(0), sql_mode_inited(0), sql_mode(0)
{
db[0]= 0; /* initially, the db is unknown */
}
} LAST_EVENT_INFO;
#endif
/*****************************************************************************
Log_event class
......@@ -337,21 +446,26 @@ class Log_event
uint16 flags;
bool cache_stmt;
#ifndef MYSQL_CLIENT
THD* thd;
Log_event(THD* thd_arg, uint16 flags_arg, bool cache_stmt);
Log_event();
Log_event(THD* thd_arg, uint16 flags_arg, bool cache_stmt);
/*
read_log_event() functions read an event from a binlog or relay log; used by
SHOW BINLOG EVENTS, the binlog_dump thread on the master (reads master's
binlog), the slave IO thread (reads the event sent by binlog_dump), the
slave SQL thread (reads the event from the relay log).
If mutex is 0, the read will proceed without mutex.
We need the description_event to be able to parse the event (to know the
post-header's size); in fact in read_log_event we detect the event's type,
then call the specific event's constructor and pass description_event as an
argument.
*/
// if mutex is 0, the read will proceed without mutex
static Log_event* read_log_event(IO_CACHE* file,
pthread_mutex_t* log_lock,
bool old_format);
const Format_description_log_event *description_event);
static int read_log_event(IO_CACHE* file, String* packet,
pthread_mutex_t* log_lock);
/* set_log_pos() is used to fill log_pos with tell(log). */
......@@ -379,10 +493,12 @@ class Log_event
return thd ? thd->db : 0;
}
#else
Log_event() : temp_buf(0) {}
// avoid having to link mysqlbinlog against libpthread
static Log_event* read_log_event(IO_CACHE* file, bool old_format);
static Log_event* read_log_event(IO_CACHE* file,
const Format_description_log_event *description_event);
/* print*() functions are used by mysqlbinlog */
virtual void print(FILE* file, bool short_form = 0, char* last_db = 0) = 0;
virtual void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0) = 0;
void print_timestamp(FILE* file, time_t *ts = 0);
void print_header(FILE* file);
#endif
......@@ -405,9 +521,9 @@ class Log_event
virtual int write_data_body(IO_CACHE* file __attribute__((unused)))
{ return 0; }
virtual Log_event_type get_type_code() = 0;
virtual bool is_valid() = 0;
virtual const bool is_valid() = 0;
inline bool get_cache_stmt() { return cache_stmt; }
Log_event(const char* buf, bool old_format);
Log_event(const char* buf, const Format_description_log_event* description_event);
virtual ~Log_event() { free_temp_buf();}
void register_temp_buf(char* buf) { temp_buf = buf; }
void free_temp_buf()
......@@ -419,18 +535,37 @@ class Log_event
}
}
virtual int get_data_size() { return 0;}
virtual int get_data_body_offset() { return 0; }
int get_event_len()
{
return (cached_event_len ? cached_event_len :
(cached_event_len = LOG_EVENT_HEADER_LEN + get_data_size()));
/*
We don't re-use the cached event's length anymore (we did in 4.x) because
this leads to nasty problems: when the 5.0 slave reads an event from a 4.0
master, it caches the event's length, then this event is converted before
it goes into the relay log, so it would be written to the relay log with
its old length, which is garbage.
*/
return (cached_event_len=(LOG_EVENT_HEADER_LEN + get_data_size()));
}
static Log_event* read_log_event(const char* buf, int event_len,
const char **error, bool old_format);
static Log_event* read_log_event(const char* buf, uint event_len,
const char **error,
const Format_description_log_event
*description_event);
/* returns the human readable name of the event's type */
const char* get_type_str();
};
/*
One class for each type of event.
Two constructors for each class:
- one to create the event for logging (when the server acts as a master),
called after an update to the database is done,
which accepts parameters like the query, the database, the options for LOAD
DATA INFILE...
- one to create the event from a packet (when the server acts as a slave),
called before reproducing the update, which accepts parameters (like a
buffer). Used to read from the master, from the relay log, and in
mysqlbinlog. This constructor must be format-tolerant.
*/
/*****************************************************************************
......@@ -445,6 +580,7 @@ class Query_log_event: public Log_event
char* data_buf;
public:
const char* query;
const char* catalog;
const char* db;
/*
If we already know the length of the query string
......@@ -462,6 +598,52 @@ class Query_log_event: public Log_event
BUG#1686).
*/
ulong slave_proxy_id;
/*
Binlog format 3 and 4 start to differ (as far as class members are
concerned) from here.
*/
uint32 catalog_len;
/*
We want to be able to store a variable number of N-bit status vars:
(generally N=32; but N=64 for SQL_MODE) a user may want to log the number of
affected rows (for debugging) while another does not want to lose 4 bytes in
this.
The storage on disk is the following:
status_vars_len is part of the post-header,
status_vars are in the variable-length part, after the post-header, before
the db & query.
status_vars on disk is a sequence of pairs (code, value) where 'code' means
'sql_mode', 'affected' etc. Sometimes 'value' must be a short string, so its
first byte is its length. For now the order of status vars is:
flags2 - sql_mode - catalog.
We should add the same thing to Load_log_event, but in fact
LOAD DATA INFILE is going to be logged with a new type of event (logging of
the plain text query), so Load_log_event would be frozen, so no need. The
new way of logging LOAD DATA INFILE would use a derived class of
Query_log_event, so automatically benefit from the work already done for
status variables in Query_log_event.
*/
uint16 status_vars_len;
/*
'flags2' is a second set of flags (on top of those in Log_event), for
session variables. It is thd->options ANDed with a mask
(OPTIONS_WRITTEN_TO_BINLOG).
flags2_inited lets us distinguish between flags2==0 (3.23 or 4.x
master, we don't know flags2, so use the slave server's global options) and
flags2==0 (5.0 master, we know this means that all flags are down, which
must influence the query).
*/
bool flags2_inited;
bool sql_mode_inited;
uint32 flags2;
/* In connections sql_mode is 32 bits now but will be 64 bits soon */
ulonglong sql_mode;
#ifndef MYSQL_CLIENT
Query_log_event(THD* thd_arg, const char* query_arg, ulong query_length,
......@@ -472,10 +654,11 @@ class Query_log_event: public Log_event
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
#endif
Query_log_event(const char* buf, int event_len, bool old_format);
Query_log_event(const char* buf, uint event_len,
const Format_description_log_event *description_event);
~Query_log_event()
{
if (data_buf)
......@@ -486,14 +669,11 @@ class Query_log_event: public Log_event
Log_event_type get_type_code() { return QUERY_EVENT; }
int write(IO_CACHE* file);
int write_data(IO_CACHE* file); // returns 0 on success, -1 on error
bool is_valid() { return query != 0; }
const bool is_valid() { return query != 0; }
int get_data_size()
{
return (q_len + db_len + 2
+ 4 // thread_id
+ 4 // exec_time
+ 2 // error_code
);
/* Note that the "1" below is the db's length. */
return (q_len + db_len + 1 + status_vars_len + QUERY_HEADER_LEN);
}
};
......@@ -504,6 +684,7 @@ class Query_log_event: public Log_event
Slave Log Event class
Note that this class is currently not used at all; no code writes a
Slave_log_event (though some code in repl_failsafe.cc reads Slave_log_event).
So it's not a problem if this code is not maintained.
****************************************************************************/
class Slave_log_event: public Log_event
......@@ -524,13 +705,13 @@ class Slave_log_event: public Log_event
void pack_info(Protocol* protocol);
int exec_event(struct st_relay_log_info* rli);
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
#endif
Slave_log_event(const char* buf, int event_len);
Slave_log_event(const char* buf, uint event_len);
~Slave_log_event();
int get_data_size();
bool is_valid() { return master_host != 0; }
const bool is_valid() { return master_host != 0; }
Log_event_type get_type_code() { return SLAVE_EVENT; }
int write_data(IO_CACHE* file );
};
......@@ -546,12 +727,18 @@ class Slave_log_event: public Log_event
class Load_log_event: public Log_event
{
protected:
int copy_log_event(const char *buf, ulong event_len, bool old_format);
int copy_log_event(const char *buf, ulong event_len,
int body_offset, const Format_description_log_event* description_event);
public:
ulong thread_id;
ulong slave_proxy_id;
uint32 table_name_len;
/*
No need to have a catalog, as these events can only come from 4.x.
TODO: this may become false if Dmitri pushes his new LOAD DATA INFILE in
5.0 only (not in 4.x).
*/
uint32 db_len;
uint32 fname_len;
uint32 num_fields;
......@@ -597,11 +784,18 @@ class Load_log_event: public Log_event
bool use_rli_only_for_errors);
#endif /* HAVE_REPLICATION */
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form, char* last_db, bool commented);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info = 0);
void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool commented);
#endif
Load_log_event(const char* buf, int event_len, bool old_format);
/*
Note that for all the events related to LOAD DATA (Load_log_event,
Create_file/Append/Exec/Delete), we pass description_event; however as
logging of LOAD DATA is going to be changed in 4.1 or 5.0, this is only used
for the common_header_len (post_header_len will not be changed).
*/
Load_log_event(const char* buf, uint event_len,
const Format_description_log_event* description_event);
~Load_log_event()
{}
Log_event_type get_type_code()
......@@ -610,27 +804,31 @@ class Load_log_event: public Log_event
}
int write_data_header(IO_CACHE* file);
int write_data_body(IO_CACHE* file);
bool is_valid() { return table_name != 0; }
const bool is_valid() { return table_name != 0; }
int get_data_size()
{
return (table_name_len + 2 + db_len + 2 + fname_len
+ 4 // thread_id
+ 4 // exec_time
+ 4 // skip_lines
+ 4 // field block len
return (table_name_len + db_len + 2 + fname_len
+ LOAD_HEADER_LEN
+ sql_ex.data_size() + field_block_len + num_fields);
}
int get_data_body_offset() { return LOAD_EVENT_OVERHEAD; }
};
extern char server_version[SERVER_VERSION_LENGTH];
/*****************************************************************************
Start Log Event class
Start Log Event_v3 class
Start_log_event_v3 is the Start_log_event of binlog format 3 (MySQL 3.23 and
4.x).
Format_description_log_event derives from Start_log_event_v3; it is the
Start_log_event of binlog format 4 (MySQL 5.0), that is, the event that
describes the other events' header/postheader lengths. This event is sent by
MySQL 5.0 whenever it starts sending a new binlog if the requested position
is >4 (otherwise if ==4 the event will be sent naturally).
****************************************************************************/
class Start_log_event: public Log_event
class Start_log_event_v3: public Log_event
{
public:
/*
......@@ -658,27 +856,81 @@ class Start_log_event: public Log_event
char server_version[ST_SERVER_VER_LEN];
#ifndef MYSQL_CLIENT
Start_log_event() :Log_event(), binlog_version(BINLOG_VERSION)
{
created = (time_t) when;
memcpy(server_version, ::server_version, ST_SERVER_VER_LEN);
}
Start_log_event_v3();
#ifdef HAVE_REPLICATION
void pack_info(Protocol* protocol);
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
Start_log_event_v3() {}
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
#endif
Start_log_event(const char* buf, bool old_format);
~Start_log_event() {}
Log_event_type get_type_code() { return START_EVENT;}
Start_log_event_v3(const char* buf,
const Format_description_log_event* description_event);
~Start_log_event_v3() {}
Log_event_type get_type_code() { return START_EVENT_V3;}
int write_data(IO_CACHE* file);
bool is_valid() { return 1; }
const bool is_valid() { return 1; }
int get_data_size()
{
return START_HEADER_LEN;
return START_V3_HEADER_LEN; //no variable-sized part
}
};
/*
  For binlog version 4.
  This event is saved by threads which read it, as they need it for future
  use (to decode the ordinary events).
*/
class Format_description_log_event: public Start_log_event_v3
{
public:
  /*
    The size of the fixed header which _all_ events have
    (for binlogs written by this version, this is equal to
    LOG_EVENT_HEADER_LEN), except FORMAT_DESCRIPTION_EVENT and ROTATE_EVENT
    (those have a header of size LOG_EVENT_MINIMAL_HEADER_LEN).
  */
  uint8 common_header_len;
  /* NOTE(review): presumably the number of entries in post_header_len */
  uint8 number_of_event_types;
  /*
    The list of post-headers' lengths. NULL if it could not be allocated;
    is_valid() reports that case.
  */
  uint8 *post_header_len;

  /* Builds the description of the given binlog version. */
  Format_description_log_event(uint8 binlog_ver, const char* server_ver=0);

#ifndef MYSQL_CLIENT
#ifdef HAVE_REPLICATION
  int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#endif

  /* Builds the event from a buffer, parsed according to description_event. */
  Format_description_log_event(const char* buf, uint event_len,
                               const Format_description_log_event* description_event);
  ~Format_description_log_event()
  {
    /*
      post_header_len is NULL when its allocation failed (this is what
      is_valid() tests for), and such an invalid event still gets destroyed;
      so do not hand a NULL pointer to my_free().
    */
    if (post_header_len)
      my_free((gptr)post_header_len, MYF(0));
  }
  Log_event_type get_type_code() { return FORMAT_DESCRIPTION_EVENT;}
  int write_data(IO_CACHE* file);
  /*
    The event is usable if the common header is at least as long as the
    smallest header of its binlog version, and if the array of post-header
    lengths exists.
  */
  const bool is_valid()
  {
    return ((common_header_len >= ((binlog_version==1) ? OLD_HEADER_LEN :
                                   LOG_EVENT_MINIMAL_HEADER_LEN)) &&
            (post_header_len != NULL));
  }
  /*
    This event has the minimal header (see common_header_len above), so its
    length is computed with LOG_EVENT_MINIMAL_HEADER_LEN.
  */
  int get_event_len()
  {
    int i= LOG_EVENT_MINIMAL_HEADER_LEN + get_data_size();
    DBUG_PRINT("info",("event_len=%d",i));
    return i;
  }
  int get_data_size()
  {
    /*
      The vector of post-header lengths is considered as part of the
      post-header, because in a given version it never changes (contrary to the
      query in a Query_log_event).
    */
    return FORMAT_DESCRIPTION_HEADER_LEN;
  }
};
......@@ -705,23 +957,26 @@ class Intvar_log_event: public Log_event
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
#endif
Intvar_log_event(const char* buf, bool old_format);
Intvar_log_event(const char* buf, const Format_description_log_event* description_event);
~Intvar_log_event() {}
Log_event_type get_type_code() { return INTVAR_EVENT;}
const char* get_var_type_name();
int get_data_size() { return 9; /* sizeof(type) + sizeof(val) */;}
int write_data(IO_CACHE* file);
bool is_valid() { return 1; }
const bool is_valid() { return 1; }
};
/*****************************************************************************
Rand Log Event class
Logs random seed used by the next RAND(), and by PASSWORD() in 4.1.
Logs random seed used by the next RAND(), and by PASSWORD() in 4.1.0.
4.1.1 does not need it (it's repeatable again) so this event needn't be
written in 4.1.1 for PASSWORD() (but the fact that it is written is just a
waste, it does not cause bugs).
****************************************************************************/
class Rand_log_event: public Log_event
......@@ -739,15 +994,15 @@ class Rand_log_event: public Log_event
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
#endif
Rand_log_event(const char* buf, bool old_format);
Rand_log_event(const char* buf, const Format_description_log_event* description_event);
~Rand_log_event() {}
Log_event_type get_type_code() { return RAND_EVENT;}
int get_data_size() { return 16; /* sizeof(ulonglong) * 2*/ }
int write_data(IO_CACHE* file);
bool is_valid() { return 1; }
const bool is_valid() { return 1; }
};
/*****************************************************************************
......@@ -757,6 +1012,9 @@ class Rand_log_event: public Log_event
Every time a query uses the value of a user variable, a User_var_log_event is
written before the Query_log_event, to set the user variable.
Every time a query uses the value of a user variable, a User_var_log_event is
written before the Query_log_event, to set the user variable.
****************************************************************************/
class User_var_log_event: public Log_event
{
......@@ -778,10 +1036,10 @@ class User_var_log_event: public Log_event
void pack_info(Protocol* protocol);
int exec_event(struct st_relay_log_info* rli);
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
#endif
User_var_log_event(const char* buf, bool old_format);
User_var_log_event(const char* buf, const Format_description_log_event* description_event);
~User_var_log_event() {}
Log_event_type get_type_code() { return USER_VAR_EVENT;}
int get_data_size()
......@@ -791,7 +1049,7 @@ class User_var_log_event: public Log_event
UV_CHARSET_NUMBER_SIZE + UV_VAL_LEN_SIZE + val_len);
}
int write_data(IO_CACHE* file);
bool is_valid() { return 1; }
const bool is_valid() { return 1; }
};
/*****************************************************************************
......@@ -809,15 +1067,15 @@ class Stop_log_event: public Log_event
{}
int exec_event(struct st_relay_log_info* rli);
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
#endif
Stop_log_event(const char* buf, bool old_format):
Log_event(buf, old_format)
Stop_log_event(const char* buf, const Format_description_log_event* description_event):
Log_event(buf, description_event)
{}
~Stop_log_event() {}
Log_event_type get_type_code() { return STOP_EVENT;}
bool is_valid() { return 1; }
const bool is_valid() { return 1; }
};
#endif /* HAVE_REPLICATION */
......@@ -850,18 +1108,23 @@ class Rotate_log_event: public Log_event
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
#endif
Rotate_log_event(const char* buf, int event_len, bool old_format);
Rotate_log_event(const char* buf, uint event_len,
const Format_description_log_event* description_event);
~Rotate_log_event()
{
if (alloced)
my_free((gptr) new_log_ident, MYF(0));
}
Log_event_type get_type_code() { return ROTATE_EVENT;}
int get_event_len()
{
return (LOG_EVENT_MINIMAL_HEADER_LEN + get_data_size());
}
int get_data_size() { return ident_len + ROTATE_HEADER_LEN;}
bool is_valid() { return new_log_ident != 0; }
const bool is_valid() { return new_log_ident != 0; }
int write_data(IO_CACHE* file);
};
......@@ -899,11 +1162,12 @@ class Create_file_log_event: public Load_log_event
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form, char* last_db, bool enable_local);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool enable_local);
#endif
Create_file_log_event(const char* buf, int event_len, bool old_format);
Create_file_log_event(const char* buf, uint event_len,
const Format_description_log_event* description_event);
~Create_file_log_event() {}
Log_event_type get_type_code()
......@@ -916,12 +1180,7 @@ class Create_file_log_event: public Load_log_event
Load_log_event::get_data_size() +
4 + 1 + block_len);
}
int get_data_body_offset()
{
return (fake_base ? LOAD_EVENT_OVERHEAD:
LOAD_EVENT_OVERHEAD + CREATE_FILE_HEADER_LEN);
}
bool is_valid() { return inited_from_old || block != 0; }
const bool is_valid() { return inited_from_old || block != 0; }
int write_data_header(IO_CACHE* file);
int write_data_body(IO_CACHE* file);
/*
......@@ -963,14 +1222,15 @@ class Append_block_log_event: public Log_event
void pack_info(Protocol* protocol);
#endif /* HAVE_REPLICATION */
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
#endif
Append_block_log_event(const char* buf, int event_len);
Append_block_log_event(const char* buf, uint event_len,
const Format_description_log_event* description_event);
~Append_block_log_event() {}
Log_event_type get_type_code() { return APPEND_BLOCK_EVENT;}
int get_data_size() { return block_len + APPEND_BLOCK_HEADER_LEN ;}
bool is_valid() { return block != 0; }
const bool is_valid() { return block != 0; }
int write_data(IO_CACHE* file);
const char* get_db() { return db; }
};
......@@ -993,15 +1253,16 @@ class Delete_file_log_event: public Log_event
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form, char* last_db, bool enable_local);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
void print(FILE* file, bool short_form, LAST_EVENT_INFO* last_event_info, bool enable_local);
#endif
Delete_file_log_event(const char* buf, int event_len);
Delete_file_log_event(const char* buf, uint event_len,
const Format_description_log_event* description_event);
~Delete_file_log_event() {}
Log_event_type get_type_code() { return DELETE_FILE_EVENT;}
int get_data_size() { return DELETE_FILE_HEADER_LEN ;}
bool is_valid() { return file_id != 0; }
const bool is_valid() { return file_id != 0; }
int write_data(IO_CACHE* file);
const char* get_db() { return db; }
};
......@@ -1024,14 +1285,15 @@ class Execute_load_log_event: public Log_event
int exec_event(struct st_relay_log_info* rli);
#endif /* HAVE_REPLICATION */
#else
void print(FILE* file, bool short_form = 0, char* last_db = 0);
void print(FILE* file, bool short_form = 0, LAST_EVENT_INFO* last_event_info= 0);
#endif
Execute_load_log_event(const char* buf, int event_len);
Execute_load_log_event(const char* buf, uint event_len,
const Format_description_log_event* description_event);
~Execute_load_log_event() {}
Log_event_type get_type_code() { return EXEC_LOAD_EVENT;}
int get_data_size() { return EXEC_LOAD_HEADER_LEN ;}
bool is_valid() { return file_id != 0; }
const bool is_valid() { return file_id != 0; }
int write_data(IO_CACHE* file);
const char* get_db() { return db; }
};
......@@ -1040,13 +1302,18 @@ class Execute_load_log_event: public Log_event
class Unknown_log_event: public Log_event
{
public:
Unknown_log_event(const char* buf, bool old_format):
Log_event(buf, old_format)
/*
Even if this is an unknown event, we still pass description_event to
Log_event's ctor, this way we can extract maximum information from the
event's header (the unique ID for example).
*/
Unknown_log_event(const char* buf, const Format_description_log_event* description_event):
Log_event(buf, description_event)
{}
~Unknown_log_event() {}
void print(FILE* file, bool short_form= 0, char* last_db= 0);
void print(FILE* file, bool short_form= 0, LAST_EVENT_INFO* last_event_info= 0);
Log_event_type get_type_code() { return UNKNOWN_EVENT;}
bool is_valid() { return 1; }
const bool is_valid() { return 1; }
};
#endif
......
......@@ -14,6 +14,15 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/*
Mostly this file is used in the server. But a little part of it is used in
mysqlbinlog too (definition of SELECT_DISTINCT and others).
The consequence is that 90% of the file is wrapped in #ifndef MYSQL_CLIENT,
except the part which must be in the server and in the client.
*/
#ifndef MYSQL_CLIENT
#include <my_global.h>
#include <assert.h>
#include <mysql_version.h>
......@@ -176,7 +185,15 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
#define TEST_NO_STACKTRACE 512
#define TEST_SIGINT 1024 /* Allow sigint on threads */
/* options for select set by the yacc parser (stored in lex->options) */
#endif
/*
This is included in the server and in the client.
Options for select set by the yacc parser (stored in lex->options).
None of the 32 defines below should have its value changed, or this will
break replication.
*/
#define SELECT_DISTINCT (1L << 0)
#define SELECT_STRAIGHT_JOIN (1L << 1)
#define SELECT_DESCRIBE (1L << 2)
......@@ -214,6 +231,9 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
#define OPTION_RELAXED_UNIQUE_CHECKS (1L << 27)
#define SELECT_NO_UNLOCK (1L << 28)
/* The rest of the file is included in the server only */
#ifndef MYSQL_CLIENT
/* options for UNION set by the yacc parser (stored in unit->union_option) */
#define UNION_ALL 1
......@@ -1102,3 +1122,5 @@ inline void setup_table_map(TABLE *table, TABLE_LIST *table_list, uint tablenr)
table->map= (table_map) 1 << tablenr;
table->force_index= table_list->force_index;
}
#endif /* MYSQL_CLIENT */
......@@ -2023,7 +2023,7 @@ bool open_log(MYSQL_LOG *log, const char *hostname,
}
return log->open(opt_name, type, 0, index_file_name,
(read_append) ? SEQ_READ_APPEND : WRITE_CACHE,
no_auto_events, max_size);
no_auto_events, max_size, 0);
}
......
......@@ -73,8 +73,6 @@ static int safe_sleep(THD* thd, int sec, CHECK_KILLED_FUNC thread_killed,
static int request_table_dump(MYSQL* mysql, const char* db, const char* table);
static int create_table_from_dump(THD* thd, MYSQL *mysql, const char* db,
const char* table_name, bool overwrite);
static int check_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi);
/*
Find out which replications threads are running
......@@ -215,6 +213,12 @@ static byte* get_table_key(TABLE_RULE_ENT* e, uint* len,
pos Position in relay log file
need_data_lock Set to 1 if this functions should do mutex locks
errmsg Store pointer to error message here
look_for_description_event
1 if we should look for such an event. We only need
this when the SQL thread starts and opens an existing
relay log and has to execute it (possibly from an offset
>4); then we need to read the first event of the relay
log to be able to parse the events we have to execute.
DESCRIPTION
- Close old open relay log files.
......@@ -232,9 +236,11 @@ static byte* get_table_key(TABLE_RULE_ENT* e, uint* len,
int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
ulonglong pos, bool need_data_lock,
const char** errmsg)
const char** errmsg,
bool look_for_description_event)
{
DBUG_ENTER("init_relay_log_pos");
DBUG_PRINT("info", ("pos=%lu", pos));
*errmsg=0;
pthread_mutex_t *log_lock=rli->relay_log.get_log_lock();
......@@ -242,6 +248,24 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
if (need_data_lock)
pthread_mutex_lock(&rli->data_lock);
/*
Slave threads are not the only users of init_relay_log_pos(). CHANGE MASTER
is, too, and init_slave() too; these 2 functions allocate a description
event in init_relay_log_pos, which is not freed by the terminating SQL slave
thread as that thread is not started by these functions. So we have to free
the description_event here, in case, so that there is no memory leak in
running, say, CHANGE MASTER.
*/
delete rli->relay_log.description_event_for_exec;
/*
By default the relay log is in binlog format 3 (4.0).
Even if format is 4, this will work enough to read the first event
(Format_desc) (remember that format 4 is just lengthened compared to format
3; format 3 is a prefix of format 4).
*/
rli->relay_log.description_event_for_exec= new
Format_description_log_event(3);
pthread_mutex_lock(log_lock);
/* Close log file and free buffers if it's already open */
......@@ -280,8 +304,8 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
In this case, we will use the same IO_CACHE pointer to
read data as the IO thread is using to write data.
*/
if (my_b_tell((rli->cur_log=rli->relay_log.get_log_file())) == 0 &&
check_binlog_magic(rli->cur_log,errmsg))
my_b_seek((rli->cur_log=rli->relay_log.get_log_file()), (off_t)0);
if (check_binlog_magic(rli->cur_log,errmsg))
goto err;
rli->cur_log_old_open_count=rli->relay_log.get_open_count();
}
......@@ -295,8 +319,85 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
goto err;
rli->cur_log = &rli->cache_buf;
}
if (pos >= BIN_LOG_HEADER_SIZE)
/*
In all cases, check_binlog_magic() has been called so we're at offset 4 for
sure.
*/
if (pos > BIN_LOG_HEADER_SIZE) /* If pos<=4, we stay at 4 */
{
Log_event* ev;
while (look_for_description_event)
{
/*
Read the possible Format_description_log_event; if position was 4, no need, it will
be read naturally.
*/
DBUG_PRINT("info",("looking for a Format_description_log_event"));
if (my_b_tell(rli->cur_log) >= pos)
break;
/*
Because we have rli->data_lock and log_lock, we can safely read an
event
*/
if (!(ev=Log_event::read_log_event(rli->cur_log,0,
rli->relay_log.description_event_for_exec)))
{
DBUG_PRINT("info",("could not read event, rli->cur_log->error=%d",
rli->cur_log->error));
if (rli->cur_log->error) /* not EOF */
{
*errmsg= "I/O error reading event at position 4";
goto err;
}
break;
}
else if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
{
DBUG_PRINT("info",("found Format_description_log_event"));
delete rli->relay_log.description_event_for_exec;
rli->relay_log.description_event_for_exec= (Format_description_log_event*) ev;
/*
As ev was returned by read_log_event, it has passed is_valid(), so
my_malloc() in ctor worked, no need to check again.
*/
/*
Ok, we found a Format_description event. But it is not sure that this
describes the whole relay log; indeed, one can have this sequence
(starting from position 4):
Format_desc (of slave)
Rotate (of master)
Format_desc (of slave)
So the Format_desc which really describes the rest of the relay log is
the 3rd event (it can't be further than that, because we rotate the
relay log when we queue a Rotate event from the master).
But what describes the Rotate is the first Format_desc.
So what we do is:
go on searching for Format_description events, until you exceed the
position (argument 'pos') or until you find another event than Rotate
or Format_desc.
*/
}
else
{
DBUG_PRINT("info",("found event of another type=%d",
ev->get_type_code()));
look_for_description_event= (ev->get_type_code() == ROTATE_EVENT);
delete ev;
}
}
my_b_seek(rli->cur_log,(off_t)pos);
#ifndef DBUG_OFF
{
char llbuf1[22], llbuf2[22];
DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
llstr(my_b_tell(rli->cur_log),llbuf1),
llstr(rli->event_relay_log_pos,llbuf2)));
}
#endif
}
err:
/*
......@@ -311,6 +412,8 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,
if (need_data_lock)
pthread_mutex_unlock(&rli->data_lock);
if (!rli->relay_log.description_event_for_exec->is_valid() && !*errmsg)
*errmsg= "Invalid Format_description log event; could be out of memory";
DBUG_RETURN ((*errmsg) ? 1 : 0);
}
......@@ -428,13 +531,15 @@ int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset,
sizeof(rli->group_relay_log_name)-1);
strmake(rli->event_relay_log_name, rli->relay_log.get_log_fname(),
sizeof(rli->event_relay_log_name)-1);
// Just first log with magic number and nothing else
rli->log_space_total= BIN_LOG_HEADER_SIZE;
rli->group_relay_log_pos= rli->event_relay_log_pos= BIN_LOG_HEADER_SIZE;
rli->relay_log.reset_bytes_written();
if (count_relay_log_space(rli))
{
*errmsg= "Error counting relay log space";
goto err;
}
if (!just_reset)
error= init_relay_log_pos(rli, rli->group_relay_log_name, rli->group_relay_log_pos,
0 /* do not need data lock */, errmsg);
0 /* do not need data lock */, errmsg, 0);
err:
#ifndef DBUG_OFF
......@@ -693,6 +798,10 @@ static TABLE_RULE_ENT* find_wild(DYNAMIC_ARRAY *a, const char* key, int len)
different results. Note also the order of precedence of the do/ignore
rules (see code below). For that reason, users should not set conflicting
rules because they may get unpredicted results.
A thought which arose from a big customer's question: "I want to include all
tables like "abc.%" except the "%.EFG"". This can't be done now. If we
supported Perl regexps we could do it with this pattern: /^abc\.(?!EFG)/
(I could not find an equivalent in the regex library MySQL uses).
RETURN VALUES
0 should not be logged/replicated
......@@ -1087,29 +1196,73 @@ static int init_intvar_from_file(int* var, IO_CACHE* f, int default_val)
return 1;
}
/*
Note that we rely on the master's version (3.23, 4.0.14 etc) instead of
relying on the binlog's version. This is not perfect: imagine an upgrade
of the master without waiting for all slaves to be in sync with the master;
then a slave could be fooled about the binlog's format. This is what happens
when people upgrade a 3.23 master to 4.0 without doing RESET MASTER: 4.0
slaves are fooled. So we do this only to distinguish between 3.23 and more
recent masters (it's too late to change things for 3.23).
RETURNS
0 ok
1 error
*/
static int get_master_version_and_clock(MYSQL* mysql, MASTER_INFO* mi)
{
const char* errmsg= 0;
if (!my_isdigit(&my_charset_bin,*mysql->server_version))
errmsg = "Master reported unrecognized MySQL version";
else
{
/*
Note the following switch will bug when we have MySQL branch 30 ;)
*/
switch (*mysql->server_version) {
switch (*mysql->server_version)
{
case '0':
case '1':
case '2':
errmsg = "Master reported unrecognized MySQL version";
break;
case '3':
mi->old_format =
(strncmp(mysql->server_version, "3.23.57", 7) < 0) /* < .57 */ ?
BINLOG_FORMAT_323_LESS_57 :
BINLOG_FORMAT_323_GEQ_57 ;
mi->rli.relay_log.description_event_for_queue= new
Format_description_log_event(1, mysql->server_version);
break;
case '4':
case '5':
mi->old_format = BINLOG_FORMAT_CURRENT;
mi->rli.relay_log.description_event_for_queue= new
Format_description_log_event(3, mysql->server_version);
break;
default:
errmsg = "Master reported unrecognized MySQL version";
/*
Master is MySQL >=5.0. Give a default Format_desc event, so that we can
take the early steps (like tests for "is this a 3.23 master") which we
have to take before we receive the real master's Format_desc which will
override this one. Note that the Format_desc we create below is garbage
(it has the format of the *slave*); it's only good to help know if the
master is 3.23, 4.0, etc.
*/
mi->rli.relay_log.description_event_for_queue= new
Format_description_log_event(4, mysql->server_version);
break;
}
}
/*
This does not mean that a 5.0 slave will be able to read a 6.0 master; but
as we don't know yet, we don't want to forbid this for now. If a 5.0 slave
can't read a 6.0 master, this will show up when the slave can't read some
events sent by the master, and there will be error messages.
*/
if (errmsg)
{
sql_print_error(errmsg);
return 1;
}
MYSQL_RES *master_clock_res;
MYSQL_ROW master_clock_row;
......@@ -1385,7 +1538,7 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname)
if (open_log(&rli->relay_log, glob_hostname, opt_relay_logname,
"-relay-bin", opt_relaylog_index_name,
LOG_BIN, 1 /* read_append cache */,
1 /* no auto events */,
0 /* starting from 5.0 we want relay logs to have auto events */,
max_relay_log_size ? max_relay_log_size : max_binlog_size))
{
sql_print_error("Failed in open_log() called from init_relay_log_info()");
......@@ -1419,7 +1572,7 @@ file '%s', errno %d)", fname, my_errno);
/* Init relay log with first entry in the relay index file */
if (init_relay_log_pos(rli,NullS,BIN_LOG_HEADER_SIZE,0 /* no data lock */,
&msg))
&msg, 0))
{
sql_print_error("Failed to open the relay log 'FIRST' (relay_log_pos 4)");
goto err;
......@@ -1484,7 +1637,7 @@ Failed to open the existing relay log info file '%s' (errno %d)",
rli->group_relay_log_name,
rli->group_relay_log_pos,
0 /* no data lock*/,
&msg))
&msg, 0))
{
char llbuf[22];
sql_print_error("Failed to open the relay log '%s' (relay_log_pos %s)",
......@@ -1493,8 +1646,18 @@ Failed to open the existing relay log info file '%s' (errno %d)",
goto err;
}
}
#ifndef DBUG_OFF
{
char llbuf1[22], llbuf2[22];
DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
llstr(my_b_tell(rli->cur_log),llbuf1),
llstr(rli->event_relay_log_pos,llbuf2)));
DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
}
#endif
/*
Now change the cache from READ to WRITE - must do this
before flush_relay_log_info
......@@ -2251,14 +2414,18 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
goto err;
}
int cmp_result;
/* The "compare and wait" main loop */
while (!thd->killed &&
init_abort_pos_wait == abort_pos_wait &&
slave_running)
{
bool pos_reached;
int cmp_result= 0;
DBUG_ASSERT(*group_master_log_name || group_master_log_pos == 0);
/*
If we are after RESET SLAVE, and the SQL slave thread has not processed
any event yet, it could be that group_master_log_name is "". In that case,
just wait for more events (as there is no sensible comparison to do).
*/
if (*group_master_log_name)
{
char *basename= group_master_log_name + dirname_length(group_master_log_name);
......@@ -2280,13 +2447,12 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name,
if (group_master_log_name_extension < log_name_extension)
cmp_result = -1 ;
else
cmp_result= (group_master_log_name_extension > log_name_extension) ? 1 : 0 ;
}
pos_reached = ((!cmp_result && group_master_log_pos >= (ulonglong)log_pos) ||
cmp_result > 0);
if (pos_reached || thd->killed)
cmp_result= (group_master_log_name_extension > log_name_extension) ?
1 : 0 ;
if (((!cmp_result && group_master_log_pos >= (ulonglong)log_pos) ||
cmp_result > 0) || thd->killed)
break;
}
//wait for master update, with optional timeout.
DBUG_PRINT("info",("Waiting for master update"));
......@@ -2343,6 +2509,11 @@ improper_arguments: %d timed_out: %d",
DBUG_RETURN( error ? error : event_count );
}
/*
  set_slave_thread_options()

  Overwrites thd->options with the option flags a slave thread runs with:
  OPTION_AUTO_IS_NULL is always set, and OPTION_BIN_LOG is added when
  opt_log_slave_updates is on. Any flags previously stored in thd->options
  are discarded.
*/
void set_slave_thread_options(THD* thd)
{
  if (opt_log_slave_updates)
    thd->options= OPTION_BIN_LOG | OPTION_AUTO_IS_NULL;
  else
    thd->options= 0 | OPTION_AUTO_IS_NULL;
}
/*
init_slave_thread()
......@@ -2359,8 +2530,7 @@ static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
thd->master_access= ~0;
thd->priv_user = 0;
thd->slave_thread = 1;
thd->options = ((opt_log_slave_updates) ? OPTION_BIN_LOG:0) |
OPTION_AUTO_IS_NULL;
set_slave_thread_options(thd);
/*
It's nonsense to constrain the slave threads with max_join_size; if a
query succeeded on master, we HAVE to execute it.
......@@ -2624,12 +2794,13 @@ bool st_relay_log_info::is_until_satisfied()
if (until_log_names_cmp_result == UNTIL_LOG_NAMES_CMP_UNKNOWN)
{
/*
We have no cached comaprison results so we should compare log names
and cache result
We have no cached comparison results so we should compare log names
and cache result.
If we are after RESET SLAVE, and the SQL slave thread has not processed
any event yet, it could be that group_master_log_name is "". In that case,
just wait for more events (as there is no sensible comparison to do).
*/
DBUG_ASSERT(*log_name || log_pos == 0);
if (*log_name)
{
const char *basename= log_name + dirname_length(log_name);
......@@ -2704,27 +2875,44 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
int exec_res;
/*
Skip queries originating from this server or number of
queries specified by the user in slave_skip_counter
We can't however skip event's that has something to do with the
Queries originating from this server must be skipped.
Low-level events (Format_desc, Rotate, Stop) from this server
must also be skipped. But for those we don't want to modify
group_master_log_pos, because these events did not exist on the master.
Format_desc is not completely skipped.
Skip queries specified by the user in slave_skip_counter.
We can't however skip events that have something to do with the
log files themselves.
Filtering on own server id is extremely important, to ignore execution of
events created by the creation/rotation of the relay log (remember that
now the relay log starts with its Format_desc, has a Rotate etc).
*/
if (ev->server_id == (uint32) ::server_id ||
(rli->slave_skip_counter && type_code != ROTATE_EVENT))
DBUG_PRINT("info",("type_code=%d, server_id=%d",type_code,ev->server_id));
if ((ev->server_id == (uint32) ::server_id &&
type_code!= FORMAT_DESCRIPTION_EVENT) ||
(rli->slave_skip_counter &&
type_code != ROTATE_EVENT && type_code != STOP_EVENT &&
type_code != START_EVENT_V3 && type_code!= FORMAT_DESCRIPTION_EVENT))
{
/* TODO: I/O thread should not even log events with the same server id */
rli->inc_group_relay_log_pos(ev->get_event_len(),
type_code != STOP_EVENT ? ev->log_pos : LL(0),
DBUG_PRINT("info", ("event skipped"));
rli->inc_group_relay_log_pos((type_code == ROTATE_EVENT ||
type_code == STOP_EVENT ||
type_code == FORMAT_DESCRIPTION_EVENT) ?
LL(0) : ev->log_pos,
1/* skip lock*/);
flush_relay_log_info(rli);
/*
Protect against common user error of setting the counter to 1
instead of 2 while recovering from an failed auto-increment insert
instead of 2 while recovering from an insert which used auto_increment,
rand or user var.
*/
if (rli->slave_skip_counter &&
!((type_code == INTVAR_EVENT || type_code == STOP_EVENT) &&
!((type_code == INTVAR_EVENT ||
type_code == RAND_EVENT ||
type_code == USER_VAR_EVENT) &&
rli->slave_skip_counter == 1))
--rli->slave_skip_counter;
pthread_mutex_unlock(&rli->data_lock);
......@@ -2741,7 +2929,16 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
ev->thd = thd;
exec_res = ev->exec_event(rli);
DBUG_ASSERT(rli->sql_thd==thd);
/*
Format_description_log_event should not be deleted because it will be
used to read info about the relay log's format; it will be deleted when
the SQL thread does not need it, i.e. when this thread terminates.
*/
if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
{
DBUG_PRINT("info", ("Deleting the event after it has been executed"));
delete ev;
}
return exec_res;
}
else
......@@ -2842,7 +3039,8 @@ extern "C" pthread_handler_decl(handle_slave_io,arg)
thd->proc_info = "Checking master version";
if (get_master_version_and_clock(mysql, mi))
goto err;
if (!mi->old_format)
if (mi->rli.relay_log.description_event_for_queue->binlog_version > 1)
{
/*
Register ourselves with the master.
......@@ -3043,6 +3241,9 @@ log space");
pthread_mutex_lock(&mi->run_lock);
mi->slave_running = 0;
mi->io_thd = 0;
/* Forget the relay log's format */
delete mi->rli.relay_log.description_event_for_queue;
mi->rli.relay_log.description_event_for_queue= 0;
// TODO: make rpl_status part of MASTER_INFO
change_rpl_status(RPL_ACTIVE_SLAVE,RPL_IDLE_SLAVE);
mi->abort_slave = 0; // TODO: check if this is needed
......@@ -3137,15 +3338,38 @@ extern "C" pthread_handler_decl(handle_slave_sql,arg)
if (init_relay_log_pos(rli,
rli->group_relay_log_name,
rli->group_relay_log_pos,
1 /*need data lock*/, &errmsg))
1 /*need data lock*/, &errmsg,
1 /*look for a description_event*/))
{
sql_print_error("Error initializing relay log position: %s",
errmsg);
goto err;
}
THD_CHECK_SENTRY(thd);
#ifndef DBUG_OFF
{
char llbuf1[22], llbuf2[22];
DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s rli->event_relay_log_pos=%s",
llstr(my_b_tell(rli->cur_log),llbuf1),
llstr(rli->event_relay_log_pos,llbuf2)));
DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
/*
Wonder if this is correct. I (Guilhem) wonder if my_b_tell() returns the
correct position when it's called just after my_b_seek() (the questionable
stuff is those "seek is done on next read" comments in the my_b_seek()
source code).
The crude reality is that this assertion randomly fails whereas
replication seems to work fine. And there is no easy explanation why it
fails (as we my_b_seek(rli->event_relay_log_pos) at the very end of
init_relay_log_pos() called above). Maybe the assertion would be
meaningful if we held rli->data_lock between the my_b_seek() and the
DBUG_ASSERT().
*/
#ifdef SHOULD_BE_CHECKED
DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->event_relay_log_pos);
#endif
}
#endif
DBUG_ASSERT(rli->sql_thd == thd);
DBUG_PRINT("master_info",("log_file_name: %s position: %s",
......@@ -3205,11 +3429,9 @@ the slave SQL thread with \"SLAVE START\". We stopped at log \
DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun
/* When master_pos_wait() wakes up it will check this and terminate */
rli->slave_running= 0;
/*
Going out of the transaction. Necessary to mark it, in case the user
restarts replication from a non-transactional statement (with CHANGE
MASTER).
*/
/* Forget the relay log's format */
delete rli->relay_log.description_event_for_exec;
rli->relay_log.description_event_for_exec= 0;
/* Wake up master_pos_wait() */
pthread_mutex_unlock(&rli->data_lock);
DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions"));
......@@ -3299,7 +3521,7 @@ static int process_io_create_file(MASTER_INFO* mi, Create_file_log_event* cev)
{
net_write_command(net, 0, "", 0, "", 0);/* 3.23 master wants it */
Execute_load_log_event xev(thd,0,0);
xev.log_pos = mi->master_log_pos;
xev.log_pos = cev->log_pos;
if (unlikely(mi->rli.relay_log.append(&xev)))
{
sql_print_error("Slave I/O: error writing Exec_load event to \
......@@ -3313,7 +3535,6 @@ relay log");
{
cev->block = (char*)net->read_pos;
cev->block_len = num_bytes;
cev->log_pos = mi->master_log_pos;
if (unlikely(mi->rli.relay_log.append(cev)))
{
sql_print_error("Slave I/O: error writing Create_file event to \
......@@ -3327,7 +3548,7 @@ relay log");
{
aev.block = (char*)net->read_pos;
aev.block_len = num_bytes;
aev.log_pos = mi->master_log_pos;
aev.log_pos = cev->log_pos;
if (unlikely(mi->rli.relay_log.append(&aev)))
{
sql_print_error("Slave I/O: error writing Append_block event to \
......@@ -3355,6 +3576,7 @@ relay log");
DESCRIPTION
Updates the master info with the place in the next binary
log where we should start reading.
Rotate the relay log to avoid mixed-format relay logs.
NOTES
We assume we already locked mi->data_lock
......@@ -3386,21 +3608,30 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev)
if (disconnect_slave_event_count)
events_till_disconnect++;
#endif
/*
If description_event_for_queue is format <4, there is conversion in the
relay log to the slave's format (4). And Rotate can mean upgrade or
nothing. If upgrade, it's to 5.0 or newer, so we will get a Format_desc, so
no need to reset description_event_for_queue now. And if it's nothing (same
master version as before), no need (still using the slave's format).
*/
if (mi->rli.relay_log.description_event_for_queue->binlog_version >= 4)
{
delete mi->rli.relay_log.description_event_for_queue;
/* start from format 3 (MySQL 4.0) again */
mi->rli.relay_log.description_event_for_queue= new
Format_description_log_event(3);
}
rotate_relay_log(mi); /* will take the right mutexes */
DBUG_RETURN(0);
}
/*
queue_old_event()
Writes a 3.23 event to the relay log.
TODO:
Test this code before release - it has to be tested on a separate
setup with 3.23 master
Reads a 3.23 event and converts it to the slave's format. This code was copied
from MySQL 4.0.
*/
static int queue_old_event(MASTER_INFO *mi, const char *buf,
static int queue_binlog_ver_1_event(MASTER_INFO *mi, const char *buf,
ulong event_len)
{
const char *errmsg = 0;
......@@ -3408,7 +3639,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
bool ignore_event= 0;
char *tmp_buf = 0;
RELAY_LOG_INFO *rli= &mi->rli;
DBUG_ENTER("queue_old_event");
DBUG_ENTER("queue_binlog_ver_1_event");
/*
If we get Load event, we need to pass a non-reusable buffer
......@@ -3432,7 +3663,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
connected to the master).
*/
Log_event *ev = Log_event::read_log_event(buf,event_len, &errmsg,
1 /*old format*/ );
mi->rli.relay_log.description_event_for_queue);
if (unlikely(!ev))
{
sql_print_error("Read invalid event from master: '%s',\
......@@ -3442,7 +3673,7 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
DBUG_RETURN(1);
}
pthread_mutex_lock(&mi->data_lock);
ev->log_pos = mi->master_log_pos;
ev->log_pos= mi->master_log_pos; /* 3.23 events don't contain log_pos */
switch (ev->get_type_code()) {
case STOP_EVENT:
ignore_event= 1;
......@@ -3467,9 +3698,11 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
{
/* We come here when and only when tmp_buf != 0 */
DBUG_ASSERT(tmp_buf);
inc_pos=event_len;
ev->log_pos+= inc_pos;
int error = process_io_create_file(mi,(Create_file_log_event*)ev);
delete ev;
mi->master_log_pos += event_len;
mi->master_log_pos += inc_pos;
DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos));
pthread_mutex_unlock(&mi->data_lock);
my_free((char*)tmp_buf, MYF(0));
......@@ -3481,6 +3714,12 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
}
if (likely(!ignore_event))
{
if (ev->log_pos)
/*
Don't do it for fake Rotate events (see comment in
Log_event::Log_event(const char* buf...) in log_event.cc).
*/
ev->log_pos+= event_len; /* make log_pos be the pos of the end of the event */
if (unlikely(rli->relay_log.append(ev)))
{
delete ev;
......@@ -3496,10 +3735,98 @@ static int queue_old_event(MASTER_INFO *mi, const char *buf,
DBUG_RETURN(0);
}
/*
  queue_binlog_ver_3_event()

  Reads a 4.0 event and converts it to the slave's format. This code was
  copied from queue_binlog_ver_1_event(), with some affordable
  simplifications.

  The event object is built from the raw bytes using
  mi->rli.relay_log.description_event_for_queue and then appended to the
  relay log. Per event type:
    - Stop events are dropped: nothing is appended and mi->master_log_pos
      is left unchanged.
    - Rotate events are passed to process_io_rotate() and then appended;
      mi->master_log_pos is not advanced.
    - Any other event is appended and mi->master_log_pos is advanced by
      event_len.

  mi->data_lock is taken after the event has been parsed and released on
  every exit path. The event object is deleted on every exit path.

  RETURN VALUES
    0   ok (including a dropped Stop event)
    1   error: the event could not be parsed, process_io_rotate() failed,
        or the append to the relay log failed
*/
static int queue_binlog_ver_3_event(MASTER_INFO *mi, const char *buf,
                                    ulong event_len)
{
  const char *errmsg= 0;
  ulong inc_pos;
  RELAY_LOG_INFO *rli= &mi->rli;
  DBUG_ENTER("queue_binlog_ver_3_event");

  /* read_log_event() will adjust log_pos to be end_log_pos */
  Log_event *ev= Log_event::read_log_event(buf, event_len, &errmsg,
                                           mi->rli.relay_log.description_event_for_queue);
  if (unlikely(!ev))
  {
    sql_print_error("Read invalid event from master: '%s',"
                    "master could be corrupt but a more likely cause of this is a bug",
                    errmsg);
    DBUG_RETURN(1);
  }
  pthread_mutex_lock(&mi->data_lock);
  switch (ev->get_type_code()) {
  case STOP_EVENT:
    /*
      The event is not written to the relay log, but we still own it:
      free it before leaving, otherwise every Stop event leaks.
    */
    delete ev;
    goto err;
  case ROTATE_EVENT:
    if (unlikely(process_io_rotate(mi, (Rotate_log_event*) ev)))
    {
      delete ev;
      pthread_mutex_unlock(&mi->data_lock);
      DBUG_RETURN(1);
    }
    inc_pos= 0;
    break;
  default:
    inc_pos= event_len;
    break;
  }
  if (unlikely(rli->relay_log.append(ev)))
  {
    delete ev;
    pthread_mutex_unlock(&mi->data_lock);
    DBUG_RETURN(1);
  }
  rli->relay_log.harvest_bytes_written(&rli->log_space_total);
  delete ev;
  mi->master_log_pos+= inc_pos;
err:
  /* the argument is cast to ulong, so the conversion must be %lu */
  DBUG_PRINT("info", ("master_log_pos: %lu", (ulong) mi->master_log_pos));
  pthread_mutex_unlock(&mi->data_lock);
  DBUG_RETURN(0);
}
/*
  queue_old_event()

  Writes a 3.23 or 4.0 event to the relay log, after converting it to the 5.0
  (exactly, slave's) format. To do the conversion, we create a 5.0 event from
  the 3.23/4.0 bytes, then write this event to the relay log.

  The converter is chosen from the binlog_version of
  mi->rli.relay_log.description_event_for_queue:
    1  -> queue_binlog_ver_1_event()
    3  -> queue_binlog_ver_3_event()
  Any other version is unsupported and makes the function return 1.

  TODO:
    Test this code before release - it has to be tested on a separate
    setup with 3.23 master or 4.0 master

  RETURN VALUES
    the chosen converter's return value, or 1 for an unsupported version
*/
static int queue_old_event(MASTER_INFO *mi, const char *buf,
                           ulong event_len)
{
  Format_description_log_event *queue_format=
    mi->rli.relay_log.description_event_for_queue;

  if (queue_format->binlog_version == 1)
    return queue_binlog_ver_1_event(mi, buf, event_len);
  if (queue_format->binlog_version == 3)
    return queue_binlog_ver_3_event(mi, buf, event_len);

  /* unsupported format; eg version 2 */
  DBUG_PRINT("info",("unsupported binlog format %d in queue_old_event()",
                     queue_format->binlog_version));
  return 1;
}
/*
queue_event()
If the event is 3.23/4.0, passes it to queue_old_event() which will convert
it. Otherwise, writes a 5.0 (or newer) event to the relay log. Then there is
no format conversion, it's pure read/write of bytes.
So a 5.0.0 slave's relay log can contain events in the slave's format or in
any >=5.0.0 format.
*/
int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
......@@ -3509,7 +3836,8 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
RELAY_LOG_INFO *rli= &mi->rli;
DBUG_ENTER("queue_event");
if (mi->old_format)
if (mi->rli.relay_log.description_event_for_queue->binlog_version<4 &&
buf[EVENT_TYPE_OFFSET] != FORMAT_DESCRIPTION_EVENT /* a way to escape */)
DBUG_RETURN(queue_old_event(mi,buf,event_len));
pthread_mutex_lock(&mi->data_lock);
......@@ -3536,7 +3864,7 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
goto err;
case ROTATE_EVENT:
{
Rotate_log_event rev(buf,event_len,0);
Rotate_log_event rev(buf,event_len,mi->rli.relay_log.description_event_for_queue);
if (unlikely(process_io_rotate(mi,&rev)))
{
error= 1;
......@@ -3549,6 +3877,47 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
inc_pos= 0;
break;
}
case FORMAT_DESCRIPTION_EVENT:
{
/*
Create an event, and save it (when we rotate the relay log, we will have
to write this event again).
*/
/*
We are the only thread which reads/writes description_event_for_queue. The
relay_log struct does not move (though some members of it can change), so
we needn't any lock (no rli->data_lock, no log lock).
*/
Format_description_log_event* tmp= mi->rli.relay_log.description_event_for_queue;
const char* errmsg;
if (!(mi->rli.relay_log.description_event_for_queue= (Format_description_log_event*)
Log_event::read_log_event(buf, event_len, &errmsg,
mi->rli.relay_log.description_event_for_queue)))
{
delete tmp;
error= 2;
goto err;
}
delete tmp;
/*
Set 'created' to 0, so that in next relay logs this event does not trigger
cleaning actions on the slave in Format_description_log_event::exec_event().
*/
mi->rli.relay_log.description_event_for_queue->created= 0;
/*
Though this does some conversion to the slave's format, this will
preserve the master's binlog format version, and number of event types.
*/
/*
If the event was not requested by the slave (the slave did not ask for
it), i.e. has end_log_pos=0, we do not increment mi->master_log_pos
*/
inc_pos= uint4korr(buf+LOG_POS_OFFSET) ? event_len : 0;
DBUG_PRINT("info",("binlog format is now %d",
mi->rli.relay_log.description_event_for_queue->binlog_version));
}
break;
default:
inc_pos= event_len;
break;
......@@ -3574,20 +3943,29 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
We still want to increment, so that we won't re-read this event from the
master if the slave IO thread is now stopped/restarted (more efficient if
the events we are ignoring are big LOAD DATA INFILE).
But events which were generated by this slave and which do not exist in
the master's binlog (i.e. Format_desc, Rotate & Stop) should not increment
mi->master_log_pos.
*/
if (buf[EVENT_TYPE_OFFSET]!=FORMAT_DESCRIPTION_EVENT &&
buf[EVENT_TYPE_OFFSET]!=ROTATE_EVENT &&
buf[EVENT_TYPE_OFFSET]!=STOP_EVENT)
mi->master_log_pos+= inc_pos;
DBUG_PRINT("info", ("master_log_pos: %d, event originating from the same server, ignored", (ulong) mi->master_log_pos));
}
else /* write the event to the relay log */
if (likely(!(error= rli->relay_log.appendv(buf,event_len,0))))
if (likely(!(rli->relay_log.appendv(buf,event_len,0))))
{
mi->master_log_pos+= inc_pos;
DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos));
rli->relay_log.harvest_bytes_written(&rli->log_space_total);
}
else
error=3;
err:
pthread_mutex_unlock(&mi->data_lock);
DBUG_PRINT("info", ("error=%d", error));
DBUG_RETURN(error);
}
......@@ -3612,6 +3990,7 @@ void end_relay_log_info(RELAY_LOG_INFO* rli)
}
rli->inited = 0;
rli->relay_log.close(LOG_CLOSE_INDEX | LOG_CLOSE_STOP_EVENT);
rli->relay_log.harvest_bytes_written(&rli->log_space_total);
/*
Delete the slave's temporary tables from memory.
In the future there will be other actions than this, to ensure persistence
......@@ -3832,6 +4211,7 @@ static IO_CACHE *reopen_relay_log(RELAY_LOG_INFO *rli, const char **errmsg)
relay_log_pos Current log pos
pending Number of bytes already processed from the event
*/
rli->event_relay_log_pos= max(rli->event_relay_log_pos, BIN_LOG_HEADER_SIZE);
my_b_seek(cur_log,rli->event_relay_log_pos);
DBUG_RETURN(cur_log);
}
......@@ -3890,28 +4270,40 @@ Log_event* next_event(RELAY_LOG_INFO* rli)
hot_log=0; // Using old binary log
}
}
#ifndef DBUG_OFF
{
/* This is an assertion which sometimes fails, let's try to track it */
char llbuf1[22], llbuf2[22];
DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
/*
The next assertion sometimes (very rarely) fails, let's try to track
it
*/
DBUG_PRINT("info", ("\
Before assert, my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s",
DBUG_PRINT("info", ("my_b_tell(cur_log)=%s rli->event_relay_log_pos=%s",
llstr(my_b_tell(cur_log),llbuf1),
llstr(rli->group_relay_log_pos,llbuf2)));
llstr(rli->event_relay_log_pos,llbuf2)));
DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
DBUG_ASSERT(my_b_tell(cur_log) == rli->event_relay_log_pos);
}
#endif
/*
Relay log is always in new format - if the master is 3.23, the
I/O thread will convert the format for us
I/O thread will convert the format for us.
A problem: the description event may be in a previous relay log. So if the
slave has been shutdown meanwhile, we would have to look in old relay
logs, which may even have been deleted. So we need to write this
description event at the beginning of the relay log.
When the relay log is created when the I/O thread starts, easy: the master
will send the description event and we will queue it.
But if the relay log is created by new_file(): then the solution is:
MYSQL_LOG::open() will write the buffered description event.
*/
if ((ev=Log_event::read_log_event(cur_log,0,(bool)0 /* new format */)))
if ((ev=Log_event::read_log_event(cur_log,0,
rli->relay_log.description_event_for_exec)))
{
DBUG_ASSERT(thd==rli->sql_thd);
/*
read it while we have a lock, to avoid a mutex lock in
inc_event_relay_log_pos()
*/
rli->future_event_relay_log_pos= my_b_tell(cur_log);
if (hot_log)
pthread_mutex_unlock(log_lock);
DBUG_RETURN(ev);
......@@ -4106,8 +4498,9 @@ void rotate_relay_log(MASTER_INFO* mi)
DBUG_ENTER("rotate_relay_log");
RELAY_LOG_INFO* rli= &mi->rli;
lock_slave_threads(mi);
pthread_mutex_lock(&rli->data_lock);
/* We don't lock rli->run_lock. This would lead to deadlocks. */
pthread_mutex_lock(&mi->run_lock);
/*
We need to test inited because otherwise, new_file() will attempt to lock
LOCK_log, which may not be inited (if we're not a slave).
......@@ -4136,8 +4529,7 @@ void rotate_relay_log(MASTER_INFO* mi)
*/
rli->relay_log.harvest_bytes_written(&rli->log_space_total);
end:
pthread_mutex_unlock(&rli->data_lock);
unlock_slave_threads(mi);
pthread_mutex_unlock(&mi->run_lock);
DBUG_VOID_RETURN;
}
......
......@@ -67,11 +67,6 @@ extern my_bool opt_log_slave_updates;
extern ulonglong relay_log_space_limit;
struct st_master_info;
enum enum_binlog_formats {
BINLOG_FORMAT_CURRENT=0, /* 0 is important for easy 'if (mi->old_format)' */
BINLOG_FORMAT_323_LESS_57,
BINLOG_FORMAT_323_GEQ_57 };
/*
TODO: this needs to be redone, but for now it does not matter since
we do not have multi-master yet.
......@@ -186,6 +181,8 @@ typedef struct st_relay_log_info
ulonglong group_relay_log_pos;
char event_relay_log_name[FN_REFLEN];
ulonglong event_relay_log_pos;
ulonglong future_event_relay_log_pos;
/*
Original log name and position of the group we're currently executing
(whose coordinates are group_relay_log_name/pos in the relay log)
......@@ -207,11 +204,13 @@ typedef struct st_relay_log_info
/*
InnoDB internally stores the master log position it has processed
so far; the position to store is really the sum of
pos + pending + event_len here since we must store the pos of the
END of the current log event
so far; when the InnoDB code to store this position is called, we have not
updated rli->group_master_log_pos yet. So the position is the event's
log_pos (the position of the end of the event); we save it in the variable
below. It's the *coming* group_master_log_pos (the one which will be
group_master_log_pos in the coming milliseconds).
*/
int event_len;
ulonglong future_group_master_log_pos;
time_t last_master_timestamp;
......@@ -285,16 +284,17 @@ typedef struct st_relay_log_info
until_log_names_cmp_result= UNTIL_LOG_NAMES_CMP_UNKNOWN;
}
inline void inc_event_relay_log_pos(ulonglong val)
inline void inc_event_relay_log_pos()
{
event_relay_log_pos+= val;
event_relay_log_pos= future_event_relay_log_pos;
}
void inc_group_relay_log_pos(ulonglong val, ulonglong log_pos, bool skip_lock=0)
void inc_group_relay_log_pos(ulonglong log_pos,
bool skip_lock=0)
{
if (!skip_lock)
pthread_mutex_lock(&data_lock);
inc_event_relay_log_pos(val);
inc_event_relay_log_pos();
group_relay_log_pos= event_relay_log_pos;
strmake(group_relay_log_name,event_relay_log_name,
sizeof(group_relay_log_name)-1);
......@@ -311,8 +311,31 @@ typedef struct st_relay_log_info
not advance as it should on the non-transactional slave (it advances by
big leaps, whereas it should advance by small leaps).
*/
if (log_pos) // 3.23 binlogs don't have log_posx
group_master_log_pos= log_pos+ val;
/*
In 4.x we used the event's len to compute the positions here. This is
wrong if the event was 3.23/4.0 and has been converted to 5.0, because
then the event's len is not what it was in the master's binlog, so this
will make a wrong group_master_log_pos (yes it's a bug in 3.23->4.0
replication: Exec_master_log_pos is wrong). The only way to solve this is
to have the original offset of the end of the event in the relay log. This
is what we do in 5.0: log_pos has become "end_log_pos" (because the real
use of log_pos in 4.0 was to compute the end_log_pos; so better to store
end_log_pos instead of begin_log_pos).
If we had not done this fix here, the problem would also have appeared
when the slave and master are 5.0 but with different event lengths (for
example the slave is more recent than the master and features the event
UID). It would give false MASTER_POS_WAIT, false Exec_master_log_pos in
SHOW SLAVE STATUS, and so the user would do some CHANGE MASTER using this
value which would lead to badly broken replication.
Even the relay_log_pos would be corrupted in this case, because the len in
the relay log is not "val".
With the end_log_pos solution, we avoid computations involving lengths.
*/
DBUG_PRINT("info", ("log_pos=%lld group_master_log_pos=%lld",
log_pos,group_master_log_pos));
if (log_pos) // some events (like fake Rotate) don't have log_pos
// when we are here, log_pos is the end of the event
group_master_log_pos= log_pos;
pthread_cond_broadcast(&data_cond);
if (!skip_lock)
pthread_mutex_unlock(&data_lock);
......@@ -389,7 +412,6 @@ typedef struct st_master_info
int events_till_abort;
#endif
bool inited;
enum enum_binlog_formats old_format;
volatile bool abort_slave, slave_running;
volatile ulong slave_run_id;
/*
......@@ -404,7 +426,7 @@ typedef struct st_master_info
long clock_diff_with_master;
st_master_info()
:ssl(0), fd(-1), io_thd(0), inited(0), old_format(BINLOG_FORMAT_CURRENT),
:ssl(0), fd(-1), io_thd(0), inited(0),
abort_slave(0),slave_running(0), slave_run_id(0)
{
host[0] = 0; user[0] = 0; password[0] = 0;
......@@ -535,10 +557,12 @@ void lock_slave_threads(MASTER_INFO* mi);
void unlock_slave_threads(MASTER_INFO* mi);
void init_thread_mask(int* mask,MASTER_INFO* mi,bool inverse);
int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log,ulonglong pos,
bool need_data_lock, const char** errmsg);
bool need_data_lock, const char** errmsg,
bool look_for_description_event);
int purge_relay_logs(RELAY_LOG_INFO* rli, THD *thd, bool just_reset,
const char** errmsg);
void set_slave_thread_options(THD* thd);
void rotate_relay_log(MASTER_INFO* mi);
extern "C" pthread_handler_decl(handle_slave_io,arg);
......
......@@ -92,6 +92,7 @@ THD::THD():user_time(0), is_fatal_error(0),
global_read_lock(0), bootstrap(0), spcont(NULL)
{
host= user= priv_user= db= ip= 0;
catalog= (char*)"std"; // the only catalog we have for now
host_or_ip= "connecting host";
locked=some_tables_deleted=no_errors=password= 0;
query_start_used= 0;
......
......@@ -26,6 +26,7 @@
class Query_log_event;
class Load_log_event;
class Slave_log_event;
class Format_description_log_event;
class sp_rcontext;
class sp_cache;
......@@ -99,7 +100,14 @@ class MYSQL_LOG
enum cache_type io_cache_type;
bool write_error, inited;
bool need_start_event;
bool no_auto_events; // For relay binlog
/*
no_auto_events means we don't want any of these automatic events:
Start/Rotate/Stop. That is, in 4.x when we rotate a relay log, we don't want
a Rotate_log event to be written to the relay log; likewise when we start a
relay log, etc. So in 4.x this is 1 for relay logs, 0 for binlogs.
In 5.0 it's 0 for relay logs too!
*/
bool no_auto_events;
/*
The max size before rotation (usable only if log_type == LOG_BIN: binary
logs and relay logs).
......@@ -116,6 +124,18 @@ class MYSQL_LOG
public:
MYSQL_LOG();
~MYSQL_LOG();
/*
These describe the log's format. This is used only for relay logs.
_for_exec is used by the SQL thread, _for_queue by the I/O thread. It's
necessary to have 2 distinct objects, because the I/O thread may be reading
events in a different format from what the SQL thread is reading (consider
the case of a master which has been upgraded from 5.0 to 5.1 without doing
RESET MASTER, or from 4.x to 5.0).
*/
Format_description_log_event *description_event_for_exec,
*description_event_for_queue;
void reset_bytes_written()
{
bytes_written = 0;
......@@ -144,7 +164,8 @@ class MYSQL_LOG
bool open(const char *log_name,enum_log_type log_type,
const char *new_name, const char *index_file_name_arg,
enum cache_type io_cache_type_arg,
bool no_auto_events_arg, ulong max_size);
bool no_auto_events_arg, ulong max_size,
bool null_created);
void new_file(bool need_lock= 1);
bool write(THD *thd, enum enum_server_command command,
const char *format,...);
......@@ -590,9 +611,10 @@ class THD :public ilink,
the connection
priv_user - The user privilege we are using. May be '' for anonymous user.
db - currently selected database
catalog - currently selected catalog
ip - client IP
*/
char *host,*user,*priv_user,*db,*ip;
char *host,*user,*priv_user,*db,*catalog,*ip;
char priv_host[MAX_HOSTNAME];
/* remote (peer) port */
uint16 peer_port;
......
......@@ -48,16 +48,34 @@ int check_binlog_magic(IO_CACHE* log, const char** errmsg)
return 0;
}
/*
fake_rotate_event() builds a fake (=which does not exist physically in any
binlog) Rotate event, which contains the name of the binlog we are going to
send to the slave (because the slave may not know it if it just asked for
MASTER_LOG_FILE='', MASTER_LOG_POS=4).
< 4.0.14, fake_rotate_event() was called only if the requested pos was
4. After this version we always call it, so that a 3.23.58 slave can rely on
it to detect if the master is 4.0 (and stop) (the _fake_ Rotate event has
zeros in the good positions which, by chance, make it possible for the 3.23
slave to detect that this event is unexpected) (this is luck which happens
because the master and slave disagree on the size of the header of
Log_event).
Relying on the event length of the Rotate event instead of these well-placed
zeros was not possible as Rotate events have a variable-length part.
*/
static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
ulonglong position, const char**errmsg)
ulonglong position, const char** errmsg)
{
DBUG_ENTER("fake_rotate_event");
char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN];
memset(header, 0, 4); // when does not matter
memset(header, 0, 4); // 'when' (the timestamp) does not matter, is set to 0
header[EVENT_TYPE_OFFSET] = ROTATE_EVENT;
char* p = log_file_name+dirname_length(log_file_name);
uint ident_len = (uint) strlen(p);
ulong event_len = ident_len + ROTATE_EVENT_OVERHEAD;
ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN;
int4store(header + SERVER_ID_OFFSET, server_id);
int4store(header + EVENT_LEN_OFFSET, event_len);
int2store(header + FLAGS_OFFSET, 0);
......@@ -72,9 +90,9 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
if (my_net_write(net, (char*)packet->ptr(), packet->length()))
{
*errmsg = "failed on my_net_write()";
return -1;
DBUG_RETURN(-1);
}
return 0;
DBUG_RETURN(0);
}
static int send_file(THD *thd)
......@@ -310,6 +328,36 @@ int purge_master_logs_before_date(THD* thd, time_t purge_time)
return purge_error_message(thd ,res);
}
/*
  Turn the result code of a binlog event read into an error state.

  SYNOPSIS
    test_for_non_eof_log_read_errors()
    error    result code of the read (compared against the LOG_READ_*
             constants)
    errmsg   in/out: when error is not LOG_READ_EOF, set to a static string
             describing the failure; left untouched on LOG_READ_EOF

  NOTES
    errmsg is taken by reference. With a plain "const char *" parameter the
    assignments below would only modify a local copy and the caller would
    keep its previous message. Call sites that pass their errmsg variable
    directly remain valid.

  RETURN
    0        error was LOG_READ_EOF (end of log, not a failure)
    error    otherwise; my_errno has been set to
             ER_MASTER_FATAL_ERROR_READING_BINLOG
*/
int test_for_non_eof_log_read_errors(int error, const char *&errmsg)
{
  if (error == LOG_READ_EOF)
    return 0;
  my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
  switch (error) {
  case LOG_READ_BOGUS:
    errmsg= "bogus data in log event";
    break;
  case LOG_READ_TOO_LARGE:
    /* Adjacent literals are concatenated; the text is unchanged */
    errmsg= "log event entry exceeded max_allowed_packet; "
            "Increase max_allowed_packet on master";
    break;
  case LOG_READ_IO:
    errmsg= "I/O error reading log event";
    break;
  case LOG_READ_MEM:
    errmsg= "memory allocation failed reading log event";
    break;
  case LOG_READ_TRUNC:
    errmsg= "binlog truncated in the middle of event";
    break;
  default:
    errmsg= "unknown error reading log event on the master";
    break;
  }
  return error;
}
/*
TODO: Clean up loop to only have one call to send_file()
*/
......@@ -326,6 +374,7 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
int error;
const char *errmsg = "Unknown error";
NET* net = &thd->net;
pthread_mutex_t *log_lock;
#ifndef DBUG_OFF
int left_events = max_binlog_dump_events;
#endif
......@@ -385,18 +434,25 @@ impossible position";
goto err;
}
my_b_seek(&log, pos); // Seek will done on next read
/*
We need to start a packet with something other than 255
to distiquish it from error
to distinguish it from error
*/
packet->set("\0", 1, &my_charset_bin);
packet->set("\0", 1, &my_charset_bin); /* This is the start of a new packet */
/*
Tell the client about the log name with a fake Rotate event;
this is needed even if we also send a Format_description_log_event just
after, because that event does not contain the binlog's name.
Note that as this Rotate event is sent before Format_description_log_event,
the slave cannot have any info to understand this event's format, so the
header len of Rotate_log_event is FROZEN
(so in 5.0 it will have a header shorter than other events except
FORMAT_DESCRIPTION_EVENT).
Before 4.0.14 we called fake_rotate_event below only if
(pos == BIN_LOG_HEADER_SIZE), because if this is false then the slave
already knows the binlog's name.
Now we always call fake_rotate_event; if the slave already knew the log's
Since then we always call fake_rotate_event; if the slave already knew the log's
name (ex: CHANGE MASTER TO MASTER_LOG_FILE=...) this is useless but does
not harm much. It is nice for 3.23 (>=.58) slaves which test Rotate events
to see if the master is 4.0 (then they choose to stop because they can't
......@@ -413,15 +469,72 @@ impossible position";
*/
if (fake_rotate_event(net, packet, log_file_name, pos, &errmsg))
{
/*
This error code is not perfect, as fake_rotate_event() does not read
anything from the binlog; if it fails it's because of an error in
my_net_write(), fortunately it will say it in errmsg.
*/
my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
goto err;
}
packet->set("\0", 1, &my_charset_bin);
while (!net->error && net->vio != 0 && !thd->killed)
/*
We can set log_lock now, it does not move (it's a member of mysql_bin_log,
and it's already inited, and it will be destroyed only at shutdown).
*/
log_lock = mysql_bin_log.get_log_lock();
if (pos > BIN_LOG_HEADER_SIZE)
{
pthread_mutex_t *log_lock = mysql_bin_log.get_log_lock();
/* Try to find a Format_description_log_event at the beginning of the binlog */
if (!(error = Log_event::read_log_event(&log, packet, log_lock)))
{
/*
The packet has offsets equal to the normal offsets in a binlog event
+1 (the first character is \0).
*/
DBUG_PRINT("info",
("Looked for a Format_description_log_event, found event type %d",
(*packet)[EVENT_TYPE_OFFSET+1]));
if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
{
/*
mark this event with "log_pos=0", so that the slave
does not increment the master's binlog position
(rli->group_master_log_pos)
*/
int4store(packet->c_ptr() +LOG_POS_OFFSET+1,0);
/* send it */
if (my_net_write(net, (char*)packet->ptr(), packet->length()))
{
errmsg = "Failed on my_net_write()";
my_errno= ER_UNKNOWN_ERROR;
goto err;
}
/*
No need to save this event. We are only doing simple reads (no real
parsing of the events) so we don't need it. And so we don't need the
artificial Format_description_log_event of 3.23&4.x.
*/
}
}
else
if (test_for_non_eof_log_read_errors(error, errmsg))
goto err;
/*
else: it's EOF, nothing to do, go on reading next events, the
Format_description_log_event will be found naturally if it is written.
*/
/* reset the packet as we wrote to it in any case */
packet->set("\0", 1, &my_charset_bin);
} /* end of if (pos > BIN_LOG_HEADER_SIZE); if false, the Format_description_log_event
event will be found naturally. */
/* seek to the requested position, to start the requested dump */
my_b_seek(&log, pos); // Seek will be done on next read
while (!net->error && net->vio != 0 && !thd->killed)
{
while (!(error = Log_event::read_log_event(&log, packet, log_lock)))
{
#ifndef DBUG_OFF
......@@ -433,7 +546,7 @@ impossible position";
goto err;
}
#endif
if (my_net_write(net, (char*)packet->ptr(), packet->length()) )
if (my_net_write(net, (char*)packet->ptr(), packet->length()))
{
errmsg = "Failed on my_net_write()";
my_errno= ER_UNKNOWN_ERROR;
......@@ -454,34 +567,14 @@ impossible position";
}
/*
TODO: now that we are logging the offset, check to make sure
the recorded offset and the actual match
the recorded offset and the actual match.
Guilhem 2003-06: this is not true if this master is a slave <4.0.15
running with --log-slave-updates, because then log_pos may be the offset
in the-master-of-this-master's binlog.
*/
if (error != LOG_READ_EOF)
{
my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
switch (error) {
case LOG_READ_BOGUS:
errmsg = "bogus data in log event";
break;
case LOG_READ_TOO_LARGE:
errmsg = "log event entry exceeded max_allowed_packet; \
Increase max_allowed_packet on master";
break;
case LOG_READ_IO:
errmsg = "I/O error reading log event";
break;
case LOG_READ_MEM:
errmsg = "memory allocation failed reading log event";
break;
case LOG_READ_TRUNC:
errmsg = "binlog truncated in the middle of event";
break;
default:
errmsg = "unknown error reading log event on the master";
break;
}
if (test_for_non_eof_log_read_errors(error, errmsg))
goto err;
}
if (!(flags & BINLOG_DUMP_NON_BLOCK) &&
mysql_bin_log.is_active(log_file_name))
......@@ -615,8 +708,13 @@ Increase max_allowed_packet on master";
(void) my_close(file, MYF(MY_WME));
/*
Even if the previous log contained a Rotate_log_event, we still fake
one.
Call fake_rotate_event() in case the previous log (the one which we have
just finished reading) did not contain a Rotate event (for example (I
don't know any other example) the previous log was the last one before
the master was shutdown & restarted).
This way we tell the slave about the new log's name and position.
If the binlog is 5.0, the next event we are going to read and send is
Format_description_log_event.
*/
if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0 ||
fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE, &errmsg))
......@@ -1112,7 +1210,7 @@ int change_master(THD* thd, MASTER_INFO* mi)
mi->rli.group_relay_log_name,
mi->rli.group_relay_log_pos,
0 /*no data lock*/,
&msg))
&msg, 0))
{
net_printf(thd,0,"Failed initializing relay log position: %s",msg);
unlock_slave_threads(mi);
......@@ -1197,6 +1295,8 @@ int show_binlog_events(THD* thd)
const char *errmsg = 0;
IO_CACHE log;
File file = -1;
Format_description_log_event *description_event= new
Format_description_log_event(3); /* MySQL 4.0 by default */
Log_event::init_show_field_list(&field_list);
if (protocol-> send_fields(&field_list, 1))
......@@ -1235,10 +1335,35 @@ int show_binlog_events(THD* thd)
goto err;
pthread_mutex_lock(log_lock);
/*
open_binlog() sought to position 4.
Read the first event in case it's a Format_description_log_event, to know the
format. If there's no such event, we are 3.23 or 4.x. This code, like
before, can't read 3.23 binlogs.
This code will fail on a mixed relay log (one which has Format_desc then
Rotate then Format_desc).
*/
ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event);
if (ev)
{
if (ev->get_type_code() == FORMAT_DESCRIPTION_EVENT)
description_event= (Format_description_log_event*) ev;
else
delete ev;
}
my_b_seek(&log, pos);
if (!description_event->is_valid())
{
errmsg="Invalid Format_description event; could be out of memory";
goto err;
}
for (event_count = 0;
(ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,0)); )
(ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event)); )
{
if (event_count >= limit_start &&
ev->net_send(protocol, linfo.log_file_name, pos))
......@@ -1267,6 +1392,7 @@ int show_binlog_events(THD* thd)
}
err:
delete description_event;
if (file >= 0)
{
end_io_cache(&log);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment