Commit f3855774 authored by unknown's avatar unknown

auto-ROLLBACK if binlog was not closed properly

auto-commit on Xid_log_event


client/mysqlbinlog.cc:
  auto-ROLLBACK if binlog was not closed properly.
mysql-test/r/ctype_ucs.result:
  results updated
mysql-test/r/mix_innodb_myisam_binlog.result:
  results updated
mysql-test/r/mysqlbinlog2.result:
  results updated
mysql-test/r/rpl_relayrotate.result:
  results updated
mysql-test/r/user_var.result:
  results updated
mysql-test/t/ctype_ucs.test:
  finalize binlog before calling mysqlbinlog
mysql-test/t/user_var.test:
  finalize binlog before calling mysqlbinlog
sql/log_event.cc:
  commit at Xid_log_event
  comments edited
sql/mysqld.cc:
  free(0) fixed
sql/slave.cc:
  rollback at fake Rotate_log_event
sql/sql_class.h:
  no commit_or_rollback argument for binlog->write(THD *thd, IO_CACHE *cache)
sql/log.cc:
  don't write "COMMIT" query, Xid_log_event is enough
sql/log_event.h:
  more comments for LOG_EVENT_BINLOG_IN_USE_F
  LOG_EVENT_FORCE_ROLLBACK_F added
sql/sql_repl.cc:
  rollback at Rotate_log_event.
  don't consider binlog corrupted if it was open when we read Format_description but closed when we got to the end
sql/sql_repl.h:
  style fix
parent 0820d47f
......@@ -1086,7 +1086,7 @@ at offset %lu ; this could be a log format error or read error",
/* EOF can't be hit here normally, so it's a real error */
die("Could not read a Rotate_log_event event \
at offset %lu ; this could be a log format error or read error",
tmp_pos);
tmp_pos);
}
else
break;
......@@ -1157,9 +1157,16 @@ static int dump_local_log_entries(const char* logname)
Log_event* ev = Log_event::read_log_event(file, description_event);
if (!ev)
{
if (file->error)
/*
If the binlog wasn't closed properly (the "in use" flag is set), don't
complain about corruption, but issue a "ROLLBACK" to annihilate the
half-logged transaction. Otherwise, treat it as EOF and move to the
next binlog.
*/
if (description_event->flags & LOG_EVENT_BINLOG_IN_USE_F)
fprintf(result_file, "ROLLBACK;\n");
else if (file->error)
{
fprintf(stderr,
fprintf(stderr,
"Could not read entry at offset %s:"
"Error in log format or read error\n",
llstr(old_off,llbuff));
......
......@@ -527,6 +527,7 @@ show binlog events from 96;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 User var 1 136 @`v`=_ucs2 0x006100620063 COLLATE ucs2_general_ci
master-bin.000001 136 Query 1 219 use `test`; insert into t2 values (@v)
flush logs;
/*!40019 SET @@session.max_insert_delayed_threads=0*/;
SET @`v`:=_ucs2 0x006100620063 COLLATE ucs2_general_ci;
use test;
......
......@@ -11,8 +11,7 @@ Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(1)
master-bin.000001 239 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 327 Xid 1 # xid=7
master-bin.000001 354 Query 1 # use `test`; COMMIT
master-bin.000001 327 Xid 1 # COMMIT /* xid=7 */
delete from t1;
delete from t2;
reset master;
......@@ -48,8 +47,7 @@ master-bin.000001 239 Query 1 # use `test`; savepoint my_savepoint
master-bin.000001 318 Query 1 # use `test`; insert into t1 values(4)
master-bin.000001 399 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 487 Query 1 # use `test`; rollback to savepoint my_savepoint
master-bin.000001 578 Xid 1 # xid=24
master-bin.000001 605 Query 1 # use `test`; COMMIT
master-bin.000001 578 Xid 1 # COMMIT /* xid=24 */
delete from t1;
delete from t2;
reset master;
......@@ -76,8 +74,7 @@ master-bin.000001 318 Query 1 # use `test`; insert into t1 values(6)
master-bin.000001 399 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 487 Query 1 # use `test`; rollback to savepoint my_savepoint
master-bin.000001 578 Query 1 # use `test`; insert into t1 values(7)
master-bin.000001 659 Xid 1 # xid=36
master-bin.000001 686 Query 1 # use `test`; COMMIT
master-bin.000001 659 Xid 1 # COMMIT /* xid=36 */
delete from t1;
delete from t2;
reset master;
......@@ -106,9 +103,8 @@ show binlog events from 96;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(9)
master-bin.000001 239 Xid 1 # xid=59
master-bin.000001 266 Query 1 # use `test`; COMMIT
master-bin.000001 329 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 239 Xid 1 # COMMIT /* xid=59 */
master-bin.000001 266 Query 1 # use `test`; insert into t2 select * from t1
delete from t1;
delete from t2;
reset master;
......@@ -119,22 +115,19 @@ show binlog events from 96;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(10)
master-bin.000001 240 Xid 1 # xid=65
master-bin.000001 267 Query 1 # use `test`; COMMIT
master-bin.000001 330 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 240 Xid 1 # COMMIT /* xid=65 */
master-bin.000001 267 Query 1 # use `test`; insert into t2 select * from t1
insert into t1 values(11);
commit;
show binlog events from 96;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(10)
master-bin.000001 240 Xid 1 # xid=65
master-bin.000001 267 Query 1 # use `test`; COMMIT
master-bin.000001 330 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 418 Query 1 # use `test`; BEGIN
master-bin.000001 480 Query 1 # use `test`; insert into t1 values(11)
master-bin.000001 562 Xid 1 # xid=67
master-bin.000001 589 Query 1 # use `test`; COMMIT
master-bin.000001 240 Xid 1 # COMMIT /* xid=65 */
master-bin.000001 267 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 355 Query 1 # use `test`; BEGIN
master-bin.000001 417 Query 1 # use `test`; insert into t1 values(11)
master-bin.000001 499 Xid 1 # COMMIT /* xid=67 */
alter table t2 engine=INNODB;
delete from t1;
delete from t2;
......@@ -148,8 +141,7 @@ Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(12)
master-bin.000001 240 Query 1 # use `test`; insert into t2 select * from t1
master-bin.000001 328 Xid 1 # xid=77
master-bin.000001 355 Query 1 # use `test`; COMMIT
master-bin.000001 328 Xid 1 # COMMIT /* xid=77 */
delete from t1;
delete from t2;
reset master;
......@@ -173,8 +165,7 @@ show binlog events from 96;
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(14)
master-bin.000001 240 Xid 1 # xid=93
master-bin.000001 267 Query 1 # use `test`; COMMIT
master-bin.000001 240 Xid 1 # COMMIT /* xid=93 */
delete from t1;
delete from t2;
reset master;
......@@ -195,8 +186,7 @@ Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 96 Query 1 # use `test`; BEGIN
master-bin.000001 158 Query 1 # use `test`; insert into t1 values(16)
master-bin.000001 240 Query 1 # use `test`; insert into t1 values(18)
master-bin.000001 322 Xid 1 # xid=104
master-bin.000001 349 Query 1 # use `test`; COMMIT
master-bin.000001 322 Xid 1 # COMMIT /* xid=104 */
delete from t1;
delete from t2;
alter table t2 type=MyISAM;
......
......@@ -144,6 +144,7 @@ SET TIMESTAMP=1579609943;
SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1;
SET @@session.sql_mode=0;
insert into t1 values(null, "f");
ROLLBACK;
--- offset --
/*!40019 SET @@session.max_insert_delayed_threads=0*/;
......@@ -171,6 +172,7 @@ SET TIMESTAMP=1579609943;
SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1;
SET @@session.sql_mode=0;
insert into t1 values(null, "f");
ROLLBACK;
--- start-position --
/*!40019 SET @@session.max_insert_delayed_threads=0*/;
......@@ -188,6 +190,7 @@ SET TIMESTAMP=1579609943;
SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1;
SET @@session.sql_mode=0;
insert into t1 values(null, "f");
ROLLBACK;
--- stop-position --
/*!40019 SET @@session.max_insert_delayed_threads=0*/;
......@@ -233,6 +236,7 @@ SET TIMESTAMP=1579609943;
SET @@session.foreign_key_checks=1, @@session.sql_auto_is_null=1, @@session.unique_checks=1;
SET @@session.sql_mode=0;
insert into t1 values(null, "f");
ROLLBACK;
--- stop-datetime --
/*!40019 SET @@session.max_insert_delayed_threads=0*/;
......
......@@ -18,5 +18,5 @@ max(a)
8000
show slave status;
Slave_IO_State Master_Host Master_User Master_Port Connect_Retry Master_Log_File Read_Master_Log_Pos Relay_Log_File Relay_Log_Pos Relay_Master_Log_File Slave_IO_Running Slave_SQL_Running Replicate_Do_DB Replicate_Ignore_DB Replicate_Do_Table Replicate_Ignore_Table Replicate_Wild_Do_Table Replicate_Wild_Ignore_Table Last_Errno Last_Error Skip_Counter Exec_Master_Log_Pos Relay_Log_Space Until_Condition Until_Log_File Until_Log_Pos Master_SSL_Allowed Master_SSL_CA_File Master_SSL_CA_Path Master_SSL_Cert Master_SSL_Cipher Master_SSL_Key Seconds_Behind_Master
# 127.0.0.1 root MASTER_MYPORT 1 master-bin.000001 687235 # # master-bin.000001 Yes Yes 0 0 687235 # None 0 No #
# 127.0.0.1 root MASTER_MYPORT 1 master-bin.000001 687172 # # master-bin.000001 Yes Yes 0 0 687172 # None 0 No #
drop table t1;
......@@ -179,6 +179,7 @@ master-bin.000001 96 User var 1 137 @`a b`=_latin1 0x68656C6C6F COLLATE latin1_s
master-bin.000001 137 Query 1 223 use `test`; INSERT INTO t1 VALUES(@`a b`)
master-bin.000001 223 User var 1 265 @`var1`=_latin1 0x273B616161 COLLATE latin1_swedish_ci
master-bin.000001 265 Query 1 351 use `test`; insert into t1 values (@var1)
flush logs;
/*!40019 SET @@session.max_insert_delayed_threads=0*/;
SET @`a b`:=_latin1 0x68656C6C6F COLLATE latin1_swedish_ci;
use test;
......
......@@ -339,6 +339,7 @@ set @v=convert('abc' using ucs2);
reset master;
insert into t2 values (@v);
show binlog events from 96;
flush logs;
# more important than SHOW BINLOG EVENTS, mysqlbinlog (where we
# absolutely need variable names to be quoted and strings to be
# escaped).
......
......@@ -110,6 +110,7 @@ INSERT INTO t1 VALUES(@`a b`);
set @var1= "';aaa";
insert into t1 values (@var1);
show binlog events from 96;
flush logs;
# more important than SHOW BINLOG EVENTS, mysqlbinlog (where we
# absolutely need variable names to be quoted and strings to be
# escaped).
......
......@@ -119,7 +119,7 @@ static int binlog_commit(THD *thd, bool all)
}
/* Update the binary log as we have cached some queries */
error= mysql_bin_log.write(thd, trans_log, 1);
error= mysql_bin_log.write(thd, trans_log);
binlog_cleanup_trans(trans_log);
DBUG_RETURN(error);
}
......@@ -142,7 +142,11 @@ static int binlog_rollback(THD *thd, bool all)
non-transactional table inside a transaction...)
*/
if (unlikely(thd->options & OPTION_STATUS_NO_TRANS_UPDATE))
error= mysql_bin_log.write(thd, trans_log, 0);
{
Query_log_event qev(thd, "ROLLBACK", 8, TRUE, FALSE);
qev.write(trans_log);
error= mysql_bin_log.write(thd, trans_log);
}
binlog_cleanup_trans(trans_log);
DBUG_RETURN(error);
}
......@@ -425,7 +429,6 @@ const char *MYSQL_LOG::generate_name(const char *log_name,
const char *suffix,
bool strip_ext, char *buff)
{
DBUG_ASSERT(!strip_ext || (log_name && log_name[0]));
if (!log_name || !log_name[0])
{
/*
......@@ -611,6 +614,7 @@ bool MYSQL_LOG::open(const char *log_name,
even if this is not the very first binlog.
*/
Format_description_log_event s(BINLOG_VERSION);
s.flags|= LOG_EVENT_BINLOG_IN_USE_F;
if (!s.is_valid())
goto err;
if (null_created_arg)
......@@ -1779,8 +1783,6 @@ uint MYSQL_LOG::next_file_id()
write()
thd
cache The cache to copy to the binlog
is_commit If true, will write "COMMIT" in the end, if false will
write "ROLLBACK".
NOTE
- We only come here if there is something in the cache.
......@@ -1799,7 +1801,7 @@ uint MYSQL_LOG::next_file_id()
same updates are run on the slave.
*/
bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool is_commit)
bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache)
{
VOID(pthread_mutex_lock(&LOCK_log));
DBUG_ENTER("MYSQL_LOG::write(cache");
......@@ -1809,18 +1811,10 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool is_commit)
uint length;
/*
Add the "BEGIN" and "COMMIT" in the binlog around transactions
which may contain more than 1 SQL statement. If we run with
AUTOCOMMIT=1, then MySQL immediately writes each SQL statement to
the binlog when the statement has been completed. No need to add
"BEGIN" ... "COMMIT" around such statements. Otherwise, MySQL uses
trans_log (that is thd->ha_data[binlog_hton.slot]) to cache
the SQL statements until the explicit commit, and at the commit writes
the contents in trans_log to the binlog.
We write the "BEGIN" mark first in the buffer (trans_log) where we
store the SQL statements for a transaction. At the transaction commit
we will add the "COMMIT mark and write the buffer to the binlog.
Log "BEGIN" at the beginning of the transaction.
which may contain more than 1 SQL statement.
There is no need to append "COMMIT", as it's already in the 'cache'
(in fact, Xid_log_event is there which does the commit on slaves)
*/
{
Query_log_event qinfo(thd, "BEGIN", 5, TRUE, FALSE);
......@@ -1846,6 +1840,7 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool is_commit)
if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
goto err;
length=my_b_bytes_in_cache(cache);
DBUG_EXECUTE_IF("half_binlogged_transaction", length-=100;);
do
{
/* Write data to the binary log file */
......@@ -1854,21 +1849,9 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool is_commit)
cache->read_pos=cache->read_end; // Mark buffer used up
} while ((length=my_b_fill(cache)));
/*
We write the command "COMMIT" as the last SQL command in the
binlog segment cached for this transaction
*/
{
Query_log_event qinfo(thd,
is_commit ? "COMMIT" : "ROLLBACK",
is_commit ? 6 : 8,
TRUE, FALSE);
qinfo.error_code= 0;
if (qinfo.write(&log_file) || flush_io_cache(&log_file) ||
sync_binlog(&log_file))
if (flush_io_cache(&log_file) || sync_binlog(&log_file))
goto err;
}
DBUG_EXECUTE_IF("half_binlogged_transaction", abort(););
if (cache->error) // Error on read
{
sql_print_error(ER(ER_ERROR_ON_READ), cache->file_name, errno);
......@@ -2093,10 +2076,10 @@ void MYSQL_LOG::close(uint exiting)
end_io_cache(&log_file);
/* don't pwrite in a file opened with O_APPEND - it doesn't work */
if (log_file.type == WRITE_CACHE)
if (log_file.type == WRITE_CACHE && log_type == LOG_BIN)
{
my_off_t offset= BIN_LOG_HEADER_SIZE + FLAGS_OFFSET;
char flags=LOG_EVENT_BINLOG_CLOSED_F;
char flags=0; // clearing LOG_EVENT_BINLOG_IN_USE_F
my_pwrite(log_file.file, &flags, 1, offset, MYF(0));
}
......@@ -2944,15 +2927,12 @@ int TC_LOG_BINLOG::open(const char *opt_name)
goto err;
}
if (((ev= Log_event::read_log_event(&log, 0, &fdle))) &&
(ev->get_type_code() == FORMAT_DESCRIPTION_EVENT))
{
if (ev->flags & LOG_EVENT_BINLOG_CLOSED_F)
error=0;
else
if ((ev= Log_event::read_log_event(&log, 0, &fdle)) &&
ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
error= recover(&log, (Format_description_log_event *)ev);
}
// else nothing to do (probably MySQL 4.x binlog)
else
error=0;
delete ev;
end_io_cache(&log);
......@@ -3009,6 +2989,8 @@ int TC_LOG_BINLOG::recover(IO_CACHE *log, Format_description_log_event *fdle)
init_alloc_root(&mem_root, tc_log_page_size, tc_log_page_size);
fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
while ((ev= Log_event::read_log_event(log,0,fdle)) && ev->is_valid())
{
if (ev->get_type_code() == XID_EVENT)
......
......@@ -292,10 +292,10 @@ Log_event::Log_event(THD* thd_arg, uint16 flags_arg, bool using_trans)
/*
This minimal constructor is for when you are not even sure that there is a
valid THD. For example in the server when we are shutting down or flushing
logs after receiving a SIGHUP (then we must write a Rotate to the binlog but
we have no THD, so we need this minimal constructor).
This minimal constructor is for when you are not even sure that there
is a valid THD. For example in the server when we are shutting down or
flushing logs after receiving a SIGHUP (then we must write a Rotate to
the binlog but we have no THD, so we need this minimal constructor).
*/
Log_event::Log_event()
......@@ -331,14 +331,14 @@ Log_event::Log_event(const char* buf,
/* 4.0 or newer */
log_pos= uint4korr(buf + LOG_POS_OFFSET);
/*
If the log is 4.0 (so here it can only be a 4.0 relay log read by the SQL
thread or a 4.0 master binlog read by the I/O thread), log_pos is the
beginning of the event: we transform it into the end of the event, which is
more useful.
But how do you know that the log is 4.0: you know it if description_event
is version 3 *and* you are not reading a Format_desc (remember that
mysqlbinlog starts by assuming that 5.0 logs are in 4.0 format, until it
finds a Format_desc).
If the log is 4.0 (so here it can only be a 4.0 relay log read by
the SQL thread or a 4.0 master binlog read by the I/O thread),
log_pos is the beginning of the event: we transform it into the end
of the event, which is more useful.
But how do you know that the log is 4.0: you know it if
description_event is version 3 *and* you are not reading a
Format_desc (remember that mysqlbinlog starts by assuming that 5.0
logs are in 4.0 format, until it finds a Format_desc).
*/
if (description_event->binlog_version==3 &&
buf[EVENT_TYPE_OFFSET]<FORMAT_DESCRIPTION_EVENT && log_pos)
......@@ -346,13 +346,13 @@ Log_event::Log_event(const char* buf,
/*
If log_pos=0, don't change it. log_pos==0 is a marker to mean
"don't change rli->group_master_log_pos" (see
inc_group_relay_log_pos()). As it is unreal log_pos, adding the event
len's is nonsense. For example, a fake Rotate event should
inc_group_relay_log_pos()). As it is unreal log_pos, adding the
event len's is nonsense. For example, a fake Rotate event should
not have its log_pos (which is 0) changed or it will modify
Exec_master_log_pos in SHOW SLAVE STATUS, displaying a nonsense value
of (a non-zero offset which does not exist in the master's binlog, so
which will cause problems if the user uses this value in
CHANGE MASTER).
Exec_master_log_pos in SHOW SLAVE STATUS, displaying a nonsense
value of (a non-zero offset which does not exist in the master's
binlog, so which will cause problems if the user uses this value
in CHANGE MASTER).
*/
log_pos+= uint4korr(buf + EVENT_LEN_OFFSET);
}
......@@ -363,16 +363,17 @@ Log_event::Log_event(const char* buf,
(buf[EVENT_TYPE_OFFSET] == ROTATE_EVENT))
{
/*
These events always have a header which stops here (i.e. their header is
FROZEN).
These events always have a header which stops here (i.e. their
header is FROZEN).
*/
/*
Initialization to zero of all other Log_event members as they're not
specified. Currently there are no such members; in the future there will
be an event UID (but Format_description and Rotate don't need this UID,
as they are not propagated through --log-slave-updates (remember the UID
is used to not play a query twice when you have two masters which are
slaves of a 3rd master). Then we are done.
Initialization to zero of all other Log_event members as they're
not specified. Currently there are no such members; in the future
there will be an event UID (but Format_description and Rotate
don't need this UID, as they are not propagated through
--log-slave-updates (remember the UID is used to not play a query
twice when you have two masters which are slaves of a 3rd master).
Then we are done.
*/
return;
}
......@@ -405,10 +406,10 @@ int Log_event::exec_event(struct st_relay_log_info* rli)
if (rli)
{
/*
If in a transaction, and if the slave supports transactions,
just inc_event_relay_log_pos(). We only have to check for OPTION_BEGIN
(not OPTION_NOT_AUTOCOMMIT) as transactions are logged
with BEGIN/COMMIT, not with SET AUTOCOMMIT= .
If in a transaction, and if the slave supports transactions, just
inc_event_relay_log_pos(). We only have to check for OPTION_BEGIN
(not OPTION_NOT_AUTOCOMMIT) as transactions are logged with
BEGIN/COMMIT, not with SET AUTOCOMMIT= .
CAUTION: opt_using_transactions means
innodb || bdb ; suppose the master supports InnoDB and BDB,
......@@ -416,17 +417,18 @@ int Log_event::exec_event(struct st_relay_log_info* rli)
will arise:
- suppose an InnoDB table is created on the master,
- then it will be MyISAM on the slave
- but as opt_using_transactions is true, the slave will believe he is
transactional with the MyISAM table. And problems will come when one
does START SLAVE; STOP SLAVE; START SLAVE; (the slave will resume at
BEGIN whereas there has not been any rollback). This is the problem of
using opt_using_transactions instead of a finer
"does the slave support _the_transactional_handler_used_on_the_master_".
More generally, we'll have problems when a query mixes a transactional
handler and MyISAM and STOP SLAVE is issued in the middle of the
"transaction". START SLAVE will resume at BEGIN while the MyISAM table
has already been updated.
- but as opt_using_transactions is true, the slave will believe he
is transactional with the MyISAM table. And problems will come
when one does START SLAVE; STOP SLAVE; START SLAVE; (the slave
will resume at BEGIN whereas there has not been any rollback).
This is the problem of using opt_using_transactions instead of a
finer "does the slave support
_the_transactional_handler_used_on_the_master_".
More generally, we'll have problems when a query mixes a
transactional handler and MyISAM and STOP SLAVE is issued in the
middle of the "transaction". START SLAVE will resume at BEGIN
while the MyISAM table has already been updated.
*/
if ((thd->options & OPTION_BEGIN) && opt_using_transactions)
rli->inc_event_relay_log_pos();
......@@ -435,8 +437,8 @@ int Log_event::exec_event(struct st_relay_log_info* rli)
rli->inc_group_relay_log_pos(log_pos);
flush_relay_log_info(rli);
/*
Note that Rotate_log_event::exec_event() does not call this function,
so there is no chance that a fake rotate event resets
Note that Rotate_log_event::exec_event() does not call this
function, so there is no chance that a fake rotate event resets
last_master_timestamp.
*/
rli->last_master_timestamp= when;
......@@ -667,6 +669,7 @@ Log_event* Log_event::read_log_event(IO_CACHE* file,
const Format_description_log_event *description_event)
#endif
{
DBUG_ENTER("Log_event::read_log_event(IO_CACHE *, Format_description_log_event *");
DBUG_ASSERT(description_event);
char head[LOG_EVENT_MINIMAL_HEADER_LEN];
/*
......@@ -687,11 +690,11 @@ Log_event* Log_event::read_log_event(IO_CACHE* file,
failed my_b_read"));
UNLOCK_MUTEX;
/*
No error here; it could be that we are at the file's end. However if the
next my_b_read() fails (below), it will be an error as we were able to
read the first bytes.
No error here; it could be that we are at the file's end. However
if the next my_b_read() fails (below), it will be an error as we
were able to read the first bytes.
*/
return 0;
DBUG_RETURN(0);
}
uint data_len = uint4korr(head + EVENT_LEN_OFFSET);
......@@ -733,10 +736,11 @@ failed my_b_read"));
err:
UNLOCK_MUTEX;
if (error)
if (!res)
{
sql_print_error("\
Error in Log_event::read_log_event(): '%s', data_len: %d, event_type: %d",
DBUG_ASSERT(error);
sql_print_error("Error in Log_event::read_log_event(): "
"'%s', data_len: %d, event_type: %d",
error,data_len,head[EVENT_TYPE_OFFSET]);
my_free(buf, MYF(MY_ALLOW_ZERO_PTR));
/*
......@@ -749,7 +753,7 @@ Error in Log_event::read_log_event(): '%s', data_len: %d, event_type: %d",
*/
file->error= -1;
}
return res;
DBUG_RETURN(res);
}
......@@ -830,14 +834,15 @@ Log_event* Log_event::read_log_event(const char* buf, uint event_len,
ev= NULL;
break;
}
/*
is_valid() are small event-specific sanity tests which are important; for
example there are some my_malloc() in constructors
(e.g. Query_log_event::Query_log_event(char*...)); when these my_malloc()
fail we can't return an error out of the constructor (because constructor
is "void") ; so instead we leave the pointer we wanted to allocate
(e.g. 'query') to 0 and we test it in is_valid(). Same for
Format_description_log_event, member 'post_header_len'.
is_valid() are small event-specific sanity tests which are
important; for example there are some my_malloc() in constructors
(e.g. Query_log_event::Query_log_event(char*...)); when these
my_malloc() fail we can't return an error out of the constructor
(because constructor is "void") ; so instead we leave the pointer we
wanted to allocate (e.g. 'query') to 0 and we test it in is_valid().
Same for Format_description_log_event, member 'post_header_len'.
*/
if (!ev || !ev->is_valid())
{
......@@ -1279,18 +1284,12 @@ void Query_log_event::print(FILE* file, bool short_form,
my_fwrite(file, (byte*) buff, (uint) (end-buff),MYF(MY_NABP | MY_WME));
if (flags & LOG_EVENT_THREAD_SPECIFIC_F)
fprintf(file,"SET @@session.pseudo_thread_id=%lu;\n",(ulong)thread_id);
/*
Now the session variables;
it's more efficient to pass SQL_MODE as a number instead of a
comma-separated list.
FOREIGN_KEY_CHECKS, SQL_AUTO_IS_NULL, UNIQUE_CHECKS are session-only
variables (they have no global version; they're not listed in sql_class.h),
The tests below work for pure binlogs or pure relay logs. Won't work for
mixed relay logs but we don't create mixed relay logs (that is, there is no
relay log with a format change except within the 3 first events, which
mysqlbinlog handles gracefully). So this code should always be good.
If flags2_inited==0, this is an event from 3.23 or 4.0; nothing to
print (remember we don't produce mixed relay logs so there cannot be
5.0 events before that one so there is nothing to reset).
*/
if (likely(flags2_inited)) /* likely as this will mainly read 5.0 logs */
{
/* tmp is a bitmask of bits which have changed. */
......@@ -1319,9 +1318,16 @@ void Query_log_event::print(FILE* file, bool short_form,
}
/*
If flags2_inited==0, this is an event from 3.23 or 4.0; nothing to print
(remember we don't produce mixed relay logs so there cannot be 5.0 events
before that one so there is nothing to reset).
Now the session variables;
it's more efficient to pass SQL_MODE as a number instead of a
comma-separated list.
FOREIGN_KEY_CHECKS, SQL_AUTO_IS_NULL, UNIQUE_CHECKS are session-only
variables (they have no global version; they're not listed in
sql_class.h). The tests below work for pure binlogs or pure relay
logs. Won't work for mixed relay logs but we don't create mixed
relay logs (that is, there is no relay log with a format change
except within the 3 first events, which mysqlbinlog handles
gracefully). So this code should always be good.
*/
if (likely(sql_mode_inited))
......@@ -1687,15 +1693,19 @@ int Start_log_event_v3::exec_event(struct st_relay_log_info* rli)
}
/*
As a transaction NEVER spans on 2 or more binlogs:
if we have an active transaction at this point, the master died while
writing the transaction to the binary log, i.e. while flushing the binlog
cache to the binlog. As the write was started, the transaction had been
committed on the master, so we lack of information to replay this
transaction on the slave; all we can do is stop with error.
Note: this event could be sent by the master to inform us of the format
of its binlog; in other words maybe it is not at its original place when
it comes to us; we'll know this by checking log_pos ("artificial" events
have log_pos == 0).
if we have an active transaction at this point, the master died
while writing the transaction to the binary log, i.e. while
flushing the binlog cache to the binlog. As the write was started,
the transaction had been committed on the master, so we lack the
information to replay this transaction on the slave; all we can do
is stop with error.
Note: this event could be sent by the master to inform us of the
format of its binlog; in other words maybe it is not at its
original place when it comes to us; we'll know this by checking
log_pos ("artificial" events have log_pos == 0).
TODO test whether it's really necessary, as slave.cc does ROLLBACK
itself
*/
if (!artificial_event && (thd->options & OPTION_BEGIN))
{
......@@ -1959,7 +1969,7 @@ int Format_description_log_event::exec_event(struct st_relay_log_info* rli)
/**************************************************************************
Load_log_event methods
General note about Load_log_event: the binlogging of LOAD DATA INFILE is
going to be changed in 5.0 (or maybe in 4.1; not decided yet).
going to be changed in 5.0 (or maybe in 5.1; not decided yet).
However, the 5.0 slave could still have to read such events (from a 4.x
master), convert them (which just means maybe expand the header, when 5.0
servers have a UID in events) (remember that whatever is after the header
......@@ -2978,9 +2988,10 @@ int Rand_log_event::exec_event(struct st_relay_log_info* rli)
#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
void Xid_log_event::pack_info(Protocol *protocol)
{
char buf[64], *pos;
pos= strmov(buf, "xid=");
char buf[128], *pos;
pos= strmov(buf, "COMMIT /* xid=");
pos= longlong10_to_str(xid, pos, 10);
pos= strmov(pos, " */");
protocol->store(buf, (uint) (pos-buf), &my_charset_bin);
}
#endif
......@@ -3021,6 +3032,7 @@ void Xid_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_eve
fprintf(file, "\tXid = %s\n", buf);
fflush(file);
}
fprintf(file, "COMMIT;\n");
}
#endif /* MYSQL_CLIENT */
......@@ -3029,7 +3041,10 @@ void Xid_log_event::print(FILE* file, bool short_form, LAST_EVENT_INFO* last_eve
int Xid_log_event::exec_event(struct st_relay_log_info* rli)
{
rli->inc_event_relay_log_pos();
return 0;
/* For a slave Xid_log_event is COMMIT */
thd->options&= ~(ulong) (OPTION_BEGIN | OPTION_STATUS_NO_TRANS_UPDATE);
thd->server_status&= ~SERVER_STATUS_IN_TRANS;
return ha_commit(thd);
}
#endif /* !MYSQL_CLIENT */
......
......@@ -303,15 +303,38 @@ struct sql_ex_info
#endif
/*
This flag only makes sense for Format_description_log_event.
It is set not when the event is written, but when a binlog file
is closed. It serves as a reliable indicator that binlog was
closed correctly. (Stop_log_event is not enough, there's always
a small chance that mysqld crashes in the middle of insert
and end of the binlog would look like a Stop_log_event)
This flag only makes sense for Format_description_log_event. It is set
when the event is written, and *reset* when a binlog file is
closed (yes, it's the only case when MySQL modifies already written
part of binlog). Thus it is a reliable indicator that binlog was
closed correctly. (Stop_log_event is not enough, there's always a
small chance that mysqld crashes in the middle of insert and end of
the binlog would look like a Stop_log_event).
This flag is used to detect a restart after a crash,
and to provide "unbreakable" binlog. The problem is that on a crash
storage engines rollback automatically, while binlog does not.
To solve this we use this flag and automatically append ROLLBACK
to every non-closed binlog (append virtually, on reading; the file itself
is not changed). If this flag is found, mysqlbinlog simply prints "ROLLBACK".
The replication master does not abort on binlog corruption, but takes it as
EOF, and the replication slave forces a rollback in this case (see below).
Note that old binlogs do not have this flag set, so we get
backward-compatible behaviour.
*/
#define LOG_EVENT_BINLOG_CLOSED_F 0x1
#define LOG_EVENT_BINLOG_IN_USE_F 0x1
/*
This flag is only used for fake Rotate_log_event. When a master, doing
binlog dump, reaches the end of the binlog and fakes a rotate to make
the slave go to a new file, this flag is used if there was no
"natural" Rotate_log_event.
If this flag is set, the slave will execute ROLLBACK before going further.
*/
#define LOG_EVENT_FORCE_ROLLBACK_F 0x1
/*
If the query depends on the thread (for example: TEMPORARY TABLE).
......@@ -335,21 +358,22 @@ struct sql_ex_info
#define LOG_EVENT_SUPPRESS_USE_F 0x8
/*
OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be written
to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written into the
Format_description_log_event, so that if later we don't want to replicate a
variable we did replicate, or the contrary, it's doable. But it should not be
too hard to decide once for all of what we replicate and what we don't, among
the fixed 32 bits of thd->options.
OPTIONS_WRITTEN_TO_BIN_LOG are the bits of thd->options which must be
written to the binlog. OPTIONS_WRITTEN_TO_BINLOG could be written
into the Format_description_log_event, so that if later we don't want
to replicate a variable we did replicate, or the contrary, it's
doable. But it should not be too hard to decide once for all of what
we replicate and what we don't, among the fixed 32 bits of
thd->options.
I (Guilhem) have read through every option's usage, and it looks like
OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEYS are the only ones which alter
how the query modifies the table. It's good to replicate
OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the slave may insert data
slower than the master, in InnoDB.
OPTION_AUTO_IS_NULL and OPTION_NO_FOREIGN_KEYS are the only ones
which alter how the query modifies the table. It's good to replicate
OPTION_RELAXED_UNIQUE_CHECKS too because otherwise, the slave may
insert data slower than the master, in InnoDB.
OPTION_BIG_SELECTS is not needed (the slave thread runs with
max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed either, as
the manual says (because a too big in-memory temp table is automatically
written to disk).
max_join_size=HA_POS_ERROR) and OPTION_BIG_TABLES is not needed
either, as the manual says (because a too big in-memory temp table is
automatically written to disk).
*/
#define OPTIONS_WRITTEN_TO_BIN_LOG (OPTION_AUTO_IS_NULL | \
OPTION_NO_FOREIGN_KEY_CHECKS | OPTION_RELAXED_UNIQUE_CHECKS)
......@@ -470,14 +494,15 @@ class Log_event
ulong data_written;
/*
The master's server id (is preserved in the relay log; used to prevent from
infinite loops in circular replication).
The master's server id (is preserved in the relay log; used to
prevent from infinite loops in circular replication).
*/
uint32 server_id;
/*
Some 16 flags. Look above for LOG_EVENT_TIME_F, LOG_EVENT_FORCED_ROTATE_F,
LOG_EVENT_THREAD_SPECIFIC_F, and LOG_EVENT_SUPPRESS_USE_F for notes.
Some 16 flags. Look above for LOG_EVENT_TIME_F,
LOG_EVENT_FORCED_ROTATE_F, LOG_EVENT_THREAD_SPECIFIC_F, and
LOG_EVENT_SUPPRESS_USE_F for notes.
*/
uint16 flags;
......
......@@ -2684,7 +2684,7 @@ server.");
ln= mysql_bin_log.generate_name(opt_bin_logname, "-bin", 1, buf);
if (ln == buf)
{
my_free(opt_bin_logname, MYF(0));
my_free(opt_bin_logname, MYF(MY_ALLOW_ZERO_PTR));
opt_bin_logname=my_strdup(buf, MYF(0));
}
mysql_bin_log.open_index_file(opt_binlog_index_name, ln);
......
......@@ -670,11 +670,11 @@ int terminate_slave_thread(THD* thd, pthread_mutex_t* term_lock,
}
}
DBUG_ASSERT(thd != 0);
THD_CHECK_SENTRY(thd);
/*
Is is criticate to test if the slave is running. Otherwise, we might
It is critical to test if the slave is running. Otherwise, we might
be referencing freed memory trying to kick it
*/
THD_CHECK_SENTRY(thd);
while (*slave_running) // Should always be true
{
......@@ -2935,8 +2935,7 @@ static ulong read_event(MYSQL* mysql, MASTER_INFO *mi, bool* suppress_warnings)
*suppress_warnings= TRUE;
}
else
sql_print_error("Error reading packet from server: %s (\
server_errno=%d)",
sql_print_error("Error reading packet from server: %s ( server_errno=%d)",
mysql_error(mysql), mysql_errno(mysql));
return packet_error;
}
......@@ -3167,7 +3166,21 @@ static int exec_relay_log_event(THD* thd, RELAY_LOG_INFO* rli)
thd->set_time(); // time the query
thd->lex->current_select= 0;
if (!ev->when)
{
ev->when = time(NULL);
/*
fake Rotate: it means that normal execution flow of statements is
interrupted. Let's fake ROLLBACK to undo any half-executed transaction
*/
if (ev->get_type_code() == ROTATE_EVENT &&
ev->flags & LOG_EVENT_FORCE_ROLLBACK_F)
{
ha_rollback_stmt(thd);
ha_rollback(thd);
thd->options&= ~(ulong) (OPTION_BEGIN | OPTION_STATUS_NO_TRANS_UPDATE);
thd->server_status&= ~SERVER_STATUS_IN_TRANS;
}
}
ev->thd = thd;
exec_res = ev->exec_event(rli);
DBUG_ASSERT(rli->sql_thd==thd);
......@@ -3260,7 +3273,6 @@ extern "C" pthread_handler_decl(handle_slave_io,arg)
goto err;
}
thd->proc_info = "Connecting to master";
// we can get killed during safe_connect
if (!safe_connect(thd, mysql, mi))
......@@ -3354,9 +3366,9 @@ after reconnect");
bool suppress_warnings= 0;
/*
We say "waiting" because read_event() will wait if there's nothing to
read. But if there's something to read, it will not wait. The important
thing is to not confuse users by saying "reading" whereas we're in fact
receiving nothing.
read. But if there's something to read, it will not wait. The
important thing is to not confuse users by saying "reading" whereas
we're in fact receiving nothing.
*/
thd->proc_info = "Waiting for master to send event";
ulong event_len = read_event(mysql, mi, &suppress_warnings);
......@@ -3870,6 +3882,7 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev)
if (disconnect_slave_event_count)
events_till_disconnect++;
#endif
/*
If description_event_for_queue is format <4, there is conversion in the
relay log to the slave's format (4). And Rotate can mean upgrade or
......@@ -3893,8 +3906,8 @@ static int process_io_rotate(MASTER_INFO *mi, Rotate_log_event *rev)
}
/*
Reads a 3.23 event and converts it to the slave's format. This code was copied
from MySQL 4.0.
Reads a 3.23 event and converts it to the slave's format. This code was
copied from MySQL 4.0.
*/
static int queue_binlog_ver_1_event(MASTER_INFO *mi, const char *buf,
ulong event_len)
......@@ -4157,9 +4170,9 @@ int queue_event(MASTER_INFO* mi,const char* buf, ulong event_len)
to write this event again).
*/
/*
We are the only thread which reads/writes description_event_for_queue. The
relay_log struct does not move (though some members of it can change), so
we needn't any lock (no rli->data_lock, no log lock).
We are the only thread which reads/writes description_event_for_queue.
The relay_log struct does not move (though some members of it can
change), so we needn't any lock (no rli->data_lock, no log lock).
*/
Format_description_log_event* tmp;
const char* errmsg;
......
......@@ -301,7 +301,7 @@ class MYSQL_LOG: public TC_LOG
bool write(THD *thd, const char *query, uint query_length,
time_t query_start=0);
bool write(Log_event* event_info); // binary log write
bool write(THD *thd, IO_CACHE *cache, bool commit_or_rollback);
bool write(THD *thd, IO_CACHE *cache);
/*
v stands for vector
......@@ -314,11 +314,11 @@ class MYSQL_LOG: public TC_LOG
void make_log_name(char* buf, const char* log_ident);
bool is_active(const char* log_file_name);
int update_log_index(LOG_INFO* linfo, bool need_update_threads);
int purge_logs(const char *to_log, bool included,
int purge_logs(const char *to_log, bool included,
bool need_mutex, bool need_update_threads,
ulonglong *decrease_log_space);
int purge_logs_before_date(time_t purge_time);
int purge_first_log(struct st_relay_log_info* rli, bool included);
int purge_first_log(struct st_relay_log_info* rli, bool included);
bool reset_logs(THD* thd);
void close(uint exiting);
......
......@@ -31,24 +31,29 @@ static int binlog_dump_count = 0;
binlog) Rotate event, which contains the name of the binlog we are going to
send to the slave (because the slave may not know it if it just asked for
MASTER_LOG_FILE='', MASTER_LOG_POS=4).
< 4.0.14, fake_rotate_event() was called only if the requested pos was
4. After this version we always call it, so that a 3.23.58 slave can rely on
< 4.0.14, fake_rotate_event() was called only if the requested pos was 4.
After this version we always call it, so that a 3.23.58 slave can rely on
it to detect if the master is 4.0 (and stop) (the _fake_ Rotate event has
zeros in the good positions which, by chance, make it possible for the 3.23
slave to detect that this event is unexpected) (this is luck which happens
because the master and slave disagree on the size of the header of
Log_event).
Relying on the event length of the Rotate event instead of these well-placed
zeros was not possible as Rotate events have a variable-length part.
Relying on the event length of the Rotate event instead of these
well-placed zeros was not possible as Rotate events have a variable-length
part.
*/
static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
ulonglong position, const char** errmsg)
ulonglong position, int flags, const char** errmsg)
{
DBUG_ENTER("fake_rotate_event");
char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN];
memset(header, 0, 4); // 'when' (the timestamp) does not matter, is set to 0
char header[LOG_EVENT_HEADER_LEN], buf[ROTATE_HEADER_LEN+100];
/*
'when' (the timestamp) is set to 0 so that the slave can distinguish between
real and fake Rotate events (if necessary)
*/
memset(header, 0, 4);
header[EVENT_TYPE_OFFSET] = ROTATE_EVENT;
char* p = log_file_name+dirname_length(log_file_name);
......@@ -56,11 +61,11 @@ static int fake_rotate_event(NET* net, String* packet, char* log_file_name,
ulong event_len = ident_len + LOG_EVENT_HEADER_LEN + ROTATE_HEADER_LEN;
int4store(header + SERVER_ID_OFFSET, server_id);
int4store(header + EVENT_LEN_OFFSET, event_len);
int2store(header + FLAGS_OFFSET, 0);
int2store(header + FLAGS_OFFSET, flags);
// TODO: check what problems this may cause and fix them
int4store(header + LOG_POS_OFFSET, 0);
packet->append(header, sizeof(header));
int8store(buf+R_POS_OFFSET,position);
packet->append(buf, ROTATE_HEADER_LEN);
......@@ -276,7 +281,7 @@ bool purge_master_logs_before_date(THD* thd, time_t purge_time)
int test_for_non_eof_log_read_errors(int error, const char **errmsg)
{
if (error == LOG_READ_EOF)
if (error == LOG_READ_EOF)
return 0;
my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
switch (error) {
......@@ -321,6 +326,7 @@ void mysql_binlog_send(THD* thd, char* log_ident, my_off_t pos,
const char *errmsg = "Unknown error";
NET* net = &thd->net;
pthread_mutex_t *log_lock;
bool binlog_can_be_corrupted= FALSE, rotate_was_found=FALSE;
#ifndef DBUG_OFF
int left_events = max_binlog_dump_events;
#endif
......@@ -388,37 +394,38 @@ impossible position";
/*
Tell the client about the log name with a fake Rotate event;
this is needed even if we also send a Format_description_log_event just
after, because that event does not contain the binlog's name.
Note that as this Rotate event is sent before Format_description_log_event,
the slave cannot have any info to understand this event's format, so the
header len of Rotate_log_event is FROZEN
(so in 5.0 it will have a header shorter than other events except
FORMAT_DESCRIPTION_EVENT).
Before 4.0.14 we called fake_rotate_event below only if
(pos == BIN_LOG_HEADER_SIZE), because if this is false then the slave
this is needed even if we also send a Format_description_log_event
just after, because that event does not contain the binlog's name.
Note that as this Rotate event is sent before
Format_description_log_event, the slave cannot have any info to
understand this event's format, so the header len of
Rotate_log_event is FROZEN (so in 5.0 it will have a header shorter
than other events except FORMAT_DESCRIPTION_EVENT).
Before 4.0.14 we called fake_rotate_event below only if (pos ==
BIN_LOG_HEADER_SIZE), because if this is false then the slave
already knows the binlog's name.
Since, we always call fake_rotate_event; if the slave already knew the log's
name (ex: CHANGE MASTER TO MASTER_LOG_FILE=...) this is useless but does
not harm much. It is nice for 3.23 (>=.58) slaves which test Rotate events
to see if the master is 4.0 (then they choose to stop because they can't
replicate 4.0); by always calling fake_rotate_event we are sure that
3.23.58 and newer will detect the problem as soon as replication starts
(BUG#198).
Since then, we always call fake_rotate_event; if the slave already knew
the log's name (ex: CHANGE MASTER TO MASTER_LOG_FILE=...) this is
useless but does not harm much. It is nice for 3.23 (>=.58) slaves
which test Rotate events to see if the master is 4.0 (then they
choose to stop because they can't replicate 4.0); by always calling
fake_rotate_event we are sure that 3.23.58 and newer will detect the
problem as soon as replication starts (BUG#198).
Always calling fake_rotate_event makes sending of normal
(=from-binlog) Rotate events a priori unneeded, but it is not so simple:
the 2 Rotate events are not equivalent, the normal one is before the Stop
event, the fake one is after. If we don't send the normal one, then the
Stop event will be interpreted (by existing 4.0 slaves) as "the master
stopped", which is wrong. So for safety, given that we want minimum
modification of 4.0, we send the normal and fake Rotates.
(=from-binlog) Rotate events a priori unneeded, but it is not so
simple: the 2 Rotate events are not equivalent, the normal one is
before the Stop event, the fake one is after. If we don't send the
normal one, then the Stop event will be interpreted (by existing 4.0
slaves) as "the master stopped", which is wrong. So for safety,
given that we want minimum modification of 4.0, we send the normal
and fake Rotates.
*/
if (fake_rotate_event(net, packet, log_file_name, pos, &errmsg))
if (fake_rotate_event(net, packet, log_file_name, pos, 0, &errmsg))
{
/*
This error code is not perfect, as fake_rotate_event() does not read
anything from the binlog; if it fails it's because of an error in
my_net_write(), fortunately it will say it in errmsg.
/*
This error code is not perfect, as fake_rotate_event() does not
read anything from the binlog; if it fails it's because of an
error in my_net_write(), fortunately it will say so in errmsg.
*/
my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
goto err;
......@@ -426,30 +433,35 @@ impossible position";
packet->set("\0", 1, &my_charset_bin);
/*
We can set log_lock now, it does not move (it's a member of mysql_bin_log,
and it's already inited, and it will be destroyed only at shutdown).
We can set log_lock now, it does not move (it's a member of
mysql_bin_log, and it's already inited, and it will be destroyed
only at shutdown).
*/
log_lock = mysql_bin_log.get_log_lock();
log_lock = mysql_bin_log.get_log_lock();
if (pos > BIN_LOG_HEADER_SIZE)
{
/* Try to find a Format_description_log_event at the beginning of the binlog */
{
/*
Try to find a Format_description_log_event at the beginning of
the binlog
*/
if (!(error = Log_event::read_log_event(&log, packet, log_lock)))
{
/*
The packet has offsets equal to the normal offsets in a binlog event
+1 (the first character is \0).
The packet has offsets equal to the normal offsets in a binlog
event +1 (the first character is \0).
*/
DBUG_PRINT("info",
("Looked for a Format_description_log_event, found event type %d",
(*packet)[EVENT_TYPE_OFFSET+1]));
if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
{
binlog_can_be_corrupted= (*packet)[FLAGS_OFFSET+1] & LOG_EVENT_BINLOG_IN_USE_F;
/*
mark this event with "log_pos=0", so that the slave
does not increment the master's binlog position
(rli->group_master_log_pos)
*/
int4store(packet->c_ptr() +LOG_POS_OFFSET+1,0);
int4store(packet->c_ptr()+LOG_POS_OFFSET+1, 0);
/* send it */
if (my_net_write(net, (char*)packet->ptr(), packet->length()))
{
......@@ -458,24 +470,25 @@ impossible position";
goto err;
}
/*
No need to save this event. We are only doing simple reads (no real
parsing of the events) so we don't need it. And so we don't need the
artificial Format_description_log_event of 3.23&4.x.
No need to save this event. We are only doing simple reads
(no real parsing of the events) so we don't need it. And so
we don't need the artificial Format_description_log_event of
3.23&4.x.
*/
}
}
else
if (test_for_non_eof_log_read_errors(error, &errmsg))
goto err;
/*
/*
else: it's EOF, nothing to do, go on reading next events, the
Format_description_log_event will be found naturally if it is written.
*/
/* reset the packet as we wrote to it in any case */
packet->set("\0", 1, &my_charset_bin);
} /* end of if (pos > BIN_LOG_HEADER_SIZE); if false, the Format_description_log_event
event will be found naturally. */
} /* end of if (pos > BIN_LOG_HEADER_SIZE); if false, the
Format_description_log_event event will be found naturally. */
/* seek to the requested position, to start the requested dump */
my_b_seek(&log, pos); // Seek will done on next read
......@@ -492,6 +505,14 @@ impossible position";
goto err;
}
#endif
if ((*packet)[EVENT_TYPE_OFFSET+1] == FORMAT_DESCRIPTION_EVENT)
binlog_can_be_corrupted= (*packet)[FLAGS_OFFSET+1] & LOG_EVENT_BINLOG_IN_USE_F;
else if ((*packet)[EVENT_TYPE_OFFSET+1] == STOP_EVENT)
binlog_can_be_corrupted= FALSE;
else if ((*packet)[EVENT_TYPE_OFFSET+1] == ROTATE_EVENT)
rotate_was_found=TRUE;
if (my_net_write(net, (char*)packet->ptr(), packet->length()))
{
errmsg = "Failed on my_net_write()";
......@@ -511,19 +532,25 @@ impossible position";
}
packet->set("\0", 1, &my_charset_bin);
}
/*
Here we were reading a binlog that was not closed properly (as a result
of a crash?). Treat any corruption as EOF.
*/
if (binlog_can_be_corrupted && error != LOG_READ_MEM)
error=LOG_READ_EOF;
/*
TODO: now that we are logging the offset, check to make sure
the recorded offset and the actual match.
Guilhem 2003-06: this is not true if this master is a slave <4.0.15
running with --log-slave-updates, because then log_pos may be the offset
in the-master-of-this-master's binlog.
Guilhem 2003-06: this is not true if this master is a slave
<4.0.15 running with --log-slave-updates, because then log_pos may
be the offset in the-master-of-this-master's binlog.
*/
if (test_for_non_eof_log_read_errors(error, &errmsg))
goto err;
if (!(flags & BINLOG_DUMP_NON_BLOCK) &&
mysql_bin_log.is_active(log_file_name))
mysql_bin_log.is_active(log_file_name))
{
/*
Block until there is more data in the log
......@@ -559,9 +586,9 @@ impossible position";
now, but we'll be quick and just read one record
TODO:
Add an counter that is incremented for each time we update
the binary log. We can avoid the following read if the counter
has not been updated since last read.
Add a counter that is incremented each time we update the
binary log. We can avoid the following read if the counter
has not been updated since last read.
*/
pthread_mutex_lock(log_lock);
......@@ -654,20 +681,23 @@ impossible position";
(void) my_close(file, MYF(MY_WME));
/*
Call fake_rotate_event() in case the previous log (the one which we have
just finished reading) did not contain a Rotate event (for example (I
don't know any other example) the previous log was the last one before
the master was shutdown & restarted).
This way we tell the slave about the new log's name and position.
If the binlog is 5.0, the next event we are going to read and send is
Format_description_log_event.
Call fake_rotate_event() in case the previous log (the one which
we have just finished reading) did not contain a Rotate event
(for example (I don't know any other example) the previous log
was the last one before the master was shutdown & restarted).
This way we tell the slave about the new log's name and
position. If the binlog is 5.0, the next event we are going to
read and send is Format_description_log_event.
*/
if ((file=open_binlog(&log, log_file_name, &errmsg)) < 0 ||
fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE, &errmsg))
fake_rotate_event(net, packet, log_file_name, BIN_LOG_HEADER_SIZE,
rotate_was_found ? 0 : LOG_EVENT_FORCE_ROLLBACK_F,
&errmsg))
{
my_errno= ER_MASTER_FATAL_ERROR_READING_BINLOG;
goto err;
}
rotate_was_found=FALSE;
packet->length(0);
packet->append('\0');
}
......@@ -708,17 +738,17 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
int slave_errno= 0;
int thread_mask;
DBUG_ENTER("start_slave");
if (check_access(thd, SUPER_ACL, any_db,0,0,0))
DBUG_RETURN(1);
lock_slave_threads(mi); // this allows us to cleanly read slave_running
// Get a mask of _stopped_ threads
init_thread_mask(&thread_mask,mi,1 /* inverse */);
/*
Below we will start all stopped threads.
But if the user wants to start only one thread, do as if the other thread
was running (as we don't wan't to touch the other thread), so set the
bit to 0 for the other thread
Below we will start all stopped threads. But if the user wants to
start only one thread, do as if the other thread was running (as we
don't want to touch the other thread), so set the bit to 0 for the
other thread
*/
if (thd->lex->slave_thd_opt)
thread_mask&= thd->lex->slave_thd_opt;
......@@ -729,9 +759,9 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
slave_errno=ER_MASTER_INFO;
else if (server_id_supplied && *mi->host)
{
/*
If we will start SQL thread we will care about UNTIL options
If not and they are specified we will ignore them and warn user
/*
If we will start SQL thread we will care about UNTIL options. If
not and they are specified we will ignore them and warn user
about this fact.
*/
if (thread_mask & SLAVE_SQL)
......@@ -742,13 +772,13 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
{
mi->rli.until_condition= RELAY_LOG_INFO::UNTIL_MASTER_POS;
mi->rli.until_log_pos= thd->lex->mi.pos;
/*
We don't check thd->lex->mi.log_file_name for NULL here
/*
We don't check thd->lex->mi.log_file_name for NULL here
since it is checked in sql_yacc.yy
*/
strmake(mi->rli.until_log_name, thd->lex->mi.log_file_name,
sizeof(mi->rli.until_log_name)-1);
}
}
else if (thd->lex->mi.relay_log_pos)
{
mi->rli.until_condition= RELAY_LOG_INFO::UNTIL_RELAY_POS;
......@@ -772,15 +802,15 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
p_end points to the first invalid character. If it equals
to p, no digits were found, error. If it contains '\0' it
means conversion went ok.
*/
*/
if (p_end==p || *p_end)
slave_errno=ER_BAD_SLAVE_UNTIL_COND;
}
else
slave_errno=ER_BAD_SLAVE_UNTIL_COND;
/* mark the cached result of the UNTIL comparison as "undefined" */
mi->rli.until_log_names_cmp_result=
mi->rli.until_log_names_cmp_result=
RELAY_LOG_INFO::UNTIL_LOG_NAMES_CMP_UNKNOWN;
/* Issuing warning when started without --skip-slave-start */
......@@ -788,14 +818,13 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_MISSING_SKIP_SLAVE,
ER(ER_MISSING_SKIP_SLAVE));
}
pthread_mutex_unlock(&mi->rli.data_lock);
}
else if (thd->lex->mi.pos || thd->lex->mi.relay_log_pos)
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_UNTIL_COND_IGNORED,
ER(ER_UNTIL_COND_IGNORED));
if (!slave_errno)
slave_errno = start_slave_threads(0 /*no mutex */,
1 /* wait for start */,
......@@ -810,9 +839,9 @@ int start_slave(THD* thd , MASTER_INFO* mi, bool net_report)
//no error if all threads are already started, only a warning
push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_SLAVE_WAS_RUNNING,
ER(ER_SLAVE_WAS_RUNNING));
unlock_slave_threads(mi);
if (slave_errno)
{
if (net_report)
......@@ -912,7 +941,7 @@ int reset_slave(THD *thd, MASTER_INFO* mi)
1 /* just reset */,
&errmsg)))
goto err;
/*
Clear master's log coordinates and reset host/user/etc to the values
specified in mysqld's options (only for good display of SHOW SLAVE STATUS;
......@@ -921,13 +950,13 @@ int reset_slave(THD *thd, MASTER_INFO* mi)
STATUS; before doing START SLAVE;
*/
init_master_info_with_options(mi);
/*
/*
Reset errors, and master timestamp (the idea is that we forget about the
old master).
*/
clear_slave_error_timestamp(&mi->rli);
clear_until_condition(&mi->rli);
// close master_info_file, relay_log_info_file, set mi->inited=rli->inited=0
end_master_info(mi);
// and delete these two files
......@@ -1243,7 +1272,7 @@ bool mysql_show_binlog_events(THD* thd)
IO_CACHE log;
File file = -1;
Format_description_log_event *description_event= new
Format_description_log_event(3); /* MySQL 4.0 by default */
Format_description_log_event(3); /* MySQL 4.0 by default */
Log_event::init_show_field_list(&field_list);
if (protocol->send_fields(&field_list,
......@@ -1260,7 +1289,7 @@ bool mysql_show_binlog_events(THD* thd)
pthread_mutex_t *log_lock = mysql_bin_log.get_log_lock();
LOG_INFO linfo;
Log_event* ev;
limit_start= thd->lex->current_select->offset_limit;
limit_end= thd->lex->current_select->select_limit + limit_start;
......@@ -1284,15 +1313,15 @@ bool mysql_show_binlog_events(THD* thd)
pthread_mutex_lock(log_lock);
/*
/*
open_binlog() sought to position 4.
Read the first event in case it's a Format_description_log_event, to know the
format. If there's no such event, we are 3.23 or 4.x. This code, like
before, can't read 3.23 binlogs.
Read the first event in case it's a Format_description_log_event, to
know the format. If there's no such event, we are 3.23 or 4.x. This
code, like before, can't read 3.23 binlogs.
This code will fail on a mixed relay log (one which has Format_desc then
Rotate then Format_desc).
*/
ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event);
if (ev)
{
......@@ -1312,7 +1341,7 @@ bool mysql_show_binlog_events(THD* thd)
errmsg="Invalid Format_description event; could be out of memory";
goto err;
}
for (event_count = 0;
(ev = Log_event::read_log_event(&log,(pthread_mutex_t*)0,description_event)); )
{
......
......@@ -36,7 +36,11 @@ extern I_List<i_string> binlog_do_db, binlog_ignore_db;
extern int max_binlog_dump_events;
extern my_bool opt_sporadic_binlog_dump_fail;
#define KICK_SLAVE(thd) { pthread_mutex_lock(&(thd)->LOCK_delete); (thd)->awake(THD::NOT_KILLED); pthread_mutex_unlock(&(thd)->LOCK_delete); }
/*
  KICK_SLAVE(thd): lock thd->LOCK_delete, call thd->awake(THD::NOT_KILLED),
  then unlock thd->LOCK_delete.
  The body is wrapped in do { ... } while(0) so that the macro expands to a
  single statement and can be followed by a ';' (e.g. in an un-braced
  if/else branch).
  NOTE(review): presumably LOCK_delete is held so that thd cannot be
  destroyed while awake() runs -- confirm against THD's definition.
*/
#define KICK_SLAVE(thd) do { \
pthread_mutex_lock(&(thd)->LOCK_delete); \
(thd)->awake(THD::NOT_KILLED); \
pthread_mutex_unlock(&(thd)->LOCK_delete); \
} while(0)
int start_slave(THD* thd, MASTER_INFO* mi, bool net_report);
int stop_slave(THD* thd, MASTER_INFO* mi, bool net_report);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment