Commit 3220a945 authored by unknown's avatar unknown

Robustness feature.

Won't be pushed as is - separate email sent for internal review.
WL#1717 "binlog-innodb consistency".
Now when mysqld starts, if InnoDB does a crash recovery, we use the binlog name
and position retrieved from InnoDB (corresponding to the last transaction
successfully committed by InnoDB) to cut any rolled back transaction from
the binary log. This is triggered by the --innodb-safe-binlog option.
Provided you configure mysqld to fsync() InnoDB at every commit (using
flush_log_at_trx_commit) and to fsync() the binlog at every write
(using --sync-binlog=1), this behaviour guarantees that a master always has
consistency between binlog and InnoDB, whenever the crash happens.
6 tests to verify that it works.


client/mysqltest.c:
  New command require_os (only "unix" accepted for now).
innobase/include/trx0sys.h:
  when InnoDB does crash recovery, we now save the binlog coords it prints, into variables for later use.
innobase/trx/trx0sys.c:
  when InnoDB does crash recovery, we now save the binlog coords it prints, into variables for later use.
mysql-test/mysql-test-run.sh:
  The tests which check that the binlog is cut at restart, need to not delete those binlogs, of course.
  And not delete replication info, so that we can test that the slave does not receive anything
  wrong from the cut binlog.
sql/ha_innodb.cc:
  methods to read from InnoDB the binlog coords stored into it
sql/ha_innodb.h:
  methods to read from InnoDB the binlog coords stored into it
sql/log.cc:
  Added my_sync() when we create a binlog (my_sync of the binlog and of the index file);
  this is always done, whether --sync-binlog or not (binlog creation is rare, so no speed
  problem, and I like to have the existence of the binlog always reliably recorded, even if
  later content is not).
  If --crash-binlog-innodb, crash between the binlog write and the InnoDB commit.
  New methods:
  - report_pos_in_innodb() to store the binlog name and position into InnoDB (used only when
  we create a new binlog: at startup and at FLUSH LOGS)
  - cut_spurious_tail() to possibly cut the tail of a binlog based on the info we read
  from InnoDB (does something only if InnoDB has just done a crash recovery).
sql/mysql_priv.h:
  new option, to crash (use for testing only)
sql/mysqld.cc:
  New option --innodb-safe-binlog and --crash-binlog-innodb (the latter is for testing, it makes mysqld crash).
  Just after opening the logs and opening the storage engines, cut any wrong statement from the binlog, based
  on info read from InnoDB.
sql/sql_class.h:
  new methods for MYSQL_LOG.
parent 60cb0e5f
...@@ -223,7 +223,7 @@ Q_ENABLE_QUERY_LOG, Q_DISABLE_QUERY_LOG, ...@@ -223,7 +223,7 @@ Q_ENABLE_QUERY_LOG, Q_DISABLE_QUERY_LOG,
Q_ENABLE_RESULT_LOG, Q_DISABLE_RESULT_LOG, Q_ENABLE_RESULT_LOG, Q_DISABLE_RESULT_LOG,
Q_SERVER_START, Q_SERVER_STOP,Q_REQUIRE_MANAGER, Q_SERVER_START, Q_SERVER_STOP,Q_REQUIRE_MANAGER,
Q_WAIT_FOR_SLAVE_TO_STOP, Q_WAIT_FOR_SLAVE_TO_STOP,
Q_REQUIRE_VERSION, Q_REQUIRE_VERSION, Q_REQUIRE_OS,
Q_ENABLE_WARNINGS, Q_DISABLE_WARNINGS, Q_ENABLE_WARNINGS, Q_DISABLE_WARNINGS,
Q_ENABLE_INFO, Q_DISABLE_INFO, Q_ENABLE_INFO, Q_DISABLE_INFO,
Q_ENABLE_METADATA, Q_DISABLE_METADATA, Q_ENABLE_METADATA, Q_DISABLE_METADATA,
...@@ -297,6 +297,7 @@ const char *command_names[]= ...@@ -297,6 +297,7 @@ const char *command_names[]=
"require_manager", "require_manager",
"wait_for_slave_to_stop", "wait_for_slave_to_stop",
"require_version", "require_version",
"require_os",
"enable_warnings", "enable_warnings",
"disable_warnings", "disable_warnings",
"enable_info", "enable_info",
...@@ -848,6 +849,28 @@ int do_require_version(struct st_query* q) ...@@ -848,6 +849,28 @@ int do_require_version(struct st_query* q)
return 0; return 0;
} }
/*
  Handle the "require_os <os>" test command.

  SYNOPSIS
    do_require_os()
    q        command being executed; q->first_argument holds the OS name

  NOTES
    The first whitespace-delimited word of the argument is taken as the OS
    name; the argument buffer is modified in place to terminate that word.
    Only "unix" is accepted for now; a missing or different argument is
    reported through die(). When compiled for NetWare, Windows or OS/2,
    abort_not_supported_test() is called.

  RETURN VALUES
    0        always (when the function returns at all)
*/

int do_require_os(struct st_query* q)
{
  char *p=q->first_argument, *os_arg;
  DBUG_ENTER("do_require_os");

  if (!*p)
    die("Missing OS argument in require_os\n");
  os_arg= p;
  /* Isolate the first word of the argument */
  while (*p && !my_isspace(charset_info,*p))
    p++;
  *p = 0;

  if (strcmp(os_arg, "unix"))
    die("For now only testing of os=unix is implemented\n");

#if defined(__NETWARE__) || defined(__WIN__) || defined(__OS2__)
  abort_not_supported_test();
#endif
  DBUG_RETURN(0);
}
int do_source(struct st_query* q) int do_source(struct st_query* q)
{ {
char* p=q->first_argument, *name; char* p=q->first_argument, *name;
...@@ -2705,6 +2728,7 @@ int main(int argc, char **argv) ...@@ -2705,6 +2728,7 @@ int main(int argc, char **argv)
case Q_SLEEP: do_sleep(q, 0); break; case Q_SLEEP: do_sleep(q, 0); break;
case Q_REAL_SLEEP: do_sleep(q, 1); break; case Q_REAL_SLEEP: do_sleep(q, 1); break;
case Q_REQUIRE_VERSION: do_require_version(q); break; case Q_REQUIRE_VERSION: do_require_version(q); break;
case Q_REQUIRE_OS: do_require_os(q); break;
case Q_WAIT_FOR_SLAVE_TO_STOP: do_wait_for_slave_to_stop(q); break; case Q_WAIT_FOR_SLAVE_TO_STOP: do_wait_for_slave_to_stop(q); break;
case Q_REQUIRE_MANAGER: do_require_manager(q); break; case Q_REQUIRE_MANAGER: do_require_manager(q); break;
#ifndef EMBEDDED_LIBRARY #ifndef EMBEDDED_LIBRARY
......
...@@ -32,6 +32,14 @@ or there was no master log position info inside InnoDB. */ ...@@ -32,6 +32,14 @@ or there was no master log position info inside InnoDB. */
extern char trx_sys_mysql_master_log_name[]; extern char trx_sys_mysql_master_log_name[];
extern ib_longlong trx_sys_mysql_master_log_pos; extern ib_longlong trx_sys_mysql_master_log_pos;
/* If this MySQL server uses binary logging, after InnoDB has been inited
and if it has done a crash recovery, we store the binlog file name and position
here. If .._pos is -1, it means there was no binlog position info inside
InnoDB. */
extern char trx_sys_mysql_bin_log_name[];
extern ib_longlong trx_sys_mysql_bin_log_pos;
/* The transaction system */ /* The transaction system */
extern trx_sys_t* trx_sys; extern trx_sys_t* trx_sys;
......
...@@ -45,6 +45,15 @@ or there was no master log position info inside InnoDB. */ ...@@ -45,6 +45,15 @@ or there was no master log position info inside InnoDB. */
char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN]; char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
ib_longlong trx_sys_mysql_master_log_pos = -1; ib_longlong trx_sys_mysql_master_log_pos = -1;
/* If this MySQL server uses binary logging, after InnoDB has been inited
and if it has done a crash recovery, we store the binlog file name and position
here. If .._pos is -1, it means there was no binlog position info inside
InnoDB. */
char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
ib_longlong trx_sys_mysql_bin_log_pos = -1;
/******************************************************************** /********************************************************************
Determines if a page number is located inside the doublewrite buffer. */ Determines if a page number is located inside the doublewrite buffer. */
...@@ -650,8 +659,8 @@ trx_sys_print_mysql_binlog_offset_from_page( ...@@ -650,8 +659,8 @@ trx_sys_print_mysql_binlog_offset_from_page(
#endif /* UNIV_HOTBACKUP */ #endif /* UNIV_HOTBACKUP */
/********************************************************************* /*********************************************************************
Prints to stderr the MySQL binlog offset info in the trx system header if Stores the MySQL binlog offset info in the trx system header if
the magic number shows it valid. */ the magic number shows it valid, and print the info to stderr */
void void
trx_sys_print_mysql_binlog_offset(void) trx_sys_print_mysql_binlog_offset(void)
...@@ -659,7 +668,8 @@ trx_sys_print_mysql_binlog_offset(void) ...@@ -659,7 +668,8 @@ trx_sys_print_mysql_binlog_offset(void)
{ {
trx_sysf_t* sys_header; trx_sysf_t* sys_header;
mtr_t mtr; mtr_t mtr;
ulong trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low;
mtr_start(&mtr); mtr_start(&mtr);
sys_header = trx_sysf_get(&mtr); sys_header = trx_sysf_get(&mtr);
...@@ -673,14 +683,22 @@ trx_sys_print_mysql_binlog_offset(void) ...@@ -673,14 +683,22 @@ trx_sys_print_mysql_binlog_offset(void)
return; return;
} }
fprintf(stderr, trx_sys_mysql_bin_log_pos_high = mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
"InnoDB: Last MySQL binlog file position %lu %lu, file name %s\n", + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
(ulong) mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO trx_sys_mysql_bin_log_pos_low = mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH), + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
(ulong) mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+ TRX_SYS_MYSQL_LOG_OFFSET_LOW), trx_sys_mysql_bin_log_pos = (((ib_longlong)trx_sys_mysql_bin_log_pos_high) << 32) +
sys_header + TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME); (ib_longlong)trx_sys_mysql_bin_log_pos_low;
ut_memcpy(trx_sys_mysql_bin_log_name, sys_header + TRX_SYS_MYSQL_LOG_INFO +
TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
fprintf(stderr,
"InnoDB: Last MySQL binlog file position %lu %lu, file name %s\n",
trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
trx_sys_mysql_bin_log_name);
mtr_commit(&mtr); mtr_commit(&mtr);
} }
......
-- require r/have_debug.require
disable_query_log;
select (version() like "%debug%") as debug;
enable_query_log;
kill -9 `cat var/run/master.pid`
...@@ -882,8 +882,12 @@ start_master() ...@@ -882,8 +882,12 @@ start_master()
if [ x$MASTER_RUNNING = x1 ] || [ x$LOCAL_MASTER = x1 ] ; then if [ x$MASTER_RUNNING = x1 ] || [ x$LOCAL_MASTER = x1 ] ; then
return return
fi fi
# Remove stale binary logs # Remove stale binary logs except for 2 tests which need them
$RM -f $MYSQL_TEST_DIR/var/log/master-bin.* if [ "$tname" != "rpl_crash_binlog_ib_1b" ] && [ "$tname" != "rpl_crash_binlog_ib_2b" ] && [ "$tname" != "rpl_crash_binlog_ib_3b" ]
then
$RM -f $MYSQL_TEST_DIR/var/log/master-bin.*
fi
# Remove old master.info and relay-log.info files # Remove old master.info and relay-log.info files
$RM -f $MYSQL_TEST_DIR/var/master-data/master.info $MYSQL_TEST_DIR/var/master-data/relay-log.info $RM -f $MYSQL_TEST_DIR/var/master-data/master.info $MYSQL_TEST_DIR/var/master-data/relay-log.info
...@@ -1005,8 +1009,12 @@ start_slave() ...@@ -1005,8 +1009,12 @@ start_slave()
slave_sock="$SLAVE_MYSOCK" slave_sock="$SLAVE_MYSOCK"
fi fi
# Remove stale binary logs and old master.info files # Remove stale binary logs and old master.info files
$RM -f $MYSQL_TEST_DIR/var/log/$slave_ident-*bin.* # except for too tests which need them
$RM -f $slave_datadir/master.info $slave_datadir/relay-log.info if [ "$tname" != "rpl_crash_binlog_ib_1b" ] && [ "$tname" != "rpl_crash_binlog_ib_2b" ] && [ "$tname" != "rpl_crash_binlog_ib_3b" ]
then
$RM -f $MYSQL_TEST_DIR/var/log/$slave_ident-*bin.*
$RM -f $slave_datadir/master.info $slave_datadir/relay-log.info
fi
#run slave initialization shell script if one exists #run slave initialization shell script if one exists
if [ -f "$slave_init_script" ] ; if [ -f "$slave_init_script" ] ;
......
stop slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
flush logs;
set autocommit=1;
set sql_log_bin=0;
create table t1 (n int) engine=innodb;
set sql_log_bin=1;
create table t1 (n int) engine=myisam;
insert into t1 values (3);
show master status;
File Position Binlog_Do_DB Binlog_Ignore_DB
master-bin.000002 64
insert into t1 values (4);
select * from t1;
n
3
set @a=load_file("MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
length(@a)
124
select @a like "%values (4)%";
@a like "%values (4)%"
1
select * from t1;
n
3
insert into t1 values (5);
select * from t1;
n
3
5
select * from t1;
n
3
start slave;
select * from t1;
n
3
5
set @a=load_file("MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
length(@a)
64
select @a like "%values (4)%";
@a like "%values (4)%"
0
drop table if exists t1;
stop slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
flush logs;
set autocommit=0;
set sql_log_bin=0;
create table t1(n int) engine=innodb;
set sql_log_bin=1;
create table t1(n int) engine=myisam;
insert into t1 values (3);
insert into t1 values (4);
commit;
show master status;
File Position Binlog_Do_DB Binlog_Ignore_DB
master-bin.000002 205
insert into t1 values (5);
insert into t1 values (6);
commit;
select * from t1;
n
3
4
set @a=load_file("MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
length(@a)
406
select @a like "%values (5)%";
@a like "%values (5)%"
1
select * from t1;
n
3
4
insert into t1 values (7);
select * from t1;
n
3
4
7
select * from t1;
n
3
4
start slave;
select * from t1;
n
3
4
7
set @a=load_file("MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
length(@a)
205
select @a like "%values (5)%";
@a like "%values (5)%"
0
drop table if exists t1;
stop slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
flush logs;
set autocommit=1;
set sql_log_bin=0;
create table t1 (n int) engine=innodb;
set sql_log_bin=1;
create table t1 (n int) engine=myisam;
show master status;
File Position Binlog_Do_DB Binlog_Ignore_DB
master-bin.000002 4
insert into t1 values (4);
select * from t1;
n
set @a=load_file("MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
length(@a)
64
select @a like "%values (4)%";
@a like "%values (4)%"
1
select * from t1;
n
insert into t1 values (5);
select * from t1;
n
5
select * from t1;
n
start slave;
select * from t1;
n
5
set @a=load_file("MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
length(@a)
4
select @a like "%values (4)%";
@a like "%values (4)%"
0
drop table if exists t1;
--innodb-safe-binlog --crash-binlog-innodb=3
# Test if master cuts binlog at InnoDB crash's recovery,
# if the transaction had been written to binlog but not committed into InnoDB
# We need InnoDB in the master, and a debug build in the master.
# There are 6 tests, in fact 3 pairs:
# 1st pair:
# rpl_crash_binlog_innodb_1a: crash when InnoDB in autocommit mode
# rpl_crash_binlog_innodb_1b: test of recovery after the crash of test 1a
# 2nd pair:
# rpl_crash_binlog_innodb_2a: crash when InnoDB in non autocommit mode
# rpl_crash_binlog_innodb_2b: test of recovery after the crash of test 2a
# 3rd pair:
# rpl_crash_binlog_innodb_3a: crash when InnoDB in autocommit mode, at
# very first transactional statement since master's startup (a purely
# academic case but which will be tested a lot)
# rpl_crash_binlog_innodb_3b: test of recovery after the crash of test 3a
# The *b tests should always be run just after their matching *a test;
# don't run a *b test alone, it won't work properly.
# This test is only for autocommit mode.
source include/master-slave.inc ;
source include/have_debug.inc ;
source include/have_innodb.inc ;
require_os unix ;
flush logs; # this will help us be sure it's the same log in the next test
# One problem: we need InnoDB to know of the last good position, so it
# must have committed at least one transaction and in the binlog before the crash.
# NOTE: the above should become false quite soon
set autocommit=1;
set sql_log_bin=0;
create table t1 (n int) engine=innodb;
set sql_log_bin=1;
sync_slave_with_master;
# We use MyISAM on slave so that any spurious statement received from the
# master has a visible effect.
create table t1 (n int) engine=myisam;
connection master;
insert into t1 values (3);
# The reported size here should be exactly the same as the one we measure
# at the end of rpl_crash_binlog_innodb_1b.test
show master status;
# Master will crash in this (it crashes on 3rd binlog write, counting
# the DROP IF EXISTS in master-slave.inc):
error 2013;
send insert into t1 values (4);
sleep 4; # enough time to die
# No 'reap' as it may hang as master died hard.
# This kill speeds up:
system sh misc/kill_master.sh ;
# Check that slave did not receive the spurious INSERT statement
connection slave;
select * from t1;
# Check that the spurious statement is in the master's binlog
# LOAD_FILE() needs a file readable by all
system chmod ugo+r $MYSQL_TEST_DIR/var/log/master-bin.000002 ;
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
eval set @a=load_file("$MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
select @a like "%values (4)%";
# Now we will run rpl_crash_binlog_innodb_1b.test to test
# if the spurious statement gets truncated at master's restart.
# Test if master cuts binlog at InnoDB crash's recovery,
# after we crashed intentionally in rpl_crash_binlog_innodb_1a.test
# (1a and 1b are two tests, 1b should NOT be run if 1a has not been run
# just before). So don't run 1b alone.
# We need InnoDB in the master, and a debug build in the master.
# We don't use master-slave.inc because it would RESET MASTER.

connect (master,127.0.0.1,root,,test,$MASTER_MYPORT,);
connect (slave,127.0.0.1,root,,test,$SLAVE_MYPORT,);

# Every command needs its own terminating delimiter; without it the
# following lines are read as part of the same "source" command and the
# checks they contain are never executed.
source include/have_debug.inc ;
source include/have_innodb.inc ;
require_os unix ;
connection master;
# check that transaction was rolled back on master
select * from t1;
insert into t1 values (5);
select * from t1;
save_master_pos;
# Check that slave did not receive the spurious INSERT statement
connection slave;
select * from t1;
start slave;
sync_with_master;
select * from t1;
# Check that the spurious statement is NOT in the master's binlog anymore
# LOAD_FILE() needs a file readable by all
system chmod ugo+r $MYSQL_TEST_DIR/var/log/master-bin.000002 ;
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
eval set @a=load_file("$MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
select @a like "%values (4)%";
connection master;
drop table if exists t1;
sync_slave_with_master;
--innodb-safe-binlog --crash-binlog-innodb=3
# Test if master cuts binlog at InnoDB crash's recovery,
# if the transaction had been written to binlog but not committed into InnoDB
# We need InnoDB in the master, and a debug build in the master.
# This test is only for NON autocommit mode.
# More comments in rpl_crash_binlog_ib_1a.test

# Every command needs its own terminating delimiter; without it the
# following lines are read as part of the same "source" command and the
# checks they contain are never executed.
source include/master-slave.inc ;
source include/have_debug.inc ;
source include/have_innodb.inc ;
require_os unix ;
flush logs;
set autocommit=0;
set sql_log_bin=0;
create table t1(n int) engine=innodb;
set sql_log_bin=1;
sync_slave_with_master;
create table t1(n int) engine=myisam;
connection master;
insert into t1 values (3);
insert into t1 values (4);
commit;
show master status;
insert into t1 values (5);
insert into t1 values (6);
error 2013;
send commit;
sleep 4;
system sh misc/kill_master.sh ;
connection slave;
select * from t1;
system chmod ugo+r $MYSQL_TEST_DIR/var/log/master-bin.000002 ;
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
eval set @a=load_file("$MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
select @a like "%values (5)%";
# Now we will run rpl_crash_binlog_ib_2b.test to test
# if the spurious transaction gets truncated at master's restart.
# Test if master cuts binlog at InnoDB crash's recovery,
# after we crashed intentionally in rpl_crash_binlog_innodb_2a.test
# We need InnoDB in the master, and a debug build in the master.
# We don't use master-slave.inc because it would RESET MASTER.

connect (master,127.0.0.1,root,,test,$MASTER_MYPORT,);
connect (slave,127.0.0.1,root,,test,$SLAVE_MYPORT,);

# Every command needs its own terminating delimiter; without it the
# following lines are read as part of the same "source" command and the
# checks they contain are never executed.
source include/have_debug.inc ;
source include/have_innodb.inc ;
require_os unix ;
connection master;
select * from t1;
insert into t1 values (7);
select * from t1;
save_master_pos;
connection slave;
select * from t1;
start slave;
sync_with_master;
select * from t1;
system chmod ugo+r $MYSQL_TEST_DIR/var/log/master-bin.000002 ;
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
eval set @a=load_file("$MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
select @a like "%values (5)%";
connection master;
drop table if exists t1;
sync_slave_with_master;
--innodb-safe-binlog --crash-binlog-innodb=2
# Test if master cuts binlog at InnoDB crash's recovery,
# if the transaction had been written to binlog but not committed into InnoDB
# We need InnoDB in the master, and a debug build in the master.
# This test is only for autocommit mode, with a crash at very first
# transactional statement since startup.
# More comments in rpl_crash_binlog_ib_1a.test
source include/master-slave.inc ;
source include/have_debug.inc ;
source include/have_innodb.inc ;
require_os unix ;
flush logs;
set autocommit=1;
set sql_log_bin=0;
create table t1 (n int) engine=innodb;
set sql_log_bin=1;
sync_slave_with_master;
create table t1 (n int) engine=myisam;
connection master;
show master status;
error 2013;
send insert into t1 values (4);
sleep 4; # enough time to die
system sh misc/kill_master.sh ;
connection slave;
select * from t1;
system chmod ugo+r $MYSQL_TEST_DIR/var/log/master-bin.000002 ;
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
eval set @a=load_file("$MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
select @a like "%values (4)%";
# Now we will run rpl_crash_binlog_innodb_3b.test to test
# if the spurious statement gets truncated at master's restart.
# Test if master cuts binlog at InnoDB crash's recovery,
# after we crashed intentionally in rpl_crash_binlog_innodb_3a.test
# We need InnoDB in the master, and a debug build in the master.
# We don't use master-slave.inc because it would RESET MASTER.

connect (master,127.0.0.1,root,,test,$MASTER_MYPORT,);
connect (slave,127.0.0.1,root,,test,$SLAVE_MYPORT,);

# Every command needs its own terminating delimiter; without it the
# following lines are read as part of the same "source" command and the
# checks they contain are never executed.
source include/have_debug.inc ;
source include/have_innodb.inc ;
require_os unix ;
connection master;
select * from t1;
insert into t1 values (5);
select * from t1;
save_master_pos;
connection slave;
select * from t1;
start slave;
sync_with_master;
select * from t1;
system chmod ugo+r $MYSQL_TEST_DIR/var/log/master-bin.000002 ;
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR
eval set @a=load_file("$MYSQL_TEST_DIR/var/log/master-bin.000002");
select length(@a);
select @a like "%values (4)%";
connection master;
drop table if exists t1;
sync_slave_with_master;
...@@ -5213,4 +5213,19 @@ innobase_store_binlog_offset_and_flush_log( ...@@ -5213,4 +5213,19 @@ innobase_store_binlog_offset_and_flush_log(
/* Syncronous flush of the log buffer to disk */ /* Syncronous flush of the log buffer to disk */
log_buffer_flush_to_disk(); log_buffer_flush_to_disk();
} }
/*
  Give access to the binlog file name kept in trx_sys_mysql_bin_log_name.

  RETURN VALUES
    pointer to the trx_sys_mysql_bin_log_name buffer (not a copy)
*/

char *ha_innobase::get_mysql_bin_log_name()
{
  char *saved_name= trx_sys_mysql_bin_log_name;
  return saved_name;
}
/*
  Give access to the binlog position kept in trx_sys_mysql_bin_log_pos.

  RETURN VALUES
    the value of trx_sys_mysql_bin_log_pos converted to ulonglong
*/

ulonglong ha_innobase::get_mysql_bin_log_pos()
{
  /*
    The stored value is an ib_longlong, a typedef for a 64-bit integer
    (__int64 or longlong), so converting it to ulonglong is safe.
  */
  ulonglong saved_pos= (ulonglong) trx_sys_mysql_bin_log_pos;
  return saved_pos;
}
#endif /* HAVE_INNOBASE_DB */ #endif /* HAVE_INNOBASE_DB */
...@@ -183,6 +183,9 @@ class ha_innobase: public handler ...@@ -183,6 +183,9 @@ class ha_innobase: public handler
void init_table_handle_for_HANDLER(); void init_table_handle_for_HANDLER();
longlong get_auto_increment(); longlong get_auto_increment();
uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; } uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
static char *get_mysql_bin_log_name();
static ulonglong get_mysql_bin_log_pos();
}; };
extern uint innobase_init_flags, innobase_lock_type; extern uint innobase_init_flags, innobase_lock_type;
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "mysql_priv.h" #include "mysql_priv.h"
#include "sql_acl.h" #include "sql_acl.h"
#include "sql_repl.h" #include "sql_repl.h"
#include "ha_innodb.h" // necessary to cut the binlog when crash recovery
#include <my_dir.h> #include <my_dir.h>
#include <stdarg.h> #include <stdarg.h>
...@@ -296,6 +297,7 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, ...@@ -296,6 +297,7 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
if ((index_file_nr= my_open(index_file_name, if ((index_file_nr= my_open(index_file_name,
O_RDWR | O_CREAT | O_BINARY , O_RDWR | O_CREAT | O_BINARY ,
MYF(MY_WME))) < 0 || MYF(MY_WME))) < 0 ||
my_sync(index_file_nr, MYF(MY_WME)) ||
init_io_cache(&index_file, index_file_nr, init_io_cache(&index_file, index_file_nr,
IO_SIZE, WRITE_CACHE, IO_SIZE, WRITE_CACHE,
my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)), my_seek(index_file_nr,0L,MY_SEEK_END,MYF(0)),
...@@ -315,16 +317,21 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg, ...@@ -315,16 +317,21 @@ bool MYSQL_LOG::open(const char *log_name, enum_log_type log_type_arg,
s.set_log_pos(this); s.set_log_pos(this);
s.write(&log_file); s.write(&log_file);
} }
if (flush_io_cache(&log_file)) if (flush_io_cache(&log_file) ||
my_sync(log_file.file, MYF(MY_WME)))
goto err; goto err;
if (write_file_name_to_index_file) if (write_file_name_to_index_file)
{ {
/* As this is a new log file, we write the file name to the index file */ /*
As this is a new log file, we write the file name to the index
file. As every time we write to the index file, we sync it.
*/
if (my_b_write(&index_file, (byte*) log_file_name, if (my_b_write(&index_file, (byte*) log_file_name,
strlen(log_file_name)) || strlen(log_file_name)) ||
my_b_write(&index_file, (byte*) "\n", 1) || my_b_write(&index_file, (byte*) "\n", 1) ||
flush_io_cache(&index_file)) flush_io_cache(&index_file) ||
my_sync(index_file.file, MYF(MY_WME)))
goto err; goto err;
} }
break; break;
...@@ -405,7 +412,8 @@ static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset) ...@@ -405,7 +412,8 @@ static bool copy_up_file_and_fill(IO_CACHE *index_file, my_off_t offset)
goto err; goto err;
} }
/* The following will either truncate the file or fill the end with \n' */ /* The following will either truncate the file or fill the end with \n' */
if (my_chsize(file, offset - init_offset, '\n', MYF(MY_WME))) if (my_chsize(file, offset - init_offset, '\n', MYF(MY_WME)) ||
my_sync(file, MYF(MY_WME)))
goto err; goto err;
/* Reset data in old index cache */ /* Reset data in old index cache */
...@@ -995,6 +1003,8 @@ void MYSQL_LOG::new_file(bool need_lock) ...@@ -995,6 +1003,8 @@ void MYSQL_LOG::new_file(bool need_lock)
open(old_name, save_log_type, new_name_ptr, index_file_name, io_cache_type, open(old_name, save_log_type, new_name_ptr, index_file_name, io_cache_type,
no_auto_events, max_size); no_auto_events, max_size);
if (this == &mysql_bin_log)
report_pos_in_innodb();
my_free(old_name,MYF(0)); my_free(old_name,MYF(0));
end: end:
...@@ -1406,6 +1416,30 @@ COLLATION_CONNECTION=%lu,COLLATION_DATABASE=%lu,COLLATION_SERVER=%lu", ...@@ -1406,6 +1416,30 @@ COLLATION_CONNECTION=%lu,COLLATION_DATABASE=%lu,COLLATION_SERVER=%lu",
if (event_info->get_type_code() == QUERY_EVENT || if (event_info->get_type_code() == QUERY_EVENT ||
event_info->get_type_code() == EXEC_LOAD_EVENT) event_info->get_type_code() == EXEC_LOAD_EVENT)
{ {
#ifndef DBUG_OFF
if (unlikely(opt_crash_binlog_innodb))
{
/*
This option is for use in rpl_crash_binlog_innodb.test.
1st we want to verify that Binlog_dump thread cannot send the
event now (because of LOCK_log): we here tell the Binlog_dump
thread to wake up, sleep for the slave to have time to possibly
receive data from the master (it should not), and then crash.
2nd we want to verify that at crash recovery the rolled back
event is cut from the binlog.
*/
if (!(--opt_crash_binlog_innodb))
{
signal_update();
sleep(2);
fprintf(stderr,"This is a normal crash because of"
" --crash-binlog-innodb\n");
assert(0);
}
DBUG_PRINT("info",("opt_crash_binlog_innodb: %d",
opt_crash_binlog_innodb));
}
#endif
error = ha_report_binlog_offset_and_commit(thd, log_file_name, error = ha_report_binlog_offset_and_commit(thd, log_file_name,
file->pos_in_file); file->pos_in_file);
called_handler_commit=1; called_handler_commit=1;
...@@ -1561,6 +1595,22 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool commit_or_rollback) ...@@ -1561,6 +1595,22 @@ bool MYSQL_LOG::write(THD *thd, IO_CACHE *cache, bool commit_or_rollback)
write_error=1; // Don't give more errors write_error=1; // Don't give more errors
goto err; goto err;
} }
#ifndef DBUG_OFF
if (unlikely(opt_crash_binlog_innodb))
{
/* see the previous MYSQL_LOG::write() method for a comment */
if (!(--opt_crash_binlog_innodb))
{
signal_update();
sleep(2);
fprintf(stderr, "This is a normal crash because of"
" --crash-binlog-innodb\n");
assert(0);
}
DBUG_PRINT("info",("opt_crash_binlog_innodb: %d",
opt_crash_binlog_innodb));
}
#endif
if ((ha_report_binlog_offset_and_commit(thd, log_file_name, if ((ha_report_binlog_offset_and_commit(thd, log_file_name,
log_file.pos_in_file))) log_file.pos_in_file)))
goto err; goto err;
...@@ -1978,4 +2028,131 @@ bool flush_error_log() ...@@ -1978,4 +2028,131 @@ bool flush_error_log()
} }
/*
  If the server has InnoDB on, and InnoDB has published the position of the
  last committed transaction (which happens only if a crash recovery occurred
  at this startup) then truncate the previous binary log at the position given
  by InnoDB. If binlog is shorter than the position, print a message to the
  error log.

  SYNOPSIS
    cut_spurious_tail()

  NOTES
    Nothing is done (and 0 is returned) if InnoDB is not enabled or if it
    published no binlog name/position.
    It is an error if the binlog named by InnoDB is the currently active
    one, if it cannot be opened, if its size cannot be determined, if it is
    shorter than the published position, or if truncating/closing it fails.

  RETURN VALUES
    1	Error
    0	Ok
*/

bool MYSQL_LOG::cut_spurious_tail()
{
  int error= 0;
  char llbuf1[22], llbuf2[22];
  ulonglong actual_size;

  DBUG_ENTER("cut_spurious_tail");
#ifdef HAVE_INNOBASE_DB
  if (have_innodb != SHOW_OPTION_YES)
    DBUG_RETURN(0);
  /*
    This is the place where we use information from InnoDB to cut the
    binlog.
  */
  char *name= ha_innobase::get_mysql_bin_log_name();
  ulonglong pos= ha_innobase::get_mysql_bin_log_pos();
  if (name[0] == 0 || pos == (ulonglong)(-1))
  {
    DBUG_PRINT("info", ("InnoDB has not set binlog info"));
    DBUG_RETURN(0);
  }
  /* The binlog given by InnoDB normally is never an active binlog */
  if (is_open() && is_active(name))
  {
    sql_print_error("Warning: after InnoDB crash recovery, InnoDB says that "
                    "the binary log of the previous run has the same name "
                    "'%s' as the current one; this is likely to be abnormal.",
                    name);
    DBUG_RETURN(1);
  }
  sql_print_error("After InnoDB crash recovery, trying to truncate "
                  "the binary log '%s' at position %s corresponding to the "
                  "last committed transaction...", name, llstr(pos, llbuf1));
  /*
    If we have a too long binlog, cut. If too short, print error.
    NOTE(review): O_EXCL is passed without O_CREAT; POSIX leaves the effect
    of that combination undefined - confirm it is intended.
  */
  int fd= my_open(name, O_EXCL | O_APPEND | O_BINARY | O_WRONLY, MYF(MY_WME));
  if (fd < 0)
  {
    /* Save my_errno: the logging call below may overwrite it */
    int save_errno= my_errno;
    sql_print_error("Could not open the binary log '%s' for truncation.",
                    name);
    if (save_errno != ENOENT)
      sql_print_error("The binary log '%s' should not be used for "
                      "replication.", name);
    DBUG_RETURN(1);
  }

  /*
    Seeking to the end gives the file size. A failed seek must not be
    mistaken for a huge file, which would lead to a misleading "bigger than
    expected" message and a truncation attempt on a file of unknown size.
  */
  my_off_t file_end= my_seek(fd, 0L, MY_SEEK_END, MYF(MY_WME));
  if (file_end == MY_FILEPOS_ERROR)
  {
    sql_print_error("Could not determine the size of the binary log '%s'; "
                    "it should not be used for replication.", name);
    error= 1;
    goto err;
  }
  actual_size= file_end;

  if (pos > actual_size)
  {
    sql_print_error("The binary log '%s' is shorter than its expected size "
                    "(actual: %s, expected: %s) so it misses at least one "
                    "committed transaction; so it should not be used for "
                    "replication.", name, llstr(actual_size, llbuf1),
                    llstr(pos, llbuf2));
    error= 1;
    goto err;
  }
  if (pos < actual_size)
  {
    sql_print_error("The binary log '%s' is bigger than its expected size "
                    "(actual: %s, expected: %s) so it contains a rolled back "
                    "transaction; now truncating that.", name,
                    llstr(actual_size, llbuf1), llstr(pos, llbuf2));
    /*
      As on some OS, my_chsize() can only pad with 0s instead of really
      truncating. Then mysqlbinlog (and Binlog_dump thread) will error on
      these zeroes. This is annoying, but not more (you just need to manually
      switch replication to the next binlog). Fortunately, in my_chsize.c, it
      says that all modern machines support real ftruncate().
    */
    if ((error= my_chsize(fd, pos, 0, MYF(MY_WME))))
      goto err;
  }
err:
  if (my_close(fd, MYF(MY_WME)))
    error= 1;
#endif
  DBUG_RETURN(error);
}
/*
  If the server has InnoDB on, store the binlog name and position into
  InnoDB. This function is used every time we create a new binlog.

  SYNOPSIS
    report_pos_in_innodb()

  NOTES
    This cannot simply be done in MYSQL_LOG::open(), because when we create
    the first binlog at startup, we have not called ha_init() yet so we cannot
    write into InnoDB yet.
    Nothing is done if the log is not open or if InnoDB is not enabled.

  RETURN VALUES
    none
*/

void MYSQL_LOG::report_pos_in_innodb()
{
  DBUG_ENTER("report_pos_in_innodb");
#ifdef HAVE_INNOBASE_DB
  if (is_open() && have_innodb == SHOW_OPTION_YES)
  {
    /*
      my_b_tell() yields a file offset, which is not an int: cast it so
      that the argument matches the format specifier.
    */
    DBUG_PRINT("info", ("Reporting binlog info into InnoDB - "
                        "name: '%s' position: %lu",
                        log_file_name, (ulong) my_b_tell(&log_file)));
    innobase_store_binlog_offset_and_flush_log(log_file_name,
                                               my_b_tell(&log_file));
  }
#endif
  DBUG_VOID_RETURN;
}
...@@ -886,6 +886,7 @@ extern my_bool opt_slave_compressed_protocol, use_temp_pool; ...@@ -886,6 +886,7 @@ extern my_bool opt_slave_compressed_protocol, use_temp_pool;
extern my_bool opt_readonly, lower_case_file_system; extern my_bool opt_readonly, lower_case_file_system;
extern my_bool opt_enable_named_pipe, opt_sync_frm; extern my_bool opt_enable_named_pipe, opt_sync_frm;
extern my_bool opt_secure_auth; extern my_bool opt_secure_auth;
extern uint opt_crash_binlog_innodb;
extern char *shared_memory_base_name, *mysqld_unix_port; extern char *shared_memory_base_name, *mysqld_unix_port;
extern bool opt_enable_shared_memory; extern bool opt_enable_shared_memory;
extern char *default_tz_name; extern char *default_tz_name;
......
...@@ -273,11 +273,13 @@ my_bool opt_secure_auth= 0; ...@@ -273,11 +273,13 @@ my_bool opt_secure_auth= 0;
my_bool opt_short_log_format= 0; my_bool opt_short_log_format= 0;
my_bool opt_log_queries_not_using_indexes= 0; my_bool opt_log_queries_not_using_indexes= 0;
my_bool lower_case_file_system= 0; my_bool lower_case_file_system= 0;
my_bool opt_innodb_safe_binlog;
volatile bool mqh_used = 0; volatile bool mqh_used = 0;
uint mysqld_port, test_flags, select_errors, dropping_tables, ha_open_options; uint mysqld_port, test_flags, select_errors, dropping_tables, ha_open_options;
uint delay_key_write_options, protocol_version; uint delay_key_write_options, protocol_version;
uint lower_case_table_names; uint lower_case_table_names;
uint opt_crash_binlog_innodb;
uint volatile thread_count, thread_running, kill_cached_threads, wake_thread; uint volatile thread_count, thread_running, kill_cached_threads, wake_thread;
ulong back_log, connect_timeout, concurrency; ulong back_log, connect_timeout, concurrency;
...@@ -2550,6 +2552,16 @@ server."); ...@@ -2550,6 +2552,16 @@ server.");
if (opt_myisam_log) if (opt_myisam_log)
(void) mi_log(1); (void) mi_log(1);
/*
Now that InnoDB is initialized, we can know the last good binlog position
and cut the binlog if needed. This function does nothing if there was no
crash recovery by InnoDB.
*/
if (opt_innodb_safe_binlog)
/* not fatal if fails (but print errors) */
mysql_bin_log.cut_spurious_tail();
mysql_bin_log.report_pos_in_innodb();
/* call ha_init_key_cache() on all key caches to init them */ /* call ha_init_key_cache() on all key caches to init them */
process_key_caches(&ha_init_key_cache); process_key_caches(&ha_init_key_cache);
/* We must set dflt_key_cache in case we are using ISAM tables */ /* We must set dflt_key_cache in case we are using ISAM tables */
...@@ -3824,8 +3836,8 @@ enum options_mysqld ...@@ -3824,8 +3836,8 @@ enum options_mysqld
OPT_INNODB_FLUSH_LOG_AT_TRX_COMMIT, OPT_INNODB_FLUSH_LOG_AT_TRX_COMMIT,
OPT_INNODB_FLUSH_METHOD, OPT_INNODB_FLUSH_METHOD,
OPT_INNODB_FAST_SHUTDOWN, OPT_INNODB_FAST_SHUTDOWN,
OPT_INNODB_FILE_PER_TABLE, OPT_INNODB_FILE_PER_TABLE, OPT_CRASH_BINLOG_INNODB,
OPT_SAFE_SHOW_DB, OPT_SAFE_SHOW_DB, OPT_INNODB_SAFE_BINLOG,
OPT_INNODB, OPT_ISAM, OPT_NDBCLUSTER, OPT_SKIP_SAFEMALLOC, OPT_INNODB, OPT_ISAM, OPT_NDBCLUSTER, OPT_SKIP_SAFEMALLOC,
OPT_TEMP_POOL, OPT_TX_ISOLATION, OPT_TEMP_POOL, OPT_TX_ISOLATION,
OPT_SKIP_STACK_TRACE, OPT_SKIP_SYMLINKS, OPT_SKIP_STACK_TRACE, OPT_SKIP_SYMLINKS,
...@@ -4506,6 +4518,12 @@ replicating a LOAD DATA INFILE command.", ...@@ -4506,6 +4518,12 @@ replicating a LOAD DATA INFILE command.",
"The number of seconds the mysqld server is waiting for a connect packet before responding with 'Bad handshake'.", "The number of seconds the mysqld server is waiting for a connect packet before responding with 'Bad handshake'.",
(gptr*) &connect_timeout, (gptr*) &connect_timeout, (gptr*) &connect_timeout, (gptr*) &connect_timeout,
0, GET_ULONG, REQUIRED_ARG, CONNECT_TIMEOUT, 2, LONG_TIMEOUT, 0, 1, 0 }, 0, GET_ULONG, REQUIRED_ARG, CONNECT_TIMEOUT, 2, LONG_TIMEOUT, 0, 1, 0 },
#ifdef HAVE_REPLICATION
{"crash_binlog_innodb", OPT_CRASH_BINLOG_INNODB,
"Used only for testing, to crash when writing Nth event to binlog.",
(gptr*) &opt_crash_binlog_innodb, (gptr*) &opt_crash_binlog_innodb,
0, GET_UINT, REQUIRED_ARG, 0, 0, ~(uint)0, 0, 1, 0},
#endif
{"delayed_insert_timeout", OPT_DELAYED_INSERT_TIMEOUT, {"delayed_insert_timeout", OPT_DELAYED_INSERT_TIMEOUT,
"How long a INSERT DELAYED thread should wait for INSERT statements before terminating.", "How long a INSERT DELAYED thread should wait for INSERT statements before terminating.",
(gptr*) &delayed_insert_timeout, (gptr*) &delayed_insert_timeout, 0, (gptr*) &delayed_insert_timeout, (gptr*) &delayed_insert_timeout, 0,
...@@ -4585,6 +4603,20 @@ replicating a LOAD DATA INFILE command.", ...@@ -4585,6 +4603,20 @@ replicating a LOAD DATA INFILE command.",
"Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back.", "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back.",
(gptr*) &innobase_lock_wait_timeout, (gptr*) &innobase_lock_wait_timeout, (gptr*) &innobase_lock_wait_timeout, (gptr*) &innobase_lock_wait_timeout,
0, GET_LONG, REQUIRED_ARG, 50, 1, 1024 * 1024 * 1024, 0, 1, 0}, 0, GET_LONG, REQUIRED_ARG, 50, 1, 1024 * 1024 * 1024, 0, 1, 0},
#ifdef HAVE_REPLICATION
/*
innodb_safe_binlog is only a startup option, not a system variable. Making
it a variable would not make sense: it is only used at startup, and a value
set on the fly would be lost at the next startup, so setting it at runtime
would have no effect.
*/
{"innodb_safe_binlog", OPT_INNODB_SAFE_BINLOG,
"After a crash recovery by InnoDB, truncate the binary log to the last \
InnoDB committed transaction. Use only if this server updates only InnoDB \
tables.",
(gptr*) &opt_innodb_safe_binlog, (gptr*) &opt_innodb_safe_binlog,
0, GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0},
#endif
{"innodb_thread_concurrency", OPT_INNODB_THREAD_CONCURRENCY, {"innodb_thread_concurrency", OPT_INNODB_THREAD_CONCURRENCY,
"Helps in performance tuning in heavily concurrent environments.", "Helps in performance tuning in heavily concurrent environments.",
(gptr*) &innobase_thread_concurrency, (gptr*) &innobase_thread_concurrency, (gptr*) &innobase_thread_concurrency, (gptr*) &innobase_thread_concurrency,
......
...@@ -169,6 +169,8 @@ public: ...@@ -169,6 +169,8 @@ public:
int purge_first_log(struct st_relay_log_info* rli, bool included); int purge_first_log(struct st_relay_log_info* rli, bool included);
bool reset_logs(THD* thd); bool reset_logs(THD* thd);
void close(uint exiting); void close(uint exiting);
bool cut_spurious_tail();
void report_pos_in_innodb();
// iterating through the log index file // iterating through the log index file
int find_log_pos(LOG_INFO* linfo, const char* log_name, int find_log_pos(LOG_INFO* linfo, const char* log_name,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment