Commit 55a7159f authored by unknown's avatar unknown

MDEV-4982: GTID looses all binlog state after crash if InnoDB is disabled

MDEV-4725: Incorrect binlog state recovery if crash while writing event group

The binlog state was not recovered correctly if XA is not used (eg. InnoDB
disabled), or if server crashed in the middle of writing an event group to the
binlog.

With this patch, we ensure that recovery of binlog state is done even if we do
not do the full XA binlog recovery, and we ensure that we only recover fully
written event groups into the binlog state.
parent 146c8a48
......@@ -90,5 +90,12 @@ a
6
7
8
*** MDEV-4725: Incorrect recovery when crash in the middle of writing an event group ***
SET GLOBAL debug_dbug="+d,crash_before_writing_xid";
INSERT INTO t1 VALUES (9), (10);
SHOW VARIABLES like 'gtid_strict_mode';
Variable_name Value
gtid_strict_mode ON
include/start_slave.inc
DROP TABLE t1;
include/rpl_end.inc
include/rpl_init.inc [topology=1->2]
*** Test crashing master with InnoDB disabled, the binlog gtid state should still be correctly recovered. ***
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=MyISAM;
include/stop_slave.inc
CHANGE MASTER TO master_use_gtid=slave_pos;
include/start_slave.inc
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
SELECT * FROM t1 ORDER BY a;
a
1
2
FLUSH TABLES;
SET SESSION debug_dbug="+d,crash_dispatch_command_before";
SELECT 1;
Got one of the listed errors
INSERT INTO t1 VALUES (3);
SELECT * FROM t1 ORDER BY a;
a
1
2
3
DROP TABLE t1;
include/rpl_end.inc
......@@ -256,6 +256,43 @@ EOF
SELECT * FROM t1 ORDER BY a;
--echo *** MDEV-4725: Incorrect recovery when crash in the middle of writing an event group ***
--connection server_2
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
wait
EOF
--let $old_gtid_strict= `SELECT @@gtid_strict_mode`
SET GLOBAL debug_dbug="+d,crash_before_writing_xid";
--connection server_1
INSERT INTO t1 VALUES (9), (10);
--save_master_pos
--connection server_2
--source include/wait_until_disconnected.inc
# The bug was that during crash recovery we would update the binlog state
# with the GTID of the half-written event group at the end of the slaves
# binlog, even though this event group was not committed.
# We restart the server with gtid_strict_mode; this way, we get an error
# about duplicate gtid when the slave re-executes the event group, if the
# binlog crash recovery is incorrect.
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
restart: --gtid_strict_mode=1
EOF
--enable_reconnect
--source include/wait_until_connected_again.inc
SHOW VARIABLES like 'gtid_strict_mode';
--source include/start_slave.inc
--sync_with_master
--disable_query_log
eval SET GLOBAL gtid_strict_mode= $old_gtid_strict;
--enable_query_log
--connection server_1
DROP TABLE t1;
......
--skip-stack-trace --skip-core-file --skip-innodb
--source include/have_debug.inc
# Valgrind does not work well with test that crashes the server
--source include/not_valgrind.inc
--let $rpl_topology=1->2
--source include/rpl_init.inc
--echo *** Test crashing master with InnoDB disabled, the binlog gtid state should still be correctly recovered. ***
--connection server_1
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=MyISAM;
--save_master_pos
--connection server_2
--sync_with_master
--source include/stop_slave.inc
CHANGE MASTER TO master_use_gtid=slave_pos;
--source include/start_slave.inc
--connection server_1
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
--save_master_pos
--connection server_2
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--connection server_1
--write_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
wait
EOF
FLUSH TABLES;
SET SESSION debug_dbug="+d,crash_dispatch_command_before";
--error 2006,2013
SELECT 1;
--source include/wait_until_disconnected.inc
--append_file $MYSQLTEST_VARDIR/tmp/mysqld.1.expect
restart-rpl_gtid_crash.test
EOF
--connection server_1
--enable_reconnect
--source include/wait_until_connected_again.inc
INSERT INTO t1 VALUES (3);
--save_master_pos
--connection server_2
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--connection server_1
DROP TABLE t1;
--connection default
--enable_reconnect
--source include/wait_until_connected_again.inc
--source include/rpl_end.inc
......@@ -2937,7 +2937,8 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_period)
bytes_written(0), file_id(1), open_count(1),
group_commit_queue(0), group_commit_queue_busy(FALSE),
num_commits(0), num_group_commits(0),
sync_period_ptr(sync_period), sync_counter(0), state_read(false),
sync_period_ptr(sync_period), sync_counter(0),
state_file_deleted(false), binlog_state_recover_done(false),
is_relay_log(0), signal_cnt(0),
checksum_alg_reset(BINLOG_CHECKSUM_ALG_UNDEF),
relay_log_checksum_alg(BINLOG_CHECKSUM_ALG_UNDEF),
......@@ -3139,12 +3140,19 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
DBUG_ENTER("MYSQL_BIN_LOG::open");
DBUG_PRINT("enter",("log_type: %d",(int) log_type_arg));
if (!is_relay_log && read_state_from_file())
DBUG_RETURN(1);
if (!is_relay_log)
{
if (!binlog_state_recover_done)
{
binlog_state_recover_done= true;
if (do_binlog_recovery(opt_bin_logname, false))
DBUG_RETURN(1);
}
if (!is_relay_log && !binlog_background_thread_started &&
start_binlog_background_thread())
DBUG_RETURN(1);
if (!binlog_background_thread_started &&
start_binlog_background_thread())
DBUG_RETURN(1);
}
if (init_and_set_log_file_name(log_name, new_name, log_type_arg,
io_cache_type_arg))
......@@ -3431,6 +3439,25 @@ bool MYSQL_BIN_LOG::open(const char *log_name,
my_free(binlog_xid_count_list.get());
binlog_xid_count_list.push_back(new_xid_list_entry);
mysql_mutex_unlock(&LOCK_xid_list);
/*
Now that we have synced a new binlog file with an initial Gtid_list
event, it is safe to delete the binlog state file. We will write out
a new, updated file at shutdown, and if we crash before we can recover
the state from the newly written binlog file.
Since the state file will contain out-of-date data as soon as the first
new GTID is binlogged, it is better to remove it, to avoid any risk of
accidentally reading incorrect data later.
*/
if (!state_file_deleted)
{
char buf[FN_REFLEN];
fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
MY_UNPACK_FILENAME);
my_delete(buf, MY_SYNC_DIR);
state_file_deleted= true;
}
}
log_state= LOG_OPENED;
......@@ -5488,10 +5515,6 @@ MYSQL_BIN_LOG::read_state_from_file()
bool opened= false;
bool inited= false;
if (state_read)
return 0;
state_read= true;
fn_format(buf, opt_bin_logname, mysql_data_home, ".state",
MY_UNPACK_FILENAME);
if ((file_no= mysql_file_open(key_file_binlog_state, buf,
......@@ -8559,7 +8582,6 @@ int TC_LOG::using_heuristic_recover()
int TC_LOG_BINLOG::open(const char *opt_name)
{
LOG_INFO log_info;
int error= 1;
DBUG_ASSERT(total_ha_2pc > 1);
......@@ -8580,65 +8602,8 @@ int TC_LOG_BINLOG::open(const char *opt_name)
return 1;
}
if ((error= find_log_pos(&log_info, NullS, 1)))
{
if (error != LOG_INFO_EOF)
sql_print_error("find_log_pos() failed (error: %d)", error);
else
error= 0;
goto err;
}
{
const char *errmsg;
IO_CACHE log;
File file;
Log_event *ev=0;
Format_description_log_event fdle(BINLOG_VERSION);
char log_name[FN_REFLEN];
if (! fdle.is_valid())
goto err;
do
{
strmake_buf(log_name, log_info.log_file_name);
} while (!(error= find_next_log(&log_info, 1)));
if (error != LOG_INFO_EOF)
{
sql_print_error("find_log_pos() failed (error: %d)", error);
goto err;
}
if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
{
sql_print_error("%s", errmsg);
goto err;
}
if ((ev= Log_event::read_log_event(&log, 0, &fdle,
opt_master_verify_checksum)) &&
ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
{
sql_print_information("Recovering after a crash using %s", opt_name);
error= recover(&log_info, log_name, &log,
(Format_description_log_event *)ev);
state_read= true;
}
else
error= read_state_from_file();
delete ev;
end_io_cache(&log);
mysql_file_close(file, MYF(MY_WME));
if (error)
goto err;
}
err:
error= do_binlog_recovery(opt_name, true);
binlog_state_recover_done= true;
return error;
}
......@@ -9014,9 +8979,9 @@ start_binlog_background_thread()
int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
IO_CACHE *first_log,
Format_description_log_event *fdle)
Format_description_log_event *fdle, bool do_xa)
{
Log_event *ev;
Log_event *ev= NULL;
HASH xids;
MEM_ROOT mem_root;
char binlog_checkpoint_name[FN_REFLEN];
......@@ -9025,13 +8990,19 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
IO_CACHE log;
File file= -1;
const char *errmsg;
#ifdef HAVE_REPLICATION
rpl_gtid last_gtid;
bool last_gtid_standalone= false;
bool last_gtid_valid= false;
#endif
if (! fdle->is_valid() ||
my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
sizeof(my_xid), 0, 0, MYF(0)))
(do_xa && my_hash_init(&xids, &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0,
sizeof(my_xid), 0, 0, MYF(0))))
goto err1;
init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0));
if (do_xa)
init_alloc_root(&mem_root, TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0));
fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error
......@@ -9051,22 +9022,23 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
0, fdle, opt_master_verify_checksum))
&& ev->is_valid())
{
switch (ev->get_type_code())
enum Log_event_type typ= ev->get_type_code();
switch (typ)
{
case XID_EVENT:
{
Xid_log_event *xev=(Xid_log_event *)ev;
uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
sizeof(xev->xid));
if (!x || my_hash_insert(&xids, x))
if (do_xa)
{
delete ev;
goto err2;
Xid_log_event *xev=(Xid_log_event *)ev;
uchar *x= (uchar *) memdup_root(&mem_root, (uchar*) &xev->xid,
sizeof(xev->xid));
if (!x || my_hash_insert(&xids, x))
goto err2;
break;
}
break;
}
case BINLOG_CHECKPOINT_EVENT:
if (first_round)
if (first_round && do_xa)
{
uint dir_len;
Binlog_checkpoint_log_event *cev= (Binlog_checkpoint_log_event *)ev;
......@@ -9084,8 +9056,8 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
cev->binlog_file_name, FN_REFLEN - 1 - dir_len);
binlog_checkpoint_found= true;
}
break;
}
break;
case GTID_LIST_EVENT:
if (first_round)
{
......@@ -9097,28 +9069,49 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
}
break;
#ifdef HAVE_REPLICATION
case GTID_EVENT:
if (first_round)
{
Gtid_log_event *gev= (Gtid_log_event *)ev;
rpl_gtid gtid;
/* Update the binlog state with any GTID logged after Gtid_list. */
gtid.domain_id= gev->domain_id;
gtid.server_id= gev->server_id;
gtid.seq_no= gev->seq_no;
if (rpl_global_gtid_binlog_state.update_nolock(&gtid, false))
goto err2;
last_gtid.domain_id= gev->domain_id;
last_gtid.server_id= gev->server_id;
last_gtid.seq_no= gev->seq_no;
last_gtid_standalone=
((gev->flags2 & Gtid_log_event::FL_STANDALONE) ? true : false);
last_gtid_valid= true;
}
break;
#endif
default:
/* Nothing. */
break;
}
#ifdef HAVE_REPLICATION
if (last_gtid_valid &&
((last_gtid_standalone && !ev->is_part_of_group(typ)) ||
(!last_gtid_standalone &&
(typ == XID_EVENT ||
(typ == QUERY_EVENT &&
(((Query_log_event *)ev)->is_commit() ||
((Query_log_event *)ev)->is_rollback()))))))
{
if (rpl_global_gtid_binlog_state.update_nolock(&last_gtid, false))
goto err2;
last_gtid_valid= false;
}
#endif
delete ev;
ev= NULL;
}
if (!do_xa)
break;
/*
If the last binlog checkpoint event points to an older log, we have to
scan all logs from there also, to get all possible XIDs to recover.
......@@ -9171,21 +9164,28 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const char *last_log_name,
}
}
if (ha_recover(&xids))
goto err2;
if (do_xa)
{
if (ha_recover(&xids))
goto err2;
free_root(&mem_root, MYF(0));
my_hash_free(&xids);
free_root(&mem_root, MYF(0));
my_hash_free(&xids);
}
return 0;
err2:
delete ev;
if (file >= 0)
{
end_io_cache(&log);
mysql_file_close(file, MYF(MY_WME));
}
free_root(&mem_root, MYF(0));
my_hash_free(&xids);
if (do_xa)
{
free_root(&mem_root, MYF(0));
my_hash_free(&xids);
}
err1:
sql_print_error("Crash recovery failed. Either correct the problem "
"(if it's, for example, out of memory error) and restart, "
......@@ -9195,6 +9195,67 @@ err1:
}
int
MYSQL_BIN_LOG::do_binlog_recovery(const char *opt_name, bool do_xa_recovery)
{
LOG_INFO log_info;
const char *errmsg;
IO_CACHE log;
File file;
Log_event *ev= 0;
Format_description_log_event fdle(BINLOG_VERSION);
char log_name[FN_REFLEN];
int error;
if ((error= find_log_pos(&log_info, NullS, 1)))
{
if (error != LOG_INFO_EOF)
sql_print_error("find_log_pos() failed (error: %d)", error);
else
error= 0;
return error;
}
if (! fdle.is_valid())
return 1;
do
{
strmake_buf(log_name, log_info.log_file_name);
} while (!(error= find_next_log(&log_info, 1)));
if (error != LOG_INFO_EOF)
{
sql_print_error("find_log_pos() failed (error: %d)", error);
return error;
}
if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
{
sql_print_error("%s", errmsg);
return 1;
}
if ((ev= Log_event::read_log_event(&log, 0, &fdle,
opt_master_verify_checksum)) &&
ev->get_type_code() == FORMAT_DESCRIPTION_EVENT &&
ev->flags & LOG_EVENT_BINLOG_IN_USE_F)
{
sql_print_information("Recovering after a crash using %s", opt_name);
error= recover(&log_info, log_name, &log,
(Format_description_log_event *)ev, do_xa_recovery);
}
else
error= read_state_from_file();
delete ev;
end_io_cache(&log);
mysql_file_close(file, MYF(MY_WME));
return error;
}
#ifdef INNODB_COMPATIBILITY_HOOKS
/**
Get the file name of the MySQL binlog.
......
......@@ -521,8 +521,8 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
*/
uint *sync_period_ptr;
uint sync_counter;
/* Protect against reading the binlog state file twice. */
bool state_read;
bool state_file_deleted;
bool binlog_state_recover_done;
inline uint get_sync_period()
{
......@@ -661,7 +661,8 @@ public:
int unlog(ulong cookie, my_xid xid);
void commit_checkpoint_notify(void *cookie);
int recover(LOG_INFO *linfo, const char *last_log_name, IO_CACHE *first_log,
Format_description_log_event *fdle);
Format_description_log_event *fdle, bool do_xa);
int do_binlog_recovery(const char *opt_name, bool do_xa_recovery);
#if !defined(MYSQL_CLIENT)
int flush_and_set_pending_rows_event(THD *thd, Rows_log_event* event,
......
......@@ -5988,10 +5988,9 @@ static Log_event* next_event(rpl_group_info *rgi, ulonglong *event_size)
IO_CACHE* cur_log = rli->cur_log;
mysql_mutex_t *log_lock = rli->relay_log.get_log_lock();
const char* errmsg=0;
THD *thd = rgi->thd;
DBUG_ENTER("next_event");
DBUG_ASSERT(thd != 0 && thd == rli->sql_driver_thd);
DBUG_ASSERT(rgi->thd != 0 && rgi->thd == rli->sql_driver_thd);
*event_size= 0;
#ifndef DBUG_OFF
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment