Commit 3cf052b7 authored by Alfranio Correia's avatar Alfranio Correia

BUG#43929 binlog corruption when max_binlog_cache_size is exceeded

Large transactions and statements may corrupt the binary log if the size of the
cache, which is set by the max_binlog_cache_size, is not enough to store the
the changes.

In a nutshell, to fix the bug, we save the position of the next character in the
cache before starting processing a statement. If there is a problem, we simply
restore the position thus removing any effect of the statement from the cache.
Unfortunately, to avoid corrupting the binary log, we may end up loosing changes
on non-transactional tables if they do not fit in the cache. In such cases, we
store an Incident_log_event in order to stop the slave and alert users that some
changes were not logged.

Precisely, for every non-transactional changes that do not fit into the cache,
we do the following:
  a) the statement is *not* logged
  b) an incident event is logged after committing/rolling back the transaction,
  if any. Note that if a failure happens before writing the incident event to
  the binary log, the slave will not stop and the master will not have reported
  any error.
  c) its respective statement gives an error

For transactional changes that do not fit into the cache, we do the following:
  a) the statement is *not* logged
  b) its respective statement gives an error

To work properly, this patch requires two additional things. Firstly, callers to
MYSQL_BIN_LOG::write and THD::binlog_query must handle any error returned and
take the appropriate actions such as undoing the effects of a statement. We
already changed some calls in the sql_insert.cc, sql_update.cc and sql_insert.cc
modules but the remaining calls spread all over the code should be handled in
BUG#37148. Secondly, statements must be either classified as DDL or DML because
DDLs that do not get into the cache must generate an incident event since they
cannot be rolled back.
parent 48d911e7
......@@ -669,13 +669,9 @@ call p_verify_status_increment(1, 0, 1, 0);
insert t1 set a=3;
call p_verify_status_increment(2, 2, 2, 2);
savepoint a;
call p_verify_status_increment(0, 0, 0, 0);
call p_verify_status_increment(1, 0, 0, 0);
insert t1 set a=4;
--echo # Binlog does not register itself this time for other than the 1st
--echo # statement of the transaction with MIXED/STATEMENT binlog_format.
--echo # It needs registering with the ROW format. Therefore 1,0,2,2 are
--echo # the correct arguments to this test after bug#40221 fixed.
call p_verify_status_increment(1, 0, 2, 2);
call p_verify_status_increment(2, 2, 2, 2);
release savepoint a;
rollback;
call p_verify_status_increment(0, 0, 0, 0);
......
......@@ -766,15 +766,11 @@ call p_verify_status_increment(2, 2, 2, 2);
SUCCESS
savepoint a;
call p_verify_status_increment(0, 0, 0, 0);
call p_verify_status_increment(1, 0, 0, 0);
SUCCESS
insert t1 set a=4;
# Binlog does not register itself this time for other than the 1st
# statement of the transaction with MIXED/STATEMENT binlog_format.
# It needs registering with the ROW format. Therefore 1,0,2,2 are
# the correct arguments to this test after bug#40221 fixed.
call p_verify_status_increment(1, 0, 2, 2);
call p_verify_status_increment(2, 2, 2, 2);
SUCCESS
release savepoint a;
......
stop slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
reset master;
reset slave;
drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
start slave;
CREATE TABLE t1(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=Innodb;
CREATE TABLE t2(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=MyIsam;
CREATE TABLE t3(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=Innodb;
########################################################################################
# 1 - SINGLE STATEMENT
########################################################################################
*** Single statement on transactional table ***
Got one of the listed errors
*** Single statement on non-transactional table ***
*** After WL#2687 the difference between STATEMENT/MIXED and ROW will not exist. ***
Got one of the listed errors
*** Single statement on both transactional and non-transactional tables. ***
*** After WL#2687 we will be able to change the order of the tables. ***
Got one of the listed errors
SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1;
START SLAVE SQL_THREAD;
TRUNCATE TABLE t1;
TRUNCATE TABLE t2;
TRUNCATE TABLE t3;
BEGIN;
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
BEGIN;
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
BEGIN;
Got one of the listed errors
Got one of the listed errors
source include/diff_master_slave.inc;
########################################################################################
# 3 - BEGIN - COMMIT
########################################################################################
TRUNCATE TABLE t1;
TRUNCATE TABLE t2;
TRUNCATE TABLE t3;
BEGIN;
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
COMMIT;
source include/diff_master_slave.inc;
########################################################################################
# 4 - BEGIN - ROLLBACK
########################################################################################
TRUNCATE TABLE t1;
TRUNCATE TABLE t2;
TRUNCATE TABLE t3;
BEGIN;
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
ROLLBACK;
Warnings:
Warning 1196 Some non-transactional changed tables couldn't be rolled back
source include/diff_master_slave.inc;
########################################################################################
# 5 - PROCEDURE
########################################################################################
TRUNCATE TABLE t1;
TRUNCATE TABLE t2;
TRUNCATE TABLE t3;
CREATE PROCEDURE p1(pd VARCHAR(30000))
BEGIN
INSERT INTO t1 (a, data) VALUES (1, pd);
INSERT INTO t1 (a, data) VALUES (2, pd);
INSERT INTO t1 (a, data) VALUES (3, pd);
INSERT INTO t1 (a, data) VALUES (4, pd);
INSERT INTO t1 (a, data) VALUES (5, 's');
END//
TRUNCATE TABLE t1;
TRUNCATE TABLE t1;
BEGIN;
Got one of the listed errors
COMMIT;
TRUNCATE TABLE t1;
BEGIN;
Got one of the listed errors
ROLLBACK;
source include/diff_master_slave.inc;
########################################################################################
# 6 - XID
########################################################################################
TRUNCATE TABLE t1;
TRUNCATE TABLE t2;
TRUNCATE TABLE t3;
BEGIN;
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
ROLLBACK TO sv;
Warnings:
Warning 1196 Some non-transactional changed tables couldn't be rolled back
COMMIT;
source include/diff_master_slave.inc;
########################################################################################
# 7 - NON-TRANS TABLE
########################################################################################
TRUNCATE TABLE t1;
TRUNCATE TABLE t2;
TRUNCATE TABLE t3;
BEGIN;
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
Got one of the listed errors
COMMIT;
BEGIN;
Got one of the listed errors
COMMIT;
########################################################################################
# CLEAN
########################################################################################
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
DROP TABLE IF EXISTS t6;
DROP PROCEDURE p1;
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
DROP TABLE IF EXISTS t4;
DROP TABLE IF EXISTS t5;
DROP TABLE IF EXISTS t6;
DROP PROCEDURE p1;
--binlog_cache_size=4096 --max_binlog_cache_size=7680
This diff is collapsed.
......@@ -153,7 +153,8 @@ class Mutex_sentry
class binlog_trx_data {
public:
binlog_trx_data()
: at_least_one_stmt(0), m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF)
: at_least_one_stmt(0), incident(FALSE), m_pending(0),
before_stmt_pos(MY_OFF_T_UNDEF)
{
trans_log.end_of_file= max_binlog_cache_size;
}
......@@ -184,6 +185,7 @@ class binlog_trx_data {
delete pending();
set_pending(0);
reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0);
trans_log.end_of_file= max_binlog_cache_size;
if (pos < before_stmt_pos)
before_stmt_pos= MY_OFF_T_UNDEF;
......@@ -206,6 +208,7 @@ class binlog_trx_data {
if (!empty())
truncate(0);
before_stmt_pos= MY_OFF_T_UNDEF;
incident= FALSE;
trans_log.end_of_file= max_binlog_cache_size;
DBUG_ASSERT(empty());
}
......@@ -222,11 +225,22 @@ class binlog_trx_data {
IO_CACHE trans_log; // The transaction cache
void set_incident(void)
{
incident= TRUE;
}
bool has_incident(void)
{
return(incident);
}
/**
Boolean that is true if there is at least one statement in the
transaction cache.
*/
bool at_least_one_stmt;
bool incident;
private:
/*
......@@ -1391,7 +1405,8 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
*/
if (end_ev != NULL)
{
thd->binlog_flush_pending_rows_event(TRUE);
if (thd->binlog_flush_pending_rows_event(TRUE))
DBUG_RETURN(1);
/*
Doing a commit or a rollback including non-transactional tables,
i.e., ending a transaction where we might write the transaction
......@@ -1402,7 +1417,8 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
were, we would have to ensure that we're not ending a statement
inside a stored function.
*/
error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev);
error= mysql_bin_log.write(thd, &trx_data->trans_log, end_ev,
trx_data->has_incident());
trx_data->reset();
/*
......@@ -1428,7 +1444,11 @@ binlog_end_trans(THD *thd, binlog_trx_data *trx_data,
*/
thd->binlog_remove_pending_rows_event(TRUE);
if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
{
if (trx_data->has_incident())
mysql_bin_log.write_incident(thd, TRUE);
trx_data->reset();
}
else // ...statement
trx_data->truncate(trx_data->before_stmt_pos);
......@@ -1544,9 +1564,11 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
YESNO(all),
YESNO(thd->transaction.all.modified_non_trans_table),
YESNO(thd->transaction.stmt.modified_non_trans_table)));
if (all && thd->transaction.all.modified_non_trans_table ||
!all && thd->transaction.stmt.modified_non_trans_table ||
(thd->options & OPTION_KEEP_LOG))
if ((all && thd->transaction.all.modified_non_trans_table) ||
(!all && thd->transaction.stmt.modified_non_trans_table &&
!mysql_bin_log.check_write_error(thd)) ||
((thd->options & OPTION_KEEP_LOG) &&
!mysql_bin_log.check_write_error(thd)))
{
/*
We write the transaction cache with a rollback last if we have
......@@ -1559,14 +1581,22 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
Query_log_event qev(thd, STRING_WITH_LEN("ROLLBACK"), TRUE, TRUE, 0);
error= binlog_end_trans(thd, trx_data, &qev, all);
}
else if (all && !thd->transaction.all.modified_non_trans_table ||
!all && !thd->transaction.stmt.modified_non_trans_table)
else
{
/*
If we have modified only transactional tables, we can truncate
the transaction cache without writing anything to the binary
log.
We reach this point if either only transactional tables were modified or
the effect of a statement that did not get into the binlog needs to be
rolled back. In the latter case, if a statement changed non-transactional
tables or had the OPTION_KEEP_LOG associated, we write an incident event
to the binlog in order to stop slaves and notify users that some changes
on the master did not get into the binlog and slaves will be inconsistent.
On the other hand, if a statement is transactional, we just safely roll it
back.
*/
if ((thd->transaction.stmt.modified_non_trans_table ||
(thd->options & OPTION_KEEP_LOG)) &&
mysql_bin_log.check_write_error(thd))
trx_data->set_incident();
error= binlog_end_trans(thd, trx_data, 0, all);
}
if (!all)
......@@ -1574,6 +1604,44 @@ static int binlog_rollback(handlerton *hton, THD *thd, bool all)
DBUG_RETURN(error);
}
void MYSQL_BIN_LOG::set_write_error(THD *thd)
{
DBUG_ENTER("MYSQL_BIN_LOG::set_write_error");
write_error= 1;
if (check_write_error(thd))
DBUG_VOID_RETURN;
if (my_errno == EFBIG)
my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(MY_WME));
else
my_error(ER_ERROR_ON_WRITE, MYF(MY_WME), name, errno);
DBUG_VOID_RETURN;
}
bool MYSQL_BIN_LOG::check_write_error(THD *thd)
{
DBUG_ENTER("MYSQL_BIN_LOG::check_write_error");
bool checked= FALSE;
if (!thd->is_error())
DBUG_RETURN(checked);
switch (thd->main_da.sql_errno())
{
case ER_TRANS_CACHE_FULL:
case ER_ERROR_ON_WRITE:
case ER_BINLOG_LOGGING_IMPOSSIBLE:
checked= TRUE;
break;
}
DBUG_RETURN(checked);
}
/**
@note
How do we handle this (unlikely but legal) case:
......@@ -3854,6 +3922,7 @@ MYSQL_BIN_LOG::flush_and_set_pending_rows_event(THD *thd,
if (pending->write(file))
{
pthread_mutex_unlock(&LOCK_log);
set_write_error(thd);
DBUG_RETURN(1);
}
......@@ -3928,7 +3997,8 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
*/
bool const end_stmt=
thd->prelocked_mode && thd->lex->requires_prelocking();
thd->binlog_flush_pending_rows_event(end_stmt);
if (thd->binlog_flush_pending_rows_event(end_stmt))
DBUG_RETURN(error);
pthread_mutex_lock(&LOCK_log);
......@@ -3979,8 +4049,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
DBUG_PRINT("info", ("Using trans_log: cache: %d, trans_log_pos: %lu",
event_info->get_cache_stmt(),
(ulong) trans_log_pos));
if (trans_log_pos == 0)
thd->binlog_start_trans_and_stmt();
thd->binlog_start_trans_and_stmt();
file= trans_log;
}
/*
......@@ -4058,7 +4127,8 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
Write the SQL command
*/
if (event_info->write(file))
if (event_info->write(file) ||
DBUG_EVALUATE_IF("injecting_fault_writing", 1, 0))
goto err;
if (file == &log_file) // we are writing to the real log (disk)
......@@ -4072,13 +4142,7 @@ bool MYSQL_BIN_LOG::write(Log_event *event_info)
err:
if (error)
{
if (my_errno == EFBIG)
my_message(ER_TRANS_CACHE_FULL, ER(ER_TRANS_CACHE_FULL), MYF(0));
else
my_error(ER_ERROR_ON_WRITE, MYF(0), name, errno);
write_error=1;
}
set_write_error(thd);
}
if (event_info->flags & LOG_EVENT_UPDATE_TABLE_MAP_VERSION_F)
......@@ -4359,6 +4423,29 @@ int query_error_code(THD *thd, bool not_killed)
return error;
}
bool MYSQL_BIN_LOG::write_incident(THD *thd, bool lock)
{
uint error= 0;
DBUG_ENTER("MYSQL_BIN_LOG::write_incident");
LEX_STRING const write_error_msg=
{ C_STRING_WITH_LEN("error writing to the binary log") };
Incident incident= INCIDENT_LOST_EVENTS;
Incident_log_event ev(thd, incident, write_error_msg);
if (lock)
pthread_mutex_lock(&LOCK_log);
ev.write(&log_file);
if (lock)
{
if (!error && !(error= flush_and_sync()))
{
signal_update();
rotate_and_purge(RP_LOCK_LOG_IS_ALREADY_LOCKED);
}
pthread_mutex_unlock(&LOCK_log);
}
DBUG_RETURN(error);
}
/**
Write a cached log entry to the binary log.
- To support transaction over replication, we wrap the transaction
......@@ -4371,6 +4458,9 @@ int query_error_code(THD *thd, bool not_killed)
@param cache The cache to copy to the binlog
@param commit_event The commit event to print after writing the
contents of the cache.
@param incident Defines if an incident event should be created to
notify that some non-transactional changes did
not get into the binlog.
@note
We only come here if there is something in the cache.
......@@ -4380,7 +4470,8 @@ int query_error_code(THD *thd, bool not_killed)
'cache' needs to be reinitialized after this functions returns.
*/
bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event,
bool incident)
{
DBUG_ENTER("MYSQL_BIN_LOG::write(THD *, IO_CACHE *, Log_event *)");
VOID(pthread_mutex_lock(&LOCK_log));
......@@ -4429,6 +4520,10 @@ bool MYSQL_BIN_LOG::write(THD *thd, IO_CACHE *cache, Log_event *commit_event)
if (commit_event && commit_event->write(&log_file))
goto err;
if (incident && write_incident(thd, FALSE))
goto err;
if (flush_and_sync())
goto err;
DBUG_EXECUTE_IF("half_binlogged_transaction", abort(););
......
......@@ -356,9 +356,12 @@ class MYSQL_BIN_LOG: public TC_LOG, private MYSQL_LOG
void new_file();
bool write(Log_event* event_info); // binary log write
bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event);
bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event, bool incident);
bool write_incident(THD *thd, bool lock);
int write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
void set_write_error(THD *thd);
bool check_write_error(THD *thd);
void start_union_events(THD *thd, query_id_t query_id_param);
void stop_union_events(THD *thd);
......
......@@ -408,7 +408,7 @@ bool mysql_delete(THD *thd, TABLE_LIST *table_list, COND *conds,
thd->query, thd->query_length,
is_trans, FALSE, errcode);
if (log_result && transactional_table)
if (log_result)
{
error=1;
}
......
......@@ -897,8 +897,7 @@ bool mysql_insert(THD *thd,TABLE_LIST *table_list,
if (thd->binlog_query(THD::ROW_QUERY_TYPE,
thd->query, thd->query_length,
transactional_table, FALSE,
errcode) &&
transactional_table)
errcode))
{
error=1;
}
......
......@@ -803,8 +803,7 @@ int mysql_update(THD *thd,
if (thd->binlog_query(THD::ROW_QUERY_TYPE,
thd->query, thd->query_length,
transactional_table, FALSE, errcode) &&
transactional_table)
transactional_table, FALSE, errcode))
{
error=1; // Rollback update
}
......@@ -2087,8 +2086,7 @@ bool multi_update::send_eof()
errcode= query_error_code(thd, killed_status == THD::NOT_KILLED);
if (thd->binlog_query(THD::ROW_QUERY_TYPE,
thd->query, thd->query_length,
transactional_tables, FALSE, errcode) &&
trans_safe)
transactional_tables, FALSE, errcode))
{
local_error= 1; // Rollback update
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment