Commit 0ad52e4d authored by Brandon Nesterenko's avatar Brandon Nesterenko

MDEV-27512: Assertion !thd->transaction_rollback_request failed in rows_event_stmt_cleanup

If replicating an event in ROW format, and InnoDB detects a deadlock
while searching for a row, the row event will error and rollback in
InnoDB and indicate that the binlog cache also needs to be cleared,
i.e. by marking thd->transaction_rollback_request. In the normal
case, this will trigger an error in Rows_log_event::do_apply_event()
and cause a rollback. During the Rows_log_event::do_apply_event()
cleanup of a successful event application, there is a DBUG_ASSERT in
log_event_server.cc::rows_event_stmt_cleanup(), which sets the
expectation that thd->transaction_rollback_request cannot be set
because the general rollback (i.e. not the InnoDB rollback) should
have happened already. However, if the replica is configured to skip
deadlock errors, the rows event logic will clear the error and
continue on, as if no error happened. This results in
thd->transaction_rollback_request being set while in
rows_event_stmt_cleanup(), thereby triggering the assertion.

This patch fixes this in the following ways:
 1) The assertion is invalid, and thereby removed.
 2) The rollback case is forced in rows_event_stmt_cleanup() if
transaction_rollback_request is set.

Note the differing behavior between transactions which are skipped
due to deadlock errors and other errors. When a transaction is
skipped due to an ignored deadlock error, the entire transaction is
rolled back and skipped (though note MDEV-33930 which allows
statements in the same transaction after the deadlock-inducing one
to commit). When a transaction is skipped due to ignoring a
different error, only the erroring statements are rolled-back and
skipped - the rest of the transaction will execute as normal. The
effect of this can be seen in the test results. The added test case
to rpl_skip_error.test shows that only statements which are ignored
due to non-deadlock errors are ignored in larger transactions. A
diff between rpl_temporary_error2_skip_all.result and
rpl_temporary_error2.result shows that all statements in the errored
transaction are rolled back (diff pasted below):

: diff rpl_temporary_error2.result rpl_temporary_error2_skip_all.result
49c49
< 2	1
---
> 2	NULL
51c51
< 4	1
---
> 4	NULL
53c53
< * There will be two rows in t2 due to the retry.
---
> * There will be one row in t2 because the ignored deadlock does not retry.
57d56
< 1
59c58
< 1
---
> 0

Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
parent 061adae9
......@@ -122,6 +122,31 @@ connection slave;
# Slave_skipped_errros = 5
**** We cannot execute a select as there are differences in the
**** behavior between STMT and RBR.
****
**** Ensure transactions which are skipped due to encountering a
**** non-deadlock error which is present in --slave-skip-errors result
**** in partially committed transactions
connection master;
CREATE TABLE t3 (a INT UNIQUE) ENGINE=InnoDB;
connection slave;
connection slave;
INSERT INTO t3 VALUES (3);
connection master;
BEGIN;
INSERT INTO t3 VALUES (1);
INSERT INTO t3 VALUES (2);
INSERT INTO t3 VALUES (3);
INSERT INTO t3 VALUES (4);
COMMIT;
connection slave;
**** Master and slave tables should have the same data, due to the
**** partially replicated transaction's data overlapping with the data
**** that pre-existed on the slave. That is, despite the transaction
**** consisting of 4 statements, the errored statement should be ignored
**** and the other 3 should commit successfully.
include/diff_tables.inc [master:t3,slave:t3]
connection master;
DROP TABLE t3;
==== Clean Up ====
connection master;
DROP TABLE t1;
......
include/master-slave.inc
[connection master]
call mtr.add_suppression("Deadlock found when trying to get lock; try restarting transaction");
*** Provoke a deadlock on the slave, check that transaction retry succeeds. ***
connection master;
CREATE TABLE t1 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
CREATE TABLE t2 (a INT) ENGINE=InnoDB;
INSERT INTO t1(a) VALUES (1), (2), (3), (4), (5);
connection slave;
SELECT * FROM t1 ORDER BY a;
a b
1 NULL
2 NULL
3 NULL
4 NULL
5 NULL
SET sql_log_bin=0;
ALTER TABLE t2 ENGINE=MyISAM;
SET sql_log_bin=1;
connect con_temp1,127.0.0.1,root,,test,$SERVER_MYPORT_2,;
connection con_temp1;
BEGIN;
UPDATE t1 SET b=2 WHERE a=4;
INSERT INTO t2 VALUES (2);
DELETE FROM t2 WHERE a=2;
connection master;
BEGIN;
UPDATE t1 SET b=1 WHERE a=2;
INSERT INTO t2 VALUES (1);
UPDATE t1 SET b=1 WHERE a=4;
COMMIT;
connection slave;
connection con_temp1;
UPDATE t1 SET b=2 WHERE a=2;
SELECT * FROM t1 WHERE a<10 ORDER BY a;
a b
1 NULL
2 2
3 NULL
4 2
5 NULL
ROLLBACK;
Warnings:
Warning 1196 Some non-transactional changed tables couldn't be rolled back
connection slave;
SELECT * FROM t1 ORDER BY a;
a b
1 NULL
2 NULL
3 NULL
4 NULL
5 NULL
* There will be one row in t2 because the ignored deadlock does not retry.
SELECT * FROM t2 ORDER BY a;
a
1
retries
0
Last_SQL_Errno = '0'
Last_SQL_Error = ''
connection master;
DROP TABLE t1;
DROP TABLE t2;
include/rpl_end.inc
......@@ -3,7 +3,11 @@
# Verify that --slave-skip-errors works correctly. The error messages
# specified by --slave-skip-errors on slave should be ignored. If
# such errors occur, they should not be reported and not cause the
# slave to stop.
# slave to stop. If a skipped-due-to-error statement is a part of a
# larger transaction, and the error is not a deadlock error, the rest
# of the transaction should still commit, with just the errored statement
# ignored (note transactions which are skipped due to deadlocks are
# rolled back fully, see rpl_temporary_error2_skip_all.test).
#
# ==== Method ====
#
......@@ -164,6 +168,42 @@ let $current_skipped_error= query_get_value(show global status like "Slave_skipp
--echo **** We cannot execute a select as there are differences in the
--echo **** behavior between STMT and RBR.
--echo ****
--echo **** Ensure transactions which are skipped due to encountering a
--echo **** non-deadlock error which is present in --slave-skip-errors result
--echo **** in partially committed transactions
# Slave will insert 3 first, and master will insert 3 within a larger trx
--let $value_preexisting_on_slave= 3
--connection master
CREATE TABLE t3 (a INT UNIQUE) ENGINE=InnoDB;
--sync_slave_with_master
--connection slave
--eval INSERT INTO t3 VALUES ($value_preexisting_on_slave)
--connection master
BEGIN;
INSERT INTO t3 VALUES (1);
INSERT INTO t3 VALUES (2);
--eval INSERT INTO t3 VALUES ($value_preexisting_on_slave)
INSERT INTO t3 VALUES (4);
COMMIT;
--sync_slave_with_master
--echo **** Master and slave tables should have the same data, due to the
--echo **** partially replicated transaction's data overlapping with the data
--echo **** that pre-existed on the slave. That is, despite the transaction
--echo **** consisting of 4 statements, the errored statement should be ignored
--echo **** and the other 3 should commit successfully.
let $diff_tables=master:t3,slave:t3;
source include/diff_tables.inc;
--connection master
DROP TABLE t3;
--echo ==== Clean Up ====
connection master;
......
......@@ -64,7 +64,14 @@ ROLLBACK;
--connection slave
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--echo * There will be two rows in t2 due to the retry.
if (!$ignored_db_deadlock)
{
--echo * There will be two rows in t2 due to the retry.
}
if ($ignored_db_deadlock)
{
--echo * There will be one row in t2 because the ignored deadlock does not retry.
}
SELECT * FROM t2 ORDER BY a;
let $new_retry= query_get_value(SHOW STATUS LIKE 'Slave_retried_transactions', Value, 1);
--disable_query_log
......
--source include/have_binlog_format_row.inc
--let $ignored_db_deadlock= 1
--source rpl_temporary_error2.test
......@@ -5926,11 +5926,13 @@ static int rows_event_stmt_cleanup(rpl_group_info *rgi, THD * thd)
Xid_log_event will come next which will, if some transactional engines
are involved, commit the transaction and flush the pending event to the
binlog.
If there was a deadlock the transaction should have been rolled back
already. So there should be no need to rollback the transaction.
We check for thd->transaction_rollback_request because it is possible
there was a deadlock that was ignored by slave-skip-errors. Normally, the
deadlock would have been rolled back already.
*/
DBUG_ASSERT(! thd->transaction_rollback_request);
error|= (int)(error ? trans_rollback_stmt(thd) : trans_commit_stmt(thd));
error|= (int) ((error || thd->transaction_rollback_request)
? trans_rollback_stmt(thd)
: trans_commit_stmt(thd));
/*
Now what if this is not a transactional engine? we still need to
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment