Commit 4b4db4a8 authored by Kristian Nielsen

MDEV-34042: Deadlock kill of XA PREPARE can break replication /...

MDEV-34042: Deadlock kill of XA PREPARE can break replication / rpl.rpl_parallel_multi_domain_xa sporadic failure

Refinement of the original patch.

Move the code to reset the kill up into the parent class
Xid_apply_log_event, to also fix the similar issue for XA COMMIT.

Increase the number of slave retries in the test case
rpl.rpl_parallel_multi_domain_xa to fix some sporadic failures. The test
generates massive amounts of conflicting transactions in multiple
independent domains, which can cause multiple rollback+retry for a
transaction as it conflicts with transactions in other domains one-by-one.
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
parent 2a2019e1
......@@ -6,6 +6,8 @@ connection master;
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
connection slave;
include/stop_slave.inc
SET @old_transaction_retries = @@GLOBAL.slave_transaction_retries;
SET @@global.slave_transaction_retries = 1000;
SET @old_parallel_threads = @@GLOBAL.slave_parallel_threads;
SET @old_slave_domain_parallel_threads = @@GLOBAL.slave_domain_parallel_threads;
SET @@global.slave_parallel_threads = 5;
......@@ -45,6 +47,7 @@ include/stop_slave.inc
SET @@global.slave_parallel_mode = @old_parallel_mode;
SET @@global.slave_parallel_threads = @old_parallel_threads;
SET @@global.slave_domain_parallel_threads = @old_slave_domain_parallel_threads;
SET @@global.slave_transaction_retries = @old_transaction_retries;
include/start_slave.inc
connection master;
DROP TABLE t1;
......
......@@ -21,6 +21,12 @@ ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
--connection slave
--sync_with_master
--source include/stop_slave.inc
# This test runs a huge number of transactions independently in parallel that
# all conflict on a single row. This requires a large number of retries, as a
# transaction can repeatedly conflict/deadlock with a large number of other
# transactions (in a different domain) one by one.
SET @old_transaction_retries = @@GLOBAL.slave_transaction_retries;
SET @@global.slave_transaction_retries = 1000;
SET @old_parallel_threads = @@GLOBAL.slave_parallel_threads;
SET @old_slave_domain_parallel_threads = @@GLOBAL.slave_domain_parallel_threads;
SET @@global.slave_parallel_threads = 5;
......@@ -160,6 +166,7 @@ SET @@global.slave_parallel_mode = 'optimistic';
SET @@global.slave_parallel_mode = @old_parallel_mode;
SET @@global.slave_parallel_threads = @old_parallel_threads;
SET @@global.slave_domain_parallel_threads = @old_slave_domain_parallel_threads;
SET @@global.slave_transaction_retries = @old_transaction_retries;
--source include/start_slave.inc
--connection master
......
......@@ -4066,6 +4066,9 @@ int Xid_apply_log_event::do_apply_event(rpl_group_info *rgi)
thd->wsrep_affected_rows= 0;
#endif
#ifndef DBUG_OFF
bool record_gtid_delayed_for_xa= false;
#endif
if (rgi->gtid_pending)
{
sub_id= rgi->gtid_sub_id;
......@@ -4084,6 +4087,10 @@ int Xid_apply_log_event::do_apply_event(rpl_group_info *rgi)
return 1;
});
}
#ifndef DBUG_OFF
else
record_gtid_delayed_for_xa= true;
#endif
}
general_log_print(thd, COM_QUERY, get_query());
......@@ -4093,6 +4100,22 @@ int Xid_apply_log_event::do_apply_event(rpl_group_info *rgi)
{
DBUG_ASSERT(!thd->transaction->xid_state.is_explicit_XA());
DBUG_ASSERT(record_gtid_delayed_for_xa);
if (thd->rgi_slave->is_parallel_exec)
{
/*
With XA, since the transaction is prepared/committed without updating
the GTID pos (MDEV-32020...), we need here to clear any pending
deadlock kill.
Otherwise if the kill happened after the prepare/commit completed, it
might end up killing the subsequent GTID position update, causing the
slave to fail with error.
*/
wait_for_pending_deadlock_kill(thd, thd->rgi_slave);
thd->reset_killed();
}
if ((err= do_record_gtid(thd, rgi, false, &hton, true)))
return err;
}
......@@ -4209,19 +4232,6 @@ int XA_prepare_log_event::do_commit()
else
res= trans_xa_commit(thd);
if (thd->rgi_slave->is_parallel_exec)
{
/*
Since the transaction is prepared/committed without updating the GTID pos
(MDEV-32020...), we need here to clear any pending deadlock kill.
Otherwise if the kill happened after the prepare/commit completed, it
might end up killing the subsequent GTID position update, causing the
slave to fail with error.
*/
wait_for_pending_deadlock_kill(thd, thd->rgi_slave);
thd->reset_killed();
}
return res;
}
#endif // HAVE_REPLICATION
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment