Commit acb9c9e9 authored by Brandon Nesterenko's avatar Brandon Nesterenko

MDEV-31949: rpl_xa_prepare_gtid_fail deterministic paths

happy_xac is where the XA COMMIT completes before noticing the
error signalled by the prior XAP

sad_xac is where the XA COMMIT notices the error signalled by
the prior XAP and rolls back, leaving a dangling XAP.
parent bd37485a
#
# When handling the replication of an XA PREPARE, the commit phase is
# bifurcated. First, the prepare is handled by the relevant storage engines.
# Then second, the GTID slave state is updated as a separate autocommit
# transaction. If the second stage fails, i.e. we are unable to update the
# GTID slave state, then the slave should immediately quit in error, without
# retry.
#
# This test validates the above behavior by forcing a lock-wait timeout on
# the GTID slave state table during the second part of XA PREPARE's commit, to
# ensure that the appropriate error is reported and the transaction was never
# retried.
#
#
# References
# MDEV-31038: Parallel Replication Breaks if XA PREPARE Fails Updating Slave
# GTID State
#
# Scenario: "happy" XA COMMIT path.
# The XA PREPARE (seq_no 99) is paused at the hold_xap_finalization debug
# point until the XA COMMIT (seq_no 100) shows up in the slave's
# gtid_binlog_pos. Only then is the XA PREPARE released, so the XA COMMIT has
# already completed by the time the XA PREPARE reports its failure, and
# XA RECOVER is expected to be empty afterwards.
source include/master-slave.inc;
source include/have_binlog_format_row.inc;
source include/have_debug.inc;
source include/have_debug_sync.inc;
source include/have_innodb.inc;

--connection slave
set statement sql_log_bin=0 for call mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends");
--source include/stop_slave.inc
# Remember every global changed below; all of them are saved and restored on
# this same connection, because user variables are private to a session.
set @save_par_thds= @@global.slave_parallel_threads;
set @save_par_mode= @@global.slave_parallel_mode;
set @save_strict_mode= @@global.gtid_strict_mode;
set @save_innodb_lock_wait_timeout= @@global.innodb_lock_wait_timeout;
change master to master_use_gtid=slave_pos;
set @@global.slave_parallel_threads= 4;
set @@global.slave_parallel_mode= optimistic;
set @@global.gtid_strict_mode=ON;
# The test blocks the GTID state update with a row lock, so the table must be
# in InnoDB.
set statement sql_log_bin=0 for alter table mysql.gtid_slave_pos engine=innodb;
--source include/start_slave.inc

--connection master
create table t1 (a int primary key, b int) engine=innodb;
insert t1 values (1,1);
--source include/save_master_gtid.inc

--connection slave
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
# Keep the lock wait short so the blocked GTID state update times out quickly.
set @@global.innodb_lock_wait_timeout= 1;
# Baseline for the "no retry happened" check further down.
--let $retried_tx_initial= query_get_value(SHOW ALL SLAVES STATUS, Retried_transactions, 1)

--connection master
--let $gtid_domain_id=`SELECT @@GLOBAL.gtid_domain_id`
--let $gtid_server_id=`SELECT @@GLOBAL.server_id`
# seq_no 99 (XA PREPARE) will hit the hold_xap_finalization debug point on the
# slave; the XA COMMIT that follows gets seq_no 100.
--let $xap_seq_no=99
--eval set @@session.gtid_seq_no=$xap_seq_no
xa start '1';
update t1 set b=b+10 where a=1;
xa end '1';
xa prepare '1';
xa commit '1';
--let $new_gtid= `SELECT @@global.gtid_binlog_pos`
--source include/save_master_gtid.inc

# Hold a lock from a second slave session so that the XA PREPARE's update of
# mysql.gtid_slave_pos cannot proceed.
--connection slave1
BEGIN;
--eval SELECT * FROM mysql.gtid_slave_pos WHERE seq_no=$xap_seq_no FOR UPDATE

--connection slave
set @old_dbug= @@GLOBAL.debug_dbug;
set @@GLOBAL.debug_dbug= "+d,hold_xap_finalization";
--source include/start_slave.inc
# Wait until the XA PREPARE is parked at the debug point ...
set debug_sync='now wait_for xap_finalizing';
--echo # Waiting for XAC to binlog
# ... let the XA COMMIT finish first ...
--let $wait_condition= SELECT @@global.gtid_binlog_pos='0-1-100';
--source include/wait_condition.inc
# ... and only then release the XA PREPARE.
set debug_sync='now signal xap_continue';
# 1942 is presumably the "Commit failed due to failure of an earlier commit"
# error suppressed at the top of this test.
--let $slave_sql_errno= 1942
--source include/wait_for_slave_sql_error.inc

# TODO: Remove after fixing MDEV-21777
# Stop the IO thread too, so the existing relay logs are force purged on slave
# restart, as to not re-execute the already-prepared transaction
--source include/stop_slave_io.inc

# The failed GTID state update must stop the slave immediately, without retry.
--let $retried_tx_test= query_get_value(SHOW ALL SLAVES STATUS, Retried_transactions, 1)
if ($retried_tx_initial != $retried_tx_test)
{
--echo Transaction was retried when a failed XA PREPARE slave GTID update should lead to immediate slave stop without retry
--die Transaction was retried when a failed XA PREPARE slave GTID update should lead to immediate slave stop without retry
}

# Release the blocking lock.
--connection slave1
ROLLBACK;

# @old_dbug was saved in the `slave` session, so it has to be restored from
# that session; in any other session the variable is NULL.
--connection slave
set @@GLOBAL.debug_dbug= @old_dbug;
set debug_sync= 'reset';

# XA COMMIT succeeds, this is empty
XA RECOVER;
--let $xac_failed= query_get_value(XA RECOVER, data, 1)
if ($xac_failed != "No such row")
{
die XAC should have succeeded;
}

--echo # Cleanup
--connection master
drop table t1;

--connection slave
--echo # TODO: Remove after fixing MDEV-21777
--eval set @@global.gtid_slave_pos= "$new_gtid"
set @@global.slave_parallel_threads= @save_par_thds;
set @@global.slave_parallel_mode= @save_par_mode;
set @@global.gtid_strict_mode= @save_strict_mode;
set @@global.innodb_lock_wait_timeout= @save_innodb_lock_wait_timeout;
--source include/start_slave.inc
--source include/rpl_end.inc
--echo # End of rpl_xa_prepare_gtid_fail.test
#
# When handling the replication of an XA PREPARE, the commit phase is
# bifurcated. First, the prepare is handled by the relevant storage engines.
# Then second, the GTID slave state is updated as a separate autocommit
# transaction. If the second stage fails, i.e. we are unable to update the
# GTID slave state, then the slave should immediately quit in error, without
# retry.
#
# This test validates the above behavior by forcing a lock-wait timeout on
# the GTID slave state table during the second part of XA PREPARE's commit, to
# ensure that the appropriate error is reported and the transaction was never
# retried.
#
#
# References
# MDEV-31038: Parallel Replication Breaks if XA PREPARE Fails Updating Slave
# GTID State
#
# Scenario: "sad" XA COMMIT path.
# The worker is held via the hold_worker_on_schedule debug point while the
# XA PREPARE (seq_no 99) fails its GTID state update. Once released, the
# XA COMMIT is expected to fail as well, leaving the prepared transaction
# visible in XA RECOVER; the test then commits it by hand.
source include/master-slave.inc;
source include/have_binlog_format_row.inc;
source include/have_debug.inc;
source include/have_debug_sync.inc;
source include/have_innodb.inc;

--connection slave
set statement sql_log_bin=0 for call mtr.add_suppression("Commit failed due to failure of an earlier commit on which this one depends");
--source include/stop_slave.inc
# Remember every global changed below; all of them are saved and restored on
# this same connection, because user variables are private to a session.
set @save_par_thds= @@global.slave_parallel_threads;
set @save_par_mode= @@global.slave_parallel_mode;
set @save_strict_mode= @@global.gtid_strict_mode;
set @save_innodb_lock_wait_timeout= @@global.innodb_lock_wait_timeout;
change master to master_use_gtid=slave_pos;
set @@global.slave_parallel_threads= 4;
set @@global.slave_parallel_mode= optimistic;
set @@global.gtid_strict_mode=ON;
# The test blocks the GTID state update with a row lock, so the table must be
# in InnoDB.
set statement sql_log_bin=0 for alter table mysql.gtid_slave_pos engine=innodb;
--source include/start_slave.inc

--connection master
create table t1 (a int primary key, b int) engine=innodb;
insert t1 values (1,1);
--source include/save_master_gtid.inc

--connection slave
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
# Keep the lock wait short so the blocked GTID state update times out quickly.
set @@global.innodb_lock_wait_timeout= 1;
# Baseline for the "no retry happened" check further down.
--let $retried_tx_initial= query_get_value(SHOW ALL SLAVES STATUS, Retried_transactions, 1)

--connection master
--let $gtid_domain_id=`SELECT @@GLOBAL.gtid_domain_id`
--let $gtid_server_id=`SELECT @@GLOBAL.server_id`
# seq_no 100 (XA COMMIT) will hit debug_sync hold_worker_on_schedule
--let $xap_seq_no=99
--eval set @@session.gtid_seq_no=$xap_seq_no
xa start '1';
update t1 set b=b+10 where a=1;
xa end '1';
xa prepare '1';
xa commit '1';
--let $new_gtid= `SELECT @@global.gtid_binlog_pos`
--source include/save_master_gtid.inc

# Hold a lock from a second slave session so that the XA PREPARE's update of
# mysql.gtid_slave_pos cannot proceed.
--connection slave1
BEGIN;
--eval SELECT * FROM mysql.gtid_slave_pos WHERE seq_no=$xap_seq_no FOR UPDATE

--connection slave
set @old_dbug= @@GLOBAL.debug_dbug;
set @@GLOBAL.debug_dbug= "+d,hold_worker_on_schedule";
--source include/start_slave.inc
# Note it's possible that this won't be signalled if XAP fails before another worker
# thread begins processing the XAC GTID
set debug_sync='now wait_for reached_pause timeout 1';
# Give time to ensure XAP gtid_slave_pos update fails and signals its waiter
# to stop due to failure of prior commit
sleep 3;
set DEBUG_SYNC='now signal continue_worker';
# 1942 is presumably the "Commit failed due to failure of an earlier commit"
# error suppressed at the top of this test.
--let $slave_sql_errno= 1942
--source include/wait_for_slave_sql_error.inc

# TODO: Remove after fixing MDEV-21777
# Stop the IO thread too, so the existing relay logs are force purged on slave
# restart, as to not re-execute the already-prepared transaction
--source include/stop_slave_io.inc

# The failed GTID state update must stop the slave immediately, without retry.
--let $retried_tx_test= query_get_value(SHOW ALL SLAVES STATUS, Retried_transactions, 1)
if ($retried_tx_initial != $retried_tx_test)
{
--echo Transaction was retried when a failed XA PREPARE slave GTID update should lead to immediate slave stop without retry
--die Transaction was retried when a failed XA PREPARE slave GTID update should lead to immediate slave stop without retry
}

# Release the blocking lock.
--connection slave1
ROLLBACK;

# @old_dbug was saved in the `slave` session, so it has to be restored from
# that session; in any other session the variable is NULL.
--connection slave
set @@GLOBAL.debug_dbug= @old_dbug;
set debug_sync= 'reset';

# XA COMMIT failed, and its XAP should still be dangling
XA RECOVER;
--let $xac_failed= query_get_value(XA RECOVER, data, 1)
if ($xac_failed == "No such row")
{
die XAC should have failed;
}
# So commit it
set statement gtid_domain_id=0, server_id=1, gtid_seq_no=100 for xa commit '1';

--echo # Cleanup
--connection master
drop table t1;

--connection slave
--echo # TODO: Remove after fixing MDEV-21777
--eval set @@global.gtid_slave_pos= "$new_gtid"
set @@global.slave_parallel_threads= @save_par_thds;
set @@global.slave_parallel_mode= @save_par_mode;
set @@global.gtid_strict_mode= @save_strict_mode;
set @@global.innodb_lock_wait_timeout= @save_innodb_lock_wait_timeout;
--source include/start_slave.inc
--source include/rpl_end.inc
--echo # End of rpl_xa_prepare_gtid_fail.test
......@@ -155,6 +155,11 @@ finish_event_group(rpl_parallel_thread *rpt, uint64 sub_id,
wait_for_commit *wfc= &rgi->commit_orderer;
int err;
DBUG_EXECUTE_IF("hold_xap_finalization", {
if (rgi->current_gtid.seq_no == 99) {
debug_sync_set_action(thd, STRING_WITH_LEN("now SIGNAL xap_finalizing WAIT_FOR xap_continue"));
}});
thd->get_stmt_da()->set_overwrite_status(true);
if (unlikely(rgi->worker_error))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment