Commit 235f33e3 authored by Denis Protivensky's avatar Denis Protivensky Committed by Julius Goryavsky

MDEV-33133: MDL conflict handling code should skip BF-aborted trxs

It's possible that MDL conflict handling code is called more
than once for a transaction when:
- it holds more than one conflicting MDL lock
- reschedule_waiters() is executed,
which results in repeated attempts to BF-abort already aborted
transaction.
In such situations, it might be that BF-aborting logic sees
a partially rolled back transaction and erroneously decides
on future actions for such a transaction.

The specific situation tested and fixed is when a SR transaction
applied in the node gets BF-aborted by a started TOI operation.
It's then caught with the server transaction already rolled back,
but with no MDL locks yet released. This caused wrong state
detection for such a transaction during repeated MDL conflict
handling code execution.
Signed-off-by: default avatarJulius Goryavsky <julius.goryavsky@mariadb.com>
parent 7e748d07
connection node_2;
connection node_1;
connect node_1a,127.0.0.1,root,,test,$NODE_MYPORT_1;
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_rollback_mdl_release';
connection node_2;
SET SESSION wsrep_trx_fragment_size = 1;
START TRANSACTION;
INSERT INTO t1 VALUES (1);
connection node_1a;
SELECT COUNT(*) FROM t1;
COUNT(*)
0
SET SESSION wsrep_retry_autocommit = 0;
SET DEBUG_SYNC = 'ha_write_row_start SIGNAL may_toi WAIT_FOR bf_abort';
INSERT INTO t1 VALUES (2);
connection node_1;
SET DEBUG_SYNC = 'now WAIT_FOR may_toi';
SET DEBUG_SYNC = 'after_wsrep_thd_abort WAIT_FOR sync.wsrep_rollback_mdl_release_reached';
TRUNCATE TABLE t1;
connection node_1a;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
SET DEBUG_SYNC = 'now SIGNAL signal.wsrep_rollback_mdl_release';
connection node_2;
INSERT INTO t1 VALUES (3);
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
connection node_1;
SET GLOBAL DEBUG_DBUG = '';
SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
disconnect node_1a;
disconnect node_2;
disconnect node_1;
#
# MDEV-33133: MDL conflict handling code should skip transactions
# BF-aborted before.
#
# It's possible that MDL conflict handling code is called more
# than once for a transaction when:
# - it holds more than one conflicting MDL lock
# - reschedule_waiters() is executed,
# which results in repeated attempts to BF-abort already aborted
# transaction.
# In such situations, it might be that BF-aborting logic sees
# a partially rolled back transaction and erroneously decides
# on future actions for such a transaction.
#
# The specific situation tested and fixed is when a SR transaction
# applied in the node gets BF-aborted by a started TOI operation.
# It's then caught with the server transaction already rolled back,
# but with no MDL locks yet released. This caused wrong state
# detection for such a transaction during repeated MDL conflict
# handling code execution.
#
--source include/galera_cluster.inc
--source include/have_debug_sync.inc
--source include/have_debug.inc
--connect node_1a,127.0.0.1,root,,test,$NODE_MYPORT_1
--connection node_1
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_rollback_mdl_release';
--connection node_2
SET SESSION wsrep_trx_fragment_size = 1;
START TRANSACTION;
INSERT INTO t1 VALUES (1);
--connection node_1a
# Sync wait for SR transaction to replicate and apply fragment.
SELECT COUNT(*) FROM t1;
SET SESSION wsrep_retry_autocommit = 0;
SET DEBUG_SYNC = 'ha_write_row_start SIGNAL may_toi WAIT_FOR bf_abort';
--send
INSERT INTO t1 VALUES (2);
--connection node_1
SET DEBUG_SYNC = 'now WAIT_FOR may_toi';
# BF-abort SR transaction and wait until it reaches the point
# prior to release MDL locks.
# Then abort local INSERT, which will go through rescedule_waiters()
# and see SR transaction holding MDL locks but already rolled back.
# In this case SR transaction should be skipped in MDL conflict
# handling code.
SET DEBUG_SYNC = 'after_wsrep_thd_abort WAIT_FOR sync.wsrep_rollback_mdl_release_reached';
--send
TRUNCATE TABLE t1;
--connection node_1a
# Local INSERT gets aborted.
--error ER_LOCK_DEADLOCK
--reap
# Let the aborted SR transaction continue and finally release MDL locks,
# which in turn allows TRUNCATE to complete.
SET DEBUG_SYNC = 'now SIGNAL signal.wsrep_rollback_mdl_release';
--connection node_2
# SR transaction has been BF-aborted.
--error ER_LOCK_DEADLOCK
INSERT INTO t1 VALUES (3);
--connection node_1
# TRUNCATE completes.
--reap
# Cleanup
SET GLOBAL DEBUG_DBUG = '';
SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
--disconnect node_1a
--source include/galera_end.inc
......@@ -2,7 +2,6 @@
--source include/big_test.inc
--source include/force_restart.inc
#
# Testing gtid consistency in 3 node cluster when nodes drop
# and join back to cluster.
......@@ -378,4 +377,3 @@ DROP TABLE t3;
--disconnect node_2b
--disconnect node_1b
--disconnect node_1c
......@@ -392,6 +392,18 @@ int Wsrep_high_priority_service::rollback(const wsrep::ws_handle& ws_handle,
wsrep_thd_transaction_state_str(m_thd),
m_thd->killed);
#ifdef ENABLED_DEBUG_SYNC
DBUG_EXECUTE_IF("sync.wsrep_rollback_mdl_release",
{
const char act[]=
"now "
"SIGNAL sync.wsrep_rollback_mdl_release_reached "
"WAIT_FOR signal.wsrep_rollback_mdl_release";
DBUG_ASSERT(!debug_sync_set_action(m_thd,
STRING_WITH_LEN(act)));
};);
#endif
m_thd->release_transactional_locks();
free_root(m_thd->mem_root, MYF(MY_KEEP_PREALLOC));
......
......@@ -2747,8 +2747,15 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
mysql_mutex_lock(&granted_thd->LOCK_thd_kill);
mysql_mutex_lock(&granted_thd->LOCK_thd_data);
if (wsrep_thd_is_toi(granted_thd) ||
wsrep_thd_is_applying(granted_thd))
if (granted_thd->wsrep_aborter != 0)
{
DBUG_ASSERT(granted_thd->wsrep_aborter == request_thd->thread_id);
WSREP_DEBUG("BF thread waiting for a victim to release locks");
mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
mysql_mutex_unlock(&granted_thd->LOCK_thd_kill);
}
else if (wsrep_thd_is_toi(granted_thd) ||
wsrep_thd_is_applying(granted_thd))
{
if (wsrep_thd_is_aborting(granted_thd))
{
......@@ -2824,6 +2831,8 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
{
mysql_mutex_unlock(&request_thd->LOCK_thd_data);
}
DEBUG_SYNC(request_thd, "after_wsrep_thd_abort");
}
/**/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment