Commit 900c4d69 authored by Kristian Nielsen

MDEV-31655: Parallel replication deadlock victim preference code erroneously removed

Restore code to make InnoDB choose the second transaction as a deadlock
victim if two transactions deadlock that need to commit in-order for
parallel replication. This code was erroneously removed when VATS was
implemented in InnoDB.

Also add a test case for InnoDB choosing the right deadlock victim.
Also fixes this bug, with testcase that reliably reproduces:

MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master

Note: This should be null-merged to 10.6, as a different fix is needed
there due to InnoDB locking code changes.
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
parent 920789e9
......@@ -2,6 +2,7 @@ include/master-slave.inc
[connection master]
connection server_2;
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
SET GLOBAL slave_parallel_threads=10;
ERROR HY000: This operation cannot be performed as you have a running slave ''; run STOP SLAVE '' first
include/stop_slave.inc
......@@ -1680,13 +1681,52 @@ a
2000
SELECT * FROM t2 WHERE a>=2000 ORDER BY a;
a
MDEV-31655: Parallel replication deadlock victim preference code erroneously removed
connection server_1;
CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
BEGIN;
COMMIT;
include/save_master_gtid.inc
connection server_2;
include/sync_with_master_gtid.inc
include/stop_slave.inc
set @@global.slave_parallel_threads= 5;
set @@global.slave_parallel_mode= conservative;
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries";
connection master;
SET @old_dbug= @@SESSION.debug_dbug;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
SET @commit_id= 1+1000;
SET @commit_id= 2+1000;
SET @commit_id= 3+1000;
SET @commit_id= 4+1000;
SET @commit_id= 5+1000;
SET @commit_id= 6+1000;
SET @commit_id= 7+1000;
SET @commit_id= 8+1000;
SET @commit_id= 9+1000;
SET @commit_id= 10+1000;
SET SESSION debug_dbug= @old_dbug;
SELECT COUNT(*), SUM(a*100*b) FROM t7;
COUNT(*) SUM(a*100*b)
10 225000
include/save_master_gtid.inc
connection server_2;
include/start_slave.inc
include/sync_with_master_gtid.inc
SET GLOBAL debug_dbug= @old_dbug;
SELECT COUNT(*), SUM(a*100*b) FROM t7;
COUNT(*) SUM(a*100*b)
10 225000
connection server_2;
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
include/start_slave.inc
SET DEBUG_SYNC= 'RESET';
connection server_1;
DROP function foo;
DROP TABLE t1,t2,t3,t4,t5,t6;
DROP TABLE t1,t2,t3,t4,t5,t6,t7;
SET DEBUG_SYNC= 'RESET';
include/rpl_end.inc
......@@ -2,6 +2,7 @@ include/master-slave.inc
[connection master]
connection server_2;
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
SET GLOBAL slave_parallel_threads=10;
ERROR HY000: This operation cannot be performed as you have a running slave ''; run STOP SLAVE '' first
include/stop_slave.inc
......@@ -1679,13 +1680,52 @@ a
2000
SELECT * FROM t2 WHERE a>=2000 ORDER BY a;
a
MDEV-31655: Parallel replication deadlock victim preference code erroneously removed
connection server_1;
CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
BEGIN;
COMMIT;
include/save_master_gtid.inc
connection server_2;
include/sync_with_master_gtid.inc
include/stop_slave.inc
set @@global.slave_parallel_threads= 5;
set @@global.slave_parallel_mode= conservative;
SET @old_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries";
connection master;
SET @old_dbug= @@SESSION.debug_dbug;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
SET @commit_id= 1+1000;
SET @commit_id= 2+1000;
SET @commit_id= 3+1000;
SET @commit_id= 4+1000;
SET @commit_id= 5+1000;
SET @commit_id= 6+1000;
SET @commit_id= 7+1000;
SET @commit_id= 8+1000;
SET @commit_id= 9+1000;
SET @commit_id= 10+1000;
SET SESSION debug_dbug= @old_dbug;
SELECT COUNT(*), SUM(a*100*b) FROM t7;
COUNT(*) SUM(a*100*b)
10 225000
include/save_master_gtid.inc
connection server_2;
include/start_slave.inc
include/sync_with_master_gtid.inc
SET GLOBAL debug_dbug= @old_dbug;
SELECT COUNT(*), SUM(a*100*b) FROM t7;
COUNT(*) SUM(a*100*b)
10 225000
connection server_2;
include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
include/start_slave.inc
SET DEBUG_SYNC= 'RESET';
connection server_1;
DROP function foo;
DROP TABLE t1,t2,t3,t4,t5,t6;
DROP TABLE t1,t2,t3,t4,t5,t6,t7;
SET DEBUG_SYNC= 'RESET';
include/rpl_end.inc
include/master-slave.inc
[connection master]
connection master;
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
CREATE TABLE t1(a INT) ENGINE=INNODB;
INSERT INTO t1 VALUES(1);
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
include/stop_slave.inc
set @@global.slave_parallel_threads= 2;
set @@global.slave_parallel_mode= OPTIMISTIC;
set @@global.slave_transaction_retries= 2;
*** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master
connection master;
SET @@gtid_seq_no= 100;
INSERT INTO t1 SELECT 1+a FROM t1;
SET @@gtid_seq_no= 200;
INSERT INTO t1 SELECT 2+a FROM t1;
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
include/save_master_gtid.inc
connection slave;
SET @save_dbug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start";
SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200";
SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait";
SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill";
include/start_slave.inc
include/sync_with_master_gtid.inc
SET GLOBAL debug_dbug= @save_dbug;
SELECT * FROM t1 ORDER BY a;
a
1
2
3
4
connection slave;
include/stop_slave.inc
SET @@global.slave_parallel_threads= 0;
SET @@global.slave_parallel_mode= conservative;
SET @@global.slave_transaction_retries= 10;
include/start_slave.inc
connection master;
DROP TABLE t1;
include/rpl_end.inc
......@@ -13,6 +13,7 @@
--connection server_2
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
SET @old_parallel_mode=@@GLOBAL.slave_parallel_mode;
--error ER_SLAVE_MUST_STOP
SET GLOBAL slave_parallel_threads=10;
--source include/stop_slave.inc
......@@ -2203,16 +2204,84 @@ SELECT * FROM t1 WHERE a>=2000 ORDER BY a;
SELECT * FROM t2 WHERE a>=2000 ORDER BY a;
--echo MDEV-31655: Parallel replication deadlock victim preference code erroneously removed
# The problem was that InnoDB would choose the wrong deadlock victim.
# Create a lot of transactions that can cause deadlocks, and use error
# injection to check that the first transaction in each group is never
# selected as deadlock victim.
--let $rows= 10
--let $transactions= 5
--let $gcos= 10
--connection server_1
CREATE TABLE t7 (a INT PRIMARY KEY, b INT) ENGINE=InnoDB;
BEGIN;
--disable_query_log
--let $i= 0
while ($i < 10) {
eval INSERT INTO t7 VALUES ($i, 0);
inc $i;
}
--enable_query_log
COMMIT;
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
eval set @@global.slave_parallel_threads= $transactions;
set @@global.slave_parallel_mode= conservative;
SET @old_dbug= @@GLOBAL.debug_dbug;
# This error injection will allow no retries for GTIDs divisible by 1000.
SET GLOBAL debug_dbug= "+d,rpl_mdev31655_zero_retries";
--connection master
SET @old_dbug= @@SESSION.debug_dbug;
SET SESSION debug_dbug="+d,binlog_force_commit_id";
--let $j= 1
while ($j <= $gcos) {
eval SET @commit_id= $j+1000;
--let $i= 0
while ($i < $transactions) {
--disable_query_log
eval SET SESSION gtid_seq_no= 1000 + 1000*$j + $i;
BEGIN;
--let $k= 0
while ($k < $rows) {
eval UPDATE t7 SET b=b+1 WHERE a=(($i+$k) MOD $rows);
inc $k;
}
COMMIT;
--enable_query_log
inc $i;
}
inc $j;
}
SET SESSION debug_dbug= @old_dbug;
SELECT COUNT(*), SUM(a*100*b) FROM t7;
--source include/save_master_gtid.inc
--connection server_2
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
SET GLOBAL debug_dbug= @old_dbug;
SELECT COUNT(*), SUM(a*100*b) FROM t7;
# Clean up.
--connection server_2
--source include/stop_slave.inc
SET GLOBAL slave_parallel_threads=@old_parallel_threads;
SET GLOBAL slave_parallel_mode=@old_parallel_mode;
--source include/start_slave.inc
SET DEBUG_SYNC= 'RESET';
--connection server_1
DROP function foo;
DROP TABLE t1,t2,t3,t4,t5,t6;
DROP TABLE t1,t2,t3,t4,t5,t6,t7;
SET DEBUG_SYNC= 'RESET';
--source include/rpl_end.inc
--source include/master-slave.inc
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_binlog_format_statement.inc
--connection master
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
CREATE TABLE t1(a INT) ENGINE=INNODB;
INSERT INTO t1 VALUES(1);
--source include/save_master_gtid.inc
--connection slave
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
--let $save_transaction_retries= `SELECT @@global.slave_transaction_retries`
--let $save_slave_parallel_threads= `SELECT @@global.slave_parallel_threads`
--let $save_slave_parallel_mode= `SELECT @@global.slave_parallel_mode`
set @@global.slave_parallel_threads= 2;
set @@global.slave_parallel_mode= OPTIMISTIC;
set @@global.slave_transaction_retries= 2;
--echo *** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master
# This was a failure where a transaction T1 could deadlock multiple times
# with T2, eventually exceeding the default --slave-transaction-retries=10.
# Root cause was MDEV-31655, causing InnoDB to wrongly choose T1 as deadlock
# victim over T2. If thread scheduling is right, it was possible for T1 to
# repeatedly deadlock, roll back, and have time to grab an S lock again before
# T2 woke up and got its waiting X lock, thus repeating the same deadlock over
# and over.
# Once the bug is fixed, it is not possible to re-create the same execution
# and thread scheduling. Instead we inject small sleeps in a way that
# triggered the problem when the bug was there, to demonstrate that the
# problem no longer occurs.
--connection master
# T1
SET @@gtid_seq_no= 100;
INSERT INTO t1 SELECT 1+a FROM t1;
# T2
SET @@gtid_seq_no= 200;
INSERT INTO t1 SELECT 2+a FROM t1;
SELECT * FROM t1 ORDER BY a;
--source include/save_master_gtid.inc
--connection slave
SET @save_dbug= @@GLOBAL.debug_dbug;
# Inject various delays to hint thread scheduling to happen in the way that
# triggered MDEV-28776.
# Small delay starting T1 so it will be the youngest trx and be chosen over
# T2 as the deadlock victim by default in InnoDB.
SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start";
# Small delay before taking insert X lock to give time for both T1 and T2 to
# get the S lock first and cause a deadlock.
SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200";
# Small delay after T2's wait on the X lock, to give time for T1 retry to
# re-acquire the T1 S lock first.
SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait";
# Delay deadlock kill of T2.
SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill";
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
SET GLOBAL debug_dbug= @save_dbug;
SELECT * FROM t1 ORDER BY a;
# Cleanup.
--connection slave
--source include/stop_slave.inc
eval SET @@global.slave_parallel_threads= $save_slave_parallel_threads;
eval SET @@global.slave_parallel_mode= $save_slave_parallel_mode;
eval SET @@global.slave_transaction_retries= $save_transaction_retries;
--source include/start_slave.inc
--connection master
DROP TABLE t1;
--source include/rpl_end.inc
......@@ -1284,6 +1284,11 @@ handle_rpl_parallel_thread(void *arg)
bool did_enter_cond= false;
PSI_stage_info old_stage;
DBUG_EXECUTE_IF("rpl_parallel_delay_gtid_0_x_100_start", {
if (rgi->current_gtid.domain_id==0 &&
rgi->current_gtid.seq_no == 100)
my_sleep(10000);
});
#ifdef ENABLED_DEBUG_SYNC
DBUG_EXECUTE_IF("hold_worker_on_schedule", {
if (rgi->current_gtid.domain_id == 0 &&
......@@ -1463,8 +1468,13 @@ handle_rpl_parallel_thread(void *arg)
err= dbug_simulate_tmp_error(rgi, thd););
if (unlikely(err))
{
ulong max_retries= slave_trans_retries;
convert_kill_to_deadlock_error(rgi);
if (has_temporary_error(thd) && slave_trans_retries > 0)
DBUG_EXECUTE_IF("rpl_mdev31655_zero_retries",
if ((rgi->current_gtid.seq_no % 1000) == 0)
max_retries= 0;
);
if (has_temporary_error(thd) && max_retries > 0)
err= retry_event_group(rgi, rpt, qev);
}
}
......
......@@ -502,6 +502,7 @@ static void bg_rpl_load_gtid_slave_state(void *)
static void bg_slave_kill(void *victim)
{
THD *to_kill= (THD *)victim;
DBUG_EXECUTE_IF("rpl_delay_deadlock_kill", my_sleep(1500000););
to_kill->awake(KILL_CONNECTION);
mysql_mutex_lock(&to_kill->LOCK_wakeup_ready);
to_kill->rgi_slave->killed_for_retry= rpl_group_info::RETRY_KILL_KILLED;
......
......@@ -5272,6 +5272,49 @@ thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd)
return 0;
}
/*
If the storage engine detects a deadlock, and needs to choose a victim
transaction to roll back, it can call this function to ask the upper
server layer for which of two possible transactions is prefered to be
aborted and rolled back.
In parallel replication, if two transactions are running in parallel and
one is fixed to commit before the other, then the one that commits later
will be prefered as the victim - chosing the early transaction as a victim
will not resolve the deadlock anyway, as the later transaction still needs
to wait for the earlier to commit.
The return value is -1 if the first transaction is prefered as a deadlock
victim, 1 if the second transaction is prefered, or 0 for no preference (in
which case the storage engine can make the choice as it prefers).
*/
extern "C" int
thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2)
{
rpl_group_info *rgi1, *rgi2;
if (!thd1 || !thd2)
return 0;
/*
If the transactions are participating in the same replication domain in
parallel replication, then request to select the one that will commit
later (in the fixed commit order from the master) as the deadlock victim.
*/
rgi1= thd1->rgi_slave;
rgi2= thd2->rgi_slave;
if (rgi1 && rgi2 &&
rgi1->is_parallel_exec &&
rgi1->rli == rgi2->rli &&
rgi1->current_gtid.domain_id == rgi2->current_gtid.domain_id)
return rgi1->gtid_sub_id < rgi2->gtid_sub_id ? 1 : -1;
/* No preferences, let the storage engine decide. */
return 0;
}
extern "C" int thd_non_transactional_update(const MYSQL_THD thd)
{
return(thd->transaction.all.modified_non_trans_table);
......
......@@ -78,6 +78,7 @@
#include "sql_audit.h"
#include "sql_derived.h" // mysql_handle_derived
#include "sql_prepare.h"
#include "rpl_rli.h"
#include <my_bit.h>
#include "debug_sync.h"
......@@ -1753,6 +1754,12 @@ int write_record(THD *thd, TABLE *table,COPY_INFO *info)
save_read_set= table->read_set;
save_write_set= table->write_set;
DBUG_EXECUTE_IF("rpl_write_record_small_sleep_gtid_100_200",
{
if (thd->rgi_slave && (thd->rgi_slave->current_gtid.seq_no == 100 ||
thd->rgi_slave->current_gtid.seq_no == 200))
my_sleep(20000);
});
if (info->handle_duplicates == DUP_REPLACE ||
info->handle_duplicates == DUP_UPDATE)
{
......
......@@ -71,6 +71,9 @@ static void lock_grant_after_reset(lock_t* lock);
extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd);
extern "C" int thd_need_wait_reports(const MYSQL_THD thd);
extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
#ifdef HAVE_REPLICATION
extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
#endif
/** Pretty-print a table lock.
@param[in,out] file output stream
......@@ -1546,6 +1549,20 @@ static bool has_higher_priority(lock_t *lock1, lock_t *lock2)
} else if (!lock_get_wait(lock2)) {
return false;
}
#ifdef HAVE_REPLICATION
// Ask the upper server layer if any of the two trx should be preferred.
int preference = thd_deadlock_victim_preference(lock1->trx->mysql_thd,
lock2->trx->mysql_thd);
if (preference == -1) {
// lock1 is preferred as a victim, so lock2 has higher priority
return false;
} else if (preference == 1) {
// lock2 is preferred as a victim, so lock1 has higher priority
return true;
}
#endif
return lock1->trx->start_time_micro <= lock2->trx->start_time_micro;
}
......
......@@ -278,7 +278,9 @@ lock_wait_suspend_thread(
}
ulint lock_type = ULINT_UNDEFINED;
#ifndef DBUG_OFF
ulint lock_mode = LOCK_NONE;
#endif
/* The wait_lock can be cleared by another thread when the
lock is released. But the wait can only be initiated by the
current thread which owns the transaction. Only acquire the
......@@ -288,6 +290,9 @@ lock_wait_suspend_thread(
wait_lock = trx->lock.wait_lock;
if (wait_lock) {
lock_type = lock_get_type_low(wait_lock);
#ifndef DBUG_OFF
lock_mode = lock_get_mode(wait_lock);
#endif
}
lock_mutex_exit();
}
......@@ -336,6 +341,14 @@ lock_wait_suspend_thread(
}
os_event_wait(slot->event);
DBUG_EXECUTE_IF("small_sleep_after_lock_wait",
{
if (lock_type == LOCK_REC && lock_mode == LOCK_X &&
trx->error_state != DB_DEADLOCK &&
!trx_is_interrupted(trx)) {
my_sleep(20000);
}
});
thd_wait_end(trx->mysql_thd);
......
......@@ -52,6 +52,11 @@ Created 3/26/1996 Heikki Tuuri
#include <set>
#include <new>
#ifdef HAVE_REPLICATION
extern "C"
int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
#endif
/** The bit pattern corresponding to TRX_ID_MAX */
const byte trx_id_max_bytes[8] = {
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
......@@ -1907,6 +1912,16 @@ trx_weight_ge(
ibool a_notrans_edit;
ibool b_notrans_edit;
#ifdef HAVE_REPLICATION
/* First ask the upper server layer if it has any preference for which
to prefer as a deadlock victim. */
int pref= thd_deadlock_victim_preference(a->mysql_thd, b->mysql_thd);
if (pref < 0)
return FALSE;
else if (pref > 0)
return TRUE;
#endif
/* If mysql_thd is NULL for a transaction we assume that it has
not edited non-transactional tables. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment