Commit ee974ca5 authored by Jan Lindström's avatar Jan Lindström Committed by Julius Goryavsky

MDEV-31658 : Deadlock found when trying to get lock during applying

The problem was that there were two non-conflicting idle local
transactions on node_1 that had each inserted a key into the primary key.
Then two transactions from other nodes also inserted
a key into the primary key, such that the insert from node_2 conflicted
with one of the local transactions on node_1: there would
be a duplicate key if both were committed. For this reason the insert
from the other node tries to acquire an S-lock on this record,
and because this insert is a high priority brute force (BF)
transaction, it kills the idle local transaction.

Concurrently, the second insert, from node_3, conflicts with the second
idle insert transaction on node_1. Again, it tries to acquire an
S-lock on this record and kills the idle local transaction.

At this point we have two non-conflicting high priority
transactions holding S-lock on different records in node_1.
For example like this: rec s-lock-node2-rec s-lock-node3-rec rec.

Because these high priority BF-transactions do not wait for
each other, the insert from node_3, which has a later seqno than
the insert from node_2, can continue. It will try to acquire an
insert intention lock for the record it tries to insert (to avoid
a duplicate key being inserted by a local transaction). However,
it will notice that there is a conflicting S-lock in the same gap
between records. This leads to a deadlock error, as we have
defined that BF-transactions may not wait for a record lock,
but we can't kill the conflicting BF-transaction because
it has a lower seqno and it should commit first.

BF-transactions are executed concurrently because their
values to primary key are different i.e. they do not
conflict.

Galera certification will make sure that inserts from
other nodes i.e these high priority BF-transactions
can't insert duplicate keys. Local transactions naturally
can but they will be killed when BF-transaction
acquires required record locks.

Therefore, we can allow the situation where there is a conflicting
S-lock and insert intention lock regardless of their seqno
order and let both continue without waiting. This leads
to a situation where we need to allow a BF-transaction
to wait when lock_rec_has_to_wait_in_queue is called,
because this function is also called from
lock_rec_queue_validate, and because the lock is waiting
there would otherwise be an assertion failure in ut_a(lock->is_gap()
|| lock_rec_has_to_wait_in_queue(cell, lock));

lock_wait_wsrep_kill
  Add debug sync points for BF-transactions killing
  local transaction.

wsrep_assert_no_bf_bf_wait
  Print also requested lock information

lock_rec_has_to_wait
  Call the new function that handles wsrep transaction lock wait
  cases.

lock_rec_has_to_wait_wsrep
  New function to handle wsrep transaction lock wait
  exceptions.

lock_rec_has_to_wait_in_queue
  Remove wsrep exception, in this function all
  conflicting locks need to wait in queue.
  Conflicts between BF and local transactions
  are handled in lock_wait.
Signed-off-by: default avatarJulius Goryavsky <julius.goryavsky@mariadb.com>
parent 5b26a076
connection node_2;
connection node_1;
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connect node_1d, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connect node_1e, 127.0.0.1, root, , test, $NODE_MYPORT_1;
connection node_1;
CREATE TABLE t1(a int not null primary key auto_increment, b int) engine=innodb;
INSERT INTO t1(b) VALUES (1);
connection node_1c;
begin;
insert into t1 values (2,2);
connection node_1d;
begin;
insert into t1 values (3,3);
connection node_1a;
SET GLOBAL DEBUG_DBUG='+d,wsrep_after_kill';
connection node_2;
insert into t1 values (2,6);
connection node_1a;
SET SESSION wsrep_sync_wait=0;
SET DEBUG_SYNC='now WAIT_FOR wsrep_after_kill_reached';
SET GLOBAL DEBUG_DBUG='';
SET GLOBAL DEBUG_DBUG='+d,wsrep_after_kill_2';
connection node_3;
insert into t1 values (3,9);
connection node_1a;
SET DEBUG_SYNC='now WAIT_FOR wsrep_after_kill_reached_2';
SET GLOBAL DEBUG_DBUG='';
SET DEBUG_SYNC='now SIGNAL wsrep_after_kill_continue';
connection node_1c;
COMMIT;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
connection node_1a;
SET GLOBAL DEBUG_DBUG='';
SET DEBUG_SYNC='now SIGNAL wsrep_after_kill_continue_2';
connection node_1d;
COMMIT;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
connection node_2;
SELECT * from t1;
a b
1 1
2 6
3 9
connection node_3;
SELECT * from t1;
a b
1 1
2 6
3 9
connection node_1a;
SET DEBUG_SYNC = reset;
connection node_1e;
set debug_sync = reset;
connection node_1;
SELECT * from t1;
a b
1 1
2 6
3 9
disconnect node_1a;
disconnect node_1b;
disconnect node_1c;
disconnect node_1d;
disconnect node_1e;
drop table t1;
!include ../galera_3nodes.cnf
[mysqld.1]
wsrep-debug=SERVER
loose-wsrep-duplicate-primary-value=1
wsrep-auto-increment-control=OFF
auto-increment-offset=1
[mysqld.2]
wsrep-debug=SERVER
loose-wsrep-duplicate-primary-value=1
wsrep-auto-increment-control=OFF
auto-increment-offset=1
[mysqld.3]
wsrep-debug=SERVER
loose-wsrep-duplicate-primary-value=1
wsrep-auto-increment-control=OFF
auto-increment-offset=1
# Regression test for MDEV-31658: two applier (BF) inserts from node_2 and
# node_3 each BF-kill a different idle local transaction on node_1 and are
# held right after the kill by debug sync points, so that both are inside
# the lock code on node_1 at the same time.
--source include/galera_cluster.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/big_test.inc
# Open the connection to the third node.
--let $galera_connection_name = node_3
--let $galera_server_number = 3
--source include/galera_connect.inc
# Extra sessions on node_1: node_1a drives the debug sync handshakes,
# node_1c and node_1d hold the idle local transactions.
# NOTE(review): node_1b is opened and disconnected but no statement is run
# on it, and node_1e is only used for a debug_sync reset.
--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connect node_1d, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connect node_1e, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connection node_1
CREATE TABLE t1(a int not null primary key auto_increment, b int) engine=innodb;
INSERT INTO t1(b) VALUES (1);
# First idle local transaction on node_1: inserts primary key 2, no commit yet.
--connection node_1c
begin;
insert into t1 values (2,2);
# Second idle local transaction on node_1: inserts primary key 3, no commit yet.
--connection node_1d
begin;
insert into t1 values (3,3);
# Arm the first debug point; when hit it signals wsrep_after_kill_reached and
# waits for wsrep_after_kill_continue.
--connection node_1a
SET GLOBAL DEBUG_DBUG='+d,wsrep_after_kill';
# Insert from node_2 uses the same primary key (2) as the node_1c transaction.
--connection node_2
insert into t1 values (2,6);
# Wait until the first debug point has been reached, then switch to the
# second debug point for the next insert.
--connection node_1a
SET SESSION wsrep_sync_wait=0;
SET DEBUG_SYNC='now WAIT_FOR wsrep_after_kill_reached';
SET GLOBAL DEBUG_DBUG='';
SET GLOBAL DEBUG_DBUG='+d,wsrep_after_kill_2';
# Insert from node_3 uses the same primary key (3) as the node_1d transaction.
--connection node_3
insert into t1 values (3,9);
# Wait until the second debug point has been reached, disarm it, and release
# the first waiter.
--connection node_1a
SET DEBUG_SYNC='now WAIT_FOR wsrep_after_kill_reached_2';
SET GLOBAL DEBUG_DBUG='';
SET DEBUG_SYNC='now SIGNAL wsrep_after_kill_continue';
# The first local transaction is expected to fail with error 1213 (deadlock).
--connection node_1c
--error 1213
COMMIT;
# Release the second waiter.
# NOTE(review): DEBUG_DBUG was already cleared above; this reset looks redundant.
--connection node_1a
SET GLOBAL DEBUG_DBUG='';
SET DEBUG_SYNC='now SIGNAL wsrep_after_kill_continue_2';
# The second local transaction is expected to fail with error 1213 (deadlock).
--connection node_1d
--error 1213
COMMIT;
# Show the table contents on every node.
--connection node_2
SELECT * from t1;
--connection node_3
SELECT * from t1;
--connection node_1a
SET DEBUG_SYNC = reset;
--connection node_1e
set debug_sync = reset;
--connection node_1
SELECT * from t1;
# Cleanup.
--disconnect node_1a
--disconnect node_1b
--disconnect node_1c
--disconnect node_1d
--disconnect node_1e
drop table t1;
......@@ -249,18 +249,23 @@ extern "C" my_bool wsrep_thd_skip_locking(const THD *thd)
/** Check whether one brute force (BF) thread is ordered before another.

The result is true only when both threads are BF and the transaction
seqno of left is smaller than the transaction seqno of right. The
decision and the state it was based on are written to the wsrep debug log.

@param left   thread whose ordering is being asked
@param right  thread that left is compared against
@return TRUE if both are BF and left has the smaller seqno, else FALSE */
extern "C" my_bool wsrep_thd_order_before(const THD *left, const THD *right)
{
  my_bool before= (wsrep_thd_is_BF(left, false) &&
                   wsrep_thd_is_BF(right, false) &&
                   wsrep_thd_trx_seqno(left) < wsrep_thd_trx_seqno(right));

  /* Cast every integer argument to exactly the type named by its
  conversion specifier, so the variadic call stays well defined
  whatever the underlying typedefs are on a given platform. */
  WSREP_DEBUG("wsrep_thd_order_before: %s thread=%llu seqno=%lld query=%s "
              "%s %s thread=%llu, seqno=%lld query=%s",
              (wsrep_thd_is_BF(left, false) ? "BF" : "def"),
              (unsigned long long) thd_get_thread_id(left),
              (long long) wsrep_thd_trx_seqno(left),
              wsrep_thd_query(left),
              (before ? " TRUE " : " FALSE "),
              (wsrep_thd_is_BF(right, false) ? "BF" : "def"),
              (unsigned long long) thd_get_thread_id(right),
              (long long) wsrep_thd_trx_seqno(right),
              wsrep_thd_query(right));

  return before;
}
/** Check if wsrep transaction is aborting state.
......
......@@ -18738,6 +18738,25 @@ void lock_wait_wsrep_kill(trx_t *bf_trx, ulong thd_id, trx_id_t trx_id)
wsrep_thd_UNLOCK(vthd);
wsrep_thd_kill_UNLOCK(vthd);
}
#ifdef ENABLED_DEBUG_SYNC
DBUG_EXECUTE_IF(
"wsrep_after_kill",
{const char act[]=
"now "
"SIGNAL wsrep_after_kill_reached "
"WAIT_FOR wsrep_after_kill_continue";
DBUG_ASSERT(!debug_sync_set_action(bf_thd, STRING_WITH_LEN(act)));
};);
DBUG_EXECUTE_IF(
"wsrep_after_kill_2",
{const char act2[]=
"now "
"SIGNAL wsrep_after_kill_reached_2 "
"WAIT_FOR wsrep_after_kill_continue_2";
DBUG_ASSERT(!debug_sync_set_action(bf_thd, STRING_WITH_LEN(act2)));
};);
#endif /* ENABLED_DEBUG_SYNC*/
}
/** This function forces the victim transaction to abort. Aborting the
......
......@@ -503,8 +503,10 @@ this BF-BF wait correct and if not report BF wait and assert.
@param[in] lock_rec other waiting record lock
@param[in] trx trx requesting conflicting record lock
@param[in] type_mode lock type mode of requesting trx
*/
static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx)
static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx,
const unsigned type_mode = LOCK_NONE)
{
ut_ad(!lock->is_table());
lock_sys.assert_locked(*lock);
......@@ -546,6 +548,15 @@ static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx)
return;
}
if (type_mode != LOCK_NONE)
ib::error() << " Requested lock "
<< ((type_mode & LOCK_TABLE) ? "on table " : " on record ")
<< ((type_mode & LOCK_WAIT) ? " WAIT " : " ")
<< ((type_mode & LOCK_GAP) ? " GAP " : " ")
<< ((type_mode & LOCK_REC_NOT_GAP) ? " RECORD " : " ")
<< ((type_mode & LOCK_INSERT_INTENTION) ? " INSERT INTENTION " : " ")
<< ((type_mode & LOCK_X) ? " LOCK_X " : " LOCK_S ");
mtr_t mtr;
ib::error() << "Conflicting lock on table: "
......@@ -582,6 +593,80 @@ ATTRIBUTE_NOINLINE static bool wsrep_is_BF_lock_timeout(const trx_t &trx)
<< " query: " << wsrep_thd_query(trx.mysql_thd);
return true;
}
/** Check whether a new record lock request of a wsrep (Galera) transaction
has to wait for another record lock to be removed.
@param trx        transaction requesting the new lock
@param type_mode  precise mode of the new lock to set: LOCK_S or LOCK_X,
                  possibly ORed with LOCK_GAP or LOCK_REC_NOT_GAP,
                  LOCK_INSERT_INTENTION
@param lock2      another record lock; NOTE that it is assumed that this
                  has a lock bit set on the same record as in the new
                  lock we are setting
@return whether the new lock has to wait for lock2 to be removed */
ATTRIBUTE_NOINLINE ATTRIBUTE_COLD
bool lock_rec_has_to_wait_wsrep(const trx_t *trx,
                                const unsigned type_mode,
                                const lock_t *lock2)
{
  const trx_t* trx2= lock2->trx;

  if (trx->is_wsrep_UK_scan() &&
      wsrep_thd_is_BF(trx2->mysql_thd, false))
  {
    /* The new lock request comes from a transaction doing a unique
    key scan and the holder of the conflicting lock is a wsrep high
    priority (brute force) transaction. We avoid the lock conflict
    because the ordering of these transactions is already decided
    and the conflicting transaction will be replayed later. */
    return false;
  }

  if (wsrep_thd_is_BF(trx->mysql_thd, false) &&
      wsrep_thd_is_BF(trx2->mysql_thd, false))
  {
    /* Both transactions are high priority transactions. */

    /* The basic lock mode is an enumerated value stored in the low
    bits of type_mode, so it is compared by equality after masking,
    the same way lock2->mode() is compared below. A plain bit test
    against LOCK_S would not tell the lock modes apart. */
    const bool requests_s= (type_mode & LOCK_MODE_MASK) == LOCK_S;

    if ((requests_s && lock2->is_insert_intention()) ||
        ((type_mode & LOCK_INSERT_INTENTION) && lock2->mode() == LOCK_S))
    {
      ut_ad(!wsrep_thd_is_local(trx->mysql_thd));
      ut_ad(!wsrep_thd_is_local(trx2->mysql_thd));
      /* A high priority applier transaction might take S-locks on
      conflicting primary/unique key records, and the local
      transactions holding them are BF-killed. However, these S-locks
      are released only at commit time. Therefore another high
      priority applier transaction that requests an insert intention
      lock (II-lock) on the primary/unique index might notice a
      conflicting S-lock. Certification makes sure that applier
      transactions do not insert duplicate keys, so S-lock and
      II-lock are allowed to coexist. */
      return false;
    }

    if (wsrep_thd_order_before(trx->mysql_thd, trx2->mysql_thd))
    {
      /* If two high priority threads have a lock conflict, we look at
      the order of these transactions and honor the earlier one. */
      return false;
    }

    /* We very well can let a BF transaction wait normally, as the
    other BF transaction will be replayed in case of conflict. For
    debug builds we do additional sanity checks to catch an
    unsupported BF wait, if any. */
    ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx, type_mode));
  }

  return true;
}
#endif /* WITH_WSREP */
/*********************************************************************//**
......@@ -691,31 +776,8 @@ lock_rec_has_to_wait(
#endif /* HAVE_REPLICATION */
#ifdef WITH_WSREP
/* New lock request from a transaction is using unique key
scan and this transaction is a wsrep high priority transaction
(brute force). If conflicting transaction is also wsrep high
priority transaction we should avoid lock conflict because
ordering of these transactions is already decided and
conflicting transaction will be later replayed. */
if (trx->is_wsrep_UK_scan()
&& wsrep_thd_is_BF(lock2->trx->mysql_thd, false)) {
return false;
}
/* if BF-BF conflict, we have to look at write set order */
if (trx->is_wsrep() &&
(type_mode & LOCK_MODE_MASK) == LOCK_X &&
(lock2->type_mode & LOCK_MODE_MASK) == LOCK_X &&
wsrep_thd_order_before(trx->mysql_thd,
lock2->trx->mysql_thd)) {
return false;
}
/* We very well can let bf to wait normally as other
BF will be replayed in case of conflict. For debug
builds we will do additional sanity checks to catch
unsupported bf wait if any. */
ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx));
if (trx->is_wsrep())
return lock_rec_has_to_wait_wsrep(trx, type_mode, lock2);
#endif /* WITH_WSREP */
return true;
......@@ -1766,14 +1828,6 @@ lock_rec_has_to_wait_in_queue(const hash_cell_t &cell, const lock_t *wait_lock)
if (heap_no < lock_rec_get_n_bits(lock)
&& (p[bit_offset] & bit_mask)
&& lock_has_to_wait(wait_lock, lock)) {
#ifdef WITH_WSREP
if (lock->trx->is_wsrep() &&
wsrep_thd_order_before(wait_lock->trx->mysql_thd,
lock->trx->mysql_thd)) {
/* don't wait for another BF lock */
continue;
}
#endif
return(lock);
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment