Commit 8ff10969 authored by Vlad Lesin's avatar Vlad Lesin

MDEV-29081 trx_t::lock.was_chosen_as_deadlock_victim race in lock_wait_end()

The issue is that trx_t::lock.was_chosen_as_deadlock_victim can be reset
before the transaction check it and set trx_t::error_state.

The fix is to reset trx_t::lock.was_chosen_as_deadlock_victim only in
trx_t::commit_in_memory(), which is invoked on full rollback. There is
also no need to have separate bit in
trx_t::lock.was_chosen_as_deadlock_victim to flag transaction it was
chosen as a victim of Galera conflict resolution, the same variable can be
used for both cases except debug build. For debug build we need to
distinguish deadlock and Galera's abort victims for debug checks. Also
there is no need to check for deadlock in lock_table_enqueue_waiting() for
Galera as the coresponding check presents in lock_wait().

Local variable "error_state" in lock_wait() was replaced with
trx->error_state, because before the replace
lock_sys_t::cancel<false>(trx, lock) and lock_sys.deadlock_check() could
change trx->error_state, which then could be overwritten with the local
"error_state" variable value.

The lock_wait_suspend_thread_enter DEBUG_SYNC point name is misleading,
because lock_wait_suspend_thread was eliminated in e71e6133. It was renamed
to lock_wait_start.

Reviewed by: Marko Mäkelä, Jan Lindström.
parent 5b4c832c
......@@ -14,7 +14,7 @@ connection node_1_u;
begin;
update user set j = j + 1 WHERE id > 0;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
insert into user_session(id,fk1,fk2) values (2, 2, 2);
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -32,7 +32,7 @@ connection node_1_u;
begin;
update user set j = j + 1 WHERE id > 0;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
insert into user_session(id,fk1,fk2) values (2, 2, 2);
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -50,7 +50,7 @@ connection node_1_u;
begin;
update user set j = j + 1 WHERE id > 0;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
insert into user_session(id,fk1,fk2) values (2, 2, 2);
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -68,7 +68,7 @@ connection node_1_u;
begin;
update user set j = j + 1 WHERE id > 0;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
insert into user_session(id,fk1,fk2) values (2, 2, 2);
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -86,7 +86,7 @@ connection node_1_u;
begin;
update user set j = j + 1 WHERE id > 0;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
insert into user_session(id,fk1,fk2) values (2, 2, 2);
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -104,7 +104,7 @@ connection node_1_u;
begin;
update user set j = j + 1 WHERE id > 0;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
insert into user_session(id,fk1,fk2) values (2, 2, 2);
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -122,7 +122,7 @@ connection node_1_u;
begin;
update user set j = j + 1 WHERE id > 0;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
insert into user_session(id,fk1,fk2) values (2, 2, 2);
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -140,7 +140,7 @@ connection node_1_u;
begin;
update user set j = j + 1 WHERE id > 0;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
insert into user_session(id,fk1,fk2) values (2, 2, 2);
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -158,7 +158,7 @@ connection node_1_u;
begin;
update user set j = j + 1 WHERE id > 0;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
insert into user_session(id,fk1,fk2) values (2, 2, 2);
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -176,7 +176,7 @@ connection node_1_u;
begin;
update user set j = j + 1 WHERE id > 0;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
insert into user_session(id,fk1,fk2) values (2, 2, 2);
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -202,7 +202,7 @@ connection node_1_u;
begin;
execute upd;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
execute ins1;
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -220,7 +220,7 @@ connection node_1_u;
begin;
execute upd;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
execute ins1;
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -238,7 +238,7 @@ connection node_1_u;
begin;
execute upd;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
execute ins1;
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -256,7 +256,7 @@ connection node_1_u;
begin;
execute upd;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
execute ins1;
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -274,7 +274,7 @@ connection node_1_u;
begin;
execute upd;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
execute ins1;
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -292,7 +292,7 @@ connection node_1_u;
begin;
execute upd;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
execute ins1;
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -310,7 +310,7 @@ connection node_1_u;
begin;
execute upd;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
execute ins1;
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -328,7 +328,7 @@ connection node_1_u;
begin;
execute upd;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
execute ins1;
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -346,7 +346,7 @@ connection node_1_u;
begin;
execute upd;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
execute ins1;
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......@@ -364,7 +364,7 @@ connection node_1_u;
begin;
execute upd;
connection node_1_i;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
execute ins1;
connection node_1;
set debug_sync='now WAIT_FOR ins_waiting';
......
......@@ -72,7 +72,7 @@ while($counter > 0)
update user set j = j + 1 WHERE id > 0;
--connection node_1_i
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
send insert into user_session(id,fk1,fk2) values (2, 2, 2);
--connection node_1
......@@ -126,7 +126,7 @@ while($counter > 0)
#update user set j = j + 1 WHERE id > 0;
--connection node_1_i
set debug_sync='lock_wait_suspend_thread_enter SIGNAL ins_waiting WAIT_FOR cont_ins';
set debug_sync='lock_wait_start SIGNAL ins_waiting WAIT_FOR cont_ins';
send execute ins1;
--connection node_1
......
......@@ -7,12 +7,12 @@ SET DEBUG_SYNC = 'innodb_row_search_for_mysql_exit SIGNAL first_del_row_search_m
DELETE FROM t WHERE b = 20;
connect con_ins_1,localhost,root,,;
SET DEBUG_SYNC = 'now WAIT_FOR first_del_row_search_mvcc_finished';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL first_ins_locked';
SET DEBUG_SYNC = 'lock_wait_start SIGNAL first_ins_locked';
SET DEBUG_SYNC = 'ib_after_row_insert SIGNAL first_ins_row_inserted WAIT_FOR first_ins_cont';
INSERT INTO t VALUES(10, 20);
connect con_del_2,localhost,root,,;
SET DEBUG_SYNC = 'now WAIT_FOR first_ins_locked';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL second_del_locked';
SET DEBUG_SYNC = 'lock_wait_start SIGNAL second_del_locked';
DELETE FROM t WHERE b = 20;
connection default;
SET DEBUG_SYNC = 'now WAIT_FOR second_del_locked';
......
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB;
INSERT INTO t VALUES (10, 10), (20, 20), (30, 30);
INSERT INTO t2 VALUES (10), (20), (30);
BEGIN;
SELECT * FROM t WHERE a = 20 FOR UPDATE;
a b
20 20
connect con_2,localhost,root,,;
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
BEGIN;
SET DEBUG_SYNC = 'lock_trx_handle_wait_enter SIGNAL upd_locked WAIT_FOR upd_cont EXECUTE 2';
UPDATE t SET b = 100;
connect con_3,localhost,root,,;
BEGIN;
UPDATE t2 SET a = a + 100;
SELECT * FROM t WHERE a = 30 FOR UPDATE;
a b
30 30
SET DEBUG_SYNC='now WAIT_FOR upd_locked';
SET DEBUG_SYNC = 'lock_wait_start SIGNAL sel_locked';
SELECT * FROM t WHERE a = 20 FOR UPDATE;
connection default;
SET DEBUG_SYNC='now WAIT_FOR sel_locked';
ROLLBACK;
SET DEBUG_SYNC='now SIGNAL upd_cont';
SET innodb_lock_wait_timeout=1;
SET DEBUG_SYNC="now WAIT_FOR upd_locked";
SET DEBUG_SYNC="lock_wait_end SIGNAL upd_cont";
SELECT * FROM t WHERE a = 10 FOR UPDATE;
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
connection con_3;
a b
20 20
connection con_2;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
disconnect con_3;
disconnect con_2;
connection default;
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
DROP TABLE t2;
......@@ -38,7 +38,7 @@ select f1, f2 from t2 for update;
f1 f2
1 2
connection default;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL upd_waiting WAIT_FOR go_upd';
set debug_sync='lock_wait_start SIGNAL upd_waiting WAIT_FOR go_upd';
update t1 set f1 = 10 where f1 = 2;
connection con1;
set debug_sync='now WAIT_FOR upd_waiting';
......@@ -97,7 +97,7 @@ select f1, f2 from t2 for update;
f1 f2
1 91
connection default;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL upd_waiting WAIT_FOR go_upd';
set debug_sync='lock_wait_start SIGNAL upd_waiting WAIT_FOR go_upd';
update t1 set f2 = 28 where f2 = 91;
connection con1;
set debug_sync='now WAIT_FOR upd_waiting';
......@@ -164,7 +164,7 @@ select f1 from t3 for update;
f1
2
connection default;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL upd_waiting WAIT_FOR go_upd';
set debug_sync='lock_wait_start SIGNAL upd_waiting WAIT_FOR go_upd';
update t1 set f1 = 10 where f1 = 2;
connection con1;
set debug_sync='now WAIT_FOR upd_waiting';
......@@ -253,7 +253,7 @@ select f1 from t3 for update;
f1
2
connection default;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL upd_waiting WAIT_FOR go_upd';
set debug_sync='lock_wait_start SIGNAL upd_waiting WAIT_FOR go_upd';
update t1 set f2 = 28 where f2 = 91;
connection con1;
set debug_sync='now WAIT_FOR upd_waiting';
......
......@@ -16,19 +16,19 @@ SET DEBUG_SYNC = 'innodb_row_search_for_mysql_exit SIGNAL first_del_row_search_m
--connect(con_ins_1,localhost,root,,)
SET DEBUG_SYNC = 'now WAIT_FOR first_del_row_search_mvcc_finished';
# It's supposed the following INSERT will be suspended just after
# lock_wait_suspend_thread_enter syncpoint, and will be awaken
# lock_wait_start syncpoint, and will be awaken
# after the previous DELETE commits. ib_after_row_insert will be executed
# after the INSERT is woken up. The previous DELETE will wait for
# first_del_cont signal before commit, and this signal will be sent later.
# So it's safe to use two signals in a row here, it's guaranted the first
# signal will be received before the second signal is sent.
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL first_ins_locked';
SET DEBUG_SYNC = 'lock_wait_start SIGNAL first_ins_locked';
SET DEBUG_SYNC = 'ib_after_row_insert SIGNAL first_ins_row_inserted WAIT_FOR first_ins_cont';
--send INSERT INTO t VALUES(10, 20)
--connect(con_del_2,localhost,root,,)
SET DEBUG_SYNC = 'now WAIT_FOR first_ins_locked';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL second_del_locked';
SET DEBUG_SYNC = 'lock_wait_start SIGNAL second_del_locked';
###############################################################################
# This DELETE is locked by the previous DELETE, after that DELETE is
# committed, it will still be locked by the next INSERT on delete-marked
......
--source include/have_innodb.inc
--source include/have_debug_sync.inc
--source include/count_sessions.inc
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB;
INSERT INTO t VALUES (10, 10), (20, 20), (30, 30);
INSERT INTO t2 VALUES (10), (20), (30);
BEGIN; # trx 1
SELECT * FROM t WHERE a = 20 FOR UPDATE;
# Locking order:
# (10,10) (20,20) (30,30)
# ^
# trx 1
--connect(con_2,localhost,root,,)
# RC is neccessary to do semi-consistent read
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
BEGIN; # trx 2
# The first time it will be hit on trying to lock (20,20), the second hit
# will be on (30,30).
SET DEBUG_SYNC = 'lock_trx_handle_wait_enter SIGNAL upd_locked WAIT_FOR upd_cont EXECUTE 2';
# We must not modify primary key fields to cause rr_sequential() read record
# function choosing in mysql_update(), i.e. both query_plan.using_filesort and
# query_plan.using_io_buffer must be false during init_read_record() call.
--send UPDATE t SET b = 100
--connect(con_3,localhost,root,,)
BEGIN; # trx 3
# The following update is necessary to increase the transaction weight, which is
# calculated as the number of locks + the number of undo records during deadlock
# report. Victim's transaction should have minimum weight. We need trx 2 to be
# choosen as victim, that's why we need to increase the current transaction
# weight.
UPDATE t2 SET a = a + 100;
SELECT * FROM t WHERE a = 30 FOR UPDATE;
SET DEBUG_SYNC='now WAIT_FOR upd_locked';
# Locking queue:
# (10,10) (20,20) (30,30)
# ^ ^ ^
# trx 2 trx 1 trx 3
# trx 2 (waiting for 1)
SET DEBUG_SYNC = 'lock_wait_start SIGNAL sel_locked';
--send SELECT * FROM t WHERE a = 20 FOR UPDATE
--connection default
SET DEBUG_SYNC='now WAIT_FOR sel_locked';
# Locking queue:
# (10,10) (20,20) (30,30)
# ^ ^ ^
# trx 2 trx 1 trx 3
# trx 2 (waiting for 1)
# trx 3 (waiting for 1)
#
# Note trx 1 must grant lock to trx2 before trx 2 checks the lock state in
# lock_trx_handle_wait(), i.e. the function must return DB_SUCCESS, that's why
# the following ROLLBACK must be executed before sending upd_cont signal.
ROLLBACK;
SET DEBUG_SYNC='now SIGNAL upd_cont';
SET innodb_lock_wait_timeout=1;
SET DEBUG_SYNC="now WAIT_FOR upd_locked";
# Locking queue:
# (10,10) (20,20) (30,30)
# ^ ^ ^
# trx 2 trx 2 trx 3
# trx 3 (waiting for 2) trx 2 (waiting for 3)
#
# Deadlock happened after trx 1 granted lock to trx 2, and trx2 continued
# sequential read (with rr_sequential() read record function), and requested
# lock on (30,30). But the deadlock has not been determined yet.
SET DEBUG_SYNC="lock_wait_end SIGNAL upd_cont";
--error ER_LOCK_WAIT_TIMEOUT
# The deadlock will be determined in lock_wait() after lock wait timeout
# expired.
SELECT * FROM t WHERE a = 10 FOR UPDATE;
--connection con_3
--reap
--connection con_2
# As lock_trx_handle_wait() wrongly returned DB_SUCCESS instead of
# DB_DEADLOCK, row_search_mvcc() of trx 2 behaves so as if (30,30) was locked.
# But the waiting(for trx 3) lock was cancelled by deadlock checker after
# trx 2 was choosen as a victim (see lock_cancel_waiting_and_release() call
# from Deadlock::report() for details). The try to update non-locked record
# will cause assertion if the bug is not fixed.
--error ER_LOCK_DEADLOCK
--reap
--disconnect con_3
--disconnect con_2
--connection default
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
DROP TABLE t2;
--source include/wait_until_count_sessions.inc
......@@ -28,7 +28,7 @@ start transaction;
select f1, f2 from t2 for update;
connection default;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL upd_waiting WAIT_FOR go_upd';
set debug_sync='lock_wait_start SIGNAL upd_waiting WAIT_FOR go_upd';
send update t1 set f1 = 10 where f1 = 2;
connection con1;
......@@ -72,7 +72,7 @@ start transaction;
select f1, f2 from t2 for update;
connection default;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL upd_waiting WAIT_FOR go_upd';
set debug_sync='lock_wait_start SIGNAL upd_waiting WAIT_FOR go_upd';
send update t1 set f2 = 28 where f2 = 91;
connection con1;
......@@ -120,7 +120,7 @@ start transaction;
select f1 from t3 for update;
connection default;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL upd_waiting WAIT_FOR go_upd';
set debug_sync='lock_wait_start SIGNAL upd_waiting WAIT_FOR go_upd';
send update t1 set f1 = 10 where f1 = 2;
connection con1;
......@@ -183,7 +183,7 @@ start transaction;
select f1 from t3 for update;
connection default;
set debug_sync='lock_wait_suspend_thread_enter SIGNAL upd_waiting WAIT_FOR go_upd';
set debug_sync='lock_wait_start SIGNAL upd_waiting WAIT_FOR go_upd';
send update t1 set f2 = 28 where f2 = 91;
connection con1;
......
......@@ -245,7 +245,8 @@ void trx_t::commit(std::vector<pfs_os_file_t> &deleted)
ut_ad(ib_vector_is_empty(autoinc_locks));
mem_heap_empty(lock.lock_heap);
lock.table_locks.clear();
lock.was_chosen_as_deadlock_victim= false;
/* commit_persist() already reset this. */
ut_ad(!lock.was_chosen_as_deadlock_victim);
lock.n_rec_locks= 0;
while (dict_table_t *table= UT_LIST_GET_FIRST(lock.evicted_tables))
{
......
......@@ -338,27 +338,11 @@ struct trx_lock_t
/** lock wait start time */
Atomic_relaxed<my_hrtime_t> suspend_time;
#if defined(UNIV_DEBUG) || !defined(DBUG_OFF)
/** 2=high priority WSREP thread has marked this trx to abort;
1=another transaction chose this as a victim in deadlock resolution. */
Atomic_relaxed<byte> was_chosen_as_deadlock_victim;
/** Clear the deadlock victim status. */
void clear_deadlock_victim()
{
#ifndef WITH_WSREP
was_chosen_as_deadlock_victim= false;
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
/* There is no 8-bit version of the 80386 BTR instruction.
Technically, this is the wrong addressing mode (16-bit), but
there are other data members stored after the byte. */
__asm__ __volatile__("lock btrw $0, %0"
: "+m" (was_chosen_as_deadlock_victim));
#else
was_chosen_as_deadlock_victim.fetch_and(byte(~1));
#endif
}
#ifdef WITH_WSREP
/** Flag the lock owner as a victim in Galera conflict resolution. */
void set_wsrep_victim()
{
......@@ -372,7 +356,17 @@ struct trx_lock_t
was_chosen_as_deadlock_victim.fetch_or(2);
# endif
}
#endif
#else /* defined(UNIV_DEBUG) || !defined(DBUG_OFF) */
/** High priority WSREP thread has marked this trx to abort or
another transaction chose this as a victim in deadlock resolution. */
Atomic_relaxed<bool> was_chosen_as_deadlock_victim;
/** Flag the lock owner as a victim in Galera conflict resolution. */
void set_wsrep_victim() {
was_chosen_as_deadlock_victim= true;
}
#endif /* defined(UNIV_DEBUG) || !defined(DBUG_OFF) */
/** Next available rec_pool[] entry */
byte rec_cached;
......
......@@ -44,6 +44,7 @@ Created 5/7/1996 Heikki Tuuri
#include "row0vers.h"
#include "pars0pars.h"
#include "srv0mon.h"
#include "scope.h"
#include <set>
......@@ -1275,6 +1276,14 @@ lock_rec_enqueue_waiting(
trx_t* trx = thr_get_trx(thr);
ut_ad(xtest() || trx->mutex_is_owner());
ut_ad(!trx->dict_operation_lock_mode);
/* Apart from Galera, only transactions that have waiting lock can be
chosen as deadlock victim. Only one lock can be waited for at a time,
and a transaction is associated with a single thread. That is why there
must not be waiting lock requests if the transaction is deadlock victim
and it is not WSREP. Galera transaction abort can be invoked from MDL
acquisition code when the transaction does not have waiting record
lock, that's why we check only deadlock victim bit here. */
ut_ad(!(trx->lock.was_chosen_as_deadlock_victim & 1));
if (trx->mysql_thd && thd_lock_wait_timeout(trx->mysql_thd) == 0) {
trx->error_state = DB_LOCK_WAIT_TIMEOUT;
......@@ -1292,7 +1301,6 @@ lock_rec_enqueue_waiting(
}
trx->lock.wait_thr = thr;
trx->lock.clear_deadlock_victim();
DBUG_LOG("ib_lock", "trx " << ib::hex(trx->id)
<< " waits for lock in index " << index->name
......@@ -1475,7 +1483,14 @@ lock_rec_lock(
que_thr_t* thr) /*!< in: query thread */
{
trx_t *trx= thr_get_trx(thr);
/* There must not be lock requests for reads or updates if transaction was
chosen as deadlock victim. Apart from Galera, only transactions that have
waiting lock may be chosen as deadlock victims. Only one lock can be waited
for at a time, and a transaction is associated with a single thread. Galera
transaction abort can be invoked from MDL acquisition code when the
transaction does not have waiting lock, that's why we check only deadlock
victim bit here. */
ut_ad(!(trx->lock.was_chosen_as_deadlock_victim & 1));
ut_ad(!srv_read_only_mode);
ut_ad(((LOCK_MODE_MASK | LOCK_TABLE) & mode) == LOCK_S ||
((LOCK_MODE_MASK | LOCK_TABLE) & mode) == LOCK_X);
......@@ -1627,7 +1642,9 @@ void lock_sys_t::wait_resume(THD *thd, my_hrtime_t start, my_hrtime_t now)
#ifdef HAVE_REPLICATION
ATTRIBUTE_NOINLINE MY_ATTRIBUTE((nonnull))
/** Report lock waits to parallel replication.
/** Report lock waits to parallel replication. Sets
trx->error_state= DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was
set when lock_sys.wait_mutex was unlocked.
@param trx transaction that may be waiting for a lock
@param wait_lock lock that is being waited for */
static void lock_wait_rpl_report(trx_t *trx)
......@@ -1642,7 +1659,8 @@ static void lock_wait_rpl_report(trx_t *trx)
ut_ad(!(wait_lock->type_mode & LOCK_AUTO_INC));
/* This would likely be too large to attempt to use a memory transaction,
even for wait_lock->is_table(). */
if (!lock_sys.wr_lock_try())
const bool nowait= lock_sys.wr_lock_try();
if (!nowait)
{
mysql_mutex_unlock(&lock_sys.wait_mutex);
lock_sys.wr_lock(SRW_LOCK_CALL);
......@@ -1652,6 +1670,10 @@ static void lock_wait_rpl_report(trx_t *trx)
{
func_exit:
lock_sys.wr_unlock();
/* trx->lock.was_chosen_as_deadlock_victim can be set when
lock_sys.wait_mutex was unlocked, let's check it. */
if (!nowait && trx->lock.was_chosen_as_deadlock_victim)
trx->error_state= DB_DEADLOCK;
return;
}
ut_ad(wait_lock->is_waiting());
......@@ -1700,7 +1722,13 @@ dberr_t lock_wait(que_thr_t *thr)
trx_t *trx= thr_get_trx(thr);
if (trx->mysql_thd)
DEBUG_SYNC_C("lock_wait_suspend_thread_enter");
DEBUG_SYNC_C("lock_wait_start");
/* Create the sync point for any quit from the function. */
ut_d(SCOPE_EXIT([trx]() {
if (trx->mysql_thd)
DEBUG_SYNC_C("lock_wait_end");
}));
/* InnoDB system transactions may use the global value of
innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
......@@ -1731,11 +1759,8 @@ dberr_t lock_wait(que_thr_t *thr)
{
/* The lock has already been released or this transaction
was chosen as a deadlock victim: no need to wait */
if (trx->lock.was_chosen_as_deadlock_victim.fetch_and(byte(~1)))
trx->error_state= DB_DEADLOCK;
else
trx->error_state= DB_SUCCESS;
trx->error_state=
trx->lock.was_chosen_as_deadlock_victim ? DB_DEADLOCK : DB_SUCCESS;
return trx->error_state;
}
......@@ -1770,7 +1795,7 @@ dberr_t lock_wait(que_thr_t *thr)
wait_lock->un_member.tab_lock.table->id <= DICT_FIELDS_ID);
thd_wait_begin(trx->mysql_thd, (type_mode & LOCK_TABLE)
? THD_WAIT_TABLE_LOCK : THD_WAIT_ROW_LOCK);
dberr_t error_state= DB_SUCCESS;
trx->error_state= DB_SUCCESS;
mysql_mutex_lock(&lock_sys.wait_mutex);
if (trx->lock.wait_lock)
......@@ -1778,23 +1803,28 @@ dberr_t lock_wait(que_thr_t *thr)
if (Deadlock::check_and_resolve(trx))
{
ut_ad(!trx->lock.wait_lock);
error_state= DB_DEADLOCK;
trx->error_state= DB_DEADLOCK;
goto end_wait;
}
}
else
{
/* trx->lock.was_chosen_as_deadlock_victim can be changed before
lock_sys.wait_mutex is acquired, so let's check it once more. */
trx->error_state=
trx->lock.was_chosen_as_deadlock_victim ? DB_DEADLOCK : DB_SUCCESS;
goto end_wait;
}
if (row_lock_wait)
lock_sys.wait_start();
trx->error_state= DB_SUCCESS;
#ifdef HAVE_REPLICATION
if (rpl)
lock_wait_rpl_report(trx);
#endif
trx->error_state= DB_SUCCESS;
while (trx->lock.wait_lock)
{
int err;
......@@ -1807,20 +1837,19 @@ dberr_t lock_wait(que_thr_t *thr)
else
err= my_cond_timedwait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex,
&abstime);
error_state= trx->error_state;
switch (error_state) {
switch (trx->error_state) {
case DB_DEADLOCK:
case DB_INTERRUPTED:
break;
default:
ut_ad(error_state != DB_LOCK_WAIT_TIMEOUT);
ut_ad(trx->error_state != DB_LOCK_WAIT_TIMEOUT);
/* Dictionary transactions must ignore KILL, because they could
be executed as part of a multi-transaction DDL operation,
such as rollback_inplace_alter_table() or ha_innobase::delete_table(). */
if (!trx->dict_operation && trx_is_interrupted(trx))
/* innobase_kill_query() can only set trx->error_state=DB_INTERRUPTED
for any transaction that is attached to a connection. */
error_state= DB_INTERRUPTED;
trx->error_state= DB_INTERRUPTED;
else if (!err)
continue;
#ifdef WITH_WSREP
......@@ -1828,7 +1857,7 @@ dberr_t lock_wait(que_thr_t *thr)
#endif
else
{
error_state= DB_LOCK_WAIT_TIMEOUT;
trx->error_state= DB_LOCK_WAIT_TIMEOUT;
lock_sys.timeouts++;
}
}
......@@ -1848,8 +1877,7 @@ dberr_t lock_wait(que_thr_t *thr)
mysql_mutex_unlock(&lock_sys.wait_mutex);
thd_wait_end(trx->mysql_thd);
trx->error_state= error_state;
return error_state;
return trx->error_state;
}
......@@ -1862,7 +1890,7 @@ static void lock_wait_end(trx_t *trx)
ut_ad(state == TRX_STATE_ACTIVE || state == TRX_STATE_PREPARED);
ut_ad(trx->lock.wait_thr);
if (trx->lock.was_chosen_as_deadlock_victim.fetch_and(byte(~1)))
if (trx->lock.was_chosen_as_deadlock_victim)
{
ut_ad(state == TRX_STATE_ACTIVE);
trx->error_state= DB_DEADLOCK;
......@@ -3401,17 +3429,18 @@ lock_table_enqueue_waiting(
ut_ad(trx->mutex_is_owner());
ut_ad(!trx->dict_operation_lock_mode);
#ifdef WITH_WSREP
if (trx->is_wsrep() && trx->lock.was_chosen_as_deadlock_victim) {
return(DB_DEADLOCK);
}
#endif /* WITH_WSREP */
/* Enqueue the lock request that will wait to be granted */
lock_table_create(table, mode | LOCK_WAIT, trx, c_lock);
trx->lock.wait_thr = thr;
trx->lock.clear_deadlock_victim();
/* Apart from Galera, only transactions that have waiting lock
may be chosen as deadlock victims. Only one lock can be waited for at a
time, and a transaction is associated with a single thread. That is why
there must not be waiting lock requests if the transaction is deadlock
victim and it is not WSREP. Galera transaction abort can be invoked
from MDL acquisition code when the transaction does not have waiting
lock, that's why we check only deadlock victim bit here. */
ut_ad(!(trx->lock.was_chosen_as_deadlock_victim & 1));
MONITOR_INC(MONITOR_TABLELOCK_WAIT);
return(DB_LOCK_WAIT);
......@@ -3949,7 +3978,6 @@ void lock_release(trx_t *trx)
mysql_mutex_unlock(&lock_sys.wait_mutex);
}
trx->lock.was_chosen_as_deadlock_victim= false;
trx->lock.n_rec_locks= 0;
#ifdef UNIV_DEBUG
......@@ -5718,10 +5746,12 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
lock_sys.rd_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex);
lock= trx->lock.wait_lock;
if (!lock);
else if (check_victim && trx->lock.was_chosen_as_deadlock_victim)
/* Even if waiting lock was cancelled while lock_sys.wait_mutex was
unlocked, we need to return deadlock error if transaction was chosen
as deadlock victim to rollback it */
if (check_victim && trx->lock.was_chosen_as_deadlock_victim)
err= DB_DEADLOCK;
else
else if (lock)
goto resolve_table_lock;
}
else
......@@ -5769,10 +5799,12 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
lock_sys.wr_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex);
lock= trx->lock.wait_lock;
if (!lock);
else if (check_victim && trx->lock.was_chosen_as_deadlock_victim)
/* Even if waiting lock was cancelled while lock_sys.wait_mutex was
unlocked, we need to return deadlock error if transaction was chosen
as deadlock victim to rollback it */
if (check_victim && trx->lock.was_chosen_as_deadlock_victim)
err= DB_DEADLOCK;
else
else if (lock)
goto resolve_record_lock;
}
else
......@@ -5850,6 +5882,7 @@ while holding a clustered index leaf page latch.
lock request was released */
dberr_t lock_trx_handle_wait(trx_t *trx)
{
DEBUG_SYNC_C("lock_trx_handle_wait_enter");
if (trx->lock.was_chosen_as_deadlock_victim)
return DB_DEADLOCK;
if (!trx->lock.wait_lock)
......
......@@ -135,6 +135,9 @@ inline void trx_t::rollback_low(trx_savept_t *savept)
}
else
{
/* There must not be partial rollback if transaction was chosen as deadlock
victim. Galera transaction abort can be invoked during partial rollback. */
ut_ad(!(lock.was_chosen_as_deadlock_victim & 1));
ut_a(error_state == DB_SUCCESS);
const undo_no_t limit= savept->least_undo_no;
apply_online_log= false;
......@@ -211,6 +214,10 @@ dberr_t trx_rollback_for_mysql(trx_t* trx)
case TRX_STATE_NOT_STARTED:
trx->will_lock = false;
ut_ad(trx->mysql_thd);
/* Galera transaction abort can be invoked from MDL acquision
code, so trx->lock.was_chosen_as_deadlock_victim can be set
even if trx->state is TRX_STATE_NOT_STARTED. */
ut_ad(!(trx->lock.was_chosen_as_deadlock_victim & 1));
#ifdef WITH_WSREP
trx->wsrep= false;
trx->lock.was_chosen_as_deadlock_victim= false;
......@@ -418,9 +425,6 @@ trx_rollback_to_savepoint_for_mysql_low(
trx_mark_sql_stat_end(trx);
trx->op_info = "";
#ifdef WITH_WSREP
trx->lock.was_chosen_as_deadlock_victim = false;
#endif
return(err);
}
......
......@@ -1379,9 +1379,8 @@ TRANSACTIONAL_INLINE inline void trx_t::commit_in_memory(const mtr_t *mtr)
wsrep= false;
wsrep_commit_ordered(mysql_thd);
}
ut_ad(!(lock.was_chosen_as_deadlock_victim & byte(~2U)));
lock.was_chosen_as_deadlock_victim= false;
#endif /* WITH_WSREP */
lock.was_chosen_as_deadlock_victim= false;
}
void trx_t::commit_cleanup()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment