Commit 78a04a4c authored by Vlad Lesin's avatar Vlad Lesin

MDEV-29869 mtr failure: innodb.deadlock_wait_thr_race

1. The merge aeccbbd9 overwrote lock0lock.cc, so the changes of
MDEV-29622 and MDEV-29635 were partially lost. This commit restores
those changes.

2. innodb.deadlock_wait_thr_race test:

The following hang was found during testing.

There is a deadlock_report_before_lock_releasing sync point in
Deadlock::report(), which waits for the sel_cont signal while holding the
lock_sys_t lock. The signal must be issued after the rollback of
"UPDATE t SET b = 100", and that rollback is executing an undo record, which
is blocked on a dict_sys latch request. The dict_sys latch is held by the
statistics update thread (dict_stats_save()), and during that update the
lock_sys lock is requested, which can't be acquired because
Deadlock::report() holds it. We have to disable statistics updates to make
the test stable.

But even if statistics updates are disabled, and a transaction with a
consistent snapshot is started at the very beginning of the test to prevent
purging, purge can still be invoked for system tables. It tries to open a
system table by id, which causes a dict_sys.freeze() call and dict_sys
latching. This, in combination with lock_sys::xx_lock(), causes the same
deadlock as described above. We need to disable purging globally for the
test as well.

All of the above also applies to the innodb.deadlock_wait_lock_race test.
parent 5027cb2b
deadlock_wait_lock_race : MDEV-29869: often hangs in lock wait timeout
deadlock_wait_thr_race : MDEV-29869: often hangs in lock wait timeout
connect suspend_purge,localhost,root,,; CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0;
START TRANSACTION WITH CONSISTENT SNAPSHOT; CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0;
connection default;
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB;
INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); INSERT INTO t VALUES (10, 10), (20, 20), (30, 30);
INSERT INTO t2 VALUES (10), (20), (30); INSERT INTO t2 VALUES (10), (20), (30);
BEGIN; BEGIN;
...@@ -28,4 +25,3 @@ a b ...@@ -28,4 +25,3 @@ a b
SET DEBUG_SYNC = 'RESET'; SET DEBUG_SYNC = 'RESET';
DROP TABLE t; DROP TABLE t;
DROP TABLE t2; DROP TABLE t2;
disconnect suspend_purge;
connect suspend_purge,localhost,root,,; CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0;
START TRANSACTION WITH CONSISTENT SNAPSHOT; CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0;
connection default;
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB;
INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); INSERT INTO t VALUES (10, 10), (20, 20), (30, 30);
INSERT INTO t2 VALUES (10), (20), (30); INSERT INTO t2 VALUES (10), (20), (30);
BEGIN; BEGIN;
...@@ -34,4 +31,3 @@ a b ...@@ -34,4 +31,3 @@ a b
SET DEBUG_SYNC = 'RESET'; SET DEBUG_SYNC = 'RESET';
DROP TABLE t; DROP TABLE t;
DROP TABLE t2; DROP TABLE t2;
disconnect suspend_purge;
...@@ -2,17 +2,23 @@ ...@@ -2,17 +2,23 @@
--source include/have_debug_sync.inc --source include/have_debug_sync.inc
--source include/count_sessions.inc --source include/count_sessions.inc
--connect(suspend_purge,localhost,root,,)
# Purge can cause deadlock in the test, requesting page's RW_X_LATCH for trx # Purge can cause deadlock in the test, requesting page's RW_X_LATCH for trx
# ids reseting, after trx 2 acqured RW_S_LATCH and suspended in debug sync point # ids reseting, after trx 2 acqured RW_S_LATCH and suspended in debug sync point
# lock_trx_handle_wait_enter, waiting for upd_cont signal, which must be # lock_trx_handle_wait_enter, waiting for upd_cont signal, which must be
# emitted after the last SELECT in this test. The last SELECT will hang waiting # emitted after the last SELECT in this test. The last SELECT will hang waiting
# for purge RW_X_LATCH releasing, and trx 2 will be rolled back by timeout. # for purge RW_X_LATCH releasing, and trx 2 will be rolled back by timeout.
START TRANSACTION WITH CONSISTENT SNAPSHOT;
--connection default # There is deadlock_report_before_lock_releasing sync point in
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB; # Deadlock::report(), which is waiting for sel_cont signal under
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB; # lock_sys_t lock. The signal must be issued after "UPDATE t SET b = 100"
# rollback, and that rollback is executing undo record, which is blocked on
# dict_sys latch request. dict_sys is locked by the thread of statistics
# update(dict_stats_save()), and during that update lock_sys lock is requested,
# and can't be acquired as Deadlock::report() holds it. We have to disable
# statistics update to make the test stable.
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0;
INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); INSERT INTO t VALUES (10, 10), (20, 20), (30, 30);
INSERT INTO t2 VALUES (10), (20), (30); INSERT INTO t2 VALUES (10), (20), (30);
...@@ -58,5 +64,4 @@ SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL upd_cont"; ...@@ -58,5 +64,4 @@ SET DEBUG_SYNC="lock_wait_before_suspend SIGNAL upd_cont";
SET DEBUG_SYNC = 'RESET'; SET DEBUG_SYNC = 'RESET';
DROP TABLE t; DROP TABLE t;
DROP TABLE t2; DROP TABLE t2;
--disconnect suspend_purge
--source include/wait_until_count_sessions.inc --source include/wait_until_count_sessions.inc
...@@ -2,17 +2,23 @@ ...@@ -2,17 +2,23 @@
--source include/have_debug_sync.inc --source include/have_debug_sync.inc
--source include/count_sessions.inc --source include/count_sessions.inc
--connect(suspend_purge,localhost,root,,)
# Purge can cause deadlock in the test, requesting page's RW_X_LATCH for trx # Purge can cause deadlock in the test, requesting page's RW_X_LATCH for trx
# ids reseting, after trx 2 acqured RW_S_LATCH and suspended in debug sync point # ids reseting, after trx 2 acqured RW_S_LATCH and suspended in debug sync point
# lock_trx_handle_wait_enter, waiting for upd_cont signal, which must be # lock_trx_handle_wait_enter, waiting for upd_cont signal, which must be
# emitted after the last SELECT in this test. The last SELECT will hang waiting # emitted after the last SELECT in this test. The last SELECT will hang waiting
# for purge RW_X_LATCH releasing, and trx 2 will be rolled back by timeout. # for purge RW_X_LATCH releasing, and trx 2 will be rolled back by timeout.
START TRANSACTION WITH CONSISTENT SNAPSHOT;
--connection default # There is deadlock_report_before_lock_releasing sync point in
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB; # Deadlock::report(), which is waiting for sel_cont signal under
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB; # lock_sys_t lock. The signal must be issued after "UPDATE t SET b = 100"
# rollback, and that rollback is executing undo record, which is blocked on
# dict_sys latch request. dict_sys is locked by the thread of statistics
# update(dict_stats_save()), and during that update lock_sys lock is requested,
# and can't be acquired as Deadlock::report() holds it. We have to disable
# statistics update to make the test stable.
CREATE TABLE t (a int PRIMARY KEY, b int) engine = InnoDB STATS_PERSISTENT=0;
CREATE TABLE t2 (a int PRIMARY KEY) engine = InnoDB STATS_PERSISTENT=0;
INSERT INTO t VALUES (10, 10), (20, 20), (30, 30); INSERT INTO t VALUES (10, 10), (20, 20), (30, 30);
INSERT INTO t2 VALUES (10), (20), (30); INSERT INTO t2 VALUES (10), (20), (30);
...@@ -62,5 +68,4 @@ SET DEBUG_SYNC="now SIGNAL upd_cont_2"; ...@@ -62,5 +68,4 @@ SET DEBUG_SYNC="now SIGNAL upd_cont_2";
SET DEBUG_SYNC = 'RESET'; SET DEBUG_SYNC = 'RESET';
DROP TABLE t; DROP TABLE t;
DROP TABLE t2; DROP TABLE t2;
--disconnect suspend_purge
--source include/wait_until_count_sessions.inc --source include/wait_until_count_sessions.inc
...@@ -1796,8 +1796,8 @@ dberr_t lock_wait(que_thr_t *thr) ...@@ -1796,8 +1796,8 @@ dberr_t lock_wait(que_thr_t *thr)
wait_lock->un_member.tab_lock.table->id <= DICT_FIELDS_ID); wait_lock->un_member.tab_lock.table->id <= DICT_FIELDS_ID);
thd_wait_begin(trx->mysql_thd, (type_mode & LOCK_TABLE) thd_wait_begin(trx->mysql_thd, (type_mode & LOCK_TABLE)
? THD_WAIT_TABLE_LOCK : THD_WAIT_ROW_LOCK); ? THD_WAIT_TABLE_LOCK : THD_WAIT_ROW_LOCK);
trx->error_state= DB_SUCCESS;
int err= 0;
mysql_mutex_lock(&lock_sys.wait_mutex); mysql_mutex_lock(&lock_sys.wait_mutex);
if (trx->lock.wait_lock) if (trx->lock.wait_lock)
{ {
...@@ -1819,25 +1819,24 @@ dberr_t lock_wait(que_thr_t *thr) ...@@ -1819,25 +1819,24 @@ dberr_t lock_wait(que_thr_t *thr)
if (row_lock_wait) if (row_lock_wait)
lock_sys.wait_start(); lock_sys.wait_start();
trx->error_state= DB_SUCCESS;
#ifdef HAVE_REPLICATION #ifdef HAVE_REPLICATION
if (rpl) if (rpl)
lock_wait_rpl_report(trx); lock_wait_rpl_report(trx);
#endif #endif
if (trx->error_state != DB_SUCCESS)
goto check_trx_error;
while (trx->lock.wait_lock) while (trx->lock.wait_lock)
{ {
int err; DEBUG_SYNC_C("lock_wait_before_suspend");
if (no_timeout) if (no_timeout)
{
my_cond_wait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex); my_cond_wait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex);
err= 0;
}
else else
err= my_cond_timedwait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex, err= my_cond_timedwait(&trx->lock.cond, &lock_sys.wait_mutex.m_mutex,
&abstime); &abstime);
check_trx_error:
switch (trx->error_state) { switch (trx->error_state) {
case DB_DEADLOCK: case DB_DEADLOCK:
case DB_INTERRUPTED: case DB_INTERRUPTED:
...@@ -1883,17 +1882,19 @@ dberr_t lock_wait(que_thr_t *thr) ...@@ -1883,17 +1882,19 @@ dberr_t lock_wait(que_thr_t *thr)
/** Resume a lock wait */ /** Resume a lock wait */
static void lock_wait_end(trx_t *trx) template <bool from_deadlock= false>
void lock_wait_end(trx_t *trx)
{ {
mysql_mutex_assert_owner(&lock_sys.wait_mutex); mysql_mutex_assert_owner(&lock_sys.wait_mutex);
ut_ad(trx->mutex_is_owner()); ut_ad(trx->mutex_is_owner());
ut_d(const auto state= trx->state); ut_d(const auto state= trx->state);
ut_ad(state == TRX_STATE_ACTIVE || state == TRX_STATE_PREPARED); ut_ad(state == TRX_STATE_COMMITTED_IN_MEMORY || state == TRX_STATE_ACTIVE ||
ut_ad(trx->lock.wait_thr); state == TRX_STATE_PREPARED);
ut_ad(from_deadlock || trx->lock.wait_thr);
if (trx->lock.was_chosen_as_deadlock_victim) if (trx->lock.was_chosen_as_deadlock_victim)
{ {
ut_ad(state == TRX_STATE_ACTIVE); ut_ad(from_deadlock || state == TRX_STATE_ACTIVE);
trx->error_state= DB_DEADLOCK; trx->error_state= DB_DEADLOCK;
} }
...@@ -5705,13 +5706,16 @@ static void lock_release_autoinc_locks(trx_t *trx) ...@@ -5705,13 +5706,16 @@ static void lock_release_autoinc_locks(trx_t *trx)
} }
/** Cancel a waiting lock request and release possibly waiting transactions */ /** Cancel a waiting lock request and release possibly waiting transactions */
static void lock_cancel_waiting_and_release(lock_t *lock) template <bool from_deadlock= false>
void lock_cancel_waiting_and_release(lock_t *lock)
{ {
lock_sys.assert_locked(*lock); lock_sys.assert_locked(*lock);
mysql_mutex_assert_owner(&lock_sys.wait_mutex); mysql_mutex_assert_owner(&lock_sys.wait_mutex);
trx_t *trx= lock->trx; trx_t *trx= lock->trx;
trx->mutex_lock(); trx->mutex_lock();
ut_ad(trx->state == TRX_STATE_ACTIVE); ut_d(const auto trx_state= trx->state);
ut_ad(trx_state == TRX_STATE_COMMITTED_IN_MEMORY ||
trx_state == TRX_STATE_ACTIVE);
if (!lock->is_table()) if (!lock->is_table())
lock_rec_dequeue_from_page(lock, true); lock_rec_dequeue_from_page(lock, true);
...@@ -5730,7 +5734,8 @@ static void lock_cancel_waiting_and_release(lock_t *lock) ...@@ -5730,7 +5734,8 @@ static void lock_cancel_waiting_and_release(lock_t *lock)
/* Reset the wait flag and the back pointer to lock in trx. */ /* Reset the wait flag and the back pointer to lock in trx. */
lock_reset_lock_and_trx_wait(lock); lock_reset_lock_and_trx_wait(lock);
lock_wait_end(trx); lock_wait_end<from_deadlock>(trx);
trx->mutex_unlock(); trx->mutex_unlock();
} }
...@@ -5901,6 +5906,7 @@ lock_unlock_table_autoinc( ...@@ -5901,6 +5906,7 @@ lock_unlock_table_autoinc(
/** Handle a pending lock wait (DB_LOCK_WAIT) in a semi-consistent read /** Handle a pending lock wait (DB_LOCK_WAIT) in a semi-consistent read
while holding a clustered index leaf page latch. while holding a clustered index leaf page latch.
@param trx transaction that is or was waiting for a lock @param trx transaction that is or was waiting for a lock
@retval DB_SUCCESS if the lock was granted @retval DB_SUCCESS if the lock was granted
@retval DB_DEADLOCK if the transaction must be aborted due to a deadlock @retval DB_DEADLOCK if the transaction must be aborted due to a deadlock
...@@ -5911,8 +5917,13 @@ dberr_t lock_trx_handle_wait(trx_t *trx) ...@@ -5911,8 +5917,13 @@ dberr_t lock_trx_handle_wait(trx_t *trx)
DEBUG_SYNC_C("lock_trx_handle_wait_enter"); DEBUG_SYNC_C("lock_trx_handle_wait_enter");
if (trx->lock.was_chosen_as_deadlock_victim) if (trx->lock.was_chosen_as_deadlock_victim)
return DB_DEADLOCK; return DB_DEADLOCK;
DEBUG_SYNC_C("lock_trx_handle_wait_before_unlocked_wait_lock_check");
/* trx->lock.was_chosen_as_deadlock_victim must always be set before
trx->lock.wait_lock if the transaction was chosen as deadlock victim,
the function must not return DB_SUCCESS if
trx->lock.was_chosen_as_deadlock_victim is set. */
if (!trx->lock.wait_lock) if (!trx->lock.wait_lock)
return DB_SUCCESS; return trx->lock.was_chosen_as_deadlock_victim ? DB_DEADLOCK : DB_SUCCESS;
dberr_t err= DB_SUCCESS; dberr_t err= DB_SUCCESS;
mysql_mutex_lock(&lock_sys.wait_mutex); mysql_mutex_lock(&lock_sys.wait_mutex);
if (trx->lock.was_chosen_as_deadlock_victim) if (trx->lock.was_chosen_as_deadlock_victim)
...@@ -6315,8 +6326,11 @@ namespace Deadlock ...@@ -6315,8 +6326,11 @@ namespace Deadlock
ut_ad(victim->state == TRX_STATE_ACTIVE); ut_ad(victim->state == TRX_STATE_ACTIVE);
/* victim->lock.was_chosen_as_deadlock_victim must always be set before
releasing waiting locks and reseting trx->lock.wait_lock */
victim->lock.was_chosen_as_deadlock_victim= true; victim->lock.was_chosen_as_deadlock_victim= true;
lock_cancel_waiting_and_release(victim->lock.wait_lock); DEBUG_SYNC_C("deadlock_report_before_lock_releasing");
lock_cancel_waiting_and_release<true>(victim->lock.wait_lock);
#ifdef WITH_WSREP #ifdef WITH_WSREP
if (victim->is_wsrep() && wsrep_thd_is_SR(victim->mysql_thd)) if (victim->is_wsrep() && wsrep_thd_is_SR(victim->mysql_thd))
wsrep_handle_SR_rollback(trx->mysql_thd, victim->mysql_thd); wsrep_handle_SR_rollback(trx->mysql_thd, victim->mysql_thd);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment