Commit a474e327 authored by Vlad Lesin's avatar Vlad Lesin

MDEV-27701 Race on trx->lock.wait_lock between lock_rec_move() and lock_sys_t::cancel()

The initial issue was in assertion failure, which checked the equality
of lock to cancel with trx->lock.wait_lock in lock_sys_t::cancel().

If we analyze lock_sys_t::cancel() code from the perspective of
trx->lock.wait_lock racing, we won't find the error there, except the
cases when we need to reload it after the corresponding latches
acquiring.

So the fix is just to remove the assertion and reload
trx->lock.wait_lock after acquiring necessary latches.

Reviewed by: Marko Mäkelä <marko.makela@mariadb.com>
parent 67a6ad0a
CREATE TABLE t (pk int PRIMARY KEY, c varchar(10)) ENGINE=InnoDB;
INSERT INTO t VALUES (10, "0123456789");
connection default;
BEGIN;
SELECT * FROM t WHERE c = 10 FOR UPDATE;
pk c
connect trx2, localhost,root,,;
BEGIN;
SET DEBUG_SYNC="lock_wait_start SIGNAL trx2_start_waiting";
SET DEBUG_SYNC="lock_wait_end SIGNAL trx2_wait_end WAIT_FOR trx2_cont_upd";
SET DEBUG_SYNC="lock_rec_store_on_page_infimum_end SIGNAL trx2_moved_locks WAIT_FOR trx2_cont";
UPDATE t SET c = NULL WHERE pk = 10;
connect trx3, localhost,root,,;
SET DEBUG_SYNC="now WAIT_FOR trx2_start_waiting";
SET innodb_lock_wait_timeout=1;
BEGIN;
SET DEBUG_SYNC="lock_wait_start SIGNAL trx3_start_waiting WAIT_FOR trx3_cont_waiting";
SET DEBUG_SYNC="lock_sys_t_cancel_enter SIGNAL trx3_cancel_enter WAIT_FOR trx3_cont_cancel_waiting";
UPDATE t SET c = "abcdefghij" WHERE pk = 10;
connection default;
SET DEBUG_SYNC="now WAIT_FOR trx3_start_waiting";
COMMIT;
SET DEBUG_SYNC="now WAIT_FOR trx2_wait_end";
SET DEBUG_SYNC="now SIGNAL trx3_cont_waiting";
SET DEBUG_SYNC="now WAIT_FOR trx3_cancel_enter";
SET DEBUG_SYNC="now SIGNAL trx2_cont_upd";
SET DEBUG_SYNC="now WAIT_FOR trx2_moved_locks";
SET DEBUG_SYNC="now SIGNAL trx3_cont_cancel_waiting";
SET DEBUG_SYNC="now SIGNAL trx2_cont";
disconnect trx2;
disconnect trx3;
connection default;
SET DEBUG_SYNC="RESET";
DROP TABLE t;
--source include/have_innodb.inc
--source include/count_sessions.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
CREATE TABLE t (pk int PRIMARY KEY, c varchar(10)) ENGINE=InnoDB;
INSERT INTO t VALUES (10, "0123456789");
--connection default
BEGIN;
SELECT * FROM t WHERE c = 10 FOR UPDATE;
--connect(trx2, localhost,root,,)
BEGIN;
SET DEBUG_SYNC="lock_wait_start SIGNAL trx2_start_waiting";
SET DEBUG_SYNC="lock_wait_end SIGNAL trx2_wait_end WAIT_FOR trx2_cont_upd";
SET DEBUG_SYNC="lock_rec_store_on_page_infimum_end SIGNAL trx2_moved_locks WAIT_FOR trx2_cont";
#################
# We need to update clustered record without changing ordering fields and
# changing the size of non-ordering fields to cause locks moving from deleted
# record to infimum.
###
--send UPDATE t SET c = NULL WHERE pk = 10
--connect(trx3, localhost,root,,)
SET DEBUG_SYNC="now WAIT_FOR trx2_start_waiting";
#################
# The condition wariable waiting in lock_wait() must be finished by timeout
###
SET innodb_lock_wait_timeout=1;
BEGIN;
SET DEBUG_SYNC="lock_wait_start SIGNAL trx3_start_waiting WAIT_FOR trx3_cont_waiting";
SET DEBUG_SYNC="lock_sys_t_cancel_enter SIGNAL trx3_cancel_enter WAIT_FOR trx3_cont_cancel_waiting";
--send UPDATE t SET c = "abcdefghij" WHERE pk = 10
--connection default
SET DEBUG_SYNC="now WAIT_FOR trx3_start_waiting";
COMMIT;
SET DEBUG_SYNC="now WAIT_FOR trx2_wait_end";
SET DEBUG_SYNC="now SIGNAL trx3_cont_waiting";
SET DEBUG_SYNC="now WAIT_FOR trx3_cancel_enter";
SET DEBUG_SYNC="now SIGNAL trx2_cont_upd";
SET DEBUG_SYNC="now WAIT_FOR trx2_moved_locks";
#################
# If the bug is not fixed, there will be assertion failure here, because trx2
# moved trx3 lock from deleted record to infimum when trx3 tried to cancel the
# lock.
###
SET DEBUG_SYNC="now SIGNAL trx3_cont_cancel_waiting";
SET DEBUG_SYNC="now SIGNAL trx2_cont";
--disconnect trx2
--disconnect trx3
--connection default
SET DEBUG_SYNC="RESET";
DROP TABLE t;
--source include/wait_until_count_sessions.inc
...@@ -891,8 +891,8 @@ class lock_sys_t ...@@ -891,8 +891,8 @@ class lock_sys_t
/** Cancel a waiting lock request. /** Cancel a waiting lock request.
@tparam check_victim whether to check for DB_DEADLOCK @tparam check_victim whether to check for DB_DEADLOCK
@param lock waiting lock request
@param trx active transaction @param trx active transaction
@param lock waiting lock request
@retval DB_SUCCESS if no lock existed @retval DB_SUCCESS if no lock existed
@retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set @retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set
@retval DB_LOCK_WAIT if the lock was canceled */ @retval DB_LOCK_WAIT if the lock was canceled */
......
...@@ -46,12 +46,12 @@ Created 5/7/1996 Heikki Tuuri ...@@ -46,12 +46,12 @@ Created 5/7/1996 Heikki Tuuri
#include "srv0mon.h" #include "srv0mon.h"
#include "que0que.h" #include "que0que.h"
#include "scope.h" #include "scope.h"
#include <debug_sync.h>
#include <set> #include <set>
#ifdef WITH_WSREP #ifdef WITH_WSREP
#include <mysql/service_wsrep.h> #include <mysql/service_wsrep.h>
#include <debug_sync.h>
#endif /* WITH_WSREP */ #endif /* WITH_WSREP */
/** The value of innodb_deadlock_detect */ /** The value of innodb_deadlock_detect */
...@@ -1882,6 +1882,7 @@ dberr_t lock_wait(que_thr_t *thr) ...@@ -1882,6 +1882,7 @@ dberr_t lock_wait(que_thr_t *thr)
if (row_lock_wait) if (row_lock_wait)
lock_sys.wait_resume(trx->mysql_thd, suspend_time, my_hrtime_coarse()); lock_sys.wait_resume(trx->mysql_thd, suspend_time, my_hrtime_coarse());
/* Cache trx->lock.wait_lock to avoid unnecessary atomic variable load */
if (lock_t *lock= trx->lock.wait_lock) if (lock_t *lock= trx->lock.wait_lock)
{ {
lock_sys_t::cancel<false>(trx, lock); lock_sys_t::cancel<false>(trx, lock);
...@@ -1905,6 +1906,12 @@ void lock_wait_end(trx_t *trx) ...@@ -1905,6 +1906,12 @@ void lock_wait_end(trx_t *trx)
ut_d(const auto state= trx->state); ut_d(const auto state= trx->state);
ut_ad(state == TRX_STATE_COMMITTED_IN_MEMORY || state == TRX_STATE_ACTIVE || ut_ad(state == TRX_STATE_COMMITTED_IN_MEMORY || state == TRX_STATE_ACTIVE ||
state == TRX_STATE_PREPARED); state == TRX_STATE_PREPARED);
/* lock_wait() checks trx->lock.was_chosen_as_deadlock_victim flag before
requesting lock_sys.wait_mutex, and if the flag is set, it returns error,
what causes transaction rollback, which can reset trx->lock.wait_thr before
deadlock resolution starts cancelling victim's waiting lock. That's why we
don't check trx->lock.wait_thr here if the function was called from deadlock
resolution function. */
ut_ad(from_deadlock || trx->lock.wait_thr); ut_ad(from_deadlock || trx->lock.wait_thr);
if (trx->lock.was_chosen_as_deadlock_victim) if (trx->lock.was_chosen_as_deadlock_victim)
...@@ -3193,6 +3200,8 @@ lock_rec_store_on_page_infimum( ...@@ -3193,6 +3200,8 @@ lock_rec_store_on_page_infimum(
ut_ad(block->page.frame == page_align(rec)); ut_ad(block->page.frame == page_align(rec));
const page_id_t id{block->page.id()}; const page_id_t id{block->page.id()};
ut_d(SCOPE_EXIT(
[]() { DEBUG_SYNC_C("lock_rec_store_on_page_infimum_end"); }));
LockGuard g{lock_sys.rec_hash, id}; LockGuard g{lock_sys.rec_hash, id};
lock_rec_move(g.cell(), *block, id, g.cell(), id, lock_rec_move(g.cell(), *block, id, g.cell(), id,
...@@ -5770,17 +5779,30 @@ void lock_sys_t::cancel_lock_wait_for_trx(trx_t *trx) ...@@ -5770,17 +5779,30 @@ void lock_sys_t::cancel_lock_wait_for_trx(trx_t *trx)
/** Cancel a waiting lock request. /** Cancel a waiting lock request.
@tparam check_victim whether to check for DB_DEADLOCK @tparam check_victim whether to check for DB_DEADLOCK
@param lock waiting lock request
@param trx active transaction @param trx active transaction
@param lock waiting lock request
@retval DB_SUCCESS if no lock existed @retval DB_SUCCESS if no lock existed
@retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set @retval DB_DEADLOCK if trx->lock.was_chosen_as_deadlock_victim was set
@retval DB_LOCK_WAIT if the lock was canceled */ @retval DB_LOCK_WAIT if the lock was canceled */
template<bool check_victim> template<bool check_victim>
dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock) dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
{ {
DEBUG_SYNC_C("lock_sys_t_cancel_enter");
mysql_mutex_assert_owner(&lock_sys.wait_mutex); mysql_mutex_assert_owner(&lock_sys.wait_mutex);
ut_ad(trx->lock.wait_lock == lock);
ut_ad(trx->state == TRX_STATE_ACTIVE); ut_ad(trx->state == TRX_STATE_ACTIVE);
/* trx->lock.wait_lock may be changed by other threads as long as
we are not holding lock_sys.latch.
So, trx->lock.wait_lock==lock does not necessarily hold, but both
pointers should be valid, because other threads cannot assign
trx->lock.wait_lock=nullptr (or invalidate *lock) while we are
holding lock_sys.wait_mutex. Also, the type of trx->lock.wait_lock
(record or table lock) cannot be changed by other threads. So, it is
safe to call lock->is_table() while not holding lock_sys.latch. If
we have to release and reacquire lock_sys.wait_mutex, we must reread
trx->lock.wait_lock. We must also reread trx->lock.wait_lock after
lock_sys.latch acquiring, as it can be changed to not-null in lock moving
functions even if we hold lock_sys.wait_mutex. */
dberr_t err= DB_SUCCESS; dberr_t err= DB_SUCCESS;
/* This would be too large for a memory transaction, except in the /* This would be too large for a memory transaction, except in the
DB_DEADLOCK case, which was already tested in lock_trx_handle_wait(). */ DB_DEADLOCK case, which was already tested in lock_trx_handle_wait(). */
...@@ -5802,6 +5824,15 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock) ...@@ -5802,6 +5824,15 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
} }
else else
{ {
/* This function is invoked from the thread which executes the
transaction. Table locks are requested before record locks. Some other
transaction can't change trx->lock.wait_lock from table to record for the
current transaction at this point, because the current transaction has not
requested record locks yet. There is no need to move any table locks by
other threads. And trx->lock.wait_lock can't be set to null while we are
holding lock_sys.wait_mutex. That's why there is no need to reload
trx->lock.wait_lock here. */
ut_ad(lock == trx->lock.wait_lock);
resolve_table_lock: resolve_table_lock:
dict_table_t *table= lock->un_member.tab_lock.table; dict_table_t *table= lock->un_member.tab_lock.table;
if (!table->lock_mutex_trylock()) if (!table->lock_mutex_trylock())
...@@ -5812,6 +5843,7 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock) ...@@ -5812,6 +5843,7 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
mysql_mutex_unlock(&lock_sys.wait_mutex); mysql_mutex_unlock(&lock_sys.wait_mutex);
table->lock_mutex_lock(); table->lock_mutex_lock();
mysql_mutex_lock(&lock_sys.wait_mutex); mysql_mutex_lock(&lock_sys.wait_mutex);
/* Cache trx->lock.wait_lock under the corresponding latches. */
lock= trx->lock.wait_lock; lock= trx->lock.wait_lock;
if (!lock) if (!lock)
goto retreat; goto retreat;
...@@ -5821,6 +5853,10 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock) ...@@ -5821,6 +5853,10 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
goto retreat; goto retreat;
} }
} }
else
/* Cache trx->lock.wait_lock under the corresponding latches if
it was not cached yet */
lock= trx->lock.wait_lock;
if (lock->is_waiting()) if (lock->is_waiting())
lock_cancel_waiting_and_release(lock); lock_cancel_waiting_and_release(lock);
/* Even if lock->is_waiting() did not hold above, we must return /* Even if lock->is_waiting() did not hold above, we must return
...@@ -5844,6 +5880,7 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock) ...@@ -5844,6 +5880,7 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
mysql_mutex_unlock(&lock_sys.wait_mutex); mysql_mutex_unlock(&lock_sys.wait_mutex);
lock_sys.wr_lock(SRW_LOCK_CALL); lock_sys.wr_lock(SRW_LOCK_CALL);
mysql_mutex_lock(&lock_sys.wait_mutex); mysql_mutex_lock(&lock_sys.wait_mutex);
/* Cache trx->lock.wait_lock under the corresponding latches. */
lock= trx->lock.wait_lock; lock= trx->lock.wait_lock;
/* Even if waiting lock was cancelled while lock_sys.wait_mutex was /* Even if waiting lock was cancelled while lock_sys.wait_mutex was
unlocked, we need to return deadlock error if transaction was chosen unlocked, we need to return deadlock error if transaction was chosen
...@@ -5855,6 +5892,9 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock) ...@@ -5855,6 +5892,9 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
} }
else else
{ {
/* Cache trx->lock.wait_lock under the corresponding latches if
it was not cached yet */
lock= trx->lock.wait_lock;
resolve_record_lock: resolve_record_lock:
if (lock->is_waiting()) if (lock->is_waiting())
lock_cancel_waiting_and_release(lock); lock_cancel_waiting_and_release(lock);
...@@ -5876,6 +5916,7 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock) ...@@ -5876,6 +5916,7 @@ dberr_t lock_sys_t::cancel(trx_t *trx, lock_t *lock)
void lock_sys_t::cancel(trx_t *trx) void lock_sys_t::cancel(trx_t *trx)
{ {
mysql_mutex_lock(&lock_sys.wait_mutex); mysql_mutex_lock(&lock_sys.wait_mutex);
/* Cache trx->lock.wait_lock to avoid unnecessary atomic variable load */
if (lock_t *lock= trx->lock.wait_lock) if (lock_t *lock= trx->lock.wait_lock)
{ {
/* Dictionary transactions must be immune to KILL, because they /* Dictionary transactions must be immune to KILL, because they
...@@ -5943,6 +5984,7 @@ dberr_t lock_trx_handle_wait(trx_t *trx) ...@@ -5943,6 +5984,7 @@ dberr_t lock_trx_handle_wait(trx_t *trx)
mysql_mutex_lock(&lock_sys.wait_mutex); mysql_mutex_lock(&lock_sys.wait_mutex);
if (trx->lock.was_chosen_as_deadlock_victim) if (trx->lock.was_chosen_as_deadlock_victim)
err= DB_DEADLOCK; err= DB_DEADLOCK;
/* Cache trx->lock.wait_lock to avoid unnecessary atomic variable load */
else if (lock_t *wait_lock= trx->lock.wait_lock) else if (lock_t *wait_lock= trx->lock.wait_lock)
err= lock_sys_t::cancel<true>(trx, wait_lock); err= lock_sys_t::cancel<true>(trx, wait_lock);
lock_sys.deadlock_check(); lock_sys.deadlock_check();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment