Commit bd03c0e5 authored by Vlad Lesin's avatar Vlad Lesin

MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock

When lock is checked for conflict, ignore other locks on the record if
they wait for the requesting transaction.

lock_rec_has_to_wait_in_queue() iterates not all locks for
the page, but only the locks located before the waiting lock in the
queue. So there is some invariant - any lock in the queue can wait only
lock which is located before the waiting lock in the queue.

In the case when conflicting lock waits for the transaction of
requesting lock, we need to place the requesting lock before the waiting
lock in the queue to preserve the invariant. That is why we are looking
for the first waiting for requesting transation lock and place the new
lock just after the last granted requesting transaction lock before the
first waiting for requesting transaction lock.

Example:

trx1 waiting lock, trx1 granted lock, ..., trx2 lock - waiting for trx1
place new lock here -----------------^

There are also implicit locks which are lazily converted to explicit
ones, and we need to place the newly created explicit lock to the correct
place in a queue. All explicit locks converted from implicit ones are
placed just after the last non-waiting lock of the same transaction before
the first waiting for the transaction lock.

Code review and cleanup was made by Marko Mäkelä.
parent 1abc476f
#
# MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock
#
CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB;
connect prevent_purge,localhost,root,,;
start transaction with consistent snapshot;
connection default;
INSERT INTO t VALUES (20,20);
DELETE FROM t WHERE b = 20;
connect con_ins,localhost,root,,;
SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont';
INSERT INTO t VALUES(10, 20);
connect con_del,localhost,root,,;
SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked';
DELETE FROM t WHERE b = 20;
connection default;
SET DEBUG_SYNC = 'now WAIT_FOR del_locked';
SET DEBUG_SYNC = 'now SIGNAL ins_cont';
connection con_ins;
disconnect con_ins;
connection con_del;
disconnect con_del;
disconnect prevent_purge;
connection default;
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
--source include/have_innodb.inc
--source include/count_sessions.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--echo #
--echo # MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock
--echo #
# The test checks the ability to acquire exclusive record lock if the acquiring
# transaction already holds a shared lock on the record and another transaction
# is waiting for a lock.
CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB;
--connect(prevent_purge,localhost,root,,)
start transaction with consistent snapshot;
--connection default
INSERT INTO t VALUES (20,20);
DELETE FROM t WHERE b = 20;
--connect(con_ins,localhost,root,,)
SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont';
send
INSERT INTO t VALUES(10, 20);
--connect(con_del,localhost,root,,)
SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked';
###############################################################################
# This DELETE creates waiting ORDINARY X-lock for heap_no 2 as the record is
# delete-marked, this lock conflicts with ORDINARY S-lock set by the the last
# INSERT. After the last INSERT creates insert-intention lock on
# heap_no 2, this lock will conflict with waiting ORDINARY X-lock of this
# DELETE, what causes DEADLOCK error for this DELETE.
###############################################################################
send
DELETE FROM t WHERE b = 20;
--connection default
SET DEBUG_SYNC = 'now WAIT_FOR del_locked';
SET DEBUG_SYNC = 'now SIGNAL ins_cont';
--connection con_ins
--reap
--disconnect con_ins
--connection con_del
# Without the fix, ER_LOCK_DEADLOCK would be reported here.
--reap
--disconnect con_del
--disconnect prevent_purge
--connection default
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
--source include/wait_until_count_sessions.inc
...@@ -283,7 +283,6 @@ connection default; ...@@ -283,7 +283,6 @@ connection default;
update t1 set b = 'foo'; update t1 set b = 'foo';
connection con1; connection con1;
update t1 set a = 'bar'; update t1 set a = 'bar';
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
disconnect con1; disconnect con1;
connection default; connection default;
drop table t1; drop table t1;
......
...@@ -186,7 +186,9 @@ send update t1 set b = 'foo'; ...@@ -186,7 +186,9 @@ send update t1 set b = 'foo';
connection con1; connection con1;
let $wait_condition= select count(*) from information_schema.innodb_lock_waits; let $wait_condition= select count(*) from information_schema.innodb_lock_waits;
source include/wait_condition.inc; source include/wait_condition.inc;
error ER_LOCK_DEADLOCK; # There must no be DEADLOCK here as con1 transaction already holds locks, and
# default's transaction lock is waiting, so the locks of the following "UPDATE"
# must not conflict with waiting lock.
update t1 set a = 'bar'; update t1 set a = 'bar';
disconnect con1; disconnect con1;
connection default; connection default;
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2018, 2021, MariaDB Corporation. Copyright (c) 2018, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -29,8 +29,43 @@ Created 5/20/1997 Heikki Tuuri ...@@ -29,8 +29,43 @@ Created 5/20/1997 Heikki Tuuri
#include "ut0new.h" #include "ut0new.h"
struct hash_table_t; struct hash_table_t;
struct hash_cell_t{ struct hash_cell_t
void* node; /*!< hash chain node, NULL if none */ {
/** singly-linked, nullptr terminated list of hash buckets */
void *node;
/** Append an element.
@tparam T type of the element
@param insert the being-inserted element
@param next the next-element pointer in T */
template<typename T>
void append(T &insert, T *T::*next)
{
void **after;
for (after= &node; *after;
after= reinterpret_cast<void**>(&(static_cast<T*>(*after)->*next)));
insert.*next= nullptr;
*after= &insert;
}
/** Insert an element after another.
@tparam T type of the element
@param after the element after which to insert
@param insert the being-inserted element
@param next the next-element pointer in T */
template<typename T>
void insert_after(T &after, T &insert, T *T::*next)
{
#ifdef UNIV_DEBUG
for (const T *c= static_cast<const T*>(node); c; c= c->*next)
if (c == &after)
goto found;
ut_error;
found:
#endif
insert.*next= after.*next;
after.*next= &insert;
}
}; };
/*******************************************************************//** /*******************************************************************//**
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2021, MariaDB Corporation. Copyright (c) 2017, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -1175,6 +1175,10 @@ without checking for deadlocks or conflicts. ...@@ -1175,6 +1175,10 @@ without checking for deadlocks or conflicts.
@param[in] index the index tree @param[in] index the index tree
@param[in,out] trx transaction @param[in,out] trx transaction
@param[in] holds_trx_mutex whether the caller holds trx->mutex @param[in] holds_trx_mutex whether the caller holds trx->mutex
@param[in] insert_before_waiting if true, inserts new B-tree record lock
just after the last non-waiting lock of the current transaction which is
located before the first waiting for the current transaction lock, otherwise
the lock is inserted at the end of the queue
@return created lock */ @return created lock */
lock_t* lock_t*
lock_rec_create_low( lock_rec_create_low(
...@@ -1185,7 +1189,8 @@ lock_rec_create_low( ...@@ -1185,7 +1189,8 @@ lock_rec_create_low(
ulint heap_no, ulint heap_no,
dict_index_t* index, dict_index_t* index,
trx_t* trx, trx_t* trx,
bool holds_trx_mutex); bool holds_trx_mutex,
bool insert_before_waiting = false);
/** Enqueue a waiting request for a lock which cannot be granted immediately. /** Enqueue a waiting request for a lock which cannot be granted immediately.
Check for deadlocks. Check for deadlocks.
......
...@@ -1004,24 +1004,43 @@ static void lock_wait_wsrep(trx_t *trx) ...@@ -1004,24 +1004,43 @@ static void lock_wait_wsrep(trx_t *trx)
/*********************************************************************//** /*********************************************************************//**
Checks if some other transaction has a conflicting explicit lock request Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait. in the queue, so that we have to wait.
@return lock or NULL */ @param[in] mode LOCK_S or LOCK_X, possibly ORed to LOCK_GAP or LOC_REC_NOT_GAP,
static LOCK_INSERT_INTENTION
lock_t* @param[in] cell lock hash table cell
lock_rec_other_has_conflicting( @param[in] id page identifier
/*===========================*/ @param[in] heap_no heap number of the record
unsigned mode, /*!< in: LOCK_S or LOCK_X, @param[in] trx our transaction
possibly ORed to LOCK_GAP or @param[out] was_ignored true if conflicting locks waiting for the current
LOC_REC_NOT_GAP, transaction were ignored
LOCK_INSERT_INTENTION */ @return conflicting lock and the flag which indicated if conflicting locks
const hash_cell_t& cell, /*!< in: lock hash table cell */ which wait for the current transaction were ignored */
const page_id_t id, /*!< in: page identifier */ static lock_t *lock_rec_other_has_conflicting(unsigned mode,
ulint heap_no,/*!< in: heap number of the record */ const hash_cell_t &cell,
const trx_t* trx) /*!< in: our transaction */ const page_id_t id,
ulint heap_no, const trx_t *trx,
bool *was_ignored= nullptr)
{ {
bool is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM); bool is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);
for (lock_t* lock = lock_sys_t::get_first(cell, id, heap_no); for (lock_t* lock = lock_sys_t::get_first(cell, id, heap_no);
lock; lock = lock_rec_get_next(heap_no, lock)) { lock; lock = lock_rec_get_next(heap_no, lock)) {
/* There is no need to lock lock_sys.wait_mutex to check
trx->lock.wait_trx here because the current function is
executed under the cell latch, and trx->lock.wait_trx
transaction can change wait_trx field only under the cell
latch, wait_trx trx_t object can not be deinitialized before
releasing all its locks, and during releasing the locks the
cell latch will also be requested. So while the cell latch is
held, lock->trx->lock.wait_trx can't be changed. There also
can't be lock loops for one record, because all waiting locks
of the record will always wait for the same lock of the record
in a cell array, and check for conflicting lock will always
start with the first lock for the heap_no, and go ahead with
the same order(the order of the locks in the cell array) */
if (lock->is_waiting() && lock->trx->lock.wait_trx == trx) {
if (was_ignored) *was_ignored= true;
continue;
}
if (lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) { if (lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) {
return(lock); return(lock);
} }
...@@ -1147,6 +1166,10 @@ without checking for deadlocks or conflicts. ...@@ -1147,6 +1166,10 @@ without checking for deadlocks or conflicts.
@param[in] index the index tree @param[in] index the index tree
@param[in,out] trx transaction @param[in,out] trx transaction
@param[in] holds_trx_mutex whether the caller holds trx->mutex @param[in] holds_trx_mutex whether the caller holds trx->mutex
@param[in] insert_before_waiting if true, inserts new B-tree record lock
just after the last non-waiting lock of the current transaction which is
located before the first waiting for the current transaction lock, otherwise
the lock is inserted at the end of the queue
@return created lock */ @return created lock */
lock_t* lock_t*
lock_rec_create_low( lock_rec_create_low(
...@@ -1157,7 +1180,8 @@ lock_rec_create_low( ...@@ -1157,7 +1180,8 @@ lock_rec_create_low(
ulint heap_no, ulint heap_no,
dict_index_t* index, dict_index_t* index,
trx_t* trx, trx_t* trx,
bool holds_trx_mutex) bool holds_trx_mutex,
bool insert_before_waiting)
{ {
lock_t* lock; lock_t* lock;
ulint n_bytes; ulint n_bytes;
...@@ -1232,7 +1256,36 @@ lock_rec_create_low( ...@@ -1232,7 +1256,36 @@ lock_rec_create_low(
ut_ad(index->table->get_ref_count() || !index->table->can_be_evicted); ut_ad(index->table->get_ref_count() || !index->table->can_be_evicted);
const auto lock_hash = &lock_sys.hash_get(type_mode); const auto lock_hash = &lock_sys.hash_get(type_mode);
HASH_INSERT(lock_t, hash, lock_hash, page_id.fold(), lock); hash_cell_t& cell = *lock_hash->cell_get(page_id.fold());
if (insert_before_waiting
&& !(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE))) {
/* Try to insert the lock just after the last non-waiting
lock of the current transaction which immediately
precedes the first waiting lock request. */
lock_t* last_non_waiting = nullptr;
for (lock_t* l = lock_sys_t::get_first(cell, page_id, heap_no);
l; l = lock_rec_get_next(heap_no, l)) {
if (l->is_waiting() && l->trx->lock.wait_trx == trx) {
break;
}
if (l->trx == trx) {
last_non_waiting = l;
}
}
if (!last_non_waiting) {
goto append_last;
}
cell.insert_after(*last_non_waiting, *lock, &lock_t::hash);
}
else {
append_last:
cell.append(*lock, &lock_t::hash);
}
if (type_mode & LOCK_WAIT) { if (type_mode & LOCK_WAIT) {
if (trx->lock.wait_trx) { if (trx->lock.wait_trx) {
...@@ -1357,24 +1410,22 @@ on the record, and the request to be added is not a waiting request, we ...@@ -1357,24 +1410,22 @@ on the record, and the request to be added is not a waiting request, we
can reuse a suitable record lock object already existing on the same page, can reuse a suitable record lock object already existing on the same page,
just setting the appropriate bit in its bitmap. This is a low-level function just setting the appropriate bit in its bitmap. This is a low-level function
which does NOT check for deadlocks or lock compatibility! which does NOT check for deadlocks or lock compatibility!
@return lock where the bit was set */ @param[in] type_mode lock mode, wait, gap etc. flags
@param[in,out] cell first hash table cell
@param[in] id page identifier
@param[in] page buffer block containing the record
@param[in] heap_no heap number of the record
@param[in] index index of record
@param[in,out] trx transaction
@param[in] caller_owns_trx_mutex TRUE if caller owns the transaction mutex
@param[in] insert_before_waiting true=insert B-tree record lock right
before a waiting lock request; false=insert the lock at the end of the queue */
TRANSACTIONAL_TARGET TRANSACTIONAL_TARGET
static static void lock_rec_add_to_queue(unsigned type_mode, hash_cell_t &cell,
void const page_id_t id, const page_t *page,
lock_rec_add_to_queue( ulint heap_no, dict_index_t *index,
/*==================*/ trx_t *trx, bool caller_owns_trx_mutex,
unsigned type_mode,/*!< in: lock mode, wait, gap bool insert_before_waiting= false)
etc. flags */
hash_cell_t& cell, /*!< in,out: first hash table cell */
const page_id_t id, /*!< in: page identifier */
const page_t* page, /*!< in: buffer block containing
the record */
ulint heap_no,/*!< in: heap number of the record */
dict_index_t* index, /*!< in: index of record */
trx_t* trx, /*!< in/out: transaction */
bool caller_owns_trx_mutex)
/*!< in: TRUE if caller owns the
transaction mutex */
{ {
ut_d(lock_sys.hash_get(type_mode).assert_locked(id)); ut_d(lock_sys.hash_get(type_mode).assert_locked(id));
ut_ad(xtest() || caller_owns_trx_mutex == trx->mutex_is_owner()); ut_ad(xtest() || caller_owns_trx_mutex == trx->mutex_is_owner());
...@@ -1469,7 +1520,7 @@ lock_rec_add_to_queue( ...@@ -1469,7 +1520,7 @@ lock_rec_add_to_queue(
lock_rec_create_low(nullptr, lock_rec_create_low(nullptr,
type_mode, id, page, heap_no, index, trx, type_mode, id, page, heap_no, index, trx,
caller_owns_trx_mutex); caller_owns_trx_mutex, insert_before_waiting);
} }
/*********************************************************************//** /*********************************************************************//**
...@@ -1536,8 +1587,10 @@ lock_rec_lock( ...@@ -1536,8 +1587,10 @@ lock_rec_lock(
/* Do nothing if the trx already has a strong enough lock on rec */ /* Do nothing if the trx already has a strong enough lock on rec */
if (!lock_rec_has_expl(mode, g.cell(), id, heap_no, trx)) if (!lock_rec_has_expl(mode, g.cell(), id, heap_no, trx))
{ {
bool was_ignored = false;
if (lock_t *c_lock= lock_rec_other_has_conflicting(mode, g.cell(), id, if (lock_t *c_lock= lock_rec_other_has_conflicting(mode, g.cell(), id,
heap_no, trx)) heap_no, trx,
&was_ignored))
/* /*
If another transaction has a non-gap conflicting If another transaction has a non-gap conflicting
request in the queue, as this transaction does not request in the queue, as this transaction does not
...@@ -1550,7 +1603,7 @@ lock_rec_lock( ...@@ -1550,7 +1603,7 @@ lock_rec_lock(
{ {
/* Set the requested lock on the record. */ /* Set the requested lock on the record. */
lock_rec_add_to_queue(mode, g.cell(), id, block->page.frame, heap_no, lock_rec_add_to_queue(mode, g.cell(), id, block->page.frame, heap_no,
index, trx, true); index, trx, true, was_ignored);
err= DB_SUCCESS_LOCKED_REC; err= DB_SUCCESS_LOCKED_REC;
} }
} }
...@@ -4654,7 +4707,7 @@ lock_rec_queue_validate( ...@@ -4654,7 +4707,7 @@ lock_rec_queue_validate(
wsrep_report_bf_lock_wait(impl_trx->mysql_thd, impl_trx->id); wsrep_report_bf_lock_wait(impl_trx->mysql_thd, impl_trx->id);
wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id); wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id);
if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, if (!lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP,
cell, id, heap_no, cell, id, heap_no,
impl_trx)) { impl_trx)) {
ib::info() << "WSREP impl BF lock conflict"; ib::info() << "WSREP impl BF lock conflict";
...@@ -4663,8 +4716,22 @@ lock_rec_queue_validate( ...@@ -4663,8 +4716,22 @@ lock_rec_queue_validate(
#endif /* WITH_WSREP */ #endif /* WITH_WSREP */
{ {
ut_ad(other_lock->is_waiting()); ut_ad(other_lock->is_waiting());
ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, /* After MDEV-27025 fix the following case is
cell, id, heap_no, possible:
1. trx 1 acquires S-lock;
2. trx 2 creates X-lock waiting for trx 1;
3. trx 1 creates implicit lock, as
lock_rec_other_has_conflicting() returns no
conflicting trx 2 X-lock, the explicit lock
will not be created;
4. trx 3 creates waiting X-lock,
it will wait for S-lock of trx 1.
That is why we relaxing the condition here and
check only for S-lock.
*/
ut_ad(lock_rec_has_expl(LOCK_S
| LOCK_REC_NOT_GAP,
cell, id, heap_no,
impl_trx)); impl_trx));
} }
} }
...@@ -5060,7 +5127,7 @@ lock_rec_convert_impl_to_expl_for_trx( ...@@ -5060,7 +5127,7 @@ lock_rec_convert_impl_to_expl_for_trx(
!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, heap_no, !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, heap_no,
trx)) trx))
lock_rec_add_to_queue(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id, lock_rec_add_to_queue(LOCK_X | LOCK_REC_NOT_GAP, g.cell(), id,
page_align(rec), heap_no, index, trx, true); page_align(rec), heap_no, index, trx, true, true);
} }
trx->mutex_unlock(); trx->mutex_unlock();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment