MDEV-18706 ER_LOCK_DEADLOCK on concurrent read and insert into already locked gap

* Cause No real deadlock, but incomplete conflict detection: 1. Con1-X blocks on Def-X lock (p.2 in test); 2. Def-II-X has to wait Con1-X and here conflict resolution starts (p.3 in test); 3. Conflict resolution goes through a tree of locks: a. (Def-II-X, Def-X): ok b. (Def-II-X, Con1-X): conflict, traversing from Con1-X c. (Con1-X, Def-X): conflict, found cycle (because resolution started from Def). The weak point here is 2. (lock_rec_has_to_wait()), because Def-II-X doesn't have to wait Con1: Def already got X "next-key" lock that also holds the gap before the record. * Fix Allow lock in `lock_rec_other_has_conflicting()` if there is already non-waiting stronger or equal lock in this transaction (`lock_rec_has_expl()`). row_search_mvcc(): SELECT FOR UPDATE skips inserted records when next-key lock is released. Fixed by stepping back before wait and stepping forward before acquire: it steps into the gap again and discovers newly inserted records. Note that change to row_sel() is not required as it is used for internal queries only. * Test fixes innodb.auto_increment_dup: log-bin behaves like skip-log-bin because of no bogus deadlock on UPDATE. versioning.update: originally described deadlock from this task is now disappeared.

MDEV-18706 ER_LOCK_DEADLOCK on concurrent read and insert into already locked gap
* Cause No real deadlock, but incomplete conflict detection: 1. Con1-X blocks on Def-X lock (p.2 in test); 2. Def-II-X has to wait Con1-X and here conflict resolution starts (p.3 in test); 3. Conflict resolution goes through a tree of locks: a. (Def-II-X, Def-X): ok b. (Def-II-X, Con1-X): conflict, traversing from Con1-X c. (Con1-X, Def-X): conflict, found cycle (because resolution started from Def). The weak point here is 2. (lock_rec_has_to_wait()), because Def-II-X doesn't have to wait Con1: Def already got X "next-key" lock that also holds the gap before the record. * Fix Allow lock in `lock_rec_other_has_conflicting()` if there is already non-waiting stronger or equal lock in this transaction (`lock_rec_has_expl()`). row_search_mvcc(): SELECT FOR UPDATE skips inserted records when next-key lock is released. Fixed by stepping back before wait and stepping forward before acquire: it steps into the gap again and discovers newly inserted records. Note that change to row_sel() is not required as it is used for internal queries only. * Test fixes innodb.auto_increment_dup: log-bin behaves like skip-log-bin because of no bogus deadlock on UPDATE. versioning.update: originally described deadlock from this task is now disappeared.
1904b11b · Aleksey Midenkov · 9dedba16 · 1904b11b · 1904b11b · 1904b11b
Commit 1904b11b authored Sep 07, 2020 by Aleksey Midenkov
7 changed files
--- a/mysql-test/suite/innodb/r/innodb-lock.result
+++ b/mysql-test/suite/innodb/r/innodb-lock.result
@@ -159,3 +159,152 @@ connection con1;
 disconnect con1;
 connection default;
 DROP TABLE t1, t2, t3;
+#
+# MDEV-18706 ER_LOCK_DEADLOCK on concurrent read and insert into already locked gap
+#
+# A) Def inserts between infimum and Def-locked record, between Def-locked record and supremum
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (3, 0);
+start transaction;
+update t1 set x= 1;
+connect  con1, localhost, root,, test;
+select * from t1 for update;
+connection default;
+insert into t1 values (1, 1);
+insert into t1 values (2, 1);
+insert into t1 values (4, 1);
+insert into t1 values (5, 1);
+commit;
+connection con1;
+# No ER_LOCK_DEADLOCK; all rows returned
+pk	x
+1	1
+2	1
+3	1
+4	1
+5	1
+connection default;
+disconnect con1;
+drop table t1;
+# B) Def inserts between Con1-locked and Def-locked record
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (1, 2);
+insert into t1 values (4, 0);
+start transaction;
+update t1 set x= 2 where pk > 3;
+connect  con1,localhost,root,,test;
+select * from t1 for update;
+connection default;
+insert into t1 values (2, 2);
+insert into t1 values (3, 2);
+insert into t1 values (5, 2);
+commit;
+connection con1;
+# No ER_LOCK_DEADLOCK; all rows returned
+pk	x
+1	2
+2	2
+3	2
+4	2
+5	2
+connection default;
+disconnect con1;
+drop table t1;
+# C) Same as A) but reverse direction
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (3, 0);
+start transaction;
+update t1 set x= 3;
+connect  con1,localhost,root,,test;
+select * from t1 order by pk desc for update;
+connection default;
+insert into t1 values (1, 3);
+insert into t1 values (2, 3);
+insert into t1 values (4, 3);
+insert into t1 values (5, 3);
+commit;
+connection con1;
+# No ER_LOCK_DEADLOCK; all rows returned
+pk	x
+5	3
+4	3
+3	3
+2	3
+1	3
+connection default;
+disconnect con1;
+drop table t1;
+# D) Same as B) but reverse direction
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (2, 0);
+insert into t1 values (5, 4);
+start transaction;
+update t1 set x= 4 where pk < 3;
+connect  con1,localhost,root,,test;
+select * from t1 order by pk desc for update;
+connection default;
+insert into t1 values (1, 4);
+insert into t1 values (3, 4);
+insert into t1 values (4, 4);
+commit;
+connection con1;
+# No ER_LOCK_DEADLOCK; all rows returned
+pk	x
+5	4
+4	4
+3	4
+2	4
+1	4
+connection default;
+disconnect con1;
+drop table t1;
+# E) Same as A) but UPDATE instead SELECT
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (3, 0);
+start transaction;
+update t1 set x= 5;
+connect  con1, localhost, root,, test;
+update t1 set x= x + 10;
+connection default;
+insert into t1 values (1, 5);
+insert into t1 values (2, 5);
+insert into t1 values (4, 5);
+insert into t1 values (5, 5);
+commit;
+connection con1;
+# No ER_LOCK_DEADLOCK; all rows returned
+connection default;
+select * from t1;
+pk	x
+1	15
+2	15
+3	15
+4	15
+5	15
+disconnect con1;
+drop table t1;
+# F) Same as B) but UPDATE instead SELECT
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (1, 6);
+insert into t1 values (4, 0);
+start transaction;
+update t1 set x= 6 where pk > 3;
+connect  con1,localhost,root,,test;
+update t1 set x= x + 10;
+connection default;
+insert into t1 values (2, 6);
+insert into t1 values (3, 6);
+insert into t1 values (5, 6);
+commit;
+connection con1;
+# No ER_LOCK_DEADLOCK; all rows returned
+connection default;
+select * from t1;
+pk	x
+1	16
+2	16
+3	16
+4	16
+5	16
+disconnect con1;
+drop table t1;
--- a/mysql-test/suite/innodb/t/innodb-lock.opt
+++ b/mysql-test/suite/innodb/t/innodb-lock.opt
+--enable-plugin-innodb-lock-waits
--- a/mysql-test/suite/innodb/t/innodb-lock.test
+++ b/mysql-test/suite/innodb/t/innodb-lock.test
@@ -219,3 +219,150 @@ reap;
 disconnect con1;
 connection default;
 DROP TABLE t1, t2, t3;
+
+--echo #
+--echo # MDEV-18706 ER_LOCK_DEADLOCK on concurrent read and insert into already locked gap
+--echo #
+--echo # A) Def inserts between infimum and Def-locked record, between Def-locked record and supremum
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (3, 0);
+start transaction;
+# 1. Def: X-lock "3" as "next key" (row + gap)
+update t1 set x= 1;
+connect (con1, localhost, root,, test);
+# 2. Con1: Block on "3"
+send select * from t1 for update;
+connection default;
+--let $wait_condition= select count(*) from information_schema.innodb_lock_waits
+--source include/wait_condition.inc
+# 3. Def: Insert before and after "3"
+insert into t1 values (1, 1);
+insert into t1 values (2, 1);
+insert into t1 values (4, 1);
+insert into t1 values (5, 1);
+# 4. Def: Unlock "3"
+commit;
+connection con1;
+# 5. Con1: Continue SELECT
+--echo # No ER_LOCK_DEADLOCK; all rows returned
+reap;
+connection default;
+disconnect con1;
+drop table t1;
+
+--echo # B) Def inserts between Con1-locked and Def-locked record
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (1, 2);
+insert into t1 values (4, 0);
+start transaction;
+# 1. Def: X-lock "4" as "next key" (row + gap)
+update t1 set x= 2 where pk > 3;
+connect (con1,localhost,root,,test);
+# 2. Con1: X-lock "1", block on "4"
+send select * from t1 for update;
+connection default;
+--let $wait_condition= select count(*) from information_schema.innodb_lock_waits
+--source include/wait_condition.inc
+# 3. Def: Insert before and after "4"
+# Note: we cannot INSERT before "1" because Con1 already X-locked it
+insert into t1 values (2, 2);
+insert into t1 values (3, 2);
+insert into t1 values (5, 2);
+commit;
+connection con1;
+--echo # No ER_LOCK_DEADLOCK; all rows returned
+reap;
+connection default;
+disconnect con1;
+drop table t1;
+
+--echo # C) Same as A) but reverse direction
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (3, 0);
+start transaction;
+update t1 set x= 3;
+connect (con1,localhost,root,,test);
+send select * from t1 order by pk desc for update;
+connection default;
+--let $wait_condition= select count(*) from information_schema.innodb_lock_waits
+--source include/wait_condition.inc
+insert into t1 values (1, 3);
+insert into t1 values (2, 3);
+insert into t1 values (4, 3);
+insert into t1 values (5, 3);
+commit;
+connection con1;
+--echo # No ER_LOCK_DEADLOCK; all rows returned
+reap;
+connection default;
+disconnect con1;
+drop table t1;
+
+--echo # D) Same as B) but reverse direction
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (2, 0);
+insert into t1 values (5, 4);
+start transaction;
+update t1 set x= 4 where pk < 3;
+connect (con1,localhost,root,,test);
+send select * from t1 order by pk desc for update;
+connection default;
+--let $wait_condition= select count(*) from information_schema.innodb_lock_waits
+--source include/wait_condition.inc
+insert into t1 values (1, 4);
+insert into t1 values (3, 4);
+insert into t1 values (4, 4);
+commit;
+connection con1;
+--echo # No ER_LOCK_DEADLOCK; all rows returned
+reap;
+connection default;
+disconnect con1;
+drop table t1;
+
+--echo # E) Same as A) but UPDATE instead SELECT
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (3, 0);
+start transaction;
+update t1 set x= 5;
+connect (con1, localhost, root,, test);
+send update t1 set x= x + 10;
+connection default;
+--let $wait_condition= select count(*) from information_schema.innodb_lock_waits
+--source include/wait_condition.inc
+insert into t1 values (1, 5);
+insert into t1 values (2, 5);
+insert into t1 values (4, 5);
+insert into t1 values (5, 5);
+commit;
+connection con1;
+--echo # No ER_LOCK_DEADLOCK; all rows returned
+reap;
+connection default;
+select * from t1;
+disconnect con1;
+drop table t1;
+
+--echo # F) Same as B) but UPDATE instead SELECT
+create or replace table t1 (pk int primary key, x int) engine innodb;
+insert into t1 values (1, 6);
+insert into t1 values (4, 0);
+start transaction;
+update t1 set x= 6 where pk > 3;
+connect (con1,localhost,root,,test);
+send update t1 set x= x + 10;
+connection default;
+--let $wait_condition= select count(*) from information_schema.innodb_lock_waits
+--source include/wait_condition.inc
+insert into t1 values (2, 6);
+insert into t1 values (3, 6);
+insert into t1 values (5, 6);
+commit;
+connection con1;
+--echo # No ER_LOCK_DEADLOCK; all rows returned
+reap;
+connection default;
+select * from t1;
+disconnect con1;
+drop table t1;
+
--- a/storage/innobase/include/lock0priv.h
+++ b/storage/innobase/include/lock0priv.h
@@ -685,6 +685,25 @@ inline void lock_reset_lock_and_trx_wait(lock_t* lock)
 	lock->type_mode &= ~LOCK_WAIT;
 }

+inline
+bool ib_lock_t::is_stronger(ulint precise_mode, ulint heap_no, const trx_t* t) const
+{
+	ut_ad(is_record_lock());
+	return trx == t
+	    && !is_waiting()
+	    && !is_insert_intention()
+	    && (!is_record_not_gap()
+		|| (precise_mode & LOCK_REC_NOT_GAP) /* only record */
+		|| heap_no == PAGE_HEAP_NO_SUPREMUM)
+	    && (!is_gap()
+		|| (precise_mode & LOCK_GAP)         /* only gap */
+		|| heap_no == PAGE_HEAP_NO_SUPREMUM)
+	    && lock_mode_stronger_or_eq(
+		mode(),
+		static_cast<lock_mode>(
+			precise_mode & LOCK_MODE_MASK));
+}
+
 #include "lock0priv.ic"

 #endif /* lock0priv_h */
--- a/storage/innobase/include/lock0types.h
+++ b/storage/innobase/include/lock0types.h
@@ -237,7 +237,10 @@ struct ib_lock_t
 		return(type_mode & LOCK_INSERT_INTENTION);
 	}

-	ulint type() const {
+	bool is_stronger(ulint precise_mode, ulint heap_no, const trx_t* t) const;
+
+	ulint type() const
+	{
 		return(type_mode & LOCK_TYPE_MASK);
 	}


--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -1022,19 +1022,7 @@ lock_rec_has_expl(
 	     lock != NULL;
 	     lock = lock_rec_get_next(heap_no, lock)) {

-		if (lock->trx == trx
-		    && !lock_rec_get_insert_intention(lock)
-		    && lock_mode_stronger_or_eq(
-			    lock_get_mode(lock),
-			    static_cast<lock_mode>(
-				    precise_mode & LOCK_MODE_MASK))
-		    && !lock_get_wait(lock)
-		    && (!lock_rec_get_rec_not_gap(lock)
-			|| (precise_mode & LOCK_REC_NOT_GAP)
-			|| heap_no == PAGE_HEAP_NO_SUPREMUM)
-		    && (!lock_rec_get_gap(lock)
-			|| (precise_mode & LOCK_GAP)
-			|| heap_no == PAGE_HEAP_NO_SUPREMUM)) {
+		if (lock->is_stronger(precise_mode, heap_no, trx)) {

 			return(lock);
 		}
@@ -1165,39 +1153,47 @@ lock_rec_other_has_conflicting(
 /*===========================*/
 	ulint			mode,	/*!< in: LOCK_S or LOCK_X,
 					possibly ORed to LOCK_GAP or
-					LOC_REC_NOT_GAP,
+					LOCK_REC_NOT_GAP,
 					LOCK_INSERT_INTENTION */
 	const buf_block_t*	block,	/*!< in: buffer block containing
 					the record */
 	ulint			heap_no,/*!< in: heap number of the record */
 	const trx_t*		trx)	/*!< in: our transaction */
 {
-	lock_t*		lock;
+	lock_t*		res = NULL;

 	ut_ad(lock_mutex_own());

 	bool	is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);

-	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
+	for (lock_t* lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
 	     lock != NULL;
 	     lock = lock_rec_get_next(heap_no, lock)) {

-		if (lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) {
-#ifdef WITH_WSREP
-			if (trx->is_wsrep()) {
-				trx_mutex_enter(lock->trx);
-				/* Below function will roll back either trx
-				or lock->trx depending on priority of the
-				transaction. */
-				wsrep_kill_victim(const_cast<trx_t*>(trx), lock);
-				trx_mutex_exit(lock->trx);
-			}
-#endif /* WITH_WSREP */
-			return(lock);
+		/* If current trx already acquired a lock not weaker covering
+		same types then we don't have to wait for any locks. */
+		if (lock->is_stronger(mode, heap_no, trx)) {
+
+			return(NULL);
+		}
+
+		if (!res && lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) {
+			res = lock;
 		}
 	}

-	return(NULL);
+#ifdef WITH_WSREP
+	if (res && trx->is_wsrep()) {
+		trx_mutex_enter(res->trx);
+		/* Below function will roll back either trx
+		or lock->trx depending on priority of the
+		transaction. */
+		wsrep_kill_victim(const_cast<trx_t*>(trx), res);
+		trx_mutex_exit(res->trx);
+	}
+#endif /* WITH_WSREP */
+
+	return(res);
 }

 /*********************************************************************//**

--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -5733,6 +5733,13 @@ row_search_mvcc(

 lock_wait_or_error:
 	if (!dict_index_is_spatial(index)) {
+		/* Locked gap may be filled with inserted records.
+		Make sure we don't miss them. */
+		if (moves_up) {
+			btr_pcur_move_to_prev(pcur, &mtr);
+		} else {
+			btr_pcur_move_to_next(pcur, &mtr);
+		}
 		btr_pcur_store_position(pcur, &mtr);
 	}
 page_read_error:
@@ -5775,6 +5782,16 @@ row_search_mvcc(
 			sel_restore_position_for_mysql(
 				&same_user_rec, BTR_SEARCH_LEAF, pcur,
 				moves_up, &mtr);
+			/* Counterpart of the stepping backward in lock_wait_or_error.
+			This is linked tight with that btr_pcur_store_position().
+			The jumps to page_read_error: and lock_table_wait: do not get here. */
+			if (same_user_rec) {
+				if (!moves_up) {
+					btr_pcur_move_to_prev(pcur, &mtr);
+				} else {
+					btr_pcur_move_to_next(pcur, &mtr);
+				}
+			}
 		}

 		if ((srv_locks_unsafe_for_binlog