Commit 3ddc00dc authored by Vlad Lesin's avatar Vlad Lesin

MDEV-30225 RR isolation violation with locking unique search

Before the fix, a next-key lock was requested only if a record was
delete-marked for a locking unique search in RR isolation level.
There can be several delete-marked records for the same unique key,
which is why InnoDB scans the records until either a non-delete-marked
record is reached or all delete-marked records with the same unique key
are scanned.

For a range scan, next-key locks are used in RR to protect the scanned range
from new records being inserted by other transactions. This is also why
next-key locks are used for delete-marked records in unique searches.

If a record was not delete-marked, the requested lock type was "not-gap".
When a record is not delete-marked at the time trx 1 requests the lock, and
some other transaction holds a conflicting lock, trx 1 creates a waiting
not-gap lock on the record and suspends. While trx 1 is suspended, the
record can be delete-marked. When the lock is granted after the conflicting
transaction commits or rolls back, its type is still "not-gap". So we have a
"not-gap" lock on a delete-marked record in RR. This lets some other
transaction insert a record with the same unique key while trx 1 is
not yet committed, which can cause an isolation level violation.

The fix is to set next-key locks for both delete-marked and
non-delete-marked records for unique search in RR.
parent 3f63aa18
......@@ -11,6 +11,7 @@ SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL first_ins_locked';
SET DEBUG_SYNC = 'ib_after_row_insert SIGNAL first_ins_row_inserted WAIT_FOR first_ins_cont';
INSERT INTO t VALUES(10, 20);
connect con_del_2,localhost,root,,;
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET DEBUG_SYNC = 'now WAIT_FOR first_ins_locked';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL second_del_locked';
DELETE FROM t WHERE b = 20;
......
connect pause_purge,localhost,root;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
CREATE TABLE t (pk int PRIMARY KEY, sk INT UNIQUE) ENGINE=InnoDB;
INSERT INTO t VALUES (10, 100);
connect con1,localhost,root;
BEGIN;
SELECT * FROM t WHERE sk = 100 FOR UPDATE;
pk sk
10 100
connect con2,localhost,root;
SET DEBUG_SYNC="lock_wait_suspend_thread_enter SIGNAL insert_wait_started";
INSERT INTO t VALUES (5, 100) # trx 1;
connect con3,localhost,root;
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SET DEBUG_SYNC="now WAIT_FOR insert_wait_started";
SET DEBUG_SYNC="lock_wait_suspend_thread_enter SIGNAL delete_started_waiting";
DELETE FROM t WHERE sk = 100 # trx 2;
connection con1;
SET DEBUG_SYNC="now WAIT_FOR delete_started_waiting";
DELETE FROM t WHERE sk=100;
COMMIT;
disconnect con1;
connection con2;
disconnect con2;
connection con3;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
disconnect con3;
connection default;
SELECT * FROM t;
pk sk
5 100
disconnect pause_purge;
SET DEBUG_SYNC="RESET";
DROP TABLE t;
......@@ -673,7 +673,7 @@ SET @end = (SELECT COUNT FROM INFORMATION_SCHEMA.INNODB_METRICS WHERE NAME
= 'lock_rec_lock_created');
SELECT @end - @start;
@end - @start
0
1
DROP TABLE t1;
SET GLOBAL innodb_monitor_enable=default;
SET GLOBAL innodb_monitor_disable=default;
......
......@@ -27,6 +27,10 @@ SET DEBUG_SYNC = 'ib_after_row_insert SIGNAL first_ins_row_inserted WAIT_FOR fir
--send INSERT INTO t VALUES(10, 20)
--connect(con_del_2,localhost,root,,)
# After MDEV-30225 is fixed, the following DELETE creates a next-key lock for
# unique search in RR, and the above INSERT kills it as a deadlock victim.
# But it still requests a not-gap lock in RC.
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET DEBUG_SYNC = 'now WAIT_FOR first_ins_locked';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL second_del_locked';
###############################################################################
......
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/count_sessions.inc
--connect (pause_purge,localhost,root)
START TRANSACTION WITH CONSISTENT SNAPSHOT;
--connection default
CREATE TABLE t (pk int PRIMARY KEY, sk INT UNIQUE) ENGINE=InnoDB;
INSERT INTO t VALUES (10, 100);
--connect (con1,localhost,root)
BEGIN; # trx 0
SELECT * FROM t WHERE sk = 100 FOR UPDATE;
--connect (con2,localhost,root)
SET DEBUG_SYNC="lock_wait_suspend_thread_enter SIGNAL insert_wait_started";
# trx 1 blocks while trying to read the record in the secondary index during
# the duplicate check. It is first in the waiting queue, which is why it will
# be woken up first when trx 0 commits.
--send INSERT INTO t VALUES (5, 100) # trx 1
--connect (con3,localhost,root)
# MDEV-30225 is fixed only for RR
SET TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SET DEBUG_SYNC="now WAIT_FOR insert_wait_started";
SET DEBUG_SYNC="lock_wait_suspend_thread_enter SIGNAL delete_started_waiting";
# trx 2 can delete (5, 100) on master, but not on slave, as on slave trx 1
# can insert (5, 100) after trx 2 positioned its cursor. Trx 2 lock is placed
# in waiting queue after trx 1 lock, but its persistent cursor position was
# stored on (100, 10) record in secondary index before suspending. After trx 1
# is committed, trx 2 will restore persistent cursor position on (100, 10). As
# (100, 5) secondary index record was inserted before (100, 10) in logical
# order, and (100, 10) record is delete-marked, trx 2 just continues scanning.
#
# Note: there can be several records with the same key in a unique secondary
# index, but at most one of them may be non-delete-marked. That's why, for a
# point query, the cursor is positioned on the first record in logical order,
# and then records are iterated until either a non-delete-marked record is
# found or all records with the same unique fields have been iterated.
--send DELETE FROM t WHERE sk = 100 # trx 2
--connection con1
SET DEBUG_SYNC="now WAIT_FOR delete_started_waiting";
DELETE FROM t WHERE sk=100; # trx 0
COMMIT;
--disconnect con1
--connection con2
--reap
--disconnect con2
--connection con3
# If the bug is fixed, deadlock error will be there, as trx 2 owns
# next-key lock waiting for trx 1, and trx 1 requests
# insert-intention lock, conflicting with trx 2 next-key lock.
--error ER_LOCK_DEADLOCK
--reap
--disconnect con3
--connection default
# If the bug is not fixed, we will see the row inserted by trx 1 here. This can
# cause a duplicate key error on slave when some other trx tries to insert a
# row with the same secondary key as was inserted by trx 1 and not deleted by
# trx 2.
SELECT * FROM t;
--disconnect pause_purge
SET DEBUG_SYNC="RESET";
DROP TABLE t;
--source include/wait_until_count_sessions.inc
......@@ -435,6 +435,9 @@ INSERT INTO t1 VALUES(1,1,'a'),(2,9999,'b'),(3,10000,'c'),(4,4,'d');
DELETE FROM t1 WHERE a = 9999 AND b='b';
COMMIT;
# After MDEV-30225 is fixed, the above DELETE creates a next-key lock during
# the secondary index unique search. That's why the result of the following
# must be 1.
SET @end = (SELECT COUNT FROM INFORMATION_SCHEMA.INNODB_METRICS WHERE NAME
= 'lock_rec_lock_created');
SELECT @end - @start;
......
......@@ -5114,8 +5114,10 @@ row_search_mvcc(
goto no_gap_lock;
}
/* Set next-key lock both for delete- and non-delete-marked
records for unique search, because non-delete-marked record can
be marked as deleted while transaction suspends. */
if (!set_also_gap_locks
|| (unique_search && !rec_get_deleted_flag(rec, comp))
|| dict_index_is_spatial(index)) {
goto no_gap_lock;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment