Commit aed761ce authored by Marko Mäkelä

WIP: MDEV-17603 REPLACE and INSERT…ON DUPLICATE KEY UPDATE are deadlock-prone

Implement an alternative fix for the bug whose original fix
mysql/mysql-server@c93b0d9a972cb6f98fd445f2b69d924350f9128a
in MySQL 5.7.4 caused problems.

This is based on
mysql/mysql-server@e0e4bacddf421550baca3578bc0db13693874fdb
in MySQL 5.7.26.

When performing a rollback to the start of the current row
operation in REPLACE or INSERT...ON DUPLICATE KEY UPDATE
we were not maintaining serializability, because we would
release implicit locks that could already have been acquired
for some of the indexes.

lock_rec_convert_impl_to_expl_for_trx(): Declare globally.

undo_node_t::convert_impl_to_expl(): Convert an implicit lock
to an explicit one during a partial rollback.

row_insert_for_mysql(): Set trx->duplicates=ULINT_UNDEFINED
for rolling back the current row operation. This will allow
undo_node_t::convert_impl_to_expl() to be effective only
for this use case, not for other scenarios, such as
rolling back to the start of the statement, or
ROLLBACK TO SAVEPOINT.

FIXME: Neither innodb.auto_increment_dup,log-bin nor innodb.iodku
(the test from the upstream fix, which we did not add) passes.
While undo_node_t::convert_impl_to_expl() is working as intended,
what happens in innodb.auto_increment_dup,log-bin is that the newly
created explicit record lock for the record with heap number 6 on the
PRIMARY key root page (3) will be released when that record is deleted
moments later, with the following stack trace:

lock_rec_reset_nth_bit
lock_rec_reset_and_release_wait_low
lock_rec_reset_and_release_wait
lock_update_delete
btr_cur_optimistic_delete_func
row_undo_ins_remove_clust_rec
row_undo_ins
row_undo
row_undo_step
que_thr_step
que_run_threads_low
que_run_threads
trx_rollback_to_savepoint_low
trx_rollback_to_savepoint
row_mysql_handle_errors
row_insert_for_mysql

The idea might work with predicate locks, which we do not have.
This entire scenario could also be fixed by MDEV-16232, which
could allow the entire operation to be protected with page latches.
parent 762663d8
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, MariaDB Corporation.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -913,6 +913,20 @@ lock_rec_create(
/*!< in: true if caller owns
trx mutex */
/*********************************************************************//**
Creates an explicit record lock for a running transaction that currently only
has an implicit lock on the record. The caller must make sure that the
transaction instance has a reference count > 0 for the duration of the call,
so that the transaction cannot be committed and freed before this function
has completed. Declared here (not file-local) so that the undo code can
request the conversion during a partial rollback. */
void
lock_rec_convert_impl_to_expl_for_trx(
/*==================================*/
const buf_block_t* block, /*!< in: buffer block of rec */
const rec_t* rec, /*!< in: user record on page */
dict_index_t* index, /*!< in: index of record */
trx_t* trx, /*!< in/out: active transaction */
ulint heap_no);/*!< in: rec heap number to lock */
/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue. */
void
......
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
Copyright (c) 1997, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -120,6 +120,32 @@ struct undo_node_t{
mem_heap_t* heap; /*!< memory heap used as auxiliary storage for
row; this must be emptied after undo is tried
on a row */
/** On rollback, convert an implicit lock into explicit before
undoing an insert (or update of delete-marked record), if needed.
@param[in] cursor record whose insert is about to be undone */
void convert_impl_to_expl(const btr_cur_t& cursor) const
{
ut_ad(trx->in_rollback);
if (trx->duplicates != ULINT_UNDEFINED
|| dict_index_is_spatial(cursor.index)) {
return;
}
ulint heap_no = page_rec_get_heap_no(btr_cur_get_rec(&cursor));
if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
convert_impl_to_expl(cursor, heap_no);
}
}
private:
/** On a partial rollback, convert an implicit lock into explicit
before undoing an insert (or update of delete-marked record).
Releasing an implicit lock could break the serializability of
INSERT...ON DUPLICATE KEY UPDATE and REPLACE statements.
@param[in] cursor record whose insert is about to be undone
@param[in] heap_no heap number of the record at the cursor */
void convert_impl_to_expl(const btr_cur_t& cursor,ulint heap_no) const;
};
#endif
......@@ -6023,7 +6023,6 @@ Creates an explicit record lock for a running transaction that currently only
has an implicit lock on the record. The transaction instance must have a
reference count > 0 so that it can't be committed and freed before this
function has completed. */
static
void
lock_rec_convert_impl_to_expl_for_trx(
/*==================================*/
......
......@@ -1424,8 +1424,11 @@ row_insert_for_mysql(
/* FIXME: What's this ? */
thr->lock_state = QUE_THR_LOCK_ROW;
ulint duplicates = trx->duplicates;
trx->duplicates = ULINT_UNDEFINED;
was_lock_wait = row_mysql_handle_errors(
&err, trx, thr, &savept);
trx->duplicates = duplicates;
thr->lock_state = QUE_THR_LOCK_NOLOCK;
......
......@@ -137,6 +137,8 @@ row_undo_ins_remove_clust_rec(
ut_a(success);
}
node->convert_impl_to_expl(*btr_cur);
if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
err = DB_SUCCESS;
goto func_exit;
......@@ -193,7 +195,8 @@ row_undo_ins_remove_sec_low(
pessimistic descent down the index tree */
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in: index entry to remove */
que_thr_t* thr) /*!< in: query thread */
que_thr_t* thr, /*!< in: query thread */
undo_node_t* node) /*!< in: undo node */
{
btr_pcur_t pcur;
btr_cur_t* btr_cur;
......@@ -251,6 +254,8 @@ row_undo_ins_remove_sec_low(
btr_cur = btr_pcur_get_btr_cur(&pcur);
node->convert_impl_to_expl(*btr_cur);
if (modify_leaf) {
err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
? DB_SUCCESS : DB_FAIL;
......@@ -281,14 +286,15 @@ row_undo_ins_remove_sec(
/*====================*/
dict_index_t* index, /*!< in: index */
dtuple_t* entry, /*!< in: index entry to insert */
que_thr_t* thr) /*!< in: query thread */
que_thr_t* thr, /*!< in: query thread */
undo_node_t* node)
{
dberr_t err;
ulint n_tries = 0;
/* Try first optimistic descent to the B-tree */
err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry, thr);
err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry, thr, node);
if (err == DB_SUCCESS) {
......@@ -299,7 +305,7 @@ row_undo_ins_remove_sec(
retry:
err = row_undo_ins_remove_sec_low(
BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
index, entry, thr);
index, entry, thr, node);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
......@@ -453,7 +459,7 @@ row_undo_ins_remove_sec_rec(
assume that the secondary index record does
not exist. */
} else {
err = row_undo_ins_remove_sec(index, entry, thr);
err = row_undo_ins_remove_sec(index, entry, thr, node);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
goto func_exit;
......
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 1997, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
......@@ -217,6 +217,7 @@ row_undo_mod_remove_clust_low(
/* In delete-marked records, DB_TRX_ID must
always refer to an existing update_undo log record. */
ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index));
node->convert_impl_to_expl(*btr_cur);
if (mode == BTR_MODIFY_LEAF) {
err = btr_cur_optimistic_delete(btr_cur, 0, mtr)
......@@ -525,6 +526,8 @@ row_undo_mod_del_mark_or_remove_sec_low(
}
}
node->convert_impl_to_expl(*btr_cur);
if (modify_leaf) {
success = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
if (success) {
......
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 1997, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
......@@ -39,7 +39,24 @@ Created 1/8/1997 Heikki Tuuri
#include "row0umod.h"
#include "row0upd.h"
#include "row0mysql.h"
#include "srv0srv.h"
#include "lock0lock.h"
/** On a partial rollback, convert an implicit lock into explicit
before undoing an insert (or update of delete-marked record).
Releasing an implicit lock could break the serializability of
INSERT...ON DUPLICATE KEY UPDATE and REPLACE statements.
@param[in] cursor record whose insert is about to be undone
@param[in] heap_no heap number of the record at the cursor */
void
undo_node_t::convert_impl_to_expl(const btr_cur_t& cursor, ulint heap_no) const
{
/* Take a reference on the transaction while holding its mutex;
lock_rec_convert_impl_to_expl_for_trx() requires a reference
count > 0 for the duration of the call. */
trx_mutex_enter(trx);
trx->n_ref++;
trx_mutex_exit(trx);
/* NOTE(review): the reference is not released in this function;
presumably the callee drops it before returning -- confirm against
the definition of lock_rec_convert_impl_to_expl_for_trx(). */
lock_rec_convert_impl_to_expl_for_trx(
cursor.page_cur.block, cursor.page_cur.rec, cursor.index,
trx, heap_no);
}
/* How to undo row operations?
(1) For an insert, we have stored a prefix of the clustered index record
......
......@@ -141,13 +141,16 @@ trx_rollback_to_savepoint(
partial rollback requested, or NULL for
complete rollback */
{
ut_ad(!trx_mutex_own(trx));
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE)
|| trx_state_eq(trx, TRX_STATE_NOT_STARTED));
trx_start_if_not_started_xa(trx, true);
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint_low(trx, savept);
if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
trx_rollback_to_savepoint_low(trx, savept);
}
return(trx->error_state);
return trx->error_state;
}
/*******************************************************************//**
......@@ -160,18 +163,9 @@ trx_rollback_for_mysql_low(
trx_t* trx) /*!< in/out: transaction */
{
trx->op_info = "rollback";
/* If we are doing the XA recovery of prepared transactions,
then the transaction object does not have an InnoDB session
object, and we set a dummy session that we use for all MySQL
transactions. */
trx_rollback_to_savepoint_low(trx, NULL);
trx->op_info = "";
ut_a(trx->error_state == DB_SUCCESS);
return(trx->error_state);
}
......
......@@ -120,6 +120,7 @@ trx_init(
trx->error_state = DB_SUCCESS;
trx->error_info = NULL;
trx->error_key_num = ULINT_UNDEFINED;
trx->undo_no = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment