Commit 97e51d24 authored by Marko Mäkelä

MDEV-13697 DB_TRX_ID is not always reset

The rollback of the modification of a pre-existing record
should involve a purge-like operation. Before MDEV-12288
the only purge-like operation was the removal of a
delete-marked record.

After MDEV-12288, any rollback of updating an existing record
must reset the DB_TRX_ID column when it is no longer visible
in the purge read view.

row_vers_must_preserve_del_marked(): Remove. It is cleaner to
perform the check directly in row0umod.cc.

row_trx_id_offset(): Auxiliary function to retrieve the byte
offset of DB_TRX_ID in a clustered index leaf page record.

row_undo_mod_must_purge(): Determine if a record should be purged.

row_undo_mod_clust(): For temporary tables, skip the purge checks.
When rolling back an update so that the original record was not
delete-marked, reset DB_TRX_ID if the history is no longer visible.
parent 56074319
......@@ -7,9 +7,18 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency = 1;
SET GLOBAL innodb_purge_rseg_truncate_frequency = 1;
CREATE TABLE t1(a INT PRIMARY KEY, b INT NOT NULL)
ROW_FORMAT=REDUNDANT ENGINE=InnoDB;
connect prevent_purge,localhost,root;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
INSERT INTO t1 VALUES(1,2),(3,4);
UPDATE t1 SET b=-3 WHERE a=3;
connect con1,localhost,root;
BEGIN;
UPDATE t1 SET b=4 WHERE a=3;
disconnect prevent_purge;
connection default;
InnoDB 0 transactions not purged
disconnect con1;
FLUSH TABLE t1 FOR EXPORT;
Clustered index root page contents:
N_RECS=2; LEVEL=0
......
......@@ -15,11 +15,24 @@ SET GLOBAL innodb_purge_rseg_truncate_frequency = 1;
CREATE TABLE t1(a INT PRIMARY KEY, b INT NOT NULL)
ROW_FORMAT=REDUNDANT ENGINE=InnoDB;
--connect (prevent_purge,localhost,root)
START TRANSACTION WITH CONSISTENT SNAPSHOT;
--connection default
INSERT INTO t1 VALUES(1,2),(3,4);
UPDATE t1 SET b=-3 WHERE a=3;
# Initiate a full purge, which should reset all DB_TRX_ID.
--connect (con1,localhost,root)
BEGIN;
# For purgeable records, we must record DB_TRX_ID=0 in the undo log!
UPDATE t1 SET b=4 WHERE a=3;
--disconnect prevent_purge
--connection default
# Initiate a full purge, which should reset the DB_TRX_ID except for a=3.
--source include/wait_all_purged.inc
# Initiate a ROLLBACK of the update, which should reset the DB_TRX_ID for a=3.
--disconnect con1
FLUSH TABLE t1 FOR EXPORT;
# The following is based on innodb.table_flags:
......
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -54,22 +54,6 @@ row_vers_impl_x_locked(
dict_index_t* index,
const ulint* offsets);
/*****************************************************************//**
Finds out if we must preserve a delete marked earlier version of a clustered
index record, because it is >= the purge view.
@param[in] trx_id transaction id in the version
@param[in] name table name
@param[in,out] mtr mini transaction holding the latch on the
clustered index record; it will also hold
the latch on purge_view
@return TRUE if earlier version should be preserved */
ibool
row_vers_must_preserve_del_marked(
/*==============================*/
trx_id_t trx_id,
const table_name_t& name,
mtr_t* mtr);
/*****************************************************************//**
Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
......
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -32,6 +32,7 @@ Created 2/27/1997 Heikki Tuuri
#include "dict0boot.h"
#include "trx0undo.h"
#include "trx0roll.h"
#include "trx0purge.h"
#include "btr0btr.h"
#include "mach0data.h"
#include "ibuf0ibuf.h"
......@@ -148,101 +149,55 @@ row_undo_mod_clust_low(
return(err);
}
/***********************************************************//**
Purges a clustered index record after undo if possible.
This is attempted when the record was inserted by updating a
delete-marked record and there no longer exist transactions
that would see the delete-marked record.
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_undo_mod_remove_clust_low(
/*==========================*/
undo_node_t* node, /*!< in: row undo node */
mtr_t* mtr, /*!< in/out: mini-transaction */
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
/** Get the byte offset of the DB_TRX_ID column
@param[in] rec clustered index record
@param[in] index clustered index
@return the byte offset of DB_TRX_ID, from the start of rec */
static ulint row_trx_id_offset(const rec_t* rec, const dict_index_t* index)
{
btr_cur_t* btr_cur;
dberr_t err;
ulint trx_id_offset;
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
/* Find out if the record has been purged already
or if we can remove it. */
if (!btr_pcur_restore_position(mode, &node->pcur, mtr)
|| row_vers_must_preserve_del_marked(node->new_trx_id,
node->table->name,
mtr)) {
return(DB_SUCCESS);
}
btr_cur = btr_pcur_get_btr_cur(&node->pcur);
trx_id_offset = btr_cur_get_index(btr_cur)->trx_id_offset;
ut_ad(index->n_uniq <= MAX_REF_PARTS);
ulint trx_id_offset = index->trx_id_offset;
if (!trx_id_offset) {
mem_heap_t* heap = NULL;
ulint trx_id_col;
const ulint* offsets;
ulint len;
trx_id_col = dict_index_get_sys_col_pos(
btr_cur_get_index(btr_cur), DATA_TRX_ID);
ut_ad(trx_id_col > 0);
ut_ad(trx_id_col != ULINT_UNDEFINED);
offsets = rec_get_offsets(
btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur),
NULL, true, trx_id_col + 1, &heap);
/* Reserve enough offsets for the PRIMARY KEY and 2 columns
so that we can access DB_TRX_ID, DB_ROLL_PTR. */
ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
mem_heap_t* heap = NULL;
const ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
ulint* offsets = rec_get_offsets(rec, index, offsets_, true,
trx_id_pos + 1, &heap);
ut_ad(!heap);
ulint len;
trx_id_offset = rec_get_nth_field_offs(
offsets, trx_id_col, &len);
offsets, trx_id_pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
mem_heap_free(heap);
}
if (trx_read_trx_id(btr_cur_get_rec(btr_cur) + trx_id_offset)
!= node->new_trx_id) {
/* The record must have been purged and then replaced
with a different one. */
return(DB_SUCCESS);
}
return trx_id_offset;
}
/* We are about to remove an old, delete-marked version of the
record that may have been delete-marked by a different transaction
than the rolling-back one. */
ut_ad(rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
dict_table_is_comp(node->table)));
/* In delete-marked records, DB_TRX_ID must
always refer to an existing update_undo log record. */
ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index));
if (mode == BTR_MODIFY_LEAF) {
err = btr_cur_optimistic_delete(btr_cur, 0, mtr)
? DB_SUCCESS
: DB_FAIL;
} else {
ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
/** Determine if rollback must execute a purge-like operation.
@param[in,out] node row undo
@param[in,out] mtr mini-transaction
@return whether the record should be purged */
static bool row_undo_mod_must_purge(undo_node_t* node, mtr_t* mtr)
{
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
ut_ad(!node->table->is_temporary());
/* This operation is analogous to purge, we can free also
inherited externally stored fields.
We can also assume that the record was complete
(including BLOBs), because it had been delete-marked
after it had been completely inserted. Therefore, we
are passing rollback=false, just like purge does. */
btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&node->pcur);
ut_ad(btr_cur->index->is_primary());
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
false, mtr);
mtr_s_lock(&purge_sys.latch, mtr);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
and restart with more file space */
if (!purge_sys.view.changes_visible(node->new_trx_id,
node->table->name)) {
return false;
}
return(err);
const rec_t* rec = btr_cur_get_rec(btr_cur);
return trx_read_trx_id(rec + row_trx_id_offset(rec, btr_cur->index))
== node->new_trx_id;
}
/***********************************************************//**
......@@ -271,6 +226,7 @@ row_undo_mod_clust(
log_free_check();
pcur = &node->pcur;
index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
ut_ad(index->is_primary());
mtr.start();
if (index->table->is_temporary()) {
......@@ -364,44 +320,122 @@ row_undo_mod_clust(
btr_pcur_commit_specify_mtr(pcur, &mtr);
if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
if (err != DB_SUCCESS) {
goto func_exit;
}
/* FIXME: Perform the below operations in the above
mini-transaction when possible. */
if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
/* In delete-marked records, DB_TRX_ID must
always refer to an existing update_undo log record. */
ut_ad(node->new_trx_id);
mtr.start();
if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) {
goto mtr_commit_exit;
}
if (index->table->is_temporary()) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
if (!row_undo_mod_must_purge(node, &mtr)) {
goto mtr_commit_exit;
}
index->set_modified(mtr);
}
/* It is not necessary to call row_log_table,
because the record is delete-marked and would thus
be omitted from the rebuilt copy of the table. */
err = row_undo_mod_remove_clust_low(
node, &mtr, BTR_MODIFY_LEAF);
if (err != DB_SUCCESS) {
btr_pcur_commit_specify_mtr(pcur, &mtr);
ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
dict_table_is_comp(node->table)));
if (btr_cur_optimistic_delete(&pcur->btr_cur, 0, &mtr)) {
goto mtr_commit_exit;
}
btr_pcur_commit_specify_mtr(pcur, &mtr);
/* We may have to modify tree structure: do a
pessimistic descent down the index tree */
mtr.start();
if (!btr_pcur_restore_position(
BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE,
pcur, &mtr)) {
goto mtr_commit_exit;
}
mtr.start();
if (index->table->is_temporary()) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
index->set_modified(mtr);
if (index->table->is_temporary()) {
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
if (!row_undo_mod_must_purge(node, &mtr)) {
goto mtr_commit_exit;
}
index->set_modified(mtr);
}
err = row_undo_mod_remove_clust_low(
node, &mtr,
BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE);
ut_ad(rec_get_deleted_flag(btr_pcur_get_rec(pcur),
dict_table_is_comp(node->table)));
/* This operation is analogous to purge, we can free
also inherited externally stored fields. We can also
assume that the record was complete (including BLOBs),
because it had been delete-marked after it had been
completely inserted. Therefore, we are passing
rollback=false, just like purge does. */
btr_cur_pessimistic_delete(&err, FALSE, &pcur->btr_cur, 0,
false, &mtr);
ut_ad(err == DB_SUCCESS
|| err == DB_OUT_OF_FILE_SPACE);
} else if (!index->table->is_temporary() && node->new_trx_id) {
/* We rolled back a record so that it still exists.
We must reset the DB_TRX_ID if the history is no
longer accessible by any active read view. */
ut_ad(err == DB_SUCCESS
|| err == DB_OUT_OF_FILE_SPACE);
mtr.start();
if (!btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, &mtr)) {
goto mtr_commit_exit;
}
rec_t* rec = btr_pcur_get_rec(pcur);
mtr_s_lock(&purge_sys.latch, &mtr);
if (!purge_sys.view.changes_visible(node->new_trx_id,
node->table->name)) {
goto mtr_commit_exit;
}
btr_pcur_commit_specify_mtr(pcur, &mtr);
ulint trx_id_pos = index->n_uniq ? index->n_uniq : 1;
ut_ad(index->n_uniq <= MAX_REF_PARTS);
/* Reserve enough offsets for the PRIMARY KEY and 2 columns
so that we can access DB_TRX_ID, DB_ROLL_PTR. */
ulint offsets_[REC_OFFS_HEADER_SIZE + MAX_REF_PARTS + 2];
rec_offs_init(offsets_);
offsets = rec_get_offsets(
rec, index, offsets_, true, trx_id_pos + 2, &heap);
ulint len;
ulint trx_id_offset = rec_get_nth_field_offs(
offsets, trx_id_pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
if (trx_read_trx_id(rec + trx_id_offset) == node->new_trx_id) {
ut_ad(!rec_get_deleted_flag(
rec, dict_table_is_comp(node->table)));
index->set_modified(mtr);
if (page_zip_des_t* page_zip = buf_block_get_page_zip(
btr_pcur_get_block(&node->pcur))) {
page_zip_write_trx_id_and_roll_ptr(
page_zip, rec, offsets, trx_id_pos,
0, 1ULL << ROLL_PTR_INSERT_FLAG_POS,
&mtr);
} else {
mlog_write_string(rec + trx_id_offset,
reset_trx_id,
sizeof reset_trx_id, &mtr);
}
}
} else {
goto func_exit;
}
mtr_commit_exit:
btr_pcur_commit_specify_mtr(pcur, &mtr);
func_exit:
node->state = UNDO_NODE_FETCH_NEXT;
if (offsets_heap) {
......
......@@ -420,29 +420,6 @@ row_vers_impl_x_locked(
return(trx);
}
/*****************************************************************//**
Finds out if we must preserve a delete marked earlier version of a clustered
index record, because it is >= the purge view.
@param[in] trx_id transaction id in the version
@param[in] name table name
@param[in,out] mtr mini transaction holding the latch on the
clustered index record; it will also hold
the latch on purge_view
@return TRUE if earlier version should be preserved */
ibool
row_vers_must_preserve_del_marked(
/*==============================*/
trx_id_t trx_id,
const table_name_t& name,
mtr_t* mtr)
{
ut_ad(!rw_lock_own(&(purge_sys.latch), RW_LOCK_S));
mtr_s_lock(&purge_sys.latch, mtr);
return(!purge_sys.view.changes_visible(trx_id, name));
}
/** build virtual column value from current cluster index record data
@param[in,out] row the cluster index row in dtuple form
@param[in] clust_index clustered index
......
......@@ -945,8 +945,8 @@ trx_undo_page_report_modify(
allowed to ignore blob prefixes if the delete marking was done
by some other trx as it must have committed by now for us to
allow an over-write. */
if (ignore_prefix) {
ignore_prefix = (trx_id != trx->id);
if (trx_id == trx->id) {
ignore_prefix = false;
}
ptr += mach_u64_write_compressed(ptr, trx_id);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment