Commit 609d0a91 authored by Marko Mäkelä

MDEV-14407 Assertion failure during rollback

Rollback attempted to dereference DB_ROLL_PTR=0, which cannot possibly
be a valid undo log pointer. A safer canonical value would be
roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS
which is what was chosen in MDEV-12288, corresponding to reset_trx_id.

No deterministic test case for the bug was found. The simplest test
cases may be related to MDEV-11415, which suppresses undo logging for
ALGORITHM=COPY operations. In those operations, in the spirit of
MDEV-12288, we should actually have written reset_trx_id instead of
using the transaction identifier of the current transaction
(and a bogus value of DB_ROLL_PTR=0). However, thanks to MySQL Bug#28432
which I had fixed in MySQL 5.6.8 as part of WL#6255, access to the
rebuilt table by earlier-started transactions should actually have been
refused with ER_TABLE_DEF_CHANGED.

reset_trx_id: Move the definition to data0type.cc and the declaration
to data0type.h.

btr_cur_ins_lock_and_undo(): When undo logging is disabled, use the
safe value that corresponds to reset_trx_id.

btr_cur_optimistic_insert(): Validate the DB_TRX_ID,DB_ROLL_PTR before
inserting into a clustered index leaf page.

ins_node_t::sys_buf[]: Replaces row_id_buf and trx_id_buf and some
heap usage.

row_ins_alloc_sys_fields(): Init ins_node_t::sys_buf[] to reset_trx_id.

row_ins_step(): Only if undo logging is enabled, copy trx->id
to node->sys_buf. Otherwise, rely on the initialization in
row_ins_alloc_sys_fields().

row_purge_reset_trx_id(): Invoke mlog_write_string() with reset_trx_id
directly. (No functional change.)

trx_undo_page_report_modify(): Assert that the DB_ROLL_PTR is not 0.

trx_undo_get_undo_rec_low(): Assert that the roll_ptr is valid before
trying to dereference it.

dict_index_t::is_primary(): Check if the index is the primary key.

PageConverter::adjust_cluster_record(): Fix
MDEV-15249 Crash in MVCC read after IMPORT TABLESPACE
by resetting the system fields to reset_trx_id instead of writing
the current transaction ID (which will be committed at the
end of the IMPORT TABLESPACE) and DB_ROLL_PTR=0.
This can partially be viewed as a follow-up fix of MDEV-12288,
because already as part of that change, IMPORT should have written
DB_TRX_ID=0 and DB_ROLL_PTR=1<<55 to prevent unnecessary
DB_TRX_ID lookups in subsequent accesses to the table.
parent e2387835
......@@ -3029,24 +3029,22 @@ btr_cur_ins_lock_and_undo(
}
if (flags & BTR_NO_UNDO_LOG_FLAG) {
roll_ptr = 0;
roll_ptr = roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS;
if (!(flags & BTR_KEEP_SYS_FLAG)) {
upd_sys:
row_upd_index_entry_sys_field(entry, index,
DATA_ROLL_PTR, roll_ptr);
}
} else {
err = trx_undo_report_row_operation(thr, index, entry,
NULL, 0, NULL, NULL,
&roll_ptr);
if (err != DB_SUCCESS) {
return(err);
if (err == DB_SUCCESS) {
goto upd_sys;
}
}
/* Now we can fill in the roll ptr field in entry */
if (!(flags & BTR_KEEP_SYS_FLAG)) {
row_upd_index_entry_sys_field(entry, index,
DATA_ROLL_PTR, roll_ptr);
}
return(DB_SUCCESS);
return(err);
}
/**
......@@ -3234,7 +3232,7 @@ btr_cur_optimistic_insert(
DBUG_LOG("ib_cur",
"insert " << index->name << " (" << index->id << ") by "
<< ib::hex(thr ? trx_get_id_for_print(thr_get_trx(thr)) : 0)
<< ib::hex(thr ? thr->graph->trx->id : 0)
<< ' ' << rec_printer(entry).str());
DBUG_EXECUTE_IF("do_page_reorganize",
btr_page_reorganize(page_cursor, index, mtr););
......@@ -3251,6 +3249,36 @@ btr_cur_optimistic_insert(
goto fail_err;
}
#ifdef UNIV_DEBUG
if (!(flags & BTR_CREATE_FLAG)
&& index->is_primary() && page_is_leaf(page)) {
const dfield_t* trx_id = dtuple_get_nth_field(
entry, dict_col_get_clust_pos(
dict_table_get_sys_col(index->table,
DATA_TRX_ID),
index));
ut_ad(trx_id->len == DATA_TRX_ID_LEN);
ut_ad(trx_id[1].len == DATA_ROLL_PTR_LEN);
if (flags & BTR_NO_UNDO_LOG_FLAG) {
ut_ad(!memcmp(trx_id->data, reset_trx_id,
DATA_TRX_ID_LEN));
ut_ad(!memcmp(trx_id[1].data,
reset_trx_id + DATA_TRX_ID_LEN,
DATA_ROLL_PTR_LEN));
} else {
ut_ad(thr->graph->trx->id);
ut_ad(thr->graph->trx->id
== trx_read_trx_id(
static_cast<const byte*>(
trx_id->data)));
ut_ad(!memcmp(field_ref_zero, trx_id[1].data,
DATA_ROLL_PTR_LEN)
== !(~flags & BTR_NO_UNDO_LOG_FLAG));
}
}
#endif
*rec = page_cur_tuple_insert(
page_cursor, entry, index, offsets, heap,
n_ext, mtr);
......
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -28,6 +28,12 @@ Created 1/16/1996 Heikki Tuuri
#include "data0type.h"
/** The DB_TRX_ID,DB_ROLL_PTR values for "no history is available".
The buffer holds the two system columns back to back: the first
DATA_TRX_ID_LEN bytes are the DB_TRX_ID image and the remaining
DATA_ROLL_PTR_LEN bytes are the DB_ROLL_PTR image, so that both columns
can be compared against or copied from this buffer in one operation. */
const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN] = {
/* DB_TRX_ID: all zero (presumably DATA_TRX_ID_LEN == 6; verify) */
0, 0, 0, 0, 0, 0,
/* DB_ROLL_PTR: only the most significant bit of the first byte is set
(presumably DATA_ROLL_PTR_LEN == 7 and the value is stored most
significant byte first, i.e. 1 << 55; verify), so the value is not 0 */
0x80, 0, 0, 0, 0, 0, 0
};
/* At the database startup we store the default-charset collation number of
this MySQL installation to this global variable. If we have < 4.1.2 format
column definitions, or records in the insert buffer, we use this
......
......@@ -577,6 +577,9 @@ struct dtype_t{
}
};
/** The DB_TRX_ID,DB_ROLL_PTR values for "no history is available" */
extern const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
#include "data0type.ic"
#endif
......@@ -1082,6 +1082,12 @@ struct dict_index_t{
/** Test the DICT_CLUSTERED flag in the index type.
@return whether the index is the clustered index */
bool is_clust() const
{
	return (type & DICT_CLUSTERED) != 0;
}
/** Check that, of the two flags DICT_CLUSTERED and DICT_IBUF, exactly
DICT_CLUSTERED is set in the index type. This excludes the clustered
index of the change buffer.
@return whether the index is the primary key index */
bool is_primary() const
{
	return (type & (DICT_CLUSTERED | DICT_IBUF)) == DICT_CLUSTERED;
}
/** Determine how many fields of a given prefix can be set NULL.
@param[in] n_prefix number of fields in the prefix
......
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -198,10 +198,11 @@ struct ins_node_t{
this should be reset to NULL */
UT_LIST_BASE_NODE_T(dtuple_t)
entry_list;/* list of entries, one for each index */
byte* row_id_buf;/* buffer for the row id sys field in row */
/** buffer for the system columns */
byte sys_buf[DATA_ROW_ID_LEN
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
trx_id_t trx_id; /*!< trx id or the last trx which executed the
node */
byte* trx_id_buf;/* buffer for the trx id sys field in row */
byte vers_start_buf[8]; /* Buffers for System Versioning */
byte vers_end_buf[8]; /* system fields. */
mem_heap_t* entry_sys_heap;
......
......@@ -59,9 +59,6 @@ Created 13/06/2005 Jan Lindstrom
// Forward declaration
struct ib_sequence_t;
/** The DB_TRX_ID,DB_ROLL_PTR values for "no history is available" */
extern const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
/** @brief Block size for I/O operations in merge sort.
The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
......
......@@ -65,7 +65,7 @@ trx_undo_rec_copy(
len = mach_read_from_2(undo_rec)
- ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
ut_ad(len < UNIV_PAGE_SIZE);
ut_a(len < UNIV_PAGE_SIZE);
trx_undo_rec_t* rec = static_cast<trx_undo_rec_t*>(
mem_heap_dup(heap, undo_rec, len));
mach_write_to_2(rec, len);
......
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2017, MariaDB Corporation.
Copyright (c) 2015, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -32,11 +32,11 @@ Created 2012-02-08 by Sunny Bains.
#include "dict0boot.h"
#include "ibuf0ibuf.h"
#include "pars0pars.h"
#include "row0upd.h"
#include "row0sel.h"
#include "row0mysql.h"
#include "srv0start.h"
#include "row0quiesce.h"
#include "trx0undo.h"
#include "ut0new.h"
#include <vector>
......@@ -897,13 +897,11 @@ class PageConverter : public AbstractCallback {
@param index the index being converted
@param rec record to update
@param offsets column offsets for the record
@param deleted true if row is delete marked
@return DB_SUCCESS or error code. */
dberr_t adjust_cluster_record(
const dict_index_t* index,
rec_t* rec,
const ulint* offsets,
bool deleted) UNIV_NOTHROW;
const ulint* offsets) UNIV_NOTHROW;
/** Find an index with the matching id.
@return row_index_t* instance or 0 */
......@@ -1681,14 +1679,12 @@ PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
/** Adjust the BLOB references and sys fields for the current record.
@param rec record to update
@param offsets column offsets for the record
@param deleted true if row is delete marked
@return DB_SUCCESS or error code. */
dberr_t
PageConverter::adjust_cluster_record(
const dict_index_t* index,
rec_t* rec,
const ulint* offsets,
bool deleted) UNIV_NOTHROW
const ulint* offsets) UNIV_NOTHROW
{
dberr_t err;
......@@ -1697,10 +1693,20 @@ PageConverter::adjust_cluster_record(
/* Reset DB_TRX_ID and DB_ROLL_PTR. Normally, these fields
are only written in conjunction with other changes to the
record. */
row_upd_rec_sys_fields(
rec, m_page_zip_ptr, m_cluster_index, m_offsets,
m_trx, 0);
ulint trx_id_pos = m_cluster_index->n_uniq
? m_cluster_index->n_uniq : 1;
if (m_page_zip_ptr) {
page_zip_write_trx_id_and_roll_ptr(
m_page_zip_ptr, rec, m_offsets, trx_id_pos,
0, roll_ptr_t(1) << ROLL_PTR_INSERT_FLAG_POS,
NULL);
} else {
ulint len;
byte* ptr = rec_get_nth_field(
rec, m_offsets, trx_id_pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
memcpy(ptr, reset_trx_id, sizeof reset_trx_id);
}
}
return(err);
......@@ -1743,8 +1749,7 @@ PageConverter::update_records(
if (clust_index) {
dberr_t err = adjust_cluster_record(
m_index->m_srv_index, rec, m_offsets,
deleted);
m_index->m_srv_index, rec, m_offsets);
if (err != DB_SUCCESS) {
return(err);
......
......@@ -139,49 +139,44 @@ row_ins_alloc_sys_fields(
{
dtuple_t* row;
dict_table_t* table;
mem_heap_t* heap;
const dict_col_t* col;
dfield_t* dfield;
byte* ptr;
row = node->row;
table = node->table;
heap = node->entry_sys_heap;
ut_ad(row && table && heap);
ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table));
/* allocate buffer to hold the needed system created hidden columns. */
const uint len = DATA_ROW_ID_LEN + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
ptr = static_cast<byte*>(mem_heap_zalloc(heap, len));
compile_time_assert(DATA_ROW_ID_LEN
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
== sizeof node->sys_buf);
memset(node->sys_buf, 0, DATA_ROW_ID_LEN);
memcpy(node->sys_buf + DATA_ROW_ID_LEN, reset_trx_id,
sizeof reset_trx_id);
/* 1. Populate row-id */
col = dict_table_get_sys_col(table, DATA_ROW_ID);
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN);
node->row_id_buf = ptr;
ptr += DATA_ROW_ID_LEN;
dfield_set_data(dfield, node->sys_buf, DATA_ROW_ID_LEN);
/* 2. Populate trx id */
col = dict_table_get_sys_col(table, DATA_TRX_ID);
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN);
node->trx_id_buf = ptr;
ptr += DATA_TRX_ID_LEN;
dfield_set_data(dfield, &node->sys_buf[DATA_ROW_ID_LEN],
DATA_TRX_ID_LEN);
col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
dfield_set_data(dfield, &node->sys_buf[DATA_ROW_ID_LEN
+ DATA_TRX_ID_LEN],
DATA_ROLL_PTR_LEN);
}
/*********************************************************************//**
......@@ -3557,7 +3552,7 @@ row_ins_alloc_row_id_step(
row_id = dict_sys_get_new_row_id();
dict_sys_write_row_id(node->row_id_buf, row_id);
dict_sys_write_row_id(node->sys_buf, row_id);
}
/***********************************************************//**
......@@ -3848,7 +3843,7 @@ row_ins_step(
This happens, for example, when a row update moves it to another
partition. In that case, we have already set the IX lock on the
table during the search operation, and there is no need to set
it again here. But we must write trx->id to node->trx_id_buf. */
it again here. But we must write trx->id to node->sys_buf. */
if (node->table->no_rollback()) {
/* No-rollback tables should only be written to by a
......@@ -3863,15 +3858,15 @@ row_ins_step(
restarting here. In theory, we could allow resumption
from the INS_NODE_INSERT_ENTRIES state here. */
DBUG_ASSERT(node->state == INS_NODE_SET_IX_LOCK);
memset(node->trx_id_buf, 0, DATA_TRX_ID_LEN);
memset(node->row_id_buf, 0, DATA_ROW_ID_LEN);
node->index = dict_table_get_first_index(node->table);
node->entry = UT_LIST_GET_FIRST(node->entry_list);
node->state = INS_NODE_INSERT_ENTRIES;
goto do_insert;
}
trx_write_trx_id(node->trx_id_buf, trx->id);
if (UNIV_LIKELY(!node->table->skip_alter_undo)) {
trx_write_trx_id(&node->sys_buf[DATA_ROW_ID_LEN], trx->id);
}
if (node->state == INS_NODE_SET_IX_LOCK) {
......
......@@ -63,12 +63,6 @@ float my_log2f(float n)
# define posix_fadvise(fd, offset, len, advice) /* nothing */
#endif /* _WIN32 */
/** The DB_TRX_ID,DB_ROLL_PTR values for "no history is available".
The buffer holds the two system columns back to back: the first
DATA_TRX_ID_LEN bytes are the DB_TRX_ID image and the remaining
DATA_ROLL_PTR_LEN bytes are the DB_ROLL_PTR image, so that both columns
can be compared against or copied from this buffer in one operation. */
const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN] = {
/* DB_TRX_ID: all zero (presumably DATA_TRX_ID_LEN == 6; verify) */
0, 0, 0, 0, 0, 0,
/* DB_ROLL_PTR: only the most significant bit of the first byte is set
(presumably DATA_ROLL_PTR_LEN == 7 and the value is stored most
significant byte first, i.e. 1 << 55; verify), so the value is not 0 */
0x80, 0, 0, 0, 0, 0, 0
};
/* Whether to disable file system cache */
char srv_disable_sort_file_cache;
......
......@@ -1616,7 +1616,7 @@ row_insert_for_mysql(
if (prebuilt->clust_index_was_generated) {
/* set row id to prebuilt */
ut_memcpy(prebuilt->row_id, node->row_id_buf, DATA_ROW_ID_LEN);
memcpy(prebuilt->row_id, node->sys_buf, DATA_ROW_ID_LEN);
}
dict_stats_update_if_needed(table);
......
......@@ -722,13 +722,8 @@ row_purge_reset_trx_id(purge_node_t* node, mtr_t* mtr)
byte* ptr = rec_get_nth_field(
rec, offsets, trx_id_pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
memset(ptr, 0, DATA_TRX_ID_LEN
+ DATA_ROLL_PTR_LEN);
ptr[DATA_TRX_ID_LEN] = 1U
<< (ROLL_PTR_INSERT_FLAG_POS - CHAR_BIT
* (DATA_ROLL_PTR_LEN - 1));
mlog_log_string(ptr, DATA_TRX_ID_LEN
+ DATA_ROLL_PTR_LEN, mtr);
mlog_write_string(ptr, reset_trx_id,
sizeof reset_trx_id, mtr);
}
}
}
......
......@@ -954,6 +954,7 @@ trx_undo_page_report_modify(
dict_index_get_sys_col_pos(
index, DATA_ROLL_PTR), &flen);
ut_ad(flen == DATA_ROLL_PTR_LEN);
ut_ad(memcmp(field, field_ref_zero, DATA_ROLL_PTR_LEN));
ptr += mach_u64_write_compressed(ptr, trx_read_roll_ptr(field));
......@@ -2159,6 +2160,8 @@ trx_undo_get_undo_rec_low(
trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
&offset);
ut_ad(page_no > FSP_FIRST_INODE_PAGE_NO);
ut_ad(offset >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
rseg = is_temp
? trx_sys.temp_rsegs[rseg_id]
: trx_sys.rseg_array[rseg_id];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment