Commit 2c4844c9 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-17813 Crash in instant ALTER TABLE due to purge concurrently emptying table

Several race conditions between MDEV-15562 instant ALTER TABLE and purge
were observed.

The most obvious race condition resulted in a reported assertion failure in
dict_index_t::instant_add_field(): instant.n_core_fields == n_core_fields
would not hold if the table was emptied by purge after the time
dict_table_t::prepare_instant() was called.

During purge, it can turn out that the table is logically empty, only
containing a metadata record. If the metadata record is of the type
created by MDEV-11369 instant ADD COLUMN, it can be removed and
dict_index_t::clear_instant_add() can be called. This will convert the
table to the canonical non-instant format. (If the metadata record is
of the MDEV-15562 type, then it can only be deleted if the table becomes
empty as the result of rollback of an instant ALTER TABLE operation.)

row_purge_remove_clust_if_poss_low(): Add a debug check that ensures
that purge can never remove a MDEV-15562 metadata record.

ha_innobase::open(): Add a comment about the necessity of rolling
back any recovered instant ALTER TABLE transaction on the table.

instant_metadata_lock(): An auxiliary function to acquire a page latch
on the metadata record, to prevent race conditions.

dict_table_t::prepare_instant(), dict_index_t::instant_add_field(),
dict_table_t::rollback_instant(), innobase_instant_try():
Invoke instant_metadata_lock() in order to prevent race conditions.

dict_index_t::instant_add_field(): Correct debug assertions.
The == was guaranteed by code in dict_table_t::prepare_instant()
that was introduced in MDEV-15562. Due to the race condition,
we could occasionally have <=, but never >= like the code was
after MDEV-11369.

ha_innobase_inplace_ctx::instant_column(): Wrapper for
dict_table_t::instant_column(). Add debug assertions.
parent 46a41108
......@@ -21,4 +21,26 @@ ALTER TABLE t1 DROP extra;
disconnect prevent_purge;
InnoDB 0 transactions not purged
DROP TABLE t1;
#
# MDEV-17813 Crash in instant ALTER TABLE due to purge
# concurrently emptying table
#
CREATE TABLE t1 (f2 INT) ENGINE=InnoDB;
INSERT INTO t1 SET f2=1;
ALTER TABLE t1 ADD COLUMN f1 INT;
connect purge_control,localhost,root;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
DELETE FROM t1;
SET DEBUG_SYNC='innodb_commit_inplace_alter_table_enter SIGNAL go WAIT_FOR do';
ALTER TABLE t1 ADD COLUMN f3 INT;
connection purge_control;
SET DEBUG_SYNC='now WAIT_FOR go';
COMMIT;
InnoDB 0 transactions not purged
SET DEBUG_SYNC='now SIGNAL do';
disconnect purge_control;
connection default;
SET DEBUG_SYNC=RESET;
DROP TABLE t1;
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
......@@ -30,4 +30,34 @@ disconnect prevent_purge;
let $wait_all_purged= 0;
--source include/wait_all_purged.inc
DROP TABLE t1;
--echo #
--echo # MDEV-17813 Crash in instant ALTER TABLE due to purge
--echo # concurrently emptying table
--echo #
CREATE TABLE t1 (f2 INT) ENGINE=InnoDB;
INSERT INTO t1 SET f2=1;
ALTER TABLE t1 ADD COLUMN f1 INT;
connect (purge_control,localhost,root);
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
DELETE FROM t1;
SET DEBUG_SYNC='innodb_commit_inplace_alter_table_enter SIGNAL go WAIT_FOR do';
send ALTER TABLE t1 ADD COLUMN f3 INT;
connection purge_control;
SET DEBUG_SYNC='now WAIT_FOR go';
COMMIT;
--source include/wait_all_purged.inc
SET DEBUG_SYNC='now SIGNAL do';
disconnect purge_control;
connection default;
reap;
SET DEBUG_SYNC=RESET;
DROP TABLE t1;
SET GLOBAL innodb_purge_rseg_truncate_frequency = @saved_frequency;
......@@ -6046,6 +6046,14 @@ initialize_auto_increment(dict_table_t* table, const Field* field)
int
ha_innobase::open(const char* name, int, uint)
{
/* TODO: If trx_rollback_recovered(bool all=false) is ever
removed, the first-time open() must hold (or acquire and release)
a table lock that conflicts with trx_resurrect_table_locks(),
to ensure that any recovered incomplete ALTER TABLE will have been
rolled back. Otherwise, dict_table_t::instant could be cleared by
the rollback invoking dict_index_t::clear_instant_alter() while
open table handles exist in client connections. */
dict_table_t* ib_table;
char norm_name[FN_REFLEN];
dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE;
......
......@@ -136,6 +136,30 @@ static const alter_table_operations INNOBASE_ALTER_INSTANT
| ALTER_COLUMN_UNVERSIONED
| ALTER_DROP_VIRTUAL_COLUMN;
/** Acquire a page latch on the possible metadata record,
to prevent concurrent invocation of dict_index_t::clear_instant_alter()
by purge when the table turns out to be empty.
@param[in,out] index clustered index
@param[in,out] mtr mini-transaction */
static void instant_metadata_lock(dict_index_t& index, mtr_t& mtr)
{
DBUG_ASSERT(index.is_primary());
if (!index.is_instant()) {
/* dict_index_t::clear_instant_alter() cannot be called.
No need for a latch. */
return;
}
btr_cur_t btr_cur;
btr_cur_open_at_index_side(true, &index, BTR_SEARCH_LEAF,
&btr_cur, 0, &mtr);
ut_ad(page_cur_is_before_first(btr_cur_get_page_cur(&btr_cur)));
ut_ad(page_is_leaf(btr_cur_get_page(&btr_cur)));
ut_ad(!page_has_prev(btr_cur_get_page(&btr_cur)));
ut_ad(!buf_block_get_page_zip(btr_cur_get_block(&btr_cur)));
}
/** Set is_instant() before instant_column().
@param[in] old previous table definition
@param[in] col_map map from old.cols[] and old.v_cols[] to this
......@@ -152,10 +176,16 @@ inline void dict_table_t::prepare_instant(const dict_table_t& old,
DBUG_ASSERT(old.supports_instant());
DBUG_ASSERT(supports_instant());
const dict_index_t& oindex = *old.indexes.start;
dict_index_t& oindex = *old.indexes.start;
dict_index_t& index = *indexes.start;
first_alter_pos = 0;
mtr_t mtr;
mtr.start();
/* Prevent oindex.n_core_fields and others, so that
purge cannot invoke dict_index_t::clear_instant_alter(). */
instant_metadata_lock(oindex, mtr);
for (unsigned i = 0; i + DATA_N_SYS_COLS < old.n_cols;
i++) {
if (col_map[i] != i) {
......@@ -315,6 +345,7 @@ inline void dict_table_t::prepare_instant(const dict_table_t& old,
DBUG_ASSERT(n_dropped() >= old.n_dropped());
DBUG_ASSERT(index.n_core_fields == oindex.n_core_fields);
DBUG_ASSERT(index.n_core_null_bytes == oindex.n_core_null_bytes);
mtr.commit();
}
......@@ -335,8 +366,15 @@ inline void dict_index_t::instant_add_field(const dict_index_t& instant)
DBUG_ASSERT(n_uniq == instant.n_uniq);
DBUG_ASSERT(instant.n_fields >= n_fields);
DBUG_ASSERT(instant.n_nullable >= n_nullable);
DBUG_ASSERT(instant.n_core_fields == n_core_fields);
DBUG_ASSERT(instant.n_core_null_bytes == n_core_null_bytes);
/* dict_table_t::prepare_instant() initialized n_core_fields
to be equal. However, after that purge could have emptied the
table and invoked dict_index_t::clear_instant_alter(). */
DBUG_ASSERT(instant.n_core_fields <= n_core_fields);
DBUG_ASSERT(instant.n_core_null_bytes <= n_core_null_bytes);
DBUG_ASSERT(instant.n_core_fields == n_core_fields
|| (!is_instant() && instant.is_instant()));
DBUG_ASSERT(instant.n_core_null_bytes == n_core_null_bytes
|| (!is_instant() && instant.is_instant()));
/* instant will have all fields (including ones for columns
that have been or are being instantly dropped) in the same position
......@@ -627,7 +665,13 @@ inline void dict_table_t::rollback_instant(
const ulint* col_map)
{
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_X));
dict_index_t* index = indexes.start;
mtr_t mtr;
mtr.start();
/* Prevent concurrent execution of dict_index_t::clear_instant_alter()
by acquiring a latch on the leftmost leaf page. */
instant_metadata_lock(*index, mtr);
/* index->is_instant() does not necessarily hold here, because
the table may have been emptied */
DBUG_ASSERT(old_n_cols >= DATA_N_SYS_COLS);
......@@ -667,6 +711,7 @@ inline void dict_table_t::rollback_instant(
n_t_def = n_t_cols = n_cols + n_v_cols;
index->fields = old_fields;
mtr.commit();
while ((index = dict_table_get_next_index(index)) != NULL) {
if (index->to_be_dropped) {
......@@ -924,6 +969,15 @@ struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
first_alter_pos);
}
/** Adjust table metadata for instant ADD/DROP/reorder COLUMN. */
void instant_column()
{
DBUG_ASSERT(is_instant());
DBUG_ASSERT(old_n_fields
== old_table->indexes.start->n_fields);
old_table->instant_column(*instant_table, col_map);
}
/** Revert prepare_instant() if the transaction is rolled back. */
void rollback_instant()
{
......@@ -5287,13 +5341,24 @@ static bool innobase_instant_try(
dict_table_t* user_table = ctx->old_table;
dict_index_t* index = dict_table_get_first_index(user_table);
uint n_old_fields = index->n_fields;
mtr_t mtr;
mtr.start();
/* Prevent purge from calling dict_index_t::clear_instant_alter(),
to protect index->n_core_fields, index->table->instant and others
from changing during ctx->instant_column(). */
instant_metadata_lock(*index, mtr);
const unsigned n_old_fields = index->n_fields;
const dict_col_t* old_cols = user_table->cols;
DBUG_ASSERT(user_table->n_cols == ctx->old_n_cols);
user_table->instant_column(*ctx->instant_table, ctx->col_map);
ctx->instant_column();
DBUG_ASSERT(index->n_fields >= n_old_fields);
/* Release the page latch. Between this and the next
btr_pcur_open_at_index_side(), data fields such as
index->n_core_fields and index->table->instant could change,
but we would handle that in empty_table: below. */
mtr.commit();
/* The table may have been emptied and may have lost its
'instantness' during this ALTER TABLE. */
......@@ -5441,7 +5506,6 @@ static bool innobase_instant_try(
memset(roll_ptr, 0, sizeof roll_ptr);
dtuple_t* entry = index->instant_metadata(*row, ctx->heap);
mtr_t mtr;
mtr.start();
index->set_modified(mtr);
btr_pcur_t pcur;
......
......@@ -127,33 +127,32 @@ row_purge_remove_clust_if_poss_low(
purge_node_t* node, /*!< in/out: row purge node */
ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
dict_index_t* index;
bool success = true;
mtr_t mtr;
rec_t* rec;
mem_heap_t* heap = NULL;
ulint* offsets;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
ut_ad(rw_lock_own(dict_operation_lock, RW_LOCK_S)
|| node->vcol_info.is_used());
index = dict_table_get_first_index(node->table);
dict_index_t* index = dict_table_get_first_index(node->table);
log_free_check();
mtr_start(&mtr);
index->set_modified(mtr);
mtr_t mtr;
mtr.start();
if (!row_purge_reposition_pcur(mode, node, &mtr)) {
/* The record was already removed. */
goto func_exit;
mtr.commit();
return true;
}
rec = btr_pcur_get_rec(&node->pcur);
ut_d(const bool was_instant = !!index->table->instant);
index->set_modified(mtr);
offsets = rec_get_offsets(
rec_t* rec = btr_pcur_get_rec(&node->pcur);
ulint offsets_[REC_OFFS_NORMAL_SIZE];
rec_offs_init(offsets_);
mem_heap_t* heap = NULL;
ulint* offsets = rec_get_offsets(
rec, index, offsets_, true, ULINT_UNDEFINED, &heap);
bool success = true;
if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
/* Someone else has modified the record later: do not remove */
......@@ -186,6 +185,10 @@ row_purge_remove_clust_if_poss_low(
}
}
/* Prove that dict_index_t::clear_instant_alter() was
not called with index->table->instant != NULL. */
ut_ad(!was_instant || index->table->instant);
func_exit:
if (heap) {
mem_heap_free(heap);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment