Commit f2096478 authored by Marko Mäkelä

MDEV-29835 InnoDB hang on B-tree split or merge

This is a follow-up to
commit de4030e4 (MDEV-30400),
which fixed some hangs related to B-tree split or merge.

btr_root_block_get(): Use and update the root page guess. This is just
a minor performance optimization, not affecting correctness.

btr_validate_level(): Remove the parameter "lockout", and always
acquire an exclusive dict_index_t::lock in CHECK TABLE without QUICK.
This is needed in order to avoid latching order violation in
btr_page_get_father_node_ptr_for_validate().

btr_cur_need_opposite_intention(): Return true in case
btr_cur_compress_recommendation() would hold later during the
mini-transaction, or if a page underflow or overflow is possible.
If we return true, our caller will escalate to acquiring an exclusive
dict_index_t::lock, to prevent a latching order violation and deadlock
during btr_compress() or btr_page_split_and_insert().

btr_cur_t::search_leaf(), btr_cur_t::open_leaf():
Also invoke btr_cur_need_opposite_intention() on the leaf page.

btr_cur_t::open_leaf(): When escalating to exclusive index locking,
acquire exclusive latches on all pages as well.

innobase_instant_try(): Return an error code if the root page cannot
be retrieved.

In addition to the normal stress testing with Random Query Generator (RQG),
this has been tested with
./mtr --mysqld=--loose-innodb-limit-optimistic-insert-debug=2
but with the injection in btr_cur_optimistic_insert() for non-leaf pages
adjusted so that it would use the value 3. (Otherwise, infinite page
splits could occur in some mtr tests.)

Tested by: Matthias Leich
parent 85cbfaef
...@@ -254,7 +254,7 @@ Gets the root node of a tree and x- or s-latches it. ...@@ -254,7 +254,7 @@ Gets the root node of a tree and x- or s-latches it.
buf_block_t* buf_block_t*
btr_root_block_get( btr_root_block_get(
/*===============*/ /*===============*/
const dict_index_t* index, /*!< in: index tree */ dict_index_t* index, /*!< in: index tree */
rw_lock_type_t mode, /*!< in: either RW_S_LATCH rw_lock_type_t mode, /*!< in: either RW_S_LATCH
or RW_X_LATCH */ or RW_X_LATCH */
mtr_t* mtr, /*!< in: mtr */ mtr_t* mtr, /*!< in: mtr */
...@@ -266,11 +266,31 @@ btr_root_block_get( ...@@ -266,11 +266,31 @@ btr_root_block_get(
return nullptr; return nullptr;
} }
buf_block_t *block = btr_block_get(*index, index->page, mode, false, mtr, buf_block_t *block;
err); #ifndef BTR_CUR_ADAPT
if (block) static constexpr buf_block_t *guess= nullptr;
#else
buf_block_t *&guess= btr_search_get_info(index)->root_guess;
guess=
#endif
block=
buf_page_get_gen(page_id_t{index->table->space->id, index->page},
index->table->space->zip_size(), mode, guess, BUF_GET,
mtr, err, false);
ut_ad(!block == (*err != DB_SUCCESS));
if (UNIV_LIKELY(block != nullptr))
{ {
if (index->is_ibuf()); if (!!page_is_comp(block->page.frame) != index->table->not_redundant() ||
btr_page_get_index_id(block->page.frame) != index->id ||
!fil_page_index_page_check(block->page.frame) ||
index->is_spatial() !=
(fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE))
{
*err= DB_PAGE_CORRUPTED;
block= nullptr;
}
else if (index->is_ibuf());
else if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF, else if (!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF,
*block, *index->table->space) || *block, *index->table->space) ||
!btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP, !btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP,
...@@ -280,6 +300,9 @@ btr_root_block_get( ...@@ -280,6 +300,9 @@ btr_root_block_get(
block= nullptr; block= nullptr;
} }
} }
else if (*err == DB_DECRYPTION_FAILED)
btr_decryption_failed(*index);
return block; return block;
} }
...@@ -290,7 +313,7 @@ static ...@@ -290,7 +313,7 @@ static
page_t* page_t*
btr_root_get( btr_root_get(
/*=========*/ /*=========*/
const dict_index_t* index, /*!< in: index tree */ dict_index_t* index, /*!< in: index tree */
mtr_t* mtr, /*!< in: mtr */ mtr_t* mtr, /*!< in: mtr */
dberr_t* err) /*!< out: error code */ dberr_t* err) /*!< out: error code */
{ {
...@@ -502,9 +525,7 @@ btr_block_reget(mtr_t *mtr, const dict_index_t &index, ...@@ -502,9 +525,7 @@ btr_block_reget(mtr_t *mtr, const dict_index_t &index,
return block; return block;
} }
#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */
ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK)); ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK));
#endif
return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err); return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err);
} }
...@@ -773,9 +794,7 @@ btr_page_get_father_node_ptr_for_validate( ...@@ -773,9 +794,7 @@ btr_page_get_father_node_ptr_for_validate(
const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no(); const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no();
dict_index_t* index = btr_cur_get_index(cursor); dict_index_t* index = btr_cur_get_index(cursor);
ut_ad(!dict_index_is_spatial(index)); ut_ad(!dict_index_is_spatial(index));
ut_ad(mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK
| MTR_MEMO_SX_LOCK));
ut_ad(dict_index_get_page(index) != page_no); ut_ad(dict_index_get_page(index) != page_no);
const auto level = btr_page_get_level(btr_cur_get_page(cursor)); const auto level = btr_page_get_level(btr_cur_get_page(cursor));
...@@ -793,10 +812,6 @@ btr_page_get_father_node_ptr_for_validate( ...@@ -793,10 +812,6 @@ btr_page_get_father_node_ptr_for_validate(
} }
const rec_t* node_ptr = btr_cur_get_rec(cursor); const rec_t* node_ptr = btr_cur_get_rec(cursor);
#if 0 /* MDEV-29835 FIXME */
ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive()
|| mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
#endif
offsets = rec_get_offsets(node_ptr, index, offsets, 0, offsets = rec_get_offsets(node_ptr, index, offsets, 0,
ULINT_UNDEFINED, &heap); ULINT_UNDEFINED, &heap);
...@@ -2456,11 +2471,10 @@ btr_insert_on_non_leaf_level( ...@@ -2456,11 +2471,10 @@ btr_insert_on_non_leaf_level(
} }
ut_ad(cursor.flag == BTR_CUR_BINARY); ut_ad(cursor.flag == BTR_CUR_BINARY);
#if 0 /* MDEV-29835 FIXME */ ut_ad(btr_cur_get_block(&cursor)
ut_ad(!btr_cur_get_block(&cursor)->page.lock.not_recursive() != mtr->at_savepoint(mtr->get_savepoint() - 1)
|| index->is_spatial() || index->is_spatial()
|| mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK));
#endif
if (UNIV_LIKELY(err == DB_SUCCESS)) { if (UNIV_LIKELY(err == DB_SUCCESS)) {
err = btr_cur_optimistic_insert(flags, err = btr_cur_optimistic_insert(flags,
...@@ -2568,10 +2582,8 @@ btr_attach_half_pages( ...@@ -2568,10 +2582,8 @@ btr_attach_half_pages(
prev_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); prev_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ #if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!prev_block) { if (!prev_block) {
# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index->lock, ut_ad(mtr->memo_contains(index->lock,
MTR_MEMO_X_LOCK)); MTR_MEMO_X_LOCK));
# endif
prev_block = btr_block_get(*index, prev_page_no, prev_block = btr_block_get(*index, prev_page_no,
RW_X_LATCH, !level, mtr); RW_X_LATCH, !level, mtr);
} }
...@@ -2582,10 +2594,8 @@ btr_attach_half_pages( ...@@ -2582,10 +2594,8 @@ btr_attach_half_pages(
next_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); next_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX);
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ #if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!next_block) { if (!next_block) {
# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index->lock, ut_ad(mtr->memo_contains(index->lock,
MTR_MEMO_X_LOCK)); MTR_MEMO_X_LOCK));
# endif
next_block = btr_block_get(*index, next_page_no, next_block = btr_block_get(*index, next_page_no,
RW_X_LATCH, !level, mtr); RW_X_LATCH, !level, mtr);
} }
...@@ -3397,9 +3407,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block, ...@@ -3397,9 +3407,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block,
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ #if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!prev) if (!prev)
{ {
# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
# endif
prev= btr_block_get(index, id.page_no(), RW_X_LATCH, prev= btr_block_get(index, id.page_no(), RW_X_LATCH,
page_is_leaf(block.page.frame), mtr, &err); page_is_leaf(block.page.frame), mtr, &err);
if (UNIV_UNLIKELY(!prev)) if (UNIV_UNLIKELY(!prev))
...@@ -3415,9 +3423,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block, ...@@ -3415,9 +3423,7 @@ dberr_t btr_level_list_remove(const buf_block_t& block,
#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ #if 1 /* MDEV-29835 FIXME: acquire page latches upfront */
if (!next) if (!next)
{ {
# if 0 /* MDEV-29835 FIXME */
ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK));
# endif
next= btr_block_get(index, id.page_no(), RW_X_LATCH, next= btr_block_get(index, id.page_no(), RW_X_LATCH,
page_is_leaf(block.page.frame), mtr, &err); page_is_leaf(block.page.frame), mtr, &err);
if (UNIV_UNLIKELY(!next)) if (UNIV_UNLIKELY(!next))
...@@ -4291,7 +4297,7 @@ btr_discard_page( ...@@ -4291,7 +4297,7 @@ btr_discard_page(
if (UNIV_UNLIKELY(!merge_block)) { if (UNIV_UNLIKELY(!merge_block)) {
return err; return err;
} }
#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */ #if 1 /* MDEV-29835 FIXME: Acquire the page latch upfront. */
ut_ad(!memcmp_aligned<4>(merge_block->page.frame ut_ad(!memcmp_aligned<4>(merge_block->page.frame
+ FIL_PAGE_NEXT, + FIL_PAGE_NEXT,
block->page.frame + FIL_PAGE_OFFSET, block->page.frame + FIL_PAGE_OFFSET,
...@@ -4317,7 +4323,7 @@ btr_discard_page( ...@@ -4317,7 +4323,7 @@ btr_discard_page(
if (UNIV_UNLIKELY(!merge_block)) { if (UNIV_UNLIKELY(!merge_block)) {
return err; return err;
} }
#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */ #if 1 /* MDEV-29835 FIXME: Acquire the page latch upfront. */
ut_ad(!memcmp_aligned<4>(merge_block->page.frame ut_ad(!memcmp_aligned<4>(merge_block->page.frame
+ FIL_PAGE_PREV, + FIL_PAGE_PREV,
block->page.frame + FIL_PAGE_OFFSET, block->page.frame + FIL_PAGE_OFFSET,
...@@ -4898,8 +4904,7 @@ btr_validate_level( ...@@ -4898,8 +4904,7 @@ btr_validate_level(
/*===============*/ /*===============*/
dict_index_t* index, /*!< in: index tree */ dict_index_t* index, /*!< in: index tree */
const trx_t* trx, /*!< in: transaction or NULL */ const trx_t* trx, /*!< in: transaction or NULL */
ulint level, /*!< in: level number */ ulint level) /*!< in: level number */
bool lockout)/*!< in: true if X-latch index is intended */
{ {
buf_block_t* block; buf_block_t* block;
page_t* page; page_t* page;
...@@ -4918,18 +4923,10 @@ btr_validate_level( ...@@ -4918,18 +4923,10 @@ btr_validate_level(
#ifdef UNIV_ZIP_DEBUG #ifdef UNIV_ZIP_DEBUG
page_zip_des_t* page_zip; page_zip_des_t* page_zip;
#endif /* UNIV_ZIP_DEBUG */ #endif /* UNIV_ZIP_DEBUG */
ulint savepoint = 0;
uint32_t parent_page_no = FIL_NULL;
uint32_t parent_right_page_no = FIL_NULL;
bool rightmost_child = false;
mtr.start(); mtr.start();
if (lockout) { mtr_x_lock_index(index, &mtr);
mtr_x_lock_index(index, &mtr);
} else {
mtr_sx_lock_index(index, &mtr);
}
dberr_t err; dberr_t err;
block = btr_root_block_get(index, RW_SX_LATCH, &mtr, &err); block = btr_root_block_get(index, RW_SX_LATCH, &mtr, &err);
...@@ -5025,11 +5022,7 @@ btr_validate_level( ...@@ -5025,11 +5022,7 @@ btr_validate_level(
mem_heap_empty(heap); mem_heap_empty(heap);
offsets = offsets2 = NULL; offsets = offsets2 = NULL;
if (lockout) { mtr_x_lock_index(index, &mtr);
mtr_x_lock_index(index, &mtr);
} else {
mtr_sx_lock_index(index, &mtr);
}
page = block->page.frame; page = block->page.frame;
...@@ -5073,7 +5066,6 @@ btr_validate_level( ...@@ -5073,7 +5066,6 @@ btr_validate_level(
if (right_page_no != FIL_NULL) { if (right_page_no != FIL_NULL) {
const rec_t* right_rec; const rec_t* right_rec;
savepoint = mtr.get_savepoint();
right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH, right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
!level, &mtr, &err); !level, &mtr, &err);
...@@ -5177,11 +5169,6 @@ btr_validate_level( ...@@ -5177,11 +5169,6 @@ btr_validate_level(
father_page = btr_cur_get_page(&node_cur); father_page = btr_cur_get_page(&node_cur);
node_ptr = btr_cur_get_rec(&node_cur); node_ptr = btr_cur_get_rec(&node_cur);
parent_page_no = page_get_page_no(father_page);
parent_right_page_no = btr_page_get_next(father_page);
rightmost_child = page_rec_is_supremum(
page_rec_get_next(node_ptr));
rec = page_rec_get_prev(page_get_supremum_rec(page)); rec = page_rec_get_prev(page_get_supremum_rec(page));
if (rec) { if (rec) {
btr_cur_position(index, rec, block, &node_cur); btr_cur_position(index, rec, block, &node_cur);
...@@ -5263,37 +5250,6 @@ btr_validate_level( ...@@ -5263,37 +5250,6 @@ btr_validate_level(
} }
} else if (const rec_t* right_node_ptr } else if (const rec_t* right_node_ptr
= page_rec_get_next(node_ptr)) { = page_rec_get_next(node_ptr)) {
if (!lockout && rightmost_child) {
/* To obey latch order of tree blocks,
we should release the right_block once to
obtain lock of the uncle block. */
ut_ad(right_block
== mtr.at_savepoint(savepoint));
mtr.rollback_to_savepoint(savepoint,
savepoint + 1);
if (parent_right_page_no != FIL_NULL) {
btr_block_get(*index,
parent_right_page_no,
RW_SX_LATCH, false,
&mtr);
}
right_block = btr_block_get(*index,
right_page_no,
RW_SX_LATCH,
!level, &mtr,
&err);
if (!right_block) {
btr_validate_report1(index, level,
block);
fputs("InnoDB: broken FIL_PAGE_NEXT"
" link\n", stderr);
goto invalid_page;
}
}
btr_cur_position( btr_cur_position(
index, index,
page_get_infimum_rec(right_block->page.frame), page_get_infimum_rec(right_block->page.frame),
...@@ -5365,20 +5321,6 @@ btr_validate_level( ...@@ -5365,20 +5321,6 @@ btr_validate_level(
mtr.start(); mtr.start();
if (!lockout) {
if (rightmost_child) {
if (parent_right_page_no != FIL_NULL) {
btr_block_get(*index,
parent_right_page_no,
RW_SX_LATCH, false,
&mtr);
}
} else if (parent_page_no != FIL_NULL) {
btr_block_get(*index, parent_page_no,
RW_SX_LATCH, false, &mtr);
}
}
block = btr_block_get(*index, right_page_no, RW_SX_LATCH, block = btr_block_get(*index, right_page_no, RW_SX_LATCH,
!level, &mtr, &err); !level, &mtr, &err);
goto loop; goto loop;
...@@ -5396,21 +5338,16 @@ btr_validate_index( ...@@ -5396,21 +5338,16 @@ btr_validate_index(
dict_index_t* index, /*!< in: index */ dict_index_t* index, /*!< in: index */
const trx_t* trx) /*!< in: transaction or NULL */ const trx_t* trx) /*!< in: transaction or NULL */
{ {
const bool lockout= index->is_spatial();
mtr_t mtr; mtr_t mtr;
mtr.start(); mtr.start();
if (lockout) mtr_x_lock_index(index, &mtr);
mtr_x_lock_index(index, &mtr);
else
mtr_sx_lock_index(index, &mtr);
dberr_t err; dberr_t err;
if (page_t *root= btr_root_get(index, &mtr, &err)) if (page_t *root= btr_root_get(index, &mtr, &err))
for (auto level= btr_page_get_level(root);; level--) for (auto level= btr_page_get_level(root);; level--)
{ {
if (dberr_t err_level= btr_validate_level(index, trx, level, lockout)) if (dberr_t err_level= btr_validate_level(index, trx, level))
err= err_level; err= err_level;
if (!level) if (!level)
break; break;
......
This diff is collapsed.
...@@ -6104,6 +6104,7 @@ static bool innobase_instant_try( ...@@ -6104,6 +6104,7 @@ static bool innobase_instant_try(
id, MTR_MEMO_PAGE_SX_FIX); id, MTR_MEMO_PAGE_SX_FIX);
if (UNIV_UNLIKELY(!root)) { if (UNIV_UNLIKELY(!root)) {
err = DB_CORRUPTION;
goto func_exit; goto func_exit;
} }
......
...@@ -445,7 +445,7 @@ Gets the root node of a tree and x- or s-latches it. ...@@ -445,7 +445,7 @@ Gets the root node of a tree and x- or s-latches it.
buf_block_t* buf_block_t*
btr_root_block_get( btr_root_block_get(
/*===============*/ /*===============*/
const dict_index_t* index, /*!< in: index tree */ dict_index_t* index, /*!< in: index tree */
rw_lock_type_t mode, /*!< in: either RW_S_LATCH rw_lock_type_t mode, /*!< in: either RW_S_LATCH
or RW_X_LATCH */ or RW_X_LATCH */
mtr_t* mtr, /*!< in: mtr */ mtr_t* mtr, /*!< in: mtr */
......
...@@ -103,6 +103,9 @@ enum btr_latch_mode { ...@@ -103,6 +103,9 @@ enum btr_latch_mode {
dict_index_t::lock is being held in non-exclusive mode. */ dict_index_t::lock is being held in non-exclusive mode. */
BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF
| BTR_ALREADY_S_LATCHED, | BTR_ALREADY_S_LATCHED,
/** Attempt to modify records in an x-latched tree. */
BTR_MODIFY_TREE_ALREADY_LATCHED = BTR_MODIFY_TREE
| BTR_ALREADY_S_LATCHED,
/** U-latch root and X-latch a leaf page, assuming that /** U-latch root and X-latch a leaf page, assuming that
dict_index_t::lock is being held in U mode. */ dict_index_t::lock is being held in U mode. */
BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF
......
...@@ -344,6 +344,9 @@ struct mtr_t { ...@@ -344,6 +344,9 @@ struct mtr_t {
/** Upgrade U locks on a block to X */ /** Upgrade U locks on a block to X */
void page_lock_upgrade(const buf_block_t &block); void page_lock_upgrade(const buf_block_t &block);
/** Upgrade index U lock to X */
ATTRIBUTE_COLD void index_lock_upgrade();
/** Check if we are holding tablespace latch /** Check if we are holding tablespace latch
@param space tablespace to search for @param space tablespace to search for
@return whether space.latch is being held */ @return whether space.latch is being held */
......
...@@ -3078,6 +3078,9 @@ row_log_apply_op_low( ...@@ -3078,6 +3078,9 @@ row_log_apply_op_low(
mtr_start(&mtr); mtr_start(&mtr);
index->set_modified(mtr); index->set_modified(mtr);
cursor.page_cur.index = index; cursor.page_cur.index = index;
if (has_index_lock) {
mtr_x_lock_index(index, &mtr);
}
/* We perform the pessimistic variant of the operations if we /* We perform the pessimistic variant of the operations if we
already hold index->lock exclusively. First, search the already hold index->lock exclusively. First, search the
...@@ -3085,7 +3088,8 @@ row_log_apply_op_low( ...@@ -3085,7 +3088,8 @@ row_log_apply_op_low(
depending on when the row in the clustered index was depending on when the row in the clustered index was
scanned. */ scanned. */
*error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock *error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock
? BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &mtr); ? BTR_MODIFY_TREE_ALREADY_LATCHED
: BTR_MODIFY_LEAF, &mtr);
if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { if (UNIV_UNLIKELY(*error != DB_SUCCESS)) {
goto func_exit; goto func_exit;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment