Commit 6d7a8269 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-20788: Bogus assertion failure for PAGE_FREE list

In MDEV-11369 (instant ADD COLUMN) in MariaDB Server 10.3,
we introduced the hidden metadata record that must be the
first record in the clustered index if and only if
index->is_instant() holds.

To catch MDEV-19783, in
commit ed0793e0 and
commit 99dc40d6
we added some assertions to find cases where
the metadata record is missing while it should not be, or a
record exists when it should not. Those assertions were invalid
when traversing the PAGE_FREE list. That list can contain anything;
we must only be able to determine the successor and the size of
each garbage record in it.

page_validate(), page_simple_validate_old(), page_simple_validate_new():
Do not invoke page_rec_get_next_const() for traversing the PAGE_FREE
list, but instead use a lower-level accessor that does not attempt to
validate the REC_INFO_MIN_REC_FLAG.

page_copy_rec_list_end_no_locks(),
page_copy_rec_list_start(), page_delete_rec_list_start():
Add assertions.

btr_page_get_split_rec_to_left(): Remove a redundant return value,
and make the output parameter the return value.

btr_page_get_split_rec_to_right(), btr_page_split_and_insert(): Clean up.
parent 726b1998
......@@ -2030,104 +2030,78 @@ btr_root_raise_and_insert(
}
}
/*************************************************************//**
Decides if the page should be split at the convergence point of inserts
/** Decide if the page should be split at the convergence point of inserts
converging to the left.
@return TRUE if split recommended */
ibool
btr_page_get_split_rec_to_left(
/*===========================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
rec_t** split_rec) /*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple to be inserted should
be first */
@param[in] cursor insert position
@return the first record to be moved to the right half page
@retval NULL if no split is recommended */
rec_t* btr_page_get_split_rec_to_left(const btr_cur_t* cursor)
{
page_t* page;
rec_t* insert_point;
rec_t* infimum;
page = btr_cur_get_page(cursor);
insert_point = btr_cur_get_rec(cursor);
rec_t* split_rec = btr_cur_get_rec(cursor);
const page_t* page = page_align(split_rec);
if (page_header_get_ptr(page, PAGE_LAST_INSERT)
== page_rec_get_next(insert_point)) {
infimum = page_get_infimum_rec(page);
/* If the convergence is in the middle of a page, include also
the record immediately before the new insert to the upper
page. Otherwise, we could repeatedly move from page to page
lots of records smaller than the convergence point. */
!= page_rec_get_next(split_rec)) {
return NULL;
}
if (infimum != insert_point
&& page_rec_get_next(infimum) != insert_point) {
const rec_t* infimum = page_get_infimum_rec(page);
*split_rec = insert_point;
} else {
*split_rec = page_rec_get_next(insert_point);
}
/* If the convergence is in the middle of a page, include also
the record immediately before the new insert to the upper
page. Otherwise, we could repeatedly move from page to page
lots of records smaller than the convergence point. */
return(TRUE);
if (split_rec == infimum
|| split_rec == page_rec_get_next_const(infimum)) {
split_rec = page_rec_get_next(split_rec);
}
return(FALSE);
return split_rec;
}
/*************************************************************//**
Decides if the page should be split at the convergence point of inserts
/** Decide if the page should be split at the convergence point of inserts
converging to the right.
@return TRUE if split recommended */
ibool
btr_page_get_split_rec_to_right(
/*============================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
rec_t** split_rec) /*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple to be inserted should
be first */
@param[in] cursor insert position
@param[out] split_rec if split recommended, the first record
on the right half page, or
NULL if the to-be-inserted record
should be first
@return whether split is recommended */
bool
btr_page_get_split_rec_to_right(const btr_cur_t* cursor, rec_t** split_rec)
{
page_t* page;
rec_t* insert_point;
page = btr_cur_get_page(cursor);
insert_point = btr_cur_get_rec(cursor);
rec_t* insert_point = btr_cur_get_rec(cursor);
const page_t* page = page_align(insert_point);
/* We use eager heuristics: if the new insert would be right after
the previous insert on the same page, we assume that there is a
pattern of sequential inserts here. */
if (page_header_get_ptr(page, PAGE_LAST_INSERT) == insert_point) {
rec_t* next_rec;
next_rec = page_rec_get_next(insert_point);
if (page_rec_is_supremum(next_rec)) {
split_at_new:
/* Split at the new record to insert */
*split_rec = NULL;
} else {
rec_t* next_next_rec = page_rec_get_next(next_rec);
if (page_rec_is_supremum(next_next_rec)) {
goto split_at_new;
}
if (page_header_get_ptr(page, PAGE_LAST_INSERT) != insert_point) {
return false;
}
/* If there are >= 2 user records up from the insert
point, split all but 1 off. We want to keep one because
then sequential inserts can use the adaptive hash
index, as they can do the necessary checks of the right
search position just by looking at the records on this
page. */
insert_point = page_rec_get_next(insert_point);
*split_rec = next_next_rec;
if (page_rec_is_supremum(insert_point)) {
insert_point = NULL;
} else {
insert_point = page_rec_get_next(insert_point);
if (page_rec_is_supremum(insert_point)) {
insert_point = NULL;
}
return(TRUE);
/* If there are >= 2 user records up from the insert
point, split all but 1 off. We want to keep one because
then sequential inserts can use the adaptive hash
index, as they can do the necessary checks of the right
search position just by looking at the records on this
page. */
}
return(FALSE);
*split_rec = insert_point;
return true;
}
/*************************************************************//**
......@@ -2782,30 +2756,20 @@ btr_page_split_and_insert(
buf_block_t* block;
page_t* page;
page_zip_des_t* page_zip;
ulint page_no;
byte direction;
ulint hint_page_no;
buf_block_t* new_block;
page_t* new_page;
page_zip_des_t* new_page_zip;
rec_t* split_rec;
buf_block_t* left_block;
buf_block_t* right_block;
buf_block_t* insert_block;
page_cur_t* page_cursor;
rec_t* first_rec;
byte* buf = 0; /* remove warning */
rec_t* move_limit;
ibool insert_will_fit;
ibool insert_left;
ulint n_iterations = 0;
rec_t* rec;
ulint n_uniq;
dict_index_t* index;
index = btr_cur_get_index(cursor);
if (dict_index_is_spatial(index)) {
if (dict_index_is_spatial(cursor->index)) {
/* Split rtree page and update parent */
return(rtr_page_split_and_insert(flags, cursor, offsets, heap,
tuple, n_ext, mtr));
......@@ -2837,23 +2801,19 @@ btr_page_split_and_insert(
ut_ad(!page_is_empty(page));
/* try to insert to the next page if possible before split */
rec = btr_insert_into_right_sibling(
flags, cursor, offsets, *heap, tuple, n_ext, mtr);
if (rec != NULL) {
if (rec_t* rec = btr_insert_into_right_sibling(
flags, cursor, offsets, *heap, tuple, n_ext, mtr)) {
return(rec);
}
page_no = block->page.id.page_no();
/* 1. Decide the split record; split_rec == NULL means that the
tuple to be inserted should be the first record on the upper
half-page */
insert_left = FALSE;
bool insert_left = false;
ulint hint_page_no = block->page.id.page_no() + 1;
byte direction = FSP_UP;
if (tuple != NULL && n_iterations > 0) {
direction = FSP_UP;
hint_page_no = page_no + 1;
if (tuple && n_iterations > 0) {
split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
if (split_rec == NULL) {
......@@ -2861,17 +2821,10 @@ btr_page_split_and_insert(
cursor, tuple, offsets, n_uniq, heap);
}
} else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
direction = FSP_UP;
hint_page_no = page_no + 1;
} else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
} else if ((split_rec = btr_page_get_split_rec_to_left(cursor))) {
direction = FSP_DOWN;
hint_page_no = page_no - 1;
ut_ad(split_rec);
hint_page_no -= 2;
} else {
direction = FSP_UP;
hint_page_no = page_no + 1;
/* If there is only one record in the index page, we
can't split the node in the middle by default. We need
to determine whether the new record will be inserted
......@@ -2896,7 +2849,7 @@ btr_page_split_and_insert(
new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
btr_page_get_level(page, mtr), mtr, mtr);
if (new_block == NULL && os_has_said_disk_full) {
if (!new_block) {
return(NULL);
}
......@@ -2921,12 +2874,8 @@ btr_page_split_and_insert(
*offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
page_is_leaf(page), n_uniq, heap);
if (tuple != NULL) {
insert_left = cmp_dtuple_rec(
tuple, split_rec, *offsets) < 0;
} else {
insert_left = 1;
}
insert_left = !tuple
|| cmp_dtuple_rec(tuple, split_rec, *offsets) < 0;
if (!insert_left && new_page_zip && n_iterations > 0) {
/* If a compressed page has already been split,
......@@ -2961,10 +2910,10 @@ btr_page_split_and_insert(
on the appropriate half-page, we may release the tree x-latch.
We can then move the records after releasing the tree latch,
thus reducing the tree latch contention. */
bool insert_will_fit;
if (tuple == NULL) {
insert_will_fit = 1;
}
else if (split_rec) {
insert_will_fit = true;
} else if (split_rec) {
insert_will_fit = !new_page_zip
&& btr_page_insert_fits(cursor, split_rec,
offsets, tuple, n_ext, heap);
......@@ -3069,8 +3018,6 @@ btr_page_split_and_insert(
new_block, block, move_limit);
}
ut_ad(!dict_index_is_spatial(index));
btr_search_move_or_delete_hash_entries(
new_block, block, cursor->index);
......@@ -3102,18 +3049,15 @@ btr_page_split_and_insert(
/* 6. The split and the tree modification is now completed. Decide the
page where the tuple should be inserted */
rec_t* rec;
buf_block_t* const insert_block = insert_left
? left_block : right_block;
if (tuple == NULL) {
if (UNIV_UNLIKELY(!tuple)) {
rec = NULL;
goto func_exit;
}
if (insert_left) {
insert_block = left_block;
} else {
insert_block = right_block;
}
/* 7. Reposition the cursor for insert and try insertion */
page_cursor = btr_cur_get_page_cur(cursor);
......@@ -3190,9 +3134,7 @@ btr_page_split_and_insert(
ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
if (tuple == NULL) {
ut_ad(rec == NULL);
}
ut_ad(tuple || !rec);
ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
return(rec);
}
......
......@@ -3138,7 +3138,7 @@ btr_cur_optimistic_insert(
&& page_get_n_recs(page) >= 2
&& dict_index_get_space_reserve() + rec_size > max_size
&& (btr_page_get_split_rec_to_right(cursor, &dummy)
|| btr_page_get_split_rec_to_left(cursor, &dummy))) {
|| btr_page_get_split_rec_to_left(cursor))) {
goto fail;
}
......
......@@ -445,30 +445,22 @@ btr_page_reorganize(
dict_index_t* index, /*!< in: the index tree of the page */
mtr_t* mtr) /*!< in/out: mini-transaction */
MY_ATTRIBUTE((nonnull));
/*************************************************************//**
Decides if the page should be split at the convergence point of
inserts converging to left.
@return TRUE if split recommended */
ibool
btr_page_get_split_rec_to_left(
/*===========================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
rec_t** split_rec)/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
MY_ATTRIBUTE((warn_unused_result));
/*************************************************************//**
Decides if the page should be split at the convergence point of
inserts converging to right.
@return TRUE if split recommended */
ibool
btr_page_get_split_rec_to_right(
/*============================*/
btr_cur_t* cursor, /*!< in: cursor at which to insert */
rec_t** split_rec)/*!< out: if split recommended,
the first record on upper half page,
or NULL if tuple should be first */
MY_ATTRIBUTE((warn_unused_result));
/** Decide if the page should be split at the convergence point of inserts
converging to the left.
@param[in] cursor insert position
@return the first record to be moved to the right half page
@retval NULL if no split is recommended */
rec_t* btr_page_get_split_rec_to_left(const btr_cur_t* cursor);
/** Decide if the page should be split at the convergence point of inserts
converging to the right.
@param[in] cursor insert position
@param[out] split_rec if split recommended, the first record
on the right half page, or
NULL if the to-be-inserted record
should be first
@return whether split is recommended */
bool
btr_page_get_split_rec_to_right(const btr_cur_t* cursor, rec_t** split_rec);
/*************************************************************//**
Splits an index page to halves and inserts the tuple. It is assumed
......
......@@ -604,20 +604,20 @@ page_copy_rec_list_end_no_locks(
/* Copy records from the original page to the new page */
while (!page_cur_is_after_last(&cur1)) {
rec_t* cur1_rec = page_cur_get_rec(&cur1);
rec_t* ins_rec;
offsets = rec_get_offsets(cur1_rec, index, offsets, is_leaf,
offsets = rec_get_offsets(cur1.rec, index, offsets, is_leaf,
ULINT_UNDEFINED, &heap);
ins_rec = page_cur_insert_rec_low(cur2, index,
cur1_rec, offsets, mtr);
cur1.rec, offsets, mtr);
if (UNIV_UNLIKELY(!ins_rec)) {
ib::fatal() << "Rec offset " << page_offset(rec)
<< ", cur1 offset "
<< page_offset(page_cur_get_rec(&cur1))
<< ", cur1 offset " << page_offset(cur1.rec)
<< ", cur2 offset " << page_offset(cur2);
}
page_cur_move_to_next(&cur1);
ut_ad(!(rec_get_info_bits(cur1.rec, page_is_comp(new_page))
& REC_INFO_MIN_REC_FLAG));
cur2 = ins_rec;
}
......@@ -803,6 +803,8 @@ page_copy_rec_list_start(
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr) /*!< in: mtr */
{
ut_ad(page_align(rec) == block->frame);
page_t* new_page = buf_block_get_frame(new_block);
page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
page_cur_t cur1;
......@@ -820,7 +822,6 @@ page_copy_rec_list_start(
predefined infimum record. */
if (page_rec_is_infimum(rec)) {
return(ret);
}
......@@ -854,17 +855,18 @@ page_copy_rec_list_start(
rec_move, max_to_move,
&num_moved, mtr);
} else {
while (page_cur_get_rec(&cur1) != rec) {
rec_t* cur1_rec = page_cur_get_rec(&cur1);
offsets = rec_get_offsets(cur1_rec, index, offsets,
offsets = rec_get_offsets(cur1.rec, index, offsets,
is_leaf,
ULINT_UNDEFINED, &heap);
cur2 = page_cur_insert_rec_low(cur2, index,
cur1_rec, offsets, mtr);
cur1.rec, offsets, mtr);
ut_a(cur2);
page_cur_move_to_next(&cur1);
ut_ad(!(rec_get_info_bits(cur1.rec,
page_is_comp(new_page))
& REC_INFO_MIN_REC_FLAG));
}
}
......@@ -1254,6 +1256,7 @@ page_delete_rec_list_start(
rec_offs_init(offsets_);
ut_ad(page_align(rec) == block->frame);
ut_ad((ibool) !!page_rec_is_comp(rec)
== dict_table_is_comp(index->table));
#ifdef UNIV_ZIP_DEBUG
......@@ -2163,7 +2166,17 @@ page_simple_validate_old(
goto func_exit;
}
rec = page_rec_get_next_const(rec);
ulint offs = rec_get_next_offs(rec, FALSE);
if (!offs) {
break;
}
if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
|| offs >= srv_page_size)) {
ib::error() << "Page free list is corrupted " << count;
goto func_exit;
}
rec = page + offs;
}
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
......@@ -2355,7 +2368,17 @@ page_simple_validate_new(
goto func_exit;
}
rec = page_rec_get_next_const(rec);
const ulint offs = rec_get_next_offs(rec, TRUE);
if (!offs) {
break;
}
if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
|| offs >= srv_page_size)) {
ib::error() << "Page free list is corrupted " << count;
goto func_exit;
}
rec = page + offs;
}
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
......@@ -2668,16 +2691,32 @@ page_validate(
page_is_leaf(page),
ULINT_UNDEFINED, &heap);
if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
ret = FALSE;
next_free:
const ulint offs = rec_get_next_offs(
rec, page_is_comp(page));
if (!offs) {
break;
}
if (UNIV_UNLIKELY(offs < PAGE_OLD_INFIMUM
|| offs >= srv_page_size)) {
ib::error() << "Page free list is corrupted";
ret = FALSE;
break;
}
goto func_exit;
rec = page + offs;
continue;
}
count++;
offs = page_offset(rec_get_start(rec, offsets));
i = rec_offs_size(offsets);
if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
ib::error() << "Record offset out of bounds";
goto func_exit;
if (UNIV_UNLIKELY(offs + i >= srv_page_size)) {
ib::error() << "Free record offset out of bounds: "
<< offs << '+' << i;
ret = FALSE;
goto next_free;
}
while (i--) {
......@@ -2691,7 +2730,7 @@ page_validate(
buf[offs + i] = 1;
}
rec = page_rec_get_next_const(rec);
goto next_free;
}
if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment