Commit 263932d5 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-33325 Crash in flst_read_addr on corrupted data

flst_read_addr(): Remove assertions. Instead, we will check these
conditions in the callers and avoid a crash in case of corruption.
We will check the conditions more carefully, because the callers
know more exact bounds for the page numbers and the byte offsets
within pages.

flst_remove(), flst_add_first(), flst_add_last(): Add a parameter
for passing fil_space_t::free_limit. None of the lists may point to
pages that are beyond the current initialized length of the
tablespace.

trx_rseg_mem_restore(): Access the first page of the tablespace,
so that we will correctly recover rseg->space->free_limit
in case some log-based recovery is pending.

ibuf_remove_free_page(): Only look up the root page once, and
validate the last page number.

Reviewed by: Debarun Banerjee
parent da47c037
......@@ -562,7 +562,8 @@ btr_page_alloc_for_ibuf(
{
buf_page_make_young_if_needed(&new_block->page);
*err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_block,
PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
fil_system.sys_space->free_limit, mtr);
}
ut_d(if (*err == DB_SUCCESS)
flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
......@@ -666,7 +667,8 @@ btr_page_free_for_ibuf(
buf_block_t *root= btr_get_latched_root(*index, mtr);
dberr_t err=
flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
fil_system.sys_space->free_limit, mtr);
ut_d(if (err == DB_SUCCESS)
flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
return err;
......
This diff is collapsed.
This diff is collapsed.
......@@ -1831,7 +1831,7 @@ static bool ibuf_add_free_page()
err = flst_add_last(ibuf_root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
&mtr);
fil_system.sys_space->free_limit, &mtr);
if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
goto corrupted;
}
......@@ -1862,7 +1862,6 @@ Removes a page from the free list and frees it to the fsp system. */
static void ibuf_remove_free_page()
{
mtr_t mtr;
mtr_t mtr2;
page_t* header_page;
log_free_check();
......@@ -1889,26 +1888,28 @@ static void ibuf_remove_free_page()
return;
}
ibuf_mtr_start(&mtr2);
buf_block_t* root = ibuf_tree_root_get(&mtr2);
const auto root_savepoint = mtr.get_savepoint();
buf_block_t* root = ibuf_tree_root_get(&mtr);
if (UNIV_UNLIKELY(!root)) {
ibuf_mtr_commit(&mtr2);
goto early_exit;
}
mysql_mutex_unlock(&ibuf_mutex);
const uint32_t page_no = flst_get_last(PAGE_HEADER
+ PAGE_BTR_IBUF_FREE_LIST
+ root->page.frame).page;
if (page_no >= fil_system.sys_space->free_limit) {
goto early_exit;
}
mysql_mutex_unlock(&ibuf_mutex);
/* NOTE that we must release the latch on the ibuf tree root
because in fseg_free_page we access level 1 pages, and the root
is a level 2 page. */
ibuf_mtr_commit(&mtr2);
root->page.lock.u_unlock();
mtr.lock_register(root_savepoint, MTR_MEMO_BUF_FIX);
ibuf_exit(&mtr);
/* Since pessimistic inserts were prevented, we know that the
......@@ -1931,15 +1932,7 @@ static void ibuf_remove_free_page()
ibuf_enter(&mtr);
mysql_mutex_lock(&ibuf_mutex);
root = ibuf_tree_root_get(&mtr, &err);
if (UNIV_UNLIKELY(!root)) {
mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
goto func_exit;
}
ut_ad(page_no == flst_get_last(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST
+ root->page.frame).page);
mtr.upgrade_buffer_fix(root_savepoint, RW_X_LATCH);
/* Remove the page from the free list and update the ibuf size data */
if (buf_block_t* block =
......@@ -1948,7 +1941,7 @@ static void ibuf_remove_free_page()
err = flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
block,
PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
&mtr);
fil_system.sys_space->free_limit, &mtr);
}
mysql_mutex_unlock(&ibuf_pessimistic_insert_mutex);
......
......@@ -78,34 +78,40 @@ void flst_init(const buf_block_t &block, byte *base, mtr_t *mtr)
MY_ATTRIBUTE((nonnull));
/** Append a file list node to a list.
@param[in,out] base base node block
@param[in] boffset byte offset of the base node
@param[in,out] add block to be added
@param[in] aoffset byte offset of the node to be added
@param[in,out] mtr mini-transaction
@param base base node block
@param boffset byte offset of the base node
@param add block to be added
@param aoffset byte offset of the node to be added
@param limit fil_space_t::free_limit
@param mtr mini-transaction
@return error code */
dberr_t flst_add_last(buf_block_t *base, uint16_t boffset,
buf_block_t *add, uint16_t aoffset, mtr_t *mtr)
buf_block_t *add, uint16_t aoffset,
uint32_t limit, mtr_t *mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Prepend a file list node to a list.
@param[in,out] base base node block
@param[in] boffset byte offset of the base node
@param[in,out] add block to be added
@param[in] aoffset byte offset of the node to be added
@param[in,out] mtr mini-transaction
@param base base node block
@param boffset byte offset of the base node
@param add block to be added
@param aoffset byte offset of the node to be added
@param limit fil_space_t::free_limit
@param mtr mini-transaction
@return error code */
dberr_t flst_add_first(buf_block_t *base, uint16_t boffset,
buf_block_t *add, uint16_t aoffset, mtr_t *mtr)
buf_block_t *add, uint16_t aoffset,
uint32_t limit, mtr_t *mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Remove a file list node.
@param[in,out] base base node block
@param[in] boffset byte offset of the base node
@param[in,out] cur block to be removed
@param[in] coffset byte offset of the current record to be removed
@param[in,out] mtr mini-transaction
@param base base node block
@param boffset byte offset of the base node
@param cur block to be removed
@param coffset byte offset of the current record to be removed
@param limit fil_space_t::free_limit
@param mtr mini-transaction
@return error code */
dberr_t flst_remove(buf_block_t *base, uint16_t boffset,
buf_block_t *cur, uint16_t coffset, mtr_t *mtr)
buf_block_t *cur, uint16_t coffset,
uint32_t limit, mtr_t *mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** @return the length of a list */
......@@ -117,11 +123,9 @@ inline uint32_t flst_get_len(const flst_base_node_t *base)
/** @return a file address */
/** Read a file address (page number, byte offset) from a file-based
list node or base node.
@param faddr   pointer to a FIL_ADDR field inside the data area of a page
@return the file address stored at faddr
@note No validity assertions on the stored address here: callers are
expected to validate the page number against fil_space_t::free_limit and
the byte offset against their own bounds, returning DB_CORRUPTION instead
of crashing on corrupted data (MDEV-33325). */
inline fil_addr_t flst_read_addr(const byte *faddr)
{
  /* The field itself must lie within the page payload, past the header. */
  ut_ad(ut_align_offset(faddr, srv_page_size) >= FIL_PAGE_DATA);
  return fil_addr_t{mach_read_from_4(faddr + FIL_ADDR_PAGE),
                    mach_read_from_2(faddr + FIL_ADDR_BYTE)};
}
/** @return list first node address */
......
......@@ -266,7 +266,8 @@ trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr)
that is known to be corrupted. */
ut_a(flst_add_first(rseg_header, TRX_RSEG + TRX_RSEG_HISTORY, undo_page,
uint16_t(page_offset(undo_header) +
TRX_UNDO_HISTORY_NODE), mtr) == DB_SUCCESS);
TRX_UNDO_HISTORY_NODE), rseg->space->free_limit,
mtr) == DB_SUCCESS);
mtr->write<2>(*undo_page, TRX_UNDO_SEG_HDR + TRX_UNDO_STATE +
undo_page->page.frame, undo_state);
......@@ -356,6 +357,19 @@ inline dberr_t purge_sys_t::iterator::free_history_rseg(trx_rseg_t &rseg) const
mtr_t mtr;
bool freed= false;
uint32_t rseg_ref= 0;
const auto last_boffset= srv_page_size - TRX_UNDO_LOG_OLD_HDR_SIZE;
/* Technically, rseg.space->free_limit is not protected by
rseg.latch, which we are holding, but rseg.space->latch. The value
that we are reading may become stale (too small) if other pages are
being allocated in this tablespace, for other rollback
segments. Nothing can be added to this rseg without holding
rseg.latch, and hence we can validate the entire file-based list
against the limit that we are reading here.
Note: The read here may look like a data race. On none of our target
architectures this should be an actual problem, because the uint32_t
value should always fit in a register and be correctly aligned. */
const auto last_page= rseg.space->free_limit;
mtr.start();
......@@ -371,13 +385,23 @@ inline dberr_t purge_sys_t::iterator::free_history_rseg(trx_rseg_t &rseg) const
}
hdr_addr= flst_get_last(TRX_RSEG + TRX_RSEG_HISTORY + rseg_hdr->page.frame);
hdr_addr.boffset= static_cast<uint16_t>(hdr_addr.boffset -
TRX_UNDO_HISTORY_NODE);
loop:
if (hdr_addr.page == FIL_NULL)
goto func_exit;
if (hdr_addr.page >= last_page ||
hdr_addr.boffset < TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE ||
hdr_addr.boffset >= last_boffset)
{
corrupted:
err= DB_CORRUPTION;
goto func_exit;
}
hdr_addr.boffset= static_cast<uint16_t>(hdr_addr.boffset -
TRX_UNDO_HISTORY_NODE);
loop:
buf_block_t *b=
buf_page_get_gen(page_id_t(rseg.space->id, hdr_addr.page),
0, RW_X_LATCH, nullptr, BUF_GET_POSSIBLY_FREED,
......@@ -426,11 +450,18 @@ inline dberr_t purge_sys_t::iterator::free_history_rseg(trx_rseg_t &rseg) const
fil_addr_t prev_hdr_addr=
flst_get_prev_addr(b->page.frame + hdr_addr.boffset +
TRX_UNDO_HISTORY_NODE);
if (prev_hdr_addr.page == FIL_NULL);
else if (prev_hdr_addr.page >= last_page ||
prev_hdr_addr.boffset < TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE ||
prev_hdr_addr.boffset >= last_boffset)
goto corrupted;
prev_hdr_addr.boffset= static_cast<uint16_t>(prev_hdr_addr.boffset -
TRX_UNDO_HISTORY_NODE);
err= flst_remove(rseg_hdr, TRX_RSEG + TRX_RSEG_HISTORY, b,
uint16_t(hdr_addr.boffset + TRX_UNDO_HISTORY_NODE), &mtr);
uint16_t(hdr_addr.boffset + TRX_UNDO_HISTORY_NODE),
last_page, &mtr);
if (UNIV_UNLIKELY(err != DB_SUCCESS))
goto func_exit;
......@@ -490,6 +521,9 @@ inline dberr_t purge_sys_t::iterator::free_history_rseg(trx_rseg_t &rseg) const
ut_ad(rseg_hdr->page.id() == rseg.page_id());
mtr.memo_push(rseg_hdr, MTR_MEMO_PAGE_X_FIX);
if (hdr_addr.page == FIL_NULL)
goto func_exit;
goto loop;
}
......@@ -780,13 +814,17 @@ bool purge_sys_t::rseg_get_next_history_log()
{
const byte *log_hdr= undo_page->page.frame + rseg->last_offset();
prev_log_addr= flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE);
if (prev_log_addr.boffset < TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE ||
prev_log_addr.boffset >= srv_page_size - TRX_UNDO_LOG_OLD_HDR_SIZE)
goto corrupted;
prev_log_addr.boffset = static_cast<uint16_t>(prev_log_addr.boffset -
TRX_UNDO_HISTORY_NODE);
}
else
prev_log_addr.page= FIL_NULL;
goto corrupted;
if (prev_log_addr.page == FIL_NULL)
if (prev_log_addr.page >= rseg->space->free_limit)
corrupted:
rseg->last_page_no= FIL_NULL;
else
{
......
......@@ -448,7 +448,14 @@ static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, mtr_t *mtr)
{
if (!rseg->space)
return DB_TABLESPACE_NOT_FOUND;
/* Access the tablespace header page to recover rseg->space->free_limit */
page_id_t page_id{rseg->space->id, 0};
dberr_t err;
if (!buf_page_get_gen(page_id, 0, RW_S_LATCH, nullptr, BUF_GET, mtr, &err))
return err;
mtr->release_last_page();
page_id.set_page_no(rseg->page_no);
const buf_block_t *rseg_hdr=
buf_page_get_gen(rseg->page_id(), 0, RW_S_LATCH, nullptr, BUF_GET, mtr,
&err);
......@@ -518,6 +525,11 @@ static dberr_t trx_rseg_mem_restore(trx_rseg_t *rseg, mtr_t *mtr)
fil_addr_t node_addr= flst_get_last(TRX_RSEG + TRX_RSEG_HISTORY +
rseg_hdr->page.frame);
if (node_addr.page >= rseg->space->free_limit ||
node_addr.boffset < TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE ||
node_addr.boffset >= srv_page_size - TRX_UNDO_LOG_OLD_HDR_SIZE)
return DB_CORRUPTION;
node_addr.boffset= static_cast<uint16_t>(node_addr.boffset -
TRX_UNDO_HISTORY_NODE);
rseg->last_page_no= node_addr.page;
......
......@@ -513,7 +513,7 @@ trx_undo_seg_create(fil_space_t *space, buf_block_t *rseg_hdr, ulint *id,
*err = flst_add_last(block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE,
mtr);
space->free_limit, mtr);
*id = slot_no;
mtr->write<4>(*rseg_hdr, TRX_RSEG + TRX_RSEG_UNDO_SLOTS
......@@ -696,7 +696,8 @@ buf_block_t *trx_undo_add_page(trx_undo_t *undo, mtr_t *mtr, dberr_t *err)
mtr->undo_create(*new_block);
trx_undo_page_init(*new_block);
*err= flst_add_last(header_block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
new_block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
new_block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE,
rseg->space->free_limit, mtr);
if (UNIV_UNLIKELY(*err != DB_SUCCESS))
new_block= nullptr;
else
......@@ -747,9 +748,11 @@ trx_undo_free_page(
buf_page_make_young_if_needed(&header_block->page);
const uint32_t limit = rseg->space->free_limit;
*err = flst_remove(header_block, TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
undo_block, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE,
mtr);
limit, mtr);
if (UNIV_UNLIKELY(*err != DB_SUCCESS)) {
return FIL_NULL;
......@@ -758,7 +761,13 @@ trx_undo_free_page(
const fil_addr_t last_addr = flst_get_last(
TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST
+ header_block->page.frame);
if (UNIV_UNLIKELY(last_addr.page == page_no)) {
if (UNIV_UNLIKELY(last_addr.page == page_no)
|| UNIV_UNLIKELY(last_addr.page != FIL_NULL
&& last_addr.page >= limit)
|| UNIV_UNLIKELY(last_addr.boffset < TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_NODE)
|| UNIV_UNLIKELY(last_addr.boffset >= srv_page_size
- TRX_UNDO_LOG_OLD_HDR_SIZE)) {
*err = DB_CORRUPTION;
return FIL_NULL;
}
......@@ -975,8 +984,8 @@ trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no)
ut_ad(id < TRX_RSEG_N_SLOTS);
mtr.start();
const buf_block_t* block = buf_page_get(
page_id_t(rseg->space->id, page_no), 0, RW_X_LATCH, &mtr);
const page_id_t page_id{rseg->space->id, page_no};
const buf_block_t* block = buf_page_get(page_id, 0, RW_X_LATCH, &mtr);
if (UNIV_UNLIKELY(!block)) {
corrupted:
mtr.commit();
......@@ -1078,6 +1087,15 @@ trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no)
fil_addr_t last_addr = flst_get_last(
TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + block->page.frame);
if (last_addr.page >= rseg->space->free_limit
|| last_addr.boffset < TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE
|| last_addr.boffset >= srv_page_size
- TRX_UNDO_LOG_OLD_HDR_SIZE) {
corrupted_undo:
ut_free(undo);
goto corrupted;
}
undo->last_page_no = last_addr.page;
undo->top_page_no = last_addr.page;
......@@ -1086,8 +1104,7 @@ trx_undo_mem_create_at_db_start(trx_rseg_t *rseg, ulint id, uint32_t page_no)
RW_X_LATCH, &mtr);
if (UNIV_UNLIKELY(!last)) {
ut_free(undo);
goto corrupted;
goto corrupted_undo;
}
if (const trx_undo_rec_t* rec = trx_undo_page_get_last_rec(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment