Commit 0c23e32d authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-24445 Using innodb_undo_tablespaces corrupts system tablespace

In the rewrite of MDEV-8139 (based on MDEV-15528), we introduced a
wrong assumption that any persistent tablespace that is not an .ibd
file is the system tablespace. This assumption is broken when
innodb_undo_tablespaces (files undo001, undo002, ...) are being used.
By default, we have innodb_undo_tablespaces=0 (the persistent undo
log is being stored in the system tablespace).

In MDEV-15528 and MDEV-8139 we rewrote the page scrubbing logic
so that it will follow the tried-and-true write-ahead logging
protocol, first writing FREE_PAGE records and then in the page
flushing, zerofilling or hole-punching freed pages.

Unfortunately, the implementation included a wrong assumption that
that anything that is not in an .ibd file must be the system tablespace.
This wrong assumption would cause overwrites of valid data pages in
the system tablespace.

mtr_t::m_freed_in_system_tablespace: Remove.

mtr_t::m_freed_space: The tablespace associated with m_freed_pages.

buf_page_free(): Take the tablespace and page number as a parameter,
instead of taking a page identifier.
parent cd093d79
......@@ -725,9 +725,11 @@ void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
fseg_header_t* seg_header = &root[blob || page_is_leaf(block->frame)
? PAGE_HEADER + PAGE_BTR_SEG_LEAF
: PAGE_HEADER + PAGE_BTR_SEG_TOP];
fseg_free_page(seg_header,
index->table->space, id.page_no(), mtr);
buf_page_free(id, mtr, __FILE__, __LINE__);
fil_space_t* space= index->table->space;
const uint32_t page= id.page_no();
fseg_free_page(seg_header, space, page, mtr);
buf_page_free(space, page, mtr, __FILE__, __LINE__);
/* The page was marked free in the allocation bitmap, but it
should remain exclusively latched until mtr_t::commit() or until it
......
......@@ -2489,52 +2489,49 @@ inline buf_page_t *buf_pool_t::watch_set(const page_id_t id,
}
/** Mark the page status as FREED for the given tablespace id and
page number. If the page is not in the buffer pool then ignore it.
X-lock should be taken on the page before marking the page status
as FREED. It avoids the concurrent flushing of freed page.
Currently, this function only marks the page as FREED if it is
in buffer pool.
@param[in] page_id page id
page number. If the page is not in buffer pool then ignore it.
@param[in,out] space tablespace
@param[in] page page number
@param[in,out] mtr mini-transaction
@param[in] file file name
@param[in] line line where called */
void buf_page_free(const page_id_t page_id,
mtr_t *mtr,
const char *file,
unsigned line)
void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr,
const char *file, unsigned line)
{
ut_ad(mtr);
ut_ad(mtr->is_active());
buf_pool.stat.n_page_gets++;
if (srv_immediate_scrub_data_uncompressed
#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32
|| space->is_compressed()
#endif
)
mtr->add_freed_offset(space, page);
buf_pool.stat.n_page_gets++;
const page_id_t page_id(space->id, page);
const ulint fold= page_id.fold();
page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
buf_block_t *block= reinterpret_cast<buf_block_t*>
(buf_pool.page_hash_get_low(page_id, fold));
/* TODO: try to all this part of mtr_t::free() */
if (srv_immediate_scrub_data_uncompressed || mtr->is_page_compressed())
mtr->add_freed_offset(page_id);
if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE)
if (buf_block_t *block= reinterpret_cast<buf_block_t*>
(buf_pool.page_hash_get_low(page_id, fold)))
{
/* FIXME: if block!=NULL, convert to BUF_BLOCK_FILE_PAGE,
but avoid buf_zip_decompress() */
hash_lock->read_unlock();
return;
}
block->fix();
if (block->page.state() != BUF_BLOCK_FILE_PAGE)
/* FIXME: convert, but avoid buf_zip_decompress() */;
else
{
buf_block_buf_fix_inc(block, file, line);
ut_ad(block->page.buf_fix_count());
ut_ad(fsp_is_system_temporary(page_id.space()) ||
rw_lock_s_lock_nowait(block->debug_latch, file, line));
hash_lock->read_unlock();
mtr_memo_type_t fix_type= MTR_MEMO_PAGE_X_FIX;
mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
rw_lock_x_lock_inline(&block->lock, 0, file, line);
mtr_memo_push(mtr, block, fix_type);
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
block->page.status= buf_page_t::FREED;
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
return;
}
}
hash_lock->read_unlock();
}
......
......@@ -2637,9 +2637,8 @@ fseg_free_extent(
for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) {
if (!xdes_is_free(descr, i)) {
buf_page_free(
page_id_t(space->id, first_page_in_extent + 1),
mtr, __FILE__, __LINE__);
buf_page_free(space, first_page_in_extent + 1, mtr,
__FILE__, __LINE__);
}
}
}
......
......@@ -1986,7 +1986,7 @@ ibuf_remove_free_page(void)
ibuf_bitmap_page_set_bits<IBUF_BITMAP_IBUF>(
bitmap_page, page_id, srv_page_size, false, &mtr);
buf_page_free(page_id, &mtr, __FILE__, __LINE__);
buf_page_free(fil_system.sys_space, page_no, &mtr, __FILE__, __LINE__);
ibuf_mtr_commit(&mtr);
}
......
......@@ -361,14 +361,13 @@ buf_page_release_latch(
void buf_page_make_young(buf_page_t *bpage);
/** Mark the page status as FREED for the given tablespace id and
page number. If the page is not in buffer pool then ignore it.
@param[in] page_id page_id
@param[in,out] space tablespace
@param[in] page page number
@param[in,out] mtr mini-transaction
@param[in] file file name
@param[in] line line where called */
void buf_page_free(const page_id_t page_id,
mtr_t *mtr,
const char *file,
unsigned line);
void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr,
const char *file, unsigned line);
/********************************************************************//**
Reads the freed_page_clock of a buffer block.
......
......@@ -511,16 +511,18 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str,
@param[in,out] b buffer page */
inline void mtr_t::init(buf_block_t *b)
{
if (UNIV_LIKELY_NULL(m_freed_pages))
{
ut_ad(m_log_mode != MTR_LOG_ALL ||
m_user_space_id == b->page.id().space());
if (m_freed_pages->remove_if_exists(b->page.id().page_no()) &&
const page_id_t id{b->page.id()};
ut_ad(is_named_space(id.space()));
ut_ad(!m_freed_pages == !m_freed_space);
if (UNIV_LIKELY_NULL(m_freed_space) &&
m_freed_space->id == id.space() &&
m_freed_pages->remove_if_exists(b->page.id().page_no()) &&
m_freed_pages->empty())
{
delete m_freed_pages;
m_freed_pages= nullptr;
}
m_freed_space= nullptr;
}
b->page.status= buf_page_t::INIT_ON_FLUSH;
......@@ -540,15 +542,11 @@ inline void mtr_t::init(buf_block_t *b)
@param[in] offset page offset to be freed */
inline void mtr_t::free(fil_space_t &space, uint32_t offset)
{
page_id_t freed_page_id(space.id, offset);
if (m_log_mode == MTR_LOG_ALL)
m_log.close(log_write<FREE_PAGE>(freed_page_id, nullptr));
ut_ad(is_named_space(&space));
ut_ad(!m_freed_space || m_freed_space == &space);
ut_ad(!m_user_space || m_user_space == &space);
if (&space == fil_system.sys_space)
freed_system_tablespace_page();
else
m_user_space= &space;
if (m_log_mode == MTR_LOG_ALL)
m_log.close(log_write<FREE_PAGE>({space.id, offset}, nullptr));
}
/** Write an EXTENDED log record.
......
......@@ -316,24 +316,12 @@ struct mtr_t {
/** @return true if we are inside the change buffer code */
bool is_inside_ibuf() const { return m_inside_ibuf; }
/** Note that system tablespace page has been freed. */
void freed_system_tablespace_page() { m_freed_in_system_tablespace= true; }
/** Note that pages has been trimed */
void set_trim_pages() { m_trim_pages= true; }
/** @return true if pages has been trimed */
bool is_trim_pages() { return m_trim_pages; }
/** @return whether a page_compressed table was modified */
bool is_page_compressed() const
{
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
return m_user_space && m_user_space->is_compressed();
#else
return false;
#endif
}
#ifdef UNIV_DEBUG
/** Check if we are holding an rw-latch in this mini-transaction
@param lock latch to search for
......@@ -376,12 +364,6 @@ struct mtr_t {
/** @return the memo stack */
mtr_buf_t* get_memo() { return &m_memo; }
/** @return true if system tablespace page has been freed */
bool is_freed_system_tablespace_page()
{
return m_freed_in_system_tablespace;
}
#endif /* UNIV_DEBUG */
/** @return true if a record was added to the mini-transaction */
......@@ -587,12 +569,18 @@ struct mtr_t {
const char *new_path= nullptr);
/** Add freed page numbers to freed_pages */
void add_freed_offset(page_id_t id)
void add_freed_offset(fil_space_t *space, uint32_t page)
{
ut_ad(m_user_space == NULL || id.space() == m_user_space->id);
ut_ad(is_named_space(space));
if (!m_freed_pages)
{
m_freed_pages= new range_set();
m_freed_pages->add_value(id.page_no());
ut_ad(!m_freed_space);
m_freed_space= space;
}
else
ut_ad(m_freed_space == space);
m_freed_pages->add_value(page);
}
/** Determine the added buffer fix count of a block.
......@@ -670,9 +658,6 @@ struct mtr_t {
to suppress some read-ahead operations, @see ibuf_inside() */
uint16_t m_inside_ibuf:1;
/** whether the page has been freed in system tablespace */
uint16_t m_freed_in_system_tablespace:1;
/** whether the pages has been trimmed */
uint16_t m_trim_pages:1;
......@@ -694,6 +679,8 @@ struct mtr_t {
/** LSN at commit time */
lsn_t m_commit_lsn;
/** tablespace where pages have been freed */
fil_space_t *m_freed_space= nullptr;
/** set of freed page ids */
range_set *m_freed_pages= nullptr;
};
......
......@@ -354,8 +354,10 @@ struct ReleaseBlocks
void mtr_t::start()
{
ut_ad(!m_freed_pages);
ut_ad(!m_freed_space);
MEM_UNDEFINED(this, sizeof *this);
MEM_MAKE_DEFINED(&m_freed_pages, sizeof(m_freed_pages));
MEM_MAKE_DEFINED(&m_freed_space, sizeof m_freed_space);
MEM_MAKE_DEFINED(&m_freed_pages, sizeof m_freed_pages);
ut_d(m_start= true);
ut_d(m_commit= false);
......@@ -373,7 +375,7 @@ void mtr_t::start()
ut_d(m_user_space_id= TRX_SYS_SPACE);
m_user_space= nullptr;
m_commit_lsn= 0;
m_freed_in_system_tablespace= m_trim_pages= false;
m_trim_pages= false;
}
/** Release the resources */
......@@ -418,28 +420,23 @@ void mtr_t::commit()
if (m_freed_pages)
{
ut_ad(!m_freed_pages->empty());
fil_space_t *freed_space= m_user_space;
/* Get the freed tablespace in case of predefined tablespace */
if (!freed_space)
{
ut_ad(is_freed_system_tablespace_page());
freed_space= fil_system.sys_space;
}
ut_ad(memo_contains(*freed_space));
ut_ad(m_freed_space);
ut_ad(memo_contains(*m_freed_space));
ut_ad(is_named_space(m_freed_space));
/* Update the last freed lsn */
freed_space->update_last_freed_lsn(m_commit_lsn);
m_freed_space->update_last_freed_lsn(m_commit_lsn);
if (!is_trim_pages())
for (const auto &range : *m_freed_pages)
freed_space->add_free_range(range);
m_freed_space->add_free_range(range);
else
freed_space->clear_freed_ranges();
m_freed_space->clear_freed_ranges();
delete m_freed_pages;
m_freed_pages= nullptr;
/* Reset of m_trim_pages and m_freed_in_system_tablespace
happens in mtr_t::start() */
/* mtr_t::start() will reset m_trim_pages */
}
else
ut_ad(!m_freed_space);
m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks>
(ReleaseBlocks(lsns.first, m_commit_lsn,
......@@ -476,8 +473,8 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn)
ut_ad(!m_made_dirty);
ut_ad(m_memo.size() == 0);
ut_ad(!srv_read_only_mode);
ut_ad(!m_freed_space);
ut_ad(!m_freed_pages);
ut_ad(!m_freed_in_system_tablespace);
if (checkpoint_lsn) {
byte* ptr = m_log.push<byte*>(SIZE_OF_FILE_CHECKPOINT);
......
......@@ -629,7 +629,7 @@ trx_undo_free_page(
fseg_free_page(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER
+ header_block->frame,
rseg->space, page_no, mtr);
buf_page_free(page_id_t(space, page_no), mtr, __FILE__, __LINE__);
buf_page_free(rseg->space, page_no, mtr, __FILE__, __LINE__);
const fil_addr_t last_addr = flst_get_last(
TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + header_block->frame);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment