Commit 0c23e32d authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-24445 Using innodb_undo_tablespaces corrupts system tablespace

In the rewrite of MDEV-8139 (based on MDEV-15528), we introduced a
wrong assumption that any persistent tablespace that is not an .ibd
file is the system tablespace. This assumption is broken when
innodb_undo_tablespaces (files undo001, undo002, ...) are being used.
By default, we have innodb_undo_tablespaces=0 (the persistent undo
log is being stored in the system tablespace).

In MDEV-15528 and MDEV-8139 we rewrote the page scrubbing logic
so that it will follow the tried-and-true write-ahead logging
protocol, first writing FREE_PAGE records and then in the page
flushing, zerofilling or hole-punching freed pages.

Unfortunately, the implementation included a wrong assumption that
that anything that is not in an .ibd file must be the system tablespace.
This wrong assumption would cause overwrites of valid data pages in
the system tablespace.

mtr_t::m_freed_in_system_tablespace: Remove.

mtr_t::m_freed_space: The tablespace associated with m_freed_pages.

buf_page_free(): Take the tablespace and page number as a parameter,
instead of taking a page identifier.
parent cd093d79
...@@ -725,9 +725,11 @@ void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, ...@@ -725,9 +725,11 @@ void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
fseg_header_t* seg_header = &root[blob || page_is_leaf(block->frame) fseg_header_t* seg_header = &root[blob || page_is_leaf(block->frame)
? PAGE_HEADER + PAGE_BTR_SEG_LEAF ? PAGE_HEADER + PAGE_BTR_SEG_LEAF
: PAGE_HEADER + PAGE_BTR_SEG_TOP]; : PAGE_HEADER + PAGE_BTR_SEG_TOP];
fseg_free_page(seg_header, fil_space_t* space= index->table->space;
index->table->space, id.page_no(), mtr); const uint32_t page= id.page_no();
buf_page_free(id, mtr, __FILE__, __LINE__);
fseg_free_page(seg_header, space, page, mtr);
buf_page_free(space, page, mtr, __FILE__, __LINE__);
/* The page was marked free in the allocation bitmap, but it /* The page was marked free in the allocation bitmap, but it
should remain exclusively latched until mtr_t::commit() or until it should remain exclusively latched until mtr_t::commit() or until it
......
...@@ -2489,52 +2489,49 @@ inline buf_page_t *buf_pool_t::watch_set(const page_id_t id, ...@@ -2489,52 +2489,49 @@ inline buf_page_t *buf_pool_t::watch_set(const page_id_t id,
} }
/** Mark the page status as FREED for the given tablespace id and /** Mark the page status as FREED for the given tablespace id and
page number. If the page is not in the buffer pool then ignore it. page number. If the page is not in buffer pool then ignore it.
X-lock should be taken on the page before marking the page status @param[in,out] space tablespace
as FREED. It avoids the concurrent flushing of freed page. @param[in] page page number
Currently, this function only marks the page as FREED if it is
in buffer pool.
@param[in] page_id page id
@param[in,out] mtr mini-transaction @param[in,out] mtr mini-transaction
@param[in] file file name @param[in] file file name
@param[in] line line where called */ @param[in] line line where called */
void buf_page_free(const page_id_t page_id, void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr,
mtr_t *mtr, const char *file, unsigned line)
const char *file,
unsigned line)
{ {
ut_ad(mtr); ut_ad(mtr);
ut_ad(mtr->is_active()); ut_ad(mtr->is_active());
buf_pool.stat.n_page_gets++;
if (srv_immediate_scrub_data_uncompressed
#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32
|| space->is_compressed()
#endif
)
mtr->add_freed_offset(space, page);
buf_pool.stat.n_page_gets++;
const page_id_t page_id(space->id, page);
const ulint fold= page_id.fold(); const ulint fold= page_id.fold();
page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold); page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
buf_block_t *block= reinterpret_cast<buf_block_t*> if (buf_block_t *block= reinterpret_cast<buf_block_t*>
(buf_pool.page_hash_get_low(page_id, fold)); (buf_pool.page_hash_get_low(page_id, fold)))
/* TODO: try to all this part of mtr_t::free() */
if (srv_immediate_scrub_data_uncompressed || mtr->is_page_compressed())
mtr->add_freed_offset(page_id);
if (!block || block->page.state() != BUF_BLOCK_FILE_PAGE)
{ {
/* FIXME: if block!=NULL, convert to BUF_BLOCK_FILE_PAGE, if (block->page.state() != BUF_BLOCK_FILE_PAGE)
but avoid buf_zip_decompress() */ /* FIXME: convert, but avoid buf_zip_decompress() */;
hash_lock->read_unlock(); else
return; {
} buf_block_buf_fix_inc(block, file, line);
ut_ad(block->page.buf_fix_count());
hash_lock->read_unlock();
block->fix(); mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX);
ut_ad(block->page.buf_fix_count()); rw_lock_x_lock_inline(&block->lock, 0, file, line);
ut_ad(fsp_is_system_temporary(page_id.space()) || buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
rw_lock_s_lock_nowait(block->debug_latch, file, line));
mtr_memo_type_t fix_type= MTR_MEMO_PAGE_X_FIX; block->page.status= buf_page_t::FREED;
rw_lock_x_lock_inline(&block->lock, 0, file, line); return;
mtr_memo_push(mtr, block, fix_type); }
}
block->page.status= buf_page_t::FREED;
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
hash_lock->read_unlock(); hash_lock->read_unlock();
} }
......
...@@ -2637,9 +2637,8 @@ fseg_free_extent( ...@@ -2637,9 +2637,8 @@ fseg_free_extent(
for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) { for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) {
if (!xdes_is_free(descr, i)) { if (!xdes_is_free(descr, i)) {
buf_page_free( buf_page_free(space, first_page_in_extent + 1, mtr,
page_id_t(space->id, first_page_in_extent + 1), __FILE__, __LINE__);
mtr, __FILE__, __LINE__);
} }
} }
} }
......
...@@ -1986,7 +1986,7 @@ ibuf_remove_free_page(void) ...@@ -1986,7 +1986,7 @@ ibuf_remove_free_page(void)
ibuf_bitmap_page_set_bits<IBUF_BITMAP_IBUF>( ibuf_bitmap_page_set_bits<IBUF_BITMAP_IBUF>(
bitmap_page, page_id, srv_page_size, false, &mtr); bitmap_page, page_id, srv_page_size, false, &mtr);
buf_page_free(page_id, &mtr, __FILE__, __LINE__); buf_page_free(fil_system.sys_space, page_no, &mtr, __FILE__, __LINE__);
ibuf_mtr_commit(&mtr); ibuf_mtr_commit(&mtr);
} }
......
...@@ -361,14 +361,13 @@ buf_page_release_latch( ...@@ -361,14 +361,13 @@ buf_page_release_latch(
void buf_page_make_young(buf_page_t *bpage); void buf_page_make_young(buf_page_t *bpage);
/** Mark the page status as FREED for the given tablespace id and /** Mark the page status as FREED for the given tablespace id and
page number. If the page is not in buffer pool then ignore it. page number. If the page is not in buffer pool then ignore it.
@param[in] page_id page_id @param[in,out] space tablespace
@param[in] page page number
@param[in,out] mtr mini-transaction @param[in,out] mtr mini-transaction
@param[in] file file name @param[in] file file name
@param[in] line line where called */ @param[in] line line where called */
void buf_page_free(const page_id_t page_id, void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr,
mtr_t *mtr, const char *file, unsigned line);
const char *file,
unsigned line);
/********************************************************************//** /********************************************************************//**
Reads the freed_page_clock of a buffer block. Reads the freed_page_clock of a buffer block.
......
...@@ -511,16 +511,18 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str, ...@@ -511,16 +511,18 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str,
@param[in,out] b buffer page */ @param[in,out] b buffer page */
inline void mtr_t::init(buf_block_t *b) inline void mtr_t::init(buf_block_t *b)
{ {
if (UNIV_LIKELY_NULL(m_freed_pages)) const page_id_t id{b->page.id()};
ut_ad(is_named_space(id.space()));
ut_ad(!m_freed_pages == !m_freed_space);
if (UNIV_LIKELY_NULL(m_freed_space) &&
m_freed_space->id == id.space() &&
m_freed_pages->remove_if_exists(b->page.id().page_no()) &&
m_freed_pages->empty())
{ {
ut_ad(m_log_mode != MTR_LOG_ALL || delete m_freed_pages;
m_user_space_id == b->page.id().space()); m_freed_pages= nullptr;
if (m_freed_pages->remove_if_exists(b->page.id().page_no()) && m_freed_space= nullptr;
m_freed_pages->empty())
{
delete m_freed_pages;
m_freed_pages= nullptr;
}
} }
b->page.status= buf_page_t::INIT_ON_FLUSH; b->page.status= buf_page_t::INIT_ON_FLUSH;
...@@ -540,15 +542,11 @@ inline void mtr_t::init(buf_block_t *b) ...@@ -540,15 +542,11 @@ inline void mtr_t::init(buf_block_t *b)
@param[in] offset page offset to be freed */ @param[in] offset page offset to be freed */
inline void mtr_t::free(fil_space_t &space, uint32_t offset) inline void mtr_t::free(fil_space_t &space, uint32_t offset)
{ {
page_id_t freed_page_id(space.id, offset); ut_ad(is_named_space(&space));
if (m_log_mode == MTR_LOG_ALL) ut_ad(!m_freed_space || m_freed_space == &space);
m_log.close(log_write<FREE_PAGE>(freed_page_id, nullptr));
ut_ad(!m_user_space || m_user_space == &space); if (m_log_mode == MTR_LOG_ALL)
if (&space == fil_system.sys_space) m_log.close(log_write<FREE_PAGE>({space.id, offset}, nullptr));
freed_system_tablespace_page();
else
m_user_space= &space;
} }
/** Write an EXTENDED log record. /** Write an EXTENDED log record.
......
...@@ -316,24 +316,12 @@ struct mtr_t { ...@@ -316,24 +316,12 @@ struct mtr_t {
/** @return true if we are inside the change buffer code */ /** @return true if we are inside the change buffer code */
bool is_inside_ibuf() const { return m_inside_ibuf; } bool is_inside_ibuf() const { return m_inside_ibuf; }
/** Note that system tablespace page has been freed. */
void freed_system_tablespace_page() { m_freed_in_system_tablespace= true; }
/** Note that pages has been trimed */ /** Note that pages has been trimed */
void set_trim_pages() { m_trim_pages= true; } void set_trim_pages() { m_trim_pages= true; }
/** @return true if pages has been trimed */ /** @return true if pages has been trimed */
bool is_trim_pages() { return m_trim_pages; } bool is_trim_pages() { return m_trim_pages; }
/** @return whether a page_compressed table was modified */
bool is_page_compressed() const
{
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
return m_user_space && m_user_space->is_compressed();
#else
return false;
#endif
}
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
/** Check if we are holding an rw-latch in this mini-transaction /** Check if we are holding an rw-latch in this mini-transaction
@param lock latch to search for @param lock latch to search for
...@@ -376,12 +364,6 @@ struct mtr_t { ...@@ -376,12 +364,6 @@ struct mtr_t {
/** @return the memo stack */ /** @return the memo stack */
mtr_buf_t* get_memo() { return &m_memo; } mtr_buf_t* get_memo() { return &m_memo; }
/** @return true if system tablespace page has been freed */
bool is_freed_system_tablespace_page()
{
return m_freed_in_system_tablespace;
}
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
/** @return true if a record was added to the mini-transaction */ /** @return true if a record was added to the mini-transaction */
...@@ -587,12 +569,18 @@ struct mtr_t { ...@@ -587,12 +569,18 @@ struct mtr_t {
const char *new_path= nullptr); const char *new_path= nullptr);
/** Add freed page numbers to freed_pages */ /** Add freed page numbers to freed_pages */
void add_freed_offset(page_id_t id) void add_freed_offset(fil_space_t *space, uint32_t page)
{ {
ut_ad(m_user_space == NULL || id.space() == m_user_space->id); ut_ad(is_named_space(space));
if (!m_freed_pages) if (!m_freed_pages)
{
m_freed_pages= new range_set(); m_freed_pages= new range_set();
m_freed_pages->add_value(id.page_no()); ut_ad(!m_freed_space);
m_freed_space= space;
}
else
ut_ad(m_freed_space == space);
m_freed_pages->add_value(page);
} }
/** Determine the added buffer fix count of a block. /** Determine the added buffer fix count of a block.
...@@ -670,9 +658,6 @@ struct mtr_t { ...@@ -670,9 +658,6 @@ struct mtr_t {
to suppress some read-ahead operations, @see ibuf_inside() */ to suppress some read-ahead operations, @see ibuf_inside() */
uint16_t m_inside_ibuf:1; uint16_t m_inside_ibuf:1;
/** whether the page has been freed in system tablespace */
uint16_t m_freed_in_system_tablespace:1;
/** whether the pages has been trimmed */ /** whether the pages has been trimmed */
uint16_t m_trim_pages:1; uint16_t m_trim_pages:1;
...@@ -694,6 +679,8 @@ struct mtr_t { ...@@ -694,6 +679,8 @@ struct mtr_t {
/** LSN at commit time */ /** LSN at commit time */
lsn_t m_commit_lsn; lsn_t m_commit_lsn;
/** tablespace where pages have been freed */
fil_space_t *m_freed_space= nullptr;
/** set of freed page ids */ /** set of freed page ids */
range_set *m_freed_pages= nullptr; range_set *m_freed_pages= nullptr;
}; };
......
...@@ -354,8 +354,10 @@ struct ReleaseBlocks ...@@ -354,8 +354,10 @@ struct ReleaseBlocks
void mtr_t::start() void mtr_t::start()
{ {
ut_ad(!m_freed_pages); ut_ad(!m_freed_pages);
ut_ad(!m_freed_space);
MEM_UNDEFINED(this, sizeof *this); MEM_UNDEFINED(this, sizeof *this);
MEM_MAKE_DEFINED(&m_freed_pages, sizeof(m_freed_pages)); MEM_MAKE_DEFINED(&m_freed_space, sizeof m_freed_space);
MEM_MAKE_DEFINED(&m_freed_pages, sizeof m_freed_pages);
ut_d(m_start= true); ut_d(m_start= true);
ut_d(m_commit= false); ut_d(m_commit= false);
...@@ -373,7 +375,7 @@ void mtr_t::start() ...@@ -373,7 +375,7 @@ void mtr_t::start()
ut_d(m_user_space_id= TRX_SYS_SPACE); ut_d(m_user_space_id= TRX_SYS_SPACE);
m_user_space= nullptr; m_user_space= nullptr;
m_commit_lsn= 0; m_commit_lsn= 0;
m_freed_in_system_tablespace= m_trim_pages= false; m_trim_pages= false;
} }
/** Release the resources */ /** Release the resources */
...@@ -418,28 +420,23 @@ void mtr_t::commit() ...@@ -418,28 +420,23 @@ void mtr_t::commit()
if (m_freed_pages) if (m_freed_pages)
{ {
ut_ad(!m_freed_pages->empty()); ut_ad(!m_freed_pages->empty());
fil_space_t *freed_space= m_user_space; ut_ad(m_freed_space);
/* Get the freed tablespace in case of predefined tablespace */ ut_ad(memo_contains(*m_freed_space));
if (!freed_space) ut_ad(is_named_space(m_freed_space));
{
ut_ad(is_freed_system_tablespace_page());
freed_space= fil_system.sys_space;
}
ut_ad(memo_contains(*freed_space));
/* Update the last freed lsn */ /* Update the last freed lsn */
freed_space->update_last_freed_lsn(m_commit_lsn); m_freed_space->update_last_freed_lsn(m_commit_lsn);
if (!is_trim_pages()) if (!is_trim_pages())
for (const auto &range : *m_freed_pages) for (const auto &range : *m_freed_pages)
freed_space->add_free_range(range); m_freed_space->add_free_range(range);
else else
freed_space->clear_freed_ranges(); m_freed_space->clear_freed_ranges();
delete m_freed_pages; delete m_freed_pages;
m_freed_pages= nullptr; m_freed_pages= nullptr;
/* Reset of m_trim_pages and m_freed_in_system_tablespace /* mtr_t::start() will reset m_trim_pages */
happens in mtr_t::start() */
} }
else
ut_ad(!m_freed_space);
m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks> m_memo.for_each_block_in_reverse(CIterate<const ReleaseBlocks>
(ReleaseBlocks(lsns.first, m_commit_lsn, (ReleaseBlocks(lsns.first, m_commit_lsn,
...@@ -476,8 +473,8 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn) ...@@ -476,8 +473,8 @@ void mtr_t::commit_files(lsn_t checkpoint_lsn)
ut_ad(!m_made_dirty); ut_ad(!m_made_dirty);
ut_ad(m_memo.size() == 0); ut_ad(m_memo.size() == 0);
ut_ad(!srv_read_only_mode); ut_ad(!srv_read_only_mode);
ut_ad(!m_freed_space);
ut_ad(!m_freed_pages); ut_ad(!m_freed_pages);
ut_ad(!m_freed_in_system_tablespace);
if (checkpoint_lsn) { if (checkpoint_lsn) {
byte* ptr = m_log.push<byte*>(SIZE_OF_FILE_CHECKPOINT); byte* ptr = m_log.push<byte*>(SIZE_OF_FILE_CHECKPOINT);
......
...@@ -629,7 +629,7 @@ trx_undo_free_page( ...@@ -629,7 +629,7 @@ trx_undo_free_page(
fseg_free_page(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER fseg_free_page(TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER
+ header_block->frame, + header_block->frame,
rseg->space, page_no, mtr); rseg->space, page_no, mtr);
buf_page_free(page_id_t(space, page_no), mtr, __FILE__, __LINE__); buf_page_free(rseg->space, page_no, mtr, __FILE__, __LINE__);
const fil_addr_t last_addr = flst_get_last( const fil_addr_t last_addr = flst_get_last(
TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + header_block->frame); TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST + header_block->frame);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment