Commit 4b822111 authored by Marko Mäkelä

MDEV-8139: Clean up the freeing of B-tree pages

btr_page_free(): Renamed from btr_page_free_low().
If scrubbing is enabled, zero out the page with proper redo logging.
Only pass ahi=true to fseg_free_page() if the page is actually indexed.

fil_space_t::modify_check(): Renamed from fsp_space_modify_check().

fsp_init_file_page(): Define inline.
parent e124ff17
......@@ -715,159 +715,57 @@ btr_page_free_for_ibuf(
mtr));
}
/**************************************************************//**
Frees a file page used in an index tree. Can be used also to (BLOB)
external storage pages. */
void
btr_page_free_low(
/*==============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
ulint level, /*!< in: page level (ULINT_UNDEFINED=BLOB) */
bool blob, /*!< in: blob page */
mtr_t* mtr) /*!< in: mtr */
/** Free an index page.
@param[in,out] index index tree
@param[in,out] block block to be freed
@param[in,out] mtr mini-transaction
@param[in] blob whether this is freeing a BLOB page */
void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
bool blob)
{
fseg_header_t* seg_header;
page_t* root;
ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
#ifdef BTR_CUR_HASH_ADAPT
ut_ad(!block->index || !blob);
ut_ad(!block->index || page_is_leaf(block->frame));
#endif
ut_ad(index->space == block->page.id.space());
/* The root page is freed by btr_free_root(). */
ut_ad(block->page.id.page_no() != index->page);
ut_ad(mtr->is_named_space(index->space));
/* The page gets invalid for optimistic searches: increment the frame
modify clock */
buf_block_modify_clock_inc(block);
if (blob) {
ut_a(level == 0);
}
bool scrub = srv_immediate_scrub_data_uncompressed;
/* scrub page */
if (scrub && blob) {
/* blob page: scrub entire page */
// TODO(jonaso): scrub only what is actually needed
page_t* page = buf_block_get_frame(block);
memset(page + PAGE_HEADER, 0,
UNIV_PAGE_SIZE - PAGE_HEADER);
#ifdef UNIV_DEBUG_SCRUBBING
fprintf(stderr,
"btr_page_free_low: scrub blob page %lu/%lu\n",
buf_block_get_space(block),
buf_block_get_page_no(block));
#endif /* UNIV_DEBUG_SCRUBBING */
} else if (scrub) {
/* scrub records on page */
/* TODO(jonaso): in theory we could clear full page
* but, since page still remains in buffer pool, and
* gets flushed etc. Lots of routines validates consistency
* of it. And in order to remain structurally consistent
* we clear each record by it own
*
* NOTE: The TODO below mentions removing page from buffer pool
* and removing redo entries, once that is done, clearing full
* pages should be possible
*/
uint cnt = 0;
ulint bytes = 0;
page_t* page = buf_block_get_frame(block);
mem_heap_t* heap = NULL;
ulint* offsets = NULL;
rec_t* rec = page_rec_get_next(page_get_infimum_rec(page));
while (!page_rec_is_supremum(rec)) {
offsets = rec_get_offsets(rec, index, offsets,
page_is_leaf(page),
ULINT_UNDEFINED,
&heap);
ulint size = rec_offs_data_size(offsets);
memset(rec, 0, size);
rec = page_rec_get_next(rec);
cnt++;
bytes += size;
}
#ifdef UNIV_DEBUG_SCRUBBING
fprintf(stderr,
"btr_page_free_low: scrub %lu/%lu - "
"%u records " ULINTPF " bytes\n",
buf_block_get_space(block),
buf_block_get_page_no(block),
cnt, bytes);
#endif /* UNIV_DEBUG_SCRUBBING */
if (heap) {
mem_heap_free(heap);
}
}
#ifdef UNIV_DEBUG_SCRUBBING
if (scrub == false) {
fprintf(stderr,
"btr_page_free_low %lu/%lu blob: %u\n",
buf_block_get_space(block),
buf_block_get_page_no(block),
blob);
}
#endif /* UNIV_DEBUG_SCRUBBING */
if (dict_index_is_ibuf(index)) {
btr_page_free_for_ibuf(index, block, mtr);
return;
}
root = btr_root_get(index, mtr);
if (level == 0 || level == ULINT_UNDEFINED) {
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
} else {
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
}
#ifdef UNIV_GIS_DEBUG
if (dict_index_is_spatial(index)) {
fprintf(stderr, "GIS_DIAG: Freed %ld\n",
(long) block->page.id.page_no());
}
#endif
if (scrub) {
/**
* Reset page type so that scrub thread won't try to scrub it
*/
mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE,
FIL_PAGE_TYPE_ALLOCATED, MLOG_2BYTES, mtr);
}
/* TODO: Discard any operations for block from mtr->log.
The page will be freed, so previous changes to it by this
mini-transaction should not matter. */
page_t* root = btr_root_get(index, mtr);
fseg_header_t* seg_header = &root[blob || page_is_leaf(block->frame)
? PAGE_HEADER + PAGE_BTR_SEG_LEAF
: PAGE_HEADER + PAGE_BTR_SEG_TOP];
fseg_free_page(seg_header,
block->page.id.space(),
block->page.id.page_no(),
level != ULINT_UNDEFINED, mtr);
block->index != NULL, mtr);
/* The page was marked free in the allocation bitmap, but it
should remain buffer-fixed until mtr_commit(mtr) or until it
should remain exclusively latched until mtr_t::commit() or until it
is explicitly freed from the mini-transaction. */
ut_ad(mtr_is_block_fix(mtr, block, MTR_MEMO_PAGE_X_FIX, index->table));
/* TODO: Discard any operations on the page from the redo log
and remove the block from the flush list and the buffer pool.
This would free up buffer pool earlier and reduce writes to
both the tablespace and the redo log. */
}
/**************************************************************//**
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
void
btr_page_free(
/*==========*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
mtr_t* mtr) /*!< in: mtr */
{
const page_t* page = buf_block_get_frame(block);
ulint level = btr_page_get_level(page, mtr);
ut_ad(fil_page_index_page_check(block->frame));
ut_ad(level != ULINT_UNDEFINED);
btr_page_free_low(index, block, level, false, mtr);
if (srv_immediate_scrub_data_uncompressed) {
/* In MDEV-15528 this call must be removed, and we should
zero out the page after the redo log for this mini-transaction
has been durably written. */
fsp_init_file_page(fil_space_get(index->space), block, mtr);
}
}
/**************************************************************//**
......
......@@ -1037,7 +1037,7 @@ BtrBulk::finish(dberr_t err)
root_page_bulk.copyIn(first_rec);
/* Remove last page. */
btr_page_free_low(m_index, last_block, m_root_level, false, &mtr);
btr_page_free(m_index, last_block, &mtr);
/* Do not flush the last page. */
last_block->page.flush_observer = NULL;
......
......@@ -7484,8 +7484,7 @@ btr_free_externally_stored_field(
}
next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
btr_page_free_low(index, ext_block, 0,
true, &mtr);
btr_page_free(index, ext_block, &mtr, true);
if (page_zip != NULL) {
mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
......@@ -7511,12 +7510,7 @@ btr_free_externally_stored_field(
next_page_no = mach_read_from_4(
page + FIL_PAGE_DATA
+ BTR_BLOB_HDR_NEXT_PAGE_NO);
/* We must supply the page level (= 0) as an argument
because we did not store it on the page (we save the
space overhead from an index page header. */
btr_page_free_low(index, ext_block, 0,
true, &mtr);
btr_page_free(index, ext_block, &mtr, true);
mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
next_page_no,
......
......@@ -612,60 +612,34 @@ void fsp_apply_init_file_page(buf_block_t* block)
#ifdef UNIV_DEBUG
/** Assert that the mini-transaction is compatible with
updating an allocation bitmap page.
@param[in] id tablespace identifier
@param[in] mtr mini-transaction */
static
void
fsp_space_modify_check(
const fil_space_t* space,
const mtr_t* mtr)
void fil_space_t::modify_check(const mtr_t& mtr) const
{
switch (mtr->get_log_mode()) {
switch (mtr.get_log_mode()) {
case MTR_LOG_SHORT_INSERTS:
case MTR_LOG_NONE:
/* These modes are only allowed within a non-bitmap page
when there is a higher-level redo log record written. */
ut_ad(space->purpose == FIL_TYPE_TABLESPACE
|| space->purpose == FIL_TYPE_TEMPORARY);
ut_ad(purpose == FIL_TYPE_TABLESPACE
|| purpose == FIL_TYPE_TEMPORARY);
break;
case MTR_LOG_NO_REDO:
ut_ad(space->purpose == FIL_TYPE_TEMPORARY
|| space->purpose == FIL_TYPE_IMPORT
|| space->redo_skipped_count
|| space->is_being_truncated
|| srv_is_tablespace_truncated(space->id));
ut_ad(purpose == FIL_TYPE_TEMPORARY
|| purpose == FIL_TYPE_IMPORT
|| redo_skipped_count
|| is_being_truncated
|| srv_is_tablespace_truncated(id));
return;
case MTR_LOG_ALL:
/* We may only write redo log for a persistent tablespace. */
ut_ad(space->purpose == FIL_TYPE_TABLESPACE);
ut_ad(mtr->is_named_space(space->id));
/* We may only write redo log for a persistent
tablespace. */
ut_ad(purpose == FIL_TYPE_TABLESPACE);
ut_ad(mtr.is_named_space(id));
return;
}
ut_ad(0);
}
#endif /* UNIV_DEBUG */
/** Initialize a file page.
@param[in,out] block file page
@param[in,out] mtr mini-transaction */
static void fsp_init_file_page(buf_block_t* block, mtr_t* mtr)
{
fsp_apply_init_file_page(block);
mlog_write_initial_log_record(block->frame, MLOG_INIT_FILE_PAGE2, mtr);
}
#ifdef UNIV_DEBUG
static
void
fsp_init_file_page(const fil_space_t* space, buf_block_t* block, mtr_t* mtr)
{
ut_d(fsp_space_modify_check(space, mtr));
ut_ad(space->id == block->page.id.space());
fsp_init_file_page(block, mtr);
ut_ad(!"invalid log mode");
}
#else /* UNIV_DEBUG */
# define fsp_init_file_page(space, block, mtr) fsp_init_file_page(block, mtr)
#endif
/**********************************************************************//**
......@@ -816,7 +790,7 @@ fsp_header_inc_size(
ut_ad(mtr);
fil_space_t* space = mtr_x_lock_space(space_id, mtr);
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
header = fsp_get_space_header(
space, page_size_t(space->flags), mtr);
......@@ -877,7 +851,7 @@ fsp_try_extend_data_file_with_pages(
ulint size;
ut_a(!is_system_tablespace(space->id));
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
size = mach_read_from_4(header + FSP_SIZE);
ut_ad(size == space->size_in_header);
......@@ -909,7 +883,7 @@ fsp_try_extend_data_file(fil_space_t* space, fsp_header_t* header, mtr_t* mtr)
"ran out of space. Please add another file or use"
" 'autoextend' for the last file in setting";
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
if (space->id == TRX_SYS_SPACE
&& !srv_sys_space.can_auto_extend_last_file()) {
......@@ -1072,7 +1046,7 @@ fsp_fill_free_list(
ulint i;
ut_ad(page_offset(header) == FSP_HEADER_OFFSET);
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
/* Check if we can fill free list from above the free list limit */
size = mach_read_from_4(header + FSP_SIZE);
......@@ -1395,7 +1369,7 @@ fsp_alloc_free_page(
ulint free;
const ulint space_id = space->id;
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
header = fsp_get_space_header(space, page_size, mtr);
/* Get the hinted descriptor */
......@@ -1501,7 +1475,7 @@ fsp_free_page(
ulint frag_n_used;
ut_ad(mtr);
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
/* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */
......@@ -1823,7 +1797,7 @@ fsp_free_seg_inode(
page_t* page;
fsp_header_t* space_header;
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
page = page_align(inode);
......@@ -2074,7 +2048,7 @@ fseg_create_general(
fil_space_t* space = mtr_x_lock_space(space_id, mtr);
const page_size_t page_size(space->flags);
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
if (page != 0) {
block = buf_page_get(page_id_t(space_id, page), page_size,
......@@ -2273,7 +2247,7 @@ fseg_fill_free_list(
ut_ad(inode && mtr);
ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
......@@ -2341,7 +2315,7 @@ fseg_alloc_free_extent(
ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
if (flst_get_len(inode + FSEG_FREE) > 0) {
/* Segment free list is not empty, allocate from it */
......@@ -2428,7 +2402,7 @@ fseg_alloc_free_page_low(
seg_id = mach_read_from_8(seg_inode + FSEG_ID);
ut_ad(seg_id);
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
ut_ad(fil_page_get_type(page_align(seg_inode)) == FIL_PAGE_INODE);
reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr);
......@@ -3032,7 +3006,7 @@ fseg_free_page_low(
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
== FSEG_MAGIC_N_VALUE);
ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
#ifdef BTR_CUR_HASH_ADAPT
/* Drop search system page hash index if the page is found in
the pool and is hashed */
......@@ -3228,7 +3202,7 @@ fseg_free_extent(
ut_a(!memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8));
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
== FSEG_MAGIC_N_VALUE);
ut_d(fsp_space_modify_check(space, mtr));
ut_d(space->modify_check(*mtr));
first_page_in_extent = page - (page % FSP_EXTENT_SIZE);
......
......@@ -671,16 +671,6 @@ btr_page_alloc(
the page */
MY_ATTRIBUTE((warn_unused_result));
/**************************************************************//**
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
void
btr_page_free(
/*==========*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull));
/**************************************************************//**
Creates a new index page (not the root, and also not
used in page reorganization). @see btr_page_empty(). */
void
......@@ -691,18 +681,16 @@ btr_page_create(
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: the B-tree level of the page */
mtr_t* mtr); /*!< in: mtr */
/**************************************************************//**
Frees a file page used in an index tree. Can be used also to BLOB
external storage pages. */
void
btr_page_free_low(
/*==============*/
dict_index_t* index, /*!< in: index tree */
buf_block_t* block, /*!< in: block to be freed, x-latched */
ulint level, /*!< in: page level (ULINT_UNDEFINED=BLOB) */
bool blob, /*!< in: blob page */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull(1,2)));
/** Free a page of an index tree back to the file segment of the index.
This can be used both for ordinary index pages and for BLOB (externally
stored column) pages that belong to the index. The root page must not be
freed with this function.
NOTE(review): according to the commit description, the page is also zeroed
out with redo logging when immediate scrubbing is enabled; confirm against
the definition.
@param[in,out] index index tree that the page belongs to
@param[in,out] block block to be freed; must be x-latched in mtr
@param[in,out] mtr mini-transaction
@param[in] blob whether this is freeing a BLOB page
(defaults to false, that is, an ordinary index page) */
MY_ATTRIBUTE((nonnull))
void btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr,
bool blob = false);
/**************************************************************//**
Gets the root node of a tree and x- or s-latches it.
@return root page, x- or s-latched */
......
......@@ -108,7 +108,7 @@ struct fil_space_t {
ulint redo_skipped_count;
/*!< reference count for operations who want
to skip redo log in the file space in order
to make fsp_space_modify_check pass. */
to make modify_check() pass. */
#endif
fil_type_t purpose;/*!< purpose */
UT_LIST_BASE_NODE_T(fil_node_t) chain;
......@@ -208,6 +208,12 @@ struct fil_space_t {
fil_node_t* add(const char* name, pfs_os_file_t handle,
ulint size, bool is_raw, bool atomic_write,
ulint max_pages = ULINT_MAX);
#ifdef UNIV_DEBUG
/** Assert that the mini-transaction is compatible with
updating an allocation bitmap page.
This member is declared only when UNIV_DEBUG is defined, so callers
should wrap the call in ut_d(). The assertions depend on the log mode
of the mini-transaction and on the purpose of this tablespace.
@param[in] mtr mini-transaction */
void modify_check(const mtr_t& mtr) const;
#endif /* UNIV_DEBUG */
};
/** Value of fil_space_t::magic_n */
......
......@@ -688,6 +688,26 @@ fsp_descr_page(
@param[in,out] block buffer pool block */
void fsp_apply_init_file_page(buf_block_t* block);
/** Initialize a file page and write a MLOG_INIT_FILE_PAGE2 redo log
record for it.
The space parameter is part of the signature only when UNIV_DEBUG is
defined, and it is used only for assertions. Without UNIV_DEBUG, the
macro defined after this function drops the first argument, so callers
can always use the three-argument form.
@param[in] space tablespace (debug builds only)
@param[in,out] block file page
@param[in,out] mtr mini-transaction */
inline void fsp_init_file_page(
#ifdef UNIV_DEBUG
const fil_space_t* space,
#endif
buf_block_t* block, mtr_t* mtr)
{
/* Debug checks. Both refer to space, which is not declared without
UNIV_DEBUG, so they are expected to expand to nothing in that case. */
ut_d(space->modify_check(*mtr));
ut_ad(space->id == block->page.id.space());
/* Initialize the page frame of the block. */
fsp_apply_init_file_page(block);
/* Log the initialization in the mini-transaction. */
mlog_write_initial_log_record(block->frame, MLOG_INIT_FILE_PAGE2, mtr);
}
/* Without UNIV_DEBUG, discard the tablespace argument at the call site.
Because the macro drops the argument textually, the expression passed as
space is not evaluated in such builds. */
#ifndef UNIV_DEBUG
# define fsp_init_file_page(space, block, mtr) fsp_init_file_page(block, mtr)
#endif
#ifdef UNIV_BTR_PRINT
/*******************************************************************//**
Writes info of a segment. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment