Commit db5cdc31 authored by Marko Mäkelä

MDEV-12353: Replace MLOG_PAGE_REORGANIZE, MLOG_COMP_PAGE_REORGANIZE

Log a page reorganization as a series of insert operations.
This makes the redo log volume proportional to the page payload size.

btr_page_reorganize_low(): Add template <bool recovery=false>

btr_page_reorganize_block(): Remove the parameter 'bool recovery'
parent 276f996a
...@@ -1365,17 +1365,14 @@ btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset) ...@@ -1365,17 +1365,14 @@ btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset)
mtr.commit(); mtr.commit();
} }
/** Reorganize an index page. */ /** Reorganize an index page.
static void @tparam recovery whether this is invoked by btr_parse_page_reorganize()
btr_page_reorganize_low( @param cursor index page cursor
bool recovery,/*!< in: true if called in recovery: @param index the index that the cursor belongs to
locks should not be updated, i.e., @param mtr mini-transaction */
there cannot exist locks on the template<bool recovery= false>
page, and a hash index should not be static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index,
dropped: it cannot exist */ mtr_t *mtr)
page_cur_t* cursor, /*!< in/out: page cursor */
dict_index_t* index, /*!< in: the index tree of the page */
mtr_t* mtr) /*!< in/out: mini-transaction */
{ {
buf_block_t* block = page_cur_get_block(cursor); buf_block_t* block = page_cur_get_block(cursor);
page_t* page = buf_block_get_frame(block); page_t* page = buf_block_get_frame(block);
...@@ -1398,8 +1395,6 @@ btr_page_reorganize_low( ...@@ -1398,8 +1395,6 @@ btr_page_reorganize_low(
|| !page_has_siblings(page)); || !page_has_siblings(page));
data_size1 = page_get_data_size(page); data_size1 = page_get_data_size(page);
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1); max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
/* Turn logging off */
mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
temp_block = buf_block_alloc(); temp_block = buf_block_alloc();
...@@ -1434,30 +1429,25 @@ btr_page_reorganize_low( ...@@ -1434,30 +1429,25 @@ btr_page_reorganize_low(
index, mtr); index, mtr);
/* Copy the PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC. */ /* Copy the PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC. */
static_assert((PAGE_HEADER + PAGE_MAX_TRX_ID) % 8 == 0, "alignment"); ut_ad(!page_get_max_trx_id(block->frame));
memcpy_aligned<8>(&block->frame[PAGE_HEADER + PAGE_MAX_TRX_ID], if (trx_id_t trx_id = page_get_max_trx_id(temp_block->frame)) {
&temp_block->frame[PAGE_HEADER + PAGE_MAX_TRX_ID], /* PAGE_MAX_TRX_ID must be zero on non-leaf pages other than
8); clustered index root pages. */
/* PAGE_MAX_TRX_ID is unused in clustered index pages ut_ad(recovery || (dict_index_is_sec_or_ibuf(index)
(other than the root where it is repurposed as PAGE_ROOT_AUTO_INC), ? page_is_leaf(temp_block->frame)
non-leaf pages, and in temporary tables. It was always : block->page.id.page_no() == index->page));
zero-initialized in page_create() in all InnoDB versions. page_set_max_trx_id(block, NULL, trx_id, mtr);
PAGE_MAX_TRX_ID must be nonzero on dict_index_is_sec_or_ibuf() } else {
leaf pages. /* PAGE_MAX_TRX_ID is unused in clustered index pages
(other than the root where it is repurposed as
During redo log apply, dict_index_is_sec_or_ibuf() always PAGE_ROOT_AUTO_INC), non-leaf pages, and in temporary tables.
holds, even for clustered indexes. */ It was always zero-initialized in page_create().
ut_ad(recovery || index->table->is_temporary() PAGE_MAX_TRX_ID must be nonzero on
|| !page_is_leaf(temp_block->frame) dict_index_is_sec_or_ibuf() leaf pages. */
|| !dict_index_is_sec_or_ibuf(index) ut_ad(recovery || index->table->is_temporary()
|| page_get_max_trx_id(block->frame) != 0); || !page_is_leaf(temp_block->frame)
/* PAGE_MAX_TRX_ID must be zero on non-leaf pages other than || !dict_index_is_sec_or_ibuf(index));
clustered index root pages. */ }
ut_ad(recovery
|| page_get_max_trx_id(block->frame) == 0
|| (dict_index_is_sec_or_ibuf(index)
? page_is_leaf(temp_block->frame)
: block->page.id.page_no() == index->page));
data_size2 = page_get_data_size(block->frame); data_size2 = page_get_data_size(block->frame);
max_ins_size2 = page_get_max_insert_size_after_reorganize(block->frame, max_ins_size2 = page_get_max_insert_size_after_reorganize(block->frame,
...@@ -1514,23 +1504,17 @@ btr_page_reorganize_low( ...@@ -1514,23 +1504,17 @@ btr_page_reorganize_low(
buf_block_free(temp_block); buf_block_free(temp_block);
/* Restore logging mode */
mtr_set_log_mode(mtr, log_mode);
mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
? MLOG_COMP_PAGE_REORGANIZE
: MLOG_PAGE_REORGANIZE, 0);
MONITOR_INC(MONITOR_INDEX_REORG_SUCCESSFUL); MONITOR_INC(MONITOR_INDEX_REORG_SUCCESSFUL);
if (UNIV_UNLIKELY(fil_page_get_type(block->frame) if (!recovery && UNIV_UNLIKELY(fil_page_get_type(block->frame)
== FIL_PAGE_TYPE_INSTANT)) { == FIL_PAGE_TYPE_INSTANT)) {
/* Log the PAGE_INSTANT information. */ /* Log the PAGE_INSTANT information. */
ut_ad(index->is_instant()); ut_ad(index->is_instant());
ut_ad(!recovery);
mtr->write<2,mtr_t::FORCED>(*block, FIL_PAGE_TYPE mtr->write<2,mtr_t::FORCED>(*block, FIL_PAGE_TYPE
+ block->frame, + block->frame,
FIL_PAGE_TYPE_INSTANT); FIL_PAGE_TYPE_INSTANT);
byte* instant = PAGE_HEADER + PAGE_INSTANT + block->frame; byte* instant = my_assume_aligned<2>(PAGE_HEADER + PAGE_INSTANT
+ block->frame);
mtr->write<2,mtr_t::FORCED>(*block, instant, mtr->write<2,mtr_t::FORCED>(*block, instant,
mach_read_from_2(instant)); mach_read_from_2(instant));
if (!index->table->instant) { if (!index->table->instant) {
...@@ -1557,12 +1541,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization. ...@@ -1557,12 +1541,6 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
@retval false if it is a compressed page, and recompression failed */ @retval false if it is a compressed page, and recompression failed */
bool bool
btr_page_reorganize_block( btr_page_reorganize_block(
/*======================*/
bool recovery,/*!< in: true if called in recovery:
locks should not be updated, i.e.,
there cannot exist locks on the
page, and a hash index should not be
dropped: it cannot exist */
ulint z_level,/*!< in: compression level to be used ulint z_level,/*!< in: compression level to be used
if dealing with compressed page */ if dealing with compressed page */
buf_block_t* block, /*!< in/out: B-tree page */ buf_block_t* block, /*!< in/out: B-tree page */
...@@ -1576,7 +1554,7 @@ btr_page_reorganize_block( ...@@ -1576,7 +1554,7 @@ btr_page_reorganize_block(
page_cur_t cur; page_cur_t cur;
page_cur_set_before_first(block, &cur); page_cur_set_before_first(block, &cur);
btr_page_reorganize_low(recovery, &cur, index, mtr); btr_page_reorganize_low(&cur, index, mtr);
return true; return true;
} }
...@@ -1599,7 +1577,7 @@ btr_page_reorganize( ...@@ -1599,7 +1577,7 @@ btr_page_reorganize(
mtr_t* mtr) /*!< in/out: mini-transaction */ mtr_t* mtr) /*!< in/out: mini-transaction */
{ {
if (!buf_block_get_page_zip(cursor->block)) { if (!buf_block_get_page_zip(cursor->block)) {
btr_page_reorganize_low(false, cursor, index, mtr); btr_page_reorganize_low(cursor, index, mtr);
return true; return true;
} }
...@@ -1621,6 +1599,7 @@ btr_page_reorganize( ...@@ -1621,6 +1599,7 @@ btr_page_reorganize(
/***********************************************************//** /***********************************************************//**
Parses a redo log record of reorganizing a page. Parses a redo log record of reorganizing a page.
@return end of log record or NULL */ @return end of log record or NULL */
ATTRIBUTE_COLD /* only used when crash-upgrading */
const byte* const byte*
btr_parse_page_reorganize( btr_parse_page_reorganize(
/*======================*/ /*======================*/
...@@ -1653,8 +1632,13 @@ btr_parse_page_reorganize( ...@@ -1653,8 +1632,13 @@ btr_parse_page_reorganize(
level = page_zip_level; level = page_zip_level;
} }
if (block != NULL) { if (block == NULL) {
btr_page_reorganize_block(true, level, block, index, mtr); } else if (block->page.zip.data) {
page_zip_reorganize(block, index, level, mtr, true);
} else {
page_cur_t cur;
page_cur_set_before_first(block, &cur);
btr_page_reorganize_low<true>(&cur, index, mtr);
} }
return(ptr); return(ptr);
...@@ -3162,6 +3146,7 @@ void btr_level_list_remove(const buf_block_t& block, const dict_index_t& index, ...@@ -3162,6 +3146,7 @@ void btr_level_list_remove(const buf_block_t& block, const dict_index_t& index,
Parses the redo log record for setting an index record as the predefined Parses the redo log record for setting an index record as the predefined
minimum record. minimum record.
@return end of log record or NULL */ @return end of log record or NULL */
ATTRIBUTE_COLD /* only used when crash-upgrading */
const byte* const byte*
btr_parse_set_min_rec_mark( btr_parse_set_min_rec_mark(
/*=======================*/ /*=======================*/
...@@ -5182,12 +5167,9 @@ btr_can_merge_with_page( ...@@ -5182,12 +5167,9 @@ btr_can_merge_with_page(
max_ins_size = page_get_max_insert_size(mpage, n_recs); max_ins_size = page_get_max_insert_size(mpage, n_recs);
if (data_size > max_ins_size) { if (data_size > max_ins_size) {
/* We have to reorganize mpage */ /* We have to reorganize mpage */
if (!btr_page_reorganize_block(page_zip_level, mblock, index,
if (!btr_page_reorganize_block( mtr)) {
false, page_zip_level, mblock, index, mtr)) {
goto error; goto error;
} }
......
/***************************************************************************** /*****************************************************************************
Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved. Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved.
Copyright (C) 2014, 2019, MariaDB Corporation. Copyright (C) 2014, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -444,7 +444,7 @@ btr_defragment_merge_pages( ...@@ -444,7 +444,7 @@ btr_defragment_merge_pages(
// reorganizing the page, otherwise we need to reorganize the page // reorganizing the page, otherwise we need to reorganize the page
// first to release more space. // first to release more space.
if (move_size > max_ins_size) { if (move_size > max_ins_size) {
if (!btr_page_reorganize_block(false, page_zip_level, if (!btr_page_reorganize_block(page_zip_level,
to_block, index, to_block, index,
mtr)) { mtr)) {
if (!dict_index_is_clust(index) if (!dict_index_is_clust(index)
......
// Copyright (c) 2014, Google Inc. // Copyright (c) 2014, Google Inc.
// Copyright (c) 2017, 2019, MariaDB Corporation. // Copyright (c) 2017, 2020, MariaDB Corporation.
/**************************************************//** /**************************************************//**
@file btr/btr0scrub.cc @file btr/btr0scrub.cc
...@@ -369,7 +369,7 @@ btr_optimistic_scrub( ...@@ -369,7 +369,7 @@ btr_optimistic_scrub(
return DB_OVERFLOW; return DB_OVERFLOW;
} }
#endif #endif
if (!btr_page_reorganize_block(false, scrub_compression_level, block, if (!btr_page_reorganize_block(scrub_compression_level, block,
index, mtr)) { index, mtr)) {
return DB_OVERFLOW; return DB_OVERFLOW;
} }
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc. Copyright (c) 2012, Facebook Inc.
Copyright (c) 2014, 2019, MariaDB Corporation. Copyright (c) 2014, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -577,7 +577,8 @@ btr_discard_page( ...@@ -577,7 +577,8 @@ btr_discard_page(
Parses the redo log record for setting an index record as the predefined Parses the redo log record for setting an index record as the predefined
minimum record. minimum record.
@return end of log record or NULL */ @return end of log record or NULL */
ATTRIBUTE_COLD MY_ATTRIBUTE((nonnull(1,2), warn_unused_result)) ATTRIBUTE_COLD /* only used when crash-upgrading */
MY_ATTRIBUTE((nonnull(1,2), warn_unused_result))
const byte* const byte*
btr_parse_set_min_rec_mark( btr_parse_set_min_rec_mark(
/*=======================*/ /*=======================*/
...@@ -589,6 +590,7 @@ btr_parse_set_min_rec_mark( ...@@ -589,6 +590,7 @@ btr_parse_set_min_rec_mark(
/***********************************************************//** /***********************************************************//**
Parses a redo log record of reorganizing a page. Parses a redo log record of reorganizing a page.
@return end of log record or NULL */ @return end of log record or NULL */
ATTRIBUTE_COLD /* only used when crash-upgrading */
const byte* const byte*
btr_parse_page_reorganize( btr_parse_page_reorganize(
/*======================*/ /*======================*/
...@@ -704,15 +706,7 @@ IBUF_BITMAP_FREE is unaffected by reorganization. ...@@ -704,15 +706,7 @@ IBUF_BITMAP_FREE is unaffected by reorganization.
@retval true if the operation was successful @retval true if the operation was successful
@retval false if it is a compressed page, and recompression failed */ @retval false if it is a compressed page, and recompression failed */
UNIV_INTERN bool btr_page_reorganize_block(
bool
btr_page_reorganize_block(
/*======================*/
bool recovery,/*!< in: true if called in recovery:
locks should not be updated, i.e.,
there cannot exist locks on the
page, and a hash index should not be
dropped: it cannot exist */
ulint z_level,/*!< in: compression level to be used ulint z_level,/*!< in: compression level to be used
if dealing with compressed page */ if dealing with compressed page */
buf_block_t* block, /*!< in/out: B-tree page */ buf_block_t* block, /*!< in/out: B-tree page */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment