MDEV-34529 Shrink the system tablespace when system tablespace contains...

MDEV-34529  Shrink the system tablespace when system tablespace contains MDEV-30671 leaked undo pages

- InnoDB fails to shrink the system tablespace when it contains
undo log pages that were leaked due to MDEV-30671.

- InnoDB now frees the unused segments in the system tablespace
before shrinking the tablespace.

inode_info: Structure to store the inode page and offsets.

fil_space_t::garbage_collect(): Frees the system tablespace
unused segment

fsp_free_unused_seg(): Frees the unused segment

fsp_get_sys_used_segment(): Iterates through all default
file segments and index segments present in the system tablespace.

fseg_inode_free(): Frees the extents and fragment pages of the
given index node, ignoring any error in the same way as
trx_purge_free_segment() does

trx_sys_t::reset_page(): Retain the TRX_SYS_FSEG_HEADER value
in trx_sys page while resetting the page.
parent f1b4d36c
# restart: --debug_dbug=d,undo_segment_leak
SET GLOBAL INNODB_FILE_PER_TABLE=0;
Warnings:
Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a future release
CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL)ENGINE=InnoDB;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
UPDATE t1 SET f1 = f1 + 1 WHERE f1 > 1000;
UPDATE t1 SET f2 = f2 + 1 WHERE f1 > 1000;
UPDATE t1 SET f1 = f2 + 1 WHERE f1 > 1000;
UPDATE t1 SET f2 = f1 + 1 WHERE f1 > 1000;
DELETE FROM t1;
DROP TABLE t1;
set GLOBAL innodb_fast_shutdown=0;
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 79691776
# restart
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 12582912
call mtr.add_suppression("InnoDB: Can't free the unused segments in system tablespace because a previous shutdown was not with innodb_fast_shutdown=0");
call mtr.add_suppression("InnoDB: Cannot free the unused segment in system tablespace due to Data structure corruption. Ignorning the autoshrink option");
call mtr.add_suppression("InnoDB: Failed to free the unused segment");
# restart: --debug_dbug=d,undo_segment_leak
SET GLOBAL INNODB_FILE_PER_TABLE=0;
Warnings:
Warning 1287 '@@innodb_file_per_table' is deprecated and will be removed in a future release
CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL)ENGINE=InnoDB;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
UPDATE t1 SET f1 = f1 + 1 WHERE f1 > 1000;
UPDATE t1 SET f2 = f2 + 1 WHERE f1 > 1000;
UPDATE t1 SET f1 = f2 + 1 WHERE f1 > 1000;
UPDATE t1 SET f2 = f1 + 1 WHERE f1 > 1000;
DELETE FROM t1;
DROP TABLE t1;
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 79691776
# restart: --debug_dbug=d,unused_undo_free_fail_1
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 79691776
# restart: --debug_dbug=d,unused_undo_free_fail_2
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 79691776
# restart: --debug_dbug=d,unused_undo_free_fail_3
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 79691776
# restart: --debug_dbug=d,unused_undo_free_fail_4
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 79691776
# restart: --debug_dbug=d,unused_undo_free_fail_5
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 79691776
# restart: --debug_dbug=d,unused_undo_free_fail_6
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 79691776
# restart
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
NAME FILE_SIZE
innodb_system 79691776
--innodb_undo_tablespaces=0
--innodb_sys_tablespaces
--source include/have_innodb.inc
--source include/have_sequence.inc
--source include/have_debug.inc
let $restart_parameters=--debug_dbug=d,undo_segment_leak;
--source include/restart_mysqld.inc
SET GLOBAL INNODB_FILE_PER_TABLE=0;
CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL)ENGINE=InnoDB;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
UPDATE t1 SET f1 = f1 + 1 WHERE f1 > 1000;
UPDATE t1 SET f2 = f2 + 1 WHERE f1 > 1000;
UPDATE t1 SET f1 = f2 + 1 WHERE f1 > 1000;
UPDATE t1 SET f2 = f1 + 1 WHERE f1 > 1000;
DELETE FROM t1;
DROP TABLE t1;
set GLOBAL innodb_fast_shutdown=0;
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
let $restart_parameters=;
--source include/restart_mysqld.inc
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
--innodb_undo_tablespaces=0
--innodb_sys_tablespaces
--source include/have_innodb.inc
--source include/have_sequence.inc
--source include/have_debug.inc
call mtr.add_suppression("InnoDB: Can't free the unused segments in system tablespace because a previous shutdown was not with innodb_fast_shutdown=0");
call mtr.add_suppression("InnoDB: Cannot free the unused segment in system tablespace due to Data structure corruption. Ignorning the autoshrink option");
call mtr.add_suppression("InnoDB: Failed to free the unused segment");
let $restart_parameters=--debug_dbug=d,undo_segment_leak;
--source include/restart_mysqld.inc
SET GLOBAL INNODB_FILE_PER_TABLE=0;
CREATE TABLE t1(f1 INT NOT NULL, f2 INT NOT NULL)ENGINE=InnoDB;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
INSERT INTO t1 SELECT seq, seq FROM seq_1_to_4096;
UPDATE t1 SET f1 = f1 + 1 WHERE f1 > 1000;
UPDATE t1 SET f2 = f2 + 1 WHERE f1 > 1000;
UPDATE t1 SET f1 = f2 + 1 WHERE f1 > 1000;
UPDATE t1 SET f2 = f1 + 1 WHERE f1 > 1000;
DELETE FROM t1;
DROP TABLE t1;
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
let $restart_parameters=--debug_dbug=d,unused_undo_free_fail_1;
--source include/restart_mysqld.inc
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
let $restart_parameters=--debug_dbug=d,unused_undo_free_fail_2;
--source include/restart_mysqld.inc
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
let $restart_parameters=--debug_dbug=d,unused_undo_free_fail_3;
--source include/restart_mysqld.inc
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
let $restart_parameters=--debug_dbug=d,unused_undo_free_fail_4;
--source include/restart_mysqld.inc
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
let $restart_parameters=--debug_dbug=d,unused_undo_free_fail_5;
--source include/restart_mysqld.inc
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
let $restart_parameters=--debug_dbug=d,unused_undo_free_fail_6;
--source include/restart_mysqld.inc
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
let $restart_parameters=;
--source include/restart_mysqld.inc
SELECT NAME, FILE_SIZE FROM information_schema.innodb_sys_tablespaces WHERE SPACE = 0;
......@@ -172,36 +172,24 @@ name_of_col_is(
}
#endif /* UNIV_DEBUG */
/** Fetch the next user record while scanning a system table.
On success the cursor position is stored so that the scan can be
resumed later; at the end of the index the cursor is closed.
@param pcur persistent cursor positioned by a previous scan step
@param mtr  mini-transaction
@return the next record if found
@retval nullptr at the end of the table */
const rec_t*
dict_getnext_system_low(btr_pcur_t *pcur, mtr_t *mtr)
{
  rec_t *rec = NULL;
  while (!rec)
  {
    btr_pcur_move_to_next_user_rec(pcur, mtr);
    rec = btr_pcur_get_rec(pcur);
    if (!btr_pcur_is_on_user_rec(pcur))
    {
      /* end of index */
      btr_pcur_close(pcur);
      return NULL;
    }
  }
  /* Get a record, let's save the position */
  btr_pcur_store_position(pcur, mtr);
  return rec;
}
/********************************************************************//**
......
......@@ -37,12 +37,16 @@ Created 11/29/1995 Heikki Tuuri
#include "btr0sea.h"
#include "dict0boot.h"
#include "log0log.h"
#include "dict0load.h"
#include "dict0mem.h"
#include "fsp0types.h"
#include "btr0pcur.h"
#include "trx0sys.h"
#include "log.h"
#ifndef DBUG_OFF
# include "trx0purge.h"
#endif
#include <unordered_set>
#include "trx0undo.h"
/** Returns the first extent descriptor for a segment.
We think of the extent lists of the segment catenated in the order
......@@ -2812,6 +2816,60 @@ fseg_free_extent(
return DB_SUCCESS;
}
/** Perform one step of freeing a file segment: release either one
extent or one fragment page that belongs to the segment.
@param iblock          block that contains the segment inode
@param inode           segment inode
@param space           tablespace where the segment resides
@param mtr             mini-transaction
@param header          segment header whose page must be preserved,
                       or nullptr if no page needs to be spared
@param all_pages_freed if not nullptr, set to true when the segment
                       owns neither extents nor fragment pages any more
@param ahi             passed through to the page/extent freeing calls
@return true when this step could not free anything more (an error
occurred, nothing is left to free, or only the header page remains);
false when an extent or a fragment page was freed. The segment inode
itself is never freed here. */
static
bool fseg_free_step_low(buf_block_t *iblock, fseg_inode_t *inode,
                        fil_space_t *space, mtr_t *mtr,
                        fseg_header_t *header, bool *all_pages_freed
#ifdef BTR_CUR_HASH_ADAPT
                        , bool ahi=false
#endif /* BTR_CUR_HASH_ADAPT */
                        )
{
  dberr_t err= DB_SUCCESS;
  xdes_t *first_extent= fseg_get_first_extent(inode, space, mtr, &err);

  if (first_extent)
  {
    /* The segment still owns a whole extent: release it */
    const bool failed=
      fseg_free_extent(inode, iblock, space,
                       xdes_get_offset(first_extent), mtr
#ifdef BTR_CUR_HASH_ADAPT
                       , ahi
#endif /* BTR_CUR_HASH_ADAPT */
                       ) != DB_SUCCESS;
    return failed;
  }

  if (err != DB_SUCCESS)
    return true;

  /* No extents are left; look for the last used fragment page slot */
  const ulint slot= fseg_find_last_used_frag_page_slot(inode);

  if (UNIV_UNLIKELY(slot == ULINT_UNDEFINED))
  {
    if (all_pages_freed)
      *all_pages_freed= true;
    return true;
  }

  const uint32_t frag_page_no= fseg_get_nth_frag_page_no(inode, slot);

  /* The page that carries the segment header is not freed here */
  if (header && frag_page_no == page_get_page_no(page_align(header)))
    return true;

  const bool failed=
    fseg_free_page_low(inode, iblock, space, frag_page_no, mtr
#ifdef BTR_CUR_HASH_ADAPT
                       , ahi
#endif /* BTR_CUR_HASH_ADAPT */
                       ) != DB_SUCCESS;
  if (failed)
    return true;

  buf_page_free(space, frag_page_no, mtr);
  return false;
}
/** Frees part of a segment. This function can be used to free
a segment by repeatedly calling this function in different
mini-transactions. Doing the freeing in a single mini-transaction
......@@ -2864,43 +2922,12 @@ fseg_free_step(
fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr);
}
dberr_t err;
descr = fseg_get_first_extent(inode, space, mtr, &err);
if (descr) {
/* Free the extent held by the segment */
return fseg_free_extent(inode, iblock, space,
xdes_get_offset(descr), mtr
#ifdef BTR_CUR_HASH_ADAPT
, ahi
#endif /* BTR_CUR_HASH_ADAPT */
) != DB_SUCCESS;
}
if (err != DB_SUCCESS || space->is_stopping()) {
return true;
}
/* Free a frag page */
n = fseg_find_last_used_frag_page_slot(inode);
if (n == ULINT_UNDEFINED) {
/* Freeing completed: free the segment inode */
fsp_free_seg_inode(space, inode, iblock, mtr);
return true;
}
uint32_t page_no = fseg_get_nth_frag_page_no(inode, n);
if (fseg_free_page_low(inode, iblock, space, page_no, mtr
if (fseg_free_step_low(iblock, inode, space, mtr, nullptr, nullptr
#ifdef BTR_CUR_HASH_ADAPT
, ahi
#endif /* BTR_CUR_HASH_ADAPT */
) != DB_SUCCESS) {
))
return true;
}
buf_page_free(space, page_no, mtr);
n = fseg_find_last_used_frag_page_slot(inode);
......@@ -2923,15 +2950,14 @@ fseg_free_step_not_header(
#endif /* BTR_CUR_HASH_ADAPT */
)
{
fseg_inode_t* inode;
const uint32_t space_id = page_get_space_id(page_align(header));
ut_ad(mtr->is_named_space(space_id));
fil_space_t* space = mtr->x_lock_space(space_id);
buf_block_t* iblock;
inode = fseg_inode_try_get(header, space_id, space->zip_size(),
fseg_inode_t *inode = fseg_inode_try_get(header, space_id,
space->zip_size(),
mtr, &iblock);
if (space->is_stopping()) {
return true;
......@@ -2948,43 +2974,11 @@ fseg_free_step_not_header(
fil_block_check_type(*iblock, FIL_PAGE_INODE, mtr);
}
dberr_t err;
if (xdes_t* descr = fseg_get_first_extent(inode, space, mtr, &err)) {
/* Free the extent held by the segment */
return fseg_free_extent(inode, iblock, space,
xdes_get_offset(descr),
mtr
return fseg_free_step_low(iblock, inode, space, mtr, header, nullptr
#ifdef BTR_CUR_HASH_ADAPT
, ahi
#endif /* BTR_CUR_HASH_ADAPT */
) != DB_SUCCESS;
} else if (err != DB_SUCCESS) {
return true;
}
/* Free a frag page */
ulint n = fseg_find_last_used_frag_page_slot(inode);
if (UNIV_UNLIKELY(n == ULINT_UNDEFINED)) {
return true;
}
uint32_t page_no = fseg_get_nth_frag_page_no(inode, n);
if (page_no == page_get_page_no(page_align(header))) {
return true;
}
if (fseg_free_page_low(inode, iblock, space, page_no, mtr
#ifdef BTR_CUR_HASH_ADAPT
, ahi
#endif /* BTR_CUR_HASH_ADAPT */
) != DB_SUCCESS) {
return true;
}
buf_page_free(space, page_no, mtr);
return false;
);
}
/** Returns the first extent descriptor for a segment.
......@@ -3577,22 +3571,401 @@ dberr_t fsp_sys_tablespace_validate()
}
#endif /* UNIV_DEBUG */
void fsp_system_tablespace_truncate()
/** Set of file segment inode locations in the system tablespace.
Each element packs the inode page number into the upper 32 bits and
the byte offset of the inode within that page into the low bits. */
struct inode_info : private std::unordered_set<uint64_t>
{
private:
  using set_type= std::unordered_set<uint64_t>;

  /** @return the set element that represents (page, offset) */
  static uint64_t encode(uint32_t page, uint16_t offset)
  {
    return uint64_t{page} << 32 | offset;
  }

public:
  /** Register an inode location.
  @param page   page number of the inode page
  @param offset byte offset of the inode within the page
  @return whether the location is plausible and was not registered yet */
  __attribute__((warn_unused_result))
  bool insert_inode(uint32_t page, uint16_t offset)
  {
    /* Reject pages beyond the initialized part of the system tablespace */
    if (!(page < fil_system.sys_space->free_limit))
      return false;
    /* Reject offsets that point into the page header or trailer */
    if (!(offset >= FIL_PAGE_DATA))
      return false;
    if (!(offset < srv_page_size - FIL_PAGE_DATA_END))
      return false;
    return emplace(encode(page, offset)).second;
  }

  /** Register the inode that a segment header refers to.
  The page number is read from byte 4 and the offset from byte 8
  of the header.
  @param inode pointer to the segment header
  @return whether the inode was valid and newly registered */
  __attribute__((warn_unused_result))
  bool insert_seg(const byte *inode)
  {
    const auto page= mach_read_from_4(inode + 4);
    const auto offset= mach_read_from_2(inode + 8);
    return insert_inode(page, offset);
  }

  /** @return whether the inode location (page, offset) is registered */
  __attribute__((warn_unused_result))
  bool find(uint32_t page, uint16_t offset) const
  {
    return set_type::find(encode(page, offset)) != end();
  }

  /** @return iterator to the first registered location */
  set_type::iterator start()
  {
    return set_type::begin();
  }

  /** @return past-the-end iterator of the registered locations */
  set_type::iterator last()
  {
    return set_type::end();
  }
};
/** Register the two file segments (PAGE_BTR_SEG_TOP and
PAGE_BTR_SEG_LEAF) whose headers are stored on an index root page.
@param inodes set that collects the used inode locations
@param root   root page number in the system tablespace,
              or FIL_NULL for nothing to do
@return error code
@retval DB_CORRUPTION if either segment could not be registered */
static dberr_t fsp_table_inodes_root(inode_info *inodes, uint32_t root)
{
  dberr_t err= DB_SUCCESS;

  if (root != FIL_NULL)
  {
    if (buf_block_t *root_block=
        buf_pool.page_fix(page_id_t{0, root}, &err,
                          buf_pool_t::FIX_WAIT_READ))
    {
      const byte *page_header= root_block->page.frame + PAGE_HEADER;
      /* Both segments are always attempted, even if the first fails */
      const bool top_ok= inodes->insert_seg(page_header + PAGE_BTR_SEG_TOP);
      const bool leaf_ok= inodes->insert_seg(page_header + PAGE_BTR_SEG_LEAF);
      if (!top_ok || !leaf_ok)
        err= DB_CORRUPTION;
      root_block->page.unfix();
    }
  }

  return err;
}
/** Register the file segments of every index of a table by
visiting each index root page.
@param inodes set that collects the used inode locations
@param table  table whose indexes are to be visited
@return error code of the first failure, or DB_SUCCESS */
static dberr_t add_index_root_pages(inode_info *inodes, dict_table_t *table)
{
  dberr_t err= DB_SUCCESS;
  auto index= UT_LIST_GET_FIRST(table->indexes);

  while (index != nullptr && err == DB_SUCCESS)
  {
    err= fsp_table_inodes_root(inodes, index->page);
    index= UT_LIST_GET_NEXT(indexes, index);
  }

  return err;
}
/** Collect the inodes of all index trees that reside in the system
tablespace: first those listed in SYS_INDEXES with SPACE=0, then the
indexes of the dictionary tables themselves.
@param inodes set that collects the used inode locations
@param mtr    mini-transaction used for scanning SYS_INDEXES
@return error code
@retval DB_CORRUPTION if a SYS_INDEXES field has an unexpected length */
static dberr_t fsp_table_inodes(inode_info *inodes, mtr_t *mtr)
{
  btr_pcur_t pcur;
  ulint field_len;
  dberr_t err= DB_SUCCESS;
  const auto savepoint= mtr->get_savepoint();

  dict_sys.freeze(SRW_LOCK_CALL);

  const rec_t *rec= dict_startscan_system(&pcur, mtr, dict_sys.sys_indexes);
  while (rec)
  {
    const byte *space_field=
      rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__SPACE, &field_len);
    if (field_len != 4)
    {
      err= DB_CORRUPTION;
      break;
    }

    /* Only indexes that live in the system tablespace are of interest */
    if (mach_read_from_4(space_field) == 0)
    {
      const byte *root_field=
        rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__PAGE_NO,
                              &field_len);
      if (field_len != 4)
      {
        err= DB_CORRUPTION;
        break;
      }

      err= fsp_table_inodes_root(inodes, mach_read_from_4(root_field));
      if (err)
        break;
    }

    rec= dict_getnext_system_low(&pcur, mtr);
  }

  /* Release whatever the scan latched before giving up the freeze */
  mtr->rollback_to_savepoint(savepoint);
  dict_sys.unfreeze();

  if (err == DB_SUCCESS)
  {
    /* The dictionary tables are handled explicitly, in this order */
    dict_table_t *const dictionary_tables[]=
    {
      dict_sys.sys_tables, dict_sys.sys_indexes,
      dict_sys.sys_columns, dict_sys.sys_fields
    };

    for (dict_table_t *table : dictionary_tables)
    {
      err= add_index_root_pages(inodes, table);
      if (err)
        break;
    }
  }

  return err;
}
/** Collect the inodes of all file segments that are referenced from
the system tablespace: the segments on the TRX_SYS page, the
dictionary header page, the change buffer header page, the rollback
segment headers (and their cached undo logs) that reside in the
system tablespace, and finally all index trees (fsp_table_inodes()).
@param inodes set that collects the used inode locations
@param mtr mini-transaction, passed on to fsp_table_inodes()
@return error code
@retval DB_CORRUPTION if a segment header could not be registered */
static dberr_t fsp_get_sys_used_segment(inode_info *inodes, mtr_t *mtr)
{
dberr_t err= DB_SUCCESS;
buf_block_t *block= nullptr;
/* Get TRX_SYS_FSEG_HEADER, TRX_SYS_DOUBLEWRITE_FSEG from
TRX_SYS_PAGE */
block= buf_pool.page_fix(page_id_t{0, TRX_SYS_PAGE_NO}, &err,
buf_pool_t::FIX_WAIT_READ);
if (!block)
return err;
if (!inodes->insert_seg(block->page.frame + TRX_SYS + TRX_SYS_FSEG_HEADER))
err= DB_CORRUPTION;
if (!inodes->insert_seg(block->page.frame + TRX_SYS_DOUBLEWRITE
+ TRX_SYS_DOUBLEWRITE_FSEG))
err= DB_CORRUPTION;
block->page.unfix();
if (err)
return err;
/* Get DICT_HDR_FSEG_HEADER from the dictionary header page */
block= buf_pool.page_fix(page_id_t{0, DICT_HDR_PAGE_NO}, &err,
buf_pool_t::FIX_WAIT_READ);
if (!block)
return err;
if (!inodes->insert_seg(block->page.frame + DICT_HDR + DICT_HDR_FSEG_HEADER))
err= DB_CORRUPTION;
block->page.unfix();
if (err)
return err;
/* Get the segment header stored at PAGE_DATA of the
FSP_IBUF_HEADER_PAGE_NO page */
block= buf_pool.page_fix(page_id_t{0, FSP_IBUF_HEADER_PAGE_NO},
&err, buf_pool_t::FIX_WAIT_READ);
if (!block)
return err;
if (!inodes->insert_seg(block->page.frame + PAGE_DATA))
err= DB_CORRUPTION;
block->page.unfix();
/* Get rollback segment header page */
for (ulint rseg_id= 0; rseg_id < TRX_SYS_N_RSEGS && err == DB_SUCCESS;
rseg_id++)
{
trx_rseg_t *rseg= &trx_sys.rseg_array[rseg_id];
/* NOTE(review): rseg->space is dereferenced unconditionally;
presumably every slot has a tablespace assigned at this point -
confirm */
if (rseg->space->id == 0)
{
block= buf_pool.page_fix(rseg->page_id(), &err,
buf_pool_t::FIX_WAIT_READ);
/* NOTE(review): the loop only terminates here if page_fix()
stored a failure code in err - confirm against its contract */
if (!block)
continue;
if (!inodes->insert_seg(block->page.frame + TRX_RSEG +
TRX_RSEG_FSEG_HEADER))
err= DB_CORRUPTION;
block->page.unfix();
/* Even after slow shutdown, there is a possibility that
cached undo log can exist. So store the segment as used one */
for (trx_undo_t *undo= UT_LIST_GET_FIRST(rseg->undo_cached);
undo && err == DB_SUCCESS;
undo= UT_LIST_GET_NEXT(undo_list, undo))
{
block= buf_pool.page_fix(page_id_t{0, undo->hdr_page_no}, &err,
buf_pool_t::FIX_WAIT_READ);
/* As above: relies on err having been set by page_fix() */
if (!block)
continue;
if (!inodes->insert_seg(block->page.frame + TRX_UNDO_SEG_HDR +
TRX_UNDO_FSEG_HEADER))
err= DB_CORRUPTION;
block->page.unfix();
}
}
}
/* Finally register the segments of all index trees */
if (err == DB_SUCCESS)
err= fsp_table_inodes(inodes, mtr);
return err;
}
/** Free the extents and fragment pages of the segment described by
the given inode, and finally the inode itself once nothing is left.
Failures are not reported: the function simply stops and returns.
@param page_no page number of the inode page
@param offset byte offset of the inode within the page */
static void fseg_inode_free(uint32_t page_no, uint16_t offset)
{
fil_space_t *space= fil_system.sys_space;
mtr_t mtr;
mtr.start();
mtr.x_lock_space(space);
sql_print_information("InnoDB: Freeing the unused segment page "
UINT32PF " offset " UINT32PF, page_no,
offset);
buf_block_t *iblock= buf_page_get_gen(page_id_t{0, page_no}, 0,
RW_X_LATCH, nullptr, BUF_GET,
&mtr);
/* Debug injection: pretend that the inode page could not be read */
DBUG_EXECUTE_IF("unused_undo_free_fail_4", iblock= nullptr;);
if (!iblock)
{
mtr.commit();
return;
}
fseg_inode_t *inode= iblock->page.frame + offset;
bool all_pages_freed= false;
/* Each iteration frees one extent or fragment page in its own
mini-transaction; the loop ends when fseg_free_step_low() returns
true */
while (!fseg_free_step_low(iblock, inode, space, &mtr, nullptr,
&all_pages_freed))
{
DBUG_EXECUTE_IF("unused_undo_free_fail_5",
mtr.commit();
return;);
/* Take an extra buffer-fix on the inode page before committing,
then restart the mini-transaction, re-acquire the exclusive page
latch and register the page in the new mini-transaction */
iblock->fix();
mtr.commit();
mtr.start();
mtr.x_lock_space(space);
iblock->page.lock.x_lock();
mtr.memo_push(iblock, MTR_MEMO_PAGE_X_FIX);
}
DBUG_EXECUTE_IF("unused_undo_free_fail_6", mtr.commit(); return;);
/* all_pages_freed stays false if the loop stopped for another
reason; in that case the inode is left in place */
if (all_pages_freed)
/* Freeing completed: free the segment inode */
fsp_free_seg_inode(space, inode, iblock, &mtr);
mtr.commit();
return;
}
/** Free every segment whose inode was identified as unused.
Each element of the set packs the inode page number in its upper
32 bits and the byte offset within that page in its low 16 bits.
@param unused_inodes locations of the unused segment inodes */
static void fsp_free_unused_seg(inode_info *unused_inodes)
{
  for (auto it= unused_inodes->start(); it != unused_inodes->last(); ++it)
  {
    const uint64_t location= *it;
    fseg_inode_free(static_cast<uint32_t>(location >> 32),
                    static_cast<uint16_t>(location));
  }
}
/** Scan one list of inode pages of the system tablespace and collect
every allocated inode (nonzero FSEG_ID) that is not registered as used.
@param inodes         inodes that are known to be in use
@param boffset        byte offset, within page 0, of the base node of
                      the inode page list to scan
@param unused_inodes  set to which the unreferenced inodes are added
@return error code
@retval DB_CORRUPTION if an unreferenced inode could not be registered */
static dberr_t fsp_get_sys_unused_segment(inode_info *inodes,
                                          uint16_t boffset,
                                          inode_info *unused_inodes)
{
  dberr_t err= DB_SUCCESS;
  buf_block_t *header= buf_pool.page_fix(page_id_t{0, 0}, &err,
                                         buf_pool_t::FIX_WAIT_READ);
  if (!header)
    return err;

  const uint32_t len= flst_get_len(header->page.frame + boffset);
  fil_addr_t addr= flst_get_first(header->page.frame + boffset);

  for (uint32_t remaining= len; remaining-- && err == DB_SUCCESS; )
  {
    ut_ad(addr.boffset >= FIL_PAGE_DATA);
    /* Use the header block, which stays buffer-fixed for the whole
    scan, rather than the previously visited (already unfixed) page */
    ut_ad(addr.boffset < header->physical_size() - FIL_PAGE_DATA_END);

    buf_block_t *block= buf_pool.page_fix(page_id_t{0, addr.page}, &err,
                                          buf_pool_t::FIX_WAIT_READ);
    if (!block)
    {
      addr.page= FIL_NULL;
      continue;
    }

    /* Read the link to the next inode page before releasing this one */
    const fil_addr_t next_addr=
      flst_get_next_addr(block->page.frame + addr.boffset);

    for (uint32_t slot= 0;
         slot < FSP_SEG_INODES_PER_PAGE(srv_page_size) && err == DB_SUCCESS;
         slot++)
    {
      fseg_inode_t *inode=
        fsp_seg_inode_page_get_nth_inode(block->page.frame, slot);
      const uint16_t offset= uint16_t(inode - block->page.frame);

      /* A zero segment identifier marks an unallocated inode slot */
      if (mach_read_from_8(FSEG_ID + inode) == 0)
        continue;

      if (!inodes->find(addr.page, offset) &&
          !unused_inodes->insert_inode(addr.page, offset))
        err= DB_CORRUPTION;
    }

    addr= next_addr;
    block->page.unfix();
  }

  /* The list must have been traversed to its end, unless the scan
  was abandoned early because of an error */
  ut_ad(err != DB_SUCCESS || addr.page == FIL_NULL);
  header->page.unfix();
  return err;
}
/** Free the segments of the system tablespace that are allocated but
not referenced by anything. Used only in preparation for shrinking
the system tablespace.
@param shutdown whether this is invoked during slow shutdown
@return error code; errors can only arise while the segments are
being classified, before anything is freed */
dberr_t fil_space_t::garbage_collect(bool shutdown)
{
  if (!shutdown && !trx_sys.is_undo_empty())
  {
    sql_print_warning("InnoDB: Can't free the unused segments"
                      " in system tablespace because a previous"
                      " shutdown was not with"
                      " innodb_fast_shutdown=0");
    return DB_SUCCESS;
  }

  ut_a(id == 0);

  mtr_t mtr;
  mtr.start();
  inode_info used_inodes, unused_inodes;

  /* Step 1: collect all the used segment inode entries */
  dberr_t err= fsp_get_sys_used_segment(&used_inodes, &mtr);
  DBUG_EXECUTE_IF("unused_undo_free_fail_1", err= DB_CORRUPTION;);

  /* Step 2: find unreferenced inodes on the list of full inode pages */
  if (!err)
  {
    err= fsp_get_sys_unused_segment(&used_inodes,
                                    FSP_HEADER_OFFSET + FSP_SEG_INODES_FULL,
                                    &unused_inodes);
    DBUG_EXECUTE_IF("unused_undo_free_fail_2", err= DB_CORRUPTION;);
  }

  /* Step 3: the same for inode pages that still have free slots */
  if (!err)
  {
    err= fsp_get_sys_unused_segment(&used_inodes,
                                    FSP_HEADER_OFFSET + FSP_SEG_INODES_FREE,
                                    &unused_inodes);
    DBUG_EXECUTE_IF("unused_undo_free_fail_3", err= DB_CORRUPTION;);
  }

  mtr.commit();

  if (err)
    return err;

  /* Ignore the error just like trx_purge_free_segment() */
  fsp_free_unused_seg(&unused_inodes);
  return DB_SUCCESS;
}
void fsp_system_tablespace_truncate(bool shutdown)
{
ut_ad(!purge_sys.enabled());
ut_ad(!srv_undo_sources);
uint32_t last_used_extent= 0;
fil_space_t *space= fil_system.sys_space;
dberr_t err= space->garbage_collect(shutdown);
/* Print the warning and move on to shrinking the system
tablespace because error was thrown only while finding
the unused segment, nothing has modified in system tablespace */
if (err)
sql_print_warning("InnoDB: Cannot free the unused segment "
"in system tablespace due to %s. Ignorning "
"the autoshrink option", ut_strerr(err));
mtr_t mtr;
mtr.start();
mtr.x_lock_space(space);
dberr_t err= fsp_traverse_extents(space, &last_used_extent, &mtr);
err= fsp_traverse_extents(space, &last_used_extent, &mtr);
if (err != DB_SUCCESS)
{
func_exit:
err_exit:
mtr.commit();
sql_print_warning("InnoDB: Cannot shrink the system tablespace "
"due to %s", ut_strerr(err));
mtr.commit();
return;
}
uint32_t fixed_size= srv_sys_space.get_min_size(),
......@@ -3643,7 +4016,7 @@ void fsp_system_tablespace_truncate()
header= fsp_get_latched_xdes_page(0, &mtr, &err);
if (!header)
goto func_exit;
goto err_exit;
mtr.write<4, mtr_t::FORCED>(
*header, FSP_HEADER_OFFSET + FSP_SIZE + header->page.frame,
......@@ -3656,16 +4029,16 @@ void fsp_system_tablespace_truncate()
err= fsp_shrink_list(
header, FSP_HEADER_OFFSET + FSP_FREE, last_used_extent, &mtr);
if (err != DB_SUCCESS)
goto func_exit;
goto err_exit;
err= fsp_shrink_list(
header, FSP_HEADER_OFFSET + FSP_FREE_FRAG, last_used_extent, &mtr);
if (err != DB_SUCCESS)
goto func_exit;
goto err_exit;
err= fsp_xdes_reset(space, last_used_extent, &mtr);
if (err != DB_SUCCESS)
goto func_exit;
goto err_exit;
mtr.trim_pages(page_id_t(0, last_used_extent));
size_t shrink_redo_size= mtr.get_log_size();
......
......@@ -213,4 +213,12 @@ dict_process_sys_foreign_col_rec(
in referenced table */
ulint* pos); /*!< out: column position */
/** This function gets the next system table record as it scans
the table.
@param pcur persistent cursor
@param mtr mini-transaction
@return the next record if found
@retval nullptr at the end of the table */
const rec_t*
dict_getnext_system_low(btr_pcur_t *pcur, mtr_t *mtr);
#endif
......@@ -1009,6 +1009,10 @@ struct fil_space_t final
/** Update the data structures on write completion */
void complete_write();
/** Free the unused segment for the tablespace
@param shutdown called during slow shutdown
@return error code */
dberr_t garbage_collect(bool shutdown);
private:
/** @return whether the file is usable for io() */
ATTRIBUTE_COLD bool prepare_acquired();
......
......@@ -555,8 +555,9 @@ inline void fsp_init_file_page(
mtr->init(block);
}
/** Truncate the system tablespace */
void fsp_system_tablespace_truncate();
/** Truncate the system tablespace
@param shutdown Called during shutdown */
void fsp_system_tablespace_truncate(bool shutdown);
#ifndef UNIV_DEBUG
# define fsp_init_file_page(space, block, mtr) fsp_init_file_page(block, mtr)
......
......@@ -1579,6 +1579,6 @@ void srv_purge_shutdown()
srv_shutdown_purge_tasks();
if (!srv_fast_shutdown && !high_level_read_only && srv_was_started &&
!opt_bootstrap && srv_operation == SRV_OPERATION_NORMAL)
fsp_system_tablespace_truncate();
fsp_system_tablespace_truncate(true);
}
}
......@@ -333,6 +333,15 @@ inline dberr_t trx_sys_t::reset_page(mtr_t *mtr)
if (!sys_header) return err;
if (mach_read_from_4(sys_header->page.frame + TRX_SYS +
TRX_SYS_FSEG_HEADER) != TRX_SYS_SPACE)
return DB_CORRUPTION;
/* Store the TRX_SYS_FSEG_HEADER page, offset */
char fseg_addr[6];
memcpy(fseg_addr,
sys_header->page.frame + TRX_SYS + TRX_SYS_FSEG_HEADER + 4, 6);
const bool dblwr_enabled=
mach_read_from_4(TRX_SYS_DOUBLEWRITE_MAGIC + TRX_SYS_DOUBLEWRITE +
sys_header->page.frame)
......@@ -347,6 +356,9 @@ inline dberr_t trx_sys_t::reset_page(mtr_t *mtr)
mtr->write<2>(*sys_header, FIL_PAGE_TYPE + sys_header->page.frame,
FIL_PAGE_TYPE_TRX_SYS);
mtr->memcpy(*sys_header,
sys_header->page.frame + TRX_SYS + TRX_SYS_FSEG_HEADER + 4,
fseg_addr, 6);
mtr->write<4>(*sys_header,
TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_PAGE_NO +
sys_header->page.frame, FSP_FIRST_RSEG_PAGE_NO);
......@@ -1783,7 +1795,7 @@ dberr_t srv_start(bool create_new_db)
if (!high_level_read_only
&& srv_sys_space.can_auto_shrink()) {
fsp_system_tablespace_truncate();
fsp_system_tablespace_truncate(false);
DBUG_EXECUTE_IF("crash_after_sys_truncate",
return srv_init_abort(DB_ERROR););
}
......
......@@ -482,6 +482,7 @@ inline dberr_t purge_sys_t::iterator::free_history_rseg(trx_rseg_t &rseg) const
free_segment:
ut_ad(rseg.curr_size >= seg_size);
rseg.curr_size-= seg_size;
DBUG_EXECUTE_IF("undo_segment_leak", goto skip_purge_free;);
trx_purge_free_segment(rseg_hdr, b, mtr);
break;
case TRX_UNDO_CACHED:
......@@ -507,7 +508,9 @@ inline dberr_t purge_sys_t::iterator::free_history_rseg(trx_rseg_t &rseg) const
goto free_segment;
}
}
#ifndef DBUG_OFF
skip_purge_free:
#endif /* !DBUG_OFF */
hdr_addr= prev_hdr_addr;
mtr.commit();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment