MDEV-33542 Inplace algorithm occupies more disk space compared to copy algorithm

Problem:
=======
- For large files, InnoDB eagerly adds a new extent to a segment
even though the segment still has many unused pages.
The reason is that, for larger files, the threshold for adding
a new extent (unused pages below 1/8 of the reserved pages)
is reached frequently.

Solution:
=========
- Try to use the unused pages of the segment before adding
a new extent to the file segment.

need_for_new_extent(): For larger files, use
4 * FSP_EXTENT_SIZE as the threshold for allocating a new extent.

fseg_alloc_free_page_low(): Rewrote the function to allocate
the page in the following order:
1) Try to get the page from an existing extent of the segment.
2) Check whether the segment needs a new extent
(need_for_new_extent()); if so, allocate the new extent
and take the page from it.
3) Take an individual unused page from the
segment or the tablespace.
4) Allocate a new extent and take the first page from it.

Removed the FSEG_FILLFACTOR and FSEG_FRAG_LIMIT macros.
parent 5b4e69c0
...@@ -37,7 +37,7 @@ test.t1 analyze status Engine-independent statistics collected ...@@ -37,7 +37,7 @@ test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK test.t1 analyze status OK
SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
CLUST_INDEX_SIZE CLUST_INDEX_SIZE
1856 1792
connection con2; connection con2;
DELETE FROM t1 WHERE a00 = 'cnm'; DELETE FROM t1 WHERE a00 = 'cnm';
COMMIT; COMMIT;
...@@ -80,7 +80,7 @@ test.t1 analyze status Engine-independent statistics collected ...@@ -80,7 +80,7 @@ test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK test.t1 analyze status OK
SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
CLUST_INDEX_SIZE CLUST_INDEX_SIZE
1856 1792
DELETE FROM t1 WHERE a00 = 'dpn'; DELETE FROM t1 WHERE a00 = 'dpn';
COMMIT; COMMIT;
INSERT INTO t1 SET a00 = 'dpn'; INSERT INTO t1 SET a00 = 'dpn';
...@@ -117,6 +117,6 @@ test.t1 analyze status Engine-independent statistics collected ...@@ -117,6 +117,6 @@ test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK test.t1 analyze status OK
SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1';
CLUST_INDEX_SIZE CLUST_INDEX_SIZE
1856 1792
SET DEBUG_SYNC = 'RESET'; SET DEBUG_SYNC = 'RESET';
DROP TABLE t1; DROP TABLE t1;
...@@ -271,7 +271,7 @@ inline void xdes_init(const buf_block_t &block, xdes_t *descr, mtr_t *mtr) ...@@ -271,7 +271,7 @@ inline void xdes_init(const buf_block_t &block, xdes_t *descr, mtr_t *mtr)
static MY_ATTRIBUTE((nonnull, warn_unused_result)) static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t dberr_t
fseg_mark_page_used(fseg_inode_t *seg_inode, buf_block_t *iblock, fseg_mark_page_used(fseg_inode_t *seg_inode, buf_block_t *iblock,
ulint page, xdes_t *descr, buf_block_t *xdes, mtr_t *mtr) uint32_t page, xdes_t *descr, buf_block_t *xdes, mtr_t *mtr)
{ {
ut_ad(fil_page_get_type(iblock->page.frame) == FIL_PAGE_INODE); ut_ad(fil_page_get_type(iblock->page.frame) == FIL_PAGE_INODE);
ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE)); ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
...@@ -995,7 +995,7 @@ MY_ATTRIBUTE((nonnull, warn_unused_result)) ...@@ -995,7 +995,7 @@ MY_ATTRIBUTE((nonnull, warn_unused_result))
@return error code */ @return error code */
static dberr_t static dberr_t
fsp_alloc_from_free_frag(buf_block_t *header, buf_block_t *xdes, xdes_t *descr, fsp_alloc_from_free_frag(buf_block_t *header, buf_block_t *xdes, xdes_t *descr,
ulint bit, mtr_t *mtr) uint32_t bit, mtr_t *mtr)
{ {
if (UNIV_UNLIKELY(xdes_get_state(descr) != XDES_FREE_FRAG || if (UNIV_UNLIKELY(xdes_get_state(descr) != XDES_FREE_FRAG ||
!xdes_is_free(descr, bit))) !xdes_is_free(descr, bit)))
...@@ -1987,29 +1987,42 @@ fseg_alloc_free_page_low( ...@@ -1987,29 +1987,42 @@ fseg_alloc_free_page_low(
} }
} }
/* In the big if-else below we look for ret_page and ret_descr */ const uint32_t extent_size = FSP_EXTENT_SIZE;
/*-------------------------------------------------------------*/
if ((xdes_get_state(descr) == XDES_FSEG)
&& mach_read_from_8(descr + XDES_ID) == seg_id
&& xdes_is_free(descr, hint % FSP_EXTENT_SIZE)) {
take_hinted_page:
/* 1. We can take the hinted page
=================================*/
ret_descr = descr; ret_descr = descr;
/* Try to get the page from extent which belongs to segment */
if (xdes_get_state(descr) == XDES_FSEG
&& mach_read_from_8(descr + XDES_ID) == seg_id) {
/* Get the page from the segment extent */
if (xdes_is_free(descr, hint % extent_size)) {
take_hinted_page:
ret_page = hint; ret_page = hint;
/* Skip the check for extending the tablespace. If the
page hint were not within the size of the tablespace,
we would have got (descr == NULL) above and reset the hint. */
goto got_hinted_page; goto got_hinted_page;
/*-----------------------------------------------------------*/ } else if (!xdes_is_full(descr)) {
} else if (xdes_get_state(descr) == XDES_FREE /* Take the page from the same extent as the
&& reserved - used < reserved / FSEG_FILLFACTOR hinted page (and the extent already belongs to
&& used >= FSEG_FRAG_LIMIT) { the segment) */
ret_page = xdes_find_free(descr, hint % extent_size);
/* 2. We allocate the free extent from space and can take if (ret_page == FIL_NULL) {
========================================================= ut_ad(!has_done_reservation);
the hinted page return nullptr;
===============*/ }
ret_page += xdes_get_offset(ret_descr);
goto alloc_done;
}
}
/** If the number of unused but reserved pages in a segment is
less than the minimum of 1/8 of reserved pages or
4 * FSP_EXTENT_SIZE and there are at least half of extent size
used pages, then we allow a new empty extent to be added to
the segment in fseg_alloc_free_page_general(). Otherwise, we use
unused pages of the segment. */
if (used < extent_size / 2 ||
reserved - used >= reserved / 8 ||
reserved - used >= extent_size * 4) {
} else if (xdes_get_state(descr) == XDES_FREE) {
/* Allocate the free extent from space and can
take the hinted page */
ret_descr = fsp_alloc_free_extent(space, hint, &xdes, ret_descr = fsp_alloc_free_extent(space, hint, &xdes,
mtr, err); mtr, err);
...@@ -2036,54 +2049,34 @@ fseg_alloc_free_page_low( ...@@ -2036,54 +2049,34 @@ fseg_alloc_free_page_low(
/* Try to fill the segment free list */ /* Try to fill the segment free list */
*err = fseg_fill_free_list(seg_inode, iblock, space, *err = fseg_fill_free_list(seg_inode, iblock, space,
hint + FSP_EXTENT_SIZE, mtr); hint + extent_size, mtr);
if (UNIV_UNLIKELY(*err != DB_SUCCESS)) { if (UNIV_UNLIKELY(*err != DB_SUCCESS)) {
return nullptr; return nullptr;
} }
goto take_hinted_page; goto take_hinted_page;
/*-----------------------------------------------------------*/ } else if (direction != FSP_NO_DIR) {
} else if ((direction != FSP_NO_DIR)
&& ((reserved - used) < reserved / FSEG_FILLFACTOR) ret_descr = fseg_alloc_free_extent(seg_inode, iblock,
&& (used >= FSEG_FRAG_LIMIT) &xdes, space, mtr, err);
&& (ret_descr = fseg_alloc_free_extent(seg_inode, iblock,
&xdes, space, if (!ret_descr) {
mtr, err))) { ut_ad(*err != DB_SUCCESS);
/* 3. We take any free extent (which was already assigned above return nullptr;
=============================================================== }
in the if-condition to ret_descr) and take the lowest or /* Take any free extent (which was already assigned
======================================================== above in the if-condition to ret_descr) and take the
highest page in it, depending on the direction lowest or highest page in it, depending on the direction */
==============================================*/
ret_page = xdes_get_offset(ret_descr); ret_page = xdes_get_offset(ret_descr);
if (direction == FSP_DOWN) { if (direction == FSP_DOWN) {
ret_page += FSP_EXTENT_SIZE - 1; ret_page += extent_size - 1;
} }
ut_ad(!has_done_reservation || ret_page != FIL_NULL); goto alloc_done;
/*-----------------------------------------------------------*/
} else if (UNIV_UNLIKELY(*err != DB_SUCCESS)) {
return nullptr;
} else if ((xdes_get_state(descr) == XDES_FSEG)
&& mach_read_from_8(descr + XDES_ID) == seg_id
&& (!xdes_is_full(descr))) {
/* 4. We can take the page from the same extent as the
======================================================
hinted page (and the extent already belongs to the
==================================================
segment)
========*/
ret_descr = descr;
ret_page = xdes_find_free(ret_descr, hint % FSP_EXTENT_SIZE);
if (ret_page == FIL_NULL) {
ut_ad(!has_done_reservation);
} else {
ret_page += xdes_get_offset(ret_descr);
} }
/*-----------------------------------------------------------*/
} else if (reserved - used > 0) { /* Try to take individual page from the segment or tablespace */
/* 5. We take any unused page from the segment if (reserved - used > 0) {
==============================================*/ /* Take any unused page from the segment */
fil_addr_t first; fil_addr_t first;
if (flst_get_len(seg_inode + FSEG_NOT_FULL) > 0) { if (flst_get_len(seg_inode + FSEG_NOT_FULL) > 0) {
...@@ -2092,7 +2085,7 @@ fseg_alloc_free_page_low( ...@@ -2092,7 +2085,7 @@ fseg_alloc_free_page_low(
first = flst_get_first(seg_inode + FSEG_FREE); first = flst_get_first(seg_inode + FSEG_FREE);
} else { } else {
ut_ad(!has_done_reservation); ut_ad(!has_done_reservation);
return(NULL); return nullptr;
} }
ret_descr = xdes_lst_get_descriptor(*space, first, mtr, &xdes); ret_descr = xdes_lst_get_descriptor(*space, first, mtr, &xdes);
...@@ -2106,10 +2099,9 @@ fseg_alloc_free_page_low( ...@@ -2106,10 +2099,9 @@ fseg_alloc_free_page_low(
} else { } else {
ret_page += xdes_get_offset(ret_descr); ret_page += xdes_get_offset(ret_descr);
} }
/*-----------------------------------------------------------*/
} else if (used < FSEG_FRAG_LIMIT) { } else if (used < extent_size / 2) {
/* 6. We allocate an individual page from the space /* Allocate an individual page from the space */
===================================================*/
buf_block_t* block = fsp_alloc_free_page( buf_block_t* block = fsp_alloc_free_page(
space, hint, mtr, init_mtr, err); space, hint, mtr, init_mtr, err);
...@@ -2132,13 +2124,11 @@ fseg_alloc_free_page_low( ...@@ -2132,13 +2124,11 @@ fseg_alloc_free_page_low(
/* fsp_alloc_free_page() invoked fsp_init_file_page() /* fsp_alloc_free_page() invoked fsp_init_file_page()
already. */ already. */
return(block); return(block);
/*-----------------------------------------------------------*/
} else { } else {
/* 7. We allocate a new extent and take its first page /* In worst case, try to allocate a new extent
======================================================*/ and take its first page */
ret_descr = fseg_alloc_free_extent(seg_inode, iblock, &xdes, ret_descr = fseg_alloc_free_extent(seg_inode, iblock, &xdes,
space, mtr, err); space, mtr, err);
if (!ret_descr) { if (!ret_descr) {
ut_ad(!has_done_reservation || *err); ut_ad(!has_done_reservation || *err);
return nullptr; return nullptr;
...@@ -2151,14 +2141,13 @@ fseg_alloc_free_page_low( ...@@ -2151,14 +2141,13 @@ fseg_alloc_free_page_low(
/* Page could not be allocated */ /* Page could not be allocated */
ut_ad(!has_done_reservation); ut_ad(!has_done_reservation);
return(NULL); return nullptr;
} }
alloc_done:
if (space->size <= ret_page && !is_predefined_tablespace(space->id)) { if (space->size <= ret_page && !is_predefined_tablespace(space->id)) {
/* It must be that we are extending a single-table /* It must be that we are extending a single-table
tablespace whose size is still < 64 pages */ tablespace whose size is still < 64 pages */
if (ret_page >= extent_size) {
if (ret_page >= FSP_EXTENT_SIZE) {
sql_print_error("InnoDB: Trying to extend '%s'" sql_print_error("InnoDB: Trying to extend '%s'"
" by single page(s) though the" " by single page(s) though the"
" space size " UINT32PF "." " space size " UINT32PF "."
...@@ -2166,30 +2155,31 @@ fseg_alloc_free_page_low( ...@@ -2166,30 +2155,31 @@ fseg_alloc_free_page_low(
space->chain.start->name, space->size, space->chain.start->name, space->size,
ret_page); ret_page);
ut_ad(!has_done_reservation); ut_ad(!has_done_reservation);
return(NULL); return nullptr;
} }
if (!fsp_try_extend_data_file_with_pages( if (!fsp_try_extend_data_file_with_pages(
space, ret_page, header, mtr)) { space, ret_page, header, mtr)) {
/* No disk space left */ /* No disk space left */
ut_ad(!has_done_reservation); ut_ad(!has_done_reservation);
return(NULL); return nullptr;
} }
} }
got_hinted_page: /* Skip the check for extending the tablespace.
/* ret_descr == NULL if the block was allocated from free_frag If the page hint were not within the size of the tablespace,
(XDES_FREE_FRAG) */ descr set to nullptr above and reset the hint and the block
was allocated from free_frag (XDES_FREE_FRAG) */
if (ret_descr != NULL) { if (ret_descr != NULL) {
got_hinted_page:
/* At this point we know the extent and the page offset. /* At this point we know the extent and the page offset.
The extent is still in the appropriate list (FSEG_NOT_FULL The extent is still in the appropriate list (FSEG_NOT_FULL
or FSEG_FREE), and the page is not yet marked as used. */ or FSEG_FREE), and the page is not yet marked as used. */
ut_d(buf_block_t* xxdes); ut_d(buf_block_t* xxdes);
ut_ad(xdes_get_descriptor(space, ret_page, mtr, err, &xxdes) ut_ad(xdes_get_descriptor(space, ret_page, mtr, err, &xxdes)
== ret_descr); == ret_descr);
ut_ad(xdes == xxdes); ut_ad(xdes == xxdes);
ut_ad(xdes_is_free(ret_descr, ret_page % FSP_EXTENT_SIZE)); ut_ad(xdes_is_free(ret_descr, ret_page % extent_size));
*err = fseg_mark_page_used(seg_inode, iblock, ret_page, *err = fseg_mark_page_used(seg_inode, iblock, ret_page,
ret_descr, xdes, mtr); ret_descr, xdes, mtr);
......
...@@ -209,24 +209,6 @@ typedef byte fseg_inode_t; ...@@ -209,24 +209,6 @@ typedef byte fseg_inode_t;
static constexpr byte FSEG_MAGIC_N_BYTES[4]={0x05,0xd6,0x69,0xd2}; static constexpr byte FSEG_MAGIC_N_BYTES[4]={0x05,0xd6,0x69,0xd2};
#define FSEG_FILLFACTOR 8 /* If the number of unused but reserved
pages in a segment is less than
reserved pages / FSEG_FILLFACTOR,
and there are
at least FSEG_FRAG_LIMIT used pages,
then we allow a new empty extent to
be added to the segment in
fseg_alloc_free_page_general().
Otherwise, we
use unused pages of the segment. */
#define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS
/* If the segment has >= this many
used pages, it may be expanded by
allocating extents to the segment;
until that only individual fragment
pages are allocated from the space */
#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment #define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment
is at least this many extents, we is at least this many extents, we
allow extents to be put to the free allow extents to be put to the free
...@@ -294,7 +276,7 @@ Determine if a page is marked free. ...@@ -294,7 +276,7 @@ Determine if a page is marked free.
@param[in] descr extent descriptor @param[in] descr extent descriptor
@param[in] offset page offset within extent @param[in] offset page offset within extent
@return whether the page is free */ @return whether the page is free */
inline bool xdes_is_free(const xdes_t *descr, ulint offset) inline bool xdes_is_free(const xdes_t *descr, uint32_t offset)
{ {
ut_ad(offset < FSP_EXTENT_SIZE); ut_ad(offset < FSP_EXTENT_SIZE);
ulint index= XDES_FREE_BIT + XDES_BITS_PER_PAGE * offset; ulint index= XDES_FREE_BIT + XDES_BITS_PER_PAGE * offset;
......
...@@ -610,7 +610,7 @@ class AbstractCallback ...@@ -610,7 +610,7 @@ class AbstractCallback
if (m_xdes != 0) { if (m_xdes != 0) {
const xdes_t* xdesc = xdes(page_no, m_xdes); const xdes_t* xdesc = xdes(page_no, m_xdes);
ulint pos = page_no % FSP_EXTENT_SIZE; uint32_t pos = page_no % FSP_EXTENT_SIZE;
return xdes_is_free(xdesc, pos); return xdes_is_free(xdesc, pos);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment