MDEV-15528 Punch holes when pages are freed

When an InnoDB data file page is freed, its contents become garbage,
and any storage allocated in the data file is wasted. During flushing,
InnoDB overwrites the page with zeros if scrubbing is enabled. If the
tablespace is compressed, InnoDB should punch a hole; otherwise, the
flushing of the freed page is skipped.

buf_page_t:
- Replaced the variables file_page_was_freed and init_on_flush in
buf_page_t with a status enum variable.
- Changed all debug assertions on file_page_was_freed to DBUG_ASSERT
on buf_page_t::status

Removed buf_page_set_file_page_was_freed(),
buf_page_reset_file_page_was_freed().

buf_page_free(): Newly added function that takes an X-lock on the page
before marking its status as FREED, so that the InnoDB flush handler can
avoid a concurrent flush of the freed page. Also, while flushing the page,
InnoDB makes sure that the redo log that frees the page has also been
written to disk. Currently, this function only marks the page as FREED if
it is in the buffer pool.

buf_flush_freed_page(): Newly added function that writes zeros
asynchronously if innodb_immediate_scrub_data_uncompressed is enabled,
and punches a hole in the file synchronously if page_compressed is enabled.
It resets io_fix to BUF_IO_NONE and the page status to NORMAL, and releases
the block from the flush list and the associated mutex before writing
zeros or punching a hole in the file.

buf_flush_page(): Removed the unnecessary temporary
variable "flush"

fil_io(): Introduced a new parameter called punch_hole. It allows fil_io()
to punch a hole in the file at the given offset.

buf_page_create(): Let the callers assign buf_page_t::status.
Every caller should eventually invoke mtr_t::init().

fsp_page_create(): Remove the unused mtr_t parameter.

In all other callers of buf_page_create() except fsp_page_create(),
before invoking mtr_t::init(), invoke
mtr_t::sx_latch_at_savepoint() or mtr_t::x_latch_at_savepoint().

mtr_t::init(): Initialize buf_page_t::status also for the temporary
tablespace (when redo logging is disabled), to avoid assertion failures.
parent 980108ce
...@@ -172,7 +172,7 @@ Filename::tab#.ibd ...@@ -172,7 +172,7 @@ Filename::tab#.ibd
#::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, -
#::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, -
#::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, -
#::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Freshly allocated page | -
#::# | Freshly allocated page | - #::# | Freshly allocated page | -
# Variables used by page type dump for ibdata1 # Variables used by page type dump for ibdata1
...@@ -207,7 +207,7 @@ Filename::tab#.ibd ...@@ -207,7 +207,7 @@ Filename::tab#.ibd
#::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, -
#::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, -
#::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, -
#::# | Index page | index id=#, page level=#, No. of records=#, garbage=#, - #::# | Freshly allocated page | -
#::# | Freshly allocated page | - #::# | Freshly allocated page | -
[6]: check the valid lower bound values for option [6]: check the valid lower bound values for option
# allow-mismatches,page,start-page,end-page # allow-mismatches,page,start-page,end-page
......
...@@ -962,7 +962,8 @@ btr_search_guess_on_hash( ...@@ -962,7 +962,8 @@ btr_search_guess_on_hash(
} }
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(!block->page.file_page_was_freed); DBUG_ASSERT(block->page.status != buf_page_t::FREED);
buf_page_set_accessed(&block->page); buf_page_set_accessed(&block->page);
buf_block_buf_fix_inc(block, __FILE__, __LINE__); buf_block_buf_fix_inc(block, __FILE__, __LINE__);
mutex_exit(&block->mutex); mutex_exit(&block->mutex);
...@@ -1313,7 +1314,7 @@ void btr_search_drop_page_hash_when_freed(const page_id_t page_id) ...@@ -1313,7 +1314,7 @@ void btr_search_drop_page_hash_when_freed(const page_id_t page_id)
/* If AHI is still valid, page can't be in free state. /* If AHI is still valid, page can't be in free state.
AHI is dropped when page is freed. */ AHI is dropped when page is freed. */
ut_ad(!block->page.file_page_was_freed); DBUG_ASSERT(block->page.status != buf_page_t::FREED);
buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH); buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
......
...@@ -1355,13 +1355,11 @@ buf_block_init(buf_block_t* block, byte* frame) ...@@ -1355,13 +1355,11 @@ buf_block_init(buf_block_t* block, byte* frame)
block->page.state = BUF_BLOCK_NOT_USED; block->page.state = BUF_BLOCK_NOT_USED;
block->page.buf_fix_count = 0; block->page.buf_fix_count = 0;
block->page.io_fix = BUF_IO_NONE; block->page.io_fix = BUF_IO_NONE;
block->page.init_on_flush = false;
block->page.real_size = 0; block->page.real_size = 0;
block->page.write_size = 0; block->page.write_size = 0;
block->modify_clock = 0; block->modify_clock = 0;
block->page.slot = NULL; block->page.slot = NULL;
block->page.status = buf_page_t::NORMAL;
ut_d(block->page.file_page_was_freed = FALSE);
#ifdef BTR_CUR_HASH_ADAPT #ifdef BTR_CUR_HASH_ADAPT
block->index = NULL; block->index = NULL;
...@@ -3211,58 +3209,64 @@ void buf_page_make_young(buf_page_t* bpage) ...@@ -3211,58 +3209,64 @@ void buf_page_make_young(buf_page_t* bpage)
mutex_exit(&buf_pool->mutex); mutex_exit(&buf_pool->mutex);
} }
#ifdef UNIV_DEBUG /** Mark the page status as FREED for the given tablespace id and
/** Sets file_page_was_freed TRUE if the page is found in the buffer pool. page number. If the page is not in the buffer pool then ignore it.
This function should be called when we free a file page and want the X-lock should be taken on the page before marking the page status
debug version to check that it is not accessed any more unless as FREED. It avoids the concurrent flushing of freed page.
reallocated. Currently, this function only marks the page as FREED if it is
in buffer pool.
@param[in] page_id page id @param[in] page_id page id
@return control block if found in page hash table, otherwise NULL */ @param[in,out] mtr mini-transaction
buf_page_t* buf_page_set_file_page_was_freed(const page_id_t page_id) @param[in] file file name
@param[in] line line where called */
void buf_page_free(const page_id_t page_id,
mtr_t *mtr,
const char *file,
unsigned line)
{ {
buf_page_t* bpage; ut_ad(mtr);
rw_lock_t* hash_lock; ut_ad(mtr->is_active());
buf_pool->stat.n_page_gets++;
rw_lock_t *hash_lock= buf_page_hash_lock_get(page_id);
rw_lock_s_lock(hash_lock);
bpage = buf_page_hash_get_s_locked(page_id, &hash_lock); /* page_hash can be changed. */
hash_lock= buf_page_hash_lock_s_confirm(hash_lock, page_id);
buf_block_t *block= reinterpret_cast<buf_block_t*>
(buf_page_hash_get_low(page_id));
if (bpage) { if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)
BPageMutex* block_mutex = buf_page_get_mutex(bpage); {
ut_ad(!buf_pool_watch_is_sentinel(bpage)); /* FIXME: if block!=NULL, convert to BUF_BLOCK_FILE_PAGE,
mutex_enter(block_mutex); but avoid buf_zip_decompress() */
/* FIXME: If block==NULL, introduce a separate data structure
to cover freed page ranges to augment buf_flush_freed_page() */
rw_lock_s_unlock(hash_lock); rw_lock_s_unlock(hash_lock);
/* bpage->file_page_was_freed can already hold return;
when this code is invoked from dict_drop_index_tree() */
bpage->file_page_was_freed = TRUE;
mutex_exit(block_mutex);
} }
return(bpage); block->fix();
} mutex_enter(&block->mutex);
/* Now safe to release page_hash mutex */
/** Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@param[in] page_id page id
@return control block if found in page hash table, otherwise NULL */
buf_page_t* buf_page_reset_file_page_was_freed(const page_id_t page_id)
{
buf_page_t* bpage;
rw_lock_t* hash_lock;
bpage = buf_page_hash_get_s_locked(page_id, &hash_lock);
if (bpage) {
BPageMutex* block_mutex = buf_page_get_mutex(bpage);
ut_ad(!buf_pool_watch_is_sentinel(bpage));
mutex_enter(block_mutex);
rw_lock_s_unlock(hash_lock); rw_lock_s_unlock(hash_lock);
bpage->file_page_was_freed = FALSE; ut_ad(block->page.buf_fix_count > 0);
mutex_exit(block_mutex);
#ifdef UNIV_DEBUG
if (!fsp_is_system_temporary(page_id.space()))
{
ibool ret= rw_lock_s_lock_nowait(block->debug_latch, file, line);
ut_a(ret);
} }
#endif /* UNIV_DEBUG */
return(bpage); mtr_memo_type_t fix_type= MTR_MEMO_PAGE_X_FIX;
rw_lock_x_lock_inline(&block->lock, 0, file, line);
mtr_memo_push(mtr, block, fix_type);
block->page.status= buf_page_t::FREED;
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
mutex_exit(&block->mutex);
} }
#endif /* UNIV_DEBUG */
/** Attempts to discard the uncompressed frame of a compressed page. /** Attempts to discard the uncompressed frame of a compressed page.
The caller should not be holding any mutexes when this function is called. The caller should not be holding any mutexes when this function is called.
...@@ -3382,7 +3386,7 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size) ...@@ -3382,7 +3386,7 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
rw_lock_s_unlock(hash_lock); rw_lock_s_unlock(hash_lock);
ut_ad(!bpage->file_page_was_freed); DBUG_ASSERT(bpage->status != buf_page_t::FREED);
buf_page_set_accessed(bpage); buf_page_set_accessed(bpage);
...@@ -4282,7 +4286,7 @@ buf_page_get_gen( ...@@ -4282,7 +4286,7 @@ buf_page_get_gen(
"btr_search_drop_page_hash_when_freed". */ "btr_search_drop_page_hash_when_freed". */
ut_ad(mode == BUF_GET_POSSIBLY_FREED ut_ad(mode == BUF_GET_POSSIBLY_FREED
|| mode == BUF_PEEK_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL
|| !fix_block->page.file_page_was_freed); || fix_block->page.status != buf_page_t::FREED);
/* Check if this is the first access to the page */ /* Check if this is the first access to the page */
access_time = buf_page_is_accessed(&fix_block->page); access_time = buf_page_is_accessed(&fix_block->page);
...@@ -4472,10 +4476,6 @@ buf_page_optimistic_get( ...@@ -4472,10 +4476,6 @@ buf_page_optimistic_get(
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
ut_d(buf_page_mutex_enter(block));
ut_ad(!block->page.file_page_was_freed);
ut_d(buf_page_mutex_exit(block));
if (!access_time) { if (!access_time) {
/* In the case of a first access, try to apply linear /* In the case of a first access, try to apply linear
read-ahead */ read-ahead */
...@@ -4558,10 +4558,6 @@ buf_page_try_get_func( ...@@ -4558,10 +4558,6 @@ buf_page_try_get_func(
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
ut_d(buf_page_mutex_enter(block));
ut_d(ut_a(!block->page.file_page_was_freed));
ut_d(buf_page_mutex_exit(block));
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
buf_pool->stat.n_page_gets++; buf_pool->stat.n_page_gets++;
...@@ -4588,10 +4584,8 @@ buf_page_init_low( ...@@ -4588,10 +4584,8 @@ buf_page_init_low(
bpage->real_size = 0; bpage->real_size = 0;
bpage->slot = NULL; bpage->slot = NULL;
bpage->ibuf_exist = false; bpage->ibuf_exist = false;
bpage->status = buf_page_t::NORMAL;
HASH_INVALIDATE(bpage, hash); HASH_INVALIDATE(bpage, hash);
ut_d(bpage->file_page_was_freed = FALSE);
} }
/** Inits a page to the buffer buf_pool. /** Inits a page to the buffer buf_pool.
...@@ -4844,7 +4838,7 @@ buf_page_init_for_read( ...@@ -4844,7 +4838,7 @@ buf_page_init_for_read(
bpage->state = BUF_BLOCK_ZIP_PAGE; bpage->state = BUF_BLOCK_ZIP_PAGE;
bpage->id = page_id; bpage->id = page_id;
bpage->init_on_flush = false; bpage->status = buf_page_t::NORMAL;
ut_d(bpage->in_page_hash = FALSE); ut_d(bpage->in_page_hash = FALSE);
ut_d(bpage->in_zip_hash = FALSE); ut_d(bpage->in_zip_hash = FALSE);
...@@ -4936,8 +4930,6 @@ buf_page_create( ...@@ -4936,8 +4930,6 @@ buf_page_create(
if (block if (block
&& buf_page_in_file(&block->page) && buf_page_in_file(&block->page)
&& !buf_pool_watch_is_sentinel(&block->page)) { && !buf_pool_watch_is_sentinel(&block->page)) {
ut_d(block->page.file_page_was_freed = FALSE);
/* Page can be found in buf_pool */ /* Page can be found in buf_pool */
mutex_exit(&buf_pool->mutex); mutex_exit(&buf_pool->mutex);
rw_lock_x_unlock(hash_lock); rw_lock_x_unlock(hash_lock);
...@@ -4945,8 +4937,13 @@ buf_page_create( ...@@ -4945,8 +4937,13 @@ buf_page_create(
buf_block_free(free_block); buf_block_free(free_block);
if (!recv_recovery_is_on()) { if (!recv_recovery_is_on()) {
return buf_page_get_with_no_latch(page_id, zip_size, /* FIXME: Remove the redundant lookup and avoid
mtr); the unnecessary invocation of buf_zip_decompress().
We may have to convert buf_page_t to buf_block_t,
but we are going to initialize the page. */
return buf_page_get_gen(page_id, zip_size, RW_NO_LATCH,
block, BUF_GET_POSSIBLY_FREED,
__FILE__, __LINE__, mtr);
} }
mutex_exit(&recv_sys.mutex); mutex_exit(&recv_sys.mutex);
......
...@@ -897,6 +897,10 @@ a page is written to disk. ...@@ -897,6 +897,10 @@ a page is written to disk.
(may be src_frame or an encrypted/compressed copy of it) */ (may be src_frame or an encrypted/compressed copy of it) */
static byte* buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s) static byte* buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s)
{ {
if (bpage->status == buf_page_t::FREED) {
return s;
}
ut_ad(space->id == bpage->id.space()); ut_ad(space->id == bpage->id.space());
bpage->real_size = srv_page_size; bpage->real_size = srv_page_size;
...@@ -1022,6 +1026,62 @@ static byte* buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s) ...@@ -1022,6 +1026,62 @@ static byte* buf_page_encrypt(fil_space_t* space, buf_page_t* bpage, byte* s)
return d; return d;
} }
/** Handle a freed page during flushing:
i) Write zeros to the file asynchronously if scrubbing is enabled
ii) Punch a hole in the file synchronously if page_compressed is
enabled for the tablespace
This function also resets the io_fix to BUF_IO_NONE and sets the
page status to NORMAL. It initiates the write to the file only after
releasing the page from the flush list and its associated mutex.
@param[in,out] bpage freed buffer page
@param[in] space tablespace object of the freed page */
static void buf_flush_freed_page(buf_page_t* bpage, fil_space_t* space)
{
const page_id_t page_id(bpage->id.space(), bpage->id.page_no());
BPageMutex* block_mutex = buf_page_get_mutex(bpage);
const bool uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
bool punch_hole = false;
mutex_enter(&buf_pool->mutex);
mutex_enter(block_mutex);
buf_page_set_io_fix(bpage, BUF_IO_NONE);
bpage->status = buf_page_t::NORMAL;
buf_flush_write_complete(bpage, false);
if (uncompressed) {
rw_lock_sx_unlock_gen(&((buf_block_t*) bpage)->lock,
BUF_IO_WRITE);
}
buf_pool->stat.n_pages_written++;
mutex_exit(block_mutex);
mutex_exit(&buf_pool->mutex);
if (space->is_compressed()) {
#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
punch_hole = (space != fil_system.temp_space
&& space->is_compressed());
#endif
}
if (srv_immediate_scrub_data_uncompressed || punch_hole) {
/* Zero write the page */
ulint type = IORequest::WRITE;
IORequest request(type, NULL);
page_t* frame = const_cast<byte*>(field_ref_zero);
fil_io(request, punch_hole ? true :false,
page_id, space->zip_size(), 0,
space->physical_size(), frame, NULL,
false, punch_hole);
}
space->release_for_io();
}
/********************************************************************//** /********************************************************************//**
Does an asynchronous write of a buffer page. NOTE: when the Does an asynchronous write of a buffer page. NOTE: when the
doublewrite buffer is used, we must call doublewrite buffer is used, we must call
...@@ -1084,6 +1144,12 @@ buf_flush_write_block_low( ...@@ -1084,6 +1144,12 @@ buf_flush_write_block_low(
frame = ((buf_block_t*) bpage)->frame; frame = ((buf_block_t*) bpage)->frame;
} }
/* Skip the encryption and compression for the
freed page */
if (bpage->status == buf_page_t::FREED) {
break;
}
byte* page = reinterpret_cast<const buf_block_t*>(bpage)->frame; byte* page = reinterpret_cast<const buf_block_t*>(bpage)->frame;
if (full_crc32) { if (full_crc32) {
...@@ -1111,7 +1177,12 @@ buf_flush_write_block_low( ...@@ -1111,7 +1177,12 @@ buf_flush_write_block_low(
ut_ad(space->atomic_write_supported); ut_ad(space->atomic_write_supported);
} }
const bool use_doublewrite = !bpage->init_on_flush if (bpage->status == buf_page_t::FREED) {
buf_flush_freed_page(bpage, space);
return;
}
const bool use_doublewrite = bpage->status != buf_page_t::INIT_ON_FLUSH
&& space->use_doublewrite(); && space->use_doublewrite();
if (!use_doublewrite) { if (!use_doublewrite) {
...@@ -1191,17 +1262,14 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync) ...@@ -1191,17 +1262,14 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync)
ut_ad(buf_flush_ready_for_flush(bpage, flush_type)); ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
bool is_uncompressed; bool is_uncompressed = (buf_page_get_state(bpage)
== BUF_BLOCK_FILE_PAGE);
is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex)); ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
ibool flush;
rw_lock_t* rw_lock; rw_lock_t* rw_lock;
bool no_fix_count = bpage->buf_fix_count == 0; bool no_fix_count = bpage->buf_fix_count == 0;
if (!is_uncompressed) { if (!is_uncompressed) {
flush = TRUE;
rw_lock = NULL; rw_lock = NULL;
} else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST) } else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)
|| (!no_fix_count || (!no_fix_count
...@@ -1211,19 +1279,15 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync) ...@@ -1211,19 +1279,15 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync)
/* For table residing in temporary tablespace sync is done /* For table residing in temporary tablespace sync is done
using IO_FIX and so before scheduling for flush ensure that using IO_FIX and so before scheduling for flush ensure that
page is not fixed. */ page is not fixed. */
flush = FALSE; return false;
} else { } else {
rw_lock = &reinterpret_cast<buf_block_t*>(bpage)->lock; rw_lock = &reinterpret_cast<buf_block_t*>(bpage)->lock;
if (flush_type != BUF_FLUSH_LIST) { if (flush_type != BUF_FLUSH_LIST
flush = rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE); && !rw_lock_sx_lock_nowait(rw_lock, BUF_IO_WRITE)) {
} else { return false;
/* Will SX lock later */
flush = TRUE;
} }
} }
if (flush) {
/* We are committed to flushing by the time we get here */ /* We are committed to flushing by the time we get here */
buf_page_set_io_fix(bpage, BUF_IO_WRITE); buf_page_set_io_fix(bpage, BUF_IO_WRITE);
...@@ -1263,9 +1327,7 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync) ...@@ -1263,9 +1327,7 @@ bool buf_flush_page(buf_page_t* bpage, buf_flush_t flush_type, bool sync)
buffer pool or removed from flush_list or LRU_list. */ buffer pool or removed from flush_list or LRU_list. */
buf_flush_write_block_low(bpage, flush_type, sync); buf_flush_write_block_low(bpage, flush_type, sync);
} return true;
return(flush);
} }
# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
......
...@@ -3892,6 +3892,8 @@ inline void IORequest::set_fil_node(fil_node_t* node) ...@@ -3892,6 +3892,8 @@ inline void IORequest::set_fil_node(fil_node_t* node)
@param[in] message message for aio handler if non-sync aio @param[in] message message for aio handler if non-sync aio
used, else ignored used, else ignored
@param[in] ignore whether to ignore out-of-bounds page_id @param[in] ignore whether to ignore out-of-bounds page_id
@param[in] punch_hole punch the hole to the file for page_compressed
tablespace
@return DB_SUCCESS, or DB_TABLESPACE_DELETED @return DB_SUCCESS, or DB_TABLESPACE_DELETED
if we are trying to do i/o on a tablespace which does not exist */ if we are trying to do i/o on a tablespace which does not exist */
dberr_t dberr_t
...@@ -3904,7 +3906,8 @@ fil_io( ...@@ -3904,7 +3906,8 @@ fil_io(
ulint len, ulint len,
void* buf, void* buf,
void* message, void* message,
bool ignore) bool ignore,
bool punch_hole)
{ {
os_offset_t offset; os_offset_t offset;
IORequest req_type(type); IORequest req_type(type);
...@@ -4083,13 +4086,20 @@ fil_io( ...@@ -4083,13 +4086,20 @@ fil_io(
|| !fil_is_user_tablespace_id(page_id.space()) || !fil_is_user_tablespace_id(page_id.space())
|| offset == page_id.page_no() * zip_size); || offset == page_id.page_no() * zip_size);
dberr_t err = DB_SUCCESS;
if (punch_hole) {
/* Punch the hole to the file */
err = os_file_punch_hole(node->handle, offset, len);
} else {
/* Queue the aio request */ /* Queue the aio request */
dberr_t err = os_aio( err = os_aio(
req_type, req_type,
mode, name, node->handle, buf, offset, len, mode, name, node->handle, buf, offset, len,
space->purpose != FIL_TYPE_TEMPORARY space->purpose != FIL_TYPE_TEMPORARY
&& srv_read_only_mode, && srv_read_only_mode,
node, message); node, message);
}
/* We an try to recover the page from the double write buffer if /* We an try to recover the page from the double write buffer if
the decompression fails or the page is corrupt. */ the decompression fails or the page is corrupt. */
...@@ -4154,8 +4164,8 @@ void fil_aio_callback(os_aio_userdata_t *data) ...@@ -4154,8 +4164,8 @@ void fil_aio_callback(os_aio_userdata_t *data)
} }
ulint offset = bpage->id.page_no(); ulint offset = bpage->id.page_no();
if (dblwr && bpage->init_on_flush) { if (dblwr && bpage->status == buf_page_t::INIT_ON_FLUSH) {
bpage->init_on_flush = false; bpage->status = buf_page_t::NORMAL;
dblwr = false; dblwr = false;
} }
dberr_t err = buf_page_io_complete(bpage, dblwr); dberr_t err = buf_page_io_complete(bpage, dblwr);
......
...@@ -554,8 +554,10 @@ void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr) ...@@ -554,8 +554,10 @@ void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr)
const ulint zip_size = space->zip_size(); const ulint zip_size = space->zip_size();
mtr_x_lock_space(space, mtr); mtr_x_lock_space(space, mtr);
const auto savepoint = mtr->get_savepoint();
buf_block_t* block = buf_page_create(page_id, zip_size, mtr); buf_block_t* block = buf_page_create(page_id, zip_size, mtr);
buf_page_get(page_id, zip_size, RW_SX_LATCH, mtr); mtr->sx_latch_at_savepoint(savepoint, block);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE); buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
space->size_in_header = size; space->size_in_header = size;
...@@ -873,16 +875,12 @@ fsp_fill_free_list( ...@@ -873,16 +875,12 @@ fsp_fill_free_list(
pages should be ignored. */ pages should be ignored. */
if (i > 0) { if (i > 0) {
const page_id_t page_id(space->id, i); const auto savepoint = mtr->get_savepoint();
block= buf_page_create(page_id_t(space->id, i),
block = buf_page_create( zip_size, mtr);
page_id, zip_size, mtr); mtr->sx_latch_at_savepoint(savepoint, block);
buf_page_get(
page_id, zip_size, RW_SX_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE); buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
fsp_init_file_page(space, block, mtr); fsp_init_file_page(space, block, mtr);
mtr->write<2>(*block, mtr->write<2>(*block,
FIL_PAGE_TYPE + block->frame, FIL_PAGE_TYPE + block->frame,
...@@ -900,17 +898,11 @@ fsp_fill_free_list( ...@@ -900,17 +898,11 @@ fsp_fill_free_list(
ibuf_mtr.start(); ibuf_mtr.start();
ibuf_mtr.set_named_space(space); ibuf_mtr.set_named_space(space);
const page_id_t page_id(
space->id,
i + FSP_IBUF_BITMAP_OFFSET);
block = buf_page_create( block = buf_page_create(
page_id, zip_size, &ibuf_mtr); page_id_t(space->id,
i + FSP_IBUF_BITMAP_OFFSET),
buf_page_get( zip_size, &ibuf_mtr);
page_id, zip_size, RW_SX_LATCH, ibuf_mtr.sx_latch_at_savepoint(0, block);
&ibuf_mtr);
buf_block_dbg_add_level(block, SYNC_FSP_PAGE); buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
fsp_init_file_page(space, block, &ibuf_mtr); fsp_init_file_page(space, block, &ibuf_mtr);
...@@ -1060,8 +1052,7 @@ fsp_alloc_from_free_frag(buf_block_t *header, buf_block_t *xdes, xdes_t *descr, ...@@ -1060,8 +1052,7 @@ fsp_alloc_from_free_frag(buf_block_t *header, buf_block_t *xdes, xdes_t *descr,
@param[in,out] space tablespace @param[in,out] space tablespace
@param[in] offset page number of the allocated page @param[in] offset page number of the allocated page
@param[in] rw_latch RW_SX_LATCH, RW_X_LATCH @param[in] rw_latch RW_SX_LATCH, RW_X_LATCH
@param[in,out] mtr mini-transaction of the allocation @param[in,out] mtr mini-transaction
@param[in,out] init_mtr mini-transaction for initializing the page
@return block, initialized */ @return block, initialized */
static static
buf_block_t* buf_block_t*
...@@ -1069,13 +1060,13 @@ fsp_page_create( ...@@ -1069,13 +1060,13 @@ fsp_page_create(
fil_space_t* space, fil_space_t* space,
page_no_t offset, page_no_t offset,
rw_lock_type_t rw_latch, rw_lock_type_t rw_latch,
mtr_t* mtr, mtr_t* mtr)
mtr_t* init_mtr)
{ {
buf_block_t* block = buf_page_create(page_id_t(space->id, offset), buf_block_t* block = buf_page_create(page_id_t(space->id, offset),
space->zip_size(), init_mtr); space->zip_size(), mtr);
/* Mimic buf_page_get(), but avoid the buf_pool->page_hash lookup. */ /* The latch may already have been acquired, so we cannot invoke
mtr_t::x_latch_at_savepoint() or mtr_t::sx_latch_at_savepoint(). */
mtr_memo_type_t memo; mtr_memo_type_t memo;
if (rw_latch == RW_X_LATCH) { if (rw_latch == RW_X_LATCH) {
...@@ -1087,9 +1078,9 @@ fsp_page_create( ...@@ -1087,9 +1078,9 @@ fsp_page_create(
memo = MTR_MEMO_PAGE_SX_FIX; memo = MTR_MEMO_PAGE_SX_FIX;
} }
mtr_memo_push(init_mtr, block, memo); mtr_memo_push(mtr, block, memo);
buf_block_buf_fix_inc(block, __FILE__, __LINE__); buf_block_buf_fix_inc(block, __FILE__, __LINE__);
fsp_init_file_page(space, block, init_mtr); fsp_init_file_page(space, block, mtr);
return(block); return(block);
} }
...@@ -1202,7 +1193,7 @@ fsp_alloc_free_page( ...@@ -1202,7 +1193,7 @@ fsp_alloc_free_page(
} }
fsp_alloc_from_free_frag(block, xdes, descr, free, mtr); fsp_alloc_from_free_frag(block, xdes, descr, free, mtr);
return fsp_page_create(space, page_no, rw_latch, mtr, init_mtr); return fsp_page_create(space, page_no, rw_latch, init_mtr);
} }
/** Frees a single page of a space. /** Frees a single page of a space.
...@@ -2233,7 +2224,7 @@ fseg_alloc_free_page_low( ...@@ -2233,7 +2224,7 @@ fseg_alloc_free_page_low(
xdes, mtr); xdes, mtr);
} }
return fsp_page_create(space, ret_page, rw_latch, mtr, init_mtr); return fsp_page_create(space, ret_page, rw_latch, init_mtr);
} }
/**********************************************************************//** /**********************************************************************//**
...@@ -2642,7 +2633,7 @@ fseg_free_page_func( ...@@ -2642,7 +2633,7 @@ fseg_free_page_func(
fseg_free_page_low(seg_inode, iblock, space, offset, ahi, mtr); fseg_free_page_low(seg_inode, iblock, space, offset, ahi, mtr);
ut_d(buf_page_set_file_page_was_freed(page_id_t(space->id, offset))); buf_page_free(page_id_t(space->id, offset), mtr, __FILE__, __LINE__);
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
...@@ -2747,13 +2738,13 @@ fseg_free_extent( ...@@ -2747,13 +2738,13 @@ fseg_free_extent(
fsp_free_extent(space, page, mtr); fsp_free_extent(space, page, mtr);
#ifdef UNIV_DEBUG
for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) { for (ulint i = 0; i < FSP_EXTENT_SIZE; i++) {
if (!xdes_is_free(descr, i)) {
buf_page_set_file_page_was_freed( buf_page_free(
page_id_t(space->id, first_page_in_extent + i)); page_id_t(space->id, first_page_in_extent + i),
mtr, __FILE__, __LINE__);
}
} }
#endif /* UNIV_DEBUG */
} }
#ifndef BTR_CUR_HASH_ADAPT #ifndef BTR_CUR_HASH_ADAPT
......
...@@ -1948,8 +1948,6 @@ ibuf_remove_free_page(void) ...@@ -1948,8 +1948,6 @@ ibuf_remove_free_page(void)
const page_id_t page_id(IBUF_SPACE_ID, page_no); const page_id_t page_id(IBUF_SPACE_ID, page_no);
ut_d(buf_page_reset_file_page_was_freed(page_id));
ibuf_enter(&mtr); ibuf_enter(&mtr);
mutex_enter(&ibuf_mutex); mutex_enter(&ibuf_mutex);
...@@ -1982,7 +1980,7 @@ ibuf_remove_free_page(void) ...@@ -1982,7 +1980,7 @@ ibuf_remove_free_page(void)
ibuf_bitmap_page_set_bits<IBUF_BITMAP_IBUF>( ibuf_bitmap_page_set_bits<IBUF_BITMAP_IBUF>(
bitmap_page, page_id, srv_page_size, false, &mtr); bitmap_page, page_id, srv_page_size, false, &mtr);
ut_d(buf_page_set_file_page_was_freed(page_id)); buf_page_free(page_id, &mtr, __FILE__, __LINE__);
ibuf_mtr_commit(&mtr); ibuf_mtr_commit(&mtr);
} }
......
...@@ -372,10 +372,7 @@ FILE_PAGE (the other is buf_page_get_gen). ...@@ -372,10 +372,7 @@ FILE_PAGE (the other is buf_page_get_gen).
@param[in,out] mtr mini-transaction @param[in,out] mtr mini-transaction
@return pointer to the block, page bufferfixed */ @return pointer to the block, page bufferfixed */
buf_block_t* buf_block_t*
buf_page_create( buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr);
const page_id_t page_id,
ulint zip_size,
mtr_t* mtr);
/********************************************************************//** /********************************************************************//**
Releases a compressed-only page acquired with buf_page_get_zip(). */ Releases a compressed-only page acquired with buf_page_get_zip(). */
...@@ -402,24 +399,17 @@ buf_page_make_young( ...@@ -402,24 +399,17 @@ buf_page_make_young(
/*================*/ /*================*/
buf_page_t* bpage); /*!< in: buffer block of a file page */ buf_page_t* bpage); /*!< in: buffer block of a file page */
#ifdef UNIV_DEBUG /** Mark the page status as FREED for the given tablespace id and
/** Sets file_page_was_freed TRUE if the page is found in the buffer pool. page number. If the page is not in buffer pool then ignore it.
This function should be called when we free a file page and want the @param[in] page_id page_id
debug version to check that it is not accessed any more unless @param[in,out] mtr mini-transaction
reallocated. @param[in] file file name
@param[in] page_id page id @param[in] line line where called */
@return control block if found in page hash table, otherwise NULL */ void buf_page_free(const page_id_t page_id,
buf_page_t* buf_page_set_file_page_was_freed(const page_id_t page_id); mtr_t *mtr,
const char *file,
/** Sets file_page_was_freed FALSE if the page is found in the buffer pool. unsigned line);
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@param[in] page_id page id
@return control block if found in page hash table, otherwise NULL */
buf_page_t* buf_page_reset_file_page_was_freed(const page_id_t page_id);
#endif /* UNIV_DEBUG */
/********************************************************************//** /********************************************************************//**
Reads the freed_page_clock of a buffer block. Reads the freed_page_clock of a buffer block.
@return freed_page_clock */ @return freed_page_clock */
...@@ -1244,13 +1234,6 @@ class buf_page_t { ...@@ -1244,13 +1234,6 @@ class buf_page_t {
if written again we check is TRIM if written again we check is TRIM
operation needed. */ operation needed. */
/** whether the page will be (re)initialized at the time it will
be written to the file, that is, whether the doublewrite buffer
can be safely skipped. Protected under similar conditions as
buf_block_t::frame. Can be set while holding buf_block_t::lock
X-latch and reset during page flush, while io_fix is in effect. */
bool init_on_flush;
ulint real_size; /*!< Real size of the page ulint real_size; /*!< Real size of the page
Normal pages == srv_page_size Normal pages == srv_page_size
page compressed pages, payload page compressed pages, payload
...@@ -1365,17 +1348,25 @@ class buf_page_t { ...@@ -1365,17 +1348,25 @@ class buf_page_t {
and bytes allocated for recv_sys.pages, and bytes allocated for recv_sys.pages,
the field is protected by the field is protected by
recv_sys_t::mutex. */ recv_sys_t::mutex. */
# ifdef UNIV_DEBUG
ibool file_page_was_freed;
/*!< this is set to TRUE when
fsp frees a page in buffer pool;
protected by buf_pool->zip_mutex
or buf_block_t::mutex. */
# endif /* UNIV_DEBUG */
/** Change buffer entries for the page exist. /** Change buffer entries for the page exist.
Protected by io_fix==BUF_IO_READ or by buf_block_t::lock. */ Protected by io_fix==BUF_IO_READ or by buf_block_t::lock. */
bool ibuf_exist; bool ibuf_exist;
/** Block initialization status. Can be modified while holding io_fix
or buf_block_t::lock X-latch */
enum {
/** the page was read normally and should be flushed normally */
NORMAL = 0,
/** the page was (re)initialized, and the doublewrite buffer can be
skipped on the next flush */
INIT_ON_FLUSH,
/** the page was freed and needs to be flushed.
For page_compressed, page flush will punch a hole to free space.
Else if innodb_immediate_scrub_data_uncompressed, the page will
be overwritten with zeroes. */
FREED
} status;
void fix() { buf_fix_count++; } void fix() { buf_fix_count++; }
uint32_t unfix() uint32_t unfix()
{ {
......
...@@ -1378,6 +1378,8 @@ fil_space_extend( ...@@ -1378,6 +1378,8 @@ fil_space_extend(
@param[in] message message for aio handler if non-sync aio @param[in] message message for aio handler if non-sync aio
used, else ignored used, else ignored
@param[in] ignore whether to ignore out-of-bounds page_id @param[in] ignore whether to ignore out-of-bounds page_id
@param[in] punch_hole whether to punch a hole in the file; used for
page_compressed tablespaces
@return DB_SUCCESS, or DB_TABLESPACE_DELETED @return DB_SUCCESS, or DB_TABLESPACE_DELETED
if we are trying to do i/o on a tablespace which does not exist */ if we are trying to do i/o on a tablespace which does not exist */
dberr_t dberr_t
...@@ -1390,7 +1392,8 @@ fil_io( ...@@ -1390,7 +1392,8 @@ fil_io(
ulint len, ulint len,
void* buf, void* buf,
void* message, void* message,
bool ignore = false); bool ignore = false,
bool punch_hole = false);
/**********************************************************************//** /**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the Waits for an aio operation to complete. This function is used to write the
......
...@@ -502,6 +502,8 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str, ...@@ -502,6 +502,8 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str,
@param[in,out] b buffer page */ @param[in,out] b buffer page */
inline void mtr_t::init(buf_block_t *b) inline void mtr_t::init(buf_block_t *b)
{ {
b->page.status= buf_page_t::INIT_ON_FLUSH;
if (m_log_mode != MTR_LOG_ALL) if (m_log_mode != MTR_LOG_ALL)
{ {
ut_ad(m_log_mode == MTR_LOG_NONE || m_log_mode == MTR_LOG_NO_REDO); ut_ad(m_log_mode == MTR_LOG_NONE || m_log_mode == MTR_LOG_NO_REDO);
...@@ -510,7 +512,6 @@ inline void mtr_t::init(buf_block_t *b) ...@@ -510,7 +512,6 @@ inline void mtr_t::init(buf_block_t *b)
m_log.close(log_write<INIT_PAGE>(b->page.id, &b->page)); m_log.close(log_write<INIT_PAGE>(b->page.id, &b->page));
m_last_offset= FIL_PAGE_TYPE; m_last_offset= FIL_PAGE_TYPE;
b->page.init_on_flush= true;
} }
/** Free a page. /** Free a page.
......
...@@ -4984,7 +4984,7 @@ lock_rec_validate_page( ...@@ -4984,7 +4984,7 @@ lock_rec_validate_page(
goto function_exit; goto function_exit;
} }
ut_ad(!block->page.file_page_was_freed); DBUG_ASSERT(block->page.status != buf_page_t::FREED);
for (i = 0; i < nth_lock; i++) { for (i = 0; i < nth_lock; i++) {
...@@ -5090,7 +5090,7 @@ lock_rec_block_validate( ...@@ -5090,7 +5090,7 @@ lock_rec_block_validate(
/* The lock and the block that it is referring to may be freed at /* The lock and the block that it is referring to may be freed at
this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check. this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check.
If the lock exists in lock_rec_validate_page() we assert If the lock exists in lock_rec_validate_page() we assert
!block->page.file_page_was_freed. */ block->page.status != FREED. */
buf_block_t* block; buf_block_t* block;
mtr_t mtr; mtr_t mtr;
......
...@@ -37,6 +37,7 @@ Created June 2005 by Marko Makela ...@@ -37,6 +37,7 @@ Created June 2005 by Marko Makela
/** A BLOB field reference full of zero, for use in assertions and tests. /** A BLOB field reference full of zero, for use in assertions and tests.
Initially, BLOB field references are set to zero, in Initially, BLOB field references are set to zero, in
dtuple_convert_big_rec(). */ dtuple_convert_big_rec(). */
alignas(UNIV_PAGE_SIZE_MIN)
const byte field_ref_zero[UNIV_PAGE_SIZE_MAX] = { 0, }; const byte field_ref_zero[UNIV_PAGE_SIZE_MAX] = { 0, };
#include "mtr0log.h" #include "mtr0log.h"
...@@ -450,7 +451,7 @@ static void page_zip_compress_write_log(buf_block_t *block, ...@@ -450,7 +451,7 @@ static void page_zip_compress_write_log(buf_block_t *block,
if (trailer_size) if (trailer_size)
mtr->zmemcpy(block->page, page_zip_get_size(page_zip) - trailer_size, mtr->zmemcpy(block->page, page_zip_get_size(page_zip) - trailer_size,
trailer_size); trailer_size);
block->page.init_on_flush= true; /* because of mtr_t::init() */ block->page.status = buf_page_t::INIT_ON_FLUSH; /* because of mtr_t::init() */
} }
/******************************************************//** /******************************************************//**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment