Commit 8af53897 authored by Marko Mäkelä

MDEV-25801: buf_flush_dirty_pages() is very slow

In commit 7cffb5f6 (MDEV-23399)
the implementation of buf_flush_dirty_pages() was replaced with
a slow one, which would perform excessive scans of the
buf_pool.flush_list and make little progress.

buf_flush_list(), buf_flush_LRU(): Split from buf_flush_lists().
Vladislav Vaintroub noticed that we will not need to invoke
log_flush_task.wait() for the LRU eviction flushing.

buf_flush_list_space(): Replaces buf_flush_dirty_pages().
This is like buf_flush_list(), but operating on a single
tablespace at a time. Writes at most innodb_io_capacity
pages. Returns whether some of the tablespace might remain
in the buffer pool.
parent 762bcb81
...@@ -1729,10 +1729,10 @@ inline bool buf_pool_t::withdraw_blocks() ...@@ -1729,10 +1729,10 @@ inline bool buf_pool_t::withdraw_blocks()
/* reserve free_list length */ /* reserve free_list length */
if (UT_LIST_GET_LEN(withdraw) < withdraw_target) { if (UT_LIST_GET_LEN(withdraw) < withdraw_target) {
ulint n_flushed = buf_flush_lists( ulint n_flushed = buf_flush_LRU(
std::max<ulint>(withdraw_target std::max<ulint>(withdraw_target
- UT_LIST_GET_LEN(withdraw), - UT_LIST_GET_LEN(withdraw),
srv_LRU_scan_depth), 0); srv_LRU_scan_depth));
buf_flush_wait_batch_end_acquiring_mutex(true); buf_flush_wait_batch_end_acquiring_mutex(true);
if (n_flushed) { if (n_flushed) {
...@@ -3321,7 +3321,7 @@ buf_page_get_low( ...@@ -3321,7 +3321,7 @@ buf_page_get_low(
fix_block->fix(); fix_block->fix();
mysql_mutex_unlock(&buf_pool.mutex); mysql_mutex_unlock(&buf_pool.mutex);
buf_flush_lists(ULINT_UNDEFINED, LSN_MAX); buf_flush_list();
buf_flush_wait_batch_end_acquiring_mutex(false); buf_flush_wait_batch_end_acquiring_mutex(false);
if (fix_block->page.buf_fix_count() == 1 if (fix_block->page.buf_fix_count() == 1
......
This diff is collapsed.
...@@ -487,7 +487,7 @@ buf_block_t* buf_LRU_get_free_block(bool have_mutex) ...@@ -487,7 +487,7 @@ buf_block_t* buf_LRU_get_free_block(bool have_mutex)
involved (particularly in case of ROW_FORMAT=COMPRESSED pages). We involved (particularly in case of ROW_FORMAT=COMPRESSED pages). We
can do that in a separate patch sometime in future. */ can do that in a separate patch sometime in future. */
if (!buf_flush_lists(innodb_lru_flush_size, 0)) { if (!buf_flush_LRU(innodb_lru_flush_size)) {
MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT); MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT);
++flush_failures; ++flush_failures;
} }
......
...@@ -1098,7 +1098,7 @@ static bool fil_crypt_start_encrypting_space(fil_space_t* space) ...@@ -1098,7 +1098,7 @@ static bool fil_crypt_start_encrypting_space(fil_space_t* space)
mtr.commit(); mtr.commit();
/* 4 - sync tablespace before publishing crypt data */ /* 4 - sync tablespace before publishing crypt data */
while (buf_flush_dirty_pages(space->id)); while (buf_flush_list_space(space));
/* 5 - publish crypt data */ /* 5 - publish crypt data */
mutex_enter(&fil_crypt_threads_mutex); mutex_enter(&fil_crypt_threads_mutex);
...@@ -2036,14 +2036,7 @@ fil_crypt_flush_space( ...@@ -2036,14 +2036,7 @@ fil_crypt_flush_space(
if (end_lsn > 0 && !space->is_stopping()) { if (end_lsn > 0 && !space->is_stopping()) {
ulint sum_pages = 0; ulint sum_pages = 0;
const ulonglong start = my_interval_timer(); const ulonglong start = my_interval_timer();
do { while (buf_flush_list_space(space, &sum_pages));
ulint n_dirty= buf_flush_dirty_pages(state->space->id);
if (!n_dirty) {
break;
}
sum_pages += n_dirty;
} while (!space->is_stopping());
if (sum_pages) { if (sum_pages) {
const ulonglong end = my_interval_timer(); const ulonglong end = my_interval_timer();
......
...@@ -1742,7 +1742,7 @@ void fil_close_tablespace(ulint id) ...@@ -1742,7 +1742,7 @@ void fil_close_tablespace(ulint id)
can no longer read more pages of this tablespace to buf_pool. can no longer read more pages of this tablespace to buf_pool.
Thus we can clean the tablespace out of buf_pool Thus we can clean the tablespace out of buf_pool
completely and permanently. */ completely and permanently. */
while (buf_flush_dirty_pages(id)); while (buf_flush_list_space(space));
ut_ad(space->is_stopping()); ut_ad(space->is_stopping());
/* If the free is successful, the X lock will be released before /* If the free is successful, the X lock will be released before
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2020, MariaDB Corporation. Copyright (c) 2014, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -54,12 +54,6 @@ the list as they age towards the tail of the LRU. ...@@ -54,12 +54,6 @@ the list as they age towards the tail of the LRU.
@param id tablespace identifier */ @param id tablespace identifier */
void buf_flush_remove_pages(ulint id); void buf_flush_remove_pages(ulint id);
/** Try to flush all the dirty pages that belong to a given tablespace.
@param id tablespace identifier
@return number dirty pages that there were for this tablespace */
ulint buf_flush_dirty_pages(ulint id)
MY_ATTRIBUTE((warn_unused_result));
/*******************************************************************//** /*******************************************************************//**
Relocates a buffer control block on the flush_list. Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage has already been Note that it is assumed that the contents of bpage has already been
...@@ -93,10 +87,23 @@ buf_flush_init_for_writing( ...@@ -93,10 +87,23 @@ buf_flush_init_for_writing(
/** Write out dirty blocks from buf_pool.flush_list. /** Write out dirty blocks from buf_pool.flush_list.
@param max_n wished maximum mumber of blocks flushed @param max_n wished maximum mumber of blocks flushed
@param lsn buf_pool.get_oldest_modification(LSN_MAX) target (0=LRU flush) @param lsn buf_pool.get_oldest_modification(LSN_MAX) target
@return the number of processed pages
@retval 0 if a buf_pool.flush_list batch is already running */
ulint buf_flush_list(ulint max_n= ULINT_UNDEFINED, lsn_t lsn= LSN_MAX);
/** Try to flush dirty pages that belong to a given tablespace.
@param space tablespace
@param n_flushed number of pages written
@return whether any pages might not have been flushed */
bool buf_flush_list_space(fil_space_t *space, ulint *n_flushed= nullptr)
MY_ATTRIBUTE((warn_unused_result));
/** Write out dirty blocks from buf_pool.LRU.
@param max_n wished maximum mumber of blocks flushed
@return the number of processed pages @return the number of processed pages
@retval 0 if a batch of the same type (lsn==0 or lsn!=0) is already running */ @retval 0 if a buf_pool.LRU batch is already running */
ulint buf_flush_lists(ulint max_n, lsn_t lsn); ulint buf_flush_LRU(ulint max_n);
/** Wait until a flush batch ends. /** Wait until a flush batch ends.
@param lru true=buf_pool.LRU; false=buf_pool.flush_list */ @param lru true=buf_pool.LRU; false=buf_pool.flush_list */
......
...@@ -4232,7 +4232,7 @@ row_import_for_mysql( ...@@ -4232,7 +4232,7 @@ row_import_for_mysql(
/* Ensure that all pages dirtied during the IMPORT make it to disk. /* Ensure that all pages dirtied during the IMPORT make it to disk.
The only dirty pages generated should be from the pessimistic purge The only dirty pages generated should be from the pessimistic purge
of delete marked records that couldn't be purged in Phase I. */ of delete marked records that couldn't be purged in Phase I. */
while (buf_flush_dirty_pages(prebuilt->table->space_id)); while (buf_flush_list_space(prebuilt->table->space));
for (ulint count = 0; prebuilt->table->space->referenced(); count++) { for (ulint count = 0; prebuilt->table->space->referenced(); count++) {
/* Issue a warning every 10.24 seconds, starting after /* Issue a warning every 10.24 seconds, starting after
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation. Copyright (c) 2017, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -536,7 +536,7 @@ row_quiesce_table_start( ...@@ -536,7 +536,7 @@ row_quiesce_table_start(
} }
} }
while (buf_flush_dirty_pages(table->space_id)) { while (buf_flush_list_space(table->space)) {
if (trx_is_interrupted(trx)) { if (trx_is_interrupted(trx)) {
goto aborted; goto aborted;
} }
......
...@@ -596,7 +596,7 @@ static void trx_purge_truncate_history() ...@@ -596,7 +596,7 @@ static void trx_purge_truncate_history()
return; return;
} }
const fil_space_t& space = *purge_sys.truncate.current; fil_space_t& space = *purge_sys.truncate.current;
/* Undo tablespace always are a single file. */ /* Undo tablespace always are a single file. */
ut_a(UT_LIST_GET_LEN(space.chain) == 1); ut_a(UT_LIST_GET_LEN(space.chain) == 1);
fil_node_t* file = UT_LIST_GET_FIRST(space.chain); fil_node_t* file = UT_LIST_GET_FIRST(space.chain);
...@@ -672,7 +672,7 @@ static void trx_purge_truncate_history() ...@@ -672,7 +672,7 @@ static void trx_purge_truncate_history()
mini-transaction commit and the server was killed, then mini-transaction commit and the server was killed, then
discarding the to-be-trimmed pages without flushing would discarding the to-be-trimmed pages without flushing would
break crash recovery. So, we cannot avoid the write. */ break crash recovery. So, we cannot avoid the write. */
while (buf_flush_dirty_pages(space.id)); while (buf_flush_list_space(&space));
log_free_check(); log_free_check();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment