MDEV-34265 Possible hang during IO burst with innodb_flush_sync enabled

When checkpoint age goes beyond the sync flush threshold and
buf_flush_sync_lsn is set, page cleaner enters into "furious flush"
stage to aggressively flush dirty pages from flush list and pull
checkpoint LSN above safe margin. In this stage, page cleaner skips
doing LRU flush and eviction.

In 10.6, all other threads entirely rely on page cleaner to generate
free pages. If free pages get over while page cleaner is busy in
"furious flush" stage, a session thread could wait for free page in the
middle of a min-transaction(mtr) while holding latches on other pages.

It, in turn, can prevent page cleaner to flush such pages preventing
checkpoint LSN to move forward creating a deadlock situation. Even
otherwise, it could create a stall and hang like situation for large BP
with plenty of dirty pages to flush before the stage could finish.

Fix: During furious flush, check and evict LRU pages after each flush
iteration.
parent b2048179
......@@ -268,4 +268,17 @@ PARTITION BY KEY(a) PARTITIONS 16;
INSERT INTO t1 VALUES(1);
UPDATE t1 SET a = 2 WHERE a = 1;
DROP TABLE t1;
#
# MDEV-34265 Possible hang during IO burst with innodb_flush_sync enabled
#
CREATE TABLE t1(f1 MEDIUMTEXT)ENGINE=InnoDB;
SET @save_dbug=@@GLOBAL.debug_dbug;
SET @@GLOBAL.debug_dbug='+d,ib_page_cleaner_sleep';
SET STATEMENT debug_dbug='+d,ib_free_page_sleep' FOR
INSERT INTO t1 VALUES(REPEAT(1, 8459264));
SET @@GLOBAL.debug_dbug=@save_dbug;
SELECT length(f1) FROM t1;
length(f1)
8459264
DROP TABLE t1;
# End of 10.6 tests
......@@ -163,9 +163,9 @@ DROP TABLE t1,t2;
CREATE TABLE t1 (a INT KEY) ENGINE=InnoDB;
--error 0,1193
--error 0,ER_UNKNOWN_SYSTEM_VARIABLE
SET @save_limit = @@GLOBAL.innodb_limit_optimistic_insert_debug;
--error 0,1193
--error 0,ER_UNKNOWN_SYSTEM_VARIABLE
SET GLOBAL innodb_limit_optimistic_insert_debug = 2;
BEGIN;
......@@ -289,4 +289,30 @@ CREATE TABLE t1(a INT KEY)ENGINE=InnoDB
INSERT INTO t1 VALUES(1);
UPDATE t1 SET a = 2 WHERE a = 1;
DROP TABLE t1;
--echo #
--echo # MDEV-34265 Possible hang during IO burst with innodb_flush_sync enabled
--echo #
CREATE TABLE t1(f1 MEDIUMTEXT)ENGINE=InnoDB;
--error 0,ER_UNKNOWN_SYSTEM_VARIABLE
SET @save_dbug=@@GLOBAL.debug_dbug;
--error 0,ER_UNKNOWN_SYSTEM_VARIABLE
SET @@GLOBAL.debug_dbug='+d,ib_page_cleaner_sleep';
if ($have_debug) {
SET STATEMENT debug_dbug='+d,ib_free_page_sleep' FOR
INSERT INTO t1 VALUES(REPEAT(1, 8459264));
}
if (!$have_debug) {
--echo SET STATEMENT debug_dbug='+d,ib_free_page_sleep' FOR
INSERT INTO t1 VALUES(REPEAT(1, 8459264));
}
--error 0,ER_UNKNOWN_SYSTEM_VARIABLE
SET @@GLOBAL.debug_dbug=@save_dbug;
SELECT length(f1) FROM t1;
DROP TABLE t1;
--echo # End of 10.6 tests
......@@ -2025,70 +2025,94 @@ ATTRIBUTE_COLD void buf_flush_ahead(lsn_t lsn, bool furious)
/** Conduct checkpoint-related flushing for innodb_flush_sync=ON,
and try to initiate checkpoints until the target is met.
@param lsn minimum value of buf_pool.get_oldest_modification(LSN_MAX) */
ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn)
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
static void buf_flush_sync_for_checkpoint(lsn_t lsn)
{
ut_ad(!srv_read_only_mode);
mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
for (;;)
/* During furious flush, we need to keep generating free pages. Otherwise
concurrent mtrs could be blocked holding latches for the pages to be flushed
causing deadlock in rare occasion.
Ideally we should be acquiring buffer pool mutex for the check but it is more
expensive and we are not using the mutex while calling need_LRU_eviction() as
of today. It is a quick and dirty read of the LRU and free list length.
Atomic read of try_LRU_scan should eventually let us do the eviction.
Correcting the inaccuracy would need more consideration to avoid any possible
performance regression. */
if (buf_pool.need_LRU_eviction())
{
if (ulint n_flushed= buf_flush_list(srv_max_io_capacity, lsn))
{
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE,
MONITOR_FLUSH_SYNC_COUNT,
MONITOR_FLUSH_SYNC_PAGES, n_flushed);
}
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_pool.page_cleaner_set_idle(false);
buf_pool.n_flush_inc();
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
switch (srv_file_flush_method) {
case SRV_NOSYNC:
case SRV_O_DIRECT_NO_FSYNC:
break;
default:
fil_flush_file_spaces();
}
mysql_mutex_lock(&buf_pool.mutex);
/* Confirm that eviction is needed after acquiring buffer pool mutex. */
if (buf_pool.need_LRU_eviction())
/* We intend to only evict pages keeping maximum flush bandwidth for
flush list pages advancing checkpoint. However, if the LRU tail is full
of dirty pages, we might need some flushing. */
std::ignore= buf_flush_LRU(srv_io_capacity);
mysql_mutex_unlock(&buf_pool.mutex);
mysql_mutex_lock(&log_sys.mutex);
const lsn_t newest_lsn= log_sys.get_lsn();
mysql_mutex_lock(&log_sys.flush_order_mutex);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
lsn_t measure= buf_pool.get_oldest_modification(0);
mysql_mutex_unlock(&log_sys.flush_order_mutex);
const lsn_t checkpoint_lsn= measure ? measure : newest_lsn;
if (!recv_recovery_is_on() &&
checkpoint_lsn > log_sys.last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT)
{
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
log_checkpoint_low(checkpoint_lsn, newest_lsn);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
measure= buf_pool.get_oldest_modification(LSN_MAX);
}
else
{
mysql_mutex_unlock(&log_sys.mutex);
if (!measure)
measure= LSN_MAX;
}
buf_pool.n_flush_dec();
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}
mysql_mutex_assert_not_owner(&log_sys.mutex);
if (ulint n_flushed= buf_flush_list(srv_max_io_capacity, lsn))
{
MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_SYNC_TOTAL_PAGE,
MONITOR_FLUSH_SYNC_COUNT,
MONITOR_FLUSH_SYNC_PAGES, n_flushed);
}
/* After attempting log checkpoint, check if we have reached our target. */
const lsn_t target= buf_flush_sync_lsn;
switch (srv_file_flush_method) {
case SRV_NOSYNC:
case SRV_O_DIRECT_NO_FSYNC:
break;
default:
fil_flush_file_spaces();
}
if (measure >= target)
buf_flush_sync_lsn= 0;
else if (measure >= buf_flush_async_lsn)
buf_flush_async_lsn= 0;
mysql_mutex_lock(&log_sys.mutex);
const lsn_t newest_lsn= log_sys.get_lsn();
mysql_mutex_lock(&log_sys.flush_order_mutex);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
lsn_t measure= buf_pool.get_oldest_modification(0);
mysql_mutex_unlock(&log_sys.flush_order_mutex);
const lsn_t checkpoint_lsn= measure ? measure : newest_lsn;
/* wake up buf_flush_wait() */
pthread_cond_broadcast(&buf_pool.done_flush_list);
if (!recv_recovery_is_on() &&
checkpoint_lsn > log_sys.last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT)
{
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
log_checkpoint_low(checkpoint_lsn, newest_lsn);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
measure= buf_pool.get_oldest_modification(LSN_MAX);
}
else
{
mysql_mutex_unlock(&log_sys.mutex);
if (!measure)
measure= LSN_MAX;
}
lsn= std::max(lsn, target);
mysql_mutex_assert_not_owner(&log_sys.mutex);
if (measure >= lsn)
return;
}
/* After attempting log checkpoint, check if we have reached our target. */
const lsn_t target= buf_flush_sync_lsn;
if (measure >= target)
buf_flush_sync_lsn= 0;
else if (measure >= buf_flush_async_lsn)
buf_flush_async_lsn= 0;
/* wake up buf_flush_wait() */
pthread_cond_broadcast(&buf_pool.done_flush_list);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}
/** Check if the adpative flushing threshold is recommended based on
......@@ -2298,6 +2322,10 @@ static void buf_flush_page_cleaner()
for (;;)
{
DBUG_EXECUTE_IF("ib_page_cleaner_sleep",
{
std::this_thread::sleep_for(std::chrono::seconds(1));
});
lsn_limit= buf_flush_sync_lsn;
if (UNIV_UNLIKELY(lsn_limit != 0) && UNIV_LIKELY(srv_flush_sync))
......
......@@ -406,17 +406,32 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
ut_ad(LRU_size <= BUF_LRU_MIN_LEN ||
available >= scan_depth || buf_pool.need_LRU_eviction());
ut_d(bool signalled = false);
if (UNIV_UNLIKELY(available < scan_depth) && LRU_size > BUF_LRU_MIN_LEN)
{
mysql_mutex_lock(&buf_pool.flush_list_mutex);
if (!buf_pool.page_cleaner_active())
{
buf_pool.page_cleaner_wakeup(true);
ut_d(signalled = true);
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}
if (!have_mutex)
mysql_mutex_unlock(&buf_pool.mutex);
DBUG_EXECUTE_IF("ib_free_page_sleep",
{
static bool do_sleep = true;
if (do_sleep && signalled)
{
std::this_thread::sleep_for(std::chrono::milliseconds(10));
do_sleep = false;
}
});
block->page.zip.clear();
return block;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment