Commit d34479dc authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-33053 InnoDB LRU flushing does not run before running out of buffer pool

buf_flush_LRU(): Display a warning if no pages could be evicted and
no writes initiated.

buf_pool_t::need_LRU_eviction(): Renamed from buf_pool_t::ran_out().
Check if the amount of free pages is smaller than innodb_lru_scan_depth
instead of checking if it is 0.

buf_flush_page_cleaner(): For the final LRU flush after a checkpoint
flush, use a "budget" of innodb_io_capacity_max, like we do in the
case when we are not in "furious" checkpoint flushing.

Co-developed-by: Debarun Banerjee
Reviewed by: Debarun Banerjee
Tested by: Matthias Leich
parent 16f2f8e5
...@@ -1797,6 +1797,28 @@ ulint buf_flush_LRU(ulint max_n, bool evict) ...@@ -1797,6 +1797,28 @@ ulint buf_flush_LRU(ulint max_n, bool evict)
buf_pool.try_LRU_scan= true; buf_pool.try_LRU_scan= true;
pthread_cond_broadcast(&buf_pool.done_free); pthread_cond_broadcast(&buf_pool.done_free);
} }
else if (!pages && !buf_pool.try_LRU_scan &&
         /* std::atomic_flag::test_and_set() returns the PREVIOUS value;
         the negation makes this warning fire exactly once, on the first
         failure after the flag was cleared (the page cleaner clears
         LRU_warned when it goes idle). Without the negation the first
         failure would be silent and every later one would warn again. */
         !buf_pool.LRU_warned.test_and_set(std::memory_order_acquire))
{
  /* For example, with the minimum innodb_buffer_pool_size=5M and
  the default innodb_page_size=16k there are only a little over 316
  pages in the buffer pool. The buffer pool can easily be exhausted
  by a workload of some dozen concurrent connections. The system could
  reach a deadlock like the following:
  (1) Many threads are waiting in buf_LRU_get_free_block()
  for buf_pool.done_free.
  (2) Some threads are waiting for a page latch which is held by
  another thread that is waiting in buf_LRU_get_free_block().
  (3) This thread is the only one that could make progress, but
  we fail to do so because all the pages that we scanned are
  buffer-fixed or latched by some thread. */
  sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!"
                    " %zu blocks are in use and %zu free."
                    " Consider increasing innodb_buffer_pool_size.",
                    UT_LIST_GET_LEN(buf_pool.LRU),
                    UT_LIST_GET_LEN(buf_pool.free));
}
return pages; return pages;
} }
...@@ -2287,6 +2309,16 @@ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in, ...@@ -2287,6 +2309,16 @@ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in,
goto func_exit; goto func_exit;
} }
TPOOL_SUPPRESS_TSAN
bool buf_pool_t::need_LRU_eviction() const
{
  /* try_LRU_scan==false means that buf_LRU_get_free_block() is waiting
  for buf_flush_page_cleaner() to evict some blocks; that always
  qualifies as needing eviction. */
  if (!try_LRU_scan)
    return true;
  /* Otherwise, eviction is needed when the free list is running low
  (fewer than innodb_lru_scan_depth/2 blocks) while the LRU list is
  still long enough to be shrunk. */
  const auto lru_len= UT_LIST_GET_LEN(LRU);
  const auto free_len= UT_LIST_GET_LEN(free);
  return UNIV_UNLIKELY(lru_len > BUF_LRU_MIN_LEN &&
                       free_len < srv_LRU_scan_depth / 2);
}
/** page_cleaner thread tasked with flushing dirty pages from the buffer /** page_cleaner thread tasked with flushing dirty pages from the buffer
pools. As of now we'll have only one coordinator. */ pools. As of now we'll have only one coordinator. */
static void buf_flush_page_cleaner() static void buf_flush_page_cleaner()
...@@ -2319,21 +2351,24 @@ static void buf_flush_page_cleaner() ...@@ -2319,21 +2351,24 @@ static void buf_flush_page_cleaner()
} }
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
if (buf_pool.ran_out()) if (!buf_pool.need_LRU_eviction())
goto no_wait; {
else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
break; break;
if (buf_pool.page_cleaner_idle() && if (buf_pool.page_cleaner_idle() &&
(!UT_LIST_GET_LEN(buf_pool.flush_list) || (!UT_LIST_GET_LEN(buf_pool.flush_list) ||
srv_max_dirty_pages_pct_lwm == 0.0)) srv_max_dirty_pages_pct_lwm == 0.0))
/* We are idle; wait for buf_pool.page_cleaner_wakeup() */ {
my_cond_wait(&buf_pool.do_flush_list, buf_pool.LRU_warned.clear(std::memory_order_release);
&buf_pool.flush_list_mutex.m_mutex); /* We are idle; wait for buf_pool.page_cleaner_wakeup() */
else my_cond_wait(&buf_pool.do_flush_list,
my_cond_timedwait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex.m_mutex);
&buf_pool.flush_list_mutex.m_mutex, &abstime); }
no_wait: else
my_cond_timedwait(&buf_pool.do_flush_list,
&buf_pool.flush_list_mutex.m_mutex, &abstime);
}
set_timespec(abstime, 1); set_timespec(abstime, 1);
lsn_limit= buf_flush_sync_lsn; lsn_limit= buf_flush_sync_lsn;
...@@ -2365,7 +2400,7 @@ static void buf_flush_page_cleaner() ...@@ -2365,7 +2400,7 @@ static void buf_flush_page_cleaner()
} }
while (false); while (false);
if (!buf_pool.ran_out()) if (!buf_pool.need_LRU_eviction())
continue; continue;
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
oldest_lsn= buf_pool.get_oldest_modification(0); oldest_lsn= buf_pool.get_oldest_modification(0);
...@@ -2394,7 +2429,7 @@ static void buf_flush_page_cleaner() ...@@ -2394,7 +2429,7 @@ static void buf_flush_page_cleaner()
if (oldest_lsn >= soft_lsn_limit) if (oldest_lsn >= soft_lsn_limit)
buf_flush_async_lsn= soft_lsn_limit= 0; buf_flush_async_lsn= soft_lsn_limit= 0;
} }
else if (buf_pool.ran_out()) else if (buf_pool.need_LRU_eviction())
{ {
buf_pool.page_cleaner_set_idle(false); buf_pool.page_cleaner_set_idle(false);
buf_pool.n_flush_inc(); buf_pool.n_flush_inc();
...@@ -2509,9 +2544,11 @@ static void buf_flush_page_cleaner() ...@@ -2509,9 +2544,11 @@ static void buf_flush_page_cleaner()
MONITOR_FLUSH_ADAPTIVE_PAGES, MONITOR_FLUSH_ADAPTIVE_PAGES,
n_flushed); n_flushed);
} }
else if (buf_flush_async_lsn <= oldest_lsn) else if (buf_flush_async_lsn <= oldest_lsn &&
!buf_pool.need_LRU_eviction())
goto check_oldest_and_set_idle; goto check_oldest_and_set_idle;
n= srv_max_io_capacity;
n= n >= n_flushed ? n - n_flushed : 0; n= n >= n_flushed ? n - n_flushed : 0;
goto LRU_flush; goto LRU_flush;
} }
......
...@@ -60,10 +60,6 @@ static constexpr ulint BUF_LRU_OLD_TOLERANCE = 20; ...@@ -60,10 +60,6 @@ static constexpr ulint BUF_LRU_OLD_TOLERANCE = 20;
frames in the buffer pool, we set this to TRUE */ frames in the buffer pool, we set this to TRUE */
static bool buf_lru_switched_on_innodb_mon = false; static bool buf_lru_switched_on_innodb_mon = false;
/** True if diagnostic message about difficult to find free blocks
in the buffer bool has already printed. */
static bool buf_lru_free_blocks_error_printed;
/******************************************************************//** /******************************************************************//**
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
and page_zip_decompress() operations. Based on the statistics, and page_zip_decompress() operations. Based on the statistics,
...@@ -408,6 +404,7 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex) ...@@ -408,6 +404,7 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
buf_LRU_check_size_of_non_data_objects(); buf_LRU_check_size_of_non_data_objects();
buf_block_t* block; buf_block_t* block;
IF_DBUG(static bool buf_lru_free_blocks_error_printed,);
DBUG_EXECUTE_IF("ib_lru_force_no_free_page", DBUG_EXECUTE_IF("ib_lru_force_no_free_page",
if (!buf_lru_free_blocks_error_printed) { if (!buf_lru_free_blocks_error_printed) {
n_iterations = 21; n_iterations = 21;
...@@ -417,9 +414,25 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex) ...@@ -417,9 +414,25 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
/* If there is a block in the free list, take it */ /* If there is a block in the free list, take it */
if ((block = buf_LRU_get_free_only()) != nullptr) { if ((block = buf_LRU_get_free_only()) != nullptr) {
got_block: got_block:
const ulint LRU_size = UT_LIST_GET_LEN(buf_pool.LRU);
const ulint available = UT_LIST_GET_LEN(buf_pool.free);
const ulint scan_depth = srv_LRU_scan_depth / 2;
ut_ad(LRU_size <= BUF_LRU_MIN_LEN || available >= scan_depth
|| buf_pool.need_LRU_eviction());
if (!have_mutex) { if (!have_mutex) {
mysql_mutex_unlock(&buf_pool.mutex); mysql_mutex_unlock(&buf_pool.mutex);
} }
if (UNIV_UNLIKELY(available < scan_depth)
&& LRU_size > BUF_LRU_MIN_LEN) {
mysql_mutex_lock(&buf_pool.flush_list_mutex);
if (!buf_pool.page_cleaner_active()) {
buf_pool.page_cleaner_wakeup(true);
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}
block->page.zip.clear(); block->page.zip.clear();
return block; return block;
} }
...@@ -445,10 +458,11 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex) ...@@ -445,10 +458,11 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
if ((block = buf_LRU_get_free_only()) != nullptr) { if ((block = buf_LRU_get_free_only()) != nullptr) {
goto got_block; goto got_block;
} }
const bool wake = buf_pool.need_LRU_eviction();
mysql_mutex_unlock(&buf_pool.mutex); mysql_mutex_unlock(&buf_pool.mutex);
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
const auto n_flush = buf_pool.n_flush(); const auto n_flush = buf_pool.n_flush();
if (!buf_pool.try_LRU_scan) { if (wake && !buf_pool.page_cleaner_active()) {
buf_pool.page_cleaner_wakeup(true); buf_pool.page_cleaner_wakeup(true);
} }
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
...@@ -467,9 +481,10 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex) ...@@ -467,9 +481,10 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS ); MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS );
} }
if (n_iterations == 21 && !buf_lru_free_blocks_error_printed if (n_iterations == 21
&& srv_buf_pool_old_size == srv_buf_pool_size) { && srv_buf_pool_old_size == srv_buf_pool_size
buf_lru_free_blocks_error_printed = true; && buf_pool.LRU_warned.test_and_set(std::memory_order_acquire)) {
IF_DBUG(buf_lru_free_blocks_error_printed = true,);
mysql_mutex_unlock(&buf_pool.mutex); mysql_mutex_unlock(&buf_pool.mutex);
ib::warn() << "Difficult to find free blocks in the buffer pool" ib::warn() << "Difficult to find free blocks in the buffer pool"
" (" << n_iterations << " search iterations)! " " (" << n_iterations << " search iterations)! "
......
...@@ -1488,10 +1488,8 @@ class buf_pool_t ...@@ -1488,10 +1488,8 @@ class buf_pool_t
n_chunks_new / 4 * chunks->size; n_chunks_new / 4 * chunks->size;
} }
/** @return whether the buffer pool has run out */ /** @return whether the buffer pool is running low */
TPOOL_SUPPRESS_TSAN bool need_LRU_eviction() const;
bool ran_out() const
{ return UNIV_UNLIKELY(!try_LRU_scan || !UT_LIST_GET_LEN(free)); }
/** @return whether the buffer pool is shrinking */ /** @return whether the buffer pool is shrinking */
inline bool is_shrinking() const inline bool is_shrinking() const
...@@ -1811,6 +1809,9 @@ class buf_pool_t ...@@ -1811,6 +1809,9 @@ class buf_pool_t
Set whenever the free list grows, along with a broadcast of done_free. Set whenever the free list grows, along with a broadcast of done_free.
Protected by buf_pool.mutex. */ Protected by buf_pool.mutex. */
Atomic_relaxed<bool> try_LRU_scan; Atomic_relaxed<bool> try_LRU_scan;
/** Whether we have warned to be running out of buffer pool */
std::atomic_flag LRU_warned;
/* @} */ /* @} */
/** @name LRU replacement algorithm fields */ /** @name LRU replacement algorithm fields */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment