Commit f11b6087 authored by Krunal Bauskar's avatar Krunal Bauskar Committed by Marko Mäkelä

MDEV-24949: Enabling idle flushing (possible regression from MDEV-23855)

- Currently page cleaner thread will stop flushing if
  dirty_pct < innodb_max_dirty_pages_pct_lwm.

- If the server is not performing any activity then said resources/time
  could be used to flush the pending dirty pages and keep buffer pool
  clean for the next burst of the cycle. This flushing is called idle flushing.

- flushing logic underwent a complete revamp in 10.5.7/8
  and as part of the revamp idle flushing logic got removed.

- New proposed logic of idle flushing is based on updated logic of the
  page cleaner that will enable idle flushing if
  - buf page cleaner is idle
  - there are dirty pages (< innodb_max_dirty_pages_pct_lwm)
  - server is not performing any activity

  Logic will kickstart the idle flushing bounded by innodb_io_capacity.

(Thanks to Marko Makela for reviewing the patch and idea
 right from the its inception).
parent 1799caa3
......@@ -1480,6 +1480,8 @@ bool buf_pool_t::create()
srv_buf_pool_old_size= srv_buf_pool_size;
srv_buf_pool_base_size= srv_buf_pool_size;
last_activity_count= srv_get_activity_count();
chunk_t::map_ref= chunk_t::map_reg;
buf_LRU_old_ratio_update(100 * 3 / 8, false);
btr_search_sys_create();
......
......@@ -134,7 +134,34 @@ inline void buf_pool_t::page_cleaner_wakeup()
double dirty_pct= double(UT_LIST_GET_LEN(buf_pool.flush_list)) * 100.0 /
double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
double pct_lwm= srv_max_dirty_pages_pct_lwm;
/* if pct_lwm != 0.0 means adpative flushing is enabled.
signal buf page cleaner thread
- if pct_lwm <= dirty_pct then it will invoke apdative flushing flow
- if pct_lwm > dirty_pct then it will invoke idle flushing flow.
idle_flushing:
dirty_pct < innodb_max_dirty_pages_pct_lwm so it could be an
idle flushing use-case.
Why is last_activity_count not updated always?
- let's first understand when is server activity count updated.
- it is updated on commit of a transaction trx_t::commit() and not
on adding a page to the flush list.
- page_cleaner_wakeup is called when a page is added to the flush list.
- now let's say the first user thread, updates the count from X -> Y but
is yet to commit the transaction (so activity count is still Y).
followup user threads will see the updated count as (Y) that is matching
the universal server activity count (Y), giving a false impression that
the server is idle.
How to avoid this?
- by allowing last_activity_count to updated when page-cleaner is made
active and has work to do. This ensures that the last_activity signal
is consumed by the page-cleaner before the next one is generated. */
if ((pct_lwm != 0.0 && pct_lwm <= dirty_pct) ||
(pct_lwm != 0.0 && last_activity_count == srv_get_activity_count()) ||
srv_max_buf_pool_modified_pct <= dirty_pct)
{
page_cleaner_is_idle= false;
......@@ -2076,6 +2103,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
mysql_mutex_lock(&buf_pool.flush_list_mutex);
lsn_t lsn_limit;
ulint last_activity_count= srv_get_activity_count();
for (;;)
{
......@@ -2095,9 +2123,14 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
break;
if (buf_pool.page_cleaner_idle())
my_cond_wait(&buf_pool.do_flush_list,
&buf_pool.flush_list_mutex.m_mutex);
/* If buf pager cleaner is idle and there is no work
(either dirty pages are all flushed or adaptive flushing
is not enabled) then opt for non-timed wait */
if (buf_pool.page_cleaner_idle() &&
(!UT_LIST_GET_LEN(buf_pool.flush_list) ||
srv_max_dirty_pages_pct_lwm == 0.0))
my_cond_wait(&buf_pool.do_flush_list, &buf_pool.flush_list_mutex.m_mutex);
else
my_cond_timedwait(&buf_pool.do_flush_list,
&buf_pool.flush_list_mutex.m_mutex, &abstime);
......@@ -2135,10 +2168,26 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
const double dirty_pct= double(dirty_blocks) * 100.0 /
double(UT_LIST_GET_LEN(buf_pool.LRU) + UT_LIST_GET_LEN(buf_pool.free));
bool idle_flush= false;
if (lsn_limit);
else if (srv_max_dirty_pages_pct_lwm != 0.0)
{
if (dirty_pct < srv_max_dirty_pages_pct_lwm)
const ulint activity_count= srv_get_activity_count();
if (activity_count != last_activity_count)
last_activity_count= activity_count;
else if (buf_pool.page_cleaner_idle() && buf_pool.n_pend_reads == 0)
{
/* reaching here means 3 things:
- last_activity_count == activity_count: suggesting server is idle
(no trx_t::commit activity)
- page cleaner is idle (dirty_pct < srv_max_dirty_pages_pct_lwm)
- there are no pending reads but there are dirty pages to flush */
idle_flush= true;
buf_pool.update_last_activity_count(activity_count);
}
if (!idle_flush && dirty_pct < srv_max_dirty_pages_pct_lwm)
goto unemployed;
}
else if (dirty_pct < srv_max_buf_pool_modified_pct)
......@@ -2163,7 +2212,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
pthread_cond_broadcast(&buf_pool.done_flush_list);
goto try_checkpoint;
}
else if (!srv_adaptive_flushing)
else if (idle_flush || !srv_adaptive_flushing)
{
n_flushed= buf_flush_lists(srv_io_capacity, LSN_MAX);
try_checkpoint:
......@@ -2220,6 +2269,12 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
next:
#endif /* !DBUG_OFF */
mysql_mutex_lock(&buf_pool.flush_list_mutex);
/* when idle flushing kicks in page_cleaner is marked active.
reset it back to idle since the it was made active as part of
idle flushing stage. */
if (idle_flush)
buf_pool.page_cleaner_set_idle(true);
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
......
......@@ -1946,6 +1946,8 @@ class buf_pool_t
private:
/** whether the page cleaner needs wakeup from indefinite sleep */
bool page_cleaner_is_idle;
/** track server activity count for signaling idle flushing */
ulint last_activity_count;
public:
/** signalled to wake up the page_cleaner; protected by flush_list_mutex */
pthread_cond_t do_flush_list;
......@@ -1966,6 +1968,13 @@ class buf_pool_t
page_cleaner_is_idle= deep_sleep;
}
/** Update server last activity count */
void update_last_activity_count(ulint activity_count)
{
mysql_mutex_assert_owner(&flush_list_mutex);
last_activity_count= activity_count;
}
// n_flush_LRU + n_flush_list is approximately COUNT(io_fix()==BUF_IO_WRITE)
// in flush_list
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment