Commit 9d146652 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-32029 Assertion failures in log_sort_flush_list upon crash recovery

In commit 0d175968 (MDEV-31354)
we only waited that no buf_pool.flush_list writes are in progress.
The buf_flush_page_cleaner() thread could still initiate page writes
from the buf_pool.LRU list while only holding buf_pool.mutex, not
buf_pool.flush_list_mutex. This is something that was changed in
commit a55b951e (MDEV-26827).

log_sort_flush_list(): Wait for the buf_flush_page_cleaner() thread to
be completely idle, including LRU flushing.

buf_flush_page_cleaner(): Always broadcast buf_pool.done_flush_list
when becoming idle, so that log_sort_flush_list() will be woken up.
Also, ensure that buf_pool.n_flush_inc() or
buf_pool.flush_list_set_active() has been invoked before any page
writes are initiated.

buf_flush_try_neighbors(): Release buf_pool.mutex here and not in the
callers, to avoid code duplication. Make innodb_flush_neighbors=ON
obey the innodb_io_capacity limit.
parent 31ea201e
...@@ -355,6 +355,13 @@ inline void buf_pool_t::n_flush_dec() ...@@ -355,6 +355,13 @@ inline void buf_pool_t::n_flush_dec()
mysql_mutex_unlock(&flush_list_mutex); mysql_mutex_unlock(&flush_list_mutex);
} }
inline void buf_pool_t::n_flush_dec_holding_mutex()
{
mysql_mutex_assert_owner(&flush_list_mutex);
ut_ad(page_cleaner_status >= LRU_FLUSH);
page_cleaner_status-= LRU_FLUSH;
}
/** Complete write of a file page from buf_pool. /** Complete write of a file page from buf_pool.
@param request write request @param request write request
@param error whether the write may have failed */ @param error whether the write may have failed */
...@@ -1076,6 +1083,8 @@ static ulint buf_flush_try_neighbors(fil_space_t *space, ...@@ -1076,6 +1083,8 @@ static ulint buf_flush_try_neighbors(fil_space_t *space,
bool contiguous, bool evict, bool contiguous, bool evict,
ulint n_flushed, ulint n_to_flush) ulint n_flushed, ulint n_to_flush)
{ {
mysql_mutex_unlock(&buf_pool.mutex);
ut_ad(space->id == page_id.space()); ut_ad(space->id == page_id.space());
ut_ad(bpage->id() == page_id); ut_ad(bpage->id() == page_id);
...@@ -1337,25 +1346,23 @@ static void buf_flush_LRU_list_batch(ulint max, bool evict, ...@@ -1337,25 +1346,23 @@ static void buf_flush_LRU_list_batch(ulint max, bool evict,
continue; continue;
} }
if (neighbors && space->is_rotational()) if (n->flushed >= max && !recv_recovery_is_on())
{
mysql_mutex_unlock(&buf_pool.mutex);
n->flushed+= buf_flush_try_neighbors(space, page_id, bpage,
neighbors == 1,
do_evict, n->flushed, max);
goto reacquire_mutex;
}
else if (n->flushed >= max && !recv_recovery_is_on())
{ {
bpage->lock.u_unlock(true); bpage->lock.u_unlock(true);
break; break;
} }
if (neighbors && space->is_rotational())
n->flushed+= buf_flush_try_neighbors(space, page_id, bpage,
neighbors == 1,
do_evict, n->flushed, max);
else if (bpage->flush(do_evict, space)) else if (bpage->flush(do_evict, space))
{
++n->flushed; ++n->flushed;
else
continue;
goto reacquire_mutex; goto reacquire_mutex;
} }
}
else else
/* Can't evict or dispatch this block. Go to previous. */ /* Can't evict or dispatch this block. Go to previous. */
ut_ad(buf_pool.lru_hp.is_hp(prev)); ut_ad(buf_pool.lru_hp.is_hp(prev));
...@@ -1492,19 +1499,18 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn) ...@@ -1492,19 +1499,18 @@ static ulint buf_do_flush_list_batch(ulint max_n, lsn_t lsn)
else else
{ {
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (neighbors && space->is_rotational()) do
{ {
mysql_mutex_unlock(&buf_pool.mutex); if (neighbors && space->is_rotational())
count+= buf_flush_try_neighbors(space, page_id, bpage, neighbors == 1, count+= buf_flush_try_neighbors(space, page_id, bpage,
false, count, max_n); neighbors == 1, false, count, max_n);
reacquire_mutex:
mysql_mutex_lock(&buf_pool.mutex);
}
else if (bpage->flush(false, space)) else if (bpage->flush(false, space))
{
++count; ++count;
goto reacquire_mutex; else
continue;
mysql_mutex_lock(&buf_pool.mutex);
} }
while (0);
} }
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
...@@ -1723,8 +1729,7 @@ The caller must invoke buf_dblwr.flush_buffered_writes() ...@@ -1723,8 +1729,7 @@ The caller must invoke buf_dblwr.flush_buffered_writes()
after releasing buf_pool.mutex. after releasing buf_pool.mutex.
@param max_n wished maximum mumber of blocks flushed @param max_n wished maximum mumber of blocks flushed
@param evict whether to evict pages after flushing @param evict whether to evict pages after flushing
@return evict ? number of processed pages : number of pages written @return evict ? number of processed pages : number of pages written */
@retval 0 if a buf_pool.LRU batch is already running */
ulint buf_flush_LRU(ulint max_n, bool evict) ulint buf_flush_LRU(ulint max_n, bool evict)
{ {
mysql_mutex_assert_owner(&buf_pool.mutex); mysql_mutex_assert_owner(&buf_pool.mutex);
...@@ -1867,7 +1872,9 @@ static void buf_flush_wait(lsn_t lsn) ...@@ -1867,7 +1872,9 @@ static void buf_flush_wait(lsn_t lsn)
{ {
ut_ad(lsn <= log_sys.get_lsn()); ut_ad(lsn <= log_sys.get_lsn());
while (buf_pool.get_oldest_modification(lsn) < lsn) lsn_t oldest_lsn;
while ((oldest_lsn= buf_pool.get_oldest_modification(lsn)) < lsn)
{ {
if (buf_flush_sync_lsn < lsn) if (buf_flush_sync_lsn < lsn)
{ {
...@@ -1876,13 +1883,20 @@ static void buf_flush_wait(lsn_t lsn) ...@@ -1876,13 +1883,20 @@ static void buf_flush_wait(lsn_t lsn)
pthread_cond_signal(&buf_pool.do_flush_list); pthread_cond_signal(&buf_pool.do_flush_list);
my_cond_wait(&buf_pool.done_flush_list, my_cond_wait(&buf_pool.done_flush_list,
&buf_pool.flush_list_mutex.m_mutex); &buf_pool.flush_list_mutex.m_mutex);
if (buf_pool.get_oldest_modification(lsn) >= lsn) oldest_lsn= buf_pool.get_oldest_modification(lsn);
if (oldest_lsn >= lsn)
break; break;
} }
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
os_aio_wait_until_no_pending_writes(false); os_aio_wait_until_no_pending_writes(false);
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
} }
if (oldest_lsn >= buf_flush_sync_lsn)
{
buf_flush_sync_lsn= 0;
pthread_cond_broadcast(&buf_pool.done_flush_list);
}
} }
/** Wait until all persistent pages are flushed up to a limit. /** Wait until all persistent pages are flushed up to a limit.
...@@ -2272,24 +2286,19 @@ static void buf_flush_page_cleaner() ...@@ -2272,24 +2286,19 @@ static void buf_flush_page_cleaner()
set_timespec(abstime, 1); set_timespec(abstime, 1);
lsn_limit= buf_flush_sync_lsn; lsn_limit= buf_flush_sync_lsn;
const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0); lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
if (!oldest_lsn) if (!oldest_lsn)
{ {
if (UNIV_UNLIKELY(lsn_limit != 0)) fully_unemployed:
{
buf_flush_sync_lsn= 0; buf_flush_sync_lsn= 0;
/* wake up buf_flush_wait() */
pthread_cond_broadcast(&buf_pool.done_flush_list);
}
unemployed:
buf_flush_async_lsn= 0;
set_idle: set_idle:
buf_pool.page_cleaner_set_idle(true); buf_pool.page_cleaner_set_idle(true);
set_almost_idle:
pthread_cond_broadcast(&buf_pool.done_flush_list);
if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED))
break; break;
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
end_of_batch:
buf_dblwr.flush_buffered_writes(); buf_dblwr.flush_buffered_writes();
do do
...@@ -2307,6 +2316,7 @@ static void buf_flush_page_cleaner() ...@@ -2307,6 +2316,7 @@ static void buf_flush_page_cleaner()
if (!buf_pool.ran_out()) if (!buf_pool.ran_out())
continue; continue;
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
oldest_lsn= buf_pool.get_oldest_modification(0);
} }
lsn_t soft_lsn_limit= buf_flush_async_lsn; lsn_t soft_lsn_limit= buf_flush_async_lsn;
...@@ -2335,7 +2345,17 @@ static void buf_flush_page_cleaner() ...@@ -2335,7 +2345,17 @@ static void buf_flush_page_cleaner()
else if (buf_pool.ran_out()) else if (buf_pool.ran_out())
{ {
buf_pool.page_cleaner_set_idle(false); buf_pool.page_cleaner_set_idle(false);
buf_pool.get_oldest_modification(0); buf_pool.n_flush_inc();
/* Remove clean blocks from buf_pool.flush_list before the LRU scan. */
for (buf_page_t *p= UT_LIST_GET_FIRST(buf_pool.flush_list); p; )
{
const lsn_t lsn{p->oldest_modification()};
ut_ad(lsn > 2 || lsn == 1);
buf_page_t *n= UT_LIST_GET_NEXT(list, p);
if (lsn <= 1)
buf_pool.delete_from_flush_list(p);
p= n;
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
n= srv_max_io_capacity; n= srv_max_io_capacity;
mysql_mutex_lock(&buf_pool.mutex); mysql_mutex_lock(&buf_pool.mutex);
...@@ -2343,15 +2363,16 @@ static void buf_flush_page_cleaner() ...@@ -2343,15 +2363,16 @@ static void buf_flush_page_cleaner()
n= buf_flush_LRU(n, false); n= buf_flush_LRU(n, false);
mysql_mutex_unlock(&buf_pool.mutex); mysql_mutex_unlock(&buf_pool.mutex);
last_pages+= n; last_pages+= n;
check_oldest_and_set_idle:
if (pct_lwm == 0.0)
goto end_of_batch;
/* when idle flushing kicks in page_cleaner is marked active.
reset it back to idle since the it was made active as part of
idle flushing stage. */
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
goto set_idle; buf_pool.n_flush_dec_holding_mutex();
oldest_lsn= buf_pool.get_oldest_modification(0);
if (!oldest_lsn)
goto fully_unemployed;
if (oldest_lsn >= buf_flush_async_lsn)
buf_flush_async_lsn= 0;
buf_pool.page_cleaner_set_idle(false);
goto set_almost_idle;
} }
else if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) else if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED))
break; break;
...@@ -2379,6 +2400,7 @@ static void buf_flush_page_cleaner() ...@@ -2379,6 +2400,7 @@ static void buf_flush_page_cleaner()
- page cleaner is idle (dirty_pct < srv_max_dirty_pages_pct_lwm) - page cleaner is idle (dirty_pct < srv_max_dirty_pages_pct_lwm)
- there are no pending reads but there are dirty pages to flush */ - there are no pending reads but there are dirty pages to flush */
buf_pool.update_last_activity_count(activity_count); buf_pool.update_last_activity_count(activity_count);
buf_pool.n_flush_inc();
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
goto idle_flush; goto idle_flush;
} }
...@@ -2394,9 +2416,10 @@ static void buf_flush_page_cleaner() ...@@ -2394,9 +2416,10 @@ static void buf_flush_page_cleaner()
else if (dirty_pct < srv_max_buf_pool_modified_pct) else if (dirty_pct < srv_max_buf_pool_modified_pct)
possibly_unemployed: possibly_unemployed:
if (!soft_lsn_limit && !af_needed_for_redo(oldest_lsn)) if (!soft_lsn_limit && !af_needed_for_redo(oldest_lsn))
goto unemployed; goto set_idle;
buf_pool.page_cleaner_set_idle(false); buf_pool.page_cleaner_set_idle(false);
buf_pool.n_flush_inc();
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (UNIV_UNLIKELY(soft_lsn_limit != 0)) if (UNIV_UNLIKELY(soft_lsn_limit != 0))
...@@ -2434,10 +2457,7 @@ static void buf_flush_page_cleaner() ...@@ -2434,10 +2457,7 @@ static void buf_flush_page_cleaner()
n_flushed); n_flushed);
} }
else if (buf_flush_async_lsn <= oldest_lsn) else if (buf_flush_async_lsn <= oldest_lsn)
{ goto check_oldest_and_set_idle;
mysql_mutex_lock(&buf_pool.flush_list_mutex);
goto unemployed;
}
n= n >= n_flushed ? n - n_flushed : 0; n= n >= n_flushed ? n - n_flushed : 0;
goto LRU_flush; goto LRU_flush;
......
...@@ -1753,6 +1753,10 @@ class buf_pool_t ...@@ -1753,6 +1753,10 @@ class buf_pool_t
/** Decrement the number of pending LRU flush */ /** Decrement the number of pending LRU flush */
inline void n_flush_dec(); inline void n_flush_dec();
/** Decrement the number of pending LRU flush
while holding flush_list_mutex */
inline void n_flush_dec_holding_mutex();
/** @return whether flush_list flushing is active */ /** @return whether flush_list flushing is active */
bool flush_list_active() const bool flush_list_active() const
{ {
...@@ -1777,6 +1781,15 @@ class buf_pool_t ...@@ -1777,6 +1781,15 @@ class buf_pool_t
mysql_mutex_assert_owner(&flush_list_mutex); mysql_mutex_assert_owner(&flush_list_mutex);
return page_cleaner_status & PAGE_CLEANER_IDLE; return page_cleaner_status & PAGE_CLEANER_IDLE;
} }
/** @return whether the page cleaner may be initiating writes */
bool page_cleaner_active() const
{
mysql_mutex_assert_owner(&flush_list_mutex);
static_assert(PAGE_CLEANER_IDLE == 1, "efficiency");
return page_cleaner_status > PAGE_CLEANER_IDLE;
}
/** Wake up the page cleaner if needed. /** Wake up the page cleaner if needed.
@param for_LRU whether to wake up for LRU eviction */ @param for_LRU whether to wake up for LRU eviction */
void page_cleaner_wakeup(bool for_LRU= false); void page_cleaner_wakeup(bool for_LRU= false);
......
...@@ -3581,11 +3581,16 @@ inline fil_space_t *fil_system_t::find(const char *path) const ...@@ -3581,11 +3581,16 @@ inline fil_space_t *fil_system_t::find(const char *path) const
static void log_sort_flush_list() static void log_sort_flush_list()
{ {
/* Ensure that oldest_modification() cannot change during std::sort() */ /* Ensure that oldest_modification() cannot change during std::sort() */
{
const double pct_lwm= srv_max_dirty_pages_pct_lwm;
/* Disable "idle" flushing in order to minimize the wait time below. */
srv_max_dirty_pages_pct_lwm= 0.0;
for (;;) for (;;)
{ {
os_aio_wait_until_no_pending_writes(false); os_aio_wait_until_no_pending_writes(false);
mysql_mutex_lock(&buf_pool.flush_list_mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex);
if (buf_pool.flush_list_active()) if (buf_pool.page_cleaner_active())
my_cond_wait(&buf_pool.done_flush_list, my_cond_wait(&buf_pool.done_flush_list,
&buf_pool.flush_list_mutex.m_mutex); &buf_pool.flush_list_mutex.m_mutex);
else if (!os_aio_pending_writes()) else if (!os_aio_pending_writes())
...@@ -3593,6 +3598,9 @@ static void log_sort_flush_list() ...@@ -3593,6 +3598,9 @@ static void log_sort_flush_list()
mysql_mutex_unlock(&buf_pool.flush_list_mutex); mysql_mutex_unlock(&buf_pool.flush_list_mutex);
} }
srv_max_dirty_pages_pct_lwm= pct_lwm;
}
const size_t size= UT_LIST_GET_LEN(buf_pool.flush_list); const size_t size= UT_LIST_GET_LEN(buf_pool.flush_list);
std::unique_ptr<buf_page_t *[]> list(new buf_page_t *[size]); std::unique_ptr<buf_page_t *[]> list(new buf_page_t *[size]);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment