Commit a30d4250 authored by Marko Mäkelä

MDEV-26790 InnoDB read-ahead may cause page writes

buf_LRU_get_free_block(): Replace the Boolean parameter with a
ternary parameter, so that have_no_mutex_soft can be specified to
reduce the chances of initiating page eviction flushing in read-ahead.

buf_read_acquire(): Invoke buf_LRU_get_free_block(have_no_mutex_soft)
and check in each caller for a nullptr return value.
parent d6aed216
/***************************************************************************** /*****************************************************************************
Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2018, 2021, MariaDB Corporation. Copyright (c) 2018, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -454,7 +454,7 @@ byte *buf_buddy_alloc_low(ulint i, bool *lru) ...@@ -454,7 +454,7 @@ byte *buf_buddy_alloc_low(ulint i, bool *lru)
} }
/* Try replacing an uncompressed page in the buffer pool. */ /* Try replacing an uncompressed page in the buffer pool. */
block = buf_LRU_get_free_block(true); block = buf_LRU_get_free_block(have_mutex);
if (lru) { if (lru) {
*lru = true; *lru = true;
} }
......
...@@ -2416,7 +2416,7 @@ buf_page_get_low( ...@@ -2416,7 +2416,7 @@ buf_page_get_low(
goto loop; goto loop;
} }
buf_block_t *new_block = buf_LRU_get_free_block(false); buf_block_t *new_block = buf_LRU_get_free_block(have_no_mutex);
buf_block_init_low(new_block); buf_block_init_low(new_block);
wait_for_unfix: wait_for_unfix:
......
...@@ -386,14 +386,15 @@ we put it to free list to be used. ...@@ -386,14 +386,15 @@ we put it to free list to be used.
* scan whole LRU list * scan whole LRU list
* scan LRU list even if buf_pool.try_LRU_scan is not set * scan LRU list even if buf_pool.try_LRU_scan is not set
@param have_mutex whether buf_pool.mutex is already being held @param get how to allocate the block
@return the free control block, in state BUF_BLOCK_MEMORY */ @return the free control block, in state BUF_BLOCK_MEMORY
buf_block_t *buf_LRU_get_free_block(bool have_mutex) @retval nullptr if get==have_no_mutex_soft and memory was not available */
buf_block_t* buf_LRU_get_free_block(buf_LRU_get get)
{ {
ulint n_iterations = 0; ulint n_iterations = 0;
ulint flush_failures = 0; ulint flush_failures = 0;
MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH); MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH);
if (have_mutex) { if (UNIV_UNLIKELY(get == have_mutex)) {
mysql_mutex_assert_owner(&buf_pool.mutex); mysql_mutex_assert_owner(&buf_pool.mutex);
goto got_mutex; goto got_mutex;
} }
...@@ -405,13 +406,14 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex) ...@@ -405,13 +406,14 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
DBUG_EXECUTE_IF("ib_lru_force_no_free_page", DBUG_EXECUTE_IF("ib_lru_force_no_free_page",
if (!buf_lru_free_blocks_error_printed) { if (!buf_lru_free_blocks_error_printed) {
n_iterations = 21; n_iterations = 21;
block = nullptr;
goto not_found;}); goto not_found;});
retry: retry:
/* If there is a block in the free list, take it */ /* If there is a block in the free list, take it */
if ((block = buf_LRU_get_free_only()) != nullptr) { if ((block = buf_LRU_get_free_only()) != nullptr) {
got_block: got_block:
if (!have_mutex) { if (UNIV_LIKELY(get != have_mutex)) {
mysql_mutex_unlock(&buf_pool.mutex); mysql_mutex_unlock(&buf_pool.mutex);
} }
block->page.zip.clear(); block->page.zip.clear();
...@@ -435,6 +437,11 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex) ...@@ -435,6 +437,11 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
buf_pool.try_LRU_scan = false; buf_pool.try_LRU_scan = false;
} }
if (get == have_no_mutex_soft) {
mysql_mutex_unlock(&buf_pool.mutex);
return nullptr;
}
for (;;) { for (;;) {
if ((block = buf_LRU_get_free_only()) != nullptr) { if ((block = buf_LRU_get_free_only()) != nullptr) {
goto got_block; goto got_block;
......
...@@ -272,7 +272,7 @@ buf_read_page_low( ...@@ -272,7 +272,7 @@ buf_read_page_low(
/** Acquire a buffer block. */ /** Acquire a buffer block. */
static buf_block_t *buf_read_acquire() static buf_block_t *buf_read_acquire()
{ {
return buf_LRU_get_free_block(false); return buf_LRU_get_free_block(have_no_mutex_soft);
} }
/** Free a buffer block if needed. */ /** Free a buffer block if needed. */
...@@ -341,7 +341,9 @@ ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size) ...@@ -341,7 +341,9 @@ ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size)
goto no_read_ahead; goto no_read_ahead;
/* Read all the suitable blocks within the area */ /* Read all the suitable blocks within the area */
buf_block_t *block= zip_size ? nullptr : buf_read_acquire(); buf_block_t *block= nullptr;
if (!zip_size && !(block= buf_read_acquire()))
goto no_read_ahead;
for (page_id_t i= low; i < high; ++i) for (page_id_t i= low; i < high; ++i)
{ {
...@@ -354,8 +356,8 @@ ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size) ...@@ -354,8 +356,8 @@ ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size)
{ {
count++; count++;
ut_ad(!block); ut_ad(!block);
if (!zip_size) if (!zip_size && !(block= buf_read_acquire()))
block= buf_read_acquire(); break;
} }
} }
...@@ -398,7 +400,9 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size, ...@@ -398,7 +400,9 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size,
return DB_TABLESPACE_DELETED; return DB_TABLESPACE_DELETED;
} }
buf_block_t *block= zip_size ? nullptr : buf_LRU_get_free_block(false); buf_block_t *block= zip_size
? nullptr
: buf_LRU_get_free_block(have_no_mutex);
/* Our caller should already have ensured that the page does not /* Our caller should already have ensured that the page does not
exist in buf_pool.page_hash. */ exist in buf_pool.page_hash. */
...@@ -428,11 +432,14 @@ void buf_read_page_background(fil_space_t *space, const page_id_t page_id, ...@@ -428,11 +432,14 @@ void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold()); buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
if (buf_pool.page_hash_contains(page_id, chain)) if (buf_pool.page_hash_contains(page_id, chain))
{ {
skip:
space->release(); space->release();
return; return;
} }
buf_block_t *block= zip_size ? nullptr : buf_read_acquire(); buf_block_t *block= nullptr;
if (!zip_size && !(block= buf_read_acquire()))
goto skip;
dberr_t err; dberr_t err;
if (buf_read_page_low(page_id, zip_size, chain, &err, space, block)) if (buf_read_page_low(page_id, zip_size, chain, &err, space, block))
...@@ -581,7 +588,9 @@ ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size) ...@@ -581,7 +588,9 @@ ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
} }
/* If we got this far, read-ahead can be sensible: do it */ /* If we got this far, read-ahead can be sensible: do it */
buf_block_t *block= zip_size ? nullptr : buf_read_acquire(); buf_block_t *block= nullptr;
if (!zip_size && !(block= buf_read_acquire()))
goto fail;
count= 0; count= 0;
for (; new_low != new_high_1; ++new_low) for (; new_low != new_high_1; ++new_low)
...@@ -595,8 +604,8 @@ ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size) ...@@ -595,8 +604,8 @@ ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
{ {
count++; count++;
ut_ad(!block); ut_ad(!block);
if (!zip_size) if (!zip_size && !(block= buf_read_acquire()))
block= buf_read_acquire(); break;
} }
} }
...@@ -635,7 +644,7 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos) ...@@ -635,7 +644,7 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos)
} }
const ulint zip_size = space->zip_size() | 1; const ulint zip_size = space->zip_size() | 1;
buf_block_t* block = buf_LRU_get_free_block(false); buf_block_t* block = buf_LRU_get_free_block(have_no_mutex);
for (ulint i = 0; i < page_nos.size(); i++) { for (ulint i = 0; i < page_nos.size(); i++) {
...@@ -672,7 +681,7 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos) ...@@ -672,7 +681,7 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos)
if (buf_read_page_low(cur_page_id, zip_size, chain, &err, space, if (buf_read_page_low(cur_page_id, zip_size, chain, &err, space,
block)) { block)) {
ut_ad(!block); ut_ad(!block);
block = buf_LRU_get_free_block(false); block = buf_LRU_get_free_block(have_no_mutex);
} }
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
......
...@@ -499,7 +499,7 @@ dberr_t fsp_header_init(fil_space_t *space, uint32_t size, mtr_t *mtr) ...@@ -499,7 +499,7 @@ dberr_t fsp_header_init(fil_space_t *space, uint32_t size, mtr_t *mtr)
const page_id_t page_id(space->id, 0); const page_id_t page_id(space->id, 0);
const ulint zip_size = space->zip_size(); const ulint zip_size = space->zip_size();
buf_block_t *free_block = buf_LRU_get_free_block(false); buf_block_t *free_block = buf_LRU_get_free_block(have_no_mutex);
mtr->x_lock_space(space); mtr->x_lock_space(space);
...@@ -833,7 +833,7 @@ fsp_fill_free_list( ...@@ -833,7 +833,7 @@ fsp_fill_free_list(
if (i) if (i)
{ {
buf_block_t *f= buf_LRU_get_free_block(false); buf_block_t *f= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block= buf_page_create(space, static_cast<uint32_t>(i), buf_block_t *block= buf_page_create(space, static_cast<uint32_t>(i),
zip_size, mtr, f); zip_size, mtr, f);
if (UNIV_UNLIKELY(block != f)) if (UNIV_UNLIKELY(block != f))
...@@ -845,7 +845,7 @@ fsp_fill_free_list( ...@@ -845,7 +845,7 @@ fsp_fill_free_list(
if (space->purpose != FIL_TYPE_TEMPORARY) if (space->purpose != FIL_TYPE_TEMPORARY)
{ {
buf_block_t *f= buf_LRU_get_free_block(false); buf_block_t *f= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block= buf_block_t *block=
buf_page_create(space, static_cast<uint32_t>(i + 1), buf_page_create(space, static_cast<uint32_t>(i + 1),
zip_size, mtr, f); zip_size, mtr, f);
...@@ -1053,7 +1053,7 @@ fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr) ...@@ -1053,7 +1053,7 @@ fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr)
} }
} }
free_block= buf_LRU_get_free_block(false); free_block= buf_LRU_get_free_block(have_no_mutex);
got_free_block: got_free_block:
block= buf_page_create(space, static_cast<uint32_t>(offset), block= buf_page_create(space, static_cast<uint32_t>(offset),
space->zip_size(), mtr, free_block); space->zip_size(), mtr, free_block);
......
...@@ -90,7 +90,7 @@ inline bool buf_page_peek_if_too_old(const buf_page_t *bpage) ...@@ -90,7 +90,7 @@ inline bool buf_page_peek_if_too_old(const buf_page_t *bpage)
@return own: the allocated block, in state BUF_BLOCK_MEMORY */ @return own: the allocated block, in state BUF_BLOCK_MEMORY */
inline buf_block_t *buf_block_alloc() inline buf_block_t *buf_block_alloc()
{ {
return buf_LRU_get_free_block(false); return buf_LRU_get_free_block(have_no_mutex);
} }
/********************************************************************//** /********************************************************************//**
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2021, MariaDB Corporation. Copyright (c) 2017, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -62,6 +62,17 @@ bool buf_LRU_scan_and_free_block(ulint limit= ULINT_UNDEFINED); ...@@ -62,6 +62,17 @@ bool buf_LRU_scan_and_free_block(ulint limit= ULINT_UNDEFINED);
@retval NULL if the free list is empty */ @retval NULL if the free list is empty */
buf_block_t* buf_LRU_get_free_only(); buf_block_t* buf_LRU_get_free_only();
/** How to acquire a block */
enum buf_LRU_get {
/** The caller is not holding buf_pool.mutex */
have_no_mutex= 0,
/** The caller is holding buf_pool.mutex */
have_mutex,
/** The caller is not holding buf_pool.mutex and is OK if a block
cannot be allocated. */
have_no_mutex_soft
};
/** Get a block from the buf_pool.free list. /** Get a block from the buf_pool.free list.
If the list is empty, blocks will be moved from the end of buf_pool.LRU If the list is empty, blocks will be moved from the end of buf_pool.LRU
to buf_pool.free. to buf_pool.free.
...@@ -83,9 +94,10 @@ we put it to free list to be used. ...@@ -83,9 +94,10 @@ we put it to free list to be used.
* scan whole LRU list * scan whole LRU list
* scan LRU list even if buf_pool.try_LRU_scan is not set * scan LRU list even if buf_pool.try_LRU_scan is not set
@param have_mutex whether buf_pool.mutex is already being held @param get how to allocate the block
@return the free control block, in state BUF_BLOCK_MEMORY */ @return the free control block, in state BUF_BLOCK_MEMORY
buf_block_t* buf_LRU_get_free_block(bool have_mutex) @retval nullptr if get==have_no_mutex_soft and memory was not available */
buf_block_t* buf_LRU_get_free_block(buf_LRU_get get)
MY_ATTRIBUTE((malloc,warn_unused_result)); MY_ATTRIBUTE((malloc,warn_unused_result));
/** @return whether the unzip_LRU list should be used for evicting a victim /** @return whether the unzip_LRU list should be used for evicting a victim
......
...@@ -726,7 +726,7 @@ static struct ...@@ -726,7 +726,7 @@ static struct
{ {
retry: retry:
bool fail= false; bool fail= false;
buf_block_t *free_block= buf_LRU_get_free_block(false); buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
mysql_mutex_lock(&recv_sys.mutex); mysql_mutex_lock(&recv_sys.mutex);
for (auto d= defers.begin(); d != defers.end(); ) for (auto d= defers.begin(); d != defers.end(); )
...@@ -3284,7 +3284,7 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id, ...@@ -3284,7 +3284,7 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id,
@retval nullptr if the page cannot be initialized based on log records */ @retval nullptr if the page cannot be initialized based on log records */
buf_block_t *recv_sys_t::recover_low(const page_id_t page_id) buf_block_t *recv_sys_t::recover_low(const page_id_t page_id)
{ {
buf_block_t *free_block= buf_LRU_get_free_block(false); buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block= nullptr; buf_block_t *block= nullptr;
mysql_mutex_lock(&mutex); mysql_mutex_lock(&mutex);
...@@ -3404,7 +3404,7 @@ void recv_sys_t::apply(bool last_batch) ...@@ -3404,7 +3404,7 @@ void recv_sys_t::apply(bool last_batch)
fil_system.extend_to_recv_size(); fil_system.extend_to_recv_size();
buf_block_t *free_block= buf_LRU_get_free_block(false); buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
for (map::iterator p= pages.begin(); p != pages.end(); ) for (map::iterator p= pages.begin(); p != pages.end(); )
{ {
...@@ -3451,7 +3451,7 @@ void recv_sys_t::apply(bool last_batch) ...@@ -3451,7 +3451,7 @@ void recv_sys_t::apply(bool last_batch)
{ {
next_free_block: next_free_block:
mysql_mutex_unlock(&mutex); mysql_mutex_unlock(&mutex);
free_block= buf_LRU_get_free_block(false); free_block= buf_LRU_get_free_block(have_no_mutex);
mysql_mutex_lock(&mutex); mysql_mutex_lock(&mutex);
break; break;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment