Commit a30d4250 authored by Marko Mäkelä

MDEV-26790 InnoDB read-ahead may cause page writes

buf_LRU_get_free_block(): Replace the Boolean parameter with a
ternary parameter, so that have_no_mutex_soft can be specified to
reduce the chances of initiating page eviction flushing in read-ahead.

buf_read_acquire(): Invoke buf_LRU_get_free_block(have_no_mutex_soft)
and check in each caller for a nullptr return value.
parent d6aed216
/*****************************************************************************
Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2018, 2021, MariaDB Corporation.
Copyright (c) 2018, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -454,7 +454,7 @@ byte *buf_buddy_alloc_low(ulint i, bool *lru)
}
/* Try replacing an uncompressed page in the buffer pool. */
block = buf_LRU_get_free_block(true);
block = buf_LRU_get_free_block(have_mutex);
if (lru) {
*lru = true;
}
......
......@@ -2416,7 +2416,7 @@ buf_page_get_low(
goto loop;
}
buf_block_t *new_block = buf_LRU_get_free_block(false);
buf_block_t *new_block = buf_LRU_get_free_block(have_no_mutex);
buf_block_init_low(new_block);
wait_for_unfix:
......
......@@ -386,14 +386,15 @@ we put it to free list to be used.
* scan whole LRU list
* scan LRU list even if buf_pool.try_LRU_scan is not set
@param have_mutex whether buf_pool.mutex is already being held
@return the free control block, in state BUF_BLOCK_MEMORY */
buf_block_t *buf_LRU_get_free_block(bool have_mutex)
@param get how to allocate the block
@return the free control block, in state BUF_BLOCK_MEMORY
@retval nullptr if get==have_no_mutex_soft and memory was not available */
buf_block_t* buf_LRU_get_free_block(buf_LRU_get get)
{
ulint n_iterations = 0;
ulint flush_failures = 0;
MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH);
if (have_mutex) {
if (UNIV_UNLIKELY(get == have_mutex)) {
mysql_mutex_assert_owner(&buf_pool.mutex);
goto got_mutex;
}
......@@ -405,13 +406,14 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
DBUG_EXECUTE_IF("ib_lru_force_no_free_page",
if (!buf_lru_free_blocks_error_printed) {
n_iterations = 21;
block = nullptr;
goto not_found;});
retry:
/* If there is a block in the free list, take it */
if ((block = buf_LRU_get_free_only()) != nullptr) {
got_block:
if (!have_mutex) {
if (UNIV_LIKELY(get != have_mutex)) {
mysql_mutex_unlock(&buf_pool.mutex);
}
block->page.zip.clear();
......@@ -435,6 +437,11 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex)
buf_pool.try_LRU_scan = false;
}
if (get == have_no_mutex_soft) {
mysql_mutex_unlock(&buf_pool.mutex);
return nullptr;
}
for (;;) {
if ((block = buf_LRU_get_free_only()) != nullptr) {
goto got_block;
......
......@@ -272,7 +272,7 @@ buf_read_page_low(
/** Acquire a buffer block. */
static buf_block_t *buf_read_acquire()
{
return buf_LRU_get_free_block(false);
return buf_LRU_get_free_block(have_no_mutex_soft);
}
/** Free a buffer block if needed. */
......@@ -341,7 +341,9 @@ ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size)
goto no_read_ahead;
/* Read all the suitable blocks within the area */
buf_block_t *block= zip_size ? nullptr : buf_read_acquire();
buf_block_t *block= nullptr;
if (!zip_size && !(block= buf_read_acquire()))
goto no_read_ahead;
for (page_id_t i= low; i < high; ++i)
{
......@@ -354,8 +356,8 @@ ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size)
{
count++;
ut_ad(!block);
if (!zip_size)
block= buf_read_acquire();
if (!zip_size && !(block= buf_read_acquire()))
break;
}
}
......@@ -398,7 +400,9 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size,
return DB_TABLESPACE_DELETED;
}
buf_block_t *block= zip_size ? nullptr : buf_LRU_get_free_block(false);
buf_block_t *block= zip_size
? nullptr
: buf_LRU_get_free_block(have_no_mutex);
/* Our caller should already have ensured that the page does not
exist in buf_pool.page_hash. */
......@@ -428,11 +432,14 @@ void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
if (buf_pool.page_hash_contains(page_id, chain))
{
skip:
space->release();
return;
}
buf_block_t *block= zip_size ? nullptr : buf_read_acquire();
buf_block_t *block= nullptr;
if (!zip_size && !(block= buf_read_acquire()))
goto skip;
dberr_t err;
if (buf_read_page_low(page_id, zip_size, chain, &err, space, block))
......@@ -581,7 +588,9 @@ ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
}
/* If we got this far, read-ahead can be sensible: do it */
buf_block_t *block= zip_size ? nullptr : buf_read_acquire();
buf_block_t *block= nullptr;
if (!zip_size && !(block= buf_read_acquire()))
goto fail;
count= 0;
for (; new_low != new_high_1; ++new_low)
......@@ -595,8 +604,8 @@ ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
{
count++;
ut_ad(!block);
if (!zip_size)
block= buf_read_acquire();
if (!zip_size && !(block= buf_read_acquire()))
break;
}
}
......@@ -635,7 +644,7 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos)
}
const ulint zip_size = space->zip_size() | 1;
buf_block_t* block = buf_LRU_get_free_block(false);
buf_block_t* block = buf_LRU_get_free_block(have_no_mutex);
for (ulint i = 0; i < page_nos.size(); i++) {
......@@ -672,7 +681,7 @@ void buf_read_recv_pages(uint32_t space_id, st_::span<uint32_t> page_nos)
if (buf_read_page_low(cur_page_id, zip_size, chain, &err, space,
block)) {
ut_ad(!block);
block = buf_LRU_get_free_block(false);
block = buf_LRU_get_free_block(have_no_mutex);
}
if (err != DB_SUCCESS) {
......
......@@ -499,7 +499,7 @@ dberr_t fsp_header_init(fil_space_t *space, uint32_t size, mtr_t *mtr)
const page_id_t page_id(space->id, 0);
const ulint zip_size = space->zip_size();
buf_block_t *free_block = buf_LRU_get_free_block(false);
buf_block_t *free_block = buf_LRU_get_free_block(have_no_mutex);
mtr->x_lock_space(space);
......@@ -833,9 +833,9 @@ fsp_fill_free_list(
if (i)
{
buf_block_t *f= buf_LRU_get_free_block(false);
buf_block_t *f= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block= buf_page_create(space, static_cast<uint32_t>(i),
zip_size, mtr, f);
zip_size, mtr, f);
if (UNIV_UNLIKELY(block != f))
buf_pool.free_block(f);
fsp_init_file_page(space, block, mtr);
......@@ -845,7 +845,7 @@ fsp_fill_free_list(
if (space->purpose != FIL_TYPE_TEMPORARY)
{
buf_block_t *f= buf_LRU_get_free_block(false);
buf_block_t *f= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block=
buf_page_create(space, static_cast<uint32_t>(i + 1),
zip_size, mtr, f);
......@@ -1053,7 +1053,7 @@ fsp_page_create(fil_space_t *space, page_no_t offset, mtr_t *mtr)
}
}
free_block= buf_LRU_get_free_block(false);
free_block= buf_LRU_get_free_block(have_no_mutex);
got_free_block:
block= buf_page_create(space, static_cast<uint32_t>(offset),
space->zip_size(), mtr, free_block);
......
......@@ -90,7 +90,7 @@ inline bool buf_page_peek_if_too_old(const buf_page_t *bpage)
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
inline buf_block_t *buf_block_alloc()
{
return buf_LRU_get_free_block(false);
return buf_LRU_get_free_block(have_no_mutex);
}
/********************************************************************//**
......
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2021, MariaDB Corporation.
Copyright (c) 2017, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -62,6 +62,17 @@ bool buf_LRU_scan_and_free_block(ulint limit= ULINT_UNDEFINED);
@retval NULL if the free list is empty */
buf_block_t* buf_LRU_get_free_only();
/** Allocation mode for buf_LRU_get_free_block() */
enum buf_LRU_get {
/** buf_pool.mutex is not held by the caller */
have_no_mutex= 0,
/** buf_pool.mutex is held by the caller */
have_mutex= 1,
/** buf_pool.mutex is not held by the caller, and the caller
tolerates a failure to allocate a block */
have_no_mutex_soft= 2
};
/** Get a block from the buf_pool.free list.
If the list is empty, blocks will be moved from the end of buf_pool.LRU
to buf_pool.free.
......@@ -83,9 +94,10 @@ we put it to free list to be used.
* scan whole LRU list
* scan LRU list even if buf_pool.try_LRU_scan is not set
@param have_mutex whether buf_pool.mutex is already being held
@return the free control block, in state BUF_BLOCK_MEMORY */
buf_block_t* buf_LRU_get_free_block(bool have_mutex)
@param get how to allocate the block
@return the free control block, in state BUF_BLOCK_MEMORY
@retval nullptr if get==have_no_mutex_soft and memory was not available */
buf_block_t* buf_LRU_get_free_block(buf_LRU_get get)
MY_ATTRIBUTE((malloc,warn_unused_result));
/** @return whether the unzip_LRU list should be used for evicting a victim
......
......@@ -726,7 +726,7 @@ static struct
{
retry:
bool fail= false;
buf_block_t *free_block= buf_LRU_get_free_block(false);
buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
mysql_mutex_lock(&recv_sys.mutex);
for (auto d= defers.begin(); d != defers.end(); )
......@@ -3284,7 +3284,7 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id,
@retval nullptr if the page cannot be initialized based on log records */
buf_block_t *recv_sys_t::recover_low(const page_id_t page_id)
{
buf_block_t *free_block= buf_LRU_get_free_block(false);
buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
buf_block_t *block= nullptr;
mysql_mutex_lock(&mutex);
......@@ -3404,7 +3404,7 @@ void recv_sys_t::apply(bool last_batch)
fil_system.extend_to_recv_size();
buf_block_t *free_block= buf_LRU_get_free_block(false);
buf_block_t *free_block= buf_LRU_get_free_block(have_no_mutex);
for (map::iterator p= pages.begin(); p != pages.end(); )
{
......@@ -3451,7 +3451,7 @@ void recv_sys_t::apply(bool last_batch)
{
next_free_block:
mysql_mutex_unlock(&mutex);
free_block= buf_LRU_get_free_block(false);
free_block= buf_LRU_get_free_block(have_no_mutex);
mysql_mutex_lock(&mutex);
break;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment