Commit b6a24724 authored by Daniel Black

MDEV-27891: SIGSEGV in InnoDB buffer pool resize

During an increase of the buffer pool size, the new curr_size could
get a value less than old_size.

As n_chunks_new and n_chunks have a strong correlation to the
resizing operation in progress, we can use them and remove the
need for old_size.

For convenience, the n_chunks_new < n_chunks comparison is now the
is_shrinking() function.
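
As the buf0buf.h hunk below shows, the helper reduces to a one-line
comparison:

    /** @return whether the buffer pool is shrinking */
    inline bool is_shrinking() const
    {
      return n_chunks_new < n_chunks;
    }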

The volatile qualifier on n_chunks{,_new} is removed: it only
constrains compiler optimization, whereas real mutex protection is
what is needed.
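
As reflected in the member declarations at the end of the diff, both
become plain ulint fields documented as protected by the buffer pool
mutex:

    ulint   n_chunks;      /*!< number of buffer pool chunks */
    ulint   n_chunks_new;  /*!< new number of buffer pool chunks.
                           both n_chunks{,new} are protected under
                           mutex */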

Other n_chunks_new/n_chunks usage:

n_chunks_new and n_chunks are almost always read and altered under
the buffer pool mutex.

Exceptions are:
* i_s_innodb_buffer_page_fill
* buf_pool_t::is_uncompressed (via is_block_field)
These need re-examining to determine whether a mutex is needed;
existing comments already indicate this.

get_n_pages is used in buffer pool load, recovery log memory
exhaustion estimates and InnoDB status, so take the minimum number
of chunks for safety.
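
As the buf0buf.h hunk below shows, get_n_pages now iterates over the
smaller of the two chunk counts:

    size_t size= 0;
    for (auto j= ut_min(n_chunks_new, n_chunks); j--; )
      size+= chunks[j].size;
    return size;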

The buf_pool_t::running_out function also used curr_size/old_size.
In this hot function, the calculation is replaced with one based on
just n_chunks_new, the target number of chunks, which is set before
the resizing occurs.

If we are resizing down, we already have the case we had previously
(as the minimum). If we are resizing upwards, we take the optimistic
view that buffer chunks will be available for locks. As this memory
allocation occurs immediately next to the resizing function, that
seems likely.

The compiler hint UNIV_UNLIKELY is removed, leaving the branch
predictor to make an informed decision.
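
For reference, the patched function as it appears in the buf0buf.h
hunk below; the TPOOL_SUPPRESS_TSAN annotation is added, presumably
because this hot path reads n_chunks_new without holding the mutex:

    TPOOL_SUPPRESS_TSAN
    bool running_out() const
    {
      return !recv_recovery_is_on() &&
        UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) <
        n_chunks_new / 4 * chunks->size;
    }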

Added a test case with a smaller size than the Marko/Roel original
in JIRA, reducing the size to 256M. The SIGSEGV hits roughly 1 in 10
runs, but that is better than a 21G memory size.

Reviewer: Marko
parent 1fa872f6
MDEV-27891: Delayed SIGSEGV in InnoDB buffer pool resize
after or during DROP TABLE
select @@innodb_buffer_pool_chunk_size;
@@innodb_buffer_pool_chunk_size
1048576
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
SET GLOBAL innodb_buffer_pool_size=256*1024*1024;
DROP TABLE t1;
SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + @@innodb_buffer_pool_chunk_size;
# end of 10.6 test
set global innodb_buffer_pool_size = 8388608;
--source include/have_innodb.inc
--source include/big_test.inc
--let $save_size= `SELECT @@GLOBAL.innodb_buffer_pool_size`
let $wait_timeout = 60;
let $wait_condition =
SELECT SUBSTR(variable_value, 1, 30) = 'Completed resizing buffer pool'
FROM information_schema.global_status
WHERE variable_name = 'INNODB_BUFFER_POOL_RESIZE_STATUS';
--echo
--echo MDEV-27891: Delayed SIGSEGV in InnoDB buffer pool resize
--echo after or during DROP TABLE
--echo
select @@innodb_buffer_pool_chunk_size;
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
SET GLOBAL innodb_buffer_pool_size=256*1024*1024;
DROP TABLE t1;
--source include/wait_condition.inc
SET GLOBAL innodb_buffer_pool_size=@@innodb_buffer_pool_size + @@innodb_buffer_pool_chunk_size;
--source include/wait_condition.inc
--echo # end of 10.6 test
--eval set global innodb_buffer_pool_size = $save_size
--source include/wait_condition.inc
storage/innobase/buf/buf0buddy.cc
@@ -298,7 +298,7 @@ static buf_buddy_free_t* buf_buddy_alloc_zip(ulint i)
 	buf = UT_LIST_GET_FIRST(buf_pool.zip_free[i]);
-	if (buf_pool.curr_size < buf_pool.old_size
+	if (buf_pool.is_shrinking()
 	    && UT_LIST_GET_LEN(buf_pool.withdraw)
 		< buf_pool.withdraw_target) {
@@ -609,7 +609,7 @@ void buf_buddy_free_low(void* buf, ulint i)
 	We may waste up to 15360*max_len bytes to free blocks
 	(1024 + 2048 + 4096 + 8192 = 15360) */
 	if (UT_LIST_GET_LEN(buf_pool.zip_free[i]) < 16
-	    && buf_pool.curr_size >= buf_pool.old_size) {
+	    && !buf_pool.is_shrinking()) {
 		goto func_exit;
 	}
@@ -715,7 +715,7 @@ buf_buddy_realloc(void* buf, ulint size)
 void buf_buddy_condense_free()
 {
 	mysql_mutex_assert_owner(&buf_pool.mutex);
-	ut_ad(buf_pool.curr_size < buf_pool.old_size);
+	ut_ad(buf_pool.is_shrinking());
 	for (ulint i = 0; i < UT_ARR_SIZE(buf_pool.zip_free); ++i) {
 		buf_buddy_free_t* buf =

storage/innobase/buf/buf0buf.cc
@@ -1200,7 +1200,6 @@ bool buf_pool_t::create()
   for (size_t i= 0; i < UT_ARR_SIZE(zip_free); ++i)
     UT_LIST_INIT(zip_free[i], &buf_buddy_free_t::list);
   ulint s= curr_size;
-  old_size= s;
   s/= BUF_READ_AHEAD_PORTION;
   read_ahead_area= s >= READ_AHEAD_PAGES
     ? READ_AHEAD_PAGES
@@ -1669,7 +1668,6 @@ inline void buf_pool_t::resize()
 #endif /* BTR_CUR_HASH_ADAPT */
 	mysql_mutex_lock(&mutex);
-	ut_ad(curr_size == old_size);
 	ut_ad(n_chunks_new == n_chunks);
 	ut_ad(UT_LIST_GET_LEN(withdraw) == 0);
@@ -1678,7 +1676,7 @@ inline void buf_pool_t::resize()
 	curr_size = n_chunks_new * chunks->size;
 	mysql_mutex_unlock(&mutex);
-	if (curr_size < old_size) {
+	if (is_shrinking()) {
 		/* set withdraw target */
 		size_t w = 0;
@@ -1699,7 +1697,7 @@ inline void buf_pool_t::resize()
 withdraw_retry:
 	/* wait for the number of blocks fit to the new size (if needed)*/
-	bool	should_retry_withdraw = curr_size < old_size
+	bool	should_retry_withdraw = is_shrinking()
 		&& withdraw_blocks();
 	if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
@@ -1782,7 +1780,7 @@ inline void buf_pool_t::resize()
 		ULINTPF " to " ULINTPF ".",
 		n_chunks, n_chunks_new);
-	if (n_chunks_new < n_chunks) {
+	if (is_shrinking()) {
 		/* delete chunks */
 		chunk_t* chunk = chunks + n_chunks_new;
 		const chunk_t* const echunk = chunks + n_chunks;
@@ -1846,8 +1844,7 @@ inline void buf_pool_t::resize()
 			goto calc_buf_pool_size;
 		}
-		ulint	n_chunks_copy = ut_min(n_chunks_new,
-					       n_chunks);
+		ulint	n_chunks_copy = ut_min(n_chunks_new, n_chunks);
 		memcpy(new_chunks, chunks,
 		       n_chunks_copy * sizeof *new_chunks);
@@ -1914,7 +1911,6 @@ inline void buf_pool_t::resize()
 	/* set size */
 	ut_ad(UT_LIST_GET_LEN(withdraw) == 0);
   ulint s= curr_size;
-  old_size= s;
   s/= BUF_READ_AHEAD_PORTION;
   read_ahead_area= s >= READ_AHEAD_PAGES
     ? READ_AHEAD_PAGES
@@ -3876,7 +3872,7 @@ void buf_pool_t::validate()
 	mysql_mutex_unlock(&flush_list_mutex);
-	if (curr_size == old_size
+	if (n_chunks_new == n_chunks
 	    && n_lru + n_free > curr_size + n_zip) {
 		ib::fatal() << "n_LRU " << n_lru << ", n_free " << n_free
@@ -3886,7 +3882,7 @@ void buf_pool_t::validate()
 	ut_ad(UT_LIST_GET_LEN(LRU) >= n_lru);
-	if (curr_size == old_size
+	if (n_chunks_new == n_chunks
 	    && UT_LIST_GET_LEN(free) != n_free) {
 		ib::fatal() << "Free list len "

storage/innobase/buf/buf0flu.cc
@@ -1225,7 +1225,7 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n)
   ulint free_limit= srv_LRU_scan_depth;
   mysql_mutex_assert_owner(&buf_pool.mutex);
-  if (buf_pool.withdraw_target && buf_pool.curr_size < buf_pool.old_size)
+  if (buf_pool.withdraw_target && buf_pool.is_shrinking())
     free_limit+= buf_pool.withdraw_target - UT_LIST_GET_LEN(buf_pool.withdraw);
   const auto neighbors= UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN

storage/innobase/buf/buf0lru.cc
@@ -288,7 +288,7 @@ buf_block_t* buf_LRU_get_free_only()
 		ut_a(!block->page.in_file());
 		UT_LIST_REMOVE(buf_pool.free, &block->page);
-		if (buf_pool.curr_size >= buf_pool.old_size
+		if (!buf_pool.is_shrinking()
 		    || UT_LIST_GET_LEN(buf_pool.withdraw)
			>= buf_pool.withdraw_target
 		    || !buf_pool.will_be_withdrawn(block->page)) {
@@ -321,7 +321,7 @@ static void buf_LRU_check_size_of_non_data_objects()
 {
   mysql_mutex_assert_owner(&buf_pool.mutex);
-  if (recv_recovery_is_on() || buf_pool.curr_size != buf_pool.old_size)
+  if (recv_recovery_is_on() || buf_pool.n_chunks_new != buf_pool.n_chunks)
     return;
   const auto s= UT_LIST_GET_LEN(buf_pool.free) + UT_LIST_GET_LEN(buf_pool.LRU);
@@ -1012,7 +1012,7 @@ buf_LRU_block_free_non_file_page(
 		page_zip_set_size(&block->page.zip, 0);
 	}
-	if (buf_pool.curr_size < buf_pool.old_size
+	if (buf_pool.is_shrinking()
 	    && UT_LIST_GET_LEN(buf_pool.withdraw) < buf_pool.withdraw_target
 	    && buf_pool.will_be_withdrawn(block->page)) {
 		/* This should be withdrawn */

storage/innobase/include/buf0buf.h
@@ -1353,7 +1353,7 @@ class buf_pool_t
   {
     ut_ad(is_initialised());
     size_t size= 0;
-    for (auto j= n_chunks; j--; )
+    for (auto j= ut_min(n_chunks_new, n_chunks); j--; )
      size+= chunks[j].size;
    return size;
  }
@@ -1363,7 +1363,7 @@ class buf_pool_t
   @return whether the frame will be withdrawn */
   bool will_be_withdrawn(const byte *ptr) const
   {
-    ut_ad(curr_size < old_size);
+    ut_ad(n_chunks_new < n_chunks);
 #ifdef SAFE_MUTEX
     if (resize_in_progress())
       mysql_mutex_assert_owner(&mutex);
@@ -1383,7 +1383,7 @@ class buf_pool_t
   @return whether the frame will be withdrawn */
   bool will_be_withdrawn(const buf_page_t &bpage) const
   {
-    ut_ad(curr_size < old_size);
+    ut_ad(n_chunks_new < n_chunks);
 #ifdef SAFE_MUTEX
     if (resize_in_progress())
       mysql_mutex_assert_owner(&mutex);
@@ -1540,11 +1540,18 @@ class buf_pool_t
   inline void watch_remove(buf_page_t *watch, hash_chain &chain);
   /** @return whether less than 1/4 of the buffer pool is available */
+  TPOOL_SUPPRESS_TSAN
   bool running_out() const
   {
     return !recv_recovery_is_on() &&
-      UNIV_UNLIKELY(UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) <
-                    std::min(curr_size, old_size) / 4);
+      UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) <
+      n_chunks_new / 4 * chunks->size;
+  }
+
+  /** @return whether the buffer pool is shrinking */
+  inline bool is_shrinking() const
+  {
+    return n_chunks_new < n_chunks;
   }
 #ifdef UNIV_DEBUG
@@ -1603,15 +1610,15 @@ class buf_pool_t
 	ut_allocator<unsigned char>	allocator;	/*!< Allocator used for
 					allocating memory for the the "chunks"
 					member. */
-	volatile ulint	n_chunks;	/*!< number of buffer pool chunks */
-	volatile ulint	n_chunks_new;	/*!< new number of buffer pool chunks */
+	ulint		n_chunks;	/*!< number of buffer pool chunks */
+	ulint		n_chunks_new;	/*!< new number of buffer pool chunks.
+					both n_chunks{,new} are protected under
+					mutex */
 	chunk_t*	chunks;		/*!< buffer pool chunks */
 	chunk_t*	chunks_old;	/*!< old buffer pool chunks to be freed
 					after resizing buffer pool */
 	/** current pool size in pages */
 	Atomic_counter<ulint> curr_size;
-	/** previous pool size in pages */
-	Atomic_counter<ulint> old_size;
 	/** read-ahead request size in pages */
 	Atomic_counter<uint32_t> read_ahead_area;