Commit 50324ce6 authored by Marko Mäkelä

MDEV-21351 Replace recv_sys.heap with list of buf_block_t

InnoDB crash recovery used a special type of mem_heap_t that
allocates backing store from the buffer pool. That incurred
a significant overhead, leading to underutilization of memory,
and limiting the maximum contiguous allocated size of a log record.

recv_sys_t::blocks: A linked list of buf_block_t that are allocated
by buf_block_alloc() for redo log records. Replaces recv_sys_t::heap.
We repurpose buf_block_t::unzip_LRU for linking the elements.

recv_sys_t::max_log_blocks: Renamed from recv_n_pool_free_frames.

recv_sys_t::max_blocks(): Accessor for max_log_blocks.

recv_sys_t::alloc(): Allocate memory from the current recv_sys_t::blocks
element, or allocate another block.  In debug builds, various free()
member functions must be invoked, because we repurpose
buf_page_t::buf_fix_count for tracking allocations.

recv_sys_t::free_corrupted_page(): Renamed from recv_recover_corrupt_page().

recv_sys_t::is_memory_exhausted(): Renamed from recv_sys_heap_check().

recv_sys_t::pages and its elements are allocated directly by the
system memory allocator.

recv_parse_log_recs(): Remove the parameter available_memory.

We rename some variables 'store_to_hash' to 'store', because
recv_sys.pages is not actually a hash table.

This is joint work with Thirunarayanan Balathandayuthapani.
parent a983b244
...@@ -4,7 +4,7 @@ MariaBackup: hot backup tool for InnoDB ...@@ -4,7 +4,7 @@ MariaBackup: hot backup tool for InnoDB
Originally Created 3/3/2009 Yasufumi Kinoshita Originally Created 3/3/2009 Yasufumi Kinoshita
Written by Alexey Kopytov, Aleksandr Kuzminsky, Stewart Smith, Vadim Tkachenko, Written by Alexey Kopytov, Aleksandr Kuzminsky, Stewart Smith, Vadim Tkachenko,
Yasufumi Kinoshita, Ignacio Nin and Baron Schwartz. Yasufumi Kinoshita, Ignacio Nin and Baron Schwartz.
(c) 2017, 2019, MariaDB Corporation. (c) 2017, 2020, MariaDB Corporation.
Portions written by Marko Mäkelä. Portions written by Marko Mäkelä.
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
...@@ -2680,7 +2680,7 @@ static lsn_t xtrabackup_copy_log(lsn_t start_lsn, lsn_t end_lsn, bool last) ...@@ -2680,7 +2680,7 @@ static lsn_t xtrabackup_copy_log(lsn_t start_lsn, lsn_t end_lsn, bool last)
store_t store = STORE_NO; store_t store = STORE_NO;
if (more_data && recv_parse_log_recs(0, &store, 0, false)) { if (more_data && recv_parse_log_recs(0, &store, false)) {
msg("Error: copying the log failed"); msg("Error: copying the log failed");
......
...@@ -5984,7 +5984,7 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict) ...@@ -5984,7 +5984,7 @@ buf_page_io_complete(buf_page_t* bpage, bool dblwr, bool evict)
buf_corrupt_page_release(bpage, space); buf_corrupt_page_release(bpage, space);
if (recv_recovery_is_on()) { if (recv_recovery_is_on()) {
recv_recover_corrupt_page(corrupt_page_id); recv_sys.free_corrupted_page(corrupt_page_id);
} }
space->release_for_io(); space->release_for_io();
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2019, MariaDB Corporation. Copyright (c) 2015, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -766,7 +766,7 @@ buf_read_recv_pages( ...@@ -766,7 +766,7 @@ buf_read_recv_pages(
ulint count = 0; ulint count = 0;
buf_pool = buf_pool_get(cur_page_id); buf_pool = buf_pool_get(cur_page_id);
while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) { while (buf_pool->n_pend_reads >= recv_sys.max_blocks() / 2) {
os_thread_sleep(10000); os_thread_sleep(10000);
......
/***************************************************************************** /*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation. Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -47,11 +47,6 @@ dberr_t ...@@ -47,11 +47,6 @@ dberr_t
recv_find_max_checkpoint(ulint* max_field) recv_find_max_checkpoint(ulint* max_field)
MY_ATTRIBUTE((nonnull, warn_unused_result)); MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Remove records for a corrupted page.
This function should called when srv_force_recovery > 0.
@param[in] page_id page id of the corrupted page */
ATTRIBUTE_COLD void recv_recover_corrupt_page(page_id_t page_id);
/** Apply any buffered redo log to a page that was just read from a data file. /** Apply any buffered redo log to a page that was just read from a data file.
@param[in,out] bpage buffer pool page */ @param[in,out] bpage buffer pool page */
ATTRIBUTE_COLD void recv_recover_page(buf_page_t* bpage); ATTRIBUTE_COLD void recv_recover_page(buf_page_t* bpage);
...@@ -106,14 +101,12 @@ bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn); ...@@ -106,14 +101,12 @@ bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn);
to wait merging to file pages. to wait merging to file pages.
@param[in] checkpoint_lsn the LSN of the latest checkpoint @param[in] checkpoint_lsn the LSN of the latest checkpoint
@param[in] store whether to store page operations @param[in] store whether to store page operations
@param[in] available_memory memory to read the redo logs
@param[in] apply whether to apply the records @param[in] apply whether to apply the records
@return whether MLOG_CHECKPOINT record was seen the first time, @return whether MLOG_CHECKPOINT record was seen the first time,
or corruption was noticed */ or corruption was noticed */
bool recv_parse_log_recs( bool recv_parse_log_recs(
lsn_t checkpoint_lsn, lsn_t checkpoint_lsn,
store_t* store, store_t* store,
ulint available_memory,
bool apply); bool apply);
/** Moves the parsing buffer data left to the buffer start */ /** Moves the parsing buffer data left to the buffer start */
...@@ -223,6 +216,10 @@ struct page_recv_t ...@@ -223,6 +216,10 @@ struct page_recv_t
iterator end() { return NULL; } iterator end() { return NULL; }
bool empty() const { ut_ad(!head == !tail); return !head; } bool empty() const { ut_ad(!head == !tail); return !head; }
inline void clear(); inline void clear();
#ifdef UNIV_DEBUG
/** Declare the records as freed; @see recv_sys_t::alloc() */
inline void free() const;
#endif
} log; } log;
/** Ignore any earlier redo log records for this page. */ /** Ignore any earlier redo log records for this page. */
...@@ -284,8 +281,6 @@ struct recv_sys_t{ ...@@ -284,8 +281,6 @@ struct recv_sys_t{
record, or 0 if none was parsed */ record, or 0 if none was parsed */
/** the time when progress was last reported */ /** the time when progress was last reported */
time_t progress_time; time_t progress_time;
mem_heap_t* heap; /*!< memory heap of log records and file
addresses*/
using map = std::map<const page_id_t, page_recv_t, using map = std::map<const page_id_t, page_recv_t,
std::less<const page_id_t>, std::less<const page_id_t>,
...@@ -314,6 +309,26 @@ struct recv_sys_t{ ...@@ -314,6 +309,26 @@ struct recv_sys_t{
/** Last added LSN to pages. */ /** Last added LSN to pages. */
lsn_t last_stored_lsn; lsn_t last_stored_lsn;
private:
/** Maximum number of buffer pool blocks to allocate for redo log records */
ulint max_log_blocks;
/** Base node of the redo block list (up to max_log_blocks)
List elements are linked via buf_block_t::unzip_LRU. */
UT_LIST_BASE_NODE_T(buf_block_t) blocks;
public:
/** @return the maximum number of buffer pool blocks for log records */
ulint max_blocks() const { return max_log_blocks; }
/** Check whether the number of read redo log blocks exceeds the maximum.
Store last_stored_lsn if the recovery is not in the last phase.
@param[in,out] store whether to store page operations
@return whether the memory is exhausted */
inline bool is_memory_exhausted(store_t *store);
#ifdef UNIV_DEBUG
/** whether all redo log in the current batch has been applied */
bool after_apply= false;
#endif
/** Initialize the redo log recovery subsystem. */ /** Initialize the redo log recovery subsystem. */
void create(); void create();
...@@ -352,6 +367,32 @@ struct recv_sys_t{ ...@@ -352,6 +367,32 @@ struct recv_sys_t{
progress_time = time; progress_time = time;
return true; return true;
} }
/** Get the memory block for storing recv_t and redo log data
@param[in] len length of the data to be stored
@param[in] store_recv whether to store recv_t object
@return pointer to len bytes of memory (never NULL) */
inline byte *alloc(size_t len, bool store_recv= false);
#ifdef UNIV_DEBUG
private:
/** Find the buffer pool block that is storing a redo log record.
@param[in] data pointer to buffer returned by alloc()
@return redo list element */
inline buf_block_t *find_block(const void *data) const;
public:
/** Declare a redo log record freed from a buffer pool block.
@param[in] data pointer to buffer returned by alloc() */
inline void free(const void *data) const;
#endif
/** @return the free length of the latest alloc() block, in bytes */
inline size_t get_free_len() const;
/** Remove records for a corrupted page.
This function should only be called when innodb_force_recovery is set.
@param page_id corrupted page identifier */
ATTRIBUTE_COLD void free_corrupted_page(page_id_t page_id);
}; };
/** The recovery system */ /** The recovery system */
...@@ -392,10 +433,4 @@ times! */ ...@@ -392,10 +433,4 @@ times! */
roll-forward */ roll-forward */
#define RECV_SCAN_SIZE (4U << srv_page_size_shift) #define RECV_SCAN_SIZE (4U << srv_page_size_shift)
/** This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will
use these free frames to read in pages when we start applying the
log records to the database. */
extern ulint recv_n_pool_free_frames;
#endif #endif
/***************************************************************************** /*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation. Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software the terms of the GNU General Public License as published by the Free Software
...@@ -59,7 +59,6 @@ buffer pool; the latter method is used for very big heaps */ ...@@ -59,7 +59,6 @@ buffer pool; the latter method is used for very big heaps */
/** Different type of heaps in terms of which datastructure is using them */ /** Different type of heaps in terms of which datastructure is using them */
#define MEM_HEAP_FOR_BTR_SEARCH (MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER) #define MEM_HEAP_FOR_BTR_SEARCH (MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER)
#define MEM_HEAP_FOR_PAGE_HASH (MEM_HEAP_DYNAMIC) #define MEM_HEAP_FOR_PAGE_HASH (MEM_HEAP_DYNAMIC)
#define MEM_HEAP_FOR_RECV_SYS (MEM_HEAP_BUFFER)
#define MEM_HEAP_FOR_LOCK_HEAP (MEM_HEAP_BUFFER) #define MEM_HEAP_FOR_LOCK_HEAP (MEM_HEAP_BUFFER)
/** The following start size is used for the first block in the memory heap if /** The following start size is used for the first block in the memory heap if
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment