Commit aa719b50 authored by Marko Mäkelä

MDEV-32050: Do not copy undo records in purge

Also, default to innodb_purge_batch_size=1000,
replacing the old default value of processing 300 undo log pages
in a batch. Axel Schwenke found this value to help reduce purge lag
without having a significant impact on workload throughput.

In purge, we can simply acquire a shared latch on the undo log page
(to avoid a race condition like the one that was fixed in
commit b102872a) and retain a buffer-fix
after releasing the latch. The buffer-fix will prevent the undo log
page from being evicted from the buffer pool. Concurrent modification
is prevented by design. Only the purge_coordinator_task
(or its accomplice purge_truncation_task) may free the undo log pages,
after all purge_worker_task instances have completed execution. Hence, we do not
have to worry about any overwriting or reuse of the undo log records.

trx_undo_rec_copy(): Remove. The only remaining caller would have been
trx_undo_get_undo_rec_low(), which is where the logic was merged.

purge_sys_t::m_initialized: Replaces heap.

purge_sys_t::pages: A cache of buffer-fixed pages that have been
looked up from buf_pool.page_hash.

purge_sys_t::get_page(): Return a buffer-fixed undo page, using the
pages cache.

purge_sys_t::batch_cleanup(): Renamed from clone_end_view().
Clear the pages cache and clone the end_view at the end of a batch.

purge_sys_t::n_pages_handled(): Return pages.size(). This determines
if innodb_purge_batch_size was exceeded.

purge_sys_t::rseg_get_next_history_log(): Replaces
trx_purge_rseg_get_next_history_log().

purge_sys_t::choose_next_log(): Replaces trx_purge_choose_next_log()
and trx_purge_read_undo_rec().

purge_sys_t::get_next_rec(): Replaces trx_purge_get_next_rec()
and trx_undo_get_next_rec().

purge_sys_t::fetch_next_rec(): Replaces trx_purge_fetch_next_rec()
and some use of trx_undo_get_first_rec().

trx_purge_attach_undo_recs(): Do not allow purge_sys.n_pages_handled() to
exceed the innodb_purge_batch_size or ¾ of the buffer pool, whichever
is smaller.

Reviewed by: Vladislav Lesin
Tested by: Matthias Leich and Axel Schwenke
parent 88733282
SET @global_start_value = @@global.innodb_purge_batch_size; SET @global_start_value = @@global.innodb_purge_batch_size;
SELECT @global_start_value; SELECT @global_start_value;
@global_start_value @global_start_value
300 1000
'#--------------------FN_DYNVARS_046_01------------------------#' '#--------------------FN_DYNVARS_046_01------------------------#'
SET @@global.innodb_purge_batch_size = 1; SET @@global.innodb_purge_batch_size = 1;
SET @@global.innodb_purge_batch_size = DEFAULT; SET @@global.innodb_purge_batch_size = DEFAULT;
SELECT @@global.innodb_purge_batch_size; SELECT @@global.innodb_purge_batch_size;
@@global.innodb_purge_batch_size @@global.innodb_purge_batch_size
300 1000
'#---------------------FN_DYNVARS_046_02-------------------------#' '#---------------------FN_DYNVARS_046_02-------------------------#'
SET innodb_purge_batch_size = 1; SET innodb_purge_batch_size = 1;
ERROR HY000: Variable 'innodb_purge_batch_size' is a GLOBAL variable and should be set with SET GLOBAL ERROR HY000: Variable 'innodb_purge_batch_size' is a GLOBAL variable and should be set with SET GLOBAL
SELECT @@innodb_purge_batch_size; SELECT @@innodb_purge_batch_size;
@@innodb_purge_batch_size @@innodb_purge_batch_size
300 1000
SELECT local.innodb_purge_batch_size; SELECT local.innodb_purge_batch_size;
ERROR 42S02: Unknown table 'local' in field list ERROR 42S02: Unknown table 'local' in field list
SET global innodb_purge_batch_size = 1; SET global innodb_purge_batch_size = 1;
...@@ -112,4 +112,4 @@ SELECT @@global.innodb_purge_batch_size; ...@@ -112,4 +112,4 @@ SELECT @@global.innodb_purge_batch_size;
SET @@global.innodb_purge_batch_size = @global_start_value; SET @@global.innodb_purge_batch_size = @global_start_value;
SELECT @@global.innodb_purge_batch_size; SELECT @@global.innodb_purge_batch_size;
@@global.innodb_purge_batch_size @@global.innodb_purge_batch_size
300 1000
...@@ -307,7 +307,7 @@ ...@@ -307,7 +307,7 @@
NUMERIC_MAX_VALUE 65536 NUMERIC_MAX_VALUE 65536
@@ -1345,7 +1345,7 @@ @@ -1345,7 +1345,7 @@
SESSION_VALUE NULL SESSION_VALUE NULL
DEFAULT_VALUE 300 DEFAULT_VALUE 1000
VARIABLE_SCOPE GLOBAL VARIABLE_SCOPE GLOBAL
-VARIABLE_TYPE BIGINT UNSIGNED -VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_TYPE INT UNSIGNED +VARIABLE_TYPE INT UNSIGNED
......
...@@ -1293,7 +1293,7 @@ READ_ONLY NO ...@@ -1293,7 +1293,7 @@ READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME INNODB_PURGE_BATCH_SIZE VARIABLE_NAME INNODB_PURGE_BATCH_SIZE
SESSION_VALUE NULL SESSION_VALUE NULL
DEFAULT_VALUE 300 DEFAULT_VALUE 1000
VARIABLE_SCOPE GLOBAL VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BIGINT UNSIGNED VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT Number of UNDO log pages to purge in one batch from the history list. VARIABLE_COMMENT Number of UNDO log pages to purge in one batch from the history list.
......
...@@ -18852,7 +18852,7 @@ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size, ...@@ -18852,7 +18852,7 @@ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
PLUGIN_VAR_OPCMDARG, PLUGIN_VAR_OPCMDARG,
"Number of UNDO log pages to purge in one batch from the history list.", "Number of UNDO log pages to purge in one batch from the history list.",
NULL, NULL, NULL, NULL,
300, /* Default setting */ 1000, /* Default setting */
1, /* Minimum value */ 1, /* Minimum value */
5000, 0); /* Maximum value */ 5000, 0); /* Maximum value */
......
...@@ -80,15 +80,6 @@ row_purge_step( ...@@ -80,15 +80,6 @@ row_purge_step(
que_thr_t* thr) /*!< in: query thread */ que_thr_t* thr) /*!< in: query thread */
MY_ATTRIBUTE((nonnull, warn_unused_result)); MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Info required to purge a record */
struct trx_purge_rec_t
{
/** Record to purge */
const trx_undo_rec_t *undo_rec;
/** File pointer to undo record */
roll_ptr_t roll_ptr;
};
/** Purge worker context */ /** Purge worker context */
struct purge_node_t struct purge_node_t
{ {
......
...@@ -31,6 +31,7 @@ Created 3/26/1996 Heikki Tuuri ...@@ -31,6 +31,7 @@ Created 3/26/1996 Heikki Tuuri
#include "srw_lock.h" #include "srw_lock.h"
#include <queue> #include <queue>
#include <unordered_map>
/** Prepend the history list with an undo log. /** Prepend the history list with an undo log.
Remove the undo log segment from the rseg slot if it is too big for reuse. Remove the undo log segment from the rseg slot if it is too big for reuse.
...@@ -127,6 +128,7 @@ struct TrxUndoRsegsIterator { ...@@ -127,6 +128,7 @@ struct TrxUndoRsegsIterator {
/** The control structure used in the purge operation */ /** The control structure used in the purge operation */
class purge_sys_t class purge_sys_t
{ {
friend TrxUndoRsegsIterator;
public: public:
/** latch protecting view, m_enabled */ /** latch protecting view, m_enabled */
alignas(CPU_LEVEL1_DCACHE_LINESIZE) mutable srw_spin_lock latch; alignas(CPU_LEVEL1_DCACHE_LINESIZE) mutable srw_spin_lock latch;
...@@ -134,6 +136,8 @@ class purge_sys_t ...@@ -134,6 +136,8 @@ class purge_sys_t
/** Read view at the start of a purge batch. Any encountered index records /** Read view at the start of a purge batch. Any encountered index records
that are older than view will be removed. */ that are older than view will be removed. */
ReadViewBase view; ReadViewBase view;
/** whether the subsystem has been initialized */
bool m_initialized{false};
/** whether purge is enabled; protected by latch and std::atomic */ /** whether purge is enabled; protected by latch and std::atomic */
std::atomic<bool> m_enabled{false}; std::atomic<bool> m_enabled{false};
public: public:
...@@ -152,7 +156,34 @@ class purge_sys_t ...@@ -152,7 +156,34 @@ class purge_sys_t
/** Read view at the end of a purge batch (copied from view). Any undo pages /** Read view at the end of a purge batch (copied from view). Any undo pages
containing records older than end_view may be freed. */ containing records older than end_view may be freed. */
ReadViewBase end_view; ReadViewBase end_view;
struct hasher
{
size_t operator()(const page_id_t &id) const { return size_t(id.raw()); }
};
using unordered_map =
std::unordered_map<const page_id_t, buf_block_t*, hasher,
#if defined __GNUC__ && __GNUC__ == 4 && __GNUC_MINOR__ >= 8
std::equal_to<page_id_t>
/* GCC 4.8.5 would fail to find a matching allocator */
#else
std::equal_to<page_id_t>,
ut_allocator<std::pair<const page_id_t, buf_block_t*>>
#endif
>;
/** map of buffer-fixed undo log pages processed during a purge batch */
unordered_map pages;
public: public:
/** @return the number of processed undo pages */
size_t n_pages_handled() const { return pages.size(); }
/** Look up an undo log page.
@param id undo page identifier
@return undo page
@retval nullptr in case the page is corrupted */
buf_block_t *get_page(page_id_t id);
que_t* query; /*!< The query graph which will do the que_t* query; /*!< The query graph which will do the
parallelized purge operation */ parallelized purge operation */
...@@ -188,6 +219,7 @@ class purge_sys_t ...@@ -188,6 +219,7 @@ class purge_sys_t
to purge */ to purge */
trx_rseg_t* rseg; /*!< Rollback segment for the next undo trx_rseg_t* rseg; /*!< Rollback segment for the next undo
record to purge */ record to purge */
private:
uint32_t page_no; /*!< Page number for the next undo uint32_t page_no; /*!< Page number for the next undo
record to purge, page number of the record to purge, page number of the
log header, if dummy record */ log header, if dummy record */
...@@ -202,7 +234,7 @@ class purge_sys_t ...@@ -202,7 +234,7 @@ class purge_sys_t
TrxUndoRsegsIterator TrxUndoRsegsIterator
rseg_iter; /*!< Iterator to get the next rseg rseg_iter; /*!< Iterator to get the next rseg
to process */ to process */
public:
purge_pq_t purge_queue; /*!< Binary min-heap, ordered on purge_pq_t purge_queue; /*!< Binary min-heap, ordered on
TrxUndoRsegs::trx_no. It is protected TrxUndoRsegs::trx_no. It is protected
by the pq_mutex */ by the pq_mutex */
...@@ -217,17 +249,6 @@ class purge_sys_t ...@@ -217,17 +249,6 @@ class purge_sys_t
fil_space_t* last; fil_space_t* last;
} truncate; } truncate;
/** Heap for reading the undo log records */
mem_heap_t* heap;
/**
Constructor.
Some members may require late initialisation, thus we just mark object as
uninitialised. Real initialisation happens in create().
*/
purge_sys_t(): m_enabled(false), heap(nullptr) {}
/** Create the instance */ /** Create the instance */
void create(); void create();
...@@ -281,6 +302,32 @@ class purge_sys_t ...@@ -281,6 +302,32 @@ class purge_sys_t
/** @return whether stop_SYS() is in effect */ /** @return whether stop_SYS() is in effect */
bool must_wait_FTS() const { return m_FTS_paused; } bool must_wait_FTS() const { return m_FTS_paused; }
private:
/**
Get the next record to purge and update the info in the purge system.
@param roll_ptr undo log pointer to the record
@return buffer-fixed reference to undo log record
@retval {nullptr,1} if the whole undo log can be skipped in purge
@retval {nullptr,0} if nothing is left, or on corruption */
inline trx_purge_rec_t get_next_rec(roll_ptr_t roll_ptr);
/** Choose the next undo log to purge.
@return whether anything is to be purged */
bool choose_next_log();
/** Update the last not yet purged history log info in rseg when
we have purged a whole undo log. Advances also purge_trx_no
past the purged log. */
void rseg_get_next_history_log();
public:
/**
Fetch the next undo log record from the history list to purge.
@return buffer-fixed reference to undo log record
@retval {nullptr,1} if the whole undo log can be skipped in purge
@retval {nullptr,0} if nothing is left, or on corruption */
inline trx_purge_rec_t fetch_next_rec();
/** Determine if the history of a transaction is purgeable. /** Determine if the history of a transaction is purgeable.
@param trx_id transaction identifier @param trx_id transaction identifier
@return whether the history is purgeable */ @return whether the history is purgeable */
...@@ -327,9 +374,10 @@ class purge_sys_t ...@@ -327,9 +374,10 @@ class purge_sys_t
/** Wake up the purge threads if there is work to do. */ /** Wake up the purge threads if there is work to do. */
void wake_if_not_active(); void wake_if_not_active();
/** Update end_view at the end of a purge batch. /** Release undo pages and update end_view at the end of a purge batch.
@param head the new head of the purge queue */ @retval false when nothing is to be purged
inline void clone_end_view(const iterator &head); @retval true when purge_sys.rseg->latch was locked */
inline void batch_cleanup(const iterator &head);
struct view_guard struct view_guard
{ {
......
...@@ -28,32 +28,9 @@ Created 3/26/1996 Heikki Tuuri ...@@ -28,32 +28,9 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0types.h" #include "trx0types.h"
#include "row0types.h" #include "row0types.h"
#include "mtr0mtr.h"
#include "rem0types.h"
#include "page0types.h" #include "page0types.h"
#include "row0log.h"
#include "que0types.h" #include "que0types.h"
/***********************************************************************//**
Copies the undo record to the heap.
@param undo_rec record in an undo log page
@param heap memory heap
@return copy of undo_rec
@retval nullptr if the undo log record is corrupted */
inline trx_undo_rec_t* trx_undo_rec_copy(const trx_undo_rec_t *undo_rec,
mem_heap_t *heap)
{
const size_t offset= ut_align_offset(undo_rec, srv_page_size);
const size_t end= mach_read_from_2(undo_rec);
if (end <= offset || end >= srv_page_size - FIL_PAGE_DATA_END)
return nullptr;
const size_t len= end - offset;
trx_undo_rec_t *rec= static_cast<trx_undo_rec_t*>
(mem_heap_dup(heap, undo_rec, len));
mach_write_to_2(rec, len);
return rec;
}
/**********************************************************************//** /**********************************************************************//**
Reads the undo log record number. Reads the undo log record number.
@return undo no */ @return undo no */
......
...@@ -107,6 +107,15 @@ typedef byte trx_undo_rec_t; ...@@ -107,6 +107,15 @@ typedef byte trx_undo_rec_t;
/* @} */ /* @} */
/** Info required to purge a record */
struct trx_purge_rec_t
{
/** Undo log record, or nullptr (roll_ptr!=0 if the log can be skipped) */
const trx_undo_rec_t *undo_rec;
/** File pointer to undo_rec */
roll_ptr_t roll_ptr;
};
typedef std::vector<trx_id_t, ut_allocator<trx_id_t> > trx_ids_t; typedef std::vector<trx_id_t, ut_allocator<trx_id_t> > trx_ids_t;
/** Number of std::unordered_map hash buckets expected to be needed /** Number of std::unordered_map hash buckets expected to be needed
......
...@@ -116,31 +116,16 @@ trx_undo_page_get_next_rec(const buf_block_t *undo_page, uint16_t rec, ...@@ -116,31 +116,16 @@ trx_undo_page_get_next_rec(const buf_block_t *undo_page, uint16_t rec,
trx_undo_rec_t* trx_undo_rec_t*
trx_undo_get_prev_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no, trx_undo_get_prev_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no,
uint16_t offset, bool shared, mtr_t *mtr); uint16_t offset, bool shared, mtr_t *mtr);
/** Get the next record in an undo log.
@param[in,out] block undo log page
@param[in] rec undo record offset in the page
@param[in] page_no undo log header page number
@param[in] offset undo log header offset on page
@param[in,out] mtr mini-transaction
@return undo log record, the page latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_next_rec(const buf_block_t *&block, uint16_t rec,
uint32_t page_no, uint16_t offset, mtr_t *mtr);
/** Get the first record in an undo log. /** Get the first undo log record on a page.
@param[in] space undo log header space @param[in] block undo log page
@param[in] page_no undo log header page number @param[in] page_no undo log header page number
@param[in] offset undo log header offset on page @param[in] offset undo log header page offset
@param[in] mode latching mode: RW_S_LATCH or RW_X_LATCH @return pointer to first record
@param[out] block undo log page @retval nullptr if none exists */
@param[in,out] mtr mini-transaction
@param[out] err error code
@return undo log record, the page latched
@retval nullptr if none */
trx_undo_rec_t* trx_undo_rec_t*
trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no, trx_undo_page_get_first_rec(const buf_block_t *block, uint32_t page_no,
uint16_t offset, ulint mode, const buf_block_t*& block, uint16_t offset);
mtr_t *mtr, dberr_t *err);
/** Initialize an undo log page. /** Initialize an undo log page.
NOTE: This corresponds to a redo log record and must not be changed! NOTE: This corresponds to a redo log record and must not be changed!
......
This diff is collapsed.
...@@ -2061,12 +2061,23 @@ trx_undo_get_undo_rec_low( ...@@ -2061,12 +2061,23 @@ trx_undo_get_undo_rec_low(
mtr.start(); mtr.start();
const buf_block_t* undo_page= trx_undo_rec_t *undo_rec= nullptr;
buf_page_get(page_id_t(rseg->space->id, page_no), 0, RW_S_LATCH, &mtr); if (const buf_block_t* undo_page=
buf_page_get(page_id_t(rseg->space->id, page_no), 0, RW_S_LATCH, &mtr))
trx_undo_rec_t *undo_rec= undo_page {
? trx_undo_rec_copy(undo_page->page.frame + offset, heap) undo_rec= undo_page->page.frame + offset;
: nullptr; const size_t end= mach_read_from_2(undo_rec);
if (UNIV_UNLIKELY(end <= offset ||
end >= srv_page_size - FIL_PAGE_DATA_END))
undo_rec= nullptr;
else
{
size_t len{end - offset};
undo_rec=
static_cast<trx_undo_rec_t*>(mem_heap_dup(heap, undo_rec, len));
mach_write_to_2(undo_rec, len);
}
}
mtr.commit(); mtr.commit();
return undo_rec; return undo_rec;
......
...@@ -128,8 +128,8 @@ uint16_t trx_undo_page_get_start(const buf_block_t *block, uint32_t page_no, ...@@ -128,8 +128,8 @@ uint16_t trx_undo_page_get_start(const buf_block_t *block, uint32_t page_no,
@param[in] page_no undo log header page number @param[in] page_no undo log header page number
@param[in] offset undo log header page offset @param[in] offset undo log header page offset
@return pointer to first record @return pointer to first record
@retval NULL if none exists */ @retval nullptr if none exists */
static trx_undo_rec_t* trx_undo_rec_t*
trx_undo_page_get_first_rec(const buf_block_t *block, uint32_t page_no, trx_undo_page_get_first_rec(const buf_block_t *block, uint32_t page_no,
uint16_t offset) uint16_t offset)
{ {
...@@ -253,25 +253,6 @@ trx_undo_get_next_rec_from_next_page(const buf_block_t *&block, ...@@ -253,25 +253,6 @@ trx_undo_get_next_rec_from_next_page(const buf_block_t *&block,
return block ? trx_undo_page_get_first_rec(block, page_no, offset) : nullptr; return block ? trx_undo_page_get_first_rec(block, page_no, offset) : nullptr;
} }
/** Get the next record in an undo log.
@param[in,out] block undo log page
@param[in] rec undo record offset in the page
@param[in] page_no undo log header page number
@param[in] offset undo log header offset on page
@param[in,out] mtr mini-transaction
@return undo log record, the page latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_next_rec(const buf_block_t *&block, uint16_t rec,
uint32_t page_no, uint16_t offset, mtr_t *mtr)
{
if (trx_undo_rec_t *next= trx_undo_page_get_next_rec(block, rec, page_no,
offset))
return next;
return trx_undo_get_next_rec_from_next_page(block, page_no, offset,
RW_S_LATCH, mtr);
}
/** Get the first record in an undo log. /** Get the first record in an undo log.
@param[in] space undo log header space @param[in] space undo log header space
@param[in] page_no undo log header page number @param[in] page_no undo log header page number
...@@ -282,7 +263,7 @@ trx_undo_get_next_rec(const buf_block_t *&block, uint16_t rec, ...@@ -282,7 +263,7 @@ trx_undo_get_next_rec(const buf_block_t *&block, uint16_t rec,
@param[out] err error code @param[out] err error code
@return undo log record, the page latched @return undo log record, the page latched
@retval nullptr if none */ @retval nullptr if none */
trx_undo_rec_t* static trx_undo_rec_t*
trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no, trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no,
uint16_t offset, ulint mode, const buf_block_t*& block, uint16_t offset, ulint mode, const buf_block_t*& block,
mtr_t *mtr, dberr_t *err) mtr_t *mtr, dberr_t *err)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment