Commit 271e6ae3 authored by Marko Mäkelä

Bug#54914: InnoDB: performance drop with innodb_change_buffering=all

Reduce ibuf_mutex and ibuf_pessimistic_insert_mutex contention further.

Protect ibuf->empty by the insert buffer root page latch, not ibuf_mutex.

ibuf_tree_root_get(): Assert that ibuf_mutex is owned by the
caller. Assert that the stamped page number is correct. Assert that
ibuf->empty agrees with the root page.

ibuf_size_update(): Do not update ibuf->empty.

ibuf_init_at_db_start(): Update ibuf->empty while holding the root page latch.

ibuf_add_free_page(): Return TRUE/FALSE instead of DB_SUCCESS/DB_STRONG_FAIL.

ibuf_remove_free_page(): Release ibuf_pessimistic_insert_mutex as
early as possible.

ibuf_contract_ext(): Rely on a dirty read of ibuf->empty, unless the
server is being shut down. Never acquire ibuf_mutex. Eliminate n_stored.

ibuf_contract_after_insert(): Never acquire ibuf_mutex. Perform dirty
reads of ibuf->size and ibuf->max_size.

ibuf_insert_low(): Only acquire ibuf_mutex for mode==BTR_MODIFY_TREE.
Perform dirty reads of ibuf->size and ibuf->max_size. Update
ibuf->empty while holding the root page latch.

ibuf_delete_rec(): Update ibuf->empty while holding the root page latch.

ibuf_is_empty(): Release ibuf_mutex earlier.
parent a4c5cf7c
......@@ -55,6 +55,7 @@ Created 7/19/1997 Heikki Tuuri
#include "lock0lock.h"
#include "log0recv.h"
#include "que0que.h"
#include "srv0start.h" /* srv_shutdown_state */
/* STRUCTURE OF AN INSERT BUFFER RECORD
......@@ -395,8 +396,10 @@ ibuf_tree_root_get(
mtr_t* mtr) /*!< in: mtr */
{
buf_block_t* block;
page_t* root;
ut_ad(ibuf_inside());
ut_ad(mutex_own(&ibuf_mutex));
mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
......@@ -405,7 +408,13 @@ ibuf_tree_root_get(
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
return(buf_block_get_frame(block));
root = buf_block_get_frame(block);
ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO);
ut_ad(ibuf->empty == (page_get_n_recs(root) == 0));
return(root);
}
#ifdef UNIV_IBUF_COUNT_DEBUG
......@@ -482,8 +491,6 @@ ibuf_size_update(
/* the '1 +' is the ibuf header page */
ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
ibuf->empty = page_get_n_recs(root) == 0;
}
/******************************************************************//**
......@@ -554,6 +561,7 @@ ibuf_init_at_db_start(void)
ibuf_size_update(root, &mtr);
mutex_exit(&ibuf_mutex);
ibuf->empty = (page_get_n_recs(root) == 0);
mtr_commit(&mtr);
ibuf_exit();
......@@ -2025,9 +2033,9 @@ ibuf_data_too_much_free(void)
/*********************************************************************//**
Allocates a new page from the ibuf file segment and adds it to the free
list.
@return DB_SUCCESS, or DB_STRONG_FAIL if no space left */
@return TRUE on success, FALSE if no space left */
static
ulint
ibool
ibuf_add_free_page(void)
/*====================*/
{
......@@ -2063,10 +2071,10 @@ ibuf_add_free_page(void)
header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
&mtr);
if (page_no == FIL_NULL) {
if (UNIV_UNLIKELY(page_no == FIL_NULL)) {
mtr_commit(&mtr);
return(DB_STRONG_FAIL);
return(FALSE);
}
{
......@@ -2113,7 +2121,7 @@ ibuf_add_free_page(void)
ibuf_exit();
return(DB_SUCCESS);
return(TRUE);
}
/*********************************************************************//**
......@@ -2143,20 +2151,17 @@ ibuf_remove_free_page(void)
header_page = ibuf_header_page_get(&mtr);
/* Prevent pessimistic inserts to insert buffer trees for a while */
mutex_enter(&ibuf_pessimistic_insert_mutex);
ibuf_enter();
mutex_enter(&ibuf_pessimistic_insert_mutex);
mutex_enter(&ibuf_mutex);
if (!ibuf_data_too_much_free()) {
mutex_exit(&ibuf_mutex);
mutex_exit(&ibuf_pessimistic_insert_mutex);
ibuf_exit();
mutex_exit(&ibuf_pessimistic_insert_mutex);
mtr_commit(&mtr);
return;
......@@ -2218,11 +2223,11 @@ ibuf_remove_free_page(void)
flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
mutex_exit(&ibuf_pessimistic_insert_mutex);
ibuf->seg_size--;
ibuf->free_list_len--;
mutex_exit(&ibuf_pessimistic_insert_mutex);
/* Set the bit indicating that this page is no more an ibuf tree page
(level 2 page) */
......@@ -2484,17 +2489,19 @@ ibuf_contract_ext(
ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
ulint n_stored;
ulint sum_sizes;
mtr_t mtr;
*n_pages = 0;
ut_ad(!ibuf_inside());
mutex_enter(&ibuf_mutex);
/* We perform a dirty read of ibuf->empty, without latching
the insert buffer root page. We trust this dirty read except
when a slow shutdown is being executed. During a slow
shutdown, the insert buffer merge must be completed. */
if (ibuf->empty) {
mutex_exit(&ibuf_mutex);
if (UNIV_UNLIKELY(ibuf->empty)
&& UNIV_LIKELY(!srv_shutdown_state)) {
ibuf_is_empty:
#if 0 /* TODO */
......@@ -2523,18 +2530,18 @@ ibuf_is_empty:
position within the leaf */
btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
mutex_exit(&ibuf_mutex);
ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
/* When the ibuf tree is emptied completely, the last record
is removed using an optimistic delete and ibuf_size_update
is not called, causing ibuf->empty to remain FALSE. If we do
not reset it to TRUE here then database shutdown will hang
in the loop in ibuf_contract_for_n_pages. */
ibuf->empty = TRUE;
/* If a B-tree page is empty, it must be the root page
and the whole B-tree must be empty. InnoDB does not
allow empty B-tree pages other than the root. */
ut_ad(ibuf->empty);
ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
== IBUF_SPACE_ID);
ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
== FSP_IBUF_TREE_ROOT_PAGE_NO);
ibuf_exit();
......@@ -2546,10 +2553,10 @@ ibuf_is_empty:
sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
space_ids, space_versions,
page_nos, &n_stored);
page_nos, n_pages);
#if 0 /* defined UNIV_IBUF_DEBUG */
fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
sync, n_stored, sum_sizes);
sync, *n_pages, sum_sizes);
#endif
ibuf_exit();
......@@ -2557,8 +2564,7 @@ ibuf_is_empty:
btr_pcur_close(&pcur);
buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
n_stored);
*n_pages = n_stored;
*n_pages);
return(sum_sizes + 1);
}
......@@ -2628,33 +2634,33 @@ ibuf_contract_after_insert(
ibool sync;
ulint sum_sizes;
ulint size;
mutex_enter(&ibuf_mutex);
if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
mutex_exit(&ibuf_mutex);
ulint max_size;
/* Perform dirty reads of ibuf->size and ibuf->max_size, to
reduce ibuf_mutex contention. ibuf->max_size remains constant
after ibuf_init_at_db_start(), but ibuf->size should be
protected by ibuf_mutex. Given that ibuf->size fits in a
machine word, this should be OK; at worst we are doing some
excessive ibuf_contract() or occasionally skipping a
ibuf_contract(). */
size = ibuf->size;
max_size = ibuf->max_size;
if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
return;
}
sync = FALSE;
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
sync = TRUE;
}
mutex_exit(&ibuf_mutex);
sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
/* Contract at least entry_size many bytes */
sum_sizes = 0;
size = 1;
while ((size > 0) && (sum_sizes < entry_size)) {
do {
size = ibuf_contract(sync);
sum_sizes += size;
}
} while (size > 0 && sum_sizes < entry_size);
}
/*********************************************************************//**
......@@ -3272,7 +3278,7 @@ ibuf_set_entry_counter(
/*********************************************************************//**
Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible.
@return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
@return DB_SUCCESS, DB_STRONG_FAIL or other error */
static
ulint
ibuf_insert_low(
......@@ -3302,6 +3308,7 @@ ibuf_insert_low(
rec_t* ins_rec;
ibool old_bit_value;
page_t* bitmap_page;
buf_block_t* block;
page_t* root;
ulint err;
ibool do_merge;
......@@ -3311,7 +3318,6 @@ ibuf_insert_low(
ulint n_stored;
mtr_t mtr;
mtr_t bitmap_mtr;
ibool too_big;
ut_a(!dict_index_is_clust(index));
ut_ad(dtuple_check_typed(entry));
......@@ -3323,11 +3329,14 @@ ibuf_insert_low(
do_merge = FALSE;
mutex_enter(&ibuf_mutex);
too_big = ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT;
mutex_exit(&ibuf_mutex);
if (too_big) {
/* Perform dirty reads of ibuf->size and ibuf->max_size, to
reduce ibuf_mutex contention. ibuf->max_size remains constant
after ibuf_init_at_db_start(), but ibuf->size should be
protected by ibuf_mutex. Given that ibuf->size fits in a
machine word, this should be OK; at worst we are doing some
excessive ibuf_contract() or occasionally skipping a
ibuf_contract(). */
if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
/* Insert buffer is now too big, contract it but do not try
to insert */
......@@ -3361,10 +3370,8 @@ ibuf_insert_low(
if (mode == BTR_MODIFY_TREE) {
for (;;) {
mutex_enter(&ibuf_pessimistic_insert_mutex);
ibuf_enter();
mutex_enter(&ibuf_pessimistic_insert_mutex);
mutex_enter(&ibuf_mutex);
if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
......@@ -3373,17 +3380,13 @@ ibuf_insert_low(
}
mutex_exit(&ibuf_mutex);
ibuf_exit();
mutex_exit(&ibuf_pessimistic_insert_mutex);
ibuf_exit();
err = ibuf_add_free_page();
if (UNIV_UNLIKELY(err == DB_STRONG_FAIL)) {
if (UNIV_UNLIKELY(!ibuf_add_free_page())) {
mem_heap_free(heap);
return(err);
return(DB_STRONG_FAIL);
}
}
} else {
......@@ -3423,9 +3426,14 @@ ibuf_insert_low(
before mtr_commit(&mtr). We must not mtr_commit(&mtr)
until after the IBUF_OP_DELETE has been buffered. */
err = DB_STRONG_FAIL;
fail_exit:
if (mode == BTR_MODIFY_TREE) {
mutex_exit(&ibuf_mutex);
mutex_exit(&ibuf_pessimistic_insert_mutex);
}
goto function_exit;
err = DB_STRONG_FAIL;
goto func_exit;
}
/* After this point, the page could still be loaded to the
......@@ -3471,9 +3479,7 @@ ibuf_insert_low(
space_ids, space_versions,
page_nos, &n_stored);
err = DB_STRONG_FAIL;
goto function_exit;
goto fail_exit;
}
}
......@@ -3484,11 +3490,9 @@ ibuf_insert_low(
&& !ibuf_set_entry_counter(ibuf_entry, space, page_no, &pcur,
mode == BTR_MODIFY_PREV, &mtr)) {
bitmap_fail:
err = DB_STRONG_FAIL;
mtr_commit(&bitmap_mtr);
goto function_exit;
goto fail_exit;
}
/* Set the bitmap bit denoting that the insert buffer contains
......@@ -3512,10 +3516,19 @@ bitmap_fail:
err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
ibuf_entry, &ins_rec,
&dummy_big_rec, 0, thr, &mtr);
if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
/* Update the page max trx id field */
page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
thr_get_trx(thr)->id, &mtr);
block = btr_cur_get_block(cursor);
ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
/* If this is the root page, update ibuf->empty. */
if (UNIV_UNLIKELY(buf_block_get_page_no(block)
== FSP_IBUF_TREE_ROOT_PAGE_NO)) {
const page_t* root = buf_block_get_frame(block);
ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
ut_ad(page_get_page_no(root)
== FSP_IBUF_TREE_ROOT_PAGE_NO);
ibuf->empty = (page_get_n_recs(root) == 0);
}
} else {
ut_ad(mode == BTR_MODIFY_TREE);
......@@ -3532,16 +3545,22 @@ bitmap_fail:
cursor,
ibuf_entry, &ins_rec,
&dummy_big_rec, 0, thr, &mtr);
if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
/* Update the page max trx id field */
page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
thr_get_trx(thr)->id, &mtr);
}
mutex_exit(&ibuf_pessimistic_insert_mutex);
ibuf_size_update(root, &mtr);
mutex_exit(&ibuf_mutex);
ibuf->empty = (page_get_n_recs(root) == 0);
block = btr_cur_get_block(cursor);
ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
}
if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
/* Update the page max trx id field */
page_update_max_trx_id(block, NULL,
thr_get_trx(thr)->id, &mtr);
}
function_exit:
func_exit:
#ifdef UNIV_IBUF_COUNT_DEBUG
if (err == DB_SUCCESS) {
fprintf(stderr,
......@@ -3553,11 +3572,6 @@ function_exit:
ibuf_count_get(space, page_no) + 1);
}
#endif
if (mode == BTR_MODIFY_TREE) {
mutex_exit(&ibuf_mutex);
mutex_exit(&ibuf_pessimistic_insert_mutex);
}
mtr_commit(&mtr);
btr_pcur_close(&pcur);
......@@ -3565,16 +3579,8 @@ function_exit:
mem_heap_free(heap);
if (err == DB_SUCCESS) {
mutex_enter(&ibuf_mutex);
ibuf->empty = FALSE;
mutex_exit(&ibuf_mutex);
if (mode == BTR_MODIFY_TREE) {
ibuf_contract_after_insert(entry_size);
}
if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) {
ibuf_contract_after_insert(entry_size);
}
if (do_merge) {
......@@ -4081,6 +4087,22 @@ ibuf_delete_rec(
success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
if (success) {
if (UNIV_UNLIKELY(!page_get_n_recs(btr_pcur_get_page(pcur)))) {
/* If a B-tree page is empty, it must be the root page
and the whole B-tree must be empty. InnoDB does not
allow empty B-tree pages other than the root. */
root = btr_pcur_get_page(pcur);
ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
ut_ad(page_get_page_no(root)
== FSP_IBUF_TREE_ROOT_PAGE_NO);
/* ibuf->empty is protected by the root page latch.
Before the deletion, it had to be FALSE. */
ut_ad(!ibuf->empty);
ibuf->empty = TRUE;
}
#ifdef UNIV_IBUF_COUNT_DEBUG
fprintf(stderr,
"Decrementing ibuf count of space %lu page %lu\n"
......@@ -4108,6 +4130,7 @@ ibuf_delete_rec(
if (!ibuf_restore_pos(space, page_no, search_tuple,
BTR_MODIFY_TREE, pcur, mtr)) {
mutex_exit(&ibuf_mutex);
goto func_exit;
}
......@@ -4121,10 +4144,12 @@ ibuf_delete_rec(
ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
#endif
ibuf_size_update(root, mtr);
mutex_exit(&ibuf_mutex);
ibuf->empty = (page_get_n_recs(root) == 0);
btr_pcur_commit_specify_mtr(pcur, mtr);
func_exit:
mutex_exit(&ibuf_mutex);
btr_pcur_close(pcur);
return(TRUE);
......@@ -4642,37 +4667,18 @@ ibuf_is_empty(void)
mtr_t mtr;
ibuf_enter();
mutex_enter(&ibuf_mutex);
mtr_start(&mtr);
mutex_enter(&ibuf_mutex);
root = ibuf_tree_root_get(&mtr);
if (page_get_n_recs(root) == 0) {
is_empty = TRUE;
if (ibuf->empty == FALSE) {
fprintf(stderr,
"InnoDB: Warning: insert buffer tree is empty"
" but the data struct does not\n"
"InnoDB: know it. This condition is legal"
" if the master thread has not yet\n"
"InnoDB: run to completion.\n");
}
} else {
ut_a(ibuf->empty == FALSE);
is_empty = FALSE;
}
mutex_exit(&ibuf_mutex);
is_empty = (page_get_n_recs(root) == 0);
mtr_commit(&mtr);
ibuf_exit();
ut_a(is_empty == ibuf->empty);
return(is_empty);
}
......
......@@ -46,11 +46,12 @@ struct ibuf_struct{
ulint seg_size; /*!< allocated pages of the file
segment containing ibuf header and
tree */
ibool empty; /*!< after an insert to the ibuf tree
is performed, this is set to FALSE,
and if a contract operation finds
the tree empty, this is set to
TRUE */
ibool empty; /*!< Protected by the page
latch of the root page of the
insert buffer tree
(FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE
if and only if the insert
buffer tree is empty. */
ulint free_list_len; /*!< length of the free list */
ulint height; /*!< tree height */
dict_index_t* index; /*!< insert buffer index */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment