Commit d2769414 authored by Yasufumi Kinoshita's avatar Yasufumi Kinoshita

Bug #14676111 WRONG PAGE_LEVEL WRITTEN FOR UPPER THAN FATHER PAGE AT BTR_LIFT_PAGE_UP()

btr_lift_page_up() writes a wrong page level (off by -1) to the pages above the father page.
However, in almost all cases the father page is the root page, so there are no pages
above it. This is a very rare code path.

In addition, a leaf page should not be lifted up unless its father page is the root,
because branch pages should not become leaf pages.

rb://1336 approved by Marko Makela.
parent dd634761
drop table if exists t1;
CREATE TABLE t1 (a int not null primary key) engine=InnoDB;
set global innodb_limit_optimistic_insert_debug = 2;
insert into t1 values (1);
insert into t1 values (5);
insert into t1 values (4);
insert into t1 values (3);
insert into t1 values (2);
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
10.0000
delete from t1 where a=4;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
8.0000
delete from t1 where a=5;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
5.0000
set global innodb_limit_optimistic_insert_debug = 10000;
delete from t1 where a=2;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
3.0000
insert into t1 values (2);
delete from t1 where a=2;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
2.0000
insert into t1 values (2);
delete from t1 where a=2;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
1.0000
drop table t1;
# Test for bug #14676111: WRONG PAGE_LEVEL WRITTEN FOR UPPER THAN FATHER PAGE AT BTR_LIFT_PAGE_UP()
#
# Builds a 4-level B-tree by artificially limiting the number of records per
# page, then deletes rows so that pages are merged and lifted up.
# DATA_LENGTH / 16384 is printed after every step (presumably the number of
# 16 KiB pages in use -- TODO confirm the page size assumption).
-- source include/have_innodb.inc
-- source include/have_debug.inc
# The debug variable used below may be absent from the server; skip the test
# in that case.
if (`select count(*)=0 from information_schema.global_variables where variable_name = 'INNODB_LIMIT_OPTIMISTIC_INSERT_DEBUG'`)
{
--skip Test requires InnoDB built with UNIV_DEBUG definition.
}
# Remember the current setting so that it can be restored at the end.
--disable_query_log
set @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug;
--enable_query_log
--disable_warnings
drop table if exists t1;
--enable_warnings
CREATE TABLE t1 (a int not null primary key) engine=InnoDB;
# Condition used by include/wait_condition.inc after every delete: the status
# value INNODB_PURGE_TRX_ID_AGE must be below 1 (presumably meaning that purge
# has caught up with the delete).
let $wait_condition=
SELECT VARIABLE_VALUE < 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'INNODB_PURGE_TRX_ID_AGE';
#
# Make a 4-level straight tree.
# In the diagrams below each line is one tree level: root first, leaf level last.
#
set global innodb_limit_optimistic_insert_debug = 2;
insert into t1 values (1);
insert into t1 values (5);
# Current tree form:
# (1, 5)
insert into t1 values (4);
# The number of records in a page is artificially limited to 2, so a root raise occurs.
# Current tree form:
# (1, 5)
#(1, 4) (5)
insert into t1 values (3);
# Current tree form:
# (1, 5)
# (1, 4) (5)
#(1, 3) (4) (5)
insert into t1 values (2);
# Current tree form:
# (1, 5)
# (1, 4) (5)
# (1, 3) (4) (5)
#(1, 2) (3) (4) (5)
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
delete from t1 where a=4;
--source include/wait_condition.inc
# Because of the artificial limit, deleting 1 of the 2 records in a page does not cause a merge.
# Current tree form:
# (1, 5)
# (1) (5)
# (1, 3) (5)
#(1, 2) (3) (5)
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
delete from t1 where a=5;
--source include/wait_condition.inc
# Because of the artificial limit, deleting 1 of the 2 records in a page does not cause a merge.
# Current tree form:
# (1)
# (1)
# (1, 3) <- this level is lifted up next, when the node pointer is deleted
#(1, 2) (3) <- these pages are merged next
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
#
# Cause a merge at level 0.
#
# Disable the artificial limitation of records in a page.
set global innodb_limit_optimistic_insert_debug = 10000;
delete from t1 where a=2;
--source include/wait_condition.inc
# A page merge occurs, followed by a lift-up.
# Current tree form:
# (1)
# (1)
# (1, 3)
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
insert into t1 values (2);
# Current tree form:
# (1)
# (1) <- this level is lifted up next, because it is not the root
# (1, 2, 3)
delete from t1 where a=2;
--source include/wait_condition.inc
# Current tree form:
# (1)
# (1, 3)
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
insert into t1 values (2);
# Current tree form:
# (1)
# (1, 2, 3) <- this level is lifted up next, because its father is the root
delete from t1 where a=2;
--source include/wait_condition.inc
# Current tree form:
# (1, 3)
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
drop table t1;
# Restore the setting saved at the start of the test.
--disable_query_log
set global innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug;
--enable_query_log
drop table if exists t1;
CREATE TABLE t1 (a int not null primary key) engine=InnoDB;
set global innodb_limit_optimistic_insert_debug = 2;
insert into t1 values (1);
insert into t1 values (5);
insert into t1 values (4);
insert into t1 values (3);
insert into t1 values (2);
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
10.0000
delete from t1 where a=4;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
8.0000
delete from t1 where a=5;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
5.0000
set global innodb_limit_optimistic_insert_debug = 10000;
delete from t1 where a=2;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
3.0000
insert into t1 values (2);
delete from t1 where a=2;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
2.0000
insert into t1 values (2);
delete from t1 where a=2;
analyze table t1;
Table Op Msg_type Msg_text
test.t1 analyze status OK
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
DATA_LENGTH / 16384
1.0000
drop table t1;
# Test for bug #14676111: WRONG PAGE_LEVEL WRITTEN FOR UPPER THAN FATHER PAGE AT BTR_LIFT_PAGE_UP()
#
# Builds a 4-level B-tree by artificially limiting the number of records per
# page, then deletes rows so that pages are merged and lifted up.
# DATA_LENGTH / 16384 is printed after every step (presumably the number of
# 16 KiB pages in use -- TODO confirm the page size assumption).
-- source include/have_innodb_plugin.inc
-- source include/have_debug.inc
# The debug variable used below may be absent from the server; skip the test
# in that case.
if (`select count(*)=0 from information_schema.global_variables where variable_name = 'INNODB_LIMIT_OPTIMISTIC_INSERT_DEBUG'`)
{
--skip Test requires InnoDB built with UNIV_DEBUG definition.
}
# Remember the current setting so that it can be restored at the end.
--disable_query_log
set @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug;
--enable_query_log
--disable_warnings
drop table if exists t1;
--enable_warnings
CREATE TABLE t1 (a int not null primary key) engine=InnoDB;
# Condition used by include/wait_condition.inc after every delete: the status
# value INNODB_PURGE_TRX_ID_AGE must be below 1 (presumably meaning that purge
# has caught up with the delete).
let $wait_condition=
SELECT VARIABLE_VALUE < 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS
WHERE VARIABLE_NAME = 'INNODB_PURGE_TRX_ID_AGE';
#
# Make a 4-level straight tree.
# In the diagrams below each line is one tree level: root first, leaf level last.
#
set global innodb_limit_optimistic_insert_debug = 2;
insert into t1 values (1);
insert into t1 values (5);
# Current tree form:
# (1, 5)
insert into t1 values (4);
# The number of records in a page is artificially limited to 2, so a root raise occurs.
# Current tree form:
# (1, 5)
#(1, 4) (5)
insert into t1 values (3);
# Current tree form:
# (1, 5)
# (1, 4) (5)
#(1, 3) (4) (5)
insert into t1 values (2);
# Current tree form:
# (1, 5)
# (1, 4) (5)
# (1, 3) (4) (5)
#(1, 2) (3) (4) (5)
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
delete from t1 where a=4;
--source include/wait_condition.inc
# Because of the artificial limit, deleting 1 of the 2 records in a page does not cause a merge.
# Current tree form:
# (1, 5)
# (1) (5)
# (1, 3) (5)
#(1, 2) (3) (5)
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
delete from t1 where a=5;
--source include/wait_condition.inc
# Because of the artificial limit, deleting 1 of the 2 records in a page does not cause a merge.
# Current tree form:
# (1)
# (1)
# (1, 3) <- this level is lifted up next, when the node pointer is deleted
#(1, 2) (3) <- these pages are merged next
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
#
# Cause a merge at level 0.
#
# Disable the artificial limitation of records in a page.
set global innodb_limit_optimistic_insert_debug = 10000;
delete from t1 where a=2;
--source include/wait_condition.inc
# A page merge occurs, followed by a lift-up.
# Current tree form:
# (1)
# (1)
# (1, 3)
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
insert into t1 values (2);
# Current tree form:
# (1)
# (1) <- this level is lifted up next, because it is not the root
# (1, 2, 3)
delete from t1 where a=2;
--source include/wait_condition.inc
# Current tree form:
# (1)
# (1, 3)
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
insert into t1 values (2);
# Current tree form:
# (1)
# (1, 2, 3) <- this level is lifted up next, because its father is the root
delete from t1 where a=2;
--source include/wait_condition.inc
# Current tree form:
# (1, 3)
analyze table t1;
select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1';
drop table t1;
# Restore the setting saved at the start of the test.
--disable_query_log
set global innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug;
--enable_query_log
...@@ -1973,6 +1973,7 @@ btr_lift_page_up( ...@@ -1973,6 +1973,7 @@ btr_lift_page_up(
ulint root_page_no; ulint root_page_no;
ulint ancestors; ulint ancestors;
ulint i; ulint i;
ibool lift_father_up = FALSE;
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
...@@ -2007,6 +2008,27 @@ btr_lift_page_up( ...@@ -2007,6 +2008,27 @@ btr_lift_page_up(
pages[ancestors++] = iter_page; pages[ancestors++] = iter_page;
} }
if (ancestors > 1 && page_level == 0) {
/* The father page also should be the only on its level (not
root). We should lift up the father page at first.
Because the leaf page should be lifted up only for root page.
The freeing page is based on page_level (==0 or !=0)
to choose segment. If the page_level is changed ==0 from !=0,
later freeing of the page doesn't find the page allocation
to be freed.*/
lift_father_up = TRUE;
page = father_page;
page_level = btr_page_get_level(page, mtr);
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
father_page = pages[1];
}
btr_search_drop_page_hash_index(page); btr_search_drop_page_hash_index(page);
/* Make the father empty */ /* Make the father empty */
...@@ -2018,7 +2040,7 @@ btr_lift_page_up( ...@@ -2018,7 +2040,7 @@ btr_lift_page_up(
lock_update_copy_and_discard(father_page, page); lock_update_copy_and_discard(father_page, page);
/* Go upward to root page, decreasing levels by one. */ /* Go upward to root page, decreasing levels by one. */
for (i = 0; i < ancestors; i++) { for (i = lift_father_up ? 1 : 0; i < ancestors; i++) {
iter_page = pages[i]; iter_page = pages[i];
ut_ad(btr_page_get_level(iter_page, mtr) == (page_level + 1)); ut_ad(btr_page_get_level(iter_page, mtr) == (page_level + 1));
......
...@@ -49,6 +49,10 @@ ulint btr_cur_n_sea = 0; ...@@ -49,6 +49,10 @@ ulint btr_cur_n_sea = 0;
ulint btr_cur_n_non_sea_old = 0; ulint btr_cur_n_non_sea_old = 0;
ulint btr_cur_n_sea_old = 0; ulint btr_cur_n_sea_old = 0;
#ifdef UNIV_DEBUG
uint btr_cur_limit_optimistic_insert_debug = 0;
#endif /* UNIV_DEBUG */
/* In the optimistic insert, if the insert does not fit, but this much space /* In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */ can be released by page reorganize, then it is reorganized */
...@@ -1022,6 +1026,9 @@ btr_cur_optimistic_insert( ...@@ -1022,6 +1026,9 @@ btr_cur_optimistic_insert(
goto calculate_sizes_again; goto calculate_sizes_again;
} }
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
goto fail);
/* If there have been many consecutive inserts, and we are on the leaf /* If there have been many consecutive inserts, and we are on the leaf
level, check if we have to split the page to reserve enough free space level, check if we have to split the page to reserve enough free space
for future updates of records. */ for future updates of records. */
...@@ -1034,7 +1041,9 @@ btr_cur_optimistic_insert( ...@@ -1034,7 +1041,9 @@ btr_cur_optimistic_insert(
&& (0 == level) && (0 == level)
&& (btr_page_get_split_rec_to_right(cursor, &dummy_rec) && (btr_page_get_split_rec_to_right(cursor, &dummy_rec)
|| btr_page_get_split_rec_to_left(cursor, &dummy_rec))) { || btr_page_get_split_rec_to_left(cursor, &dummy_rec))) {
#ifdef UNIV_DEBUG
fail:
#endif /* UNIV_DEBUG */
if (big_rec_vec) { if (big_rec_vec) {
dtuple_convert_back_big_rec(index, entry, big_rec_vec); dtuple_convert_back_big_rec(index, entry, big_rec_vec);
} }
......
...@@ -495,6 +495,10 @@ static SHOW_VAR innodb_status_variables[]= { ...@@ -495,6 +495,10 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_rows_read, SHOW_LONG}, (char*) &export_vars.innodb_rows_read, SHOW_LONG},
{"rows_updated", {"rows_updated",
(char*) &export_vars.innodb_rows_updated, SHOW_LONG}, (char*) &export_vars.innodb_rows_updated, SHOW_LONG},
#ifdef UNIV_DEBUG
{"purge_trx_id_age",
(char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG},
#endif /* UNIV_DEBUG */
{NullS, NullS, SHOW_LONG} {NullS, NullS, SHOW_LONG}
}; };
...@@ -9274,6 +9278,11 @@ static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug, ...@@ -9274,6 +9278,11 @@ static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
PLUGIN_VAR_RQCMDARG, PLUGIN_VAR_RQCMDARG,
"Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()", "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
NULL, NULL, 0, 0, 1024, 0); NULL, NULL, 0, 0, 1024, 0);
static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG,
"Artificially limit the number of records per B-tree page (0=unlimited).",
NULL, NULL, 0, 0, UINT_MAX32, 0);
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
static struct st_mysql_sys_var* innobase_system_variables[]= { static struct st_mysql_sys_var* innobase_system_variables[]= {
...@@ -9323,6 +9332,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { ...@@ -9323,6 +9332,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
MYSQL_SYSVAR(trx_rseg_n_slots_debug), MYSQL_SYSVAR(trx_rseg_n_slots_debug),
MYSQL_SYSVAR(limit_optimistic_insert_debug),
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
NULL NULL
}; };
......
...@@ -703,6 +703,11 @@ extern ulint btr_cur_n_sea; ...@@ -703,6 +703,11 @@ extern ulint btr_cur_n_sea;
extern ulint btr_cur_n_non_sea_old; extern ulint btr_cur_n_non_sea_old;
extern ulint btr_cur_n_sea_old; extern ulint btr_cur_n_sea_old;
#ifdef UNIV_DEBUG
/* Flag to limit optimistic insert records */
extern uint btr_cur_limit_optimistic_insert_debug;
#endif /* UNIV_DEBUG */
#ifndef UNIV_NONINL #ifndef UNIV_NONINL
#include "btr0cur.ic" #include "btr0cur.ic"
#endif #endif
......
...@@ -8,6 +8,16 @@ Created 10/16/1994 Heikki Tuuri ...@@ -8,6 +8,16 @@ Created 10/16/1994 Heikki Tuuri
#include "btr0btr.h" #include "btr0btr.h"
#ifdef UNIV_DEBUG
/* Debug-only hook: when btr_cur_limit_optimistic_insert_debug is non-zero
and NREC has reached that limit, execute CODE (a statement without its
terminating semicolon, e.g. "goto fail" or "return(FALSE)").

The expansion is a complete if/else statement that must be followed by a
semicolon at the call site. Unlike a bare "if (...) { ... }", it can be the
body of an unbraced if/else in the caller, exactly like the empty non-debug
expansion, so code that compiles without UNIV_DEBUG also compiles with it.
CODE is deliberately not wrapped in do { } while (0): "break" or "continue"
inside CODE must keep referring to the caller's own loop. */
# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
if (btr_cur_limit_optimistic_insert_debug\
    && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
	CODE;\
} else ((void) 0)
#else
/* Without UNIV_DEBUG the hook expands to nothing. */
# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)
#endif /* UNIV_DEBUG */
/************************************************************* /*************************************************************
Returns the page cursor component of a tree cursor. */ Returns the page cursor component of a tree cursor. */
UNIV_INLINE UNIV_INLINE
...@@ -100,6 +110,9 @@ btr_cur_compress_recommendation( ...@@ -100,6 +110,9 @@ btr_cur_compress_recommendation(
page = btr_cur_get_page(cursor); page = btr_cur_get_page(cursor);
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
return(FALSE));
if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT) if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
|| ((btr_page_get_next(page, mtr) == FIL_NULL) || ((btr_page_get_next(page, mtr) == FIL_NULL)
&& (btr_page_get_prev(page, mtr) == FIL_NULL))) { && (btr_page_get_prev(page, mtr) == FIL_NULL))) {
......
...@@ -569,6 +569,9 @@ struct export_var_struct{ ...@@ -569,6 +569,9 @@ struct export_var_struct{
ulint innodb_rows_inserted; ulint innodb_rows_inserted;
ulint innodb_rows_updated; ulint innodb_rows_updated;
ulint innodb_rows_deleted; ulint innodb_rows_deleted;
#ifdef UNIV_DEBUG
ulint innodb_purge_trx_id_age;
#endif /* UNIV_DEBUG */
}; };
/* The server system struct */ /* The server system struct */
......
...@@ -133,6 +133,10 @@ struct trx_purge_struct{ ...@@ -133,6 +133,10 @@ struct trx_purge_struct{
than this */ than this */
dulint purge_undo_no; /* Purge has advanced past all records dulint purge_undo_no; /* Purge has advanced past all records
whose undo number is less than this */ whose undo number is less than this */
#ifdef UNIV_DEBUG
dulint done_trx_no; /* Indicate 'purge pointer' which have
purged already accurately. */
#endif /* UNIV_DEBUG */
/*-----------------------------*/ /*-----------------------------*/
ibool next_stored; /* TRUE if the info of the next record ibool next_stored; /* TRUE if the info of the next record
to purge is stored below: if yes, then to purge is stored below: if yes, then
......
...@@ -1913,6 +1913,15 @@ srv_export_innodb_status(void) ...@@ -1913,6 +1913,15 @@ srv_export_innodb_status(void)
export_vars.innodb_rows_updated = srv_n_rows_updated; export_vars.innodb_rows_updated = srv_n_rows_updated;
export_vars.innodb_rows_deleted = srv_n_rows_deleted; export_vars.innodb_rows_deleted = srv_n_rows_deleted;
#ifdef UNIV_DEBUG
if (ut_dulint_cmp(trx_sys->max_trx_id, purge_sys->done_trx_no) < 0) {
export_vars.innodb_purge_trx_id_age = 0;
} else {
export_vars.innodb_purge_trx_id_age =
ut_dulint_minus(trx_sys->max_trx_id, purge_sys->done_trx_no);
}
#endif /* UNIV_DEBUG */
mutex_exit(&srv_innodb_monitor_mutex); mutex_exit(&srv_innodb_monitor_mutex);
} }
...@@ -2387,6 +2396,29 @@ srv_master_thread( ...@@ -2387,6 +2396,29 @@ srv_master_thread(
+ buf_pool->n_pages_written; + buf_pool->n_pages_written;
srv_main_thread_op_info = "sleeping"; srv_main_thread_op_info = "sleeping";
#ifdef UNIV_DEBUG
if (btr_cur_limit_optimistic_insert_debug) {
/* If btr_cur_limit_optimistic_insert_debug is enabled
and no purge_threads, purge opportunity is increased
by x100 (1purge/100msec), to speed up debug scripts
which should wait for purged. */
if (!skip_sleep) {
os_thread_sleep(100000);
}
do {
if (srv_fast_shutdown
&& srv_shutdown_state > 0) {
goto background_loop;
}
srv_main_thread_op_info = "purging";
n_pages_purged = trx_purge();
} while (n_pages_purged);
} else
#endif /* UNIV_DEBUG */
if (!skip_sleep) { if (!skip_sleep) {
os_thread_sleep(1000000); os_thread_sleep(1000000);
......
...@@ -209,6 +209,7 @@ trx_purge_sys_create(void) ...@@ -209,6 +209,7 @@ trx_purge_sys_create(void)
purge_sys->purge_trx_no = ut_dulint_zero; purge_sys->purge_trx_no = ut_dulint_zero;
purge_sys->purge_undo_no = ut_dulint_zero; purge_sys->purge_undo_no = ut_dulint_zero;
purge_sys->next_stored = FALSE; purge_sys->next_stored = FALSE;
ut_d(purge_sys->done_trx_no = ut_dulint_zero);
rw_lock_create(&purge_sys->latch, SYNC_PURGE_LATCH); rw_lock_create(&purge_sys->latch, SYNC_PURGE_LATCH);
...@@ -576,6 +577,7 @@ trx_purge_truncate_if_arr_empty(void) ...@@ -576,6 +577,7 @@ trx_purge_truncate_if_arr_empty(void)
ut_ad(mutex_own(&(purge_sys->mutex))); ut_ad(mutex_own(&(purge_sys->mutex)));
if (purge_sys->arr->n_used == 0) { if (purge_sys->arr->n_used == 0) {
ut_d(purge_sys->done_trx_no = purge_sys->purge_trx_no);
trx_purge_truncate_history(); trx_purge_truncate_history();
......
...@@ -3072,6 +3072,8 @@ btr_lift_page_up( ...@@ -3072,6 +3072,8 @@ btr_lift_page_up(
buf_block_t* blocks[BTR_MAX_LEVELS]; buf_block_t* blocks[BTR_MAX_LEVELS];
ulint n_blocks; /*!< last used index in blocks[] */ ulint n_blocks; /*!< last used index in blocks[] */
ulint i; ulint i;
ibool lift_father_up = FALSE;
buf_block_t* block_orig = block;
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
...@@ -3082,11 +3084,13 @@ btr_lift_page_up( ...@@ -3082,11 +3084,13 @@ btr_lift_page_up(
{ {
btr_cur_t cursor; btr_cur_t cursor;
mem_heap_t* heap = mem_heap_create(100); ulint* offsets = NULL;
ulint* offsets; mem_heap_t* heap = mem_heap_create(
sizeof(*offsets)
* (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
buf_block_t* b; buf_block_t* b;
offsets = btr_page_get_father_block(NULL, heap, index, offsets = btr_page_get_father_block(offsets, heap, index,
block, mtr, &cursor); block, mtr, &cursor);
father_block = btr_cur_get_block(&cursor); father_block = btr_cur_get_block(&cursor);
father_page_zip = buf_block_get_page_zip(father_block); father_page_zip = buf_block_get_page_zip(father_block);
...@@ -3110,6 +3114,29 @@ btr_lift_page_up( ...@@ -3110,6 +3114,29 @@ btr_lift_page_up(
blocks[n_blocks++] = b = btr_cur_get_block(&cursor); blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
} }
if (n_blocks && page_level == 0) {
/* The father page also should be the only on its level (not
root). We should lift up the father page at first.
Because the leaf page should be lifted up only for root page.
The freeing page is based on page_level (==0 or !=0)
to choose segment. If the page_level is changed ==0 from !=0,
later freeing of the page doesn't find the page allocation
to be freed.*/
lift_father_up = TRUE;
block = father_block;
page = buf_block_get_frame(block);
page_level = btr_page_get_level(page, mtr);
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
father_block = blocks[0];
father_page_zip = buf_block_get_page_zip(father_block);
father_page = buf_block_get_frame(father_block);
}
mem_heap_free(heap); mem_heap_free(heap);
} }
...@@ -3117,6 +3144,7 @@ btr_lift_page_up( ...@@ -3117,6 +3144,7 @@ btr_lift_page_up(
/* Make the father empty */ /* Make the father empty */
btr_page_empty(father_block, father_page_zip, index, page_level, mtr); btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
page_level++;
/* Copy the records to the father page one by one. */ /* Copy the records to the father page one by one. */
if (0 if (0
...@@ -3149,7 +3177,7 @@ btr_lift_page_up( ...@@ -3149,7 +3177,7 @@ btr_lift_page_up(
lock_update_copy_and_discard(father_block, block); lock_update_copy_and_discard(father_block, block);
/* Go upward to root page, decrementing levels by one. */ /* Go upward to root page, decrementing levels by one. */
for (i = 0; i < n_blocks; i++, page_level++) { for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
page_t* page = buf_block_get_frame(blocks[i]); page_t* page = buf_block_get_frame(blocks[i]);
page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]); page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
...@@ -3171,7 +3199,7 @@ btr_lift_page_up( ...@@ -3171,7 +3199,7 @@ btr_lift_page_up(
ut_ad(page_validate(father_page, index)); ut_ad(page_validate(father_page, index));
ut_ad(btr_check_node_ptr(index, father_block, mtr)); ut_ad(btr_check_node_ptr(index, father_block, mtr));
return(father_block); return(lift_father_up ? block_orig : father_block);
} }
/*************************************************************//** /*************************************************************//**
......
...@@ -86,6 +86,11 @@ srv_refresh_innodb_monitor_stats(). Referenced by ...@@ -86,6 +86,11 @@ srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */ srv_printf_innodb_monitor(). */
UNIV_INTERN ulint btr_cur_n_sea_old = 0; UNIV_INTERN ulint btr_cur_n_sea_old = 0;
#ifdef UNIV_DEBUG
/* Flag to limit optimistic insert records */
UNIV_INTERN uint btr_cur_limit_optimistic_insert_debug = 0;
#endif /* UNIV_DEBUG */
/** In the optimistic insert, if the insert does not fit, but this much space /** In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */ can be released by page reorganize, then it is reorganized */
#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32) #define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
...@@ -1171,6 +1176,9 @@ btr_cur_optimistic_insert( ...@@ -1171,6 +1176,9 @@ btr_cur_optimistic_insert(
} }
} }
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
goto fail);
/* If there have been many consecutive inserts, and we are on the leaf /* If there have been many consecutive inserts, and we are on the leaf
level, check if we have to split the page to reserve enough free space level, check if we have to split the page to reserve enough free space
for future updates of records. */ for future updates of records. */
......
...@@ -577,6 +577,10 @@ static SHOW_VAR innodb_status_variables[]= { ...@@ -577,6 +577,10 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_rows_read, SHOW_LONG}, (char*) &export_vars.innodb_rows_read, SHOW_LONG},
{"rows_updated", {"rows_updated",
(char*) &export_vars.innodb_rows_updated, SHOW_LONG}, (char*) &export_vars.innodb_rows_updated, SHOW_LONG},
#ifdef UNIV_DEBUG
{"purge_trx_id_age",
(char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG},
#endif /* UNIV_DEBUG */
{NullS, NullS, SHOW_LONG} {NullS, NullS, SHOW_LONG}
}; };
...@@ -11262,6 +11266,11 @@ static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug, ...@@ -11262,6 +11266,11 @@ static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
PLUGIN_VAR_RQCMDARG, PLUGIN_VAR_RQCMDARG,
"Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()", "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
NULL, NULL, 0, 0, 1024, 0); NULL, NULL, 0, 0, 1024, 0);
static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG,
"Artificially limit the number of records per B-tree page (0=unlimited).",
NULL, NULL, 0, 0, UINT_MAX32, 0);
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
static struct st_mysql_sys_var* innobase_system_variables[]= { static struct st_mysql_sys_var* innobase_system_variables[]= {
...@@ -11327,6 +11336,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { ...@@ -11327,6 +11336,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(io_capacity), MYSQL_SYSVAR(io_capacity),
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
MYSQL_SYSVAR(trx_rseg_n_slots_debug), MYSQL_SYSVAR(trx_rseg_n_slots_debug),
MYSQL_SYSVAR(limit_optimistic_insert_debug),
#endif /* UNIV_DEBUG */ #endif /* UNIV_DEBUG */
NULL NULL
}; };
......
...@@ -795,6 +795,11 @@ srv_printf_innodb_monitor(). */ ...@@ -795,6 +795,11 @@ srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_sea_old; extern ulint btr_cur_n_sea_old;
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/* Flag to limit optimistic insert records */
extern uint btr_cur_limit_optimistic_insert_debug;
#endif /* UNIV_DEBUG */
#ifndef UNIV_NONINL #ifndef UNIV_NONINL
#include "btr0cur.ic" #include "btr0cur.ic"
#endif #endif
......
...@@ -26,6 +26,16 @@ Created 10/16/1994 Heikki Tuuri ...@@ -26,6 +26,16 @@ Created 10/16/1994 Heikki Tuuri
#ifndef UNIV_HOTBACKUP #ifndef UNIV_HOTBACKUP
#include "btr0btr.h" #include "btr0btr.h"
#ifdef UNIV_DEBUG
/* Debug-only hook: when btr_cur_limit_optimistic_insert_debug is non-zero
and NREC has reached that limit, execute CODE (a statement without its
terminating semicolon, e.g. "goto fail" or "return(FALSE)").

The expansion is a complete if/else statement that must be followed by a
semicolon at the call site. Unlike a bare "if (...) { ... }", it can be the
body of an unbraced if/else in the caller, exactly like the empty non-debug
expansion, so code that compiles without UNIV_DEBUG also compiles with it.
CODE is deliberately not wrapped in do { } while (0): "break" or "continue"
inside CODE must keep referring to the caller's own loop. */
# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
if (btr_cur_limit_optimistic_insert_debug\
    && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
	CODE;\
} else ((void) 0)
#else
/* Without UNIV_DEBUG the hook expands to nothing. */
# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)
#endif /* UNIV_DEBUG */
#ifdef UNIV_DEBUG #ifdef UNIV_DEBUG
/*********************************************************//** /*********************************************************//**
Returns the page cursor component of a tree cursor. Returns the page cursor component of a tree cursor.
...@@ -146,6 +156,9 @@ btr_cur_compress_recommendation( ...@@ -146,6 +156,9 @@ btr_cur_compress_recommendation(
page = btr_cur_get_page(cursor); page = btr_cur_get_page(cursor);
LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
return(FALSE));
if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT) if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
|| ((btr_page_get_next(page, mtr) == FIL_NULL) || ((btr_page_get_next(page, mtr) == FIL_NULL)
&& (btr_page_get_prev(page, mtr) == FIL_NULL))) { && (btr_page_get_prev(page, mtr) == FIL_NULL))) {
......
...@@ -650,6 +650,9 @@ struct export_var_struct{ ...@@ -650,6 +650,9 @@ struct export_var_struct{
ulint innodb_rows_inserted; /*!< srv_n_rows_inserted */ ulint innodb_rows_inserted; /*!< srv_n_rows_inserted */
ulint innodb_rows_updated; /*!< srv_n_rows_updated */ ulint innodb_rows_updated; /*!< srv_n_rows_updated */
ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */ ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */
#ifdef UNIV_DEBUG
ulint innodb_purge_trx_id_age; /*!< max_trx_id - purged trx_id */
#endif /* UNIV_DEBUG */
}; };
/** The server system struct */ /** The server system struct */
......
...@@ -153,6 +153,10 @@ struct trx_purge_struct{ ...@@ -153,6 +153,10 @@ struct trx_purge_struct{
than this */ than this */
undo_no_t purge_undo_no; /*!< Purge has advanced past all records undo_no_t purge_undo_no; /*!< Purge has advanced past all records
whose undo number is less than this */ whose undo number is less than this */
#ifdef UNIV_DEBUG
trx_id_t done_trx_no; /* Indicate 'purge pointer' which have
purged already accurately. */
#endif /* UNIV_DEBUG */
/*-----------------------------*/ /*-----------------------------*/
ibool next_stored; /*!< TRUE if the info of the next record ibool next_stored; /*!< TRUE if the info of the next record
to purge is stored below: if yes, then to purge is stored below: if yes, then
......
...@@ -1964,6 +1964,15 @@ srv_export_innodb_status(void) ...@@ -1964,6 +1964,15 @@ srv_export_innodb_status(void)
export_vars.innodb_rows_updated = srv_n_rows_updated; export_vars.innodb_rows_updated = srv_n_rows_updated;
export_vars.innodb_rows_deleted = srv_n_rows_deleted; export_vars.innodb_rows_deleted = srv_n_rows_deleted;
#ifdef UNIV_DEBUG
if (ut_dulint_cmp(trx_sys->max_trx_id, purge_sys->done_trx_no) < 0) {
export_vars.innodb_purge_trx_id_age = 0;
} else {
export_vars.innodb_purge_trx_id_age =
ut_dulint_minus(trx_sys->max_trx_id, purge_sys->done_trx_no);
}
#endif /* UNIV_DEBUG */
mutex_exit(&srv_innodb_monitor_mutex); mutex_exit(&srv_innodb_monitor_mutex);
} }
...@@ -2468,6 +2477,30 @@ srv_master_thread( ...@@ -2468,6 +2477,30 @@ srv_master_thread(
srv_main_thread_op_info = "sleeping"; srv_main_thread_op_info = "sleeping";
srv_main_1_second_loops++; srv_main_1_second_loops++;
#ifdef UNIV_DEBUG
if (btr_cur_limit_optimistic_insert_debug) {
/* If btr_cur_limit_optimistic_insert_debug is enabled
and no purge_threads, purge opportunity is increased
by x100 (1purge/100msec), to speed up debug scripts
which should wait for purged. */
if (!skip_sleep) {
os_thread_sleep(100000);
srv_main_sleeps++;
}
do {
if (srv_fast_shutdown
&& srv_shutdown_state > 0) {
goto background_loop;
}
srv_main_thread_op_info = "purging";
n_pages_purged = trx_purge();
} while (n_pages_purged);
} else
#endif /* UNIV_DEBUG */
if (!skip_sleep) { if (!skip_sleep) {
os_thread_sleep(1000000); os_thread_sleep(1000000);
......
...@@ -226,6 +226,7 @@ trx_purge_sys_create(void) ...@@ -226,6 +226,7 @@ trx_purge_sys_create(void)
purge_sys->purge_trx_no = ut_dulint_zero; purge_sys->purge_trx_no = ut_dulint_zero;
purge_sys->purge_undo_no = ut_dulint_zero; purge_sys->purge_undo_no = ut_dulint_zero;
purge_sys->next_stored = FALSE; purge_sys->next_stored = FALSE;
ut_d(purge_sys->done_trx_no = ut_dulint_zero);
rw_lock_create(&purge_sys->latch, SYNC_PURGE_LATCH); rw_lock_create(&purge_sys->latch, SYNC_PURGE_LATCH);
...@@ -637,6 +638,7 @@ trx_purge_truncate_if_arr_empty(void) ...@@ -637,6 +638,7 @@ trx_purge_truncate_if_arr_empty(void)
ut_ad(mutex_own(&(purge_sys->mutex))); ut_ad(mutex_own(&(purge_sys->mutex)));
if (purge_sys->arr->n_used == 0) { if (purge_sys->arr->n_used == 0) {
ut_d(purge_sys->done_trx_no = purge_sys->purge_trx_no);
trx_purge_truncate_history(); trx_purge_truncate_history();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment