From 4c423016fa285ce322f754b7a23799d3f5c439e9 Mon Sep 17 00:00:00 2001 From: Yasufumi Kinoshita <yasufumi.kinoshita@oracle.com> Date: Mon, 12 Nov 2012 22:31:30 +0900 Subject: [PATCH] Bug #14676111 WRONG PAGE_LEVEL WRITTEN FOR UPPER THAN FATHER PAGE AT BTR_LIFT_PAGE_UP() btr_lift_page_up() writes a wrong page level (off by -1) for the pages above the father page. In almost all cases the father page is the root page and there are no pages above it, so this is a very rare path. In addition, a leaf page should not be lifted up unless its father page is the root, because branch pages should not become leaf pages. rb://1336 approved by Marko Makela. --- .../suite/innodb/r/innodb_bug14676111.result | 53 ++++++++ .../suite/innodb/t/innodb_bug14676111.test | 128 ++++++++++++++++++ .../innodb_plugin/r/innodb_bug14676111.result | 53 ++++++++ .../innodb_plugin/t/innodb_bug14676111.test | 128 ++++++++++++++++++ storage/innobase/btr/btr0btr.c | 24 +++- storage/innobase/btr/btr0cur.c | 11 +- storage/innobase/handler/ha_innodb.cc | 10 ++ storage/innobase/include/btr0cur.h | 5 + storage/innobase/include/btr0cur.ic | 13 ++ storage/innobase/include/srv0srv.h | 3 + storage/innobase/include/trx0purge.h | 4 + storage/innobase/srv/srv0srv.c | 32 +++++ storage/innobase/trx/trx0purge.c | 2 + storage/innodb_plugin/btr/btr0btr.c | 38 +++++- storage/innodb_plugin/btr/btr0cur.c | 8 ++ storage/innodb_plugin/handler/ha_innodb.cc | 10 ++ storage/innodb_plugin/include/btr0cur.h | 5 + storage/innodb_plugin/include/btr0cur.ic | 13 ++ storage/innodb_plugin/include/srv0srv.h | 3 + storage/innodb_plugin/include/trx0purge.h | 4 + storage/innodb_plugin/srv/srv0srv.c | 33 +++++ storage/innodb_plugin/trx/trx0purge.c | 2 + 22 files changed, 575 insertions(+), 7 deletions(-) create mode 100644 mysql-test/suite/innodb/r/innodb_bug14676111.result create mode 100644 mysql-test/suite/innodb/t/innodb_bug14676111.test create mode 100644 mysql-test/suite/innodb_plugin/r/innodb_bug14676111.result create mode 100644 
mysql-test/suite/innodb_plugin/t/innodb_bug14676111.test diff --git a/mysql-test/suite/innodb/r/innodb_bug14676111.result b/mysql-test/suite/innodb/r/innodb_bug14676111.result new file mode 100644 index 0000000000..ebecd1d00c --- /dev/null +++ b/mysql-test/suite/innodb/r/innodb_bug14676111.result @@ -0,0 +1,53 @@ +drop table if exists t1; +CREATE TABLE t1 (a int not null primary key) engine=InnoDB; +set global innodb_limit_optimistic_insert_debug = 2; +insert into t1 values (1); +insert into t1 values (5); +insert into t1 values (4); +insert into t1 values (3); +insert into t1 values (2); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH / 16384 +10.0000 +delete from t1 where a=4; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH / 16384 +8.0000 +delete from t1 where a=5; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH / 16384 +5.0000 +set global innodb_limit_optimistic_insert_debug = 10000; +delete from t1 where a=2; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH / 16384 +3.0000 +insert into t1 values (2); +delete from t1 where a=2; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH / 16384 +2.0000 +insert into t1 values (2); +delete from t1 where a=2; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select 
DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH / 16384 +1.0000 +drop table t1; diff --git a/mysql-test/suite/innodb/t/innodb_bug14676111.test b/mysql-test/suite/innodb/t/innodb_bug14676111.test new file mode 100644 index 0000000000..fadd111fdc --- /dev/null +++ b/mysql-test/suite/innodb/t/innodb_bug14676111.test @@ -0,0 +1,128 @@ +# Test for bug #14676111: WRONG PAGE_LEVEL WRITTEN FOR UPPER THAN FATHER PAGE AT BTR_LIFT_PAGE_UP() + +-- source include/have_innodb.inc +-- source include/have_debug.inc + +if (`select count(*)=0 from information_schema.global_variables where variable_name = 'INNODB_LIMIT_OPTIMISTIC_INSERT_DEBUG'`) +{ + --skip Test requires InnoDB built with UNIV_DEBUG definition. +} + +--disable_query_log +set @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug; +--enable_query_log +--disable_warnings +drop table if exists t1; +--enable_warnings + +CREATE TABLE t1 (a int not null primary key) engine=InnoDB; + +let $wait_condition= + SELECT VARIABLE_VALUE < 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS + WHERE VARIABLE_NAME = 'INNODB_PURGE_TRX_ID_AGE'; + +# +# make 4 leveled straight tree +# +set global innodb_limit_optimistic_insert_debug = 2; +insert into t1 values (1); +insert into t1 values (5); +#current tree form +# (1, 5) + +insert into t1 values (4); +#records in a page is limited to 2 artificially. root rise occurs +#current tree form +# (1, 5) +#(1, 4) (5) + +insert into t1 values (3); +#current tree form +# (1, 5) +# (1, 4) (5) +#(1, 3) (4) (5) + +insert into t1 values (2); +#current tree form +# (1, 5) +# (1, 4) (5) +# (1, 3) (4) (5) +#(1, 2) (3) (4) (5) + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +delete from t1 where a=4; +--source include/wait_condition.inc +#deleting 1 record of 2 records don't cause merge artificially. 
+#current tree form +# (1, 5) +# (1) (5) +# (1, 3) (5) +#(1, 2) (3) (5) + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +delete from t1 where a=5; +--source include/wait_condition.inc +#deleting 1 record of 2 records don't cause merge artificially. +#current tree form +# (1) +# (1) +# (1, 3) <- lift up this level next, when deleting node ptr +#(1, 2) (3) <- merged next + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +# +# cause merge at level 0 +# + +#disable the artificial limitation of records in a page +set global innodb_limit_optimistic_insert_debug = 10000; +delete from t1 where a=2; +--source include/wait_condition.inc +#merge page occurs. and lift up occurs. +#current tree form +# (1) +# (1) +# (1, 3) + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +insert into t1 values (2); +#current tree form +# (1) +# (1) <- lift up this level next, because it is not root +# (1, 2, 3) + +delete from t1 where a=2; +--source include/wait_condition.inc +#current tree form +# (1) +# (1, 3) + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +insert into t1 values (2); +#current tree form +# (1) +# (1, 2, 3) <- lift up this level next, because the father is root + +delete from t1 where a=2; +--source include/wait_condition.inc +#current tree form +# (1, 3) + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +drop table t1; + +--disable_query_log +set global innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug; +--enable_query_log diff --git a/mysql-test/suite/innodb_plugin/r/innodb_bug14676111.result 
b/mysql-test/suite/innodb_plugin/r/innodb_bug14676111.result new file mode 100644 index 0000000000..ebecd1d00c --- /dev/null +++ b/mysql-test/suite/innodb_plugin/r/innodb_bug14676111.result @@ -0,0 +1,53 @@ +drop table if exists t1; +CREATE TABLE t1 (a int not null primary key) engine=InnoDB; +set global innodb_limit_optimistic_insert_debug = 2; +insert into t1 values (1); +insert into t1 values (5); +insert into t1 values (4); +insert into t1 values (3); +insert into t1 values (2); +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH / 16384 +10.0000 +delete from t1 where a=4; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH / 16384 +8.0000 +delete from t1 where a=5; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH / 16384 +5.0000 +set global innodb_limit_optimistic_insert_debug = 10000; +delete from t1 where a=2; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH / 16384 +3.0000 +insert into t1 values (2); +delete from t1 where a=2; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH / 16384 +2.0000 +insert into t1 values (2); +delete from t1 where a=2; +analyze table t1; +Table Op Msg_type Msg_text +test.t1 analyze status OK +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; +DATA_LENGTH 
/ 16384 +1.0000 +drop table t1; diff --git a/mysql-test/suite/innodb_plugin/t/innodb_bug14676111.test b/mysql-test/suite/innodb_plugin/t/innodb_bug14676111.test new file mode 100644 index 0000000000..ae871e3b63 --- /dev/null +++ b/mysql-test/suite/innodb_plugin/t/innodb_bug14676111.test @@ -0,0 +1,128 @@ +# Test for bug #14676111: WRONG PAGE_LEVEL WRITTEN FOR UPPER THAN FATHER PAGE AT BTR_LIFT_PAGE_UP() + +-- source include/have_innodb_plugin.inc +-- source include/have_debug.inc + +if (`select count(*)=0 from information_schema.global_variables where variable_name = 'INNODB_LIMIT_OPTIMISTIC_INSERT_DEBUG'`) +{ + --skip Test requires InnoDB built with UNIV_DEBUG definition. +} + +--disable_query_log +set @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug; +--enable_query_log +--disable_warnings +drop table if exists t1; +--enable_warnings + +CREATE TABLE t1 (a int not null primary key) engine=InnoDB; + +let $wait_condition= + SELECT VARIABLE_VALUE < 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS + WHERE VARIABLE_NAME = 'INNODB_PURGE_TRX_ID_AGE'; + +# +# make 4 leveled straight tree +# +set global innodb_limit_optimistic_insert_debug = 2; +insert into t1 values (1); +insert into t1 values (5); +#current tree form +# (1, 5) + +insert into t1 values (4); +#records in a page is limited to 2 artificially. root rise occurs +#current tree form +# (1, 5) +#(1, 4) (5) + +insert into t1 values (3); +#current tree form +# (1, 5) +# (1, 4) (5) +#(1, 3) (4) (5) + +insert into t1 values (2); +#current tree form +# (1, 5) +# (1, 4) (5) +# (1, 3) (4) (5) +#(1, 2) (3) (4) (5) + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +delete from t1 where a=4; +--source include/wait_condition.inc +#deleting 1 record of 2 records don't cause merge artificially. 
+#current tree form +# (1, 5) +# (1) (5) +# (1, 3) (5) +#(1, 2) (3) (5) + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +delete from t1 where a=5; +--source include/wait_condition.inc +#deleting 1 record of 2 records don't cause merge artificially. +#current tree form +# (1) +# (1) +# (1, 3) <- lift up this level next, when deleting node ptr +#(1, 2) (3) <- merged next + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +# +# cause merge at level 0 +# + +#disable the artificial limitation of records in a page +set global innodb_limit_optimistic_insert_debug = 10000; +delete from t1 where a=2; +--source include/wait_condition.inc +#merge page occurs. and lift up occurs. +#current tree form +# (1) +# (1) +# (1, 3) + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +insert into t1 values (2); +#current tree form +# (1) +# (1) <- lift up this level next, because it is not root +# (1, 2, 3) + +delete from t1 where a=2; +--source include/wait_condition.inc +#current tree form +# (1) +# (1, 3) + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +insert into t1 values (2); +#current tree form +# (1) +# (1, 2, 3) <- lift up this level next, because the father is root + +delete from t1 where a=2; +--source include/wait_condition.inc +#current tree form +# (1, 3) + +analyze table t1; +select DATA_LENGTH / 16384 from information_schema.TABLES where TABLE_SCHEMA = 'test' and TABLE_NAME = 't1'; + +drop table t1; + +--disable_query_log +set global innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug; +--enable_query_log diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c index 5079757272..fc4e07d716 
100644 --- a/storage/innobase/btr/btr0btr.c +++ b/storage/innobase/btr/btr0btr.c @@ -1973,6 +1973,7 @@ btr_lift_page_up( ulint root_page_no; ulint ancestors; ulint i; + ibool lift_father_up = FALSE; ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); @@ -2007,6 +2008,27 @@ btr_lift_page_up( pages[ancestors++] = iter_page; } + if (ancestors > 1 && page_level == 0) { + /* The father page also should be the only on its level (not + root). We should lift up the father page at first. + Because the leaf page should be lifted up only for root page. + The freeing page is based on page_level (==0 or !=0) + to choose segment. If the page_level is changed ==0 from !=0, + later freeing of the page doesn't find the page allocation + to be freed.*/ + + lift_father_up = TRUE; + page = father_page; + page_level = btr_page_get_level(page, mtr); + + ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); + ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); + ut_ad(mtr_memo_contains(mtr, buf_block_align(page), + MTR_MEMO_PAGE_X_FIX)); + + father_page = pages[1]; + } + btr_search_drop_page_hash_index(page); /* Make the father empty */ @@ -2018,7 +2040,7 @@ btr_lift_page_up( lock_update_copy_and_discard(father_page, page); /* Go upward to root page, decreasing levels by one. */ - for (i = 0; i < ancestors; i++) { + for (i = lift_father_up ? 
1 : 0; i < ancestors; i++) { iter_page = pages[i]; ut_ad(btr_page_get_level(iter_page, mtr) == (page_level + 1)); diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 389e95bcb0..8dc0a92408 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -49,6 +49,10 @@ ulint btr_cur_n_sea = 0; ulint btr_cur_n_non_sea_old = 0; ulint btr_cur_n_sea_old = 0; +#ifdef UNIV_DEBUG +uint btr_cur_limit_optimistic_insert_debug = 0; +#endif /* UNIV_DEBUG */ + /* In the optimistic insert, if the insert does not fit, but this much space can be released by page reorganize, then it is reorganized */ @@ -1022,6 +1026,9 @@ calculate_sizes_again: goto calculate_sizes_again; } + LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), + goto fail); + /* If there have been many consecutive inserts, and we are on the leaf level, check if we have to split the page to reserve enough free space for future updates of records. */ @@ -1034,7 +1041,9 @@ calculate_sizes_again: && (0 == level) && (btr_page_get_split_rec_to_right(cursor, &dummy_rec) || btr_page_get_split_rec_to_left(cursor, &dummy_rec))) { - +#ifdef UNIV_DEBUG +fail: +#endif /* UNIV_DEBUG */ if (big_rec_vec) { dtuple_convert_back_big_rec(index, entry, big_rec_vec); } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index bcb903d22b..28262f0255 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -495,6 +495,10 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_rows_read, SHOW_LONG}, {"rows_updated", (char*) &export_vars.innodb_rows_updated, SHOW_LONG}, +#ifdef UNIV_DEBUG + {"purge_trx_id_age", + (char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG}, +#endif /* UNIV_DEBUG */ {NullS, NullS, SHOW_LONG} }; @@ -9274,6 +9278,11 @@ static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug, PLUGIN_VAR_RQCMDARG, "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for 
trx_rsegf_undo_find_free()", NULL, NULL, 0, 0, 1024, 0); + +static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug, + btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG, + "Artificially limit the number of records per B-tree page (0=unlimited).", + NULL, NULL, 0, 0, UINT_MAX32, 0); #endif /* UNIV_DEBUG */ static struct st_mysql_sys_var* innobase_system_variables[]= { @@ -9323,6 +9332,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ #ifdef UNIV_DEBUG MYSQL_SYSVAR(trx_rseg_n_slots_debug), + MYSQL_SYSVAR(limit_optimistic_insert_debug), #endif /* UNIV_DEBUG */ NULL }; diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 341d628c6d..c2b81d0ae9 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -703,6 +703,11 @@ extern ulint btr_cur_n_sea; extern ulint btr_cur_n_non_sea_old; extern ulint btr_cur_n_sea_old; +#ifdef UNIV_DEBUG +/* Flag to limit optimistic insert records */ +extern uint btr_cur_limit_optimistic_insert_debug; +#endif /* UNIV_DEBUG */ + #ifndef UNIV_NONINL #include "btr0cur.ic" #endif diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic index bd2c46eb73..d894f0546d 100644 --- a/storage/innobase/include/btr0cur.ic +++ b/storage/innobase/include/btr0cur.ic @@ -8,6 +8,16 @@ Created 10/16/1994 Heikki Tuuri #include "btr0btr.h" +#ifdef UNIV_DEBUG +# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\ +if (btr_cur_limit_optimistic_insert_debug\ + && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\ + CODE;\ +} +#else +# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE) +#endif /* UNIV_DEBUG */ + /************************************************************* Returns the page cursor component of a tree cursor. 
*/ UNIV_INLINE @@ -100,6 +110,9 @@ btr_cur_compress_recommendation( page = btr_cur_get_page(cursor); + LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2, + return(FALSE)); + if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT) || ((btr_page_get_next(page, mtr) == FIL_NULL) && (btr_page_get_prev(page, mtr) == FIL_NULL))) { diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index aa6c88e053..e79d352d1e 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -569,6 +569,9 @@ struct export_var_struct{ ulint innodb_rows_inserted; ulint innodb_rows_updated; ulint innodb_rows_deleted; +#ifdef UNIV_DEBUG + ulint innodb_purge_trx_id_age; +#endif /* UNIV_DEBUG */ }; /* The server system struct */ diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h index fbae7eb9a6..fc9ff02192 100644 --- a/storage/innobase/include/trx0purge.h +++ b/storage/innobase/include/trx0purge.h @@ -133,6 +133,10 @@ struct trx_purge_struct{ than this */ dulint purge_undo_no; /* Purge has advanced past all records whose undo number is less than this */ +#ifdef UNIV_DEBUG + dulint done_trx_no; /* Indicate 'purge pointer' which have + purged already accurately. 
*/ +#endif /* UNIV_DEBUG */ /*-----------------------------*/ ibool next_stored; /* TRUE if the info of the next record to purge is stored below: if yes, then diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 3f6f198299..46d5b6ad27 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -1913,6 +1913,15 @@ srv_export_innodb_status(void) export_vars.innodb_rows_updated = srv_n_rows_updated; export_vars.innodb_rows_deleted = srv_n_rows_deleted; +#ifdef UNIV_DEBUG + if (ut_dulint_cmp(trx_sys->max_trx_id, purge_sys->done_trx_no) < 0) { + export_vars.innodb_purge_trx_id_age = 0; + } else { + export_vars.innodb_purge_trx_id_age = + ut_dulint_minus(trx_sys->max_trx_id, purge_sys->done_trx_no); + } +#endif /* UNIV_DEBUG */ + mutex_exit(&srv_innodb_monitor_mutex); } @@ -2387,6 +2396,29 @@ loop: + buf_pool->n_pages_written; srv_main_thread_op_info = "sleeping"; +#ifdef UNIV_DEBUG + if (btr_cur_limit_optimistic_insert_debug) { + /* If btr_cur_limit_optimistic_insert_debug is enabled + and no purge_threads, purge opportunity is increased + by x100 (1purge/100msec), to speed up debug scripts + which should wait for purged. 
*/ + + if (!skip_sleep) { + os_thread_sleep(100000); + } + + do { + if (srv_fast_shutdown + && srv_shutdown_state > 0) { + goto background_loop; + } + + srv_main_thread_op_info = "purging"; + n_pages_purged = trx_purge(); + + } while (n_pages_purged); + } else +#endif /* UNIV_DEBUG */ if (!skip_sleep) { os_thread_sleep(1000000); diff --git a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c index 6fe5855ebf..e783f14c1e 100644 --- a/storage/innobase/trx/trx0purge.c +++ b/storage/innobase/trx/trx0purge.c @@ -209,6 +209,7 @@ trx_purge_sys_create(void) purge_sys->purge_trx_no = ut_dulint_zero; purge_sys->purge_undo_no = ut_dulint_zero; purge_sys->next_stored = FALSE; + ut_d(purge_sys->done_trx_no = ut_dulint_zero); rw_lock_create(&purge_sys->latch, SYNC_PURGE_LATCH); @@ -576,6 +577,7 @@ trx_purge_truncate_if_arr_empty(void) ut_ad(mutex_own(&(purge_sys->mutex))); if (purge_sys->arr->n_used == 0) { + ut_d(purge_sys->done_trx_no = purge_sys->purge_trx_no); trx_purge_truncate_history(); diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c index 9fef7843f9..04f3a79866 100644 --- a/storage/innodb_plugin/btr/btr0btr.c +++ b/storage/innodb_plugin/btr/btr0btr.c @@ -3072,6 +3072,8 @@ btr_lift_page_up( buf_block_t* blocks[BTR_MAX_LEVELS]; ulint n_blocks; /*!< last used index in blocks[] */ ulint i; + ibool lift_father_up = FALSE; + buf_block_t* block_orig = block; ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); @@ -3082,11 +3084,13 @@ btr_lift_page_up( { btr_cur_t cursor; - mem_heap_t* heap = mem_heap_create(100); - ulint* offsets; + ulint* offsets = NULL; + mem_heap_t* heap = mem_heap_create( + sizeof(*offsets) + * (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields)); buf_block_t* b; - offsets = btr_page_get_father_block(NULL, heap, index, + offsets = btr_page_get_father_block(offsets, heap, index, block, mtr, &cursor); father_block = btr_cur_get_block(&cursor); father_page_zip = 
buf_block_get_page_zip(father_block); @@ -3110,6 +3114,29 @@ btr_lift_page_up( blocks[n_blocks++] = b = btr_cur_get_block(&cursor); } + if (n_blocks && page_level == 0) { + /* The father page also should be the only on its level (not + root). We should lift up the father page at first. + Because the leaf page should be lifted up only for root page. + The freeing page is based on page_level (==0 or !=0) + to choose segment. If the page_level is changed ==0 from !=0, + later freeing of the page doesn't find the page allocation + to be freed.*/ + + lift_father_up = TRUE; + block = father_block; + page = buf_block_get_frame(block); + page_level = btr_page_get_level(page, mtr); + + ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); + ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); + ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + + father_block = blocks[0]; + father_page_zip = buf_block_get_page_zip(father_block); + father_page = buf_block_get_frame(father_block); + } + mem_heap_free(heap); } @@ -3117,6 +3144,7 @@ btr_lift_page_up( /* Make the father empty */ btr_page_empty(father_block, father_page_zip, index, page_level, mtr); + page_level++; /* Copy the records to the father page one by one. */ if (0 @@ -3149,7 +3177,7 @@ btr_lift_page_up( lock_update_copy_and_discard(father_block, block); /* Go upward to root page, decrementing levels by one. */ - for (i = 0; i < n_blocks; i++, page_level++) { + for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) { page_t* page = buf_block_get_frame(blocks[i]); page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]); @@ -3171,7 +3199,7 @@ btr_lift_page_up( ut_ad(page_validate(father_page, index)); ut_ad(btr_check_node_ptr(index, father_block, mtr)); - return(father_block); + return(lift_father_up ? 
block_orig : father_block); } /*************************************************************//** diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 6c67d27ffe..e38b8a9bf5 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -86,6 +86,11 @@ srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). */ UNIV_INTERN ulint btr_cur_n_sea_old = 0; +#ifdef UNIV_DEBUG +/* Flag to limit optimistic insert records */ +UNIV_INTERN uint btr_cur_limit_optimistic_insert_debug = 0; +#endif /* UNIV_DEBUG */ + /** In the optimistic insert, if the insert does not fit, but this much space can be released by page reorganize, then it is reorganized */ #define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32) @@ -1171,6 +1176,9 @@ btr_cur_optimistic_insert( } } + LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page), + goto fail); + /* If there have been many consecutive inserts, and we are on the leaf level, check if we have to split the page to reserve enough free space for future updates of records. 
*/ diff --git a/storage/innodb_plugin/handler/ha_innodb.cc b/storage/innodb_plugin/handler/ha_innodb.cc index bbdf5680e3..9ed452d980 100644 --- a/storage/innodb_plugin/handler/ha_innodb.cc +++ b/storage/innodb_plugin/handler/ha_innodb.cc @@ -577,6 +577,10 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_rows_read, SHOW_LONG}, {"rows_updated", (char*) &export_vars.innodb_rows_updated, SHOW_LONG}, +#ifdef UNIV_DEBUG + {"purge_trx_id_age", + (char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG}, +#endif /* UNIV_DEBUG */ {NullS, NullS, SHOW_LONG} }; @@ -11262,6 +11266,11 @@ static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug, PLUGIN_VAR_RQCMDARG, "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()", NULL, NULL, 0, 0, 1024, 0); + +static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug, + btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG, + "Artificially limit the number of records per B-tree page (0=unlimited).", + NULL, NULL, 0, 0, UINT_MAX32, 0); #endif /* UNIV_DEBUG */ static struct st_mysql_sys_var* innobase_system_variables[]= { @@ -11327,6 +11336,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(io_capacity), #ifdef UNIV_DEBUG MYSQL_SYSVAR(trx_rseg_n_slots_debug), + MYSQL_SYSVAR(limit_optimistic_insert_debug), #endif /* UNIV_DEBUG */ NULL }; diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h index 1c42116782..7744d2d1ee 100644 --- a/storage/innodb_plugin/include/btr0cur.h +++ b/storage/innodb_plugin/include/btr0cur.h @@ -795,6 +795,11 @@ srv_printf_innodb_monitor(). 
*/ extern ulint btr_cur_n_sea_old; #endif /* !UNIV_HOTBACKUP */ +#ifdef UNIV_DEBUG +/* Flag to limit optimistic insert records */ +extern uint btr_cur_limit_optimistic_insert_debug; +#endif /* UNIV_DEBUG */ + #ifndef UNIV_NONINL #include "btr0cur.ic" #endif diff --git a/storage/innodb_plugin/include/btr0cur.ic b/storage/innodb_plugin/include/btr0cur.ic index e31f77c77e..5fc4651ca1 100644 --- a/storage/innodb_plugin/include/btr0cur.ic +++ b/storage/innodb_plugin/include/btr0cur.ic @@ -26,6 +26,16 @@ Created 10/16/1994 Heikki Tuuri #ifndef UNIV_HOTBACKUP #include "btr0btr.h" +#ifdef UNIV_DEBUG +# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\ +if (btr_cur_limit_optimistic_insert_debug\ + && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\ + CODE;\ +} +#else +# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE) +#endif /* UNIV_DEBUG */ + #ifdef UNIV_DEBUG /*********************************************************//** Returns the page cursor component of a tree cursor. @@ -146,6 +156,9 @@ btr_cur_compress_recommendation( page = btr_cur_get_page(cursor); + LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2, + return(FALSE)); + if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT) || ((btr_page_get_next(page, mtr) == FIL_NULL) && (btr_page_get_prev(page, mtr) == FIL_NULL))) { diff --git a/storage/innodb_plugin/include/srv0srv.h b/storage/innodb_plugin/include/srv0srv.h index 1a9f54882c..089a4c1c77 100644 --- a/storage/innodb_plugin/include/srv0srv.h +++ b/storage/innodb_plugin/include/srv0srv.h @@ -650,6 +650,9 @@ struct export_var_struct{ ulint innodb_rows_inserted; /*!< srv_n_rows_inserted */ ulint innodb_rows_updated; /*!< srv_n_rows_updated */ ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */ +#ifdef UNIV_DEBUG + ulint innodb_purge_trx_id_age; /*!< max_trx_id - purged trx_id */ +#endif /* UNIV_DEBUG */ }; /** The server system struct */ diff --git a/storage/innodb_plugin/include/trx0purge.h b/storage/innodb_plugin/include/trx0purge.h index 
37d3795efb..f221a65d07 100644 --- a/storage/innodb_plugin/include/trx0purge.h +++ b/storage/innodb_plugin/include/trx0purge.h @@ -153,6 +153,10 @@ struct trx_purge_struct{ than this */ undo_no_t purge_undo_no; /*!< Purge has advanced past all records whose undo number is less than this */ +#ifdef UNIV_DEBUG + trx_id_t done_trx_no; /* Indicate 'purge pointer' which have + purged already accurately. */ +#endif /* UNIV_DEBUG */ /*-----------------------------*/ ibool next_stored; /*!< TRUE if the info of the next record to purge is stored below: if yes, then diff --git a/storage/innodb_plugin/srv/srv0srv.c b/storage/innodb_plugin/srv/srv0srv.c index 8ad4c02e32..2d48fcc6b1 100644 --- a/storage/innodb_plugin/srv/srv0srv.c +++ b/storage/innodb_plugin/srv/srv0srv.c @@ -1964,6 +1964,15 @@ srv_export_innodb_status(void) export_vars.innodb_rows_updated = srv_n_rows_updated; export_vars.innodb_rows_deleted = srv_n_rows_deleted; +#ifdef UNIV_DEBUG + if (ut_dulint_cmp(trx_sys->max_trx_id, purge_sys->done_trx_no) < 0) { + export_vars.innodb_purge_trx_id_age = 0; + } else { + export_vars.innodb_purge_trx_id_age = + ut_dulint_minus(trx_sys->max_trx_id, purge_sys->done_trx_no); + } +#endif /* UNIV_DEBUG */ + mutex_exit(&srv_innodb_monitor_mutex); } @@ -2468,6 +2477,30 @@ loop: srv_main_thread_op_info = "sleeping"; srv_main_1_second_loops++; +#ifdef UNIV_DEBUG + if (btr_cur_limit_optimistic_insert_debug) { + /* If btr_cur_limit_optimistic_insert_debug is enabled + and no purge_threads, purge opportunity is increased + by x100 (1purge/100msec), to speed up debug scripts + which should wait for purged. 
*/ + + if (!skip_sleep) { + os_thread_sleep(100000); + srv_main_sleeps++; + } + + do { + if (srv_fast_shutdown + && srv_shutdown_state > 0) { + goto background_loop; + } + + srv_main_thread_op_info = "purging"; + n_pages_purged = trx_purge(); + + } while (n_pages_purged); + } else +#endif /* UNIV_DEBUG */ if (!skip_sleep) { os_thread_sleep(1000000); diff --git a/storage/innodb_plugin/trx/trx0purge.c b/storage/innodb_plugin/trx/trx0purge.c index 56607c9ff9..ea508c1003 100644 --- a/storage/innodb_plugin/trx/trx0purge.c +++ b/storage/innodb_plugin/trx/trx0purge.c @@ -226,6 +226,7 @@ trx_purge_sys_create(void) purge_sys->purge_trx_no = ut_dulint_zero; purge_sys->purge_undo_no = ut_dulint_zero; purge_sys->next_stored = FALSE; + ut_d(purge_sys->done_trx_no = ut_dulint_zero); rw_lock_create(&purge_sys->latch, SYNC_PURGE_LATCH); @@ -637,6 +638,7 @@ trx_purge_truncate_if_arr_empty(void) ut_ad(mutex_own(&(purge_sys->mutex))); if (purge_sys->arr->n_used == 0) { + ut_d(purge_sys->done_trx_no = purge_sys->purge_trx_no); trx_purge_truncate_history(); -- 2.30.9