Commit 5da6bd7b authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-11027 InnoDB log recovery is too noisy

Provide more useful progress reporting of crash recovery.

recv_sys_t::progress_time: The time of the last report.

recv_sys_t::report(ib_time_t): Determine whether progress should
be reported.

recv_scan_print_counter: Remove.

log_group_read_log_seg(): After after each I/O request, invoke
recv_sys_t::report() and report progress if needed.

recv_apply_hashed_log_recs(): Change the return type back to void
(DB_SUCCESS was always returned), and rename the parameter to last_batch.
At the start of each batch, if there are pages to be recovered,
issue a message.
parent 0b1abc2f
......@@ -66,7 +66,8 @@ SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
FOUND /InnoDB: Log scan progressed past the checkpoint lsn 1213964/ in mysqld.1.err
FOUND /InnoDB: Starting crash recovery from checkpoint LSN=1213964/ in mysqld.1.err
FOUND /InnoDB: MLOG_FILE_NAME incorrect:bogus/ in mysqld.1.err
FOUND /InnoDB: ############### CORRUPT LOG RECORD FOUND ##################/ in mysqld.1.err
FOUND /InnoDB: Log record type 55, page 151:488\. Log parsing proceeded successfully up to 1213973\. Previous log record type 56, is multi 0 Recv offset 9, prev 0/ in mysqld.1.err
FOUND /len 22. hex 38000000000012860cb7809781e80006626f67757300. asc 8 bogus / in mysqld.1.err
......@@ -83,6 +84,7 @@ SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
FOUND /InnoDB: MLOG_FILE_NAME incorrect:bigot/ in mysqld.1.err
FOUND /len 22; hex 38000000000012860cb7809781e800066269676f7400; asc 8 bigot ;/ in mysqld.1.err
# missing MLOG_FILE_NAME or MLOG_FILE_DELETE before MLOG_CHECKPOINT
SELECT * FROM INFORMATION_SCHEMA.ENGINES
......
......@@ -66,7 +66,8 @@ SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
FOUND /InnoDB: Log scan progressed past the checkpoint lsn 1213964/ in mysqld.1.err
FOUND /InnoDB: Starting crash recovery from checkpoint LSN=1213964/ in mysqld.1.err
FOUND /InnoDB: MLOG_FILE_NAME incorrect:bogus/ in mysqld.1.err
FOUND /InnoDB: ############### CORRUPT LOG RECORD FOUND ##################/ in mysqld.1.err
FOUND /InnoDB: Log record type 55, page 151:488\. Log parsing proceeded successfully up to 1213973\. Previous log record type 56, is multi 0 Recv offset 9, prev 0/ in mysqld.1.err
FOUND /len 22. hex 38000000000012860cb7809781e80006626f67757300. asc 8 bogus / in mysqld.1.err
......@@ -83,6 +84,7 @@ SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
FOUND /InnoDB: MLOG_FILE_NAME incorrect:bigot/ in mysqld.1.err
FOUND /len 22; hex 38000000000012860cb7809781e800066269676f7400; asc 8 bigot ;/ in mysqld.1.err
# missing MLOG_FILE_NAME or MLOG_FILE_DELETE before MLOG_CHECKPOINT
SELECT * FROM INFORMATION_SCHEMA.ENGINES
......
......@@ -262,7 +262,9 @@ EOF
--source include/start_mysqld.inc
eval $check_no_innodb;
--source include/shutdown_mysqld.inc
let SEARCH_PATTERN=InnoDB: Log scan progressed past the checkpoint lsn 1213964;
let SEARCH_PATTERN=InnoDB: Starting crash recovery from checkpoint LSN=1213964;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN=InnoDB: MLOG_FILE_NAME incorrect:bogus;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN=InnoDB: ############### CORRUPT LOG RECORD FOUND ##################;
--source include/search_pattern_in_file.inc
......@@ -314,6 +316,8 @@ EOF
--source include/start_mysqld.inc
eval $check_no_innodb;
--source include/shutdown_mysqld.inc
let SEARCH_PATTERN=InnoDB: MLOG_FILE_NAME incorrect:bigot;
--source include/search_pattern_in_file.inc
--let SEARCH_PATTERN= len 22; hex 38000000000012860cb7809781e800066269676f7400; asc 8 bigot ;
--source include/search_pattern_in_file.inc
......
......@@ -73,15 +73,13 @@ let SEARCH_PATTERN= syntax error in innodb_log_group_home_dir;
--source include/restart_mysqld.inc
--error ER_UNKNOWN_STORAGE_ENGINE
SELECT * FROM t1;
let SEARCH_PATTERN= InnoDB: Starting an apply batch of log records;
let SEARCH_PATTERN= InnoDB: Starting crash recovery from checkpoint LSN=;
--source include/search_pattern_in_file.inc
--let $restart_parameters= --debug=d,innodb_log_abort_3
--source include/restart_mysqld.inc
--error ER_UNKNOWN_STORAGE_ENGINE
SELECT * FROM t1;
let SEARCH_PATTERN= InnoDB: Starting an apply batch of log records;
--source include/search_pattern_in_file.inc
--let $restart_parameters= --innodb-read-only
--source include/restart_mysqld.inc
......@@ -95,8 +93,6 @@ let SEARCH_PATTERN= InnoDB: innodb_read_only prevents crash recovery;
--source include/restart_mysqld.inc
--error ER_UNKNOWN_STORAGE_ENGINE
SELECT * FROM t1;
let SEARCH_PATTERN= InnoDB: Starting an apply batch of log records;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN= redo log from 3\*[0-9]+ to 2\*[0-9]+ pages;
--source include/search_pattern_in_file.inc
......@@ -104,8 +100,6 @@ let SEARCH_PATTERN= redo log from 3\*[0-9]+ to 2\*[0-9]+ pages;
--source include/restart_mysqld.inc
--error ER_UNKNOWN_STORAGE_ENGINE
SELECT * FROM t1;
let SEARCH_PATTERN= InnoDB: Starting an apply batch of log records;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN= redo log from 3\*[0-9]+ to 2\*[0-9]+ pages;
--source include/search_pattern_in_file.inc
......@@ -121,8 +115,6 @@ let SEARCH_PATTERN= InnoDB: innodb_read_only prevents crash recovery;
--error ER_UNKNOWN_STORAGE_ENGINE
SELECT * FROM t1;
let SEARCH_PATTERN= InnoDB: Starting an apply batch of log records;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN= redo log from 3\*[0-9]+ to 2\*[0-9]+ pages;
--source include/search_pattern_in_file.inc
......
......@@ -105,20 +105,12 @@ Reset the state of the recovery system variables. */
void
recv_sys_var_init(void);
/*===================*/
/*******************************************************************//**
Empties the hash table of stored log records, applying them to appropriate
pages. */
dberr_t
recv_apply_hashed_log_recs(
/*=======================*/
ibool allow_ibuf) /*!< in: if TRUE, also ibuf operations are
allowed during the application; if FALSE,
no ibuf operations are allowed, and after
the application all file pages are flushed to
disk and invalidated in buffer pool: this
alternative means that no new log records
can be generated during the application */
__attribute__((warn_unused_result));
/** Apply the hash table of stored log records to persistent data pages.
@param[in] last_batch whether the change buffer merge will be
performed as part of the operation */
void
recv_apply_hashed_log_recs(bool last_batch);
/** Block of log record data */
struct recv_data_t{
......@@ -244,6 +236,8 @@ struct recv_sys_t{
lsn_t mlog_checkpoint_lsn;
/*!< the LSN of a MLOG_CHECKPOINT
record, or 0 if none was parsed */
/** the time when progress was last reported */
ib_time_t progress_time;
mem_heap_t* heap; /*!< memory heap of log records and file
addresses*/
hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
......@@ -251,6 +245,20 @@ struct recv_sys_t{
addresses in the hash table */
recv_dblwr_t dblwr;
/** Determine whether redo log recovery progress should be reported.
@param[in] time the current time
@return whether progress should be reported
(the last report was at least 15 seconds ago) */
bool report(ib_time_t time)
{
if (time - progress_time < 15) {
return false;
}
progress_time = time;
return true;
}
};
/** The recovery system */
......
......@@ -1515,7 +1515,6 @@ log_preflush_pool_modified_pages(
bool success;
if (recv_recovery_on) {
dberr_t err = DB_SUCCESS;
/* If the recovery is running, we must first apply all
log records to their respective file pages to get the
right modify lsn values to these pages: otherwise, there
......@@ -1524,13 +1523,7 @@ log_preflush_pool_modified_pages(
not know how up-to-date the disk version of the database is,
and we could not make a new checkpoint on the basis of the
info on the buffer pool only. */
err = recv_apply_hashed_log_recs(TRUE);
if (err != DB_SUCCESS) {
ib::warn() << "recv_apply_hashed_log_recs failed err: "
<< err << " file: " << __FILE__ << " line: " << __LINE__;
}
recv_apply_hashed_log_recs(true);
}
if (new_oldest == LSN_MAX
......@@ -1775,14 +1768,7 @@ log_checkpoint(
os_thread_sleep(360000000););
if (recv_recovery_is_on()) {
dberr_t err = DB_SUCCESS;
err = recv_apply_hashed_log_recs(TRUE);
if (err != DB_SUCCESS) {
ib::warn() << "recv_apply_hashed_log_recs failed err: "
<< err << " file: " << __FILE__ << " line: " << __LINE__;
}
recv_apply_hashed_log_recs(true);
}
#ifndef _WIN32
......
......@@ -69,7 +69,7 @@ this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
#define RECV_READ_AHEAD_AREA 32
/** The recovery system */
recv_sys_t* recv_sys = NULL;
recv_sys_t* recv_sys;
/** TRUE when applying redo log records during crash recovery; FALSE
otherwise. Note that this is FALSE while a background thread is
rolling back incomplete transactions. */
......@@ -98,9 +98,6 @@ buffer pool before the pages have been recovered to the up-to-date state.
TRUE means that recovery is running and no operations on the log files
are allowed yet: the variable name is misleading. */
bool recv_no_ibuf_operations;
/** The following counter is used to decide when to print info on
log scan */
static ulint recv_scan_print_counter;
/** The type of the previous parsed redo log record */
static mlog_id_t recv_previous_parsed_rec_type;
......@@ -516,7 +513,6 @@ recv_sys_var_init(void)
recv_needed_recovery = false;
recv_lsn_checks_on = false;
recv_no_ibuf_operations = false;
recv_scan_print_counter = 0;
recv_previous_parsed_rec_type = MLOG_SINGLE_REC_FLAG;
recv_previous_parsed_rec_offset = 0;
recv_previous_parsed_rec_is_multi = 0;
......@@ -622,6 +618,7 @@ recv_sys_init(
recv_sys->found_corrupt_log = false;
recv_sys->found_corrupt_fs = false;
recv_sys->mlog_checkpoint_lsn = 0;
recv_sys->progress_time = ut_time();
recv_max_page_lsn = 0;
......@@ -631,19 +628,13 @@ recv_sys_init(
mutex_exit(&(recv_sys->mutex));
}
/********************************************************//**
Empties the hash table when it has been fully processed.*/
/** Empty a fully processed hash table. */
static
void
recv_sys_empty_hash(void)
/*=====================*/
recv_sys_empty_hash()
{
ut_ad(mutex_own(&(recv_sys->mutex)));
if (recv_sys->n_addrs != 0) {
ib::fatal() << recv_sys->n_addrs << " pages with log records"
" were left unprocessed!";
}
ut_a(recv_sys->n_addrs == 0);
hash_table_free(recv_sys->addr_hash);
mem_heap_empty(recv_sys->heap);
......@@ -740,7 +731,8 @@ log_group_read_log_seg(
happen when InnoDB was killed while it was
writing redo log. We simply treat this as an
abrupt end of the redo log. */
return(start_lsn);
end_lsn = start_lsn;
break;
}
if (innodb_log_checksums || group->is_encrypted()) {
......@@ -754,7 +746,8 @@ log_group_read_log_seg(
<< log_block_get_checkpoint_no(buf)
<< " expected: " << crc
<< " found: " << cksum;
return(start_lsn);
end_lsn = start_lsn;
break;
}
if (group->is_encrypted()) {
......@@ -763,8 +756,13 @@ log_group_read_log_seg(
}
}
if (start_lsn != end_lsn) {
if (recv_sys->report(ut_time())) {
ib::info() << "Read redo log up to LSN=" << start_lsn;
sd_notifyf(0, "STATUS=Read redo log up to LSN=" LSN_PF,
start_lsn);
}
if (start_lsn != end_lsn) {
goto loop;
}
......@@ -1911,6 +1909,8 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
mtr_commit(&mtr);
ib_time_t time = ut_time();
mutex_enter(&(recv_sys->mutex));
if (recv_max_page_lsn < page_lsn) {
......@@ -1919,11 +1919,16 @@ recv_recover_page(bool just_read_in, buf_block_t* block)
recv_addr->state = RECV_PROCESSED;
ut_a(recv_sys->n_addrs);
recv_sys->n_addrs--;
mutex_exit(&(recv_sys->mutex));
ut_a(recv_sys->n_addrs > 0);
if (ulint n = --recv_sys->n_addrs) {
if (recv_sys->report(time)) {
ib::info() << "To recover: " << n << " pages from log";
sd_notifyf(0, "STATUS=To recover: " ULINTPF
" pages from log", n);
}
}
mutex_exit(&recv_sys->mutex);
}
/** Reads in pages which have hashed log records, from an area around a given
......@@ -1970,61 +1975,48 @@ recv_read_in_area(
}
buf_read_recv_pages(FALSE, page_id.space(), page_nos, n);
/*
fprintf(stderr, "Recv pages at %lu n %lu\n", page_nos[0], n);
*/
return(n);
}
/*******************************************************************//**
Empties the hash table of stored log records, applying them to appropriate
pages.
@return DB_SUCCESS when successfull or DB_ERROR when fails. */
dberr_t
recv_apply_hashed_log_recs(
/*=======================*/
ibool allow_ibuf) /*!< in: if TRUE, also ibuf operations are
allowed during the application; if FALSE,
no ibuf operations are allowed, and after
the application all file pages are flushed to
disk and invalidated in buffer pool: this
alternative means that no new log records
can be generated during the application;
the caller must in this case own the log
mutex */
/** Apply the hash table of stored log records to persistent data pages.
@param[in] last_batch whether the change buffer merge will be
performed as part of the operation */
void
recv_apply_hashed_log_recs(bool last_batch)
{
recv_addr_t* recv_addr;
ulint i;
ibool has_printed = FALSE;
mtr_t mtr;
ulint progress = 0;
dberr_t err = DB_SUCCESS;
loop:
mutex_enter(&(recv_sys->mutex));
for (;;) {
mutex_enter(&recv_sys->mutex);
if (recv_sys->apply_batch_on) {
mutex_exit(&(recv_sys->mutex));
if (!recv_sys->apply_batch_on) {
break;
}
mutex_exit(&recv_sys->mutex);
os_thread_sleep(500000);
goto loop;
}
ut_ad(!allow_ibuf == log_mutex_own());
ut_ad(!last_batch == log_mutex_own());
if (!allow_ibuf) {
if (!last_batch) {
recv_no_ibuf_operations = true;
}
if (ulint n = recv_sys->n_addrs) {
const char* msg = last_batch
? "Starting final batch to recover "
: "Starting a batch to recover ";
ib::info() << msg << n << " pages from redo log.";
sd_notifyf(0, "STATUS=%s" ULINTPF " pages from redo log",
msg, n);
}
recv_sys->apply_log_recs = TRUE;
recv_sys->apply_batch_on = TRUE;
for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
for (ulint i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
for (recv_addr = static_cast<recv_addr_t*>(
for (recv_addr_t* recv_addr = static_cast<recv_addr_t*>(
HASH_GET_FIRST(recv_sys->addr_hash, i));
recv_addr != 0;
recv_addr;
recv_addr = static_cast<recv_addr_t*>(
HASH_GET_NEXT(addr_hash, recv_addr))) {
......@@ -2053,23 +2045,13 @@ recv_apply_hashed_log_recs(
ut_ad(found);
if (recv_addr->state == RECV_NOT_PROCESSED) {
if (!has_printed) {
ib::info() << "Starting an apply batch"
" of log records"
" to the database...";
fputs("InnoDB: Progress in percent: ",
stderr);
has_printed = TRUE;
}
mutex_exit(&(recv_sys->mutex));
mutex_exit(&recv_sys->mutex);
if (buf_page_peek(page_id)) {
buf_block_t* block;
mtr_start(&mtr);
mtr_t mtr;
mtr.start();
block = buf_page_get(
buf_block_t* block = buf_page_get(
page_id, page_size,
RW_X_LATCH, &mtr);
......@@ -2077,7 +2059,7 @@ recv_apply_hashed_log_recs(
block, SYNC_NO_ORDER_CHECK);
recv_recover_page(FALSE, block);
mtr_commit(&mtr);
mtr.commit();
} else {
recv_read_in_area(page_id);
}
......@@ -2085,19 +2067,6 @@ recv_apply_hashed_log_recs(
mutex_enter(&(recv_sys->mutex));
}
}
progress = (ulint) ((i * 100) / hash_get_n_cells(recv_sys->addr_hash));
if (has_printed
&& progress
&& (i * 100) / hash_get_n_cells(recv_sys->addr_hash)
!= ((i + 1) * 100)
/ hash_get_n_cells(recv_sys->addr_hash)) {
fprintf(stderr, "%lu ", progress);
sd_notifyf(0, "STATUS=Applying batch of log records for"
" InnoDB: Progress %lu", progress);
}
}
/* Wait until all the pages have been processed */
......@@ -2111,13 +2080,7 @@ recv_apply_hashed_log_recs(
mutex_enter(&(recv_sys->mutex));
}
if (has_printed) {
fprintf(stderr, "\n");
}
if (!allow_ibuf) {
if (!last_batch) {
/* Flush all the file pages to disk and invalidate them in
the buffer pool */
......@@ -2154,14 +2117,7 @@ recv_apply_hashed_log_recs(
recv_sys_empty_hash();
if (has_printed) {
ib::info() << "Apply batch completed";
sd_notify(0, "STATUS=InnoDB: Apply batch completed");
}
mutex_exit(&(recv_sys->mutex));
return err;
mutex_exit(&recv_sys->mutex);
}
/** Tries to parse a single log record.
......@@ -2838,11 +2794,6 @@ recv_scan_log_recs(
scanned_lsn += data_len;
if (scanned_lsn > recv_sys->scanned_lsn) {
/* We have found more entries. If this scan is
of startup type, we must initiate crash recovery
environment before parsing these log records. */
if (!recv_needed_recovery) {
recv_needed_recovery = true;
......@@ -2852,8 +2803,8 @@ recv_scan_log_recs(
return(true);
}
ib::info() << "Log scan progressed "
"past the checkpoint lsn "
ib::info() << "Starting crash recovery from"
" checkpoint LSN="
<< recv_sys->scanned_lsn;
}
......@@ -2895,16 +2846,6 @@ recv_scan_log_recs(
*group_scanned_lsn = scanned_lsn;
if (recv_needed_recovery) {
recv_scan_print_counter++;
if (finished || (recv_scan_print_counter % 80 == 0)) {
ib::info() << "Doing recovery: scanned up to"
" log sequence number " << scanned_lsn;
}
}
if (more_data && !recv_sys->found_corrupt_log) {
/* Try to parse more log records */
......@@ -2987,9 +2928,7 @@ recv_group_scan_log_recs(
merge here, because it would generate
redo log records before we have
finished the redo log scan. */
if (recv_apply_hashed_log_recs(FALSE) != DB_SUCCESS) {
DBUG_RETURN(false);
}
recv_apply_hashed_log_recs(false);
}
start_lsn = end_lsn;
......@@ -3056,9 +2995,6 @@ recv_init_crash_recovery_spaces(void)
ut_ad(!srv_read_only_mode);
ut_ad(recv_needed_recovery);
ib::info() << "Database was not shutdown normally!";
ib::info() << "Starting crash recovery.";
for (recv_spaces_t::iterator i = recv_spaces.begin();
i != recv_spaces.end(); i++) {
ut_ad(!is_predefined_tablespace(i->first));
......@@ -3262,9 +3198,8 @@ recv_recovery_from_checkpoint_start(
}
if (recv_sys->mlog_checkpoint_lsn == 0) {
if (!srv_read_only_mode
&& group->scanned_lsn != checkpoint_lsn) {
lsn_t scan_lsn = group->scanned_lsn;
lsn_t scan_lsn = group->scanned_lsn;
if (!srv_read_only_mode && scan_lsn != checkpoint_lsn) {
log_mutex_exit();
ib::error err;
err << "Missing MLOG_CHECKPOINT";
......
......@@ -2225,7 +2225,7 @@ innobase_start_or_create_for_mysql(void)
return(srv_init_abort(err));
}
/* This must precede recv_apply_hashed_log_recs(TRUE). */
/* This must precede recv_apply_hashed_log_recs(true). */
purge_queue = trx_sys_init_at_db_start();
if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
......@@ -2233,14 +2233,9 @@ innobase_start_or_create_for_mysql(void)
respective file pages, for the last batch of
recv_group_scan_log_recs(). */
err = recv_apply_hashed_log_recs(TRUE);
recv_apply_hashed_log_recs(true);
DBUG_PRINT("ib_log", ("apply completed"));
if (err != DB_SUCCESS) {
UT_DELETE(purge_queue);
return(srv_init_abort(err));
}
if (recv_needed_recovery) {
trx_sys_print_mysql_binlog_offset();
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment