Commit 1ff6b6f0 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-34802 Recovery fails to note some log corruption

recv_recovery_from_checkpoint_start(): Abort startup due to log
corruption if we were unable to parse the entire log between
the latest log checkpoint and the corresponding FILE_CHECKPOINT record.

Also, reduce some code bloat related to log output and log_sys.mutex.

Reviewed by: Debarun Banerjee
parent e7bb9b7c
...@@ -3474,10 +3474,9 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3474,10 +3474,9 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
ut_d(mysql_mutex_unlock(&buf_pool.flush_list_mutex)); ut_d(mysql_mutex_unlock(&buf_pool.flush_list_mutex));
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) { if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
sql_print_information("InnoDB: innodb_force_recovery=6"
ib::info() << "innodb_force_recovery=6 skips redo log apply"; " skips redo log apply");
return err;
return(DB_SUCCESS);
} }
mysql_mutex_lock(&log_sys.mutex); mysql_mutex_lock(&log_sys.mutex);
...@@ -3493,13 +3492,9 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3493,13 +3492,9 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
ut_ad(RECV_SCAN_SIZE <= srv_log_buffer_size); ut_ad(RECV_SCAN_SIZE <= srv_log_buffer_size);
ut_ad(recv_sys.pages.empty()); ut_ad(recv_sys.pages.empty());
contiguous_lsn = checkpoint_lsn; contiguous_lsn = checkpoint_lsn;
switch (log_sys.log.format) { switch (log_sys.log.format) {
case 0:
mysql_mutex_unlock(&log_sys.mutex);
return DB_SUCCESS;
default: default:
if (end_lsn == 0) { if (end_lsn == 0) {
break; break;
...@@ -3509,8 +3504,13 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3509,8 +3504,13 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
break; break;
} }
recv_sys.found_corrupt_log = true; recv_sys.found_corrupt_log = true;
err_exit:
err = DB_ERROR;
/* fall through */
func_exit:
case 0:
mysql_mutex_unlock(&log_sys.mutex); mysql_mutex_unlock(&log_sys.mutex);
return(DB_ERROR); return err;
} }
size_t sizeof_checkpoint; size_t sizeof_checkpoint;
...@@ -3527,14 +3527,15 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3527,14 +3527,15 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
ut_ad(!recv_sys.found_corrupt_fs || !srv_force_recovery); ut_ad(!recv_sys.found_corrupt_fs || !srv_force_recovery);
if (srv_read_only_mode && recv_needed_recovery) { if (srv_read_only_mode && recv_needed_recovery) {
mysql_mutex_unlock(&log_sys.mutex); read_only:
return(DB_READ_ONLY); err = DB_READ_ONLY;
goto func_exit;
} }
if (recv_sys.found_corrupt_log && !srv_force_recovery) { if (recv_sys.found_corrupt_log && !srv_force_recovery) {
mysql_mutex_unlock(&log_sys.mutex); sql_print_warning("InnoDB: Log scan aborted at LSN " LSN_PF,
ib::warn() << "Log scan aborted at LSN " << contiguous_lsn; contiguous_lsn);
return(DB_ERROR); goto err_exit;
} }
/* If we fail to open a tablespace while looking for FILE_CHECKPOINT, we /* If we fail to open a tablespace while looking for FILE_CHECKPOINT, we
...@@ -3542,14 +3543,12 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3542,14 +3543,12 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
would not be able to open an encrypted tablespace and the flag could be would not be able to open an encrypted tablespace and the flag could be
set. */ set. */
if (recv_sys.found_corrupt_fs) { if (recv_sys.found_corrupt_fs) {
mysql_mutex_unlock(&log_sys.mutex); goto err_exit;
return DB_ERROR;
} }
if (recv_sys.mlog_checkpoint_lsn == 0) { if (recv_sys.mlog_checkpoint_lsn == 0) {
lsn_t scan_lsn = log_sys.log.scanned_lsn; lsn_t scan_lsn = log_sys.log.scanned_lsn;
if (!srv_read_only_mode && scan_lsn != checkpoint_lsn) { if (!srv_read_only_mode && scan_lsn != checkpoint_lsn) {
mysql_mutex_unlock(&log_sys.mutex);
ib::error err; ib::error err;
err << "Missing FILE_CHECKPOINT"; err << "Missing FILE_CHECKPOINT";
if (end_lsn) { if (end_lsn) {
...@@ -3557,7 +3556,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3557,7 +3556,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
} }
err << " between the checkpoint " << checkpoint_lsn err << " between the checkpoint " << checkpoint_lsn
<< " and the end " << scan_lsn << "."; << " and the end " << scan_lsn << ".";
return(DB_ERROR); goto err_exit;
} }
log_sys.log.scanned_lsn = checkpoint_lsn; log_sys.log.scanned_lsn = checkpoint_lsn;
...@@ -3568,8 +3567,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3568,8 +3567,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
if ((recv_sys.found_corrupt_log && !srv_force_recovery) if ((recv_sys.found_corrupt_log && !srv_force_recovery)
|| recv_sys.found_corrupt_fs) { || recv_sys.found_corrupt_fs) {
mysql_mutex_unlock(&log_sys.mutex); goto err_exit;
return(DB_ERROR);
} }
} }
...@@ -3599,19 +3597,17 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3599,19 +3597,17 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
} }
if (!recv_needed_recovery) { if (!recv_needed_recovery) {
sql_print_information(
ib::info() "InnoDB: The log sequence number " LSN_PF
<< "The log sequence number " << flush_lsn " in the system tablespace does not match"
<< " in the system tablespace does not match" " the log sequence number " LSN_PF
" the log sequence number " " in the ib_logfile0!",
<< checkpoint_lsn << " in the " flush_lsn, checkpoint_lsn);
<< LOG_FILE_NAME << "!";
if (srv_read_only_mode) { if (srv_read_only_mode) {
ib::error() << "innodb_read_only" sql_print_error("InnoDB: innodb_read_only"
" prevents crash recovery"; " prevents crash recovery");
mysql_mutex_unlock(&log_sys.mutex); goto read_only;
return(DB_READ_ONLY);
} }
recv_needed_recovery = true; recv_needed_recovery = true;
...@@ -3632,8 +3628,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3632,8 +3628,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
rescan, missing_tablespace); rescan, missing_tablespace);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
mysql_mutex_unlock(&log_sys.mutex); goto func_exit;
return(err);
} }
/* If there is any missing tablespace and rescan is needed /* If there is any missing tablespace and rescan is needed
...@@ -3662,8 +3657,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3662,8 +3657,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
rescan, missing_tablespace); rescan, missing_tablespace);
if (err != DB_SUCCESS) { if (err != DB_SUCCESS) {
mysql_mutex_unlock(&log_sys.mutex); goto func_exit;
return err;
} }
rescan = true; rescan = true;
...@@ -3686,8 +3680,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3686,8 +3680,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
if ((recv_sys.found_corrupt_log if ((recv_sys.found_corrupt_log
&& !srv_force_recovery) && !srv_force_recovery)
|| recv_sys.found_corrupt_fs) { || recv_sys.found_corrupt_fs) {
mysql_mutex_unlock(&log_sys.mutex); goto err_exit;
return(DB_ERROR);
} }
/* In case of multi-batch recovery, /* In case of multi-batch recovery,
...@@ -3699,26 +3692,26 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3699,26 +3692,26 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
ut_ad(!rescan || recv_sys.pages.empty()); ut_ad(!rescan || recv_sys.pages.empty());
} }
if (log_sys.is_physical() if (!log_sys.is_physical()) {
&& (log_sys.log.scanned_lsn < checkpoint_lsn } else if (recv_sys.recovered_lsn < checkpoint_lsn
|| log_sys.log.scanned_lsn < recv_max_page_lsn)) { || recv_sys.recovered_lsn < end_lsn) {
sql_print_error("InnoDB: The log was only scanned up to "
ib::error() << "We scanned the log up to " LSN_PF ", while the current LSN at the "
<< log_sys.log.scanned_lsn "time of the latest checkpoint " LSN_PF
<< ". A checkpoint was at " << checkpoint_lsn << " and" " was " LSN_PF "!",
" the maximum LSN on a database page was " recv_sys.recovered_lsn,
<< recv_max_page_lsn << ". It is possible that the" checkpoint_lsn, end_lsn);
" database is now corrupt!"; goto err_exit;
} } else if (log_sys.log.scanned_lsn < checkpoint_lsn
|| log_sys.log.scanned_lsn < end_lsn
if (recv_sys.recovered_lsn < checkpoint_lsn) { || log_sys.log.scanned_lsn < recv_max_page_lsn) {
mysql_mutex_unlock(&log_sys.mutex); sql_print_error("InnoDB: We scanned the log up to " LSN_PF
". A checkpoint was at " LSN_PF
ib::error() << "Recovered only to lsn:" " and the maximum LSN on a database page was "
<< recv_sys.recovered_lsn LSN_PF ". It is possible that the"
<< " checkpoint_lsn: " << checkpoint_lsn; " database is now corrupt!",
log_sys.log.scanned_lsn, checkpoint_lsn,
return(DB_ERROR); recv_max_page_lsn);
} }
log_sys.next_checkpoint_lsn = checkpoint_lsn; log_sys.next_checkpoint_lsn = checkpoint_lsn;
...@@ -3750,9 +3743,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3750,9 +3743,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
log_sys.next_checkpoint_no = ++checkpoint_no; log_sys.next_checkpoint_no = ++checkpoint_no;
DBUG_EXECUTE_IF("before_final_redo_apply", DBUG_EXECUTE_IF("before_final_redo_apply", goto err_exit;);
mysql_mutex_unlock(&log_sys.mutex);
return DB_ERROR;);
mutex_enter(&recv_sys.mutex); mutex_enter(&recv_sys.mutex);
recv_sys.apply_log_recs = true; recv_sys.apply_log_recs = true;
...@@ -3760,17 +3751,14 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn) ...@@ -3760,17 +3751,14 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
ut_d(recv_no_log_write = srv_operation == SRV_OPERATION_RESTORE ut_d(recv_no_log_write = srv_operation == SRV_OPERATION_RESTORE
|| srv_operation == SRV_OPERATION_RESTORE_EXPORT); || srv_operation == SRV_OPERATION_RESTORE_EXPORT);
mutex_exit(&recv_sys.mutex);
mysql_mutex_unlock(&log_sys.mutex);
recv_lsn_checks_on = true; recv_lsn_checks_on = true;
mutex_exit(&recv_sys.mutex);
/* The database is now ready to start almost normal processing of user /* The database is now ready to start almost normal processing of user
transactions: transaction rollbacks and the application of the log transactions: transaction rollbacks and the application of the log
records in the hash table can be run in background. */ records in the hash table can be run in background. */
ut_ad(err == DB_SUCCESS);
return(DB_SUCCESS); goto func_exit;
} }
bool recv_dblwr_t::validate_page(const page_id_t page_id, bool recv_dblwr_t::validate_page(const page_id_t page_id,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment