Commit f59a1826 authored by Vladislav Vaintroub

MDEV-14536: During backup, retry the read of log blocks if there is a
(possibly intermittent) checksum mismatch.
parent 3fe261bd
...@@ -2401,8 +2401,15 @@ xtrabackup_copy_logfile(copy_logfile copy) ...@@ -2401,8 +2401,15 @@ xtrabackup_copy_logfile(copy_logfile copy)
log_mutex_enter(); log_mutex_enter();
lsn_t lsn = log_group_read_log_seg(log_sys->buf, &log_sys->log, lsn_t lsn= start_lsn;
start_lsn, end_lsn); for(int retries= 0; retries < 100; retries++) {
if (log_group_read_log_seg(log_sys->buf, &log_sys->log,
&lsn, end_lsn)){
break;
}
msg("Retrying read of a redo log block");
my_sleep(1000);
}
start_lsn = xtrabackup_copy_log(copy, start_lsn, lsn); start_lsn = xtrabackup_copy_log(copy, start_lsn, lsn);
......
CREATE TABLE t(i INT) ENGINE INNODB;
INSERT INTO t VALUES(1);
# xtrabackup backup
FOUND 1 /Invalid log block checksum/ in backup.log
INSERT INTO t VALUES(2);
# xtrabackup prepare
# shutdown server
# remove datadir
# xtrabackup move back
# restart server
SELECT * FROM t;
i
1
DROP TABLE t;
# Test for MDEV-14536: a backup must survive an intermittent redo log
# checksum mismatch. The backup is taken with a debug keyword that makes
# the first checksum comparison fail, then the backup is prepared,
# restored, and the table contents are checked.
# The --dbug option is used below, hence the have_debug.inc include.
--source include/have_debug.inc
CREATE TABLE t(i INT) ENGINE INNODB;
INSERT INTO t VALUES(1);
echo # xtrabackup backup;
let $targetdir=$MYSQLTEST_VARDIR/tmp/backup;
let $backuplog=$MYSQLTEST_VARDIR/tmp/backup.log;
--disable_result_log
# Run the backup with the log_intermittent_checksum_mismatch keyword enabled
# and capture its output in $backuplog for the pattern search below.
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir --dbug=+d,log_intermittent_checksum_mismatch > $backuplog;
--enable_result_log
# Confirm the injected mismatch was actually hit: the error message must
# appear in the captured backup output.
--let SEARCH_RANGE = 10000000
--let SEARCH_PATTERN=Invalid log block checksum
--let SEARCH_FILE=$backuplog
--source include/search_pattern_in_file.inc
remove_file $backuplog;
# This row is inserted after the backup finished, so it is not expected
# to be present once the backup has been restored.
INSERT INTO t VALUES(2);
echo # xtrabackup prepare;
--disable_result_log
exec $XTRABACKUP --prepare --target-dir=$targetdir;
# NOTE(review): judging by the expected output, this include shuts the
# server down, removes the datadir, moves the backup back and restarts
# the server - confirm against the include file.
-- source include/restart_and_restore.inc
--enable_result_log
# Only the row inserted before the backup should be returned.
SELECT * FROM t;
DROP TABLE t;
rmdir $targetdir;
...@@ -99,14 +99,15 @@ recv_sys_debug_free(void); ...@@ -99,14 +99,15 @@ recv_sys_debug_free(void);
/** Read a log segment to a buffer. /** Read a log segment to a buffer.
@param[out] buf buffer @param[out] buf buffer
@param[in] group redo log files @param[in] group redo log files
@param[in] start_lsn read area start @param[in, out] start_lsn in : read area start, out: the last read valid lsn
@param[in] end_lsn read area end @param[in] end_lsn read area end
@return valid end_lsn */ @param[out] invalid_block - invalid, (maybe incompletely written) block encountered
lsn_t @return false, if invalid block encountered (e.g checksum mismatch), true otherwise */
bool
log_group_read_log_seg( log_group_read_log_seg(
byte* buf, byte* buf,
const log_group_t* group, const log_group_t* group,
lsn_t start_lsn, lsn_t* start_lsn,
lsn_t end_lsn); lsn_t end_lsn);
/********************************************************//** /********************************************************//**
......
...@@ -608,28 +608,29 @@ recv_sys_debug_free(void) ...@@ -608,28 +608,29 @@ recv_sys_debug_free(void)
/** Read a log segment to a buffer. /** Read a log segment to a buffer.
@param[out] buf buffer @param[out] buf buffer
@param[in] group redo log files @param[in] group redo log files
@param[in] start_lsn read area start @param[in, out] start_lsn in : read area start, out: the last read valid lsn
@param[in] end_lsn read area end @param[in] end_lsn read area end
@return valid end_lsn */ @param[out] invalid_block - invalid, (maybe incompletely written) block encountered
lsn_t @return false, if invalid block encountered (e.g checksum mismatch), true otherwise */
bool
log_group_read_log_seg( log_group_read_log_seg(
byte* buf, byte* buf,
const log_group_t* group, const log_group_t* group,
lsn_t start_lsn, lsn_t *start_lsn,
lsn_t end_lsn) lsn_t end_lsn)
{ {
ulint len; ulint len;
lsn_t source_offset; lsn_t source_offset;
bool success = true;
ut_ad(log_mutex_own()); ut_ad(log_mutex_own());
ut_ad(!(start_lsn % OS_FILE_LOG_BLOCK_SIZE)); ut_ad(!(*start_lsn % OS_FILE_LOG_BLOCK_SIZE));
ut_ad(!(end_lsn % OS_FILE_LOG_BLOCK_SIZE)); ut_ad(!(end_lsn % OS_FILE_LOG_BLOCK_SIZE));
loop: loop:
source_offset = log_group_calc_lsn_offset(start_lsn, group); source_offset = log_group_calc_lsn_offset(*start_lsn, group);
ut_a(end_lsn - start_lsn <= ULINT_MAX); ut_a(end_lsn - *start_lsn <= ULINT_MAX);
len = (ulint) (end_lsn - start_lsn); len = (ulint) (end_lsn - *start_lsn);
ut_ad(len != 0); ut_ad(len != 0);
...@@ -659,16 +660,16 @@ log_group_read_log_seg( ...@@ -659,16 +660,16 @@ log_group_read_log_seg(
for (ulint l = 0; l < len; l += OS_FILE_LOG_BLOCK_SIZE, for (ulint l = 0; l < len; l += OS_FILE_LOG_BLOCK_SIZE,
buf += OS_FILE_LOG_BLOCK_SIZE, buf += OS_FILE_LOG_BLOCK_SIZE,
start_lsn += OS_FILE_LOG_BLOCK_SIZE) { (*start_lsn) += OS_FILE_LOG_BLOCK_SIZE) {
const ulint block_number = log_block_get_hdr_no(buf); const ulint block_number = log_block_get_hdr_no(buf);
if (block_number != log_block_convert_lsn_to_no(start_lsn)) { if (block_number != log_block_convert_lsn_to_no(*start_lsn)) {
/* Garbage or an incompletely written log block. /* Garbage or an incompletely written log block.
We will not report any error, because this can We will not report any error, because this can
happen when InnoDB was killed while it was happen when InnoDB was killed while it was
writing redo log. We simply treat this as an writing redo log. We simply treat this as an
abrupt end of the redo log. */ abrupt end of the redo log. */
end_lsn = start_lsn; end_lsn = *start_lsn;
break; break;
} }
...@@ -676,6 +677,13 @@ log_group_read_log_seg( ...@@ -676,6 +677,13 @@ log_group_read_log_seg(
ulint crc = log_block_calc_checksum_crc32(buf); ulint crc = log_block_calc_checksum_crc32(buf);
ulint cksum = log_block_get_checksum(buf); ulint cksum = log_block_get_checksum(buf);
DBUG_EXECUTE_IF("log_intermittent_checksum_mismatch", {
static int block_counter;
if (block_counter++ == 0) {
cksum = crc + 1;
}
});
if (crc != cksum) { if (crc != cksum) {
ib::error() << "Invalid log block checksum." ib::error() << "Invalid log block checksum."
<< " block: " << block_number << " block: " << block_number
...@@ -683,30 +691,33 @@ log_group_read_log_seg( ...@@ -683,30 +691,33 @@ log_group_read_log_seg(
<< log_block_get_checkpoint_no(buf) << log_block_get_checkpoint_no(buf)
<< " expected: " << crc << " expected: " << crc
<< " found: " << cksum; << " found: " << cksum;
end_lsn = start_lsn; end_lsn = *start_lsn;
success = false;
break; break;
} }
if (group->is_encrypted()) { if (group->is_encrypted()) {
log_crypt(buf, start_lsn, log_crypt(buf, *start_lsn,
OS_FILE_LOG_BLOCK_SIZE, true); OS_FILE_LOG_BLOCK_SIZE, true);
} }
} }
} }
if (recv_sys->report(ut_time())) { if (recv_sys->report(ut_time())) {
ib::info() << "Read redo log up to LSN=" << start_lsn; ib::info() << "Read redo log up to LSN=" << *start_lsn;
sd_notifyf(0, "STATUS=Read redo log up to LSN=" LSN_PF, sd_notifyf(0, "STATUS=Read redo log up to LSN=" LSN_PF,
start_lsn); *start_lsn);
} }
if (start_lsn != end_lsn) { if (*start_lsn != end_lsn) {
goto loop; goto loop;
} }
return(start_lsn); return(success);
} }
/********************************************************//** /********************************************************//**
Copies a log segment from the most up-to-date log group to the other log Copies a log segment from the most up-to-date log group to the other log
groups, so that they all contain the latest log data. Also writes the info groups, so that they all contain the latest log data. Also writes the info
...@@ -721,10 +732,10 @@ recv_synchronize_groups() ...@@ -721,10 +732,10 @@ recv_synchronize_groups()
/* Read the last recovered log block to the recovery system buffer: /* Read the last recovered log block to the recovery system buffer:
the block is always incomplete */ the block is always incomplete */
const lsn_t start_lsn = ut_uint64_align_down(recovered_lsn, lsn_t start_lsn = ut_uint64_align_down(recovered_lsn,
OS_FILE_LOG_BLOCK_SIZE); OS_FILE_LOG_BLOCK_SIZE);
log_group_read_log_seg(log_sys->buf, &log_sys->log, log_group_read_log_seg(log_sys->buf, &log_sys->log,
start_lsn, start_lsn + OS_FILE_LOG_BLOCK_SIZE); &start_lsn, start_lsn + OS_FILE_LOG_BLOCK_SIZE);
/* Update the fields in the group struct to correspond to /* Update the fields in the group struct to correspond to
recovered_lsn */ recovered_lsn */
...@@ -2903,8 +2914,9 @@ recv_group_scan_log_recs( ...@@ -2903,8 +2914,9 @@ recv_group_scan_log_recs(
start_lsn = ut_uint64_align_down(end_lsn, start_lsn = ut_uint64_align_down(end_lsn,
OS_FILE_LOG_BLOCK_SIZE); OS_FILE_LOG_BLOCK_SIZE);
end_lsn = log_group_read_log_seg( end_lsn = start_lsn;
log_sys->buf, group, start_lsn, log_group_read_log_seg(
log_sys->buf, group, &end_lsn,
start_lsn + RECV_SCAN_SIZE); start_lsn + RECV_SCAN_SIZE);
} while (end_lsn != start_lsn } while (end_lsn != start_lsn
&& !recv_scan_log_recs( && !recv_scan_log_recs(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment