Commit 398dc1fd authored by Kristian Nielsen's avatar Kristian Nielsen

MDEV-22351: InnoDB may report incorrect binlog position information after RESET MASTER

From MariaDB 10.3.5, the saved binlog position is stored in multiple
entries in each rseg header, and during recovery the most recent of
them is selected as the position to recover. The comparison between
different entries is by comparing the binlog file name and offset.
This fails if the binlog file name changes to something that compares
smaller, which can happen from RESET MASTER (which goes back to the
suffix .000001) or from renaming the basename of the binlog. As a
result, the wrong position can be recovered after RESET MASTER or
binlog rename.

This commit writes an 8-byte incrementing version number with each
position entry. The version is allocated during
innobase_commit_ordered(), which ensures that a higher version number
will correspond to a more recent commit. The recovered position is
then the entry with the highest version number.

If starting on a database written by an earlier server version, the code
falls back to the old method of comparing position entries.
Signed-off-by: default avatarKristian Nielsen <knielsen@knielsen-hq.org>
parent 38cf7fb6
......@@ -5,4 +5,36 @@ Variable_name Value
log_bin ON
FOUND 1 /Last binlog file .*, position .*/ in current_test
# expect FOUND
DROP TABLE t;
*** Check that mariabackup restores the correct position.
INSERT INTO t VALUES (2);
FLUSH BINARY LOGS;
INSERT INTO t VALUES (3);
CREATE TABLE t2 (file VARCHAR(255), pos INT) ENGINE=InnoDB;
LOAD DATA LOCAL INFILE "BASEDIR/xtrabackup_binlog_pos_innodb"
INTO TABLE t2 FIELDS ESCAPED BY '' (file, pos);
UPDATE t2 SET file= REPLACE(REPLACE(file, './', ''), '.\\', '');
file_ok pos_ok
OK OK
*** Check correct position after RESET MASTER.
INSERT INTO t VALUES (4);
FLUSH BINARY LOGS;
INSERT INTO t VALUES (5);
RESET MASTER;
INSERT INTO t VALUES (6);
INSERT INTO t VALUES (7);
TRUNCATE TABLE t2;
LOAD DATA LOCAL INFILE "BASEDIR/xtrabackup_binlog_pos_innodb"
INTO TABLE t2 FIELDS ESCAPED BY '' (file, pos);
UPDATE t2 SET file= REPLACE(REPLACE(file, './', ''), '.\\', '');
file_ok pos_ok
OK OK
*** Check correct position after server restart.
# restart
INSERT INTO t VALUES (8);
TRUNCATE TABLE t2;
LOAD DATA LOCAL INFILE "BASEDIR/xtrabackup_binlog_pos_innodb"
INTO TABLE t2 FIELDS ESCAPED BY '' (file, pos);
UPDATE t2 SET file= REPLACE(REPLACE(file, './', ''), '.\\', '');
file_ok pos_ok
OK OK
DROP TABLE t,t2;
......@@ -19,7 +19,91 @@ let SEARCH_FILE=$MYSQLTEST_VARDIR/log/current_test;
--source include/search_pattern_in_file.inc
--echo # expect FOUND
DROP TABLE t;
--echo *** Check that mariabackup restores the correct position.
rmdir $basedir;
INSERT INTO t VALUES (2);
FLUSH BINARY LOGS;
INSERT INTO t VALUES (3);
--let $file= query_get_value(SHOW MASTER STATUS, File, 1)
--let $pos= query_get_value(SHOW MASTER STATUS, Position, 1)
--disable_result_log
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$basedir;
--enable_result_log
exec $XTRABACKUP --prepare --binlog-info=1 --target-dir=$basedir;
CREATE TABLE t2 (file VARCHAR(255), pos INT) ENGINE=InnoDB;
--replace_result $basedir BASEDIR
--disable_warnings
eval LOAD DATA LOCAL INFILE "$basedir/xtrabackup_binlog_pos_innodb"
INTO TABLE t2 FIELDS ESCAPED BY '' (file, pos);
--enable_warnings
UPDATE t2 SET file= REPLACE(REPLACE(file, './', ''), '.\\', '');
--disable_query_log
eval SELECT
IF('$file'=file, "OK", CONCAT('Wrong file: ', file, ' expected: ', '$file')) AS file_ok,
IF('$pos'=pos, "OK", CONCAT('Wrong position: ', pos, ' expected: ', '$pos')) AS pos_ok
FROM t2;
--enable_query_log
--echo *** Check correct position after RESET MASTER.
rmdir $basedir;
INSERT INTO t VALUES (4);
FLUSH BINARY LOGS;
INSERT INTO t VALUES (5);
RESET MASTER;
INSERT INTO t VALUES (6);
INSERT INTO t VALUES (7);
--let $file= query_get_value(SHOW MASTER STATUS, File, 1)
--let $pos= query_get_value(SHOW MASTER STATUS, Position, 1)
--disable_result_log
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$basedir;
--enable_result_log
exec $XTRABACKUP --prepare --binlog-info=1 --target-dir=$basedir;
TRUNCATE TABLE t2;
--replace_result $basedir BASEDIR
--disable_warnings
eval LOAD DATA LOCAL INFILE "$basedir/xtrabackup_binlog_pos_innodb"
INTO TABLE t2 FIELDS ESCAPED BY '' (file, pos);
--enable_warnings
UPDATE t2 SET file= REPLACE(REPLACE(file, './', ''), '.\\', '');
--disable_query_log
eval SELECT
IF('$file'=file, "OK", CONCAT('Wrong file: ', file, ' expected: ', '$file')) AS file_ok,
IF('$pos'=pos, "OK", CONCAT('Wrong position: ', pos, ' expected: ', '$pos')) AS pos_ok
FROM t2;
--enable_query_log
--echo *** Check correct position after server restart.
rmdir $basedir;
--source include/restart_mysqld.inc
INSERT INTO t VALUES (8);
--let $file= query_get_value(SHOW MASTER STATUS, File, 1)
--let $pos= query_get_value(SHOW MASTER STATUS, Position, 1)
--disable_result_log
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$basedir;
--enable_result_log
exec $XTRABACKUP --prepare --binlog-info=1 --target-dir=$basedir;
TRUNCATE TABLE t2;
--replace_result $basedir BASEDIR
--disable_warnings
eval LOAD DATA LOCAL INFILE "$basedir/xtrabackup_binlog_pos_innodb"
INTO TABLE t2 FIELDS ESCAPED BY '' (file, pos);
--enable_warnings
UPDATE t2 SET file= REPLACE(REPLACE(file, './', ''), '.\\', '');
--disable_query_log
eval SELECT
IF('$file'=file, "OK", CONCAT('Wrong file: ', file, ' expected: ', '$file')) AS file_ok,
IF('$pos'=pos, "OK", CONCAT('Wrong position: ', pos, ' expected: ', '$pos')) AS pos_ok
FROM t2;
--enable_query_log
DROP TABLE t,t2;
# Cleanup
rmdir $basedir;
......@@ -237,6 +237,7 @@ If no binlog information is present, the first byte is NUL. */
#define TRX_RSEG_BINLOG_NAME TRX_RSEG_MAX_TRX_ID + 16
/** Maximum length of binlog file name, including terminating NUL, in bytes */
#define TRX_RSEG_BINLOG_NAME_LEN 512
/** See also TRX_RSEG_BINLOG_VERSION, below. */
#ifdef WITH_WSREP
/** The offset to WSREP XID headers */
......@@ -252,6 +253,11 @@ If no binlog information is present, the first byte is NUL. */
#define TRX_RSEG_WSREP_XID_DATA TRX_RSEG_WSREP_XID_INFO + 12
#endif /* WITH_WSREP*/
/** 8 byte binlog info version. The highest found version identifies which
entry is the most recent one that should be recovered. If 0, then the
version is not written (earlier MariaDB version).

The offset is spelled out with literals: TRX_RSEG_MAX_TRX_ID + 16 is
TRX_RSEG_BINLOG_NAME and 512 is TRX_RSEG_BINLOG_NAME_LEN, so the field
starts 140 bytes after the end of the binlog name. When WITH_WSREP is
defined, trx_rseg_update_binlog_offset() asserts at compile time that this
offset equals TRX_RSEG_WSREP_XID_DATA + XIDDATASIZE, i.e. the version
directly follows the WSREP XID data.
NOTE(review): presumably literals are used because the WSREP macros are
only defined under WITH_WSREP while this offset must be the same in every
build - confirm.
NOTE: the expansion is not parenthesized; use it only in additive
expressions such as (rseg_header + TRX_RSEG_BINLOG_VERSION). */
#define TRX_RSEG_BINLOG_VERSION TRX_RSEG_MAX_TRX_ID + 16 + 512 + 140
/*-------------------------------------------------------------*/
/** Read the page number of an undo log slot.
......
......@@ -848,6 +848,8 @@ class trx_sys_t
uint64_t recovered_binlog_offset;
/** Latest recovered binlog file name */
char recovered_binlog_filename[TRX_SYS_MYSQL_LOG_NAME_LEN];
/** Version number, to identify the most recent binlog position record. */
std::atomic<uint64_t> recovered_binlog_version;
/** Set when latest position is from pre-version 10.3.5 TRX_SYS. */
bool recovered_binlog_is_legacy_pos;
......
......@@ -435,6 +435,50 @@ trx_undo_lists_init(trx_rseg_t* rseg, trx_id_t& max_trx_id,
return(size);
}
/** Compare a binlog position entry read from a rollback segment header
against the position recovered so far, to find the one that is current.
- Before MariaDB 10.3.5, a single position entry was stored in the TRX_SYS
  page. Since 10.3.5, multiple entries are stored, one in each rollback
  segment.
- A running version number since MariaDB 10.4.32 is stored with each entry,
  and identifies the most recent one.
- If no version is available on either side, the binlog filename/offset is
  compared to determine the most recent entry (this can fail if the binlog
  file is renamed or RESET MASTER decreases the binlog index number,
  MDEV-22351).
- An entry without a version never replaces an already recovered entry that
  has one: the unversioned entry was written by an earlier server version,
  and comparing its file name against the versioned entry is exactly the
  unreliable comparison described above.
The result is relative to trx_sys.recovered_binlog_filename,
trx_sys.recovered_binlog_offset, trx_sys.recovered_binlog_version and
trx_sys.recovered_binlog_is_legacy_pos; this function only reads them.
@param[in]	binlog_name	Binlog name saved in rseg
@param[in]	binlog_offset	Binlog offset saved in rseg
@param[in]	binlog_version	Binlog version number saved in rseg
				(0 if the entry carries no version)
@return Whether the entry should be used over the previously restored entry.
@retval 0 The new entry is less recent, do not use
@retval 1 The new offset is more recent, but the binlog name is unchanged
@retval 2 The new filename and offset is more recent */
static
int
trx_rseg_binlog_pos_cmp(const char *binlog_name, uint64_t binlog_offset,
			uint64_t binlog_version)
{
	/* An empty name means this rseg holds no binlog information. */
	if (!*binlog_name)
		return 0;

	/* Nothing has been recovered yet: take the whole entry. */
	if (!*trx_sys.recovered_binlog_filename)
		return 2;

	const int cmp = strncmp(binlog_name, trx_sys.recovered_binlog_filename,
				TRX_RSEG_BINLOG_NAME_LEN);
	/* Result to report when the new entry wins: 2 asks the caller to
	copy the file name as well, 1 means only the offset changes. */
	const int different_filename = cmp ? 2 : 1;

	/* Always prefer a position from a rollback segment over a legacy
	position from before version 10.3.5. */
	if (trx_sys.recovered_binlog_is_legacy_pos)
		return different_filename;

	const uint64_t recovered_version =
		trx_sys.recovered_binlog_version.load(
			std::memory_order_relaxed);

	/* Versioned entry: the highest version is the most recent commit,
	regardless of how the file names compare. */
	if (binlog_version > 0)
		return binlog_version > recovered_version
			? different_filename : 0;

	/* This entry has no version. If the entry recovered so far has one,
	keep it; falling back to the name comparison here could let a stale
	pre-upgrade entry win after RESET MASTER or a binlog rename. */
	if (recovered_version > 0)
		return 0;

	/* Old MariaDB on both sides - compare binlog filename/offset
	to find the most recent entry. */
	if (cmp < 0)
		return 0;
	if (cmp)
		return 2;
	return binlog_offset > trx_sys.recovered_binlog_offset;
}
/** Restore the state of a persistent rollback segment.
@param[in,out] rseg persistent rollback segment
@param[in,out] max_trx_id maximum observed transaction identifier
......@@ -459,32 +503,23 @@ trx_rseg_mem_restore(trx_rseg_t* rseg, trx_id_t& max_trx_id, mtr_t* mtr)
(rseg_header) + TRX_RSEG_BINLOG_NAME;
compile_time_assert(TRX_RSEG_BINLOG_NAME_LEN == sizeof
trx_sys.recovered_binlog_filename);
/* Always prefer a position from rollback segment over
a legacy position from before version 10.3.5. */
int cmp = *trx_sys.recovered_binlog_filename &&
!trx_sys.recovered_binlog_is_legacy_pos
? strncmp(binlog_name,
trx_sys.recovered_binlog_filename,
TRX_RSEG_BINLOG_NAME_LEN)
: 1;
if (cmp >= 0) {
uint64_t binlog_offset = mach_read_from_8(
rseg_header + TRX_RSEG_BINLOG_OFFSET);
if (cmp) {
memcpy(trx_sys.
recovered_binlog_filename,
uint64_t binlog_version =
mach_read_from_8(rseg_header +
TRX_RSEG_BINLOG_VERSION);
int choice = trx_rseg_binlog_pos_cmp(binlog_name,
binlog_offset,
binlog_version);
if (choice) {
trx_sys.recovered_binlog_version.store
(binlog_version, std::memory_order_relaxed);
trx_sys.recovered_binlog_offset = binlog_offset;
trx_sys.recovered_binlog_is_legacy_pos= false;
if (choice > 1)
memcpy(trx_sys.recovered_binlog_filename,
binlog_name,
TRX_RSEG_BINLOG_NAME_LEN);
trx_sys.recovered_binlog_offset
= binlog_offset;
} else if (binlog_offset >
trx_sys.recovered_binlog_offset) {
trx_sys.recovered_binlog_offset
= binlog_offset;
}
trx_sys.recovered_binlog_is_legacy_pos= false;
}
#ifdef WITH_WSREP
......@@ -572,6 +607,7 @@ trx_rseg_array_init()
*trx_sys.recovered_binlog_filename = '\0';
trx_sys.recovered_binlog_offset = 0;
trx_sys.recovered_binlog_version.store(0, std::memory_order_relaxed);
trx_sys.recovered_binlog_is_legacy_pos= false;
#ifdef WITH_WSREP
trx_sys.recovered_wsrep_xid.null();
......@@ -793,4 +829,18 @@ trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr)
if (memcmp(binlog_name, p, len)) {
mlog_write_string(p, binlog_name, len, mtr);
}
/* Write a running version number with the binlog position entry. The
highest version marks the most current binlog position entry to restore.
All commits that modify both InnoDB and binlog go through
innobase_commit_ordered() which runs under a global binlog mutex, so
a simple atomic counter will provide a correct version number. */
#ifdef WITH_WSREP
compile_time_assert(TRX_RSEG_BINLOG_VERSION ==
TRX_RSEG_WSREP_XID_DATA + XIDDATASIZE);
#endif
mlog_write_ull(rseg_header + TRX_RSEG_BINLOG_VERSION,
1 + trx_sys.recovered_binlog_version.fetch_add(1ULL,
std::memory_order_relaxed),
mtr);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment