Commit 86dc7b4d authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-24626 Remove synchronous write of page0 file during file creation

During data file creation, InnoDB holds the dict_sys mutex, tries to
write page 0 of the file, and flushes the file. This not only causes
unnecessary contention but is also a deviation from the write-ahead
logging protocol.

The clean sequence of operations is that we first start a dictionary
transaction and write SYS_TABLES and SYS_INDEXES records that identify
the tablespace. Then, we durably write a FILE_CREATE record to the
write-ahead log and create the file.

Recovery should not unnecessarily insist that the first page of each
data file that is referred to by the redo log is valid. It must be
enough that page 0 of the tablespace can be initialized based on the
redo log contents.

We introduce a new data structure deferred_spaces that keeps track
of corrupted-looking files during recovery. The data structure holds
the last LSN of a FILE_ record referring to the data file, the
tablespace identifier, and the last known file name.

Two scenarios can occur during recovery:
i) Sufficient memory: InnoDB can reconstruct the
tablespace after parsing all redo log records.

ii) Insufficient memory (multiple apply phases): InnoDB should
store the redo log records of the deferred tablespace even though
the tablespace is not present. InnoDB should start constructing
the tablespace when it first encounters the deferred tablespace
id.

Mariabackup copies the zero-filled .ibd file in backup_fix_ddl() to a
file with the .new extension. The Mariabackup test cases perform page
flushing when they deal with DDL operations during a backup.

fil_ibd_create(): Remove the write of page0 and flushing of file

fil_ibd_load(): Return FIL_LOAD_DEFER if the tablespace has
zero filled page0

Datafile: Clean up the error handling, and do not report errors
if we are in the middle of recovery. The caller will check
Datafile::m_defer.

fil_node_t::deferred: Indicates whether the tablespace loading was
deferred during recovery

FIL_LOAD_DEFER: Returned by fil_ibd_load() to indicate that the
tablespace file cannot be loaded.

recv_sys_t::recover_deferred(): Invoke deferred_spaces.create() to
initialize fil_space_t based on buffered metadata and records to
initialize page 0. Ignore the flags in fil_name_t, because they are
intentionally invalid.

fil_name_process(): Update deferred_spaces.

recv_sys_t::parse(): Store the redo log if the tablespace id
is present in deferred spaces

recv_sys_t::recover_low(): Should recover the first page of
the tablespace even though the tablespace instance is not
present

recv_sys_t::apply(): Initialize the deferred tablespace
before applying the deferred tablespace records

recv_validate_tablespace(): Skip the validation for deferred_spaces.

recv_rename_files(): Moved and revised from recv_sys_t::apply().
For deferred-recovery tablespaces, do not attempt to rename the
file if a deferred-recovery tablespace is associated with the name.

recv_recovery_from_checkpoint_start(): Invoke recv_rename_files()
and initialize all deferred tablespaces before applying redo log.

fil_node_t::read_page0(): Skip page0 validation if the tablespace
is deferred

buf_page_create_deferred(): A variant of buf_page_create() when
the fil_space_t is not available yet

This is joint work with Thirunarayanan Balathandayuthapani,
who implemented an initial prototype.
parent c290c0d7
......@@ -364,6 +364,7 @@ xb_fil_cur_result_t xb_fil_cur_read(xb_fil_cur_t* cursor,
ib_int64_t offset;
ib_int64_t to_read;
const ulint page_size = cursor->page_size;
bool defer = false;
xb_ad(!cursor->is_system() || page_size == srv_page_size);
cursor->read_filter->get_next_batch(&cursor->read_filter_ctxt,
......@@ -418,13 +419,15 @@ xb_fil_cur_result_t xb_fil_cur_read(xb_fil_cur_t* cursor,
ret = XB_FIL_CUR_ERROR;
goto func_exit;
}
defer = space->is_deferred();
/* check pages for corruption and re-read if necessary. i.e. in case of
partially written pages */
for (page = cursor->buf, i = 0; i < npages;
page += page_size, i++) {
unsigned page_no = cursor->buf_page_no + i;
if (page_is_corrupted(page, page_no, cursor, space)){
if (!defer && page_is_corrupted(page, page_no, cursor, space)) {
retry_count--;
if (retry_count == 0) {
......
......@@ -510,7 +510,8 @@ bool CorruptedPages::empty() const
}
static void xb_load_single_table_tablespace(const std::string &space_name,
bool set_size);
bool set_size,
ulint defer_space_id=0);
static void xb_data_files_close();
static fil_space_t* fil_space_get_by_name(const char* name);
......@@ -587,7 +588,8 @@ xtrabackup_add_datasink(ds_ctxt_t *ds)
typedef void (*process_single_tablespace_func_t)(const char *dirname,
const char *filname,
bool is_remote,
bool skip_node_page0);
bool skip_node_page0,
ulint defer_space_id);
static dberr_t enumerate_ibd_files(process_single_tablespace_func_t callback);
/* ======== Datafiles iterator ======== */
......@@ -1680,7 +1682,8 @@ debug_sync_point(const char *name)
static std::set<std::string> tables_for_export;
static void append_export_table(const char *dbname, const char *tablename,
bool is_remote, bool skip_node_page0)
bool is_remote, bool skip_node_page0,
ulint defer_space_id)
{
if(dbname && tablename && !is_remote)
{
......@@ -3271,11 +3274,14 @@ xb_fil_io_init()
node page0 will be read, and it's size and free pages limit
will be set from page 0, which is necessary for checking and fixing corrupted
pages.
@param[in] defer_space_id use the space id to create space object
when there is deferred tablespace
*/
static void xb_load_single_table_tablespace(const char *dirname,
const char *filname,
bool is_remote,
bool skip_node_page0)
bool skip_node_page0,
ulint defer_space_id)
{
ut_ad(srv_operation == SRV_OPERATION_BACKUP
|| srv_operation == SRV_OPERATION_RESTORE_DELTA
......@@ -3298,6 +3304,7 @@ static void xb_load_single_table_tablespace(const char *dirname,
lsn_t flush_lsn;
dberr_t err;
fil_space_t *space;
bool defer = false;
name = static_cast<char*>(ut_malloc_nokey(pathlen));
......@@ -3329,14 +3336,30 @@ static void xb_load_single_table_tablespace(const char *dirname,
}
for (int i = 0; i < 10; i++) {
file->m_defer = false;
err = file->validate_first_page(&flush_lsn);
if (err != DB_CORRUPTION) {
if (file->m_defer) {
if (defer_space_id) {
defer = true;
file->set_space_id(defer_space_id);
file->set_flags(FSP_FLAGS_PAGE_SSIZE());
err = DB_SUCCESS;
break;
}
} else if (err != DB_CORRUPTION) {
break;
}
my_sleep(1000);
}
if (!defer && file->m_defer) {
delete file;
ut_free(name);
return;
}
bool is_empty_file = file->exists() && file->is_empty_file();
if (err == DB_SUCCESS && file->space_id() != SRV_TMP_SPACE_ID) {
......@@ -3345,9 +3368,11 @@ static void xb_load_single_table_tablespace(const char *dirname,
FIL_TYPE_TABLESPACE, NULL/* TODO: crypt_data */);
ut_a(space != NULL);
space->add(file->filepath(),
fil_node_t* node= space->add(
file->filepath(),
skip_node_page0 ? file->detach() : pfs_os_file_t(),
0, false, false);
node->deferred= defer;
mysql_mutex_lock(&fil_system.mutex);
space->read_page0();
mysql_mutex_unlock(&fil_system.mutex);
......@@ -3368,7 +3393,8 @@ static void xb_load_single_table_tablespace(const char *dirname,
}
static void xb_load_single_table_tablespace(const std::string &space_name,
bool skip_node_page0)
bool skip_node_page0,
ulint defer_space_id)
{
std::string name(space_name);
bool is_remote= access((name + ".ibd").c_str(), R_OK) != 0;
......@@ -3379,14 +3405,13 @@ static void xb_load_single_table_tablespace(const std::string &space_name,
buf[sizeof buf - 1]= '\0';
const char *dbname= buf;
char *p= strchr(buf, '/');
if (p == 0)
if (!p)
die("Unexpected tablespace %s filename %s", space_name.c_str(),
name.c_str());
ut_a(p);
*p= 0;
const char *tablename= p + 1;
xb_load_single_table_tablespace(dbname, tablename, is_remote,
skip_node_page0);
skip_node_page0, defer_space_id);
}
/** Scan the database directories under the MySQL datadir, looking for
......@@ -3425,12 +3450,11 @@ static dberr_t enumerate_ibd_files(process_single_tablespace_func_t callback)
/* General tablespaces are always at the first level of the
data home dir */
if (dbinfo.type == OS_FILE_TYPE_FILE) {
bool is_isl = ends_with(dbinfo.name, ".isl");
bool is_ibd = !is_isl && ends_with(dbinfo.name,".ibd");
if (is_isl || is_ibd) {
(*callback)(NULL, dbinfo.name, is_isl, false);
if (dbinfo.type != OS_FILE_TYPE_FILE) {
const bool is_isl = ends_with(dbinfo.name, ".isl");
if (is_isl || ends_with(dbinfo.name,".ibd")) {
(*callback)(nullptr, dbinfo.name, is_isl,
false, 0);
}
}
......@@ -3486,7 +3510,7 @@ static dberr_t enumerate_ibd_files(process_single_tablespace_func_t callback)
if (strlen(fileinfo.name) > 4) {
bool is_isl= false;
if (ends_with(fileinfo.name, ".ibd") || ((is_isl = ends_with(fileinfo.name, ".isl"))))
(*callback)(dbinfo.name, fileinfo.name, is_isl, false);
(*callback)(dbinfo.name, fileinfo.name, is_isl, false, 0);
}
}
......@@ -4567,9 +4591,9 @@ FTWRL. This ensures consistent backup in presence of DDL.
*/
void backup_fix_ddl(CorruptedPages &corrupted_pages)
{
std::set<std::string> new_tables;
std::set<std::string> dropped_tables;
std::map<std::string, std::string> renamed_tables;
space_id_to_name_t new_tables;
/* Disable further DDL on backed up tables (only needed for --no-lock).*/
pthread_mutex_lock(&backup_mutex);
......@@ -4619,7 +4643,7 @@ void backup_fix_ddl(CorruptedPages &corrupted_pages)
if (ddl_tracker.drops.find(id) == ddl_tracker.drops.end()) {
dropped_tables.erase(name);
new_tables.insert(name);
new_tables[id] = name;
if (opt_log_innodb_page_corruption)
corrupted_pages.drop_space(id);
}
......@@ -4661,12 +4685,12 @@ void backup_fix_ddl(CorruptedPages &corrupted_pages)
}
DBUG_EXECUTE_IF("check_mdl_lock_works", DBUG_ASSERT(new_tables.size() == 0););
for (std::set<std::string>::iterator iter = new_tables.begin();
iter != new_tables.end(); iter++) {
const char *space_name = iter->c_str();
if (check_if_skip_table(space_name))
continue;
xb_load_single_table_tablespace(*iter, false);
for (const auto &t : new_tables) {
if (!check_if_skip_table(t.second.c_str())) {
xb_load_single_table_tablespace(t.second, false,
t.first);
}
}
datafiles_iter_t it2;
......@@ -4677,6 +4701,7 @@ void backup_fix_ddl(CorruptedPages &corrupted_pages)
std::string dest_name= filename_to_spacename(
node->name, strlen(node->name));
dest_name.append(".new");
xtrabackup_copy_datafile(node, 0, dest_name.c_str(), wf_write_through,
corrupted_pages);
}
......
SET GLOBAL innodb_file_per_table=ON;
FLUSH TABLES;
CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB;
# restart
CREATE TABLE t3(a INT PRIMARY KEY) ENGINE=InnoDB;
BEGIN;
INSERT INTO t3 VALUES (33101),(347);
......@@ -31,7 +32,7 @@ WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
FOUND 1 /InnoDB: Ignoring data file '.*t[23].ibd' with space ID/ in mysqld.1.err
FOUND 1 /InnoDB: Tablespace \d+ was not found at .*t1.ibd/ in mysqld.1.err
NOT FOUND /InnoDB: Tablespace \d+ was not found at .*t1.ibd/ in mysqld.1.err
FOUND 1 /InnoDB: Tablespace \d+ was not found at .*t3.ibd/ in mysqld.1.err
FOUND 2 /InnoDB: Set innodb_force_recovery=1 to ignore this and to permanently lose all changes to the tablespace/ in mysqld.1.err
# Fault 4: Missing data file
......@@ -54,7 +55,7 @@ WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
NOT FOUND /\[Note\] InnoDB: Cannot read first page of .*t2.ibd/ in mysqld.1.err
FOUND 1 /\[ERROR\] InnoDB: Datafile .*t2.*\. Cannot determine the space ID from the first 64 pages/ in mysqld.1.err
FOUND 1 /.*\[ERROR\] InnoDB: Cannot apply log to \[page id: space=[1-9][0-9]*, page number=3\] of corrupted file './test/t2\.ibd'/ in mysqld.1.err
# restart
SELECT * FROM t2;
a
......@@ -85,27 +86,6 @@ INSERT INTO u6 VALUES(2);
# Kill the server
# Fault 6: All-zero data file and innodb_force_recovery
# restart: --innodb-force-recovery=1
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
FOUND 1 /\[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd/ in mysqld.1.err
FOUND 1 /\[ERROR\] InnoDB: Datafile .*u1.*\. Cannot determine the space ID from the first 64 pages/ in mysqld.1.err
NOT FOUND /\[Note\] InnoDB: Cannot read first page of .*u2.ibd/ in mysqld.1.err
# Fault 7: Missing or wrong data file and innodb_force_recovery
# restart: --innodb-force-recovery=1
SELECT * FROM INFORMATION_SCHEMA.ENGINES
WHERE engine = 'innodb'
AND support IN ('YES', 'DEFAULT', 'ENABLED');
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
FOUND 1 /\[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd/ in mysqld.1.err
FOUND 1 /InnoDB: At LSN: \d+: unable to open file .*u[1-5].ibd for tablespace/ in mysqld.1.err
FOUND 1 /\[ERROR\] InnoDB: Cannot replay rename of tablespace \d+ from '.*u4.ibd' to '.*u6.ibd' because the target file exists/ in mysqld.1.err
# restart: --innodb-force-recovery=1
FOUND 1 /\[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd/ in mysqld.1.err
FOUND 1 /InnoDB: At LSN: \d+: unable to open file .*u[1-5].ibd for tablespace/ in mysqld.1.err
FOUND 1 /\[Warning\] InnoDB: Tablespace \d+ was not found at .*u[1-5].ibd, and innodb_force_recovery was set. All redo log for this tablespace will be ignored!/ in mysqld.1.err
# restart
DROP TABLE u1,u2,u3,u6;
# List of files:
db.opt
......
......@@ -12,6 +12,7 @@ FLUSH TABLES;
CREATE TABLE t1(a INT PRIMARY KEY) ENGINE=InnoDB;
--source include/restart_mysqld.inc
--source include/no_checkpoint_start.inc
CREATE TABLE t3(a INT PRIMARY KEY) ENGINE=InnoDB;
......@@ -120,7 +121,7 @@ eval $check_no_innodb;
let SEARCH_PATTERN= \[Note\] InnoDB: Cannot read first page of .*t2.ibd;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN= \[ERROR\] InnoDB: Datafile .*t2.*\. Cannot determine the space ID from the first 64 pages;
let SEARCH_PATTERN= .*\[ERROR\] InnoDB: Cannot apply log to \\[page id: space=[1-9][0-9]*, page number=3\\] of corrupted file './test/t2\\.ibd';
--source include/search_pattern_in_file.inc
# Restore t2.ibd
......@@ -150,13 +151,15 @@ call mtr.add_suppression("InnoDB: Cannot open datafile for read-write: '.*t2\.ib
# The following are for aborted startup without --innodb-force-recovery:
call mtr.add_suppression("InnoDB: Tablespace .* was not found at .*test");
call mtr.add_suppression("InnoDB: Set innodb_force_recovery=1 to ignore this and to permanently lose all changes to the tablespace");
call mtr.add_suppression("InnoDB: Cannot read first page of '.*test.[tu]2.ibd' I/O error");
call mtr.add_suppression("InnoDB: Cannot read first page of '.*test.[tu]2.ibd': I/O error");
call mtr.add_suppression("InnoDB: Cannot apply log to \\[page id: space=[1-9][0-9]*, page number=3\\] of corrupted file './test/t2\\.ibd'");
call mtr.add_suppression("InnoDB: Datafile '.*test.*ibd' is corrupted");
call mtr.add_suppression("InnoDB: Cannot replay file rename. Remove either file and try again");
call mtr.add_suppression("InnoDB: Cannot rename.*because the target file exists");
call mtr.add_suppression("InnoDB: Log scan aborted at LSN");
# The following are for the --innodb-force-recovery=1 with broken u* tables:
call mtr.add_suppression("InnoDB: The size of the file .*u1\\.ibd is only 16384 bytes, should be at least 65536");
call mtr.add_suppression("InnoDB: The size of the file .*u[12]\\.ibd is only [1-9][0-9]* bytes, should be at least 65536");
call mtr.add_suppression("InnoDB: The size of tablespace file '.*test/u[12].ibd' is only");
call mtr.add_suppression("InnoDB: The error means the system cannot find the path specified");
call mtr.add_suppression("InnoDB: .*you must create directories");
call mtr.add_suppression("InnoDB: Cannot open datafile for read-only: '.*u[1-5]\.ibd'");
......@@ -199,69 +202,14 @@ EOF
--exec echo "" > $MYSQLD_DATADIR/test/u2.ibd
# TODO: Test with this, once
# Bug#18131883 IMPROVE INNODB ERROR MESSAGES REGARDING FILES
# has been fixed:
#--mkdir $MYSQLD_DATADIR/test/u3.ibd
--copy_file $MYSQLD_DATADIR/test/u6.ibd $MYSQLD_DATADIR/test/u4.ibd
--let $restart_parameters= --innodb-force-recovery=1
--source include/start_mysqld.inc
eval $check_no_innodb;
let SEARCH_PATTERN= \[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN= \[ERROR\] InnoDB: Datafile .*u1.*\. Cannot determine the space ID from the first 64 pages;
--source include/search_pattern_in_file.inc
# TODO: These errors should state the file name (u2.ibd) and be ignored
# in innodb-force-recovery mode once
# Bug#18131883 IMPROVE INNODB ERROR MESSAGES REGARDING FILES
# has been fixed:
let SEARCH_PATTERN= \[Note\] InnoDB: Cannot read first page of .*u2.ibd;
--source include/search_pattern_in_file.inc
--source include/shutdown_mysqld.inc
# Allow --innodb-force-recovery to start despite the broken file.
# TODO: Remove this workaround, and make --innodb-force-recovery=1
# ignore the broken file.
--remove_file $MYSQLD_DATADIR/test/u2.ibd
--echo # Fault 7: Missing or wrong data file and innodb_force_recovery
--source include/start_mysqld.inc
eval $check_no_innodb;
let SEARCH_PATTERN= \[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN= InnoDB: At LSN: \d+: unable to open file .*u[1-5].ibd for tablespace;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN= \[ERROR\] InnoDB: Cannot replay rename of tablespace \d+ from '.*u4.ibd' to '.*u6.ibd' because the target file exists;
--source include/search_pattern_in_file.inc
--remove_file $MYSQLD_DATADIR/test/u6.ibd
--source include/restart_mysqld.inc
let SEARCH_PATTERN= \[Note\] InnoDB: Header page consists of zero bytes in datafile: .*u1.ibd;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN= InnoDB: At LSN: \d+: unable to open file .*u[1-5].ibd for tablespace;
--source include/search_pattern_in_file.inc
let SEARCH_PATTERN= \[Warning\] InnoDB: Tablespace \d+ was not found at .*u[1-5].ibd, and innodb_force_recovery was set. All redo log for this tablespace will be ignored!;
--source include/search_pattern_in_file.inc
--let $restart_parameters=
--source include/restart_mysqld.inc
DROP TABLE u1,u2,u3,u6;
--remove_file $MYSQLD_DATADIR/test/u4.ibd
--echo # List of files:
--list_files $MYSQLD_DATADIR/test
......
......@@ -29,7 +29,7 @@ connect ddl3, localhost, root,,;
CREATE TABLE t3(a TEXT,b TEXT,FULLTEXT INDEX(a)) ENGINE=InnoDB;
ALTER TABLE t3 DROP INDEX a, ADD FULLTEXT INDEX(b), ALGORITHM=COPY;
connection default;
# restart: with restart_parameters
# restart
disconnect ddl1;
disconnect ddl2;
disconnect ddl3;
......@@ -69,7 +69,7 @@ DELETE FROM articles LIMIT 1;
ROLLBACK;
disconnect flush_redo_log;
connection default;
# restart: with restart_parameters
# restart
disconnect dml;
INSERT INTO articles (title,body) VALUES
('MySQL Tutorial','DBMS stands for DataBase ...');
......@@ -129,7 +129,7 @@ id title body
1 MySQL Tutorial DBMS stands for Database...
2 MariaDB Tutorial DB means Database ...
connection default;
# restart: with restart_parameters
# restart
disconnect dml;
disconnect dml2;
INSERT INTO articles VALUES (8, 12, 'MySQL Tutorial','DBMS stands for DataBase ...');
......
......@@ -93,16 +93,6 @@ SET DEBUG_SYNC='now WAIT_FOR 3';
--enable_query_log
}
if (!$have_debug)
{
# Work around the lack of MDEV-24626
let $restart_parameters=--innodb-force-recovery=1;
}
if ($have_debug)
{
let $restart_parameters=--innodb-force-recovery=0;
}
let $restart_noprint=1;
let $shutdown_timeout=0;
--source include/restart_mysqld.inc
......@@ -301,16 +291,15 @@ call mtr.add_suppression("InnoDB indexes are inconsistent with what defined in \
call mtr.add_suppression("InnoDB could not find key no [01] with name [ab] from dict cache for table test/t[123]");
call mtr.add_suppression("InnoDB: Table test/t[123] contains .* indexes inside InnoDB");
call mtr.add_suppression("InnoDB: Table `test`\\.`t3` does not exist");
# MDEV-24626 FIXME: a 0-sized file will not be deleted!
--list_files $datadir/test #sql-alter-*.ibd
# Work around the lack of MDEV-24626 as well.
--remove_files_wildcard $datadir/test #sql-alter-*.ibd
--remove_files_wildcard $datadir/test #sql-backup-*.ibd
# Some errors are reported despite the MDEV-24626 fix.
call mtr.add_suppression("InnoDB: Cannot (read first page of|open datafile for read-only:) '\\./test/(FTS_|#sql-(alter|backup)-).*\\.ibd'");
call mtr.add_suppression("InnoDB: Datafile '\\./test/(FTS_|#sql-(alter|backup)-).*\\.ibd' is corrupted");
call mtr.add_suppression("InnoDB: (The error means|Operating system error)");
call mtr.add_suppression("InnoDB: Ignoring tablespace for `test`\\.`(FTS_|#sql-(backup|alter)-).*` because it could not be opened\\.");
call mtr.add_suppression("InnoDB: Tablespace [1-9][0-9]* was not found at ./test/(FTS_|#sql-(alter|backup)-).*\\.ibd, and innodb_force_recovery was set");
call mtr.add_suppression("InnoDB: Corrupted page \\[page id: space=[1-9][0-9]*, page number=0\\] of datafile './test/(FTS_|#sql-(alter|backup)-).*\\.ibd' could not be found in the doublewrite buffer\\.");
call mtr.add_suppression("InnoDB: Expected tablespace id [1-9][0-9]* but found 0 in the file .*/test/(FTS_|#sql-(alter|backup)-).*\\.ibd");
--enable_query_log
}
--remove_files_wildcard $datadir/test #sql-*.frm
......
......@@ -12,6 +12,7 @@ INSERT INTO t VALUES
(0), (1), (2), (3), (4), (5), (6), (7), (8), (9),
(0), (1), (2), (3), (4), (5), (6), (7), (8), (9),
(0), (1), (2), (3), (4), (5), (6), (7), (8), (9);
set global innodb_log_checkpoint_now = 1;
# xtrabackup backup, execute the following query after test.t is copied:
# BEGIN NOT ATOMIC INSERT INTO test.t SELECT * FROM test.t; UPDATE test.t SET i = 10 WHERE i = 0; DELETE FROM test.t WHERE i = 1; END
SELECT count(*) FROM t WHERE i = 0;
......
......@@ -32,6 +32,7 @@ INSERT INTO t VALUES
(0), (1), (2), (3), (4), (5), (6), (7), (8), (9),
(0), (1), (2), (3), (4), (5), (6), (7), (8), (9);
set global innodb_log_checkpoint_now = 1;
--let after_copy_test_t=BEGIN NOT ATOMIC INSERT INTO test.t SELECT * FROM test.t; UPDATE test.t SET i = 10 WHERE i = 0; DELETE FROM test.t WHERE i = 1; END
--echo # xtrabackup backup, execute the following query after test.t is copied:
......
......@@ -3,6 +3,7 @@ CREATE TABLE t2 (i int) ENGINE=INNODB;
CREATE TABLE t3 (i int) ENGINE=INNODB;
CREATE TABLE t4 (i int) ENGINE=INNODB;
CREATE TABLE t5 (i int) ENGINE=INNODB;
set global innodb_log_checkpoint_now=1;
# xtrabackup prepare
# shutdown server
# remove datadir
......
......@@ -6,6 +6,8 @@ CREATE TABLE t3 (i int) ENGINE=INNODB;
CREATE TABLE t4 (i int) ENGINE=INNODB;
CREATE TABLE t5 (i int) ENGINE=INNODB;
set global innodb_log_checkpoint_now=1;
--let before_copy_test_t1=DROP TABLE test.t1
--let after_copy_test_t2=DROP TABLE test.t2;
# MDEV-18185, drop + rename combination
......
......@@ -3,6 +3,7 @@ CREATE TABLE t1(i INT PRIMARY KEY) ENGINE INNODB;
CREATE TABLE t2(i INT PRIMARY KEY) ENGINE INNODB;
CREATE TABLE t3(i INT) ENGINE INNODB;
CREATE TABLE t10(i INT PRIMARY KEY) ENGINE INNODB;
set global innodb_log_checkpoint_now = 1;
# Create full backup , modify table, then create incremental/differential backup
INSERT into t1 values(1);
# Prepare full backup, apply incremental one
......
......@@ -10,6 +10,8 @@ CREATE TABLE t2(i INT PRIMARY KEY) ENGINE INNODB;
CREATE TABLE t3(i INT) ENGINE INNODB;
CREATE TABLE t10(i INT PRIMARY KEY) ENGINE INNODB;
set global innodb_log_checkpoint_now = 1;
echo # Create full backup , modify table, then create incremental/differential backup;
--disable_result_log
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$basedir;
......
......@@ -7,5 +7,6 @@ PARTITION p1 VALUES LESS THAN (1995),
PARTITION p2 VALUES LESS THAN (2000),
PARTITION p3 VALUES LESS THAN (2005)
) ;
set global innodb_log_checkpoint_now = 1;
DROP TABLE t;
DROP TABLE `bobby``tables`;
......@@ -13,6 +13,8 @@ CREATE TABLE `bobby``tables` (id INT, name VARCHAR(50), purchased DATE) ENGINE I
PARTITION p3 VALUES LESS THAN (2005)
) ;
set global innodb_log_checkpoint_now = 1;
--disable_result_log
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir --lock-ddl-per-table=1 --dbug=+d,check_mdl_lock_works;
--enable_result_log
......
......@@ -7,7 +7,6 @@ COMMIT;
SELECT count(*) FROM t;
count(*)
100000
FOUND 1 /Checksum mismatch in datafile/ in backup.log
# Prepare full backup, apply incremental one
# Restore and check results
# shutdown server
......
......@@ -18,15 +18,7 @@ INSERT INTO t select uuid(), uuid(), uuid(), uuid() from seq_1_to_100000;
COMMIT;
SELECT count(*) FROM t;
let $backuplog=$MYSQLTEST_VARDIR/tmp/backup.log;
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$incremental_dir --incremental-basedir=$basedir --dbug=+d,page_intermittent_checksum_mismatch 2> $backuplog;
--let SEARCH_RANGE = 10000000
--let SEARCH_PATTERN=Checksum mismatch in datafile
--let SEARCH_FILE=$backuplog
--source include/search_pattern_in_file.inc
remove_file $backuplog;
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$incremental_dir --incremental-basedir=$basedir --dbug=+d,page_intermittent_checksum_mismatch;
--disable_result_log
echo # Prepare full backup, apply incremental one;
......
......@@ -4,8 +4,8 @@ CREATE TABLE t21(i INT) ENGINE INNODB;
INSERT INTO t21 VALUES(1);
CREATE TABLE t2(i int) ENGINE INNODB;
# xtrabackup backup
t1.ibd
t21.ibd
t1.new
t21.new
# xtrabackup prepare
t1.cfg
t21.cfg
......
......@@ -17,6 +17,7 @@ let targetdir=$MYSQLTEST_VARDIR/tmp/backup;
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup "--tables=test.*1" --target-dir=$targetdir;
--enable_result_log
list_files $targetdir/test *.ibd;
list_files $targetdir/test *.new;
# Inject a junk .ibd file into backup dir to
# see if prepare does not choke on it.
......
......@@ -9,7 +9,7 @@ USE db2;
CREATE TABLE t1(i INT) ENGINE INNODB;
USE test;
# xtrabackup backup
t1.ibd
t1.new
DROP TABLE t1;
DROP TABLE t2;
DROP DATABASE db2;
......@@ -27,6 +27,7 @@ exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup "--tables-ex
--enable_result_log
# check that only t1 table is in backup (t2 is excluded)
list_files $targetdir/test *.new;
list_files $targetdir/test *.ibd;
# check that db2 database is not in the backup (excluded)
--error 1
......
......@@ -2,6 +2,7 @@ CREATE TABLE t1(i int) ENGINE=INNODB;
CREATE TABLE t2(i int) ENGINE=INNODB;
CREATE TABLE t3(a CHAR(36)) ENGINE INNODB;
INSERT INTO t3 SELECT UUID() FROM seq_1_to_1000;
set global innodb_log_checkpoint_now=1;
# xtrabackup backup
# xtrabackup prepare
# shutdown server
......
......@@ -7,6 +7,7 @@ CREATE TABLE t2(i int) ENGINE=INNODB;
CREATE TABLE t3(a CHAR(36)) ENGINE INNODB;
INSERT INTO t3 SELECT UUID() FROM seq_1_to_1000;
set global innodb_log_checkpoint_now=1;
# this will re-create the table and populate it, after backup has list of tables to be copied
--let before_copy_test_t1=BEGIN NOT ATOMIC DROP TABLE test.t1;CREATE TABLE test.t1 ENGINE=INNODB SELECT UUID() from test.seq_1_to_100; END
--let after_copy_test_t2=BEGIN NOT ATOMIC DROP TABLE test.t2;CREATE TABLE test.t2 ENGINE=INNODB SELECT UUID() from test.seq_1_to_1000; END
......
......@@ -14,6 +14,7 @@ CREATE TABLE a1(a1 int) ENGINE INNODB;
INSERT INTO a1 VALUES(1);
CREATE TABLE b1(b1 CHAR(2)) ENGINE INNODB;
INSERT INTO b1 VALUES('b1');
set global innodb_log_checkpoint_now = 1;
# xtrabackup prepare
# shutdown server
# remove datadir
......
......@@ -24,6 +24,8 @@ INSERT INTO a1 VALUES(1);
CREATE TABLE b1(b1 CHAR(2)) ENGINE INNODB;
INSERT INTO b1 VALUES('b1');
set global innodb_log_checkpoint_now = 1;
# Test renames before or after copying tablespaces
--let before_copy_test_t1=RENAME TABLE test.t1 TO test.t1_renamed
--let after_copy_test_t2=RENAME TABLE test.t2 TO test.t2_renamed
......
CREATE TABLE t1(i int) ENGINE INNODB;
set global innodb_log_checkpoint_now = 1;
# xtrabackup prepare
# shutdown server
# remove datadir
......
......@@ -2,6 +2,7 @@
let $targetdir=$MYSQLTEST_VARDIR/tmp/backup;
mkdir $targetdir;
CREATE TABLE t1(i int) ENGINE INNODB;
set global innodb_log_checkpoint_now = 1;
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir --lock-ddl-per-table --dbug=+d,rename_during_mdl_lock_table;
echo # xtrabackup prepare;
......
......@@ -22,8 +22,8 @@ CREATE TABLE t2(i int) ENGINE INNODB;
ALTER TABLE t21 FORCE, ALGORITHM=INPLACE;
# Create partial backup (excluding table t21), Ignore the
# unsupported redo log for the table t21.
t1.ibd
t2.ibd
t1.new
t2.new
# Prepare the full backup
t1.ibd
t2.ibd
......
......@@ -60,6 +60,7 @@ ALTER TABLE t21 FORCE, ALGORITHM=INPLACE;
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup "--tables-exclude=test.t21" --target-dir=$targetdir;
--enable_result_log
--list_files $targetdir/test *.ibd
--list_files $targetdir/test *.new
--echo # Prepare the full backup
--disable_result_log
......
......@@ -3258,25 +3258,12 @@ void buf_block_t::initialise(const page_id_t page_id, ulint zip_size,
page_zip_set_size(&page.zip, zip_size);
}
/** Initialize a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen).
@param[in,out] space space object
@param[in] offset offset of the tablespace
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] mtr mini-transaction
@param[in,out] free_block pre-allocated buffer block
@return pointer to the block, page bufferfixed */
buf_block_t*
buf_page_create(fil_space_t *space, uint32_t offset,
ulint zip_size, mtr_t *mtr, buf_block_t *free_block)
static buf_block_t* buf_page_create_low(page_id_t page_id, ulint zip_size,
mtr_t *mtr, buf_block_t *free_block)
{
page_id_t page_id(space->id, offset);
ut_ad(mtr->is_active());
ut_ad(page_id.space() != 0 || !zip_size);
space->free_page(offset, false);
free_block->initialise(page_id, zip_size, 1);
const ulint fold= page_id.fold();
......@@ -3440,6 +3427,39 @@ buf_page_create(fil_space_t *space, uint32_t offset,
return block;
}
/** Initialize a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer pool. This is one
of the functions that perform the block state transition NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen).

This is a thin wrapper around buf_page_create_low(): it first calls
space->free_page() for the page and then creates the page identified by
(space->id, offset).

@param[in,out] space tablespace object; must not be null, because it
is dereferenced unconditionally. When no fil_space_t
is available yet (deferred tablespace), use
buf_page_create_deferred() instead
@param[in] offset page number within the tablespace
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
@param[in,out] mtr mini-transaction
@param[in,out] free_block pre-allocated buffer block
@return pointer to the block, page bufferfixed */
buf_block_t*
buf_page_create(fil_space_t *space, uint32_t offset,
ulint zip_size, mtr_t *mtr, buf_block_t *free_block)
{
/* NOTE(review): presumably updates the tablespace's freed-page
bookkeeping for this page before it is (re)created; confirm against
fil_space_t::free_page(). */
space->free_page(offset, false);
return buf_page_create_low({space->id, offset}, zip_size, mtr, free_block);
}
/** Initialize a page in the buffer pool while initializing a
deferred tablespace, that is, when no fil_space_t object is passed.
Unlike buf_page_create(), this takes only the tablespace identifier and
always creates page number 0 of that tablespace.
@param space_id space identifier
@param zip_size ROW_FORMAT=COMPRESSED page size or 0
@param mtr mini-transaction
@param free_block pre-allocated buffer block
@return pointer to the block, page bufferfixed */
buf_block_t* buf_page_create_deferred(uint32_t space_id, ulint zip_size,
mtr_t *mtr, buf_block_t *free_block)
{
/* The page number is hard-coded to 0: only the first page is created here. */
return buf_page_create_low({space_id, 0}, zip_size, mtr, free_block);
}
/** Monitor the buffer page read/write activity, and increment corresponding
counter value in MONITOR_MODULE_BUF_PAGE.
@param bpage buffer page whose read or write was completed
......
......@@ -2134,46 +2134,9 @@ fil_ibd_create(
crypt_data->fill_page0(flags, page);
}
if (ulint zip_size = fil_space_t::zip_size(flags)) {
page_zip_des_t page_zip;
page_zip_set_size(&page_zip, zip_size);
page_zip.data = page + srv_page_size;
#ifdef UNIV_DEBUG
page_zip.m_start = 0;
#endif /* UNIV_DEBUG */
page_zip.m_end = 0;
page_zip.m_nonempty = 0;
page_zip.n_blobs = 0;
buf_flush_init_for_writing(NULL, page, &page_zip, false);
*err = os_file_write(IORequestWrite, path, file,
page_zip.data, 0, zip_size);
} else {
buf_flush_init_for_writing(NULL, page, NULL,
fil_space_t::full_crc32(flags));
*err = os_file_write(IORequestWrite, path, file,
page, 0, srv_page_size);
}
aligned_free(page);
fil_space_t::name_type space_name;
if (*err != DB_SUCCESS) {
ib::error()
<< "Could not write the first page to"
<< " tablespace '" << path << "'";
goto err_exit;
}
if (!os_file_flush(file)) {
ib::error() << "File flush of tablespace '"
<< path << "' failed";
*err = DB_ERROR;
goto err_exit;
}
if (has_data_dir) {
/* Make the ISL file if the IBD file is not
in the default location. */
......@@ -2657,15 +2620,23 @@ fil_ibd_load(
}
os_offset_t size;
bool deferred_space = false;
/* Read and validate the first page of the tablespace.
Assign a tablespace name based on the tablespace type. */
switch (file.validate_for_recovery()) {
os_offset_t minimum_size;
case DB_SUCCESS:
deferred_space = file.m_defer;
if (deferred_space) {
goto tablespace_check;
}
if (file.space_id() != space_id) {
return(FIL_LOAD_ID_CHANGED);
}
tablespace_check:
/* Get and test the file size. */
size = os_file_get_size(file.handle());
......@@ -2681,6 +2652,8 @@ fil_ibd_load(
ib::error() << "Could not measure the size of"
" single-table tablespace file '"
<< file.filepath() << "'";
} else if (deferred_space) {
return FIL_LOAD_DEFER;
} else if (size < minimum_size) {
ib::error() << "The size of tablespace file '"
<< file.filepath() << "' is only " << size
......
......@@ -280,11 +280,8 @@ Datafile::read_first_page(bool read_only_mode)
} else if (srv_operation == SRV_OPERATION_BACKUP) {
break;
} else {
ib::error()
<< "Cannot read first page of '"
<< m_filepath << "' "
<< err;
ib::error() << "Cannot read first page of '"
<< m_filepath << "': " << err;
break;
}
}
......@@ -424,6 +421,9 @@ Datafile::validate_for_recovery()
" the first 64 pages.";
return(err);
}
if (m_space_id == ULINT_UNDEFINED) {
return DB_SUCCESS; /* empty file */
}
if (restore_from_doublewrite()) {
return(DB_CORRUPTION);
......@@ -467,11 +467,18 @@ dberr_t Datafile::validate_first_page(lsn_t *flush_lsn)
if (error_txt != NULL) {
err_exit:
free_first_page();
if (recv_recovery_is_on()
|| srv_operation == SRV_OPERATION_BACKUP) {
m_defer= true;
return DB_SUCCESS;
}
ib::info() << error_txt << " in datafile: " << m_filepath
<< ", Space ID:" << m_space_id << ", Flags: "
<< m_flags;
m_is_valid = false;
free_first_page();
return(DB_CORRUPTION);
}
......@@ -500,13 +507,18 @@ dberr_t Datafile::validate_first_page(lsn_t *flush_lsn)
ulint logical_size = fil_space_t::logical_size(m_flags);
if (srv_page_size != logical_size) {
free_first_page();
if (recv_recovery_is_on()
|| srv_operation == SRV_OPERATION_BACKUP) {
m_defer= true;
return DB_SUCCESS;
}
/* Logical size must be innodb_page_size. */
ib::error()
<< "Data file '" << m_filepath << "' uses page size "
<< logical_size << ", but the innodb_page_size"
" start-up parameter is "
<< srv_page_size;
free_first_page();
return(DB_ERROR);
}
......@@ -535,10 +547,18 @@ dberr_t Datafile::validate_first_page(lsn_t *flush_lsn)
fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
if (node && !strcmp(m_filepath, node->name)) {
ok_exit:
mysql_mutex_unlock(&fil_system.mutex);
return DB_SUCCESS;
}
if (!m_space_id
&& (recv_recovery_is_on()
|| srv_operation == SRV_OPERATION_BACKUP)) {
m_defer= true;
goto ok_exit;
}
/* Make sure the space_id has not already been opened. */
ib::error() << "Attempted to open a previously opened"
" tablespace. Previous tablespace: "
......@@ -575,6 +595,10 @@ Datafile::find_space_id()
file_size = os_file_get_size(m_handle);
if (!file_size) {
return DB_SUCCESS;
}
if (file_size == (os_offset_t) -1) {
ib::error() << "Could not get file size of datafile '"
<< m_filepath << "'";
......
......@@ -314,6 +314,17 @@ buf_block_t*
buf_page_create(fil_space_t *space, uint32_t offset,
ulint zip_size, mtr_t *mtr, buf_block_t *free_block);
/** Initialize a page in buffer pool while initializing the
deferred tablespace
@param space_id space identifier
@param zip_size ROW_FORMAT=COMPRESSED page size or 0
@param mtr mini-transaction
@param free_block pre-allocated buffer block
@return pointer to the block, page bufferfixed */
buf_block_t*
buf_page_create_deferred(uint32_t space_id, ulint zip_size, mtr_t *mtr,
buf_block_t *free_block);
/********************************************************************//**
Releases a compressed-only page acquired with buf_page_get_zip(). */
UNIV_INLINE
......
......@@ -99,6 +99,7 @@ this must be equal to srv_page_size */
class page_id_t
{
public:
/** Constructor from (space, page_no).
@param[in] space tablespace id
@param[in] page_no page number */
......@@ -152,6 +153,7 @@ class page_id_t
}
ulonglong raw() { return m_id; }
private:
/** The page identifier */
uint64_t m_id;
......
......@@ -507,6 +507,8 @@ struct fil_space_t final
/** @return whether the storage device is rotational (HDD, not SSD) */
inline bool is_rotational() const;
inline bool is_deferred() const;
/** Open each file. Never invoked on .ibd files.
@param create_new_db whether to skip the call to fil_node_t::read_page0()
@return whether all files were opened */
......@@ -1088,6 +1090,10 @@ struct fil_node_t final
/** Filesystem block size */
ulint block_size;
/** Whether the loading of this file was deferred during recovery;
if set, the validation of page 0 is skipped */
bool deferred=false;
/** FIL_NODE_MAGIC_N */
ulint magic_n;
......@@ -1145,6 +1151,11 @@ inline bool fil_space_t::is_rotational() const
return false;
}
inline bool fil_space_t::is_deferred() const
{
return UT_LIST_GET_FIRST(chain)->deferred;
}
/** Common InnoDB file extensions */
enum ib_extention {
NO_EXT = 0,
......@@ -1475,6 +1486,11 @@ struct fil_system_t {
/** Extend all open data files to the recovered size */
ATTRIBUTE_COLD void extend_to_recv_size();
/** Determine if a tablespace associated with a file name exists.
@param path tablespace file name to look for
@return a matching tablespace */
inline fil_space_t *find(const char *path) const;
};
/** The tablespace memory cache. */
......@@ -1684,7 +1700,9 @@ enum fil_load_status {
/** The file(s) were not found */
FIL_LOAD_NOT_FOUND,
/** The file(s) were not valid */
FIL_LOAD_INVALID
FIL_LOAD_INVALID,
/** The tablespace file was deferred to open */
FIL_LOAD_DEFER
};
/** Open a single-file tablespace and add it to the InnoDB data structures.
......
......@@ -324,6 +324,9 @@ class Datafile {
@return the first data page */
const byte* get_first_page() const { return(m_first_page); }
/** Set the tablespace identifier stored in this data file object.
@param space_id	tablespace identifier */
void set_space_id(ulint space_id)
{
	m_space_id = space_id;
}
/** Set the tablespace flags stored in this data file object.
@param flags	tablespace flags */
void set_flags(ulint flags)
{
	m_flags = flags;
}
private:
/** Free the filepath buffer. */
void free_filepath();
......@@ -443,6 +446,8 @@ class Datafile {
ulint m_last_os_error;
public:
/** true if table is deferred during recovery */
bool m_defer=false;
/** Use the following to determine the uniqueness of this datafile. */
#ifdef _WIN32
/* Use fields dwVolumeSerialNumber, nFileIndexLow, nFileIndexHigh. */
......
......@@ -405,6 +405,15 @@ struct recv_sys_t
{
return UNIV_UNLIKELY(recovery_on) ? recover_low(page_id) : nullptr;
}
/** Try to recover a tablespace that was not readable earlier
@param p iterator, initially pointing to page_id_t{space_id,0};
the records will be freed and the iterator advanced
@param name tablespace file name
@param free_block spare buffer block
@return whether recovery failed */
bool recover_deferred(map::iterator &p, const std::string &name,
buf_block_t *&free_block);
};
/** The recovery system */
......
......@@ -587,6 +587,220 @@ static recv_spaces_t recv_spaces;
/** The last parsed FILE_RENAME records */
static std::map<uint32_t,std::string> renamed_spaces;
/** Files for which fil_ibd_load() returned FIL_LOAD_DEFER.
Entries are keyed by tablespace identifier. add(), remove(), find() and
clear() assert that the caller holds recv_sys.mutex; reinit_all()
acquires and releases that mutex itself. */
static struct
{
/** The last known file name of a deferred tablespace, together with
the LSN of the FILE_ record that supplied it */
struct item
{
/** Log sequence number of latest add() called by fil_name_process() */
lsn_t lsn;
/** File name from the FILE_ record */
std::string file_name;
};
/** Ordered map from tablespace identifier to the deferred item */
using map= std::map<const uint32_t, item, std::less<const uint32_t>,
ut_allocator<std::pair<const uint32_t, item> > >;
/** Map of deferred tablespaces */
map defers;
/** Register a deferred tablespace. An existing entry for the same
tablespace is replaced only if its LSN is not newer than the given one.
@param space space identifier
@param f_name file name
@param lsn log sequence number of the FILE_ record */
void add(uint32_t space, const std::string &f_name, lsn_t lsn)
{
mysql_mutex_assert_owner(&recv_sys.mutex);
const char *filename= f_name.c_str();
if (srv_operation == SRV_OPERATION_RESTORE)
{
/* Replace absolute DATA DIRECTORY file paths with
short names relative to the backup directory. */
/* Locate the last path separator. */
const char *name= strrchr(filename, '/');
#ifdef _WIN32
if (const char *last= strrchr(filename, '\\'))
if (last > name)
name= last;
#endif
if (name)
{
/* Scan backwards from the last separator to the one before it, so
that only the trailing "directory/file" part is kept. If the scan
reaches the start of the string, filename is left unchanged. */
while (--name > filename &&
#ifdef _WIN32
*name != '\\' &&
#endif
*name != '/');
if (name > filename)
filename= name + 1;
}
}
/* The buffer returned by fil_make_filepath() is released with
ut_free() below; the item keeps its own std::string copy of it. */
char *fil_path= fil_make_filepath(nullptr, {filename, strlen(filename)},
IBD, false);
const item defer= {lsn, fil_path};
/* emplace() inserts only if the tablespace is not registered yet;
otherwise the existing entry is overwritten unless it is newer. */
auto p= defers.emplace(space, defer);
if (!p.second && p.first->second.lsn <= defer.lsn)
p.first->second= defer;
ut_free(fil_path);
}
/** Forget a deferred tablespace, if it is registered.
@param space tablespace identifier */
void remove(uint32_t space)
{
mysql_mutex_assert_owner(&recv_sys.mutex);
defers.erase(space);
}
/** Look up a tablespace that was found corrupted during recovery.
@param id tablespace id
@return tablespace whose creation was deferred
@retval nullptr if no such tablespace was found */
const item *find(uint32_t id)
{
mysql_mutex_assert_owner(&recv_sys.mutex);
auto it= defers.find(id);
if (it != defers.end())
return &it->second;
return nullptr;
}
/** Forget all deferred tablespaces. */
void clear()
{
mysql_mutex_assert_owner(&recv_sys.mutex);
defers.clear();
}
/** Initialize all deferred tablespaces.
Each entry is removed from the map once it has been processed.
@return whether any deferred initialization failed */
bool reinit_all()
{
retry:
bool fail= false;
/* Obtain a spare buffer block before acquiring recv_sys.mutex. */
buf_block_t *free_block= buf_LRU_get_free_block(false);
mysql_mutex_lock(&recv_sys.mutex);
for (auto d= defers.begin(); d != defers.end(); )
{
const uint32_t space_id{d->first};
/* Find the first buffered page of this tablespace, if there is any. */
recv_sys_t::map::iterator p{recv_sys.pages.lower_bound({space_id,0})};
if (p == recv_sys.pages.end() || p->first.space() != space_id)
{
/* No pages were recovered. We create a dummy tablespace,
and let dict_drop_index_tree() delete the file. */
recv_spaces_t::iterator it{recv_spaces.find(space_id)};
if (it != recv_spaces.end())
create(it, d->second.file_name, static_cast<uint32_t>
(1U << FSP_FLAGS_FCRC32_POS_MARKER |
FSP_FLAGS_FCRC32_PAGE_SSIZE()), nullptr, 0);
}
else
fail= recv_sys.recover_deferred(p, d->second.file_name, free_block);
/* The entry is erased whether or not its recovery succeeded. */
auto e= d++;
defers.erase(e);
if (fail)
break;
if (free_block)
continue;
/* recover_deferred() took the spare block (it reset free_block to
nullptr). Release the mutex and start over in order to allocate
another block; the entries handled so far are already erased. */
mysql_mutex_unlock(&recv_sys.mutex);
goto retry;
}
/* Discard whatever was left unprocessed after a failure. */
clear();
mysql_mutex_unlock(&recv_sys.mutex);
if (free_block)
buf_pool.free_block(free_block);
return fail;
}
/** Create tablespace metadata for a data file that was initially
found corrupted during recovery. A single file in the closed state
(OS_FILE_CLOSED) is attached, and recv_size is copied from the
recv_spaces entry.
@param it tablespace iterator
@param name latest file name
@param flags FSP_SPACE_FLAGS
@param crypt_data encryption metadata
@param size tablespace size in pages
@return tablespace */
static fil_space_t *create(const recv_spaces_t::const_iterator &it,
const std::string &name, uint32_t flags,
fil_space_crypt_t *crypt_data, uint32_t size)
{
fil_space_t *space= fil_space_t::create(it->first, flags,
FIL_TYPE_TABLESPACE, crypt_data);
ut_ad(space);
space->add(name.c_str(), OS_FILE_CLOSED, size, false, false);
space->recv_size= it->second.size;
space->size_in_header= size;
return space;
}
}
deferred_spaces;
/** Try to recover a tablespace that was not readable earlier.
Page 0 is rebuilt from the buffered log records, and if it carries
consistent metadata, the tablespace object is created from it.
@param p          iterator, initially pointing to page_id_t{space_id,0};
                  the records will be freed and the iterator advanced
@param name       tablespace file name
@param free_block spare buffer block; reset to nullptr once it has been
                  passed to recover_low()
@return whether recovery failed */
bool recv_sys_t::recover_deferred(recv_sys_t::map::iterator &p,
                                  const std::string &name,
                                  buf_block_t *&free_block)
{
  mysql_mutex_assert_owner(&mutex);

  const page_id_t first{p->first};
  ut_ad(first.space());

  recv_spaces_t::iterator it{recv_spaces.find(first.space())};
  ut_ad(it != recv_spaces.end());

  /* The metadata can only be reconstructed from page 0, and only when
  that page is to be initialized from the log without reading the file. */
  if (!first.page_no() && p->second.state == page_recv_t::RECV_WILL_NOT_READ)
  {
    mtr_t mtr;
    buf_block_t *block= recover_low(first, p, mtr, free_block);
    ut_ad(block == free_block);
    /* The spare block was handed to recover_low(); the caller has to
    obtain a new one before continuing. */
    free_block= nullptr;

    /* recover_low() may return nullptr (log skipped, or the page was
    already in the buffer pool). Guard the dereference so that non-debug
    builds report a failure instead of crashing. */
    if (block)
    {
      const byte *page= UNIV_LIKELY_NULL(block->page.zip.data)
        ? block->page.zip.data
        : block->frame;
      const uint32_t space_id= mach_read_from_4(page + FIL_PAGE_SPACE_ID);
      const uint32_t flags= fsp_header_get_flags(page);
      const uint32_t page_no= mach_read_from_4(page + FIL_PAGE_OFFSET);
      const uint32_t size= fsp_header_get_field(page, FSP_SIZE);

      if (page_id_t{space_id, page_no} == first && size >= 4 &&
          it != recv_spaces.end() &&
          fil_space_t::is_valid_flags(flags, space_id) &&
          fil_space_t::logical_size(flags) == srv_page_size)
      {
        fil_space_t *space= deferred_spaces.create(it, name, flags,
                                                   fil_space_read_crypt_data
                                                   (fil_space_t::zip_size(flags),
                                                    page), size);
        space->free_limit= fsp_header_get_field(page, FSP_FREE_LIMIT);
        space->free_len= flst_get_len(FSP_HEADER_OFFSET + FSP_FREE + page);
        /* Release the extra buffer-fix that recover_low() took for the
        first page of a deferred tablespace. */
        block->unfix();
        fil_node_t *node= UT_LIST_GET_FIRST(space->chain);
        /* Mark the file as deferred while it is being acquired. */
        node->deferred= true;
        if (space->acquire())
        {
          node->deferred= false;
          space->release();
          return false;
        }
        /* The block was already unfixed above; it must not be unfixed
        a second time on this failure path. */
      }
      else
        block->unfix();
    }
  }

  ib::error() << "Cannot apply log to " << first
              << " of corrupted file '" << name << "'";
  return true;
}
/** Report an operation to create, delete, or rename a file during backup.
@param[in] space_id tablespace identifier
@param[in] create whether the file is being created
......@@ -790,10 +1004,14 @@ inline size_t recv_sys_t::files_size()
@param[in,out] name file name
@param[in] len length of the file name
@param[in] space_id the tablespace ID
@param[in] deleted whether this is a FILE_DELETE record */
@param[in] deleted whether this is a FILE_DELETE record
@param[in] lsn lsn of the redo log
@param[in] store whether the redo log has to be
stored */
static
void
fil_name_process(char* name, ulint len, ulint space_id, bool deleted)
fil_name_process(char* name, ulint len, ulint space_id,
bool deleted, lsn_t lsn, store_t *store)
{
if (srv_operation == SRV_OPERATION_BACKUP) {
return;
......@@ -817,6 +1035,8 @@ fil_name_process(char* name, ulint len, ulint space_id, bool deleted)
if (deleted) {
/* Got FILE_DELETE */
deferred_spaces.remove(
static_cast<uint32_t>(space_id));
if (!p.second && f.status != file_name_t::DELETED) {
f.status = file_name_t::DELETED;
if (f.space != NULL) {
......@@ -838,6 +1058,8 @@ fil_name_process(char* name, ulint len, ulint space_id, bool deleted)
case FIL_LOAD_OK:
ut_ad(space != NULL);
deferred_spaces.remove(
static_cast<uint32_t>(space_id));
if (!f.space) {
if (f.size
|| f.flags != f.initial_flags) {
......@@ -885,6 +1107,15 @@ fil_name_process(char* name, ulint len, ulint space_id, bool deleted)
}
break;
case FIL_LOAD_DEFER:
/** Skip the deferred spaces
when lsn is already processed */
if (*store != store_t::STORE_IF_EXISTS) {
deferred_spaces.add(
static_cast<uint32_t>(space_id),
name, lsn);
}
break;
case FIL_LOAD_INVALID:
ut_ad(space == NULL);
if (srv_force_recovery == 0) {
......@@ -931,6 +1162,7 @@ void recv_sys_t::close()
dblwr.pages.clear();
ut_d(mysql_mutex_lock(&mutex));
clear();
deferred_spaces.clear();
ut_d(mysql_mutex_unlock(&mutex));
if (buf)
......@@ -947,7 +1179,6 @@ void recv_sys_t::close()
recv_spaces.clear();
renamed_spaces.clear();
mlog_init.clear();
close_files();
}
......@@ -2090,7 +2321,7 @@ bool recv_sys_t::parse(lsn_t checkpoint_lsn, store_t *store, bool apply)
if (!size)
continue;
}
else
else if (!deferred_spaces.find(space_id))
continue;
/* fall through */
case STORE_YES:
......@@ -2200,10 +2431,11 @@ bool recv_sys_t::parse(lsn_t checkpoint_lsn, store_t *store, bool apply)
const char saved_end= fn[rlen];
const_cast<char&>(fn[rlen])= '\0';
fil_name_process(const_cast<char*>(fn), fnend - fn, space_id,
(b & 0xf0) == FILE_DELETE);
(b & 0xf0) == FILE_DELETE, start_lsn,
store);
if (fn2)
fil_name_process(const_cast<char*>(fn2), fn2end - fn2, space_id,
false);
false, start_lsn, store);
if ((b & 0xf0) < FILE_MODIFY && log_file_op)
log_file_op(space_id, (b & 0xf0) == FILE_CREATE,
l, static_cast<ulint>(fnend - fn),
......@@ -2590,27 +2822,47 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id,
buf_block_t* block= nullptr;
mlog_init_t::init &i= mlog_init.last(page_id);
const lsn_t end_lsn = recs.log.last()->lsn;
bool first_page= page_id.page_no() == 0;
if (end_lsn < i.lsn)
DBUG_LOG("ib_log", "skip log for page " << page_id
<< " LSN " << end_lsn << " < " << i.lsn);
else if (fil_space_t *space= fil_space_t::get(page_id.space()))
{
fil_space_t *space= fil_space_t::get(page_id.space());
if (!space && !first_page)
return block;
mtr.start();
mtr.set_log_mode(MTR_LOG_NO_REDO);
block= buf_page_create(space, page_id.page_no(), space->zip_size(), &mtr,
b);
ulint zip_size= space ? space->zip_size() : 0;
if (!space)
{
auto it= recv_spaces.find(page_id.space());
ut_ad(it != recv_spaces.end());
uint32_t flags= it->second.flags;
zip_size= fil_space_t::zip_size(flags);
block= buf_page_create_deferred(page_id.space(), zip_size, &mtr, b);
}
else
block= buf_page_create(space, page_id.page_no(), zip_size, &mtr, b);
if (UNIV_UNLIKELY(block != b))
{
/* The page happened to exist in the buffer pool, or it was just
being read in. Before buf_page_get_with_no_latch() returned to
buf_page_create(), all changes must have been applied to the
page already. */
/* The page happened to exist in the buffer pool, or it
was just being read in. Before buf_page_get_with_no_latch()
returned to buf_page_create(), all changes must have been
applied to the page already. */
ut_ad(pages.find(page_id) == pages.end());
mtr.commit();
block= nullptr;
}
else
{
/* Buffer fix the first page while deferring the tablespace
and unfix it after creating defer tablespace */
if (first_page && !space)
block->fix();
ut_ad(&recs == &pages.find(page_id)->second);
i.created= true;
recv_recover_page(block, mtr, p, space, &i);
......@@ -2621,8 +2873,9 @@ inline buf_block_t *recv_sys_t::recover_low(const page_id_t page_id,
if (pages.empty())
pthread_cond_signal(&cond);
}
if (space)
space->release();
}
return block;
}
......@@ -2651,6 +2904,15 @@ buf_block_t *recv_sys_t::recover_low(const page_id_t page_id)
return block;
}
/** Look up a tablespace by the name of its first data file.
The caller must hold fil_system_t::mutex.
@param path  tablespace file name to look for
@return the first tablespace in the list whose first file has that name
@retval nullptr if there is no such tablespace */
inline fil_space_t *fil_system_t::find(const char *path) const
{
  mysql_mutex_assert_owner(&mutex);

  for (fil_space_t &candidate : fil_system.space_list)
  {
    const fil_node_t *first_file= candidate.chain.start;
    if (!first_file)
      continue; /* no file is attached to this tablespace */
    if (strcmp(first_file->name, path) == 0)
      return &candidate;
  }

  return nullptr;
}
/** Apply buffered log to persistent data pages.
@param last_batch whether it is possible to write more redo log */
void recv_sys_t::apply(bool last_batch)
......@@ -2722,6 +2984,28 @@ void recv_sys_t::apply(bool last_batch)
page_recv_t &recs= p->second;
ut_ad(!recs.log.empty());
const uint32_t space_id= page_id.space();
auto d= deferred_spaces.defers.find(space_id);
if (d != deferred_spaces.defers.end())
{
if (recover_deferred(p, d->second.file_name, free_block))
{
if (!srv_force_recovery)
set_corrupt_fs();
while (p != pages.end() && p->first.space() == space_id)
{
map::iterator r= p++;
r->second.log.clear();
pages.erase(r);
}
}
deferred_spaces.defers.erase(d);
if (!free_block)
goto next_free_block;
p= pages.lower_bound(page_id);
continue;
}
switch (recs.state) {
case page_recv_t::RECV_BEING_READ:
case page_recv_t::RECV_BEING_PROCESSED:
......@@ -2730,6 +3014,7 @@ void recv_sys_t::apply(bool last_batch)
case page_recv_t::RECV_WILL_NOT_READ:
if (UNIV_LIKELY(!!recover_low(page_id, p, mtr, free_block)))
{
next_free_block:
mysql_mutex_unlock(&mutex);
free_block= buf_LRU_get_free_block(false);
mysql_mutex_lock(&mutex);
......@@ -2824,48 +3109,6 @@ void recv_sys_t::apply(bool last_batch)
buf_pool_invalidate();
mysql_mutex_lock(&log_sys.mutex);
}
#if 1 /* Mariabackup FIXME: Remove or adjust rename_table_in_prepare() */
else if (srv_operation != SRV_OPERATION_NORMAL);
#endif
else
{
/* In the last batch, we will apply any rename operations. */
for (auto r : renamed_spaces)
{
const uint32_t id= r.first;
fil_space_t *space= fil_space_t::get(id);
if (!space)
continue;
ut_ad(UT_LIST_GET_LEN(space->chain) == 1);
const char *old= space->chain.start->name;
if (r.second != old)
{
bool exists;
os_file_type_t ftype;
const char *new_name= r.second.c_str();
if (!os_file_status(new_name, &exists, &ftype) || exists)
{
ib::error() << "Cannot replay rename of tablespace " << id
<< " from '" << old << "' to '" << r.second <<
(exists ? "' because the target file exists" : "'");
found_corrupt_fs= true;
}
else
{
mysql_mutex_lock(&log_sys.mutex);
if (dberr_t err= space->rename(r.second.c_str(), false))
{
ib::error() << "Cannot replay rename of tablespace " << id
<< " to '" << r.second << "': " << err;
found_corrupt_fs= true;
}
mysql_mutex_unlock(&log_sys.mutex);
}
}
space->release();
}
renamed_spaces.clear();
}
mysql_mutex_lock(&mutex);
......@@ -3307,6 +3550,12 @@ recv_validate_tablespace(bool rescan, bool& missing_tablespace)
recv_spaces_t::iterator i = recv_spaces.find(space);
ut_ad(i != recv_spaces.end());
if (deferred_spaces.find(static_cast<uint32_t>(space))) {
/* Skip redo logs belonging to
incomplete tablespaces */
goto next;
}
switch (i->second.status) {
case file_name_t::NORMAL:
goto next;
......@@ -3337,6 +3586,10 @@ recv_validate_tablespace(bool rescan, bool& missing_tablespace)
continue;
}
if (deferred_spaces.find(static_cast<uint32_t>(rs.first))) {
continue;
}
missing_tablespace = true;
if (srv_force_recovery > 0) {
......@@ -3422,6 +3675,77 @@ recv_init_crash_recovery_spaces(bool rescan, bool& missing_tablespace)
return DB_SUCCESS;
}
/** Apply any FILE_RENAME records.
The caller must hold both recv_sys.mutex and log_sys.mutex.
Processing stops at the first failure, after which the file system is
flagged as corrupted. The list of pending renames is always emptied.
@return DB_SUCCESS, or the error code of the rename that failed */
static dberr_t recv_rename_files()
{
  mysql_mutex_assert_owner(&recv_sys.mutex);
  mysql_mutex_assert_owner(&log_sys.mutex);

  dberr_t err= DB_SUCCESS;

  for (const auto &r : renamed_spaces)
  {
    const uint32_t id= r.first;
    fil_space_t *space= fil_space_t::get(id);
    if (!space)
      continue;
    ut_ad(UT_LIST_GET_LEN(space->chain) == 1);
    char *old= space->chain.start->name;
    if (r.second != old)
    {
      /* Initialized so that the diagnostic below never reads an
      indeterminate value, should os_file_status() fail without
      assigning it. */
      bool exists= false;
      os_file_type_t ftype;
      const char *new_name= r.second.c_str();
      mysql_mutex_lock(&fil_system.mutex);
      /* Look for another tablespace with the target name, but only if
      our own file is closed and deferred. The match is accepted only if
      the other file is likewise closed and deferred. */
      const fil_space_t *other= nullptr;
      if (!space->chain.start->is_open() && space->chain.start->deferred &&
          (other= fil_system.find(new_name)) &&
          (other->chain.start->is_open() || !other->chain.start->deferred))
        other= nullptr;

      if (other)
      {
        /* Multiple tablespaces use the same file name. This should
        only be possible if the recovery of both files was deferred
        (no valid page 0 is contained in either file). We shall not
        rename the file, just rename the metadata. */
        ib::info() << "Renaming tablespace metadata " << id
                   << " from '" << old << "' to '" << r.second
                   << "' that is also associated with tablespace "
                   << other->id;
        space->chain.start->name= mem_strdup(new_name);
        ut_free(old);
        mysql_mutex_unlock(&fil_system.mutex);
      }
      else if (!os_file_status(new_name, &exists, &ftype) || exists)
      {
        ib::error() << "Cannot replay rename of tablespace " << id
                    << " from '" << old << "' to '" << r.second <<
                    (exists ? "' because the target file exists" : "'");
        err= DB_TABLESPACE_EXISTS;
        mysql_mutex_unlock(&fil_system.mutex);
      }
      else
      {
        /* The actual rename is performed without fil_system.mutex. */
        mysql_mutex_unlock(&fil_system.mutex);
        err= space->rename(new_name, false);
        if (err != DB_SUCCESS)
          ib::error() << "Cannot replay rename of tablespace " << id
                      << " to '" << r.second << "': " << err;
      }
    }

    space->release();
    if (err != DB_SUCCESS)
    {
      recv_sys.set_corrupt_fs();
      break;
    }
  }

  renamed_spaces.clear();
  return err;
}
/** Start recovering from a redo log checkpoint.
@param[in] flush_lsn FIL_PAGE_FILE_FLUSH_LSN
of first system tablespace page
......@@ -3722,6 +4046,9 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
recv_no_ibuf_operations = false;
ut_d(recv_no_log_write = srv_operation == SRV_OPERATION_RESTORE
|| srv_operation == SRV_OPERATION_RESTORE_EXPORT);
if (srv_operation == SRV_OPERATION_NORMAL) {
err = recv_rename_files();
}
mysql_mutex_unlock(&recv_sys.mutex);
mysql_mutex_unlock(&log_sys.mutex);
......@@ -3730,8 +4057,12 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
/* The database is now ready to start almost normal processing of user
transactions: transaction rollbacks and the application of the log
records in the hash table can be run in background. */
if (err == DB_SUCCESS && deferred_spaces.reinit_all()
&& !srv_force_recovery) {
err = DB_CORRUPTION;
}
return(DB_SUCCESS);
return err;
}
bool recv_dblwr_t::validate_page(const page_id_t page_id,
......
......@@ -4463,109 +4463,110 @@ void fil_node_t::find_metadata(os_file_t file
bool fil_node_t::read_page0()
{
mysql_mutex_assert_owner(&fil_system.mutex);
const unsigned psize = space->physical_size();
const unsigned psize= space->physical_size();
#ifndef _WIN32
struct stat statbuf;
if (fstat(handle, &statbuf)) {
if (fstat(handle, &statbuf))
return false;
}
os_offset_t size_bytes = statbuf.st_size;
os_offset_t size_bytes= statbuf.st_size;
#else
os_offset_t size_bytes = os_file_get_size(handle);
os_offset_t size_bytes= os_file_get_size(handle);
ut_a(size_bytes != (os_offset_t) -1);
#endif
const uint32_t min_size = FIL_IBD_FILE_INITIAL_SIZE * psize;
const uint32_t min_size= FIL_IBD_FILE_INITIAL_SIZE * psize;
if (size_bytes < min_size) {
if (size_bytes < min_size)
{
ib::error() << "The size of the file " << name
<< " is only " << size_bytes
<< " bytes, should be at least " << min_size;
return false;
}
if (!deferred)
{
page_t *page= static_cast<byte*>(aligned_malloc(psize, psize));
if (os_file_read(IORequestRead, handle, page, 0, psize)
!= DB_SUCCESS) {
!= DB_SUCCESS)
{
ib::error() << "Unable to read first page of file " << name;
corrupted:
aligned_free(page);
return false;
}
const ulint space_id = memcmp_aligned<2>(
FIL_PAGE_SPACE_ID + page,
const ulint space_id= memcmp_aligned<2>
(FIL_PAGE_SPACE_ID + page,
FSP_HEADER_OFFSET + FSP_SPACE_ID + page, 4)
? ULINT_UNDEFINED
: mach_read_from_4(FIL_PAGE_SPACE_ID + page);
ulint flags = fsp_header_get_flags(page);
const uint32_t size = fsp_header_get_field(page, FSP_SIZE);
const uint32_t free_limit = fsp_header_get_field(page, FSP_FREE_LIMIT);
const uint32_t free_len = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE
+ page);
if (!fil_space_t::is_valid_flags(flags, space->id)) {
ulint cflags = fsp_flags_convert_from_101(flags);
if (cflags == ULINT_UNDEFINED) {
ulint flags= fsp_header_get_flags(page);
const uint32_t size= fsp_header_get_field(page, FSP_SIZE);
const uint32_t free_limit= fsp_header_get_field(page, FSP_FREE_LIMIT);
const uint32_t free_len= flst_get_len(FSP_HEADER_OFFSET + FSP_FREE + page);
if (!fil_space_t::is_valid_flags(flags, space->id))
{
ulint cflags= fsp_flags_convert_from_101(flags);
if (cflags == ULINT_UNDEFINED)
{
invalid:
ib::error()
<< "Expected tablespace flags "
ib::error() << "Expected tablespace flags "
<< ib::hex(space->flags)
<< " but found " << ib::hex(flags)
<< " in the file " << name;
goto corrupted;
}
ulint cf = cflags & ~FSP_FLAGS_MEM_MASK;
ulint sf = space->flags & ~FSP_FLAGS_MEM_MASK;
ulint cf= cflags & ~FSP_FLAGS_MEM_MASK;
ulint sf= space->flags & ~FSP_FLAGS_MEM_MASK;
if (!fil_space_t::is_flags_equal(cf, sf)
&& !fil_space_t::is_flags_equal(sf, cf)) {
if (!fil_space_t::is_flags_equal(cf, sf) &&
!fil_space_t::is_flags_equal(sf, cf))
goto invalid;
}
flags = cflags;
flags= cflags;
}
ut_ad(!(flags & FSP_FLAGS_MEM_MASK));
/* Try to read crypt_data from page 0 if it is not yet read. */
if (!space->crypt_data) {
space->crypt_data = fil_space_read_crypt_data(
if (!space->crypt_data)
space->crypt_data= fil_space_read_crypt_data(
fil_space_t::zip_size(flags), page);
}
aligned_free(page);
if (UNIV_UNLIKELY(space_id != space->id)) {
if (UNIV_UNLIKELY(space_id != space->id))
{
ib::error() << "Expected tablespace id " << space->id
<< " but found " << space_id
<< " in the file " << name;
return false;
}
space->flags= (space->flags & FSP_FLAGS_MEM_MASK) | flags;
ut_ad(space->free_limit == 0 || space->free_limit == free_limit);
ut_ad(space->free_len == 0 || space->free_len == free_len);
space->size_in_header= size;
space->free_limit= free_limit;
space->free_len= free_len;
}
#ifdef UNIV_LINUX
find_metadata(handle, &statbuf);
#else
find_metadata();
#endif
/* Truncate the size to a multiple of extent size. */
ulint mask = psize * FSP_EXTENT_SIZE - 1;
ulint mask= psize * FSP_EXTENT_SIZE - 1;
if (size_bytes <= mask) {
if (size_bytes <= mask);
/* .ibd files start smaller than an
extent size. Do not truncate valid data. */
} else {
size_bytes &= ~os_offset_t(mask);
}
space->flags = (space->flags & FSP_FLAGS_MEM_MASK) | flags;
else size_bytes &= ~os_offset_t(mask);
space->punch_hole = space->is_compressed();
this->size = uint32_t(size_bytes / psize);
space->punch_hole= space->is_compressed();
this->size= uint32_t(size_bytes / psize);
space->set_sizes(this->size);
ut_ad(space->free_limit == 0 || space->free_limit == free_limit);
ut_ad(space->free_len == 0 || space->free_len == free_len);
space->size_in_header = size;
space->free_limit = free_limit;
space->free_len = free_len;
return true;
}
#endif /* !UNIV_INNOCHECKSUM */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment