Commit 2e43af69 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-28870 InnoDB: Missing FILE_CREATE, FILE_DELETE or FILE_MODIFY before FILE_CHECKPOINT

There was a race condition between log_checkpoint_low() and
deleting or renaming data files. The scenario is as follows:

1. The buffer pool does not contain dirty pages.
2. A FILE_DELETE or FILE_RENAME record is written.
3. The checkpoint LSN will be moved ahead of the write of the record.
4. The server is killed before the file is actually renamed or deleted.

We will prevent this race condition by ensuring that a log checkpoint
cannot occur between the durable write and the file system operation:

1. Durably write the FILE_DELETE or FILE_RENAME record.
2. Perform the file system operation.
3. Allow any log checkpoint to proceed.

mtr_t::commit_file(): Implement the DELETE or RENAME logic.

fil_delete_tablespace(): Delegate some of the logic to
mtr_t::commit_file().

fil_space_t::rename(): Delegate some logic to mtr_t::commit_file().
Remove the debug injection point fil_rename_tablespace_failure_2
because we do test RENAME failures without any debug injection.

fil_name_write_rename_low(), fil_name_write_rename(): Remove.

Tested by Matthias Leich
parent 55f02c24
call mtr.add_suppression("Cannot find space id [0-9]+ in the tablespace memory cache");
call mtr.add_suppression("Cannot rename table 'test/t1' to 'test/t2' since the dictionary cache already contains 'test/t2'.");
#
# WL5980 Remote tablespace debug error injection tests.
#
CREATE TABLE t1 (a int KEY, b text) ENGINE=Innodb DATA DIRECTORY='MYSQL_TMP_DIR/alt_dir' ;
INSERT INTO t1 VALUES (1, 'tablespace');
SELECT * FROM t1;
a b
1 tablespace
#
# Test the second injection point in fil_rename_tablespace().
# Make sure the table is useable after this failure.
#
SET @save_dbug=@@debug_dbug;
SET debug_dbug="+d,fil_rename_tablespace_failure_2";
RENAME TABLE t1 TO t2;
SET debug_dbug=@save_dbug;
INSERT INTO t1 VALUES (2, 'tablespace');
SELECT * FROM t1;
a b
1 tablespace
2 tablespace
#
# Cleanup
#
DROP TABLE t1;
#
# This testcase is to check the various debug injection points
# to make sure error conditions react corectly and acheive
# better code coverage.
#
# Not supported in embedded
--source include/not_embedded.inc
--source include/have_debug.inc
--source include/have_innodb.inc
--source include/have_symlink.inc
# These messages are expected in the log
call mtr.add_suppression("Cannot find space id [0-9]+ in the tablespace memory cache");
call mtr.add_suppression("Cannot rename table 'test/t1' to 'test/t2' since the dictionary cache already contains 'test/t2'.");
# Set up some variables
LET $MYSQL_DATA_DIR = `select @@datadir`;
LET $data_directory_clause = DATA DIRECTORY='$MYSQL_TMP_DIR/alt_dir';
--enable_query_log
--echo #
--echo # WL5980 Remote tablespace debug error injection tests.
--echo #
--replace_result $MYSQL_TMP_DIR MYSQL_TMP_DIR
eval CREATE TABLE t1 (a int KEY, b text) ENGINE=Innodb $data_directory_clause ;
INSERT INTO t1 VALUES (1, 'tablespace');
SELECT * FROM t1;
--echo #
--echo # Test the second injection point in fil_rename_tablespace().
--echo # Make sure the table is useable after this failure.
--echo #
SET @save_dbug=@@debug_dbug;
SET debug_dbug="+d,fil_rename_tablespace_failure_2";
--disable_result_log
--error ER_ERROR_ON_RENAME
RENAME TABLE t1 TO t2;
--enable_result_log
SET debug_dbug=@save_dbug;
INSERT INTO t1 VALUES (2, 'tablespace');
SELECT * FROM t1;
--echo #
--echo # Cleanup
--echo #
DROP TABLE t1;
--rmdir $MYSQL_TMP_DIR/alt_dir/test
--rmdir $MYSQL_TMP_DIR/alt_dir
...@@ -128,19 +128,6 @@ bool fil_space_t::try_to_close(bool print_info) ...@@ -128,19 +128,6 @@ bool fil_space_t::try_to_close(bool print_info)
return false; return false;
} }
/** Rename a single-table tablespace.
The tablespace must exist in the memory cache.
@param[in] id tablespace identifier
@param[in] old_path old file name
@param[in] new_path_in new file name,
or NULL if it is located in the normal data directory
@return true if success */
static bool
fil_rename_tablespace(
ulint id,
const char* old_path,
const char* new_path_in);
/* /*
IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
============================================= =============================================
...@@ -1523,40 +1510,6 @@ inline void mtr_t::log_file_op(mfile_type_t type, ulint space_id, ...@@ -1523,40 +1510,6 @@ inline void mtr_t::log_file_op(mfile_type_t type, ulint space_id,
m_log.push(reinterpret_cast<const byte*>(path), uint32_t(len)); m_log.push(reinterpret_cast<const byte*>(path), uint32_t(len));
} }
/** Write redo log for renaming a file.
@param[in] space_id tablespace id
@param[in] old_name tablespace file name
@param[in] new_name tablespace file name after renaming
@param[in,out] mtr mini-transaction */
static
void
fil_name_write_rename_low(
ulint space_id,
const char* old_name,
const char* new_name,
mtr_t* mtr)
{
ut_ad(!is_predefined_tablespace(space_id));
mtr->log_file_op(FILE_RENAME, space_id, old_name, new_name);
}
/** Write redo log for renaming a file.
@param[in] space_id tablespace id
@param[in] old_name tablespace file name
@param[in] new_name tablespace file name after renaming */
static void
fil_name_write_rename(
ulint space_id,
const char* old_name,
const char* new_name)
{
mtr_t mtr;
mtr.start();
fil_name_write_rename_low(space_id, old_name, new_name, &mtr);
mtr.commit();
log_write_up_to(mtr.commit_lsn(), true);
}
/** Write FILE_MODIFY for a file. /** Write FILE_MODIFY for a file.
@param[in] space_id tablespace id @param[in] space_id tablespace id
@param[in] name tablespace file name @param[in] name tablespace file name
...@@ -1688,41 +1641,8 @@ pfs_os_file_t fil_delete_tablespace(ulint id) ...@@ -1688,41 +1641,8 @@ pfs_os_file_t fil_delete_tablespace(ulint id)
mtr_t mtr; mtr_t mtr;
mtr.start(); mtr.start();
mtr.log_file_op(FILE_DELETE, id, space->chain.start->name); mtr.log_file_op(FILE_DELETE, id, space->chain.start->name);
mtr.commit(); handle= space->chain.start->handle;
log_write_up_to(mtr.commit_lsn(), true); mtr.commit_file(*space, nullptr);
/* Remove any additional files. */
if (char *cfg_name= fil_make_filepath(space->chain.start->name,
fil_space_t::name_type{}, CFG,
false))
{
os_file_delete_if_exists(innodb_data_file_key, cfg_name, nullptr);
ut_free(cfg_name);
}
if (FSP_FLAGS_HAS_DATA_DIR(space->flags))
RemoteDatafile::delete_link_file(space->name());
/* Remove the directory entry. The file will actually be deleted
when our caller closes the handle. */
os_file_delete(innodb_data_file_key, space->chain.start->name);
mysql_mutex_lock(&fil_system.mutex);
/* Sanity checks after reacquiring fil_system.mutex */
ut_ad(space == fil_space_get_by_id(id));
ut_ad(!space->referenced());
ut_ad(space->is_stopping());
ut_ad(UT_LIST_GET_LEN(space->chain) == 1);
/* Detach the file handle. */
handle= fil_system.detach(space, true);
mysql_mutex_unlock(&fil_system.mutex);
mysql_mutex_lock(&log_sys.mutex);
if (space->max_lsn)
{
ut_d(space->max_lsn = 0);
fil_system.named_spaces.remove(*space);
}
mysql_mutex_unlock(&log_sys.mutex);
fil_space_free_low(space); fil_space_free_low(space);
} }
...@@ -1847,120 +1767,55 @@ char *fil_make_filepath(const char* path, const table_name_t name, ...@@ -1847,120 +1767,55 @@ char *fil_make_filepath(const char* path, const table_name_t name,
dberr_t fil_space_t::rename(const char *path, bool log, bool replace) dberr_t fil_space_t::rename(const char *path, bool log, bool replace)
{ {
ut_ad(UT_LIST_GET_LEN(chain) == 1); ut_ad(UT_LIST_GET_LEN(chain) == 1);
ut_ad(!is_system_tablespace(id)); ut_ad(!is_predefined_tablespace(id));
const char *old_path= chain.start->name; const char *old_path= chain.start->name;
ut_ad(strchr(old_path, '/'));
ut_ad(strchr(path, '/'));
if (!strcmp(path, old_path)) if (!strcmp(path, old_path))
return DB_SUCCESS; return DB_SUCCESS;
if (log) if (!log)
{ {
bool exists= false; if (!os_file_rename(innodb_data_file_key, old_path, path))
os_file_type_t ftype; return DB_ERROR;
mysql_mutex_lock(&fil_system.mutex);
if (os_file_status(old_path, &exists, &ftype) && !exists) ut_free(chain.start->name);
{ chain.start->name= mem_strdup(path);
ib::error() << "Cannot rename '" << old_path << "' to '" << path mysql_mutex_unlock(&fil_system.mutex);
<< "' because the source file does not exist."; return DB_SUCCESS;
return DB_TABLESPACE_NOT_FOUND;
}
exists= false;
if (replace);
else if (!os_file_status(path, &exists, &ftype) || exists)
{
ib::error() << "Cannot rename '" << old_path << "' to '" << path
<< "' because the target file exists.";
return DB_TABLESPACE_EXISTS;
}
fil_name_write_rename(id, old_path, path);
} }
return fil_rename_tablespace(id, old_path, path) ? DB_SUCCESS : DB_ERROR; bool exists= false;
} os_file_type_t ftype;
/** Rename a single-table tablespace.
The tablespace must exist in the memory cache.
@param[in] id tablespace identifier
@param[in] old_path old file name
@param[in] new_path_in new file name,
or NULL if it is located in the normal data directory
@return true if success */
static bool
fil_rename_tablespace(
ulint id,
const char* old_path,
const char* new_path_in)
{
fil_space_t* space;
fil_node_t* node;
ut_a(id != 0);
mysql_mutex_lock(&fil_system.mutex);
space = fil_space_get_by_id(id);
if (space == NULL) {
ib::error() << "Cannot find space id " << id
<< " in the tablespace memory cache, though the file '"
<< old_path
<< "' in a rename operation should have that id.";
mysql_mutex_unlock(&fil_system.mutex);
return(false);
}
/* The following code must change when InnoDB supports
multiple datafiles per tablespace. */
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
node = UT_LIST_GET_FIRST(space->chain);
space->reacquire();
mysql_mutex_unlock(&fil_system.mutex); /* Check upfront if the rename operation might succeed, because we
must durably write redo log before actually attempting to execute
char* new_file_name = mem_strdup(new_path_in); the rename in the file system. */
char* old_file_name = node->name; if (os_file_status(old_path, &exists, &ftype) && !exists)
{
ut_ad(strchr(old_file_name, '/')); sql_print_error("InnoDB: Cannot rename '%s' to '%s'"
ut_ad(strchr(new_file_name, '/')); " because the source file does not exist.",
old_path, path);
if (!recv_recovery_is_on()) { return DB_TABLESPACE_NOT_FOUND;
mysql_mutex_lock(&log_sys.mutex); }
}
/* log_sys.mutex is above fil_system.mutex in the latching order */
mysql_mutex_assert_owner(&log_sys.mutex);
mysql_mutex_lock(&fil_system.mutex);
space->release();
ut_ad(node->name == old_file_name);
bool success;
DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
goto skip_second_rename; );
success = os_file_rename(innodb_data_file_key,
old_file_name,
new_file_name);
DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
skip_second_rename:
success = false; );
ut_ad(node->name == old_file_name);
if (success) {
node->name = new_file_name;
} else {
old_file_name = new_file_name;
}
if (!recv_recovery_is_on()) {
mysql_mutex_unlock(&log_sys.mutex);
}
mysql_mutex_unlock(&fil_system.mutex);
ut_free(old_file_name); exists= false;
if (replace);
else if (!os_file_status(path, &exists, &ftype) || exists)
{
sql_print_error("InnoDB: Cannot rename '%s' to '%s'"
" because the target file exists.",
old_path, path);
return DB_TABLESPACE_EXISTS;
}
return(success); mtr_t mtr;
mtr.start();
mtr.log_file_op(FILE_RENAME, id, old_path, path);
return mtr.commit_file(*this, path) ? DB_SUCCESS : DB_ERROR;
} }
/** Create a tablespace file. /** Create a tablespace file.
......
...@@ -102,6 +102,12 @@ struct mtr_t { ...@@ -102,6 +102,12 @@ struct mtr_t {
@param space tablespace that is being shrunk */ @param space tablespace that is being shrunk */
ATTRIBUTE_COLD void commit_shrink(fil_space_t &space); ATTRIBUTE_COLD void commit_shrink(fil_space_t &space);
/** Commit a mini-transaction that is deleting or renaming a file.
@param space tablespace that is being renamed or deleted
@param name new file name (nullptr=the file will be deleted)
@return whether the operation succeeded */
ATTRIBUTE_COLD bool commit_file(fil_space_t &space, const char *name);
/** Commit a mini-transaction that did not modify any pages, /** Commit a mini-transaction that did not modify any pages,
but generated some redo log on a higher level, such as but generated some redo log on a higher level, such as
FILE_MODIFY records and an optional FILE_CHECKPOINT marker. FILE_MODIFY records and an optional FILE_CHECKPOINT marker.
......
...@@ -626,6 +626,92 @@ void mtr_t::commit_shrink(fil_space_t &space) ...@@ -626,6 +626,92 @@ void mtr_t::commit_shrink(fil_space_t &space)
release_resources(); release_resources();
} }
/** Commit a mini-transaction that is deleting or renaming a file.
@param space tablespace that is being renamed or deleted
@param name new file name (nullptr=the file will be deleted)
@return whether the operation succeeded */
bool mtr_t::commit_file(fil_space_t &space, const char *name)
{
ut_ad(is_active());
ut_ad(!is_inside_ibuf());
ut_ad(!high_level_read_only);
ut_ad(m_modifications);
ut_ad(!m_made_dirty);
ut_ad(!recv_recovery_is_on());
ut_ad(m_log_mode == MTR_LOG_ALL);
ut_ad(UT_LIST_GET_LEN(space.chain) == 1);
log_write_and_flush_prepare();
do_write();
mysql_mutex_assert_owner(&log_sys.mutex);
if (!name && space.max_lsn)
{
ut_d(space.max_lsn= 0);
fil_system.named_spaces.remove(space);
}
/* Block log_checkpoint(). */
mysql_mutex_lock(&buf_pool.flush_list_mutex);
/* Durably write the log for the file system operation. */
log_write_and_flush();
char *old_name= space.chain.start->name;
bool success;
if (name)
{
success= os_file_rename(innodb_data_file_key, old_name, name);
if (success)
{
mysql_mutex_lock(&fil_system.mutex);
space.chain.start->name= mem_strdup(name);
mysql_mutex_unlock(&fil_system.mutex);
ut_free(old_name);
}
}
else
{
/* Remove any additional files. */
if (char *cfg_name= fil_make_filepath(old_name,
fil_space_t::name_type{}, CFG,
false))
{
os_file_delete_if_exists(innodb_data_file_key, cfg_name, nullptr);
ut_free(cfg_name);
}
if (FSP_FLAGS_HAS_DATA_DIR(space.flags))
RemoteDatafile::delete_link_file(space.name());
/* Remove the directory entry. The file will actually be deleted
when our caller closes the handle. */
os_file_delete(innodb_data_file_key, old_name);
mysql_mutex_lock(&fil_system.mutex);
/* Sanity checks after reacquiring fil_system.mutex */
ut_ad(&space == fil_space_get_by_id(space.id));
ut_ad(!space.referenced());
ut_ad(space.is_stopping());
fil_system.detach(&space, true);
mysql_mutex_unlock(&fil_system.mutex);
success= true;
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
ut_d(m_log.erase());
release_resources();
srv_stats.log_write_requests.inc();
return success;
}
/** Commit a mini-transaction that did not modify any pages, /** Commit a mini-transaction that did not modify any pages,
but generated some redo log on a higher level, such as but generated some redo log on a higher level, such as
FILE_MODIFY records and an optional FILE_CHECKPOINT marker. FILE_MODIFY records and an optional FILE_CHECKPOINT marker.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment