Commit 852d42e9 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-34483 Backup may copy unnecessarily much log

In mariadb-backup --backup there are multiple mechanisms for ensuring that
a sufficient amount of the InnoDB write-ahead log (ib_logfile0) is being
copied at the end of the backup. The backup needs to include the latest
committed transaction. While further transaction commits are blocked by
BACKUP STAGE BLOCK_COMMIT, ongoing transactions may modify the database
contents and write log records. We were unnecessarily copying such log,
which would also cause further effort of rolling back incomplete
transactions after the backup is restored.

backup_wait_for_lsn(): Declare as static, and refactor some code
to separate functions backup_wait_for_lsn_low() and
backup_wait_timeout().

backup_wait_for_commit_lsn(): A new function to determine the current
LSN (within BACKUP STAGE BLOCK_COMMIT) and to wait for the log to be
copied until that. Invoked by BackupStages::stage_block_commit().

xtrabackup_backup_func(): Remove a condition that had already been
checked by a caller of backup_wait_timeout().

server_lsn_after_lock: Declare as a local variable in
BackupStages::stage_block_ddl().

log_copying_thread(), io_watching_thread(): Use metadata_last_lsn
instead of metadata_to_lsn as the stop condition.

BackupStages::stage_block_commit(): Ensure that the log tables
(in particular, mysql.general_log) will have been copied before
the BACKUP STAGE BLOCK_COMMIT is being followed by any further
SQL statements.

Reviewed by: Debarun Banerjee
Tested by: Matthias Leich
parent d002b1f5
...@@ -1353,9 +1353,6 @@ backup_files(ds_ctxt *ds_data, const char *from) ...@@ -1353,9 +1353,6 @@ backup_files(ds_ctxt *ds_data, const char *from)
return(ret); return(ret);
} }
lsn_t server_lsn_after_lock;
extern void backup_wait_for_lsn(lsn_t lsn);
/** Release resources after backup_files() */ /** Release resources after backup_files() */
void backup_release() void backup_release()
{ {
......
...@@ -3482,35 +3482,70 @@ static bool xtrabackup_copy_logfile() ...@@ -3482,35 +3482,70 @@ static bool xtrabackup_copy_logfile()
return false; return false;
} }
static bool backup_wait_timeout(lsn_t lsn, lsn_t last_lsn)
{
if (last_lsn >= lsn)
return true;
msg("Was only able to copy log from " LSN_PF " to " LSN_PF
", not " LSN_PF "; try increasing innodb_log_file_size",
log_sys.next_checkpoint_lsn, last_lsn, lsn);
return false;
}
/** /**
Wait until redo log copying thread processes given lsn Wait for enough log to be copied.
@param lsn log sequence number target
@return the reached log sequence number (may be less or more than lsn)
*/ */
void backup_wait_for_lsn(lsn_t lsn) static lsn_t backup_wait_for_lsn_low(lsn_t lsn)
{ {
mysql_mutex_lock(&recv_sys.mutex); mysql_mutex_assert_owner(&recv_sys.mutex);
for (lsn_t last_lsn{recv_sys.lsn}; last_lsn < lsn; )
lsn_t last_lsn{recv_sys.lsn};
if (last_lsn >= lsn)
return last_lsn;
msg("Waiting for log copy thread to read lsn " LSN_PF, lsn);
while (log_copying_running && last_lsn < lsn)
{ {
timespec abstime; timespec abstime;
set_timespec(abstime, 5); set_timespec(abstime, 5);
if (my_cond_timedwait(&scanned_lsn_cond, &recv_sys.mutex.m_mutex, if (my_cond_timedwait(&scanned_lsn_cond, &recv_sys.mutex.m_mutex,
&abstime) && &abstime) &&
last_lsn == recv_sys.lsn) last_lsn == recv_sys.lsn)
die("Was only able to copy log from " LSN_PF " to " LSN_PF break;
", not " LSN_PF "; try increasing innodb_log_file_size",
log_sys.next_checkpoint_lsn, last_lsn, lsn);
last_lsn= recv_sys.lsn; last_lsn= recv_sys.lsn;
} }
return last_lsn;
}
/**
Wait for enough log to be copied after BACKUP STAGE BLOCK_DDL.
@param lsn log sequence number target
@return whether log_copying_thread() copied everything until the target lsn
*/
static bool backup_wait_for_lsn(lsn_t lsn)
{
if (!lsn)
return true;
mysql_mutex_lock(&recv_sys.mutex);
ut_ad(!metadata_to_lsn);
ut_ad(!metadata_last_lsn);
lsn_t last_lsn{backup_wait_for_lsn_low(lsn)};
mysql_mutex_unlock(&recv_sys.mutex); mysql_mutex_unlock(&recv_sys.mutex);
return backup_wait_timeout(lsn, last_lsn);
} }
extern lsn_t server_lsn_after_lock;
static void log_copying_thread() static void log_copying_thread()
{ {
my_thread_init(); my_thread_init();
mysql_mutex_lock(&recv_sys.mutex); mysql_mutex_lock(&recv_sys.mutex);
while (!xtrabackup_copy_logfile() && while (!xtrabackup_copy_logfile() &&
(!metadata_to_lsn || metadata_to_lsn > recv_sys.lsn)) (!metadata_last_lsn || metadata_last_lsn > recv_sys.lsn))
{ {
timespec abstime; timespec abstime;
set_timespec_nsec(abstime, 1000000ULL * xtrabackup_log_copy_interval); set_timespec_nsec(abstime, 1000000ULL * xtrabackup_log_copy_interval);
...@@ -3534,7 +3569,7 @@ static void io_watching_thread() ...@@ -3534,7 +3569,7 @@ static void io_watching_thread()
mysql_mutex_lock(&recv_sys.mutex); mysql_mutex_lock(&recv_sys.mutex);
ut_ad(have_io_watching_thread); ut_ad(have_io_watching_thread);
while (log_copying_running && !metadata_to_lsn) while (log_copying_running && !metadata_last_lsn)
{ {
timespec abstime; timespec abstime;
set_timespec(abstime, 1); set_timespec(abstime, 1);
...@@ -4685,6 +4720,7 @@ static ulong xb_set_max_open_files(rlim_t max_file_limit) ...@@ -4685,6 +4720,7 @@ static ulong xb_set_max_open_files(rlim_t max_file_limit)
static void stop_backup_threads() static void stop_backup_threads()
{ {
ut_ad(metadata_last_lsn);
mysql_cond_broadcast(&log_copying_stop); mysql_cond_broadcast(&log_copying_stop);
if (log_copying_running || have_io_watching_thread) if (log_copying_running || have_io_watching_thread)
...@@ -4708,38 +4744,58 @@ static void stop_backup_threads() ...@@ -4708,38 +4744,58 @@ static void stop_backup_threads()
mysql_cond_destroy(&log_copying_stop); mysql_cond_destroy(&log_copying_stop);
} }
/** Implement the core of --backup /**
@return whether the operation succeeded */ Wait for enough log to be copied.
bool Backup_datasinks::backup_low() @return whether log_copying_thread() copied everything until the target lsn
*/
static bool backup_wait_for_commit_lsn()
{ {
mysql_mutex_lock(&recv_sys.mutex); lsn_t lsn= get_current_lsn(mysql_connection);
ut_ad(!metadata_to_lsn); mysql_mutex_lock(&recv_sys.mutex);
ut_ad(!metadata_to_lsn);
ut_ad(!metadata_last_lsn);
/* read the latest checkpoint lsn */ lsn_t last_lsn= recv_sys.lsn;
{
const lsn_t lsn = recv_sys.lsn;
if (recv_sys.find_checkpoint() == DB_SUCCESS
&& log_sys.is_latest()) {
metadata_to_lsn = log_sys.next_checkpoint_lsn;
msg("mariabackup: The latest check point"
" (for incremental): '" LSN_PF "'",
metadata_to_lsn);
} else {
msg("Error: recv_sys.find_checkpoint() failed.");
}
recv_sys.lsn = lsn; /* read the latest checkpoint lsn */
stop_backup_threads(); if (recv_sys.find_checkpoint() == DB_SUCCESS && log_sys.is_latest())
} {
if (log_sys.next_checkpoint_lsn > lsn)
lsn= log_sys.next_checkpoint_lsn;
metadata_to_lsn= log_sys.next_checkpoint_lsn;
msg("mariabackup: The latest check point (for incremental): '"
LSN_PF "'", metadata_to_lsn);
}
else
{
msg("Error: recv_sys.find_checkpoint() failed.");
metadata_last_lsn= 1;
stop_backup_threads();
mysql_mutex_unlock(&recv_sys.mutex);
return false;
}
if (metadata_to_lsn && xtrabackup_copy_logfile()) { recv_sys.lsn= last_lsn;
mysql_mutex_unlock(&recv_sys.mutex); ut_ad(metadata_to_lsn);
ds_close(dst_log_file); metadata_last_lsn= lsn;
dst_log_file = NULL;
return false;
}
mysql_mutex_unlock(&recv_sys.mutex); last_lsn= backup_wait_for_lsn_low(LSN_MAX);
metadata_last_lsn= last_lsn;
stop_backup_threads();
mysql_mutex_unlock(&recv_sys.mutex);
return backup_wait_timeout(lsn, last_lsn);
}
/** Implement the core of --backup
@return whether the operation succeeded */
bool Backup_datasinks::backup_low()
{
ut_d(mysql_mutex_lock(&recv_sys.mutex));
ut_ad(metadata_last_lsn);
ut_ad(metadata_to_lsn);
ut_ad(!log_copying_running);
ut_d(mysql_mutex_unlock(&recv_sys.mutex));
if (ds_close(dst_log_file) || !metadata_to_lsn) { if (ds_close(dst_log_file) || !metadata_to_lsn) {
dst_log_file = NULL; dst_log_file = NULL;
...@@ -4756,7 +4812,7 @@ bool Backup_datasinks::backup_low() ...@@ -4756,7 +4812,7 @@ bool Backup_datasinks::backup_low()
for (uint32_t id : failed_ids) { for (uint32_t id : failed_ids) {
msg("mariabackup: Failed to read undo log " msg("mariabackup: Failed to read undo log "
"tablespace space id %d and there is no undo " "tablespace space id " UINT32PF " and there is no undo "
"tablespace truncation redo record.", "tablespace truncation redo record.",
id); id);
} }
...@@ -4843,17 +4899,17 @@ class InnodbDataCopier { ...@@ -4843,17 +4899,17 @@ class InnodbDataCopier {
// TODO: this came from the old code, where it was not thread-safe // TODO: this came from the old code, where it was not thread-safe
// too, use separate mysql connection per thread here // too, use separate mysql connection per thread here
DBUG_MARIABACKUP_EVENT("before_copy", node->space->name()); DBUG_MARIABACKUP_EVENT("before_copy", node->space->name());
DBUG_EXECUTE_FOR_KEY("wait_innodb_redo_before_copy", DBUG_EXECUTE_FOR_KEY("wait_innodb_redo_before_copy",
node->space->name(), node->space->name(),
backup_wait_for_lsn( backup_wait_for_lsn(
get_current_lsn(mysql_connection));); get_current_lsn(mysql_connection)););
/* copy the datafile */ /* copy the datafile */
if(xtrabackup_copy_datafile(m_backup_datasinks.m_data, if(xtrabackup_copy_datafile(m_backup_datasinks.m_data,
m_backup_datasinks.m_meta, m_backup_datasinks.m_meta,
node, thread_num, NULL, node, thread_num, NULL,
xtrabackup_incremental xtrabackup_incremental
? wf_incremental : wf_write_through, ? wf_incremental : wf_write_through,
m_corrupted_pages)) m_corrupted_pages))
die("mariabackup: Error: failed to copy datafile."); die("mariabackup: Error: failed to copy datafile.");
// TODO: this came from the old code, where it was not thread-safe // TODO: this came from the old code, where it was not thread-safe
// too, use separate mysql connection per thread here // too, use separate mysql connection per thread here
...@@ -4861,8 +4917,8 @@ class InnodbDataCopier { ...@@ -4861,8 +4917,8 @@ class InnodbDataCopier {
m_tasks.finish_task(1); m_tasks.finish_task(1);
} }
Backup_datasinks &m_backup_datasinks; Backup_datasinks &m_backup_datasinks;
CorruptedPages &m_corrupted_pages; CorruptedPages &m_corrupted_pages;
TasksGroup m_tasks; TasksGroup m_tasks;
}; };
...@@ -5063,9 +5119,9 @@ class BackupStages { ...@@ -5063,9 +5119,9 @@ class BackupStages {
return false; return false;
} }
msg("Waiting for log copy thread to read lsn %llu", if (!backup_wait_for_lsn(server_lsn_after_lock)) {
server_lsn_after_lock); return false;
backup_wait_for_lsn(server_lsn_after_lock); }
corrupted_pages.backup_fix_ddl(backup_datasinks.m_data, corrupted_pages.backup_fix_ddl(backup_datasinks.m_data,
backup_datasinks.m_meta); backup_datasinks.m_meta);
...@@ -5101,11 +5157,28 @@ class BackupStages { ...@@ -5101,11 +5157,28 @@ class BackupStages {
} }
// Copy log tables tail // Copy log tables tail
if (!m_common_backup.copy_log_tables(true)) { if (!m_common_backup.copy_log_tables(true) ||
!m_common_backup.close_log_tables()) {
msg("Error on copy log tables"); msg("Error on copy log tables");
return false; return false;
} }
// Copy just enough log to cover the latest commit.
// Meanwhile, there could be some active transactions
// that are modifying the database and writing more
// log. Not copying log for them will save busy work
// and avoid some rollback of the incomplete
// transactions after the backup has been restored.
//
// We find the current InnoDB LSN by executing
// SHOW ENGINE INNODB STATUS, which in the case of
// general_log=1, log_output='TABLES'
// would be written to the table mysql.general_log
// that we just finished copying above.
if (!backup_wait_for_commit_lsn()) {
return false;
}
// Copy stats tables // Copy stats tables
if (!m_common_backup.copy_stats_tables()) { if (!m_common_backup.copy_stats_tables()) {
msg("Error on copy stats tables"); msg("Error on copy stats tables");
...@@ -5123,11 +5196,6 @@ class BackupStages { ...@@ -5123,11 +5196,6 @@ class BackupStages {
return false; return false;
} }
if (!m_common_backup.close_log_tables()) {
msg("Error on close log tables");
return false;
}
if (!backup_files_from_datadir(backup_datasinks.m_data, if (!backup_files_from_datadir(backup_datasinks.m_data,
fil_path_to_mysql_datadir, fil_path_to_mysql_datadir,
"aws-kms-key")) { "aws-kms-key")) {
...@@ -5269,13 +5337,14 @@ static bool xtrabackup_backup_func() ...@@ -5269,13 +5337,14 @@ static bool xtrabackup_backup_func()
undo_space_trunc = backup_undo_trunc; undo_space_trunc = backup_undo_trunc;
first_page_init = backup_first_page_op; first_page_init = backup_first_page_op;
metadata_to_lsn = 0; metadata_to_lsn = 0;
metadata_last_lsn = 0;
/* initialize components */ /* initialize components */
if(innodb_init_param()) { if(innodb_init_param()) {
fail: fail:
if (log_copying_running) { if (log_copying_running) {
mysql_mutex_lock(&recv_sys.mutex); mysql_mutex_lock(&recv_sys.mutex);
metadata_to_lsn = 1; metadata_last_lsn = 1;
stop_backup_threads(); stop_backup_threads();
mysql_mutex_unlock(&recv_sys.mutex); mysql_mutex_unlock(&recv_sys.mutex);
} }
...@@ -5460,14 +5529,9 @@ static bool xtrabackup_backup_func() ...@@ -5460,14 +5529,9 @@ static bool xtrabackup_backup_func()
xb_data_files_close(); xb_data_files_close();
/* Make sure that the latest checkpoint was included */ ut_ad(!log_copying_running);
if (metadata_to_lsn > recv_sys.lsn) { ut_ad(metadata_to_lsn <= recv_sys.lsn);
msg("Error: failed to copy enough redo log (" ut_ad(metadata_last_lsn == recv_sys.lsn);
"LSN=" LSN_PF "; checkpoint LSN=" LSN_PF ").",
recv_sys.lsn, metadata_to_lsn);
goto fail;
}
innodb_shutdown(); innodb_shutdown();
log_file_op = NULL; log_file_op = NULL;
undo_space_trunc = NULL; undo_space_trunc = NULL;
......
CREATE TABLE t(i INT) CREATE TABLE t(i INT)
ENGINE ARIA TRANSACTIONAL=1 ROW_FORMAT=PAGE PAGE_CHECKSUM=1; ENGINE ARIA TRANSACTIONAL=1 ROW_FORMAT=PAGE PAGE_CHECKSUM=1;
SET GLOBAL general_log = 0;
TRUNCATE mysql.general_log;
SET GLOBAL general_log = 1; SET GLOBAL general_log = 1;
SET GLOBAL log_output = 'TABLE'; SET GLOBAL log_output = 'TABLE';
INSERT INTO t VALUES (1); INSERT INTO t VALUES (1);
......
...@@ -7,9 +7,9 @@ ...@@ -7,9 +7,9 @@
CREATE TABLE t(i INT) CREATE TABLE t(i INT)
ENGINE ARIA TRANSACTIONAL=1 ROW_FORMAT=PAGE PAGE_CHECKSUM=1; ENGINE ARIA TRANSACTIONAL=1 ROW_FORMAT=PAGE PAGE_CHECKSUM=1;
--let $general_log_old = `SELECT @@global.general_log` # Truncate the log in order to make the test ./mtr --repeat proof
--let $log_output_old = `SELECT @@global.log_output` SET GLOBAL general_log = 0;
TRUNCATE mysql.general_log;
SET GLOBAL general_log = 1; SET GLOBAL general_log = 1;
SET GLOBAL log_output = 'TABLE'; SET GLOBAL log_output = 'TABLE';
...@@ -43,7 +43,3 @@ SELECT * FROM mysql.general_log ...@@ -43,7 +43,3 @@ SELECT * FROM mysql.general_log
--rmdir $targetdir --rmdir $targetdir
DROP TABLE t; DROP TABLE t;
--disable_query_log
--eval SET GLOBAL general_log = $general_log_old
--eval SET GLOBAL log_output = $log_output_old
--enable_query_log
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment