Commit 644361af authored by unknown's avatar unknown

WL#3072 - Maria recovery

maria_read_log used to always print a warning message at startup
to say it is unsafe if ALTER TABLE was used. Now it prints it only
if the log does show the problem (=ALTER TABLE or CREATE SELECT, which
both disable logging of REDO_INSERT*).
For that, when ha_maria::external_lock() disables transactionality
it writes a LOGREC_INCOMPLETE_LOG to the log, which "maria_read_log -a"
picks up to write a warning.
REPAIR TABLE also disables those REDO_INSERT* but as maria_read_log
executes LOGREC_REDO_REPAIR no warning is needed.


storage/maria/ha_maria.cc:
  as we now log a record when disabling transactionality, we need the
  TRN to be set up first
storage/maria/ma_blockrec.c:
  comment
storage/maria/ma_loghandler.c:
  new type of log record
storage/maria/ma_loghandler.h:
  new type of log record
storage/maria/ma_recovery.c:
  * maria_apply_log() now returns a count of warnings. What currently
  produces warnings is:
  - skipping applying UNDOs though there are some (=> inconsistent table)
  - replaying log (in maria_read_log) though the log contains some
  ALTER TABLE or CREATE SELECT (log misses REDO_INSERT* for those
  and is so incomplete).
  Count of warnings affects the final message of maria_read_log and
  recovery (though in recovery none of the two conditions above should
  happen).
  * maria_read_log used to always print a warning message at startup
  to say it is unsafe if ALTER TABLE was used. Now it prints it only
  if the log does show the problem, i.e. ALTER TABLE or CREATE SELECT
  was used (both disable logging of REDO_INSERT* as those records are
  not needed for recovery; those missing records in turn make
  recreation-from-scratch, via maria_read_log, impossible). For that,
  when ha_maria::external_lock() disables transactionality,
  _ma_tmp_disable_logging_for_table() writes a LOGREC_INCOMPLETE_LOG to
  the log, which maria_apply_log() picks up to write a warning.
storage/maria/ma_recovery.h:
  maria_apply_log() returns a count of warnings
storage/maria/maria_def.h:
  _ma_tmp_disable_logging_for_table() grows so becomes a function
storage/maria/maria_read_log.c:
  maria_apply_log can now return a count of warnings, to temper the
  "SUCCESS" message printed in the end by maria_read_log.
  Advise users to make a backup first.
parent 9c6f8a2b
......@@ -2043,20 +2043,6 @@ int ha_maria::external_lock(THD *thd, int lock_type)
goto skip_transaction;
if (lock_type != F_UNLCK)
{
if (!thd->transaction.on)
{
/*
No need to log REDOs/UNDOs. If this is an internal temporary table
which will be renamed to a permanent table (like in ALTER TABLE),
the rename happens after unlocking so will be durable (and the table
will get its create_rename_lsn).
Note: if we wanted to enable users to have an old backup and apply
tons of archived logs to roll-forward, we could then not disable
REDOs/UNDOs in this case.
*/
DBUG_PRINT("info", ("Disabling logging for table"));
_ma_tmp_disable_logging_for_table(file->s);
}
if (!trn) /* no transaction yet - open it now */
{
trn= trnman_new_trn(& thd->mysys_var->mutex,
......@@ -2077,6 +2063,20 @@ int ha_maria::external_lock(THD *thd, int lock_type)
trans_register_ha(thd, FALSE, maria_hton);
trnman_new_statement(trn);
}
if (!thd->transaction.on)
{
/*
No need to log REDOs/UNDOs. If this is an internal temporary table
which will be renamed to a permanent table (like in ALTER TABLE),
the rename happens after unlocking so will be durable (and the table
will get its create_rename_lsn).
Note: if we wanted to enable users to have an old backup and apply
tons of archived logs to roll-forward, we could then not disable
REDOs/UNDOs in this case.
*/
DBUG_PRINT("info", ("Disabling logging for table"));
_ma_tmp_disable_logging_for_table(file, TRUE);
}
}
else
{
......
......@@ -5256,7 +5256,9 @@ uint _ma_apply_redo_purge_blocks(MARIA_HA *info,
we need to distinguish between blob page (too big, can't pin, and
needn't pin because in a group there is a single REDO touching the
page) and head/tail.
When merged with newer code this should become possible.
When merged with newer code this should become possible, as full
pages will be purged with different types of records than head/tail
pages.
*/
if (pagecache_write(share->pagecache,
&info->dfile, page+i, 0,
......
......@@ -536,6 +536,11 @@ static LOG_DESC INIT_LOGREC_LONG_TRANSACTION_ID=
{LOGRECTYPE_FIXEDLENGTH, 6, 6, NULL, NULL, NULL, 0,
"long_transaction_id", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
/*
  Descriptor for log records of type LOGREC_INCOMPLETE_LOG (installed into
  log_record_type_descriptor[] by loghandler_init()).
  The record is of the fixed-length class and both size members are
  FILEID_STORE_SIZE; all pointer members are NULL.
  NOTE(review): presumably the record body holds only the table's file id,
  which is why the sizes equal FILEID_STORE_SIZE - confirm against the
  LOG_DESC definition.
*/
static LOG_DESC INIT_LOGREC_INCOMPLETE_LOG=
{LOGRECTYPE_FIXEDLENGTH, FILEID_STORE_SIZE, FILEID_STORE_SIZE,
NULL, NULL, NULL, 0,
"incomplete_log", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
const myf log_write_flags= MY_WME | MY_NABP | MY_WAIT_IF_FULL;
static void loghandler_init()
......@@ -603,12 +608,14 @@ static void loghandler_init()
INIT_LOGREC_FILE_ID;
log_record_type_descriptor[LOGREC_LONG_TRANSACTION_ID]=
INIT_LOGREC_LONG_TRANSACTION_ID;
for (i= LOGREC_LONG_TRANSACTION_ID + 1;
log_record_type_descriptor[LOGREC_INCOMPLETE_LOG]=
INIT_LOGREC_INCOMPLETE_LOG;
for (i= LOGREC_INCOMPLETE_LOG + 1;
i < LOGREC_NUMBER_OF_TYPES;
i++)
log_record_type_descriptor[i].class= LOGRECTYPE_NOT_ALLOWED;
DBUG_EXECUTE("info",
check_translog_description_table(LOGREC_LONG_TRANSACTION_ID););
check_translog_description_table(LOGREC_INCOMPLETE_LOG););
};
......
......@@ -127,6 +127,7 @@ enum translog_record_type
LOGREC_REDO_REPAIR_TABLE,
LOGREC_FILE_ID,
LOGREC_LONG_TRANSACTION_ID,
LOGREC_INCOMPLETE_LOG,
LOGREC_RESERVED_FUTURE_EXTENSION= 63
};
#define LOGREC_NUMBER_OF_TYPES 64 /* Maximum, can't be extended */
......
......@@ -59,6 +59,7 @@ static my_bool skip_DDLs; /**< if REDO phase should skip DDL records */
static my_bool checkpoint_useful;
static ulonglong now; /**< for tracking execution time of phases */
static char preamble[]= "Maria engine: starting recovery; ";
uint warnings; /**< count of warnings */
#define prototype_redo_exec_hook(R) \
static int exec_REDO_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec)
......@@ -77,6 +78,7 @@ prototype_redo_exec_hook(REDO_RENAME_TABLE);
prototype_redo_exec_hook(REDO_REPAIR_TABLE);
prototype_redo_exec_hook(REDO_DROP_TABLE);
prototype_redo_exec_hook(FILE_ID);
prototype_redo_exec_hook(INCOMPLETE_LOG);
prototype_redo_exec_hook(REDO_INSERT_ROW_HEAD);
prototype_redo_exec_hook(REDO_INSERT_ROW_TAIL);
prototype_redo_exec_hook(REDO_PURGE_ROW_HEAD);
......@@ -167,6 +169,7 @@ int maria_recover(void)
{
int res= 1;
FILE *trace_file;
uint warnings_count;
DBUG_ENTER("maria_recover");
DBUG_ASSERT(!maria_in_recovery);
......@@ -180,9 +183,22 @@ int maria_recover(void)
tprint(trace_file, "TRACE of the last MARIA recovery from mysqld\n");
DBUG_ASSERT(maria_pagecache->inited);
res= maria_apply_log(LSN_IMPOSSIBLE, MARIA_LOG_APPLY, trace_file,
TRUE, TRUE, TRUE);
TRUE, TRUE, TRUE, &warnings_count);
if (!res)
tprint(trace_file, "SUCCESS\n");
{
if (warnings_count == 0)
tprint(trace_file, "SUCCESS\n");
else
{
tprint(trace_file, "DOUBTFUL (%u warnings, check previous output)\n",
warnings_count);
/*
We asked for execution of UNDOs, and skipped DDLs, so shouldn't get
any warnings.
*/
DBUG_ASSERT(0);
}
}
if (trace_file)
fclose(trace_file);
maria_in_recovery= FALSE;
......@@ -200,6 +216,7 @@ int maria_recover(void)
@param skip_DDLs_arg Should DDL records (CREATE/RENAME/DROP/REPAIR)
be skipped by the REDO phase or not
@param take_checkpoints Should we take checkpoints or not.
@param[out] warnings_count Count of warnings will be put there
@todo This trace_file thing is primitive; soon we will make it similar to
ma_check_print_warning() etc, and a successful recovery does not need to
......@@ -213,7 +230,7 @@ int maria_recover(void)
int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply,
FILE *trace_file,
my_bool should_run_undo_phase, my_bool skip_DDLs_arg,
my_bool take_checkpoints)
my_bool take_checkpoints, uint *warnings_count)
{
int error= 0;
uint unfinished_trans;
......@@ -222,6 +239,7 @@ int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply,
DBUG_ASSERT(apply == MARIA_LOG_APPLY || !should_run_undo_phase);
DBUG_ASSERT(!maria_multi_threaded);
warnings= 0;
/* checkpoints can happen only if TRNs have been built */
DBUG_ASSERT(should_run_undo_phase || !take_checkpoints);
all_active_trans= (struct st_trn_for_recovery *)
......@@ -238,36 +256,7 @@ int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply,
recovery_message_printed= REC_MSG_NONE;
tracef= trace_file;
if (!(skip_DDLs= skip_DDLs_arg))
{
/*
Example of what can go wrong when replaying DDLs:
CREATE TABLE t (logged); INSERT INTO t VALUES(1) (logged);
ALTER TABLE t ... which does
CREATE a temporary table #sql... (logged)
INSERT data from t into #sql... (not logged)
RENAME #sql TO t (logged)
Removing tables by hand and replaying the log will leave in the
end an empty table "t": missing records. If after the RENAME an INSERT
into t was done, that row had number 1 in its page, executing the
REDO_INSERT_ROW_HEAD on the recreated empty t will fail (assertion
failure in _ma_apply_redo_insert_row_head_or_tail(): new data page is
created whereas rownr is not 0).
Another issue is that replaying of DDLs is not correct enough to work if
there was a crash during a DDL (see comment in execution of
REDO_RENAME_TABLE ).
*/
/**
@todo RECOVERY BUG instead of this warning, whenever log becomes
incomplete (ALTER TABLE, CREATE SELECT) write a log record
LOGREC_INCOMPLETE; when seeing this record, print warning below.
*/
tprint(tracef, "WARNING: MySQL server currently disables log records"
" about insertion of data by ALTER TABLE"
" (copy_data_between_tables()), applying of log records may"
" well not work. Additionally, applying of DDL records will"
" cause damage if there are tables left by a crash of a DDL.\n");
}
skip_DDLs= skip_DDLs_arg;
if (from_lsn == LSN_IMPOSSIBLE)
{
......@@ -313,7 +302,7 @@ int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply,
start from the checkpoint and never from before, wrongly skipping REDOs
(tested).
@todo fix this.
@todo fix this; pagecache_write() now can have a rec_lsn argument.
*/
#if 0
if (take_checkpoints && checkpoint_useful)
......@@ -334,8 +323,11 @@ int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply,
goto err;
}
else if (unfinished_trans > 0)
tprint(tracef, "WARNING: %u unfinished transactions; some tables may be"
" left inconsistent!\n", unfinished_trans);
{
tprint(tracef, "***WARNING: %u unfinished transactions; some tables may"
" be left inconsistent!***\n", unfinished_trans);
warnings++;
}
old_now= now;
now= my_getsystime();
......@@ -386,6 +378,7 @@ int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply,
log_record_buffer.str= NULL;
log_record_buffer.length= 0;
ma_checkpoint_end();
*warnings_count= warnings;
if (recovery_message_printed != REC_MSG_NONE)
{
/** @todo RECOVERY BUG all prints to stderr should go to error log */
......@@ -493,6 +486,48 @@ prototype_redo_exec_hook_dummy(CHECKPOINT)
}
prototype_redo_exec_hook(INCOMPLETE_LOG)
{
  /*
    REDO-phase hook for LOGREC_INCOMPLETE_LOG. Never fails (always returns 0);
    its only effects are a trace line and, possibly, one more warning counted.

    Why the warning exists - example of what can go wrong when replaying DDLs:
      CREATE TABLE t (logged); INSERT INTO t VALUES(1) (logged);
      ALTER TABLE t ... which does
        CREATE a temporary table #sql... (logged)
        INSERT data from t into #sql... (not logged)
        RENAME #sql TO t (logged)
    If tables are removed by hand and the log is replayed, "t" ends up empty:
    records are missing. If, after the RENAME, an INSERT into t was done, that
    row had number 1 in its page, so executing the REDO_INSERT_ROW_HEAD on the
    recreated empty t will fail (assertion failure in
    _ma_apply_redo_insert_row_head_or_tail(): new data page is created whereas
    rownr is not 0).
    Hence, when the server disables logging for ALTER TABLE or CREATE SELECT,
    it logs LOGREC_INCOMPLETE_LOG to warn maria_read_log and then the user.
    A separate issue is that replaying of DDLs is not correct enough to work
    if there was a crash during a DDL (see comment in execution of
    REDO_RENAME_TABLE).
  */
  if (skip_DDLs)
  {
    /* DDL records are being skipped: nothing to warn about */
    tprint(tracef, "we skip DDLs\n");
  }
  else if (get_MARIA_HA_from_REDO_record(rec) != NULL)
  {
    /* the table is known, so the hole in the log does matter: warn */
    tprint(tracef, "***WARNING: MySQL server currently logs no records"
           " about insertion of data by ALTER TABLE and CREATE SELECT,"
           " as they are not necessary for recovery;"
           " present applying of log records may well not work.***\n");
    warnings++;
  }
  /* else: no such table, don't need to warn */
  return 0;
}
prototype_redo_exec_hook(REDO_CREATE_TABLE)
{
File dfile= -1, kfile= -1;
......@@ -708,7 +743,7 @@ prototype_redo_exec_hook(REDO_RENAME_TABLE)
scratch. It means that "maria_read_log -a" should not be used on a
database which just crashed during a DDL. And also ALTER TABLE does not
log insertions of records into the temporary table, so replaying may
fail (see comment and warning in maria_apply_log()).
fail (grep for INCOMPLETE_LOG in files).
*/
info= maria_open(old_name, O_RDONLY, HA_OPEN_FOR_REPAIR);
if (info)
......@@ -1069,7 +1104,7 @@ static int new_table(uint16 sid, const char *name,
goto end;
}
/* don't log any records for this work */
_ma_tmp_disable_logging_for_table(share);
_ma_tmp_disable_logging_for_table(info, FALSE);
/* _ma_unpin_all_pages() reads info->trn: */
info->trn= &dummy_transaction_object;
/* execution of some REDO records relies on data_file_length */
......@@ -1168,20 +1203,6 @@ prototype_redo_exec_hook(REDO_INSERT_ROW_HEAD)
goto end;
}
buff= log_record_buffer.str;
/**
@todo RECOVERY BUG
we stamp page with UNDO's LSN. Assume an operation logs REDO-REDO-UNDO
where the two REDOs are about the same page (that is possible only with a
head or tail page, not blob page). Then recovery applies first REDO and
skips second REDO which is wrong. Solution:
a)
* when applying REDO to head or tail, keep page pinned, don't stamp it,
* when applying REDO to blob page, stamp it with UNDO's LSN
* when seeing UNDO, unpin head/tail pages and stamp them with UNDO's
LSN.
or b) when applying REDO, stamp page with REDO's LSN (=> difference in
'cmp' between run-time and recovery, need a special 'cmp'...).
*/
if (_ma_apply_redo_insert_row_head_or_tail(info, current_group_end_lsn,
HEAD_PAGE,
buff + FILEID_STORE_SIZE,
......@@ -1659,6 +1680,7 @@ static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply)
install_redo_exec_hook(REDO_REPAIR_TABLE);
install_redo_exec_hook(REDO_DROP_TABLE);
install_redo_exec_hook(FILE_ID);
install_redo_exec_hook(INCOMPLETE_LOG);
install_redo_exec_hook(REDO_INSERT_ROW_HEAD);
install_redo_exec_hook(REDO_INSERT_ROW_TAIL);
install_redo_exec_hook(REDO_PURGE_ROW_HEAD);
......@@ -2371,6 +2393,39 @@ static my_bool close_one_table(const char *name, TRANSLOG_ADDRESS addr)
}
/**
   Switches off logging for a table, for the time being.

   When the caller says that switching logging off leaves a hole in the log,
   a LOGREC_INCOMPLETE_LOG record is written first so that log readers can
   warn about it.

   @param  info            table
   @param  log_incomplete  TRUE if this disabling makes the log incomplete

   @note The REDO phase, for example, disables logging too, but that does not
   make the log incomplete, so it passes FALSE.
*/

void _ma_tmp_disable_logging_for_table(MARIA_HA *info,
                                       my_bool log_incomplete)
{
  MARIA_SHARE *const table_share= info->s;

  if (log_incomplete)
  {
    LSN record_lsn;
    uchar fileid_buf[FILEID_STORE_SIZE];
    LEX_STRING parts[TRANSLOG_INTERNAL_PARTS + 1];
    LEX_STRING *const payload= &parts[TRANSLOG_INTERNAL_PARTS + 0];

    /* the record's single user part is the FILEID_STORE_SIZE-byte buffer */
    payload->str= (char*) fileid_buf;
    payload->length= sizeof(fileid_buf);
    translog_write_record(&record_lsn, LOGREC_INCOMPLETE_LOG,
                          info->trn, info, sizeof(fileid_buf),
                          TRANSLOG_INTERNAL_PARTS + 1, parts,
                          fileid_buf, NULL);
  }

  /*
    Order matters: had logging been disabled before writing the record above,
    the record wouldn't reach the log.
  */
  table_share->now_transactional= FALSE;
  table_share->page_type= PAGECACHE_PLAIN_PAGE;
}
static void print_redo_phase_progress(TRANSLOG_ADDRESS addr)
{
static int end_logno= FILENO_IMPOSSIBLE, end_offset, percentage_printed= 0;
......
......@@ -30,5 +30,5 @@ int maria_recover(void);
int maria_apply_log(LSN lsn, enum maria_apply_log_way apply,
FILE *trace_file,
my_bool execute_undo_phase, my_bool skip_DDLs,
my_bool take_checkpoints);
my_bool take_checkpoints, uint *warnings_count);
C_MODE_END
......@@ -952,8 +952,8 @@ int _ma_update_create_rename_lsn_sub(MARIA_SHARE *share,
LSN lsn, my_bool do_sync);
void _ma_unpin_all_pages(MARIA_HA *info, LSN undo_lsn);
#define _ma_tmp_disable_logging_for_table(S) \
{ (S)->now_transactional= FALSE; (S)->page_type= PAGECACHE_PLAIN_PAGE; }
void _ma_tmp_disable_logging_for_table(MARIA_HA *info,
my_bool log_incomplete);
/*
  Re-enables logging for share S if the table was created transactional:
  copies base.born_transactional into now_transactional and, when that value
  is true, sets page_type back to PAGECACHE_LSN_PAGE.
  Note: expands to a bare brace block and evaluates S more than once, so S
  should be an expression without side effects.
*/
#define _ma_reenable_logging_for_table(S) \
{ if (((S)->now_transactional= (S)->base.born_transactional)) \
(S)->page_type= PAGECACHE_LSN_PAGE; }
......
......@@ -38,6 +38,7 @@ int main(int argc, char **argv)
{
LSN lsn;
char **default_argv;
uint warnings_count;
MY_INIT(argv[0]);
my_progname_short= my_progname+dirname_length(my_progname);
......@@ -106,9 +107,13 @@ int main(int argc, char **argv)
if (maria_apply_log(lsn, opt_apply ? MARIA_LOG_APPLY :
(opt_check ? MARIA_LOG_CHECK :
MARIA_LOG_DISPLAY_HEADER), opt_silent ? NULL : stdout,
opt_apply_undo, FALSE, FALSE))
opt_apply_undo, FALSE, FALSE, &warnings_count))
goto err;
fprintf(stdout, "%s: SUCCESS\n", my_progname_short);
if (warnings_count == 0)
fprintf(stdout, "%s: SUCCESS\n", my_progname_short);
else
fprintf(stdout, "%s: DOUBTFUL (%u warnings, check previous output)\n",
my_progname_short, warnings_count);
goto end;
err:
......@@ -130,7 +135,8 @@ int main(int argc, char **argv)
static struct my_option my_long_options[] =
{
{"apply", 'a',
"Apply log to tables. Will display a lot of information if not run with --silent",
"Apply log to tables: modifies tables! you should make a backup first! "
" Displays a lot of information if not run with --silent",
(uchar **) &opt_apply, (uchar **) &opt_apply, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"check", 'c',
......@@ -143,7 +149,7 @@ static struct my_option my_long_options[] =
#endif
{"help", '?', "Display this help and exit.",
0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0},
{"only-display", 'o', "display brief info about records's header",
{"only-display", 'o', "display brief info read from records' header",
(uchar **) &opt_only_display, (uchar **) &opt_only_display, 0, GET_BOOL,
NO_ARG,0, 0, 0, 0, 0, 0},
{ "page_buffer_size", 'P', "",
......@@ -154,7 +160,7 @@ static struct my_option my_long_options[] =
{"silent", 's', "Print less information during apply/undo phase",
(uchar **) &opt_silent, (uchar **) &opt_silent, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"undo", 'u', "Apply undos to tables. (disable with --disable-undo)",
{"undo", 'u', "Apply UNDO records to tables. (disable with --disable-undo)",
(uchar **) &opt_apply_undo, (uchar **) &opt_apply_undo, 0,
GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0},
{"version", 'V', "Print version and exit.",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment