Commit 8f714513 authored by unknown

WL#3072 Maria recovery

- fixes (in recovery, checkpoint, log handler) of bugs found
during testing.
- new option --check for maria_read_log: when combined with --only-display
(which by itself only reads the record header), it also reads the full record,
for debugging.


storage/maria/ma_loghandler.c:
  importing patch from Sanja for bug of translog_next_LSN() found
  during recovery
storage/maria/ma_loghandler_lsn.h:
  better types (0L is 4 bytes on some platforms, which causes problems
  when used in lsn_store(): right shift >= width of type).
storage/maria/ma_pagecache.c:
  work around infamous "PAGECACHE_PLAIN_PAGE used for transactional
  tables in special case"; REDO phase disables logging and this causes
  pages to be PAGECACHE_PLAIN_PAGE, thus ignored wrongly by the
  checkpoint taken at the end of the REDO phase.
storage/maria/ma_recovery.c:
  - remove a #ifdef which broke maria_read_log in non-debug builds (no output!)
  - support for maria_read_log --check
  - detect record corruption before opening the table
  - updating is_of_horizon requires writing the state
  - fix for wrong parsing of checkpoint record by recovery
storage/maria/ma_recovery.h:
  support for maria_read_log --check
storage/maria/maria_read_log.c:
  Option --check: --only-display only looks at the header;
  adding --check tries a translog_read_record() to see if record can
  be fully read (this is to find bugs).
parent ef9a9c49
...@@ -5274,7 +5274,6 @@ my_bool translog_init_scanner(LSN lsn, ...@@ -5274,7 +5274,6 @@ my_bool translog_init_scanner(LSN lsn,
DBUG_ENTER("translog_init_scanner"); DBUG_ENTER("translog_init_scanner");
DBUG_PRINT("enter", ("Scanner: 0x%lx LSN: (0x%lu,0x%lx)", DBUG_PRINT("enter", ("Scanner: 0x%lx LSN: (0x%lu,0x%lx)",
(ulong) scanner, LSN_IN_PARTS(lsn))); (ulong) scanner, LSN_IN_PARTS(lsn)));
DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0);
DBUG_ASSERT(translog_inited == 1); DBUG_ASSERT(translog_inited == 1);
data.addr= &scanner->page_addr; data.addr= &scanner->page_addr;
...@@ -6632,6 +6631,26 @@ LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon) ...@@ -6632,6 +6631,26 @@ LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon)
DBUG_RETURN(LSN_IMPOSSIBLE); DBUG_RETURN(LSN_IMPOSSIBLE);
translog_init_scanner(addr, 0, &scanner, 1); translog_init_scanner(addr, 0, &scanner, 1);
/*
addr may point not to a chunk beginning but to a page end, i.e. to
the beginning of the next page.
*/
if (addr % TRANSLOG_PAGE_SIZE == 0)
{
/*
We emulate the page end which caused such a horizon value, in order
to trigger translog_scanner_eop().
We can't just increase addr by the page header overhead because it
can be the file end, so we let translog_get_next_chunk() skip
to the next page in the correct way.
*/
scanner.page_addr-= TRANSLOG_PAGE_SIZE;
scanner.page_offset= TRANSLOG_PAGE_SIZE;
#ifndef DBUG_OFF
scanner.page= NULL; /* prevent using incorrect page content */
#endif
}
/* addr can point not to a chunk beginning but to a page end */ /* addr can point not to a chunk beginning but to a page end */
if (translog_scanner_eop(&scanner)) if (translog_scanner_eop(&scanner))
{ {
......
...@@ -83,9 +83,9 @@ typedef LSN LSN_WITH_FLAGS; ...@@ -83,9 +83,9 @@ typedef LSN LSN_WITH_FLAGS;
#define FILENO_IMPOSSIBLE 0 /**< log file's numbering starts at 1 */ #define FILENO_IMPOSSIBLE 0 /**< log file's numbering starts at 1 */
#define LOG_OFFSET_IMPOSSIBLE 0 /**< log always has a header */ #define LOG_OFFSET_IMPOSSIBLE 0 /**< log always has a header */
#define LSN_IMPOSSIBLE 0L #define LSN_IMPOSSIBLE ((LSN)0)
/* following LSN also is impossible */ /* following LSN also is impossible */
#define LSN_ERROR 1 #define LSN_ERROR ((LSN)1)
/** @brief some impossible LSN serve as markers */ /** @brief some impossible LSN serve as markers */
#define LSN_REPAIRED_BY_MARIA_CHK ((LSN)2) #define LSN_REPAIRED_BY_MARIA_CHK ((LSN)2)
......
...@@ -3941,8 +3941,18 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, ...@@ -3941,8 +3941,18 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
*/ */
DBUG_ASSERT(block->hash_link != NULL); DBUG_ASSERT(block->hash_link != NULL);
DBUG_ASSERT(block->status & PCBLOCK_CHANGED); DBUG_ASSERT(block->status & PCBLOCK_CHANGED);
/**
@todo RECOVERY BUG
REDO phase uses PAGECACHE_PLAIN_PAGE, so the lines below would
confuse the indirect Checkpoint taken at the end of the REDO phase.
As a result, we collect below even the dirty pages of temporary tables
:( Soon we should have the MARIA_SHARE accessible from the
pagecache's block and then we can test born_transactional.
*/
#ifdef TRANS_TABLES_ALWAYS_USE_LSN_PAGE
if (block->type != PAGECACHE_LSN_PAGE) if (block->type != PAGECACHE_LSN_PAGE)
continue; /* no need to store it */ continue; /* no need to store it */
#endif
stored_list_size++; stored_list_size++;
} }
} }
...@@ -3967,8 +3977,10 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, ...@@ -3967,8 +3977,10 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache,
block; block;
block= block->next_changed) block= block->next_changed)
{ {
#ifdef TRANS_TABLES_ALWAYS_USE_LSN_PAGE
if (block->type != PAGECACHE_LSN_PAGE) if (block->type != PAGECACHE_LSN_PAGE)
continue; /* no need to store it in the checkpoint record */ continue; /* no need to store it in the checkpoint record */
#endif
compile_time_assert(sizeof(block->hash_link->file.file) <= 4); compile_time_assert(sizeof(block->hash_link->file.file) <= 4);
compile_time_assert(sizeof(block->hash_link->pageno) <= 4); compile_time_assert(sizeof(block->hash_link->pageno) <= 4);
int4store(ptr, block->hash_link->file.file); int4store(ptr, block->hash_link->file.file);
......
...@@ -88,7 +88,7 @@ prototype_undo_exec_hook(UNDO_ROW_INSERT); ...@@ -88,7 +88,7 @@ prototype_undo_exec_hook(UNDO_ROW_INSERT);
prototype_undo_exec_hook(UNDO_ROW_DELETE); prototype_undo_exec_hook(UNDO_ROW_DELETE);
prototype_undo_exec_hook(UNDO_ROW_UPDATE); prototype_undo_exec_hook(UNDO_ROW_UPDATE);
static int run_redo_phase(LSN lsn, my_bool apply); static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply);
static uint end_of_redo_phase(my_bool prepare_for_undo_phase); static uint end_of_redo_phase(my_bool prepare_for_undo_phase);
static int run_undo_phase(uint unfinished); static int run_undo_phase(uint unfinished);
static void display_record_position(const LOG_DESC *log_desc, static void display_record_position(const LOG_DESC *log_desc,
...@@ -136,13 +136,11 @@ void tprint(FILE *trace_file, const char *format, ...) ...@@ -136,13 +136,11 @@ void tprint(FILE *trace_file, const char *format, ...)
void tprint(FILE *trace_file __attribute__ ((unused)), void tprint(FILE *trace_file __attribute__ ((unused)),
const char *format __attribute__ ((unused)), ...) const char *format __attribute__ ((unused)), ...)
{ {
#ifdef EXTRA_DEBUG
va_list args; va_list args;
va_start(args, format); va_start(args, format);
if (trace_file != NULL) if (trace_file != NULL)
vfprintf(trace_file, format, args); vfprintf(trace_file, format, args);
va_end(args); va_end(args);
#endif
} }
#define ALERT_USER() DBUG_ASSERT(0) #define ALERT_USER() DBUG_ASSERT(0)
...@@ -177,7 +175,8 @@ int maria_recover(void) ...@@ -177,7 +175,8 @@ int maria_recover(void)
#endif #endif
tprint(trace_file, "TRACE of the last MARIA recovery from mysqld\n"); tprint(trace_file, "TRACE of the last MARIA recovery from mysqld\n");
DBUG_ASSERT(maria_pagecache->inited); DBUG_ASSERT(maria_pagecache->inited);
res= maria_apply_log(LSN_IMPOSSIBLE, TRUE, trace_file, TRUE, TRUE, TRUE); res= maria_apply_log(LSN_IMPOSSIBLE, MARIA_LOG_APPLY, trace_file,
TRUE, TRUE, TRUE);
if (!res) if (!res)
tprint(trace_file, "SUCCESS\n"); tprint(trace_file, "SUCCESS\n");
if (trace_file) if (trace_file)
...@@ -192,7 +191,7 @@ int maria_recover(void) ...@@ -192,7 +191,7 @@ int maria_recover(void)
@param from_lsn LSN from which log reading/applying should start; @param from_lsn LSN from which log reading/applying should start;
LSN_IMPOSSIBLE means "use last checkpoint" LSN_IMPOSSIBLE means "use last checkpoint"
@param apply if log records should be applied or not @param apply how log records should be applied or not
@param trace_file trace file where progress/debug messages will go @param trace_file trace file where progress/debug messages will go
@param skip_DDLs_arg Should DDL records (CREATE/RENAME/DROP/REPAIR) @param skip_DDLs_arg Should DDL records (CREATE/RENAME/DROP/REPAIR)
be skipped by the REDO phase or not be skipped by the REDO phase or not
...@@ -207,7 +206,8 @@ int maria_recover(void) ...@@ -207,7 +206,8 @@ int maria_recover(void)
@retval !=0 Error @retval !=0 Error
*/ */
int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file, int maria_apply_log(LSN from_lsn, enum maria_apply_log_way apply,
FILE *trace_file,
my_bool should_run_undo_phase, my_bool skip_DDLs_arg, my_bool should_run_undo_phase, my_bool skip_DDLs_arg,
my_bool take_checkpoints) my_bool take_checkpoints)
{ {
...@@ -216,7 +216,7 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file, ...@@ -216,7 +216,7 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file,
ulonglong old_now; ulonglong old_now;
DBUG_ENTER("maria_apply_log"); DBUG_ENTER("maria_apply_log");
DBUG_ASSERT(apply || !should_run_undo_phase); DBUG_ASSERT(apply == MARIA_LOG_APPLY || !should_run_undo_phase);
DBUG_ASSERT(!maria_multi_threaded); DBUG_ASSERT(!maria_multi_threaded);
/* checkpoints can happen only if TRNs have been built */ /* checkpoints can happen only if TRNs have been built */
DBUG_ASSERT(should_run_undo_phase || !take_checkpoints); DBUG_ASSERT(should_run_undo_phase || !take_checkpoints);
...@@ -370,7 +370,7 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file, ...@@ -370,7 +370,7 @@ int maria_apply_log(LSN from_lsn, my_bool apply, FILE *trace_file,
if (recovery_message_printed != REC_MSG_NONE) if (recovery_message_printed != REC_MSG_NONE)
{ {
/** @todo RECOVERY BUG all prints to stderr should go to error log */ /** @todo RECOVERY BUG all prints to stderr should go to error log */
fprintf(stderr, "done.\n"); fprintf(stderr, "%s.\n", error ? " failed" : "done");
} }
/* we don't cleanly close tables if we hit some error (may corrupt them) */ /* we don't cleanly close tables if we hit some error (may corrupt them) */
DBUG_RETURN(error); DBUG_RETURN(error);
...@@ -981,10 +981,21 @@ static int new_table(uint16 sid, const char *name, ...@@ -981,10 +981,21 @@ static int new_table(uint16 sid, const char *name,
0 (success): leave table open and return 0. 0 (success): leave table open and return 0.
*/ */
int error= 1; int error= 1;
MARIA_HA *info;
checkpoint_useful= TRUE; checkpoint_useful= TRUE;
if ((name == NULL) || (name[0] == 0))
{
/*
we didn't use DBUG_ASSERT() because such record corruption could
silently pass in the "info == NULL" test below.
*/
tprint(tracef, ", record is corrupted");
info= NULL;
goto end;
}
tprint(tracef, "Table '%s', id %u", name, sid); tprint(tracef, "Table '%s', id %u", name, sid);
MARIA_HA *info= maria_open(name, O_RDWR, HA_OPEN_FOR_REPAIR); info= maria_open(name, O_RDWR, HA_OPEN_FOR_REPAIR);
if (info == NULL) if (info == NULL)
{ {
tprint(tracef, ", is absent (must have been dropped later?)" tprint(tracef, ", is absent (must have been dropped later?)"
...@@ -1563,7 +1574,7 @@ prototype_undo_exec_hook(UNDO_ROW_UPDATE) ...@@ -1563,7 +1574,7 @@ prototype_undo_exec_hook(UNDO_ROW_UPDATE)
} }
static int run_redo_phase(LSN lsn, my_bool apply) static int run_redo_phase(LSN lsn, enum maria_apply_log_way apply)
{ {
TRANSLOG_HEADER_BUFFER rec; TRANSLOG_HEADER_BUFFER rec;
struct st_translog_scanner_data scanner; struct st_translog_scanner_data scanner;
...@@ -1676,7 +1687,21 @@ static int run_redo_phase(LSN lsn, my_bool apply) ...@@ -1676,7 +1687,21 @@ static int run_redo_phase(LSN lsn, my_bool apply)
{ {
const LOG_DESC *log_desc2= &log_record_type_descriptor[rec2.type]; const LOG_DESC *log_desc2= &log_record_type_descriptor[rec2.type];
display_record_position(log_desc2, &rec2, 0); display_record_position(log_desc2, &rec2, 0);
if (apply && display_and_apply_record(log_desc2, &rec2)) if (apply == MARIA_LOG_CHECK)
{
enlarge_buffer(&rec2);
translog_size_t read_len=
translog_read_record(rec2.lsn, 0, rec2.record_length,
log_record_buffer.str, NULL);
if (read_len != rec2.record_length)
{
tprint(tracef, "Cannot read record's body: read %u of"
" %u bytes\n", read_len, rec2.record_length);
goto err;
}
}
if (apply == MARIA_LOG_APPLY &&
display_and_apply_record(log_desc2, &rec2))
{ {
translog_destroy_scanner(&scanner2); translog_destroy_scanner(&scanner2);
goto err; goto err;
...@@ -1698,7 +1723,8 @@ static int run_redo_phase(LSN lsn, my_bool apply) ...@@ -1698,7 +1723,8 @@ static int run_redo_phase(LSN lsn, my_bool apply)
translog_destroy_scanner(&scanner2); translog_destroy_scanner(&scanner2);
} }
} }
if (apply && display_and_apply_record(log_desc, &rec)) if (apply == MARIA_LOG_APPLY &&
display_and_apply_record(log_desc, &rec))
goto err; goto err;
} }
else /* record does not end group */ else /* record does not end group */
...@@ -1904,7 +1930,10 @@ static void prepare_table_for_close(MARIA_HA *info, TRANSLOG_ADDRESS horizon) ...@@ -1904,7 +1930,10 @@ static void prepare_table_for_close(MARIA_HA *info, TRANSLOG_ADDRESS horizon)
*/ */
if (cmp_translog_addr(share->state.is_of_horizon, horizon) < 0 && if (cmp_translog_addr(share->state.is_of_horizon, horizon) < 0 &&
cmp_translog_addr(share->lsn_of_file_id, horizon) < 0) cmp_translog_addr(share->lsn_of_file_id, horizon) < 0)
{
share->state.is_of_horizon= horizon; share->state.is_of_horizon= horizon;
_ma_state_info_write_sub(share->kfile.file, &share->state, 1);
}
_ma_reenable_logging_for_table(share); _ma_reenable_logging_for_table(share);
} }
...@@ -2103,8 +2132,8 @@ static LSN parse_checkpoint_record(LSN lsn) ...@@ -2103,8 +2132,8 @@ static LSN parse_checkpoint_record(LSN lsn)
LSN first_log_write_lsn= lsn_korr(ptr); LSN first_log_write_lsn= lsn_korr(ptr);
ptr+= LSN_STORE_SIZE; ptr+= LSN_STORE_SIZE;
uint name_len= strlen(ptr) + 1; uint name_len= strlen(ptr) + 1;
strmake(name, ptr, sizeof(name)-1);
ptr+= name_len; ptr+= name_len;
strnmov(name, ptr, sizeof(name));
if (new_table(sid, name, kfile, dfile, first_log_write_lsn)) if (new_table(sid, name, kfile, dfile, first_log_write_lsn))
return LSN_ERROR; return LSN_ERROR;
} }
......
...@@ -24,8 +24,11 @@ ...@@ -24,8 +24,11 @@
/* Performs recovery of the engine at start */ /* Performs recovery of the engine at start */
C_MODE_START C_MODE_START
enum maria_apply_log_way
{ MARIA_LOG_APPLY, MARIA_LOG_DISPLAY_HEADER, MARIA_LOG_CHECK };
int maria_recover(void); int maria_recover(void);
int maria_apply_log(LSN lsn, my_bool apply, FILE *trace_file, int maria_apply_log(LSN lsn, enum maria_apply_log_way apply,
FILE *trace_file,
my_bool execute_undo_phase, my_bool skip_DDLs, my_bool execute_undo_phase, my_bool skip_DDLs,
my_bool take_checkpoints); my_bool take_checkpoints);
C_MODE_END C_MODE_END
...@@ -29,7 +29,8 @@ const char *default_dbug_option= "d:t:i:O,\\maria_read_log.trace"; ...@@ -29,7 +29,8 @@ const char *default_dbug_option= "d:t:i:O,\\maria_read_log.trace";
const char *default_dbug_option= "d:t:i:o,/tmp/maria_read_log.trace"; const char *default_dbug_option= "d:t:i:o,/tmp/maria_read_log.trace";
#endif #endif
#endif /* DBUG_OFF */ #endif /* DBUG_OFF */
static my_bool opt_only_display, opt_apply, opt_apply_undo, opt_silent; static my_bool opt_only_display, opt_apply, opt_apply_undo, opt_silent,
opt_check;
static ulong opt_page_buffer_size; static ulong opt_page_buffer_size;
static const char *my_progname_short; static const char *my_progname_short;
...@@ -102,7 +103,9 @@ int main(int argc, char **argv) ...@@ -102,7 +103,9 @@ int main(int argc, char **argv)
LSN_IN_PARTS(lsn)); LSN_IN_PARTS(lsn));
fprintf(stdout, "TRACE of the last maria_read_log\n"); fprintf(stdout, "TRACE of the last maria_read_log\n");
if (maria_apply_log(lsn, opt_apply, opt_silent ? NULL : stdout, if (maria_apply_log(lsn, opt_apply ? MARIA_LOG_APPLY :
(opt_check ? MARIA_LOG_CHECK :
MARIA_LOG_DISPLAY_HEADER), opt_silent ? NULL : stdout,
opt_apply_undo, FALSE, FALSE)) opt_apply_undo, FALSE, FALSE))
goto err; goto err;
fprintf(stdout, "%s: SUCCESS\n", my_progname_short); fprintf(stdout, "%s: SUCCESS\n", my_progname_short);
...@@ -130,6 +133,10 @@ static struct my_option my_long_options[] = ...@@ -130,6 +133,10 @@ static struct my_option my_long_options[] =
"Apply log to tables. Will display a lot of information if not run with --silent", "Apply log to tables. Will display a lot of information if not run with --silent",
(uchar **) &opt_apply, (uchar **) &opt_apply, 0, (uchar **) &opt_apply, (uchar **) &opt_apply, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"check", 'c',
"if --only-display, check if record is fully readable (for debugging)",
(uchar **) &opt_check, (uchar **) &opt_check, 0,
GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
#ifndef DBUG_OFF #ifndef DBUG_OFF
{"debug", '#', "Output debug log. Often the argument is 'd:t:o,filename'.", {"debug", '#', "Output debug log. Often the argument is 'd:t:o,filename'.",
0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment