From 3c50661f410c81fc214ab502ef23957fb3dd9525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= <marko.makela@oracle.com> Date: Tue, 30 Apr 2013 13:39:50 +0300 Subject: [PATCH] Bug#16720368 INNODB IGNORES *.IBD FILE BREAKAGE AT STARTUP After a clean shutdown, InnoDB will not check the *.ibd file headers, for maximum performance. This is unchanged before and after this patch. What this fix addresses is the case when crash recovery is needed. Previously, InnoDB could load a corrupted tablespace file. buf_page_is_corrupted(): Add the parameter check_lsn. fil_check_first_page(): New function, to perform a consistency check on the first page of a file. This can be overridden by setting innodb_force_recovery. fil_read_first_page(), fil_open_single_table_tablespace(), fil_load_single_table_tablespace(): Invoke fil_check_first_page(). open_or_create_data_files(): Check the status of fil_open_single_table_tablespace(). rb#2352 approved by Jimmy Yang --- storage/innobase/buf/buf0buf.c | 6 +- storage/innobase/fil/fil0fil.c | 102 ++++++++++++++++++++++++++--- storage/innobase/include/buf0buf.h | 7 +- storage/innobase/include/fil0fil.h | 13 ++-- storage/innobase/srv/srv0start.c | 12 +++- storage/innobase/trx/trx0sys.c | 8 ++- 6 files changed, 126 insertions(+), 22 deletions(-) diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c index 571caf7125..5a8ddacac1 100644 --- a/storage/innobase/buf/buf0buf.c +++ b/storage/innobase/buf/buf0buf.c @@ -520,6 +520,8 @@ UNIV_INTERN ibool buf_page_is_corrupted( /*==================*/ + ibool check_lsn, /*!< in: TRUE if we need to check + and complain about the LSN */ const byte* read_buf, /*!< in: a database page */ ulint zip_size) /*!< in: size of compressed page; 0 for uncompressed pages */ @@ -539,7 +541,7 @@ buf_page_is_corrupted( } #ifndef UNIV_HOTBACKUP - if (recv_lsn_checks_on) { + if (check_lsn && recv_lsn_checks_on) { ib_uint64_t current_lsn; if (log_peek_lsn(¤t_lsn) @@ -3575,7 +3577,7 @@ buf_page_io_complete( /* From version 3.23.38 up we store the page checksum to the 4 first bytes of the page end lsn field */ - if (buf_page_is_corrupted(frame, + if (buf_page_is_corrupted(TRUE, frame, buf_page_get_zip_size(bpage))) { corrupt: fprintf(stderr, diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c index 2f87566303..06a77678a1 100644 --- a/storage/innobase/fil/fil0fil.c +++ b/storage/innobase/fil/fil0fil.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1859,11 +1859,63 @@ fil_write_flushed_lsn_to_data_files( return(DB_SUCCESS); } +/*******************************************************************//** +Checks the consistency of the first data page of a data file +at database startup. +@retval NULL on success, or if innodb_force_recovery is set +@return pointer to an error message string */ +static __attribute__((warn_unused_result)) +const char* +fil_check_first_page( +/*=================*/ + const page_t* page, /*!< in: data page */ + ibool first_page) /*!< in: TRUE if this is the + first page of the tablespace */ +{ + ulint space_id; + ulint flags; + + if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) { + return(NULL); + } + + space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page); + flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); + + if (first_page && !space_id && !flags) { + ulint nonzero_bytes = UNIV_PAGE_SIZE; + const byte* b = page; + + while (!*b && --nonzero_bytes) { + b++; + } + + if (!nonzero_bytes) { + return("space header page consists of zero bytes"); + } + } + + if (buf_page_is_corrupted( + FALSE, page, dict_table_flags_to_zip_size(flags))) { + return("checksum mismatch"); + } + + if (!first_page + || (page_get_space_id(page) == space_id + && page_get_page_no(page) == 0)) { + return(NULL); + } + + return("inconsistent data in space header"); +} + /*******************************************************************//** Reads the flushed lsn, arch no, and tablespace flag fields from a data -file at database startup. */ +file at database startup. +@retval NULL on success, or if innodb_force_recovery is set +@return pointer to an error message string */ UNIV_INTERN -void +const char* fil_read_first_page( /*================*/ os_file_t data_file, /*!< in: open data file */ @@ -1885,6 +1937,7 @@ fil_read_first_page( byte* buf; page_t* page; ib_uint64_t flushed_lsn; + const char* check_msg; buf = ut_malloc(2 * UNIV_PAGE_SIZE); /* Align the memory for a possible read from a raw device */ @@ -1892,13 +1945,18 @@ fil_read_first_page( os_file_read(data_file, page, 0, 0, UNIV_PAGE_SIZE); - *flags = mach_read_from_4(page + - FSP_HEADER_OFFSET + FSP_SPACE_FLAGS); + *flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN); + check_msg = fil_check_first_page(page, !one_read_already); + ut_free(buf); + if (check_msg) { + return(check_msg); + } + if (!one_read_already) { *min_flushed_lsn = flushed_lsn; *max_flushed_lsn = flushed_lsn; @@ -1906,7 +1964,7 @@ fil_read_first_page( *min_arch_log_no = arch_log_no; *max_arch_log_no = arch_log_no; #endif /* UNIV_LOG_ARCHIVE */ - return; + return(NULL); } if (*min_flushed_lsn > flushed_lsn) { @@ -1923,6 +1981,8 @@ fil_read_first_page( *max_arch_log_no = arch_log_no; } #endif /* UNIV_LOG_ARCHIVE */ + + return(NULL); } /*================ SINGLE-TABLE TABLESPACES ==========================*/ @@ -3151,6 +3211,7 @@ fil_open_single_table_tablespace( os_file_t file; char* filepath; ibool success; + const char* check_msg; byte* buf2; byte* page; ulint space_id; @@ -3211,6 +3272,8 @@ fil_open_single_table_tablespace( success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); + check_msg = fil_check_first_page(page, TRUE); + /* We have to read the tablespace id and flags from the file. */ space_id = fsp_header_get_space_id(page); @@ -3218,8 +3281,20 @@ fil_open_single_table_tablespace( ut_free(buf2); - if (UNIV_UNLIKELY(space_id != id - || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) { + if (check_msg) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Error: %s in file ", check_msg); + ut_print_filename(stderr, filepath); + fprintf(stderr, " (tablespace id=%lu, flags=%lu)\n" + "InnoDB: Please refer to " REFMAN + "innodb-troubleshooting-datadict.html\n", + (ulong) id, (ulong) flags); + success = FALSE; + goto func_exit; + } + + if (space_id != id + || space_flags != (flags & ~(~0 << DICT_TF_BITS))) { ut_print_timestamp(stderr); fputs(" InnoDB: Error: tablespace id and flags in file ", @@ -3447,10 +3522,21 @@ fil_load_single_table_tablespace( page = ut_align(buf2, UNIV_PAGE_SIZE); if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { + const char* check_msg; + success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); /* We have to read the tablespace id from the file */ + check_msg = fil_check_first_page(page, TRUE); + + if (check_msg) { + fprintf(stderr, + "InnoDB: Error: %s in file %s", + check_msg, filepath); + goto func_exit; + } + space_id = fsp_header_get_space_id(page); flags = fsp_header_get_flags(page); } else { diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index b7f6eae3f0..562f2f0887 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -663,9 +663,12 @@ UNIV_INTERN ibool buf_page_is_corrupted( /*==================*/ + ibool check_lsn, /*!< in: TRUE if we need to check + and complain about the LSN */ const byte* read_buf, /*!< in: a database page */ - ulint zip_size); /*!< in: size of compressed page; + ulint zip_size) /*!< in: size of compressed page; 0 for uncompressed pages */ + __attribute__((warn_unused_result)); #ifndef UNIV_HOTBACKUP /**********************************************************************//** Gets the space id, page offset, and byte offset within page of a diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index a804c26144..dd188e4dad 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -326,10 +326,12 @@ fil_write_flushed_lsn_to_data_files( ulint arch_log_no); /*!< in: latest archived log file number */ /*******************************************************************//** -Reads the flushed lsn and arch no fields from a data file at database -startup. */ +Reads the flushed lsn, arch no, and tablespace flag fields from a data +file at database startup. +@retval NULL on success, or if innodb_force_recovery is set +@return pointer to an error message string */ UNIV_INTERN -void +const char* fil_read_first_page( /*================*/ os_file_t data_file, /*!< in: open data file */ @@ -345,8 +347,9 @@ fil_read_first_page( #endif /* UNIV_LOG_ARCHIVE */ ib_uint64_t* min_flushed_lsn, /*!< out: min of flushed lsn values in data files */ - ib_uint64_t* max_flushed_lsn); /*!< out: max of flushed + ib_uint64_t* max_flushed_lsn) /*!< out: max of flushed lsn values in data files */ + __attribute__((warn_unused_result)); /*******************************************************************//** Increments the count of pending operation, if space is not being deleted. @return TRUE if being deleted, and operation should be skipped */ diff --git a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c index 86669a5089..d3d2e956e2 100644 --- a/storage/innobase/srv/srv0start.c +++ b/storage/innobase/srv/srv0start.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Percona Inc. @@ -818,6 +818,7 @@ open_or_create_data_files( } if (ret == FALSE) { + const char* check_msg; /* We open the data file */ if (one_created) { @@ -915,13 +916,20 @@ open_or_create_data_files( return(DB_ERROR); } skip_size_check: - fil_read_first_page( + check_msg = fil_read_first_page( files[i], one_opened, &flags, #ifdef UNIV_LOG_ARCHIVE min_arch_log_no, max_arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ min_flushed_lsn, max_flushed_lsn); + if (check_msg) { + fprintf(stderr, + "InnoDB: Error: %s in data file %s\n", + check_msg, name); + return(DB_ERROR); + } + if (!one_opened && UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) { diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c index 4ae24cef5a..d5649ea989 100644 --- a/storage/innobase/trx/trx0sys.c +++ b/storage/innobase/trx/trx0sys.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -564,7 +564,8 @@ trx_sys_doublewrite_init_or_restore_pages( /* Check if the page is corrupt */ if (UNIV_UNLIKELY - (buf_page_is_corrupted(read_buf, zip_size))) { + (buf_page_is_corrupted( + TRUE, read_buf, zip_size))) { fprintf(stderr, "InnoDB: Warning: database page" @@ -575,7 +576,8 @@ trx_sys_doublewrite_init_or_restore_pages( " the doublewrite buffer.\n", (ulong) space_id, (ulong) page_no); - if (buf_page_is_corrupted(page, zip_size)) { + if (buf_page_is_corrupted( + TRUE, page, zip_size)) { fprintf(stderr, "InnoDB: Dump of the page:\n"); buf_page_print( -- 2.30.9