srv0srv.c, os0file.c, log0recv.h, log0log.h, fil0fil.h, fsp0fsp.c, fil0fil.c:

  Merge
trx0trx.c:
  Print more info about a trx in SHOW INNODB status; try to find the bug reported by Plaxo
buf0buf.c:
  Check that page log sequence numbers are not in the future
log0recv.c, log0log.c:
  Fixed a bug: if you used big BLOBs, and your log files were relatively small, InnoDB could in a big BLOB operation temporarily write over the log produced AFTER the latest checkpoint. If InnoDB would crash at that moment, then the crash recovery would fail, because InnoDB would not be able to scan the log even up to the latest checkpoint. Starting from this version, InnoDB tries to ensure the latest checkpoint is young enough. If that is not possible, InnoDB prints a warning to the .err log
parent 05fb8176
......@@ -34,6 +34,7 @@ Created 11/5/1995 Heikki Tuuri
#include "ibuf0ibuf.h"
#include "dict0dict.h"
#include "log0recv.h"
#include "log0log.h"
#include "trx0undo.h"
#include "srv0srv.h"
......@@ -273,6 +274,7 @@ buf_page_is_corrupted(
ulint old_checksum;
ulint checksum_field;
ulint old_checksum_field;
dulint current_lsn;
if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
!= mach_read_from_4(read_buf + UNIV_PAGE_SIZE
......@@ -284,6 +286,27 @@ buf_page_is_corrupted(
return(TRUE);
}
#ifndef UNIV_HOTBACKUP
if (recv_lsn_checks_on && log_peek_lsn(&current_lsn)) {
if (ut_dulint_cmp(current_lsn,
mach_read_from_8(read_buf + FIL_PAGE_LSN))
< 0) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Error: page %lu log sequence number %lu %lu\n"
"InnoDB: is in the future! Current system log sequence number %lu %lu.\n"
"InnoDB: Your database may be corrupt.\n",
mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
ut_dulint_get_high(
mach_read_from_8(read_buf + FIL_PAGE_LSN)),
ut_dulint_get_low(
mach_read_from_8(read_buf + FIL_PAGE_LSN)),
ut_dulint_get_high(current_lsn),
ut_dulint_get_low(current_lsn));
}
}
#endif
old_checksum = buf_calc_page_old_checksum(read_buf);
old_checksum_field = mach_read_from_4(read_buf + UNIV_PAGE_SIZE
......
......@@ -831,6 +831,34 @@ fil_space_release_free_extents(
mutex_exit(&(system->mutex));
}
/***********************************************************************
Gets the number of reserved extents. If the database is silent, this number
should be zero. */
ulint
fil_space_get_n_reserved_extents(
/*=============================*/
ulint id) /* in: space id */
{
fil_space_t* space;
fil_system_t* system = fil_system;
ulint n;
ut_ad(system);
mutex_enter(&(system->mutex));
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
ut_a(space);
n = space->n_reserved_extents;
mutex_exit(&(system->mutex));
return(n);
}
/************************************************************************
Prepares a file node for i/o. Opens the file if it is closed. Updates the
pending i/o's field in the node and the system appropriately. Takes the node
......
......@@ -2709,10 +2709,52 @@ fseg_free_page_low(
return;
}
/*
fprintf(stderr,
"InnoDB: InnoDB is freeing space %lu page %lu,\n"
"InnoDB: which belongs to descr seg %lu %lu\n"
"InnoDB: segment %lu %lu.\n",
space, page,
ut_dulint_get_high(
mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)),
ut_dulint_get_low(
mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)),
ut_dulint_get_high(
mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)),
ut_dulint_get_low(
mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)));
*/
/* If we get here, the page is in some extent of the segment */
ut_a(0 == ut_dulint_cmp(
if (0 != ut_dulint_cmp(
mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr),
mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)));
mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr))) {
ut_sprintf_buf(errbuf, descr, 40);
fprintf(stderr,
"InnoDB: Dump of the tablespace extent descriptor: %s\n", errbuf);
ut_sprintf_buf(errbuf, seg_inode, 40);
fprintf(stderr,
"InnoDB: Dump of the segment inode: %s\n", errbuf);
fprintf(stderr,
"InnoDB: Serious error: InnoDB is trying to free space %lu page %lu,\n"
"InnoDB: which does not belong to segment %lu %lu but belongs\n"
"InnoDB: to segment %lu %lu.\n",
space, page,
ut_dulint_get_high(
mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)),
ut_dulint_get_low(
mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr)),
ut_dulint_get_high(
mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)),
ut_dulint_get_low(
mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr)));
fprintf(stderr,
"InnoDB: If the InnoDB recovery crashes here, see section 6.1\n"
"InnoDB: of http://www.innodb.com/ibman.html about forcing recovery.\n");
ut_a(0);
}
not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
MLOG_4BYTES, mtr);
......
......@@ -386,6 +386,14 @@ fil_space_release_free_extents(
/*===========================*/
ulint id, /* in: space id */
ulint n_reserved); /* in: how many one reserved */
/***********************************************************************
Gets the number of reserved extents. If the database is silent, this number
should be zero. */
ulint
fil_space_get_n_reserved_extents(
/*=============================*/
ulint id); /* in: space id */
typedef struct fil_space_struct fil_space_t;
......
......@@ -513,6 +513,15 @@ log_print(
/*======*/
char* buf, /* in/out: buffer where to print */
char* buf_end);/* in: buffer end */
/**********************************************************
Peeks the current lsn. */
ibool
log_peek_lsn(
/*=========*/
/* out: TRUE if success, FALSE if could not get the
log system mutex */
dulint* lsn); /* out: if returns TRUE, current lsn is here */
/**************************************************************************
Refreshes the statistics used to print per-second averages. */
......@@ -785,6 +794,11 @@ struct log_struct{
called */
/* Fields involved in checkpoints */
ulint log_group_capacity; /* capacity of the log group; if
the checkpoint age exceeds this, it is
a serious error because it is possible
we will then overwrite log and spoil
crash recovery */
ulint max_modified_age_async;
/* when this recommended value for lsn
- buf_pool_get_oldest_modification()
......
......@@ -333,6 +333,8 @@ extern ibool recv_recovery_on;
extern ibool recv_no_ibuf_operations;
extern ibool recv_needed_recovery;
extern ibool recv_lsn_checks_on;
extern ibool recv_is_making_a_backup;
extern ulint recv_max_parsed_page_no;
......
......@@ -33,6 +33,11 @@ log_t* log_sys = NULL;
ibool log_do_write = TRUE;
ibool log_debug_writes = FALSE;
/* These control how often we print warnings if the last checkpoint is too
old */
ibool log_has_printed_chkp_warning = FALSE;
time_t log_last_warning_time;
/* Pointer to this variable is used as the i/o-message when we do i/o to an
archive */
byte log_archive_io;
......@@ -299,6 +304,7 @@ log_close(void)
dulint oldest_lsn;
dulint lsn;
log_t* log = log_sys;
ulint checkpoint_age;
ut_ad(mutex_own(&(log->mutex)));
......@@ -322,8 +328,34 @@ log_close(void)
log->check_flush_or_checkpoint = TRUE;
}
if (ut_dulint_minus(lsn, log->last_checkpoint_lsn)
<= log->max_modified_age_async) {
checkpoint_age = ut_dulint_minus(lsn, log->last_checkpoint_lsn);
if (checkpoint_age >= log->log_group_capacity) {
/* TODO: split btr_store_big_rec_extern_fields() into small
steps so that we can release all latches in the middle, and
call log_free_check() to ensure we never write over log written
after the latest checkpoint. In principle, we should split all
big_rec operations, but other operations are smaller. */
if (!log_has_printed_chkp_warning
|| difftime(time(NULL), log_last_warning_time) > 15) {
log_has_printed_chkp_warning = TRUE;
log_last_warning_time = time(NULL);
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: ERROR: the age of the last checkpoint is %lu,\n"
"InnoDB: which exceeds the log group capacity %lu.\n"
"InnoDB: If you are using big BLOB or TEXT rows, you must set the\n"
"InnoDB: combined size of log files at least 10 times bigger than the\n"
"InnoDB: largest such row.\n",
checkpoint_age, log->log_group_capacity);
}
}
if (checkpoint_age <= log->max_modified_age_async) {
goto function_exit;
}
......@@ -332,8 +364,7 @@ log_close(void)
if (ut_dulint_is_zero(oldest_lsn)
|| (ut_dulint_minus(lsn, oldest_lsn)
> log->max_modified_age_async)
|| (ut_dulint_minus(lsn, log->last_checkpoint_lsn)
> log->max_checkpoint_age_async)) {
|| checkpoint_age > log->max_checkpoint_age_async) {
log->check_flush_or_checkpoint = TRUE;
}
......@@ -551,7 +582,6 @@ log_calc_max_ages(void)
the database server */
{
log_group_t* group;
ulint n_threads;
ulint margin;
ulint free;
ibool success = TRUE;
......@@ -561,8 +591,6 @@ log_calc_max_ages(void)
ut_ad(!mutex_own(&(log_sys->mutex)));
n_threads = srv_get_n_threads();
mutex_enter(&(log_sys->mutex));
group = UT_LIST_GET_FIRST(log_sys->log_groups);
......@@ -590,12 +618,15 @@ log_calc_max_ages(void)
group = UT_LIST_GET_NEXT(log_groups, group);
}
/* Add extra safety */
smallest_capacity = smallest_capacity - smallest_capacity / 10;
/* For each OS thread we must reserve so much free space in the
smallest log group that it can accommodate the log entries produced
by single query steps: running out of free log space is a serious
system error which requires rebooting the database. */
free = LOG_CHECKPOINT_FREE_PER_THREAD * n_threads
free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
+ LOG_CHECKPOINT_EXTRA_FREE;
if (free >= smallest_capacity / 2) {
success = FALSE;
......@@ -607,6 +638,10 @@ log_calc_max_ages(void)
margin = ut_min(margin, log_sys->adm_checkpoint_interval);
margin = margin - margin / 10; /* Add still some extra safety */
log_sys->log_group_capacity = smallest_capacity;
log_sys->max_modified_age_async = margin
- margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
log_sys->max_modified_age_sync = margin
......@@ -626,7 +661,7 @@ log_calc_max_ages(void)
if (!success) {
fprintf(stderr,
"Error: log file group too small for the number of threads\n");
"InnoDB: Error: log file group too small for innodb_thread_concurrency\n");
}
return(success);
......@@ -3111,6 +3146,28 @@ log_check_log_recs(
return(TRUE);
}
/**********************************************************
Peeks the current lsn. */
ibool
log_peek_lsn(
/*=========*/
/* out: TRUE if success, FALSE if could not get the
log system mutex */
dulint* lsn) /* out: if returns TRUE, current lsn is here */
{
if (0 == mutex_enter_nowait(&(log_sys->mutex), (char*)__FILE__,
__LINE__)) {
*lsn = log_sys->lsn;
mutex_exit(&(log_sys->mutex));
return(TRUE);
}
return(FALSE);
}
/**********************************************************
Prints info of the log. */
......
......@@ -46,6 +46,8 @@ ibool recv_recovery_from_backup_on = FALSE;
ibool recv_needed_recovery = FALSE;
ibool recv_lsn_checks_on = FALSE;
/* If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes TRUE if
the log record hash table becomes too full, and log records must be merged
......@@ -71,6 +73,12 @@ ulint recv_previous_parsed_rec_is_multi = 0;
ulint recv_max_parsed_page_no = 0;
/* The maximum lsn we see for a page during the recovery process. If this
is bigger than the lsn we are able to scan up to, that is an indication that
the recovery failed and the database may be corrupt. */
dulint recv_max_page_lsn;
/************************************************************
Creates the recovery system. */
......@@ -132,6 +140,8 @@ recv_sys_init(
OS_FILE_LOG_BLOCK_SIZE);
recv_sys->found_corrupt_log = FALSE;
recv_max_page_lsn = ut_dulint_zero;
mutex_exit(&(recv_sys->mutex));
}
......@@ -1124,6 +1134,10 @@ recv_recover_page(
mutex_enter(&(recv_sys->mutex));
if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) {
recv_max_page_lsn = page_lsn;
}
recv_addr->state = RECV_PROCESSED;
ut_a(recv_sys->n_addrs);
......@@ -2192,9 +2206,12 @@ recv_scan_log_recs(
while (log_block < buf + len && !finished) {
no = log_block_get_hdr_no(log_block);
/*
fprintf(stderr, "Log block header no %lu\n", no);
/* fprintf(stderr, "Log block header no %lu\n", no); */
fprintf(stderr, "Scanned lsn no %lu\n",
log_block_convert_lsn_to_no(scanned_lsn));
*/
if (no != log_block_convert_lsn_to_no(scanned_lsn)
|| !log_block_checksum_is_ok_or_old_format(log_block)) {
......@@ -2586,7 +2603,6 @@ recv_recovery_from_checkpoint_start(
recv_group_scan_log_recs(group, &contiguous_lsn,
&group_scanned_lsn);
group->scanned_lsn = group_scanned_lsn;
if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) {
......@@ -2603,6 +2619,31 @@ recv_recovery_from_checkpoint_start(
group = UT_LIST_GET_NEXT(log_groups, group);
}
/* We currently have only one log group */
if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: ERROR: We were only able to scan the log up to\n"
"InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n"
"InnoDB: It is possible that the database is now corrupt!\n",
ut_dulint_get_high(group_scanned_lsn),
ut_dulint_get_low(group_scanned_lsn),
ut_dulint_get_high(checkpoint_lsn),
ut_dulint_get_low(checkpoint_lsn));
}
if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: ERROR: We were only able to scan the log up to %lu %lu\n"
"InnoDB: but a database page a had an lsn %lu %lu. It is possible that the\n"
"InnoDB: database is now corrupt!\n",
ut_dulint_get_high(group_scanned_lsn),
ut_dulint_get_low(group_scanned_lsn),
ut_dulint_get_high(recv_max_page_lsn),
ut_dulint_get_low(recv_max_page_lsn));
}
if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) {
mutex_exit(&(log_sys->mutex));
......@@ -2656,6 +2697,8 @@ recv_recovery_from_checkpoint_start(
sync_order_checks_on = FALSE;
recv_lsn_checks_on = TRUE;
/* The database is now ready to start almost normal processing of user
transactions: transaction rollbacks and the application of the log
records in the hash table can be run in background. */
......
......@@ -1030,6 +1030,7 @@ os_file_flush(
#ifdef HAVE_FDATASYNC
ret = fdatasync(file);
#else
/* printf("Flushing to file %lu\n", (ulint)file); */
ret = fsync(file);
#endif
os_n_fsyncs++;
......
......@@ -1798,8 +1798,12 @@ srv_conc_enter_innodb(
os_fast_mutex_unlock(&srv_conc_mutex);
trx->op_info = (char*)"sleeping before joining InnoDB queue";
os_thread_sleep(50000);
trx->op_info = (char*)"";
os_fast_mutex_lock(&srv_conc_mutex);
srv_conc_n_waiting_threads--;
......@@ -2299,6 +2303,7 @@ srv_sprintf_innodb_monitor(
char* buf_end = buf + len - 2000;
double time_elapsed;
time_t current_time;
ulint n_reserved;
mutex_enter(&srv_innodb_monitor_mutex);
......@@ -2414,8 +2419,16 @@ srv_sprintf_innodb_monitor(
"ROW OPERATIONS\n"
"--------------\n");
buf += sprintf(buf,
"%ld queries inside InnoDB, %ld queries in queue\n",
"%ld queries inside InnoDB, %lu queries in queue\n",
srv_conc_n_threads, srv_conc_n_waiting_threads);
n_reserved = fil_space_get_n_reserved_extents(0);
if (n_reserved > 0) {
buf += sprintf(buf,
"%lu tablespace extents now reserved for B-tree split operations\n",
n_reserved);
}
#ifdef UNIV_LINUX
buf += sprintf(buf,
"Main thread process no. %lu, id %lu, state: %s\n",
......
......@@ -1624,6 +1624,13 @@ trx_print(
}
buf += sprintf(buf, "\n");
if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
buf += sprintf(buf, "mysql tables in use %lu, locked %lu\n",
trx->n_mysql_tables_in_use,
trx->mysql_n_tables_locked);
}
start_of_line = buf;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment