Commit 78fe150a authored by jyang's avatar jyang

branches/5.1: Fix bug #47814 - Diagnostics are frequently not

printed after a long lock wait in InnoDB. Separate out the 
lock wait timeout check thread from monitor information
printing thread.

rb://200 Approved by Marko.
parent ec47414a
...@@ -7429,8 +7429,8 @@ innodb_show_status( ...@@ -7429,8 +7429,8 @@ innodb_show_status(
mutex_enter_noninline(&srv_monitor_file_mutex); mutex_enter_noninline(&srv_monitor_file_mutex);
rewind(srv_monitor_file); rewind(srv_monitor_file);
srv_printf_innodb_monitor(srv_monitor_file, srv_printf_innodb_monitor(srv_monitor_file, FALSE,
&trx_list_start, &trx_list_end); &trx_list_start, &trx_list_end);
flen = ftell(srv_monitor_file); flen = ftell(srv_monitor_file);
os_file_set_eof(srv_monitor_file); os_file_set_eof(srv_monitor_file);
......
...@@ -579,10 +579,15 @@ lock_rec_print( ...@@ -579,10 +579,15 @@ lock_rec_print(
/************************************************************************* /*************************************************************************
Prints info of locks for all transactions. */ Prints info of locks for all transactions. */
void ibool
lock_print_info_summary( lock_print_info_summary(
/*====================*/ /*====================*/
FILE* file); /* in: file where to print */ /* out: FALSE if not able to obtain
kernel mutex and exits without
printing info */
FILE* file, /* in: file where to print */
ibool nowait);/* in: whether to wait for the kernel
mutex */
/************************************************************************* /*************************************************************************
Prints info of locks for each transaction. */ Prints info of locks for each transaction. */
......
...@@ -146,7 +146,8 @@ extern ibool srv_print_innodb_tablespace_monitor; ...@@ -146,7 +146,8 @@ extern ibool srv_print_innodb_tablespace_monitor;
extern ibool srv_print_verbose_log; extern ibool srv_print_verbose_log;
extern ibool srv_print_innodb_table_monitor; extern ibool srv_print_innodb_table_monitor;
extern ibool srv_lock_timeout_and_monitor_active; extern ibool srv_lock_timeout_active;
extern ibool srv_monitor_active;
extern ibool srv_error_monitor_active; extern ibool srv_error_monitor_active;
extern ulong srv_n_spin_wait_rounds; extern ulong srv_n_spin_wait_rounds;
...@@ -427,12 +428,21 @@ srv_release_mysql_thread_if_suspended( ...@@ -427,12 +428,21 @@ srv_release_mysql_thread_if_suspended(
que_thr_t* thr); /* in: query thread associated with the que_thr_t* thr); /* in: query thread associated with the
MySQL OS thread */ MySQL OS thread */
/************************************************************************* /*************************************************************************
A thread which wakes up threads whose lock wait may have lasted too long. A thread which wakes up threads whose lock wait may have lasted too
This also prints the info output by various InnoDB monitors. */ long. */
os_thread_ret_t os_thread_ret_t
srv_lock_timeout_and_monitor_thread( srv_lock_timeout_thread(
/*================================*/ /*====================*/
/* out: a dummy parameter */
void* arg); /* in: a dummy parameter required by
os_thread_create */
/*************************************************************************
A thread which prints the info output by various InnoDB monitors. */
os_thread_ret_t
srv_monitor_thread(
/*===============*/
/* out: a dummy parameter */ /* out: a dummy parameter */
void* arg); /* in: a dummy parameter required by void* arg); /* in: a dummy parameter required by
os_thread_create */ os_thread_create */
...@@ -449,10 +459,14 @@ srv_error_monitor_thread( ...@@ -449,10 +459,14 @@ srv_error_monitor_thread(
/********************************************************************** /**********************************************************************
Outputs to a file the output of the InnoDB Monitor. */ Outputs to a file the output of the InnoDB Monitor. */
void ibool
srv_printf_innodb_monitor( srv_printf_innodb_monitor(
/*======================*/ /*======================*/
/* out: FALSE if not all information printed
due to failure to obtain necessary mutex */
FILE* file, /* in: output stream */ FILE* file, /* in: output stream */
ibool nowait, /* in: whether to wait for kernel
mutex. */
ulint* trx_start, /* out: file position of the start of ulint* trx_start, /* out: file position of the start of
the list of active transactions */ the list of active transactions */
ulint* trx_end); /* out: file position of the end of ulint* trx_end); /* out: file position of the end of
......
...@@ -4217,17 +4217,33 @@ lock_get_n_rec_locks(void) ...@@ -4217,17 +4217,33 @@ lock_get_n_rec_locks(void)
/************************************************************************* /*************************************************************************
Prints info of locks for all transactions. */ Prints info of locks for all transactions. */
void ibool
lock_print_info_summary( lock_print_info_summary(
/*====================*/ /*====================*/
FILE* file) /* in: file where to print */ /* out: FALSE if not able to obtain
kernel mutex and exit without
printing lock info */
FILE* file, /* in: file where to print */
ibool nowait) /* in: whether to wait for the kernel
mutex */
{ {
/* We must protect the MySQL thd->query field with a MySQL mutex, and /* We must protect the MySQL thd->query field with a MySQL mutex, and
because the MySQL mutex must be reserved before the kernel_mutex of because the MySQL mutex must be reserved before the kernel_mutex of
InnoDB, we call innobase_mysql_prepare_print_arbitrary_thd() here. */ InnoDB, we call innobase_mysql_prepare_print_arbitrary_thd() here. */
innobase_mysql_prepare_print_arbitrary_thd(); innobase_mysql_prepare_print_arbitrary_thd();
lock_mutex_enter_kernel();
/* if nowait is FALSE, wait on the kernel mutex,
otherwise return immediately if fail to obtain the
mutex. */
if (!nowait) {
lock_mutex_enter_kernel();
} else if (mutex_enter_nowait(&kernel_mutex)) {
innobase_mysql_end_print_arbitrary_thd();
fputs("FAIL TO OBTAIN KERNEL MUTEX, "
"SKIP LOCK INFO PRINTING\n", file);
return(FALSE);
}
if (lock_deadlock_found) { if (lock_deadlock_found) {
fputs("------------------------\n" fputs("------------------------\n"
...@@ -4261,6 +4277,7 @@ lock_print_info_summary( ...@@ -4261,6 +4277,7 @@ lock_print_info_summary(
"Total number of lock structs in row lock hash table %lu\n", "Total number of lock structs in row lock hash table %lu\n",
(ulong) lock_get_n_rec_locks()); (ulong) lock_get_n_rec_locks());
#endif /* PRINT_NUM_OF_LOCK_STRUCTS */ #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
return(TRUE);
} }
/************************************************************************* /*************************************************************************
......
...@@ -3045,7 +3045,7 @@ logs_empty_and_mark_files_at_shutdown(void) ...@@ -3045,7 +3045,7 @@ logs_empty_and_mark_files_at_shutdown(void)
if (srv_fast_shutdown < 2 if (srv_fast_shutdown < 2
&& (srv_error_monitor_active && (srv_error_monitor_active
|| srv_lock_timeout_and_monitor_active)) { || srv_lock_timeout_active || srv_monitor_active)) {
mutex_exit(&kernel_mutex); mutex_exit(&kernel_mutex);
......
...@@ -64,7 +64,8 @@ ulint srv_fatal_semaphore_wait_threshold = 600; ...@@ -64,7 +64,8 @@ ulint srv_fatal_semaphore_wait_threshold = 600;
in microseconds, in order to reduce the lagging of the purge thread. */ in microseconds, in order to reduce the lagging of the purge thread. */
ulint srv_dml_needed_delay = 0; ulint srv_dml_needed_delay = 0;
ibool srv_lock_timeout_and_monitor_active = FALSE; ibool srv_lock_timeout_active = FALSE;
ibool srv_monitor_active = FALSE;
ibool srv_error_monitor_active = FALSE; ibool srv_error_monitor_active = FALSE;
const char* srv_main_thread_op_info = ""; const char* srv_main_thread_op_info = "";
...@@ -122,6 +123,16 @@ ulint srv_log_file_size = ULINT_MAX; /* size in database pages */ ...@@ -122,6 +123,16 @@ ulint srv_log_file_size = ULINT_MAX; /* size in database pages */
ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */ ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */
ulong srv_flush_log_at_trx_commit = 1; ulong srv_flush_log_at_trx_commit = 1;
/* Maximum number of times allowed to conditionally acquire
mutex before switching to blocking wait on the mutex */
#define MAX_MUTEX_NOWAIT 20
/* Check whether the number of failed nonblocking mutex
acquisition attempts exceeds maximum allowed value. If so,
srv_printf_innodb_monitor() will request mutex acquisition
with mutex_enter(), which will wait until it gets the mutex. */
#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
byte srv_latin1_ordering[256] /* The sort order table of the latin1 byte srv_latin1_ordering[256] /* The sort order table of the latin1
character set. The following table is character set. The following table is
the MySQL order as of Feb 10th, 2002 */ the MySQL order as of Feb 10th, 2002 */
...@@ -1626,10 +1637,13 @@ srv_refresh_innodb_monitor_stats(void) ...@@ -1626,10 +1637,13 @@ srv_refresh_innodb_monitor_stats(void)
/********************************************************************** /**********************************************************************
Outputs to a file the output of the InnoDB Monitor. */ Outputs to a file the output of the InnoDB Monitor. */
void ibool
srv_printf_innodb_monitor( srv_printf_innodb_monitor(
/*======================*/ /*======================*/
/* out: FALSE if not all information printed
due to failure to obtain necessary mutex */
FILE* file, /* in: output stream */ FILE* file, /* in: output stream */
ibool nowait, /* in: whether to wait for the mutex. */
ulint* trx_start, /* out: file position of the start of ulint* trx_start, /* out: file position of the start of
the list of active transactions */ the list of active transactions */
ulint* trx_end) /* out: file position of the end of ulint* trx_end) /* out: file position of the end of
...@@ -1638,6 +1652,7 @@ srv_printf_innodb_monitor( ...@@ -1638,6 +1652,7 @@ srv_printf_innodb_monitor(
double time_elapsed; double time_elapsed;
time_t current_time; time_t current_time;
ulint n_reserved; ulint n_reserved;
ibool ret;
mutex_enter(&srv_innodb_monitor_mutex); mutex_enter(&srv_innodb_monitor_mutex);
...@@ -1682,24 +1697,31 @@ srv_printf_innodb_monitor( ...@@ -1682,24 +1697,31 @@ srv_printf_innodb_monitor(
mutex_exit(&dict_foreign_err_mutex); mutex_exit(&dict_foreign_err_mutex);
lock_print_info_summary(file); /* Only if lock_print_info_summary proceeds correctly,
if (trx_start) { before we call the lock_print_info_all_transactions
long t = ftell(file); to print all the lock information. */
if (t < 0) { ret = lock_print_info_summary(file, nowait);
*trx_start = ULINT_UNDEFINED;
} else { if (ret) {
*trx_start = (ulint) t; if (trx_start) {
long t = ftell(file);
if (t < 0) {
*trx_start = ULINT_UNDEFINED;
} else {
*trx_start = (ulint) t;
}
} }
} lock_print_info_all_transactions(file);
lock_print_info_all_transactions(file); if (trx_end) {
if (trx_end) { long t = ftell(file);
long t = ftell(file); if (t < 0) {
if (t < 0) { *trx_end = ULINT_UNDEFINED;
*trx_end = ULINT_UNDEFINED; } else {
} else { *trx_end = (ulint) t;
*trx_end = (ulint) t; }
} }
} }
fputs("--------\n" fputs("--------\n"
"FILE I/O\n" "FILE I/O\n"
"--------\n", file); "--------\n", file);
...@@ -1804,6 +1826,8 @@ srv_printf_innodb_monitor( ...@@ -1804,6 +1826,8 @@ srv_printf_innodb_monitor(
"============================\n", file); "============================\n", file);
mutex_exit(&srv_innodb_monitor_mutex); mutex_exit(&srv_innodb_monitor_mutex);
fflush(file); fflush(file);
return(ret);
} }
/********************************************************************** /**********************************************************************
...@@ -1883,26 +1907,23 @@ srv_export_innodb_status(void) ...@@ -1883,26 +1907,23 @@ srv_export_innodb_status(void)
} }
/************************************************************************* /*************************************************************************
A thread which wakes up threads whose lock wait may have lasted too long. A thread prints the info output by various InnoDB monitors. */
This also prints the info output by various InnoDB monitors. */
os_thread_ret_t os_thread_ret_t
srv_lock_timeout_and_monitor_thread( srv_monitor_thread(
/*================================*/ /*===============*/
/* out: a dummy parameter */ /* out: a dummy parameter */
void* arg __attribute__((unused))) void* arg __attribute__((unused)))
/* in: a dummy parameter required by /* in: a dummy parameter required by
os_thread_create */ os_thread_create */
{ {
srv_slot_t* slot;
double time_elapsed; double time_elapsed;
time_t current_time; time_t current_time;
time_t last_table_monitor_time; time_t last_table_monitor_time;
time_t last_tablespace_monitor_time; time_t last_tablespace_monitor_time;
time_t last_monitor_time; time_t last_monitor_time;
ibool some_waits; ulint mutex_skipped;
double wait_time; ibool last_srv_print_monitor;
ulint i;
#ifdef UNIV_DEBUG_THREAD_CREATION #ifdef UNIV_DEBUG_THREAD_CREATION
fprintf(stderr, "Lock timeout thread starts, id %lu\n", fprintf(stderr, "Lock timeout thread starts, id %lu\n",
...@@ -1913,13 +1934,15 @@ srv_lock_timeout_and_monitor_thread( ...@@ -1913,13 +1934,15 @@ srv_lock_timeout_and_monitor_thread(
last_table_monitor_time = time(NULL); last_table_monitor_time = time(NULL);
last_tablespace_monitor_time = time(NULL); last_tablespace_monitor_time = time(NULL);
last_monitor_time = time(NULL); last_monitor_time = time(NULL);
mutex_skipped = 0;
last_srv_print_monitor = srv_print_innodb_monitor;
loop: loop:
srv_lock_timeout_and_monitor_active = TRUE; srv_monitor_active = TRUE;
/* When someone is waiting for a lock, we wake up every second /* Wake up every 5 seconds to see if we need to print
and check if a timeout has passed for a lock wait */ monitor information. */
os_thread_sleep(1000000); os_thread_sleep(5000000);
current_time = time(NULL); current_time = time(NULL);
...@@ -1929,14 +1952,40 @@ srv_lock_timeout_and_monitor_thread( ...@@ -1929,14 +1952,40 @@ srv_lock_timeout_and_monitor_thread(
last_monitor_time = time(NULL); last_monitor_time = time(NULL);
if (srv_print_innodb_monitor) { if (srv_print_innodb_monitor) {
srv_printf_innodb_monitor(stderr, NULL, NULL); /* Reset mutex_skipped counter everytime
srv_print_innodb_monitor changes. This is to
ensure we will not be blocked by kernel_mutex
for short duration information printing,
such as requested by sync_array_print_long_waits() */
if (!last_srv_print_monitor) {
mutex_skipped = 0;
last_srv_print_monitor = TRUE;
}
if (!srv_printf_innodb_monitor(stderr,
MUTEX_NOWAIT(mutex_skipped),
NULL, NULL)) {
mutex_skipped++;
} else {
/* Reset the counter */
mutex_skipped = 0;
}
} else {
last_srv_print_monitor = FALSE;
} }
if (srv_innodb_status) { if (srv_innodb_status) {
mutex_enter(&srv_monitor_file_mutex); mutex_enter(&srv_monitor_file_mutex);
rewind(srv_monitor_file); rewind(srv_monitor_file);
srv_printf_innodb_monitor(srv_monitor_file, NULL, if (!srv_printf_innodb_monitor(srv_monitor_file,
NULL); MUTEX_NOWAIT(mutex_skipped),
NULL, NULL)) {
mutex_skipped++;
} else {
mutex_skipped = 0;
}
os_file_set_eof(srv_monitor_file); os_file_set_eof(srv_monitor_file);
mutex_exit(&srv_monitor_file_mutex); mutex_exit(&srv_monitor_file_mutex);
} }
...@@ -1989,6 +2038,56 @@ srv_lock_timeout_and_monitor_thread( ...@@ -1989,6 +2038,56 @@ srv_lock_timeout_and_monitor_thread(
} }
} }
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
goto exit_func;
}
if (srv_print_innodb_monitor
|| srv_print_innodb_lock_monitor
|| srv_print_innodb_tablespace_monitor
|| srv_print_innodb_table_monitor) {
goto loop;
}
srv_monitor_active = FALSE;
goto loop;
exit_func:
srv_monitor_active = FALSE;
/* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */
os_thread_exit(NULL);
OS_THREAD_DUMMY_RETURN;
}
/*************************************************************************
A thread which wakes up threads whose lock wait may have lasted too long. */
os_thread_ret_t
srv_lock_timeout_thread(
/*====================*/
/* out: a dummy parameter */
void* arg __attribute__((unused)))
/* in: a dummy parameter required by
os_thread_create */
{
srv_slot_t* slot;
ibool some_waits;
double wait_time;
ulint i;
loop:
/* When someone is waiting for a lock, we wake up every second
and check if a timeout has passed for a lock wait */
os_thread_sleep(1000000);
srv_lock_timeout_active = TRUE;
mutex_enter(&kernel_mutex); mutex_enter(&kernel_mutex);
some_waits = FALSE; some_waits = FALSE;
...@@ -2033,17 +2132,11 @@ srv_lock_timeout_and_monitor_thread( ...@@ -2033,17 +2132,11 @@ srv_lock_timeout_and_monitor_thread(
goto exit_func; goto exit_func;
} }
if (some_waits || srv_print_innodb_monitor if (some_waits) {
|| srv_print_innodb_lock_monitor
|| srv_print_innodb_tablespace_monitor
|| srv_print_innodb_table_monitor) {
goto loop; goto loop;
} }
/* No one was waiting for a lock and no monitor was active: srv_lock_timeout_active = FALSE;
suspend this thread */
srv_lock_timeout_and_monitor_active = FALSE;
#if 0 #if 0
/* The following synchronisation is disabled, since /* The following synchronisation is disabled, since
...@@ -2053,7 +2146,7 @@ srv_lock_timeout_and_monitor_thread( ...@@ -2053,7 +2146,7 @@ srv_lock_timeout_and_monitor_thread(
goto loop; goto loop;
exit_func: exit_func:
srv_lock_timeout_and_monitor_active = FALSE; srv_lock_timeout_active = FALSE;
/* We count the number of threads in os_thread_exit(). A created /* We count the number of threads in os_thread_exit(). A created
thread should always use that to exit and not use return() to exit. */ thread should always use that to exit and not use return() to exit. */
......
...@@ -87,8 +87,8 @@ static os_file_t files[1000]; ...@@ -87,8 +87,8 @@ static os_file_t files[1000];
static mutex_t ios_mutex; static mutex_t ios_mutex;
static ulint ios; static ulint ios;
static ulint n[SRV_MAX_N_IO_THREADS + 5]; static ulint n[SRV_MAX_N_IO_THREADS + 6];
static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5]; static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6];
/* We use this mutex to test the return value of pthread_mutex_trylock /* We use this mutex to test the return value of pthread_mutex_trylock
on successful locking. HP-UX does NOT return 0, though Linux et al do. */ on successful locking. HP-UX does NOT return 0, though Linux et al do. */
...@@ -1596,15 +1596,20 @@ innobase_start_or_create_for_mysql(void) ...@@ -1596,15 +1596,20 @@ innobase_start_or_create_for_mysql(void)
/* fprintf(stderr, "Max allowed record size %lu\n", /* fprintf(stderr, "Max allowed record size %lu\n",
page_get_free_space_of_empty() / 2); */ page_get_free_space_of_empty() / 2); */
/* Create the thread which watches the timeouts for lock waits /* Create the thread which watches the timeouts for lock
and prints InnoDB monitor info */ waits */
os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL, os_thread_create(&srv_lock_timeout_thread, NULL,
thread_ids + 2 + SRV_MAX_N_IO_THREADS); thread_ids + 2 + SRV_MAX_N_IO_THREADS);
/* Create the thread which warns of long semaphore waits */ /* Create the thread which warns of long semaphore waits */
os_thread_create(&srv_error_monitor_thread, NULL, os_thread_create(&srv_error_monitor_thread, NULL,
thread_ids + 3 + SRV_MAX_N_IO_THREADS); thread_ids + 3 + SRV_MAX_N_IO_THREADS);
/* Create the thread which prints InnoDB monitor info */
os_thread_create(&srv_monitor_thread, NULL,
thread_ids + 4 + SRV_MAX_N_IO_THREADS);
srv_was_started = TRUE; srv_was_started = TRUE;
srv_is_being_started = FALSE; srv_is_being_started = FALSE;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment