Commit 5d1ec1b9 authored by Jan Lindström's avatar Jan Lindström

MDEV-5247: DB locked up at btr0cur.c line 568. There is inconsistent and non...

MDEV-5247: DB locked up at btr0cur.c line 568. The usage of have_LRU_mutex was inconsistent and illogical, and an incorrect value was passed in ha_innodb.cc when buf_LRU_free_block is called. Additionally, to help diagnose future long semaphore waits, a new configuration variable innodb_use_stacktrace was added. If this variable is true, a signal handler for SIGUSR2 is installed when the InnoDB server starts, and when a long semaphore wait is detected in sync/sync0array.c we send a SIGUSR2 signal to the waiting thread and to the thread that has acquired the RW-latch. For both threads a stacktrace is produced that is as complete as possible.
parent 4f0dabcf
......@@ -274,6 +274,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
mem/mem0mem.c mem/mem0pool.c
mtr/mtr0log.c mtr/mtr0mtr.c
os/os0file.c os/os0proc.c os/os0sync.c os/os0thread.c
os/os0stacktrace.c
page/page0cur.c page/page0page.c page/page0zip.c
que/que0que.c
handler/ha_innodb.cc handler/handler0alter.cc handler/i_s.cc
......
......@@ -4306,6 +4306,7 @@ btr_blob_free(
buf_pool_t* buf_pool = buf_pool_from_block(block);
ulint space = buf_block_get_space(block);
ulint page_no = buf_block_get_page_no(block);
ibool have_LRU_mutex = FALSE;
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
......@@ -4313,6 +4314,7 @@ btr_blob_free(
//buf_pool_mutex_enter(buf_pool);
mutex_enter(&buf_pool->LRU_list_mutex);
have_LRU_mutex = TRUE;
mutex_enter(&block->mutex);
/* Only free the block if it is still allocated to
......@@ -4323,7 +4325,7 @@ btr_blob_free(
&& buf_block_get_space(block) == space
&& buf_block_get_page_no(block) == page_no) {
if (!buf_LRU_free_block(&block->page, all, TRUE)
if (!buf_LRU_free_block(&block->page, all, &have_LRU_mutex)
&& all && block->page.zip.data
/* Now, buf_LRU_free_block() may release mutex temporarily */
&& buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
......@@ -4332,12 +4334,14 @@ btr_blob_free(
/* Attempt to deallocate the uncompressed page
if the whole block cannot be deallocted. */
buf_LRU_free_block(&block->page, FALSE, TRUE);
buf_LRU_free_block(&block->page, FALSE, &have_LRU_mutex);
}
}
//buf_pool_mutex_exit(buf_pool);
mutex_exit(&buf_pool->LRU_list_mutex);
if (have_LRU_mutex) {
mutex_exit(&buf_pool->LRU_list_mutex);
}
mutex_exit(&block->mutex);
}
......
......@@ -2040,6 +2040,8 @@ buf_page_get_zip(
bpage->buf_fix_count++;
goto got_block;
case BUF_BLOCK_FILE_PAGE:
{
ibool have_LRU_mutex = FALSE;
ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);
/* release mutex to obey to latch-order */
......@@ -2048,6 +2050,7 @@ buf_page_get_zip(
/* get LRU_list_mutex for buf_LRU_free_block() */
mutex_enter(&buf_pool->LRU_list_mutex);
mutex_enter(block_mutex);
have_LRU_mutex = TRUE;
if (UNIV_UNLIKELY(bpage->space != space
|| bpage->offset != offset
......@@ -2055,22 +2058,28 @@ buf_page_get_zip(
|| !bpage->zip.data)) {
/* someone should interrupt, retry */
mutex_exit(&buf_pool->LRU_list_mutex);
have_LRU_mutex = FALSE;
mutex_exit(block_mutex);
goto lookup;
}
/* Discard the uncompressed page frame if possible. */
if (buf_LRU_free_block(bpage, FALSE, TRUE)) {
mutex_exit(&buf_pool->LRU_list_mutex);
if (buf_LRU_free_block(bpage, FALSE, &have_LRU_mutex)) {
if (have_LRU_mutex) {
mutex_exit(&buf_pool->LRU_list_mutex);
}
mutex_exit(block_mutex);
goto lookup;
}
mutex_exit(&buf_pool->LRU_list_mutex);
if (have_LRU_mutex) {
mutex_exit(&buf_pool->LRU_list_mutex);
}
buf_block_buf_fix_inc((buf_block_t*) bpage,
__FILE__, __LINE__);
goto got_block;
}
}
ut_error;
......@@ -2822,8 +2831,9 @@ buf_page_get_gen(
/* Try to evict the block from the buffer pool, to use the
insert buffer (change buffer) as much as possible. */
ulint page_no = buf_block_get_page_no(block);
ibool have_LRU_mutex = FALSE;
if (buf_LRU_free_block(&block->page, TRUE, FALSE)) {
if (buf_LRU_free_block(&block->page, TRUE, &have_LRU_mutex)) {
mutex_exit(block_mutex);
if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
/* Set the watch, as it would have
......
......@@ -178,19 +178,23 @@ ibool
buf_LRU_evict_from_unzip_LRU(
/*=========================*/
buf_pool_t* buf_pool,
ibool have_LRU_mutex)
ibool* have_LRU_mutex)
{
ulint io_avg;
ulint unzip_avg;
//ut_ad(buf_pool_mutex_own(buf_pool));
if (!have_LRU_mutex)
if (!*have_LRU_mutex) {
mutex_enter(&buf_pool->LRU_list_mutex);
*have_LRU_mutex = TRUE;
}
/* If the unzip_LRU list is empty, we can only use the LRU. */
if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
if (!have_LRU_mutex)
if (*have_LRU_mutex) {
mutex_exit(&buf_pool->LRU_list_mutex);
*have_LRU_mutex = FALSE;
}
return(FALSE);
}
......@@ -199,20 +203,26 @@ buf_LRU_evict_from_unzip_LRU(
decompressed pages in the buffer pool. */
if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
<= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
if (!have_LRU_mutex)
if (*have_LRU_mutex) {
mutex_exit(&buf_pool->LRU_list_mutex);
*have_LRU_mutex = FALSE;
}
return(FALSE);
}
/* If eviction hasn't started yet, we assume by default
that a workload is disk bound. */
if (buf_pool->freed_page_clock == 0) {
if (!have_LRU_mutex)
if (*have_LRU_mutex) {
mutex_exit(&buf_pool->LRU_list_mutex);
*have_LRU_mutex = FALSE;
}
return(TRUE);
}
if (!have_LRU_mutex)
if (*have_LRU_mutex) {
mutex_exit(&buf_pool->LRU_list_mutex);
*have_LRU_mutex = FALSE;
}
/* Calculate the average over past intervals, and add the values
of the current interval. */
......@@ -387,7 +397,7 @@ buf_LRU_drop_page_hash_for_tablespace(
//buf_pool_mutex_exit(buf_pool);
mutex_exit(&buf_pool->LRU_list_mutex);
/* Drop any remaining batch of search hashed pages. */
buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
ut_free(page_arr);
......@@ -937,7 +947,7 @@ buf_LRU_free_from_unzip_LRU_list(
search n_iterations / 5 of the
unzip_LRU list, or nothing if
n_iterations >= 5 */
ibool have_LRU_mutex)
ibool* have_LRU_mutex)
{
buf_block_t* block;
ulint distance;
......@@ -1004,7 +1014,7 @@ buf_LRU_free_from_common_LRU_list(
n_iterations < 10, then we search
n_iterations / 10 * buf_pool->curr_size
pages from the end of the LRU list */
ibool have_LRU_mutex)
ibool* have_LRU_mutex)
{
buf_page_t* bpage;
ulint distance;
......@@ -1075,18 +1085,17 @@ buf_LRU_search_and_free_block(
ibool freed = FALSE;
ibool have_LRU_mutex = FALSE;
if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
have_LRU_mutex = TRUE;
//buf_pool_mutex_enter(buf_pool);
if (have_LRU_mutex)
if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)) {
mutex_enter(&buf_pool->LRU_list_mutex);
have_LRU_mutex = TRUE;
}
freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, have_LRU_mutex);
freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations, &have_LRU_mutex);
if (!freed) {
freed = buf_LRU_free_from_common_LRU_list(
buf_pool, n_iterations, have_LRU_mutex);
buf_pool, n_iterations, &have_LRU_mutex);
}
buf_pool_mutex_enter(buf_pool);
......@@ -1831,7 +1840,7 @@ buf_LRU_free_block(
buf_page_t* bpage, /*!< in: block to be freed */
ibool zip, /*!< in: TRUE if should remove also the
compressed page of an uncompressed page */
ibool have_LRU_mutex)
ibool* have_LRU_mutex)
{
buf_page_t* b = NULL;
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
......@@ -1897,8 +1906,10 @@ buf_LRU_free_block(
/* not to break latch order, must re-enter block_mutex */
mutex_exit(block_mutex);
if (!have_LRU_mutex)
if (!*have_LRU_mutex) {
mutex_enter(&buf_pool->LRU_list_mutex); /* optimistic */
*have_LRU_mutex = TRUE;
}
rw_lock_x_lock(&buf_pool->page_hash_latch);
mutex_enter(block_mutex);
......@@ -1909,8 +1920,10 @@ buf_LRU_free_block(
if (b) {
buf_page_free_descriptor(b);
}
if (!have_LRU_mutex)
if (*have_LRU_mutex) {
mutex_exit(&buf_pool->LRU_list_mutex);
*have_LRU_mutex = FALSE;
}
rw_lock_x_unlock(&buf_pool->page_hash_latch);
return(FALSE);
} else if (zip || !bpage->zip.data) {
......@@ -2045,7 +2058,10 @@ buf_LRU_free_block(
}
//buf_pool_mutex_exit(buf_pool);
mutex_exit(&buf_pool->LRU_list_mutex);
if (*have_LRU_mutex) {
mutex_exit(&buf_pool->LRU_list_mutex);
*have_LRU_mutex = FALSE;
}
rw_lock_x_unlock(&buf_pool->page_hash_latch);
mutex_exit(block_mutex);
......@@ -2079,8 +2095,10 @@ buf_LRU_free_block(
}
//buf_pool_mutex_enter(buf_pool);
if (have_LRU_mutex)
if (*have_LRU_mutex) {
mutex_enter(&buf_pool->LRU_list_mutex);
*have_LRU_mutex = FALSE;
}
mutex_enter(block_mutex);
if (b) {
......@@ -2097,8 +2115,10 @@ buf_LRU_free_block(
ut_ad(block_mutex == &buf_pool->zip_mutex);
mutex_enter(block_mutex);
if (!have_LRU_mutex)
if (*have_LRU_mutex) {
mutex_exit(&buf_pool->LRU_list_mutex);
*have_LRU_mutex = FALSE;
}
rw_lock_x_unlock(&buf_pool->page_hash_latch);
}
......
......@@ -12632,6 +12632,7 @@ innodb_buffer_pool_evict_update(
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool = &buf_pool_ptr[i];
ibool have_LRU_mutex = TRUE;
//buf_pool_mutex_enter(buf_pool);
mutex_enter(&buf_pool->LRU_list_mutex);
......@@ -12650,12 +12651,14 @@ innodb_buffer_pool_evict_update(
mutex_enter(&block->mutex);
buf_LRU_free_block(&block->page,
FALSE, TRUE);
FALSE, &have_LRU_mutex);
mutex_exit(&block->mutex);
block = prev_block;
}
mutex_exit(&buf_pool->LRU_list_mutex);
if (have_LRU_mutex) {
mutex_exit(&buf_pool->LRU_list_mutex);
}
//buf_pool_mutex_exit(buf_pool);
}
}
......@@ -13469,6 +13472,11 @@ static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks,
"Print all deadlocks to MySQL error log (off by default)",
NULL, NULL, FALSE);
static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace,
PLUGIN_VAR_OPCMDARG,
"Print stacktrace on long semaphore wait (off by default)",
NULL, NULL, FALSE);
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(page_size),
MYSQL_SYSVAR(log_block_size),
......@@ -13589,6 +13597,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(locking_fake_changes),
MYSQL_SYSVAR(merge_sort_block_size),
MYSQL_SYSVAR(print_all_deadlocks),
MYSQL_SYSVAR(use_stacktrace),
NULL
};
......
......@@ -101,7 +101,7 @@ buf_LRU_free_block(
buf_page_t* bpage, /*!< in: block to be freed */
ibool zip, /*!< in: TRUE if should remove also the
compressed page of an uncompressed page */
ibool have_LRU_mutex)
ibool* have_LRU_mutex)
__attribute__((nonnull));
/******************************************************************//**
Try to free a replaceable block.
......
/*****************************************************************************
Copyright (C) 2013 SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
#ifndef os0stacktrace_h
#define os0stacktrace_h
#ifndef __WIN__
#include <execinfo.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/***************************************************************//**
Prints stacktrace for this thread.
The signature matches a three-argument sa_sigaction signal handler;
innobase_start_or_create_for_mysql() installs this function for SIGUSR2
with SA_SIGINFO when srv_use_stacktrace is set, and sync_array_cell_print()
delivers that signal with pthread_kill() to the threads of interest.
*/
void
os_stacktrace_print(
/*================*/
int sig_num, /*!< in: signal number */
siginfo_t* info, /*!< in: signal information */
void* ucontext);/*!< in: signal context */
#endif /* ! __WIN__ */
#endif /* os0stacktrace.h */
......@@ -274,6 +274,8 @@ extern ulong srv_adaptive_flushing_method;
extern ulong srv_expand_import;
extern ulong srv_pass_corrupt_table;
extern my_bool srv_use_stacktrace;
/* Helper macro to support srv_pass_corrupt_table checks. If 'cond' is FALSE,
execute 'code' if srv_pass_corrupt_table is non-zero, or trigger a fatal error
otherwise. The break statement in 'code' will obviously not work as expected. */
......
/*****************************************************************************
Copyright (C) 2013 SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*****************************************************************************/
#include "os0thread.h"
#ifndef __WIN__
#include <execinfo.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#ifndef __USE_GNU
#define __USE_GNU
#endif
/* Since kernel version 2.2 the undocumented parameter to the signal handler has been declared
obsolete in adherence with POSIX.1b. A more correct way to retrieve additional information is
to use the SA_SIGINFO option when setting the handler */
#undef USE_SIGCONTEXT
#ifndef USE_SIGCONTEXT
/* get REG_EIP / REG_RIP from ucontext.h */
#include <ucontext.h>
#ifndef EIP
#define EIP 14
#endif
#if (defined (__x86_64__))
#ifndef REG_RIP
#define REG_RIP REG_INDEX(rip) /* seems to be 16 */
#endif
#endif
#endif
#define OS_STACKTRACE_MAX_DEPTH 128
/***************************************************************//**
Prints stacktrace for this thread.
Three-argument (SA_SIGINFO style) signal handler. Writes to stderr the
signal number and name, info->si_addr, the program counter saved in the
signal context, and then one line per stack frame of the current thread.
The second captured frame is replaced with the saved program counter so
that the trace shows where the thread was when the signal arrived.
On architectures for which the saved program counter cannot be read the
function returns without printing anything.
NOTE(review): fprintf() and backtrace_symbols() (which allocates with
malloc) are not async-signal-safe; backtrace_symbols_fd() would avoid the
allocation at the cost of a different output format - confirm whether
that trade-off is acceptable for this diagnostic path. */
void
os_stacktrace_print(
/*================*/
	int		sig_num,	/*!< in: signal number */
	siginfo_t*	info,		/*!< in: signal information */
	void*		ucontext)	/*!< in: signal context */
{
	void*	array[OS_STACKTRACE_MAX_DEPTH];
	char**	messages;
	int	size, i;
	void*	caller_address = NULL;

	/* Get the address at the time the signal was raised */
#if defined(__x86_64__)
	ucontext_t*	uc = (ucontext_t*) ucontext;
	caller_address = (void*) uc->uc_mcontext.gregs[REG_RIP];
#elif defined(__hppa__)
	ucontext_t*	uc = (ucontext_t*) ucontext;
	/* Mask the two low bits on the integer value, before
	converting it to a pointer. */
	caller_address = (void*) (uc->uc_mcontext.sc_iaoq[0] & ~0x3UL);
#elif (defined (__ppc__)) || (defined (__powerpc__))
	ucontext_t*	uc = (ucontext_t*) ucontext;
	caller_address = (void*) uc->uc_mcontext.regs->nip;
#elif defined(__sparc__)
	struct sigcontext*	sc = (struct sigcontext*) ucontext;
#if __WORDSIZE == 64
	caller_address = (void*) sc->sigc_regs.tpc;
#else
	caller_address = (void*) sc->si_regs.pc;
#endif
#elif defined(__i386__)
	ucontext_t*	uc = (ucontext_t*) ucontext;
	caller_address = (void*) uc->uc_mcontext.gregs[REG_EIP];
#else
	/* Unsupported return */
	return;
#endif

	fprintf(stderr, "InnoDB: signal %d (%s), address is %p from %p\n",
		sig_num, strsignal(sig_num), info->si_addr,
		(void *)caller_address);

	size = backtrace(array, OS_STACKTRACE_MAX_DEPTH);

	/* overwrite sigaction with caller's address; only when that
	frame was actually captured, otherwise array[1] is not part of
	the trace and must not be handed to backtrace_symbols() */
	if (size > 1) {
		array[1] = caller_address;
	}

	messages = backtrace_symbols(array, size);

	fprintf(stderr,
		"InnoDB: Stacktrace for Thread %lu \n",
		(ulong) os_thread_pf(os_thread_get_curr_id()));

	/* skip first stack frame (points here) */
	for (i = 1; i < size; ++i) {
		if (messages != NULL) {
			fprintf(stderr, "InnoDB: [bt]: (%d) %s\n",
				i, messages[i]);
		} else {
			/* Symbol lookup failed (out of memory): still
			report the raw return addresses. */
			fprintf(stderr, "InnoDB: [bt]: (%d) %p\n",
				i, array[i]);
		}
	}

	/* free(NULL) is a no-op, so no guard is needed. */
	free(messages);
}
#endif /* ! __WIN__ */
......@@ -364,6 +364,9 @@ UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;
/* print all user-level transactions deadlocks to mysqld stderr */
UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE;
/* Produce a stacktrace on long semaphore wait */
UNIV_INTERN my_bool srv_use_stacktrace = FALSE;
typedef struct srv_conc_slot_struct srv_conc_slot_t;
struct srv_conc_slot_struct{
os_event_t event; /*!< event to wait */
......
......@@ -89,6 +89,7 @@ Created 2/16/1996 Heikki Tuuri
# include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
# include "zlib.h" /* for ZLIB_VERSION */
# include "buf0lru.h" /* for buf_LRU_file_restore() */
# include "os0stacktrace.h"
/** Log sequence number immediately after startup */
UNIV_INTERN ib_uint64_t srv_start_lsn;
......@@ -1255,6 +1256,28 @@ innobase_start_or_create_for_mysql(void)
"of memory.\n");
}
/* If stacktrace is used we set up signal handler for SIGUSR2 signal
here. If signal handler set fails we report that and disable
stacktrace feature. */
if (srv_use_stacktrace) {
#ifndef __WIN__
struct sigaction sigact;
sigact.sa_sigaction = os_stacktrace_print;
sigact.sa_flags = SA_RESTART | SA_SIGINFO;
if (sigaction(SIGUSR2, &sigact, (struct sigaction *)NULL) != 0)
{
fprintf(stderr, " InnoDB:error setting signal handler for %d (%s)\n",
SIGUSR2, strsignal(SIGUSR2));
srv_use_stacktrace = FALSE;
}
#endif /*! __WIN__ */
}
/* System tables are created in tablespace 0. Thus, we must
temporarily clear srv_file_per_table. This is ok, because the
server will not accept connections (which could modify
......
......@@ -483,6 +483,15 @@ sync_array_cell_print(
innobase_basename(cell->file), (ulong) cell->line,
difftime(time(NULL), cell->reservation_time));
/* If stacktrace feature is enabled we will send a SIGUSR2
signal to thread waiting for the semaphore. Signal handler
will then dump the current stack to error log. */
if (srv_use_stacktrace) {
#ifndef __WIN__
pthread_kill(cell->thread, SIGUSR2);
#endif
}
if (type == SYNC_MUTEX) {
/* We use old_wait_mutex in case the cell has already
been freed meanwhile */
......@@ -537,6 +546,16 @@ sync_array_cell_print(
(ulong) rwlock->last_s_line,
rwlock->last_x_file_name,
(ulong) rwlock->last_x_line);
/* If stacktrace feature is enabled we will send a SIGUSR2
signal to thread that has locked RW-latch with write mode.
Signal handler will then dump the current stack to error log. */
if (writer != RW_LOCK_NOT_LOCKED && srv_use_stacktrace) {
#ifndef __WIN__
pthread_kill(rwlock->writer_thread, SIGUSR2);
#endif
}
} else {
ut_error;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment