Commit 449a88e1 authored by Marko Mäkelä's avatar Marko Mäkelä

MDEV-12052 Shutdown crash presumably due to master thread activity

InnoDB shutdown assumes that once the server has entered
SRV_SHUTDOWN_FLUSH_PHASE, no change to persistent data is allowed.
It was possible for the master thread to wake up while shutdown
is executing in SRV_SHUTDOWN_FLUSH_PHASE or
even in SRV_SHUTDOWN_LAST_PHASE.

We do not yet know if further crashes at shutdown are possible.
Also, we do not know if all the observed crashes could be explained
by the race conditions that we are now fixing.

srv_shutdown_print_master_pending(): Remove a redundant ut_time() call.

srv_shutdown(): Renamed from srv_master_do_shutdown_tasks().

srv_master_thread(): Do not resume after shutdown has been initiated.
parent fde86fc1
CREATE TABLE t(c0 SERIAL, c1 INT, c2 INT, c3 INT, c4 INT,
KEY(c1), KEY(c2), KEY(c2,c1),
KEY(c3), KEY(c3,c1), KEY(c3,c2), KEY(c3,c2,c1),
KEY(c4), KEY(c4,c1), KEY(c4,c2), KEY(c4,c2,c1),
KEY(c4,c3), KEY(c4,c3,c1), KEY(c4,c3,c2), KEY(c4,c3,c2,c1)) ENGINE=InnoDB;
SET DEBUG_DBUG='+d,row_drop_table_add_to_background';
DROP TABLE t;
CREATE TABLE t (a INT) ENGINE=InnoDB;
DROP TABLE t;
--source include/have_innodb.inc
--source include/have_debug.inc
# Embedded server does not support restarting
--source include/not_embedded.inc
CREATE TABLE t(c0 SERIAL, c1 INT, c2 INT, c3 INT, c4 INT,
KEY(c1), KEY(c2), KEY(c2,c1),
KEY(c3), KEY(c3,c1), KEY(c3,c2), KEY(c3,c2,c1),
KEY(c4), KEY(c4,c1), KEY(c4,c2), KEY(c4,c2,c1),
KEY(c4,c3), KEY(c4,c3,c1), KEY(c4,c3,c2), KEY(c4,c3,c2,c1)) ENGINE=InnoDB;
let $n= 10;
SET DEBUG_DBUG='+d,row_drop_table_add_to_background';
--disable_query_log
let $i= $n;
while ($i) {
eval CREATE TABLE t$i LIKE t;
dec $i;
}
let $i= $n;
while ($i) {
eval DROP TABLE t$i;
dec $i;
}
--enable_query_log
DROP TABLE t;
--source include/restart_mysqld.inc
CREATE TABLE t (a INT) ENGINE=InnoDB;
DROP TABLE t;
......@@ -1715,7 +1715,7 @@ DECLARE_THREAD(srv_monitor_thread)(
}
}
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
goto exit_func;
}
......@@ -1845,7 +1845,7 @@ DECLARE_THREAD(srv_error_monitor_thread)(
os_event_wait_time_low(srv_error_event, 1000000, sig_count);
if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
goto loop;
}
......@@ -2105,7 +2105,7 @@ srv_shutdown_print_master_pending(
time_elapsed = ut_difftime(current_time, *last_print_time);
if (time_elapsed > 60) {
*last_print_time = ut_time();
*last_print_time = current_time;
if (n_tables_to_drop) {
ut_print_timestamp(stderr);
......@@ -2157,7 +2157,7 @@ srv_master_do_active_tasks(void)
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time);
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2191,11 +2191,7 @@ srv_master_do_active_tasks(void)
MONITOR_SRV_MEM_VALIDATE_MICROSECOND, counter_time);
}
#endif
if (srv_shutdown_state > 0) {
return;
}
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2206,7 +2202,7 @@ srv_master_do_active_tasks(void)
MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
}
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2249,7 +2245,7 @@ srv_master_do_idle_tasks(void)
MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
counter_time);
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2265,7 +2261,7 @@ srv_master_do_idle_tasks(void)
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2279,7 +2275,7 @@ srv_master_do_idle_tasks(void)
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2290,70 +2286,42 @@ srv_master_do_idle_tasks(void)
counter_time);
}
/*********************************************************************//**
Perform the tasks during shutdown. The tasks that we do at shutdown
depend on srv_fast_shutdown:
2 => very fast shutdown => do no book keeping
1 => normal shutdown => clear drop table queue and make checkpoint
0 => slow shutdown => in addition to above do complete purge and ibuf
merge
@return TRUE if some work was done. FALSE otherwise */
/** Perform shutdown tasks.
@param[in] ibuf_merge whether to complete the change buffer merge */
static
ibool
srv_master_do_shutdown_tasks(
/*=========================*/
ib_time_t* last_print_time)/*!< last time the function
print the message */
void
srv_shutdown(bool ibuf_merge)
{
ulint n_bytes_merged = 0;
ulint n_tables_to_drop = 0;
ut_ad(!srv_read_only_mode);
++srv_main_shutdown_loops;
ut_a(srv_shutdown_state > 0);
/* In very fast shutdown none of the following is necessary */
if (srv_fast_shutdown == 2) {
return(FALSE);
}
/* ALTER TABLE in MySQL requires on Unix that the table handler
can drop tables lazily after there no longer are SELECT
queries to them. */
srv_main_thread_op_info = "doing background drop tables";
n_tables_to_drop = row_drop_tables_for_mysql_in_background();
/* make sure that there is enough reusable space in the redo
log files */
srv_main_thread_op_info = "checking free log space";
log_free_check();
ulint n_bytes_merged = 0;
ulint n_tables_to_drop;
ib_time_t now = ut_time();
/* In case of normal shutdown we don't do ibuf merge or purge */
if (srv_fast_shutdown == 1) {
goto func_exit;
}
/* Do an ibuf merge */
srv_main_thread_op_info = "doing insert buffer merge";
n_bytes_merged = ibuf_merge_in_background(true);
/* Flush logs if needed */
srv_sync_log_buffer_in_background();
func_exit:
/* Make a new checkpoint about once in 10 seconds */
srv_main_thread_op_info = "making checkpoint";
log_checkpoint(TRUE, FALSE);
/* Print progress message every 60 seconds during shutdown */
if (srv_shutdown_state > 0 && srv_print_verbose_log) {
srv_shutdown_print_master_pending(
last_print_time, n_tables_to_drop, n_bytes_merged);
}
do {
ut_ad(!srv_read_only_mode);
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_CLEANUP);
++srv_main_shutdown_loops;
/* FIXME: Remove the background DROP TABLE queue; it is not
crash-safe and breaks ACID. */
srv_main_thread_op_info = "doing background drop tables";
n_tables_to_drop = row_drop_tables_for_mysql_in_background();
if (ibuf_merge) {
srv_main_thread_op_info = "checking free log space";
log_free_check();
srv_main_thread_op_info = "doing insert buffer merge";
n_bytes_merged = ibuf_merge_in_background(true);
/* Flush logs if needed */
srv_sync_log_buffer_in_background();
}
return(n_bytes_merged || n_tables_to_drop);
/* Print progress message every 60 seconds during shutdown */
if (srv_print_verbose_log) {
srv_shutdown_print_master_pending(
&now, n_tables_to_drop, n_bytes_merged);
}
} while (n_bytes_merged || n_tables_to_drop);
}
/*********************************************************************//**
......@@ -2385,7 +2353,6 @@ DECLARE_THREAD(srv_master_thread)(
srv_slot_t* slot;
ulint old_activity_count = srv_get_activity_count();
ib_time_t last_print_time;
ut_ad(!srv_read_only_mode);
......@@ -2404,7 +2371,6 @@ DECLARE_THREAD(srv_master_thread)(
slot = srv_reserve_slot(SRV_MASTER);
ut_a(slot == srv_sys.sys_threads);
last_print_time = ut_time();
loop:
if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
goto suspend_thread;
......@@ -2424,13 +2390,26 @@ DECLARE_THREAD(srv_master_thread)(
}
}
while (srv_master_do_shutdown_tasks(&last_print_time)) {
/* Shouldn't loop here in case of very fast shutdown */
ut_ad(srv_fast_shutdown < 2);
suspend_thread:
switch (srv_shutdown_state) {
case SRV_SHUTDOWN_NONE:
break;
case SRV_SHUTDOWN_FLUSH_PHASE:
case SRV_SHUTDOWN_LAST_PHASE:
ut_ad(0);
/* fall through */
case SRV_SHUTDOWN_EXIT_THREADS:
/* srv_init_abort() must have been invoked */
case SRV_SHUTDOWN_CLEANUP:
if (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP
&& srv_fast_shutdown < 2) {
srv_shutdown(srv_fast_shutdown == 1);
}
srv_suspend_thread(slot);
my_thread_end();
os_thread_exit(NULL);
}
suspend_thread:
srv_main_thread_op_info = "suspending";
srv_suspend_thread(slot);
......@@ -2442,15 +2421,7 @@ DECLARE_THREAD(srv_master_thread)(
srv_main_thread_op_info = "waiting for server activity";
srv_resume_thread(slot);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
my_thread_end();
os_thread_exit(NULL);
}
goto loop;
OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
}
/*********************************************************************//**
......
......@@ -2147,7 +2147,7 @@ DECLARE_THREAD(srv_monitor_thread)(
}
}
if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
goto exit_func;
}
......@@ -2277,7 +2277,7 @@ DECLARE_THREAD(srv_error_monitor_thread)(
os_event_wait_time_low(srv_error_event, 1000000, sig_count);
if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
goto loop;
}
......@@ -2756,7 +2756,7 @@ srv_shutdown_print_master_pending(
time_elapsed = ut_difftime(current_time, *last_print_time);
if (time_elapsed > 60) {
*last_print_time = ut_time();
*last_print_time = current_time;
if (n_tables_to_drop) {
ut_print_timestamp(stderr);
......@@ -2808,7 +2808,7 @@ srv_master_do_active_tasks(void)
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time);
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2842,11 +2842,7 @@ srv_master_do_active_tasks(void)
MONITOR_SRV_MEM_VALIDATE_MICROSECOND, counter_time);
}
#endif
if (srv_shutdown_state > 0) {
return;
}
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2857,7 +2853,7 @@ srv_master_do_active_tasks(void)
MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
}
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2900,7 +2896,7 @@ srv_master_do_idle_tasks(void)
MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
counter_time);
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2916,7 +2912,7 @@ srv_master_do_idle_tasks(void)
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2930,7 +2926,7 @@ srv_master_do_idle_tasks(void)
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
if (srv_shutdown_state > 0) {
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
return;
}
......@@ -2951,70 +2947,42 @@ srv_master_do_idle_tasks(void)
}
}
/*********************************************************************//**
Perform the tasks during shutdown. The tasks that we do at shutdown
depend on srv_fast_shutdown:
2 => very fast shutdown => do no book keeping
1 => normal shutdown => clear drop table queue and make checkpoint
0 => slow shutdown => in addition to above do complete purge and ibuf
merge
@return TRUE if some work was done. FALSE otherwise */
/** Perform shutdown tasks.
@param[in] ibuf_merge whether to complete the change buffer merge */
static
ibool
srv_master_do_shutdown_tasks(
/*=========================*/
ib_time_t* last_print_time)/*!< last time the function
print the message */
void
srv_shutdown(bool ibuf_merge)
{
ulint n_bytes_merged = 0;
ulint n_tables_to_drop = 0;
ut_ad(!srv_read_only_mode);
++srv_main_shutdown_loops;
ulint n_bytes_merged = 0;
ulint n_tables_to_drop;
ib_time_t now = ut_time();
ut_a(srv_shutdown_state > 0);
/* In very fast shutdown none of the following is necessary */
if (srv_fast_shutdown == 2) {
return(FALSE);
}
/* ALTER TABLE in MySQL requires on Unix that the table handler
can drop tables lazily after there no longer are SELECT
queries to them. */
srv_main_thread_op_info = "doing background drop tables";
n_tables_to_drop = row_drop_tables_for_mysql_in_background();
/* make sure that there is enough reusable space in the redo
log files */
srv_main_thread_op_info = "checking free log space";
log_free_check();
/* In case of normal shutdown we don't do ibuf merge or purge */
if (srv_fast_shutdown == 1) {
goto func_exit;
}
/* Do an ibuf merge */
srv_main_thread_op_info = "doing insert buffer merge";
n_bytes_merged = ibuf_merge_in_background(true);
/* Flush logs if needed */
srv_sync_log_buffer_in_background();
func_exit:
/* Make a new checkpoint about once in 10 seconds */
srv_main_thread_op_info = "making checkpoint";
log_checkpoint(TRUE, FALSE, FALSE);
/* Print progress message every 60 seconds during shutdown */
if (srv_shutdown_state > 0 && srv_print_verbose_log) {
srv_shutdown_print_master_pending(
last_print_time, n_tables_to_drop, n_bytes_merged);
}
do {
ut_ad(!srv_read_only_mode);
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_CLEANUP);
++srv_main_shutdown_loops;
/* FIXME: Remove the background DROP TABLE queue; it is not
crash-safe and breaks ACID. */
srv_main_thread_op_info = "doing background drop tables";
n_tables_to_drop = row_drop_tables_for_mysql_in_background();
if (ibuf_merge) {
srv_main_thread_op_info = "checking free log space";
log_free_check();
srv_main_thread_op_info = "doing insert buffer merge";
n_bytes_merged = ibuf_merge_in_background(true);
/* Flush logs if needed */
srv_sync_log_buffer_in_background();
}
return(n_bytes_merged || n_tables_to_drop);
/* Print progress message every 60 seconds during shutdown */
if (srv_print_verbose_log) {
srv_shutdown_print_master_pending(
&now, n_tables_to_drop, n_bytes_merged);
}
} while (n_bytes_merged || n_tables_to_drop);
}
/*********************************************************************//**
......@@ -3048,7 +3016,6 @@ DECLARE_THREAD(srv_master_thread)(
ulint old_activity_count = srv_get_activity_count();
ulint old_ibuf_merge_activity_count
= srv_get_ibuf_merge_activity_count();
ib_time_t last_print_time;
ut_ad(!srv_read_only_mode);
......@@ -3071,7 +3038,6 @@ DECLARE_THREAD(srv_master_thread)(
slot = srv_reserve_slot(SRV_MASTER);
ut_a(slot == srv_sys.sys_threads);
last_print_time = ut_time();
loop:
if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
goto suspend_thread;
......@@ -3097,13 +3063,26 @@ DECLARE_THREAD(srv_master_thread)(
}
}
while (srv_master_do_shutdown_tasks(&last_print_time)) {
/* Shouldn't loop here in case of very fast shutdown */
ut_ad(srv_fast_shutdown < 2);
suspend_thread:
switch (srv_shutdown_state) {
case SRV_SHUTDOWN_NONE:
break;
case SRV_SHUTDOWN_FLUSH_PHASE:
case SRV_SHUTDOWN_LAST_PHASE:
ut_ad(0);
/* fall through */
case SRV_SHUTDOWN_EXIT_THREADS:
/* srv_init_abort() must have been invoked */
case SRV_SHUTDOWN_CLEANUP:
if (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP
&& srv_fast_shutdown < 2) {
srv_shutdown(srv_fast_shutdown == 1);
}
srv_suspend_thread(slot);
my_thread_end();
os_thread_exit(NULL);
}
suspend_thread:
srv_main_thread_op_info = "suspending";
srv_suspend_thread(slot);
......@@ -3115,15 +3094,7 @@ DECLARE_THREAD(srv_master_thread)(
srv_main_thread_op_info = "waiting for server activity";
srv_resume_thread(slot);
if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
my_thread_end();
os_thread_exit(NULL);
}
goto loop;
OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
}
/*********************************************************************//**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment