Commit c71dc395, authored by Daniele Sciascia, committed by Julius Goryavsky

MDEV-26499 Fix error "mysql_shutdown failed" during MTR tests

- Fix to avoid the mysqltest client getting killed abruptly during
  mysql_shutdown(). When Galera replication is shut down, wait for
  THDs with `thd->get_stmt_da()->is_eof()` to disconnect (these are
  about to disconnect anyway).
- Extract duplicate code from `wsrep_stop_replication()` and
  `wsrep_shutdown_replication()` in a new function.
- No need to use a custom `shutdown_mysqld.inc` in galera
  suite. Delete it, so that the one in `mysql-test/include/` is used.
Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
parent db0b9ec3
# This is the first half of include/restart_mysqld.inc.
if ($rpl_inited)
{
if (!$allow_rpl_inited)
{
--die ERROR IN TEST: When using the replication test framework (master-slave.inc, rpl_init.inc etc), use rpl_restart_server.inc instead of restart_mysqld.inc. If you know what you are doing and you really have to use restart_mysqld.inc, set allow_rpl_inited=1 before you source restart_mysqld.inc
}
}
# Write file to make mysql-test-run.pl expect the "crash", but don't start it
--let $_expect_file_name= `select regexp_replace(@@tmpdir, '^.*/','')`
--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/$_expect_file_name.expect
--exec echo "wait" > $_expect_file_name
# Send shutdown to the connected server
--shutdown_server
--source include/wait_until_disconnected.inc
connection node_2;
connection node_1;
connection node_1;
connection node_2;
connection node_2;
SET GLOBAL debug_dbug="+d,simulate_slow_client_at_shutdown";
#
# MDEV-26499
#
# Regression test for a failing mysql_shutdown() call that shows up
# sporadically in Galera MTR tests while a node is being restarted.
#
# The debug keyword simulate_slow_client_at_shutdown makes
# dispatch_command() sleep right after it has sent EOF to the client and
# called kill_mysql(), so the connection that requested the shutdown is
# still connected while the server shuts down.
#
--source include/galera_cluster.inc
# NOTE(review): only debug_dbug is used below; confirm that
# have_debug_sync.inc (rather than have_debug.inc) is the intended guard.
--source include/have_debug_sync.inc
# Connection names, presumably consumed by the auto_increment_offset_*
# includes -- verify against those files.
--let $node_1=node_1
--let $node_2=node_2
--source include/auto_increment_offset_save.inc
--connection node_2
# Enable the slow-client simulation on node_2, then restart that node.
SET GLOBAL debug_dbug="+d,simulate_slow_client_at_shutdown";
--source include/restart_mysqld.inc
--source include/auto_increment_offset_restore.inc
......@@ -2208,6 +2208,7 @@ bool dispatch_command(enum enum_server_command command, THD *thd,
my_eof(thd);
kill_mysql(thd);
error=TRUE;
DBUG_EXECUTE_IF("simulate_slow_client_at_shutdown", my_sleep(2000000););
break;
}
#endif
......
......@@ -1014,10 +1014,8 @@ void wsrep_recover()
WSREP_INFO("Recovered position: %s", oss.str().c_str());
}
void wsrep_stop_replication(THD *thd)
static void wsrep_stop_replication_common(THD *thd)
{
WSREP_INFO("Stop replication by %llu", (thd) ? thd->thread_id : 0);
if (Wsrep_server_state::instance().state() !=
Wsrep_server_state::s_disconnected)
{
......@@ -1030,10 +1028,10 @@ void wsrep_stop_replication(THD *thd)
}
}
/* my connection, should not terminate with wsrep_close_client_connection(),
make transaction to rollback
*/
if (thd && !thd->wsrep_applier) trans_rollback(thd);
/* my connection, should not terminate with
wsrep_close_client_connections(), make transaction to rollback */
if (thd && !thd->wsrep_applier)
trans_rollback(thd);
wsrep_close_client_connections(TRUE, thd);
/* wait until appliers have stopped */
......@@ -1042,28 +1040,18 @@ void wsrep_stop_replication(THD *thd)
node_uuid= WSREP_UUID_UNDEFINED;
}
/*
  Stop wsrep replication on request of the given thread.

  Logs the id of the requesting thread (0 when thd is NULL) and then
  delegates the actual work to wsrep_stop_replication_common().

  @param thd  requesting thread, may be NULL
*/
void wsrep_stop_replication(THD *thd)
{
WSREP_INFO("Stop replication by %llu", (thd) ? thd->thread_id : 0);
wsrep_stop_replication_common(thd);
}
void wsrep_shutdown_replication()
{
WSREP_INFO("Shutdown replication");
if (Wsrep_server_state::instance().state() != wsrep::server_state::s_disconnected)
{
WSREP_DEBUG("Disconnect provider");
Wsrep_server_state::instance().disconnect();
if (Wsrep_server_state::instance().wait_until_state(
Wsrep_server_state::s_disconnected))
{
WSREP_WARN("Wsrep interrupted while waiting for disconnected state");
}
}
wsrep_close_client_connections(TRUE);
/* wait until appliers have stopped */
wsrep_wait_appliers_close(NULL);
node_uuid= WSREP_UUID_UNDEFINED;
wsrep_stop_replication_common(nullptr);
/* Undocking the thread specific data. */
my_pthread_setspecific_ptr(THR_THD, NULL);
my_pthread_setspecific_ptr(THR_THD, nullptr);
}
bool wsrep_start_replication(const char *wsrep_cluster_address)
......@@ -2644,14 +2632,19 @@ static my_bool have_client_connections(THD *thd, void*)
{
DBUG_PRINT("quit",("Informing thread %lld that it's time to die",
(longlong) thd->thread_id));
if (is_client_connection(thd) && thd->killed == KILL_CONNECTION)
if (is_client_connection(thd))
{
if (thd->killed == KILL_CONNECTION)
{
WSREP_DEBUG("Informing thread %lld that it's time to die",
thd->thread_id);
(void)abort_replicated(thd);
return true;
}
return 0;
if (thd->get_stmt_da()->is_eof())
{
return true;
}
}
return false;
}
static void wsrep_close_thread(THD *thd)
......@@ -2691,14 +2684,24 @@ static my_bool kill_all_threads(THD *thd, THD *caller_thd)
/* We skip slave threads & scheduler on this first loop through. */
if (is_client_connection(thd) && thd != caller_thd)
{
if (thd->get_stmt_da()->is_eof())
{
return 0;
}
if (is_replaying_connection(thd))
{
thd->set_killed(KILL_CONNECTION);
else if (!abort_replicated(thd))
return 0;
}
if (!abort_replicated(thd))
{
/* replicated transactions must be skipped */
WSREP_DEBUG("closing connection %lld", (longlong) thd->thread_id);
/* instead of wsrep_close_thread() we do now soft kill by THD::awake */
thd->awake(KILL_CONNECTION);
return 0;
}
}
return 0;
......@@ -2710,6 +2713,7 @@ static my_bool kill_remaining_threads(THD *thd, THD *caller_thd)
if (is_client_connection(thd) &&
!abort_replicated(thd) &&
!is_replaying_connection(thd) &&
!thd->get_stmt_da()->is_eof() &&
thd_is_connection_alive(thd) &&
thd != caller_thd)
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment