Commit 1d98255f authored by Vasil Dimov, committed by Sergei Golubchik

MDEV-15792 Fix mtr to be able to wait for >1 exited mysqld

If an mtr test case has started two mysqld processes (replication tests),
then kills the first one and kills the second one before restarting the
first (so at some point both mysqlds are down), then the ./mtr
waiting process malfunctions: it forgets to monitor the "expect" file of the
first mysqld, so that server never gets started again, even when the file's
contents are changed to "restart".

At least one test, galera.galera_gcache_recover, is a victim of this deficiency.

The fix is to keep a list of all mysqlds that we are waiting to be started
again, not just one (the last one killed).
parent 82bb0158
...@@ -3980,14 +3980,14 @@ sub run_testcase ($$) { ...@@ -3980,14 +3980,14 @@ sub run_testcase ($$) {
} }
my $test= $tinfo->{suite}->start_test($tinfo); my $test= $tinfo->{suite}->start_test($tinfo);
# Set only when we have to keep waiting after expectedly died server # Set to a list of processes we have to keep waiting (expectedly died servers)
my $keep_waiting_proc = 0; my %keep_waiting_proc = ();
my $print_timeout= start_timer($print_freq * 60); my $print_timeout= start_timer($print_freq * 60);
while (1) while (1)
{ {
my $proc; my $proc = 0;
if ($keep_waiting_proc) if (scalar(keys(%keep_waiting_proc)) > 0)
{ {
# Any other process exited? # Any other process exited?
$proc = My::SafeProcess->check_any(); $proc = My::SafeProcess->check_any();
...@@ -3997,48 +3997,34 @@ sub run_testcase ($$) { ...@@ -3997,48 +3997,34 @@ sub run_testcase ($$) {
} }
else else
{ {
$proc = $keep_waiting_proc;
# Also check if timer has expired, if so cancel waiting # Also check if timer has expired, if so cancel waiting
if ( has_expired($test_timeout) ) if ( has_expired($test_timeout) )
{ {
$keep_waiting_proc = 0; %keep_waiting_proc = ();
} }
} }
} }
if (! $keep_waiting_proc) if (scalar(keys(%keep_waiting_proc)) == 0 && !$proc)
{ {
if($test_timeout > $print_timeout) if ($test_timeout > $print_timeout)
{ {
$proc= My::SafeProcess->wait_any_timeout($print_timeout); $proc= My::SafeProcess->wait_any_timeout($print_timeout);
if ( $proc->{timeout} ) if ($proc->{timeout})
{ {
#print out that the test is still on #print out that the test is still on
mtr_print("Test still running: $tinfo->{name}"); mtr_print("Test still running: $tinfo->{name}");
#reset the timer #reset the timer
$print_timeout= start_timer($print_freq * 60); $print_timeout= start_timer($print_freq * 60);
next; next;
} }
} }
else else
{ {
$proc= My::SafeProcess->wait_any_timeout($test_timeout); $proc= My::SafeProcess->wait_any_timeout($test_timeout);
} }
} }
# Will be restored if we need to keep waiting if ($proc eq $test) # mysqltest itself exited
$keep_waiting_proc = 0;
unless ( defined $proc )
{
mtr_error("wait_any failed");
}
mtr_verbose("Got $proc");
mark_time_used('test');
# ----------------------------------------------------
# Was it the test program that exited
# ----------------------------------------------------
if ($proc eq $test)
{ {
my $res= $test->exit_status(); my $res= $test->exit_status();
...@@ -4053,12 +4039,12 @@ sub run_testcase ($$) { ...@@ -4053,12 +4039,12 @@ sub run_testcase ($$) {
if ( $res == 0 ) if ( $res == 0 )
{ {
my $check_res; my $check_res;
if ( $opt_check_testcases and if ( $opt_check_testcases and
$check_res= check_testcase($tinfo, "after")) $check_res= check_testcase($tinfo, "after"))
{ {
if ($check_res == 1) { if ($check_res == 1) {
# Test case had sideeffects, not fatal error, just continue # Test case had sideeffects, not fatal error, just continue
if ($opt_warnings) { if ($opt_warnings) {
# Checking error logs for warnings, so need to stop server # Checking error logs for warnings, so need to stop server
# gracefully so that memory leaks etc. can be properly detected. # gracefully so that memory leaks etc. can be properly detected.
...@@ -4069,90 +4055,111 @@ sub run_testcase ($$) { ...@@ -4069,90 +4055,111 @@ sub run_testcase ($$) {
# test. # test.
} else { } else {
# Not checking warnings, so can do a hard shutdown. # Not checking warnings, so can do a hard shutdown.
stop_all_servers($opt_shutdown_timeout); stop_all_servers($opt_shutdown_timeout);
} }
mtr_report("Resuming tests...\n"); mtr_report("Resuming tests...\n");
resfile_output($tinfo->{'check'}) if $opt_resfile; resfile_output($tinfo->{'check'}) if $opt_resfile;
} }
else { else {
# Test case check failed fatally, probably a server crashed # Test case check failed fatally, probably a server crashed
report_failure_and_restart($tinfo); report_failure_and_restart($tinfo);
return 1; return 1;
} }
} }
mtr_report_test_passed($tinfo); mtr_report_test_passed($tinfo);
} }
elsif ( $res == 62 ) elsif ( $res == 62 )
{ {
# Testcase itself tell us to skip this one # Testcase itself tell us to skip this one
$tinfo->{skip_detected_by_test}= 1; $tinfo->{skip_detected_by_test}= 1;
# Try to get reason from test log file # Try to get reason from test log file
find_testcase_skipped_reason($tinfo); find_testcase_skipped_reason($tinfo);
mtr_report_test_skipped($tinfo); mtr_report_test_skipped($tinfo);
# Restart if skipped due to missing perl, it may have had side effects # Restart if skipped due to missing perl, it may have had side effects
if ( $tinfo->{'comment'} =~ /^perl not found/ ) if ( $tinfo->{'comment'} =~ /^perl not found/ )
{ {
stop_all_servers($opt_shutdown_timeout); stop_all_servers($opt_shutdown_timeout);
} }
} }
elsif ( $res == 65 ) elsif ( $res == 65 )
{ {
# Testprogram killed by signal # Testprogram killed by signal
$tinfo->{comment}= $tinfo->{comment}=
"testprogram crashed(returned code $res)"; "testprogram crashed(returned code $res)";
report_failure_and_restart($tinfo); report_failure_and_restart($tinfo);
} }
elsif ( $res == 1 ) elsif ( $res == 1 )
{ {
# Check if the test tool requests that # Check if the test tool requests that
# an analyze script should be run # an analyze script should be run
my $analyze= find_analyze_request(); my $analyze= find_analyze_request();
if ($analyze){ if ($analyze){
run_on_all($tinfo, "analyze-$analyze"); run_on_all($tinfo, "analyze-$analyze");
} }
# Wait a bit and see if a server died, if so report that instead # Wait a bit and see if a server died, if so report that instead
mtr_milli_sleep(100); mtr_milli_sleep(100);
my $srvproc= My::SafeProcess::check_any(); my $srvproc= My::SafeProcess::check_any();
if ($srvproc && grep($srvproc eq $_, started(all_servers()))) { if ($srvproc && grep($srvproc eq $_, started(all_servers()))) {
$proc= $srvproc; $proc= $srvproc;
goto SRVDIED; goto SRVDIED;
} }
# Test case failure reported by mysqltest # Test case failure reported by mysqltest
report_failure_and_restart($tinfo); report_failure_and_restart($tinfo);
} }
else else
{ {
# mysqltest failed, probably crashed # mysqltest failed, probably crashed
$tinfo->{comment}= $tinfo->{comment}=
"mysqltest failed with unexpected return code $res\n"; "mysqltest failed with unexpected return code $res\n";
report_failure_and_restart($tinfo); report_failure_and_restart($tinfo);
} }
# Save info from this testcase run to mysqltest.log # Save info from this testcase run to mysqltest.log
if( -f $path_current_testlog) if( -f $path_current_testlog)
{ {
if ($opt_resfile && $res && $res != 62) { if ($opt_resfile && $res && $res != 62) {
resfile_output_file($path_current_testlog); resfile_output_file($path_current_testlog);
} }
mtr_appendfile_to_file($path_current_testlog, $path_testlog); mtr_appendfile_to_file($path_current_testlog, $path_testlog);
unlink($path_current_testlog); unlink($path_current_testlog);
} }
return ($res == 62) ? 0 : $res; return ($res == 62) ? 0 : $res;
} }
# ---------------------------------------------------- if ($proc)
# Check if it was an expected crash
# ----------------------------------------------------
my $check_crash = check_expected_crash_and_restart($proc);
if ($check_crash)
{ {
# Keep waiting if it returned 2, if 1 don't wait or stop waiting. # It was not mysqltest that exited, add to a wait-to-be-started-again list.
$keep_waiting_proc = 0 if $check_crash == 1; $keep_waiting_proc{$proc} = 1;
$keep_waiting_proc = $proc if $check_crash == 2; }
mtr_verbose("Got " . join(",", keys(%keep_waiting_proc)));
mark_time_used('test');
my $expected_exit = 1;
foreach my $wait_for_proc (keys(%keep_waiting_proc)) {
# ----------------------------------------------------
# Check if it was an expected crash
# ----------------------------------------------------
my $check_crash = check_expected_crash_and_restart($wait_for_proc);
if ($check_crash == 0) # unexpected exit/crash of $wait_for_proc
{
$expected_exit = 0;
last;
}
elsif ($check_crash == 1) # $wait_for_proc was started again by check_expected_crash_and_restart()
{
delete $keep_waiting_proc{$wait_for_proc};
}
elsif ($check_crash == 2) # we must keep waiting
{
# do nothing
}
}
if ($expected_exit) {
next; next;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment