Commit fed8bb02 authored by Bjorn Munch

Bug #40399 Please make mtr print stack trace after every failure

SIGABRT is sent to relevant processes after a timeout


client/mysqltest.cc:
  Fixed signal handlers so that mysqltest actually dumps core
mysql-test/lib/My/CoreDump.pm:
  Added support for dbx
mysql-test/lib/My/SafeProcess.pm:
  Added dump_core to force process to dump core
mysql-test/lib/My/SafeProcess/safe_process.cc:
  Traps SIGABRT and sends this on to child
mysql-test/mysql-test-run.pl:
  When test times out, force core dumps on mysqltest and servers
parent 866f2c0f
...@@ -7326,6 +7326,13 @@ static sig_handler signal_handler(int sig) ...@@ -7326,6 +7326,13 @@ static sig_handler signal_handler(int sig)
{ {
fprintf(stderr, "mysqltest got " SIGNAL_FMT "\n", sig); fprintf(stderr, "mysqltest got " SIGNAL_FMT "\n", sig);
dump_backtrace(); dump_backtrace();
fprintf(stderr, "Writing a core file...\n");
fflush(stderr);
my_write_core(sig);
#ifndef __WIN__
exit(1); // Shouldn't get here but just in case
#endif
} }
#ifdef __WIN__ #ifdef __WIN__
......
...@@ -49,12 +49,56 @@ sub _gdb { ...@@ -49,12 +49,56 @@ sub _gdb {
unlink $tmp_name or die "Error removing $tmp_name: $!"; unlink $tmp_name or die "Error removing $tmp_name: $!";
return if $? >> 8;
return unless $gdb_output; return unless $gdb_output;
print <<EOF, $gdb_output, "\n"; print <<EOF, $gdb_output, "\n";
Output from gdb follows. The first stack trace is from the failing thread. Output from gdb follows. The first stack trace is from the failing thread.
The following stack traces are from all threads (so the failing one is The following stack traces are from all threads (so the failing one is
duplicated). duplicated).
--------------------------
EOF
return 1;
}
# Try to print a stack trace for every thread of a core file using 'dbx'.
#
#   $core_name  path of the core file to examine
#
# Returns 1 after printing the dbx output.  Returns nothing (false) when the
# core file does not exist, when dbx does not report which executable
# produced the core, when dbx exits with a non-zero status, or when dbx
# produces no output.
sub _dbx {
  my ($core_name)= @_;

  print "\nTrying 'dbx' to get a backtrace\n";

  return unless -f $core_name;

  # Find out name of binary that generated core.
  # The match is done in list context so that a failed match yields no
  # binary at all; reading $1 after an unchecked match could pick up a
  # stale capture left over from an earlier successful match.
  my $probe_output= `echo | dbx - '$core_name' 2>&1`;
  my ($binary)=
    $probe_output =~ /Corefile specified executable: "([^"]+)"/
      or return;
  print "Core generated by '$binary'\n";

  # Find all threads
  my @thr_ids = `echo threads | dbx '$binary' '$core_name' 2>&1` =~ /t@\d+/g;

  # Create tempfile containing dbx commands
  my ($tmp, $tmp_name) = tempfile();
  foreach my $thread (@thr_ids) {
    print $tmp "where $thread\n";
  }
  print $tmp "exit\n";
  close $tmp or die "Error closing $tmp_name: $!";

  # Run dbx and remember its exit status right away, before any later
  # statement gets a chance to touch $?
  my $dbx_output=
    `cat '$tmp_name' | dbx '$binary' '$core_name' 2>&1`;
  my $dbx_status= $? >> 8;

  unlink $tmp_name or die "Error removing $tmp_name: $!";

  return if $dbx_status;
  return unless $dbx_output;

  print <<EOF, $dbx_output, "\n";
Output from dbx follows. Stack trace is printed for all threads in order,
above this you should see info about which thread was the failing one.
----------------------------
EOF
  return 1;
}
...@@ -63,12 +107,18 @@ EOF ...@@ -63,12 +107,18 @@ EOF
sub show { sub show {
my ($class, $core_name)= @_; my ($class, $core_name)= @_;
# We try dbx first; gdb itself may coredump if run on a Sun Studio
# compiled binary on Solaris.
my @debuggers = my @debuggers =
( (
\&_dbx,
\&_gdb, \&_gdb,
# TODO... # TODO...
); );
# Try debuggers until one succeeds
foreach my $debugger (@debuggers){ foreach my $debugger (@debuggers){
if ($debugger->($core_name)){ if ($debugger->($core_name)){
return; return;
......
...@@ -349,13 +349,24 @@ sub start_kill { ...@@ -349,13 +349,24 @@ sub start_kill {
{ {
$pid= $self->{SAFE_PID}; $pid= $self->{SAFE_PID};
die "Can't kill not started process" unless defined $pid; die "Can't kill not started process" unless defined $pid;
$ret= kill(15, $pid); $ret= kill("TERM", $pid);
} }
return $ret; return $ret;
} }
# Ask the process to dump core by sending it SIGABRT.
#
# The signal goes to the pid stored in $self->{SAFE_PID}.
#
# Returns nothing on Windows, where no signal is sent.  Otherwise returns 1
# once the signal has been issued.  Dies if SAFE_PID is not defined, i.e.
# the process was never started.
sub dump_core {
  my ($self)= @_;
  return if IS_WINDOWS;
  my $pid= $self->{SAFE_PID};
  die "Can't get core from not started process" unless defined $pid;
  _verbose("Sending ABRT to $self");
  # The result of kill is not inspected; success is reported as soon as
  # the signal has been issued.
  kill ("ABRT", $pid);
  return 1;
}
# #
# Kill the process as fast as possible # Kill the process as fast as possible
# and wait for it to return # and wait for it to return
......
...@@ -117,6 +117,16 @@ static void kill_child (void) ...@@ -117,6 +117,16 @@ static void kill_child (void)
} }
/*
  Handler for SIGABRT: logs the signal and, when a child has been
  started (child_pid > 0), passes SIGABRT on to the child's process
  group by signalling the negated pid. Returns immediately afterwards.
*/
static void handle_abort (int sig)
{
  message("Got signal %d, child_pid: %d, sending ABRT", sig, child_pid);

  /* No child to forward the signal to */
  if (child_pid <= 0)
    return;

  kill (-child_pid, SIGABRT); // Don't wait for it to terminate
}
static void handle_signal (int sig) static void handle_signal (int sig)
{ {
message("Got signal %d, child_pid: %d", sig, child_pid); message("Got signal %d, child_pid: %d", sig, child_pid);
...@@ -144,6 +154,7 @@ int main(int argc, char* const argv[] ) ...@@ -144,6 +154,7 @@ int main(int argc, char* const argv[] )
signal(SIGTERM, handle_signal); signal(SIGTERM, handle_signal);
signal(SIGINT, handle_signal); signal(SIGINT, handle_signal);
signal(SIGCHLD, handle_signal); signal(SIGCHLD, handle_signal);
signal(SIGABRT, handle_abort);
sprintf(safe_process_name, "safe_process[%d]", own_pid); sprintf(safe_process_name, "safe_process[%d]", own_pid);
......
...@@ -464,7 +464,11 @@ sub run_test_server ($$$) { ...@@ -464,7 +464,11 @@ sub run_test_server ($$$) {
else { else {
mtr_report(" - saving '$worker_savedir/' to '$savedir/'"); mtr_report(" - saving '$worker_savedir/' to '$savedir/'");
rename($worker_savedir, $savedir); rename($worker_savedir, $savedir);
# Move any core files from e.g. mysqltest
foreach my $coref (glob("core*"))
{
move($coref, $savedir);
}
if ($opt_max_save_core > 0) { if ($opt_max_save_core > 0) {
# Limit number of core files saved # Limit number of core files saved
find({ no_chdir => 1, find({ no_chdir => 1,
...@@ -2381,7 +2385,7 @@ sub kill_leftovers ($) { ...@@ -2381,7 +2385,7 @@ sub kill_leftovers ($) {
} }
mtr_report(" - found old pid $pid in '$elem', killing it..."); mtr_report(" - found old pid $pid in '$elem', killing it...");
my $ret= kill(9, $pid); my $ret= kill("KILL", $pid);
if ($ret == 0) { if ($ret == 0) {
mtr_report(" process did not exist!"); mtr_report(" process did not exist!");
next; next;
...@@ -3283,11 +3287,6 @@ sub run_testcase ($) { ...@@ -3283,11 +3287,6 @@ sub run_testcase ($) {
# ---------------------------------------------------- # ----------------------------------------------------
$test_timeout_proc->kill(); $test_timeout_proc->kill();
# ----------------------------------------------------
# It's not mysqltest that has exited, kill it
# ----------------------------------------------------
$test->kill();
# ---------------------------------------------------- # ----------------------------------------------------
# Check if it was a server that died # Check if it was a server that died
# ---------------------------------------------------- # ----------------------------------------------------
...@@ -3297,10 +3296,30 @@ sub run_testcase ($) { ...@@ -3297,10 +3296,30 @@ sub run_testcase ($) {
$tinfo->{comment}= $tinfo->{comment}=
"Server $proc failed during test run"; "Server $proc failed during test run";
# ----------------------------------------------------
# It's not mysqltest that has exited, kill it
# ----------------------------------------------------
$test->kill();
report_failure_and_restart($tinfo); report_failure_and_restart($tinfo);
return 1; return 1;
} }
# Try to dump core for mysqltest and all servers
foreach my $proc ($test, started(all_servers()))
{
mtr_print("Trying to dump core for $proc");
if ($proc->dump_core())
{
$proc->wait_one(20);
}
}
# ----------------------------------------------------
# It's not mysqltest that has exited, kill it
# ----------------------------------------------------
$test->kill();
# ---------------------------------------------------- # ----------------------------------------------------
# Check if testcase timer expired # Check if testcase timer expired
# ---------------------------------------------------- # ----------------------------------------------------
...@@ -3319,6 +3338,7 @@ sub run_testcase ($) { ...@@ -3319,6 +3338,7 @@ sub run_testcase ($) {
} }
$tinfo->{'timeout'}= testcase_timeout(); # Mark as timeout $tinfo->{'timeout'}= testcase_timeout(); # Mark as timeout
run_on_all($tinfo, 'analyze-timeout'); run_on_all($tinfo, 'analyze-timeout');
report_failure_and_restart($tinfo); report_failure_and_restart($tinfo);
return 1; return 1;
} }
...@@ -4222,7 +4242,7 @@ sub start_servers($) { ...@@ -4222,7 +4242,7 @@ sub start_servers($) {
} }
else else
{ {
mysql_install_db($mysqld); mysql_install_db($mysqld); # For versional testing
mtr_error("Failed to install system db to '$datadir'") mtr_error("Failed to install system db to '$datadir'")
unless -d $datadir; unless -d $datadir;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment