mirror of
https://github.com/openvswitch/ovs
synced 2025-09-03 07:45:30 +00:00
fatal-signal: Catch SIGSEGV and print backtrace.
The patch catches the SIGSEGV signal and prints the backtrace using libunwind at the monitor daemon. This makes debugging easier when no debug symbol package or gdb is installed on production systems.

The patch works even when ovs-vswitchd is compiled without debug symbols (no -g option), because the object files still have function symbols. For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
 |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
 |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
 |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
 |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
 |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
 |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
    (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only get the _init function plus an offset value.
This is still useful when trying to see whether two bugs have the same root cause. For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
 |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
 |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
 |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
    (Segmentation fault), core dumped, restarting

Most C library functions, for example printf() or fflush(), are not async-signal-safe, meaning that it is not safe to call them from a signal handler. To be async-signal-safe, the handler only collects the stack info using libunwind, which is signal-safe, and issues a write() to the pipe, from which the monitor thread reads the data and prints it to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
This commit is contained in:
@@ -15,6 +15,7 @@
|
||||
*/
|
||||
|
||||
#include <config.h>
|
||||
#include "backtrace.h"
|
||||
#include "daemon.h"
|
||||
#include "daemon-private.h"
|
||||
#include <errno.h>
|
||||
@@ -75,7 +76,7 @@ static bool overwrite_pidfile;
|
||||
static bool chdir_ = true;
|
||||
|
||||
/* File descriptor used by daemonize_start() and daemonize_complete(). */
|
||||
static int daemonize_fd = -1;
|
||||
int daemonize_fd = -1;
|
||||
|
||||
/* --monitor: Should a supervisory process monitor the daemon and restart it if
|
||||
* it dies due to an error signal? */
|
||||
@@ -291,8 +292,7 @@ fork_and_wait_for_startup(int *fdp, pid_t *child_pid)
|
||||
OVS_NOT_REACHED();
|
||||
}
|
||||
}
|
||||
close(fds[0]);
|
||||
*fdp = -1;
|
||||
*fdp = fds[0];
|
||||
} else if (!pid) {
|
||||
/* Running in child process. */
|
||||
close(fds[0]);
|
||||
@@ -313,8 +313,6 @@ fork_notify_startup(int fd)
|
||||
if (error) {
|
||||
VLOG_FATAL("pipe write failed (%s)", ovs_strerror(error));
|
||||
}
|
||||
|
||||
close(fd);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -393,6 +391,8 @@ monitor_daemon(pid_t daemon_pid)
|
||||
}
|
||||
}
|
||||
|
||||
log_received_backtrace(daemonize_fd);
|
||||
|
||||
/* Throttle restarts to no more than once every 10 seconds. */
|
||||
if (time(NULL) < last_restart + 10) {
|
||||
VLOG_WARN("%s, waiting until 10 seconds since last "
|
||||
@@ -508,7 +508,6 @@ daemonize_complete(void)
|
||||
detached = true;
|
||||
|
||||
fork_notify_startup(daemonize_fd);
|
||||
daemonize_fd = -1;
|
||||
daemonize_post_detach();
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user