2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 18:07:40 +00:00
ovs/lib/fatal-signal.c

473 lines
12 KiB
C
Raw Normal View History

/*
* Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
fatal-signal: Catch SIGSEGV and print backtrace. The patch catches the SIGSEGV signal and prints the backtrace using libunwind at the monitor daemon. This makes debugging easier when there is no debug symbol package or gdb installed on production systems. The patch works when the ovs-vswitchd compiles even without debug symbol (no -g option), because the object files still have function symbols. For example: |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace: |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52> |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40> |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d> |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108> |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c> |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa> |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d> |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca> |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d> |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \ (Segmentation fault), core dumped, restarting However, if the object files' symbols are stripped, then we can only get init function plus offset value. 
This is still useful when trying to see if two bugs have the same root cause, Example: |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace: |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a> |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40> |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d> |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280> |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324> |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371> |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0> |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261> |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \ (Segmentation fault), core dumped, restarting Most C library functions are not async-signal-safe, meaning that it is not safe to call them from a signal handler, for example printf() or fflush(). To be async-signal-safe, the handler only collects the stack info using libunwind, which is signal-safe, and issues 'write' to the pipe, where the monitor thread reads and prints to ovs-vswitchd.log. Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433 Signed-off-by: William Tu <u9012063@gmail.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-09-27 10:22:55 -07:00
#include "backtrace.h"
#include "fatal-signal.h"
#include <errno.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "ovs-thread.h"
#include "openvswitch/poll-loop.h"
#include "openvswitch/shash.h"
#include "sset.h"
#include "signals.h"
#include "socket-util.h"
#include "util.h"
#include "openvswitch/vlog.h"
#include "openvswitch/type-props.h"
fatal-signal: Catch SIGSEGV and print backtrace. The patch catches the SIGSEGV signal and prints the backtrace using libunwind at the monitor daemon. This makes debugging easier when there is no debug symbol package or gdb installed on production systems. The patch works when the ovs-vswitchd compiles even without debug symbol (no -g option), because the object files still have function symbols. For example: |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace: |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52> |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40> |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d> |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108> |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c> |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa> |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d> |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca> |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d> |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \ (Segmentation fault), core dumped, restarting However, if the object files' symbols are stripped, then we can only get init function plus offset value. 
This is still useful when trying to see if two bugs have the same root cause, Example: |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace: |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a> |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40> |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d> |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280> |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324> |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371> |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0> |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261> |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \ (Segmentation fault), core dumped, restarting Most C library functions are not async-signal-safe, meaning that it is not safe to call them from a signal handler, for example printf() or fflush(). To be async-signal-safe, the handler only collects the stack info using libunwind, which is signal-safe, and issues 'write' to the pipe, where the monitor thread reads and prints to ovs-vswitchd.log. Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433 Signed-off-by: William Tu <u9012063@gmail.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-09-27 10:22:55 -07:00
#ifdef HAVE_UNWIND
#include "daemon-private.h"
#endif
/* Fallback for systems whose headers do not define SIG_ATOMIC_MAX: derive it
 * from the type itself.  This file uses the value as the "no signal has been
 * caught" sentinel stored in 'stored_sig_nr'. */
#ifndef SIG_ATOMIC_MAX
#define SIG_ATOMIC_MAX TYPE_MAXIMUM(sig_atomic_t)
#endif
VLOG_DEFINE_THIS_MODULE(fatal_signal);
/* Signals to catch. */
#ifndef _WIN32
fatal-signal: Catch SIGSEGV and print backtrace. The patch catches the SIGSEGV signal and prints the backtrace using libunwind at the monitor daemon. This makes debugging easier when there is no debug symbol package or gdb installed on production systems. The patch works when the ovs-vswitchd compiles even without debug symbol (no -g option), because the object files still have function symbols. For example: |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace: |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52> |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40> |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d> |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108> |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c> |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa> |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d> |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca> |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d> |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \ (Segmentation fault), core dumped, restarting However, if the object files' symbols are stripped, then we can only get init function plus offset value. 
This is still useful when trying to see if two bugs have the same root cause, Example: |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace: |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a> |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40> |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d> |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280> |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324> |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371> |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0> |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261> |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \ (Segmentation fault), core dumped, restarting Most C library functions are not async-signal-safe, meaning that it is not safe to call them from a signal handler, for example printf() or fflush(). To be async-signal-safe, the handler only collects the stack info using libunwind, which is signal-safe, and issues 'write' to the pipe, where the monitor thread reads and prints to ovs-vswitchd.log. Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433 Signed-off-by: William Tu <u9012063@gmail.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-09-27 10:22:55 -07:00
/* Non-Windows builds: fatal_signal_init() considers installing
 * fatal_signal_handler() for each of these.  SIGSEGV is listed so that the
 * handler can report a backtrace before the signal is re-raised. */
static const int fatal_signals[] = { SIGTERM, SIGINT, SIGHUP, SIGALRM,
SIGSEGV };
#else
/* Windows builds: only SIGTERM is registered through signal(); Ctrl+C is
 * handled separately by ConsoleHandlerRoutine(). */
static const int fatal_signals[] = { SIGTERM };
#endif
/* One hook registered with fatal_signal_add_hook(), to be called upon
 * catching a signal (and optionally at normal exit). */
struct hook {
/* Called by call_hooks() with 'aux' when the hook fires. */
void (*hook_cb)(void *aux);
/* If nonnull, called by fatal_signal_fork() with 'aux' when the hook is
 * discarded without being run. */
void (*cancel_cb)(void *aux);
/* Opaque caller data handed to both callbacks. */
void *aux;
/* If true, the hook also runs when call_hooks() is invoked with signal
 * number 0, which is what fatal_signal_atexit_handler() does. */
bool run_at_exit;
};
/* Capacity of the hook table; fatal_signal_add_hook() asserts that it is not
 * exceeded. */
#define MAX_HOOKS 32
/* Registered hooks; the first 'n_hooks' entries are in use. */
static struct hook hooks[MAX_HOOKS];
static size_t n_hooks;
/* Non-Windows builds: pipe created with xpipe_nonblocking() in
 * fatal_signal_init().  fatal_signal_handler() writes one byte to
 * signal_fds[1]; fatal_signal_wait() polls signal_fds[0] for POLLIN. */
static int signal_fds[2];
/* Signal number recorded by the handler, or SIG_ATOMIC_MAX if no signal has
 * been recorded. */
static volatile sig_atomic_t stored_sig_nr = SIG_ATOMIC_MAX;
poll-loop: Create Windows event handles for sockets automatically. We currently have a poll_fd_wait_event(fd, wevent, events) function that is used at places common to Windows and Linux where we have to wait on sockets. On Linux, 'wevent' is always set as zero. On Windows, for sockets, when we send both 'fd' and 'wevent', we associate them with each other for 'events' and then wait on 'wevent'. Also on Windows, when we only send 'wevent' to this function, we would simply wait for all events for that 'wevent'. There is a disadvantage with this approach. * Windows clients need to create a 'wevent' and then pass it along. This means that at a lot of places where we create sockets, we also are forced to create a 'wevent'. With this commit, we pass the responsibility of creating a 'wevent' to poll_fd_wait() in case of sockets. That way, a client using poll_fd_wait() is only concerned about sockets and not about 'wevents'. There is a potential disadvantage with this change in that we create events more often and that may have a performance penalty. If that turns out to be the case, we will eventually need to create a pool of wevents that can be re-used. In Windows, there are cases where we want to wait on a event (not associated with any sockets) and then control it using functions like SetEvent() etc. For that purpose, introduce a new function poll_wevent_wait(). For this function, the client needs to create a event and then pass it along as an argument. Signed-off-by: Gurucharan Shetty <gshetty@nicira.com> Acked-By: Ben Pfaff <blp@nicira.com>
2014-06-27 13:30:49 -07:00
#ifdef _WIN32
/* Event created in fatal_signal_init(), set by fatal_signal_handler() and
 * ConsoleHandlerRoutine(), and waited on by fatal_signal_wait(). */
static HANDLE wevent;
#endif
/* Initialized as a recursive mutex in fatal_signal_init(); taken by the hook
 * and file-list functions in this file. */
static struct ovs_mutex mutex;
/* Runs the registered hooks; defined later in this file. */
static void call_hooks(int sig_nr);
#ifdef _WIN32
/* Console control handler registered in fatal_signal_init(). */
static BOOL WINAPI ConsoleHandlerRoutine(DWORD dwCtrlType);
#endif
/* Initializes the fatal signal handling module. Calling this function is
* optional, because calling any other function in the module will also
* initialize it. However, in a multithreaded program, the module must be
* initialized while the process is still single-threaded. */
void
fatal_signal_init(void)
{
static bool inited = false;
if (!inited) {
size_t i;
assert_single_threaded();
inited = true;
ovs_mutex_init_recursive(&mutex);
#ifndef _WIN32
xpipe_nonblocking(signal_fds);
#else
wevent = CreateEvent(NULL, TRUE, FALSE, NULL);
if (!wevent) {
char *msg_buf = ovs_lasterror_to_string();
VLOG_FATAL("Failed to create a event (%s).", msg_buf);
}
/* Register a function to handle Ctrl+C. */
SetConsoleCtrlHandler(ConsoleHandlerRoutine, true);
#endif
for (i = 0; i < ARRAY_SIZE(fatal_signals); i++) {
int sig_nr = fatal_signals[i];
#ifndef _WIN32
struct sigaction old_sa;
xsigaction(sig_nr, NULL, &old_sa);
if (old_sa.sa_handler == SIG_DFL
&& signal(sig_nr, fatal_signal_handler) == SIG_ERR) {
VLOG_FATAL("signal failed (%s)", ovs_strerror(errno));
}
#else
if (signal(sig_nr, fatal_signal_handler) == SIG_ERR) {
VLOG_FATAL("signal failed (%s)", ovs_strerror(errno));
}
#endif
}
atexit(fatal_signal_atexit_handler);
}
}
/* Registers 'hook_cb' to be called from inside poll_block() following a fatal
* signal. 'hook_cb' does not need to be async-signal-safe. In a
* multithreaded program 'hook_cb' might be called from any thread, with
* threads other than the one running 'hook_cb' in unknown states.
*
* If 'run_at_exit' is true, 'hook_cb' is also called during normal process
* termination, e.g. when exit() is called or when main() returns.
*
* If the current process forks, fatal_signal_fork() may be called to clear the
* parent process's fatal signal hooks, so that 'hook_cb' is only called when
* the child terminates, not when the parent does. When fatal_signal_fork() is
* called, it calls the 'cancel_cb' function if it is nonnull, passing 'aux',
* to notify that the hook has been canceled. This allows the hook to free
* memory, etc. */
void
fatal_signal_add_hook(void (*hook_cb)(void *aux), void (*cancel_cb)(void *aux),
void *aux, bool run_at_exit)
{
fatal_signal_init();
ovs_mutex_lock(&mutex);
ovs_assert(n_hooks < MAX_HOOKS);
hooks[n_hooks].hook_cb = hook_cb;
hooks[n_hooks].cancel_cb = cancel_cb;
hooks[n_hooks].aux = aux;
hooks[n_hooks].run_at_exit = run_at_exit;
n_hooks++;
ovs_mutex_unlock(&mutex);
}
fatal-signal: Catch SIGSEGV and print backtrace. The patch catches the SIGSEGV signal and prints the backtrace using libunwind at the monitor daemon. This makes debugging easier when there is no debug symbol package or gdb installed on production systems. The patch works when the ovs-vswitchd compiles even without debug symbol (no -g option), because the object files still have function symbols. For example: |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace: |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52> |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40> |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d> |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108> |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c> |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa> |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d> |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca> |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d> |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \ (Segmentation fault), core dumped, restarting However, if the object files' symbols are stripped, then we can only get init function plus offset value. 
This is still useful when trying to see if two bugs have the same root cause, Example: |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace: |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a> |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40> |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d> |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280> |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324> |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371> |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0> |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261> |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \ (Segmentation fault), core dumped, restarting Most C library functions are not async-signal-safe, meaning that it is not safe to call them from a signal handler, for example printf() or fflush(). To be async-signal-safe, the handler only collects the stack info using libunwind, which is signal-safe, and issues 'write' to the pipe, where the monitor thread reads and prints to ovs-vswitchd.log. Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433 Signed-off-by: William Tu <u9012063@gmail.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-09-27 10:22:55 -07:00
#ifdef HAVE_UNWIND
/* Collects a backtrace of the current stack with libunwind and reports it.
 *
 * Does nothing if 'daemonize_fd' is -1.  Otherwise, if 'monitor' is set, the
 * raw array of collected frames is written to 'daemonize_fd'; if it is not
 * set, the frames are formatted and written to the log file by this process.
 *
 * Note that this runs in the signal handling context, so any system library
 * functions used here must be async-signal-safe.  The no-monitor branch is a
 * known exception (see below).
 */
static inline void
send_backtrace_to_monitor(void) {
    /* volatile added to prevent a "clobbered" error on ppc64le with gcc */
    volatile int dep;
    struct unw_backtrace unw_bt[UNW_MAX_DEPTH];
    unw_cursor_t cursor;
    unw_context_t uc;

    if (daemonize_fd == -1) {
        return;
    }

    dep = 0;
    unw_getcontext(&uc);
    unw_init_local(&cursor, &uc);

    /* unw_step() returns a positive value when the cursor now refers to a
     * valid frame, 0 after the last frame, and a negative value on error.
     * Only a positive result means there is a frame to record, so stop on
     * anything else instead of storing garbage after an unwind error. */
    while (dep < UNW_MAX_DEPTH && unw_step(&cursor) > 0) {
        /* Zero the name first so it is a valid (empty) string even if the
         * lookup below fails. */
        memset(unw_bt[dep].func, 0, UNW_MAX_FUNCN);
        unw_get_reg(&cursor, UNW_REG_IP, &unw_bt[dep].ip);
        unw_get_proc_name(&cursor, unw_bt[dep].func, UNW_MAX_FUNCN,
                          &unw_bt[dep].offset);
        dep++;
    }

    if (monitor) {
        /* Best effort: there is nothing useful to do if the write fails. */
        ignore(write(daemonize_fd, unw_bt,
                     dep * sizeof(struct unw_backtrace)));
    } else {
        /* Since there is no monitor daemon running, write backtrace
         * in current process.  This is not async-signal-safe due to
         * use of snprintf().
         */
        char str[] = "SIGSEGV detected, backtrace:\n";

        vlog_direct_write_to_log_file_unsafe(str);
        for (int i = 0; i < dep; i++) {
            /* Room for the function name plus the fixed parts of the line
             * (two "0x" prefixes, up to 16 hex digits each for address and
             * offset, punctuation, newline, terminator), so that the
             * trailing newline is not lost to truncation. */
            char line[UNW_MAX_FUNCN + 48];

            snprintf(line, sizeof line,
                     "0x%016"PRIxPTR" <%s+0x%"PRIxPTR">\n",
                     unw_bt[i].ip,
                     unw_bt[i].func,
                     unw_bt[i].offset);
            vlog_direct_write_to_log_file_unsafe(line);
        }
    }
}
#else
/* Built without HAVE_UNWIND: no backtrace is collected. */
static inline void
send_backtrace_to_monitor(void) {
    /* Nothing. */
}
#endif
/* Handles fatal signal number 'sig_nr'.
*
* Ordinarily this is the actual signal handler. When other code needs to
* handle one of our signals, however, it can register for that signal and, if
* and when necessary, call this function to do fatal signal processing for it
* and terminate the process. Currently only timeval.c does this, for SIGALRM.
* (It is not important whether the other code sets up its signal handler
* before or after this file, because this file will only set up a signal
* handler in the case where the signal has its default handling.) */
void
fatal_signal_handler(int sig_nr)
{
#ifndef _WIN32
fatal-signal: Catch SIGSEGV and print backtrace. The patch catches the SIGSEGV signal and prints the backtrace using libunwind at the monitor daemon. This makes debugging easier when there is no debug symbol package or gdb installed on production systems. The patch works when the ovs-vswitchd compiles even without debug symbol (no -g option), because the object files still have function symbols. For example: |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace: |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52> |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40> |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d> |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108> |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c> |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa> |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d> |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca> |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d> |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \ (Segmentation fault), core dumped, restarting However, if the object files' symbols are stripped, then we can only get init function plus offset value. 
This is still useful when trying to see if two bugs have the same root cause, Example: |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace: |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a> |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40> |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d> |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280> |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324> |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371> |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0> |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261> |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \ (Segmentation fault), core dumped, restarting Most C library functions are not async-signal-safe, meaning that it is not safe to call them from a signal handler, for example printf() or fflush(). To be async-signal-safe, the handler only collects the stack info using libunwind, which is signal-safe, and issues 'write' to the pipe, where the monitor thread reads and prints to ovs-vswitchd.log. Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433 Signed-off-by: William Tu <u9012063@gmail.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
2019-09-27 10:22:55 -07:00
if (sig_nr == SIGSEGV) {
signal(sig_nr, SIG_DFL); /* Set it back immediately. */
send_backtrace_to_monitor();
raise(sig_nr);
}
ignore(write(signal_fds[1], "", 1));
#else
SetEvent(wevent);
#endif
stored_sig_nr = sig_nr;
}
/* Checks whether a fatal signal has been recorded and, if so, logs it, calls
 * the fatal signal hooks, and terminates the process by re-raising the signal
 * with its default handling.  Returns normally only if no signal has been
 * recorded.
 *
 * This function is called automatically by poll_block(), but specialized
 * programs that may not always call poll_block() on a regular basis should
 * also call it periodically.  (Therefore, any function with "block" in its
 * name should call fatal_signal_run() each time it is called, either directly
 * or through poll_block(), because such functions can only be used by
 * specialized programs that can afford to block outside their main loop
 * around poll_block().)
 */
void
fatal_signal_run(void)
{
    char namebuf[SIGNAL_NAME_BUFSIZE];
    sig_atomic_t sig_nr;

    fatal_signal_init();

    sig_nr = stored_sig_nr;
    if (sig_nr == SIG_ATOMIC_MAX) {
        /* Nothing has been caught. */
        return;
    }

    ovs_mutex_lock(&mutex);

#ifndef _WIN32
    VLOG_WARN("terminating with signal %d (%s)",
              (int)sig_nr, signal_name(sig_nr, namebuf, sizeof namebuf));
#else
    VLOG_WARN("terminating with signal %d", (int)sig_nr);
#endif
    call_hooks(sig_nr);
    fflush(stderr);

    /* Re-raise the signal with the default handling so that the program
     * termination status reflects that we were killed by this signal */
    signal(sig_nr, SIG_DFL);
    raise(sig_nr);

    /* The process is expected to be gone by now. */
    ovs_mutex_unlock(&mutex);
    OVS_NOT_REACHED();
}
void
fatal_signal_wait(void)
{
fatal_signal_init();
poll-loop: Create Windows event handles for sockets automatically. We currently have a poll_fd_wait_event(fd, wevent, events) function that is used at places common to Windows and Linux where we have to wait on sockets. On Linux, 'wevent' is always set as zero. On Windows, for sockets, when we send both 'fd' and 'wevent', we associate them with each other for 'events' and then wait on 'wevent'. Also on Windows, when we only send 'wevent' to this function, we would simply wait for all events for that 'wevent'. There is a disadvantage with this approach. * Windows clients need to create a 'wevent' and then pass it along. This means that at a lot of places where we create sockets, we also are forced to create a 'wevent'. With this commit, we pass the responsibility of creating a 'wevent' to poll_fd_wait() in case of sockets. That way, a client using poll_fd_wait() is only concerned about sockets and not about 'wevents'. There is a potential disadvantage with this change in that we create events more often and that may have a performance penalty. If that turns out to be the case, we will eventually need to create a pool of wevents that can be re-used. In Windows, there are cases where we want to wait on a event (not associated with any sockets) and then control it using functions like SetEvent() etc. For that purpose, introduce a new function poll_wevent_wait(). For this function, the client needs to create a event and then pass it along as an argument. Signed-off-by: Gurucharan Shetty <gshetty@nicira.com> Acked-By: Ben Pfaff <blp@nicira.com>
2014-06-27 13:30:49 -07:00
#ifdef _WIN32
poll_wevent_wait(wevent);
#else
poll_fd_wait(signal_fds[0], POLLIN);
#endif
}
/* Sets the disposition of SIGPIPE to "ignore".  Takes no action in Windows
 * builds. */
void
fatal_ignore_sigpipe(void)
{
#ifdef _WIN32
    /* Nothing to do. */
#else
    signal(SIGPIPE, SIG_IGN);
#endif
}
/* Handler registered with atexit() by fatal_signal_init().  Runs the hooks
 * with signal number 0, which makes call_hooks() run only those hooks that
 * were registered with 'run_at_exit' set. */
void
fatal_signal_atexit_handler(void)
{
call_hooks(0);
}
static void
call_hooks(int sig_nr)
{
static volatile sig_atomic_t recurse = 0;
if (!recurse) {
size_t i;
recurse = 1;
for (i = 0; i < n_hooks; i++) {
struct hook *h = &hooks[i];
if (sig_nr || h->run_at_exit) {
h->hook_cb(h->aux);
}
}
}
}
#ifdef _WIN32
/* Console control handler that fatal_signal_init() registers with
 * SetConsoleCtrlHandler() to handle Ctrl+C.
 *
 * Records SIGINT as the caught signal and sets 'wevent' so that a waiter
 * registered through fatal_signal_wait() wakes up.  'dwCtrlType' is not
 * examined.  Always returns true. */
BOOL WINAPI ConsoleHandlerRoutine(DWORD dwCtrlType)
{
stored_sig_nr = SIGINT;
SetEvent(wevent);
return true;
}
#endif
/* Names of the files to delete on exit; maintained by
 * fatal_signal_add_file_to_unlink() and
 * fatal_signal_remove_file_to_unlink(). */
static struct sset files = SSET_INITIALIZER(&files);
/* Has the file-unlinking hook been registered with fatal_signal_add_hook()
 * (and not since been canceled, which resets this flag in cancel_files())? */
static bool added_hook;
/* Hook callbacks and helper for the file list; defined below. */
static void unlink_files(void *aux);
static void cancel_files(void *aux);
static void do_unlink_files(void);
/* Registers 'file' to be unlinked when the program terminates via exit() or a
 * fatal signal.
 *
 * The first call (and the first call after the hook has been canceled) also
 * registers the unlink_files()/cancel_files() hook, with 'run_at_exit' set so
 * that it runs at normal exit too. */
void
fatal_signal_add_file_to_unlink(const char *file)
{
fatal_signal_init();
ovs_mutex_lock(&mutex);
if (!added_hook) {
added_hook = true;
/* fatal_signal_add_hook() takes 'mutex' again; that is fine because the
 * mutex is initialized as recursive. */
fatal_signal_add_hook(unlink_files, cancel_files, NULL, true);
}
sset_add(&files, file);
ovs_mutex_unlock(&mutex);
}
/* Unregisters 'file' from being unlinked when the program terminates via
 * exit() or a fatal signal.  The file itself is not touched. */
void
fatal_signal_remove_file_to_unlink(const char *file)
{
fatal_signal_init();
ovs_mutex_lock(&mutex);
/* The result is not checked, so it does not matter whether 'file' was
 * actually registered. */
sset_find_and_delete(&files, file);
ovs_mutex_unlock(&mutex);
}
/* Unlinks 'file' immediately and also unregisters it, as
 * fatal_signal_remove_file_to_unlink() would.  The file is unregistered even
 * if unlinking fails; a failure is logged as a warning.
 *
 * Returns 0 if successful, otherwise a positive errno value. */
int
fatal_signal_unlink_file_now(const char *file)
{
    int error = 0;

    fatal_signal_init();

    ovs_mutex_lock(&mutex);

    if (unlink(file) != 0) {
        error = errno;
    }
    if (error != 0) {
        VLOG_WARN("could not unlink \"%s\" (%s)", file, ovs_strerror(error));
    }

    /* Takes 'mutex' again, which is fine because the mutex is recursive. */
    fatal_signal_remove_file_to_unlink(file);

    ovs_mutex_unlock(&mutex);

    return error;
}
/* Hook callback registered by fatal_signal_add_file_to_unlink(): unlinks
 * every registered file.  'aux' is unused. */
static void
unlink_files(void *aux OVS_UNUSED)
{
do_unlink_files();
}
/* Cancel callback registered by fatal_signal_add_file_to_unlink(): forgets
 * all registered files without unlinking them and clears 'added_hook', so
 * that the next fatal_signal_add_file_to_unlink() call registers the hook
 * again.  'aux' is unused. */
static void
cancel_files(void *aux OVS_UNUSED)
{
sset_clear(&files);
added_hook = false;
}
/* Calls unlink() on every name in 'files'.  Failures are ignored, and the
 * set itself is left unchanged. */
static void
do_unlink_files(void)
{
const char *file;
SSET_FOR_EACH (file, &files) {
unlink(file);
}
}
/* Clears all of the fatal signal hooks without executing them. If any of the
* hooks passed a 'cancel_cb' function to fatal_signal_add_hook(), then those
* functions will be called, allowing them to free resources, etc.
*
* Following a fork, one of the resulting processes can call this function to
* allow it to terminate without calling the hooks registered before calling
* this function. New hooks registered after calling this function will take
* effect normally. */
void
fatal_signal_fork(void)
{
size_t i;
assert_single_threaded();
for (i = 0; i < n_hooks; i++) {
struct hook *h = &hooks[i];
if (h->cancel_cb) {
h->cancel_cb(h->aux);
}
}
n_hooks = 0;
/* Raise any signals that we have already received with the default
* handler. */
if (stored_sig_nr != SIG_ATOMIC_MAX) {
raise(stored_sig_nr);
}
}
#ifndef _WIN32
/* Adds every signal in 'fatal_signals' to the set of blocked signals, using
 * xpthread_sigmask(SIG_BLOCK, ...), and stores the previous signal mask into
 * 'prev_mask'. */
void
fatal_signal_block(sigset_t *prev_mask)
{
    sigset_t block_mask;

    sigemptyset(&block_mask);
    /* size_t index: ARRAY_SIZE() yields an unsigned count, and this matches
     * the loop over the same table in fatal_signal_init(). */
    for (size_t i = 0; i < ARRAY_SIZE(fatal_signals); i++) {
        sigaddset(&block_mask, fatal_signals[i]);
    }
    xpthread_sigmask(SIG_BLOCK, &block_mask, prev_mask);
}
#endif