ovs/lib/fatal-signal.c

/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <config.h>
#include "backtrace.h"
#include "fatal-signal.h"
#include <errno.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "ovs-thread.h"
#include "openvswitch/poll-loop.h"
#include "openvswitch/shash.h"
#include "sset.h"
#include "signals.h"
#include "socket-util.h"
#include "util.h"
#include "openvswitch/vlog.h"

#include "openvswitch/type-props.h"

#if defined(HAVE_UNWIND) || defined(HAVE_BACKTRACE)
#include "daemon-private.h"
#endif

#ifdef HAVE_BACKTRACE
#include <execinfo.h>
#endif

#ifndef SIG_ATOMIC_MAX
#define SIG_ATOMIC_MAX TYPE_MAXIMUM(sig_atomic_t)
#endif

VLOG_DEFINE_THIS_MODULE(fatal_signal);

/* Signals to catch.
 *
 * fatal_signal_init() installs fatal_signal_handler() for each of these (on
 * non-Windows builds only if the signal still has its default disposition),
 * and fatal_signal_block() adds each of them to the mask that it blocks. */
#ifndef _WIN32
static const int fatal_signals[] = { SIGTERM, SIGINT, SIGHUP, SIGALRM,
                                     SIGSEGV };
#else
static const int fatal_signals[] = { SIGTERM };
#endif

/* Hooks to call upon catching a signal */
struct hook {
    void (*hook_cb)(void *aux);     /* Called by call_hooks() with 'aux'. */
    void (*cancel_cb)(void *aux);   /* If nonnull, called by
                                     * fatal_signal_fork() with 'aux'. */
    void *aux;                      /* Opaque argument for both callbacks. */
    bool run_at_exit;               /* Also run 'hook_cb' when call_hooks()
                                     * is invoked with signal number 0, as
                                     * the atexit handler does. */
};
/* Capacity of 'hooks'; fatal_signal_add_hook() asserts that it is not
 * exceeded. */
#define MAX_HOOKS 32
static struct hook hooks[MAX_HOOKS];
/* Number of entries of 'hooks' currently in use. */
static size_t n_hooks;

/* Wake-up pipe (non-Windows builds): fatal_signal_handler() writes one byte
 * to signal_fds[1] and fatal_signal_wait() polls signal_fds[0] for POLLIN. */
static int signal_fds[2];
/* Number of the most recently caught signal, or SIG_ATOMIC_MAX if no signal
 * has been recorded. */
static volatile sig_atomic_t stored_sig_nr = SIG_ATOMIC_MAX;

#ifdef _WIN32
/* Wake-up event (Windows builds): set by fatal_signal_handler() and
 * ConsoleHandlerRoutine(), waited on by fatal_signal_wait(). */
static HANDLE wevent;
#endif

/* Initialized as a recursive mutex by fatal_signal_init().  Held while the
 * hook table and the list of files to unlink are modified, and while the
 * hooks run in fatal_signal_run(). */
static struct ovs_mutex mutex;

static void call_hooks(int sig_nr);
#ifdef _WIN32
static BOOL WINAPI ConsoleHandlerRoutine(DWORD dwCtrlType);
#endif

/* Creates the object used to wake up the process once a signal has been
 * caught, so that the signal can be handled from fatal_signal_run() rather
 * than from inside the signal handler: a nonblocking pipe in 'signal_fds' on
 * POSIX systems, or the event handle 'wevent' on Windows.  Failing to create
 * the Windows event is fatal. */
static void
fatal_signal_create_wakeup_events(void)
{
#ifdef _WIN32
    wevent = CreateEvent(NULL, TRUE, FALSE, NULL);
    if (wevent == NULL) {
        VLOG_FATAL("Failed to create a event (%s).",
                   ovs_lasterror_to_string());
    }
#else
    xpipe_nonblocking(signal_fds);
#endif
}

/* Releases the wake-up object made by fatal_signal_create_wakeup_events():
 * closes both ends of the pipe and marks them as -1, or resets and closes
 * the Windows event handle. */
static void
fatal_signal_destroy_wakeup_events(void)
{
#ifdef _WIN32
    ResetEvent(wevent);
    CloseHandle(wevent);
    wevent = NULL;
#else
    int end;

    /* Read end first, then write end. */
    for (end = 0; end < 2; end++) {
        close(signal_fds[end]);
        signal_fds[end] = -1;
    }
#endif
}


/* Initializes the fatal signal handling module.  Calling this function is
 * optional, because calling any other function in the module will also
 * initialize it.  However, in a multithreaded program, the module must be
 * initialized while the process is still single-threaded.
 *
 * Only the first call does any work; later calls return immediately. */
void
fatal_signal_init(void)
{
    static bool inited = false;

    if (!inited) {
        size_t i;

        assert_single_threaded();
        inited = true;

        /* Recursive because functions in this file call each other while
         * holding 'mutex' (for example, fatal_signal_unlink_file_now() calls
         * fatal_signal_remove_file_to_unlink()). */
        ovs_mutex_init_recursive(&mutex);

        /* The dummy backtrace is needed.
         * See comment for send_backtrace_to_monitor(). */
        struct backtrace dummy_bt;

        backtrace_capture(&dummy_bt);

        fatal_signal_create_wakeup_events();

#ifdef _WIN32
        /* Register a function to handle Ctrl+C. */
        SetConsoleCtrlHandler(ConsoleHandlerRoutine, true);
#endif

        for (i = 0; i < ARRAY_SIZE(fatal_signals); i++) {
            int sig_nr = fatal_signals[i];
#ifndef _WIN32
            struct sigaction old_sa;

            /* Query the current disposition and install our handler only if
             * nothing else has claimed this signal yet. */
            xsigaction(sig_nr, NULL, &old_sa);
            if (old_sa.sa_handler == SIG_DFL
                && signal(sig_nr, fatal_signal_handler) == SIG_ERR) {
                VLOG_FATAL("signal failed (%s)", ovs_strerror(errno));
            }
#else
            if (signal(sig_nr, fatal_signal_handler) == SIG_ERR) {
                VLOG_FATAL("signal failed (%s)", ovs_strerror(errno));
            }
#endif
        }
        /* Runs the hooks registered with 'run_at_exit' set when the process
         * terminates normally. */
        atexit(fatal_signal_atexit_handler);
    }
}

/* Registers 'hook_cb' to be called from inside poll_block() following a fatal
 * signal.  'hook_cb' does not need to be async-signal-safe.  In a
 * multithreaded program 'hook_cb' might be called from any thread, with
 * threads other than the one running 'hook_cb' in unknown states.
 *
 * If 'run_at_exit' is true, 'hook_cb' is also called during normal process
 * termination, e.g. when exit() is called or when main() returns.
 *
 * If the current process forks, fatal_signal_fork() may be called to clear the
 * parent process's fatal signal hooks, so that 'hook_cb' is only called when
 * the child terminates, not when the parent does.  When fatal_signal_fork() is
 * called, it calls the 'cancel_cb' function if it is nonnull, passing 'aux',
 * to notify that the hook has been canceled.  This allows the hook to free
 * memory, etc. */
void
fatal_signal_add_hook(void (*hook_cb)(void *aux), void (*cancel_cb)(void *aux),
                      void *aux, bool run_at_exit)
{
    fatal_signal_init();

    ovs_mutex_lock(&mutex);
    ovs_assert(n_hooks < MAX_HOOKS);
    hooks[n_hooks].hook_cb = hook_cb;
    hooks[n_hooks].cancel_cb = cancel_cb;
    hooks[n_hooks].aux = aux;
    hooks[n_hooks].run_at_exit = run_at_exit;
    n_hooks++;
    ovs_mutex_unlock(&mutex);
}

#ifdef HAVE_UNWIND
/* Writes 'value' to 'str' as lowercase hexadecimal digits, most significant
 * digit first and without leading zeros (a value of 0 yields the single
 * digit "0").  Returns the number of digits written.
 *
 * No null terminator is appended, and nothing past the digits is touched, so
 * the caller must supply a buffer with room for the digits and terminate the
 * string itself.
 *
 * This exists because snprintf() is not async-signal-safe. */
static inline int
llong_to_hex_str(unsigned long long value, char *str)
{
    static const char hex_digits[] = "0123456789abcdef";
    unsigned long long rest;
    int n_digits = 1;
    int pos;

    /* Count the digits first, so that they can be stored back to front. */
    for (rest = value >> 4; rest != 0; rest >>= 4) {
        n_digits++;
    }

    for (pos = n_digits - 1; pos >= 0; pos--) {
        str[pos] = hex_digits[value & 0xf];
        value >>= 4;
    }

    return n_digits;
}

/* Send the backtrace buffer to monitor thread.
 *
 * Walks the current stack with libunwind, recording at most UNW_MAX_DEPTH
 * frames.  If 'monitor' is set, the raw array of frames is written to
 * 'daemonize_fd'.  Otherwise each frame is formatted as
 * "0x<ip><<function>+0x<offset>>" and written directly to the log file.
 * Nothing at all is done when 'daemonize_fd' is -1.
 *
 * Note that this runs in the signal handling context, any system
 * library functions used here must be async-signal-safe.
 */
static inline void
send_backtrace_to_monitor(void)
{
    /* volatile added to prevent a "clobbered" error on ppc64le with gcc */
    volatile int dep;
    struct unw_backtrace unw_bt[UNW_MAX_DEPTH];
    unw_cursor_t cursor;
    unw_context_t uc;

    if (daemonize_fd == -1) {
        return;
    }

    dep = 0;
    unw_getcontext(&uc);
    unw_init_local(&cursor, &uc);

    /* unw_step() returns a positive value while there is another frame, 0
     * after the last frame, and a negative error code on failure, so only a
     * positive result may continue the walk. */
    while (dep < UNW_MAX_DEPTH && unw_step(&cursor) > 0) {
        /* Clear the whole entry, not only the name, so that 'ip' and
         * 'offset' hold zero instead of stack garbage if libunwind fails to
         * fill them in. */
        memset(&unw_bt[dep], 0, sizeof unw_bt[dep]);
        unw_get_reg(&cursor, UNW_REG_IP, &unw_bt[dep].ip);
        unw_get_proc_name(&cursor, unw_bt[dep].func, UNW_MAX_FUNCN,
                          &unw_bt[dep].offset);
        dep++;
    }

    if (monitor) {
        ignore(write(daemonize_fd, unw_bt,
                     dep * sizeof(struct unw_backtrace)));
    } else {
        /* Since there is no monitor daemon running, write backtrace
         * in current process.
         */
        enum {
            /* Most hex digits llong_to_hex_str() can produce. */
            MAX_HEX = 2 * sizeof(unsigned long long),
            /* The address is padded with spaces to at least this width. */
            IP_WIDTH = 15
        };
        /* Every buffer has room for the widest possible value plus a null
         * terminator, because llong_to_hex_str() does not terminate its
         * output. */
        char ip_str[MAX_HEX + 1], offset_str[MAX_HEX + 1];
        char fn_name[UNW_MAX_FUNCN];
        /* "0x" ip "<" function "+0x" offset ">\n" and a terminator need at
         * most UNW_MAX_FUNCN + 2 * MAX_HEX + 8 bytes. */
        char line[UNW_MAX_FUNCN + 2 * MAX_HEX + 16];

        vlog_direct_write_to_log_file_unsafe(BACKTRACE_DUMP_MSG);

        for (int i = 0; i < dep; i++) {
            int ip_len;

            memset(line, 0, sizeof line);
            memset(fn_name, 0, sizeof fn_name);
            memset(offset_str, 0, sizeof offset_str);
            memset(ip_str, ' ', sizeof ip_str);

            ip_len = llong_to_hex_str(unw_bt[i].ip, ip_str);
            /* Terminate after the padding, or after the digits if the
             * address is wider than the padding. */
            ip_str[ip_len > IP_WIDTH ? ip_len : IP_WIDTH] = 0;
            llong_to_hex_str(unw_bt[i].offset, offset_str);

            strcat(line, "0x");
            strcat(line, ip_str);
            strcat(line, "<");
            memcpy(fn_name, unw_bt[i].func, UNW_MAX_FUNCN - 1);
            strcat(line, fn_name);
            strcat(line, "+0x");
            strcat(line, offset_str);
            strcat(line, ">\n");
            vlog_direct_write_to_log_file_unsafe(line);
        }
    }
}
#elif HAVE_BACKTRACE
/* Send the backtrace to monitor thread.
 *
 * Captures a backtrace of the current thread.  If 'monitor' is set and
 * 'daemonize_fd' is valid, the captured structure is written to
 * 'daemonize_fd'; otherwise the symbolized frames are written to the log
 * file, provided that one is open.
 *
 * Note that this runs in the signal handling context, any system
 * library functions used here must be async-signal-safe.
 * backtrace() is only signal safe if the "libgcc" or equivalent was loaded
 * before the signal handler. In order to keep it safe the fatal_signal_init()
 * should always call backtrace_capture which will ensure that "libgcc" or
 * equivalent is loaded.
 */
static inline void
send_backtrace_to_monitor(void)
{
    struct backtrace bt;
    int log_fd;

    backtrace_capture(&bt);

    if (monitor && daemonize_fd > -1) {
        ignore(write(daemonize_fd, &bt, sizeof bt));
        return;
    }

    log_fd = vlog_get_log_file_fd_unsafe();
    if (log_fd >= 0) {
        vlog_direct_write_to_log_file_unsafe(BACKTRACE_DUMP_MSG);
        backtrace_symbols_fd(bt.frames, bt.n_frames, log_fd);
    }
}
#else
/* Fallback for builds with neither HAVE_UNWIND nor HAVE_BACKTRACE: there is
 * no backtrace to send, so this does nothing. */
static inline void
send_backtrace_to_monitor(void)
{
}
#endif

/* Handles fatal signal number 'sig_nr'.
 *
 * Ordinarily this is the actual signal handler.  When other code needs to
 * handle one of our signals, however, it can register for that signal and, if
 * and when necessary, call this function to do fatal signal processing for it
 * and terminate the process.  Currently only timeval.c does this, for SIGALRM.
 * (It is not important whether the other code sets up its signal handler
 * before or after this file, because this file will only set up a signal
 * handler in the case where the signal has its default handling.)
 *
 * The handler only records 'sig_nr' in 'stored_sig_nr' and triggers the
 * wake-up pipe or event; the hooks themselves run later, from
 * fatal_signal_run().  For SIGSEGV it additionally restores the default
 * disposition, emits a backtrace and re-raises the signal.
 *
 * 'errno' is preserved across the call, so that code interrupted by the
 * signal does not observe a value left behind by write(). */
void
fatal_signal_handler(int sig_nr)
{
    int saved_errno = errno;

#ifndef _WIN32
    if (sig_nr == SIGSEGV) {
        signal(sig_nr, SIG_DFL); /* Set it back immediately. */
        send_backtrace_to_monitor();
        raise(sig_nr);
    }
#endif

    /* Record the signal before waking anybody up, so that a thread woken by
     * the pipe or event finds the signal number already stored. */
    stored_sig_nr = sig_nr;

#ifndef _WIN32
    ignore(write(signal_fds[1], "", 1));
#else
    SetEvent(wevent);
#endif

    errno = saved_errno;
}

/* Checks whether a fatal signal has been recorded and, if so, logs it, runs
 * the fatal signal hooks and terminates the process by re-raising the signal
 * with its default disposition.  Returns normally only if no signal has been
 * recorded.
 *
 * poll_block() calls this function automatically, but specialized programs
 * that may not call poll_block() on a regular basis should also call it
 * periodically.  (Therefore, any function with "block" in its name should
 * call fatal_signal_run() each time it is called, either directly or through
 * poll_block(), because such functions can only be used by specialized
 * programs that can afford to block outside their main loop around
 * poll_block().)
 */
void
fatal_signal_run(void)
{
    char namebuf[SIGNAL_NAME_BUFSIZE];
    sig_atomic_t pending;

    fatal_signal_init();

    pending = stored_sig_nr;
    if (pending == SIG_ATOMIC_MAX) {
        /* Nothing has been caught. */
        return;
    }

    ovs_mutex_lock(&mutex);

#ifdef _WIN32
    VLOG_WARN("terminating with signal %d", (int)pending);
#else
    VLOG_WARN("terminating with signal %d (%s)",
              (int)pending, signal_name(pending, namebuf, sizeof namebuf));
#endif
    call_hooks(pending);
    fflush(stderr);

    /* Restore the default action and deliver the signal again, so that the
     * exit status of the process shows that this signal killed it. */
    signal(pending, SIG_DFL);
    raise(pending);

    ovs_mutex_unlock(&mutex);
    OVS_NOT_REACHED();
}

/* Registers the wake-up pipe (or, on Windows, the wake-up event) with the
 * poll loop, so that the next poll wakes up once a fatal signal has been
 * caught. */
void
fatal_signal_wait(void)
{
    fatal_signal_init();
#ifndef _WIN32
    poll_fd_wait(signal_fds[0], POLLIN);
#else
    poll_wevent_wait(wevent);
#endif
}

/* Sets the disposition of SIGPIPE to "ignore".  Does nothing on Windows. */
void
fatal_ignore_sigpipe(void)
{
#if !defined(_WIN32)
    signal(SIGPIPE, SIG_IGN);
#endif
}

/* Registered with atexit() by fatal_signal_init().  Runs the hooks with a
 * signal number of 0, which makes call_hooks() invoke only the hooks that
 * were added with 'run_at_exit' set. */
void
fatal_signal_atexit_handler(void)
{
    call_hooks(0);
}

static void
call_hooks(int sig_nr)
{
    static volatile sig_atomic_t recurse = 0;
    if (!recurse) {
        size_t i;

        recurse = 1;

        for (i = 0; i < n_hooks; i++) {
            struct hook *h = &hooks[i];
            if (sig_nr || h->run_at_exit) {
                h->hook_cb(h->aux);
            }
        }
    }
}

#ifdef _WIN32
/* Console control handler that fatal_signal_init() registers through
 * SetConsoleCtrlHandler() to deal with Ctrl+C.  Whatever 'dwCtrlType' is, the
 * event is recorded as SIGINT and the wake-up event is set, so that
 * fatal_signal_run() processes it.  Always returns true. */
BOOL WINAPI
ConsoleHandlerRoutine(DWORD dwCtrlType)
{
    stored_sig_nr = SIGINT;
    SetEvent(wevent);
    return true;
}
#endif

/* Files to delete on exit. */
static struct sset files = SSET_INITIALIZER(&files);

/* Has a hook function been registered with fatal_signal_add_hook() (and not
 * cleared by fatal_signal_fork())? */
static bool added_hook;

static void unlink_files(void *aux);
static void cancel_files(void *aux);
static void do_unlink_files(void);

/* Adds 'file' to the set of files that are unlinked when the program
 * terminates, whether through exit() or through a fatal signal.  The first
 * registration also installs the hook that does the unlinking. */
void
fatal_signal_add_file_to_unlink(const char *file)
{
    fatal_signal_init();

    ovs_mutex_lock(&mutex);

    if (added_hook == false) {
        /* Mark the hook as installed before registering it. */
        added_hook = true;
        fatal_signal_add_hook(unlink_files, cancel_files, NULL, true);
    }
    sset_add(&files, file);

    ovs_mutex_unlock(&mutex);
}

/* Unregisters 'file' from being unlinked when the program terminates via
 * exit() or a fatal signal.  The file itself is left untouched. */
void
fatal_signal_remove_file_to_unlink(const char *file)
{
    fatal_signal_init();

    ovs_mutex_lock(&mutex);
    sset_find_and_delete(&files, file);
    ovs_mutex_unlock(&mutex);
}

/* Unlinks 'file' immediately and also unregisters it, as
 * fatal_signal_remove_file_to_unlink() does.  A failure to unlink is logged
 * as a warning, and 'file' is unregistered either way.
 *
 * Returns 0 if successful, otherwise a positive errno value. */
int
fatal_signal_unlink_file_now(const char *file)
{
    int error = 0;

    fatal_signal_init();

    ovs_mutex_lock(&mutex);

    if (unlink(file) != 0) {
        error = errno;
    }
    if (error != 0) {
        VLOG_WARN("could not unlink \"%s\" (%s)", file, ovs_strerror(error));
    }

    /* Takes 'mutex' again, which is fine because it is recursive. */
    fatal_signal_remove_file_to_unlink(file);

    ovs_mutex_unlock(&mutex);

    return error;
}

/* Hook callback registered by fatal_signal_add_file_to_unlink(): unlinks
 * every registered file.  'aux' is unused. */
static void
unlink_files(void *aux OVS_UNUSED)
{
    do_unlink_files();
}

/* Cancel callback registered by fatal_signal_add_file_to_unlink(): forgets
 * every registered file without unlinking it, and notes that the hook is no
 * longer installed so that the next registration installs it again.  'aux'
 * is unused. */
static void
cancel_files(void *aux OVS_UNUSED)
{
    added_hook = false;
    sset_clear(&files);
}

static void
do_unlink_files(void)
{
    const char *file;

    SSET_FOR_EACH (file, &files) {
        unlink(file);
    }
}

/* Clears all of the fatal signal hooks without executing them.  If any of the
 * hooks passed a 'cancel_cb' function to fatal_signal_add_hook(), then those
 * functions will be called, allowing them to free resources, etc.
 *
 * Also re-creates wake-up events, so signals in one of the processes do not
 * wake up the other one.
 *
 * Following a fork, one of the resulting processes can call this function to
 * allow it to terminate without calling the hooks registered before calling
 * this function.  New hooks registered after calling this function will take
 * effect normally. */
void
fatal_signal_fork(void)
{
    size_t i;

    assert_single_threaded();

    fatal_signal_destroy_wakeup_events();
    fatal_signal_create_wakeup_events();

    for (i = 0; i < n_hooks; i++) {
        struct hook *h = &hooks[i];
        if (h->cancel_cb) {
            h->cancel_cb(h->aux);
        }
    }
    n_hooks = 0;

    /* Raise any signals that we have already received with the default
     * handler. */
    if (stored_sig_nr != SIG_ATOMIC_MAX) {
        raise(stored_sig_nr);
    }
}

#ifndef _WIN32
/* Blocks all fatal signals, that is, every signal listed in 'fatal_signals',
 * and returns previous signal mask into 'prev_mask'.
 *
 * The signals are added to the current mask (SIG_BLOCK); signals that were
 * already blocked stay blocked. */
void
fatal_signal_block(sigset_t *prev_mask)
{
    sigset_t block_mask;

    sigemptyset(&block_mask);
    /* size_t index: ARRAY_SIZE() yields an unsigned value, as in
     * fatal_signal_init(). */
    for (size_t i = 0; i < ARRAY_SIZE(fatal_signals); i++) {
        sigaddset(&block_mask, fatal_signals[i]);
    }
    xpthread_sigmask(SIG_BLOCK, &block_mask, prev_mask);
}
#endif
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								/*
-												Replace all uses of strerror() by ovs_strerror(), for thread safety.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-24 10:54:49 -07:00
+								 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 *
-												Update primary code license to Apache 2.0.

											
										
										
											2009-06-15 15:11:30 -07:00
+								 * Licensed under the Apache License, Version 2.0 (the "License");
 								 * you may not use this file except in compliance with the License.
 								 * You may obtain a copy of the License at:
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 *
-												Update primary code license to Apache 2.0.

											
										
										
											2009-06-15 15:11:30 -07:00
+								 *     http://www.apache.org/licenses/LICENSE-2.0
 								 *
 								 * Unless required by applicable law or agreed to in writing, software
 								 * distributed under the License is distributed on an "AS IS" BASIS,
 								 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 								 * See the License for the specific language governing permissions and
 								 * limitations under the License.
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								 */
 								#include <config.h>
-												fatal-signal: Catch SIGSEGV and print backtrace.

The patch catches the SIGSEGV signal and prints the backtrace
using libunwind at the monitor daemon. This makes debugging easier
when there is no debug symbol package or gdb installed on production
systems.

The patch works when the ovs-vswitchd compiles even without debug symbol
(no -g option), because the object files still have function symbols.
For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
 |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
 |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
 |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
 |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
 |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
 |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
    (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only
get init function plus offset value. This is still useful when trying
to see if two bugs have the same root cause, Example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
 |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
 |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
 |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
	(Segmentation fault), core dumped, restarting

Most C library functions are not async-signal-safe, meaning that
it is not safe to call them from a signal handler, for example
printf() or fflush(). To be async-signal-safe, the handler only
collects the stack info using libunwind, which is signal-safe, and
issues 'write' to the pipe, where the monitor thread reads and
prints to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-09-27 10:22:55 -07:00
+								#include "backtrace.h"
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#include "fatal-signal.h"
 								#include <errno.h>
 								#include <signal.h>
 								#include <stdbool.h>
 								#include <stdio.h>
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								#include <stdint.h>
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#include <stdlib.h>
 								#include <string.h>
 								#include <unistd.h>
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
+								#include "ovs-thread.h"
-												lib: Move lib/poll-loop.h to include/openvswitch

Poll-loop is the core to implement main loop. It should be available in
libopenvswitch.

Signed-off-by: Xiao Liang <shaw.leon@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2017-11-03 13:53:53 +08:00
+								#include "openvswitch/poll-loop.h"
-												json: Move from lib to include/openvswitch.

To easily allow both in- and out-of-tree building of the Python
wrapper for the OVS JSON parser (e.g. w/ pip), move json.h to
include/openvswitch. This also requires moving lib/{hmap,shash}.h.

Both hmap.h and shash.h were #include-ing "util.h" even though the
headers themselves did not use anything from there, but rather from
include/openvswitch/util.h. Fixing that required including util.h
in several C files mostly due to OVS_NOT_REACHED and things like
xmalloc.

Signed-off-by: Terry Wilson <twilson@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-07-12 16:37:34 -05:00
+								#include "openvswitch/shash.h"
-												Convert shash users that don't use the 'data' value to sset instead.

In each of the cases converted here, an shash was used simply to maintain
a set of strings, with the shash_nodes' 'data' values set to NULL.  This
commit converts them to use sset instead.

											
										
										
											2011-03-25 15:26:30 -07:00
+								#include "sset.h"
-												Log anything that could prevent a daemon from starting.

If a daemon doesn't start, we need to know why.  Being able to
consistently consult the log to find out is helpful.

											
										
										
											2011-03-31 16:23:50 -07:00
+								#include "signals.h"
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								#include "socket-util.h"
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								#include "util.h"
-												lib: Move vlog.h to <openvswitch/vlog.h>

A new function vlog_insert_module() is introduced to avoid using
list_insert() from the vlog.h header.

Signed-off-by: Thomas Graf <tgraf@noironetworks.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-12-15 14:10:38 +01:00
+								#include "openvswitch/vlog.h"
-												fatal-signal: New function fatal_signal_unlink_file_now().

This is a helper function that combines two actions that callers commonly
wanted.  It will have an additional user in an upcoming commit.

											
										
										
											2009-09-21 12:37:20 -07:00
-												Move lib/type-props.h to include/openvswitch directory

Signed-off-by: Ben Warren <ben@skyportsystems.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2016-04-04 21:32:08 -04:00
+								#include "openvswitch/type-props.h"
-												Add fallback definition of SIG_ATOMIC_MAX

Android appears to lack SIG_ATOMIC_MAX which is only
used in fatal-signal.c.

Observed when compiling using the Android NDK r6b (Android API level 13).

Patch based on a suggestion by Ben Pfaff

											
										
										
											2011-09-22 21:24:12 +09:00
-												backtrace: Extend the backtrace functionality.

Use the backtrace functions that are provided by libc; this allows us
to get backtrace that is independent of the current memory map of the
process.  Which in turn can be used for debugging/tracing purpose.
The backtrace is not 100% accurate due to various optimizations, most
notably "-fomit-frame-pointer" and LTO.  This might result that the
line in source file doesn't correspond to the real line.  However, it
should be able to pinpoint at least the function where the backtrace
was called.

The implementation is determined during compilation based on available
libraries.  Libunwind has higher priority if both methods are available
to keep the compatibility with current behavior.

The backtrace is not marked as signal safe however the backtrace manual
page gives more detailed explanation why it might be the case [0].
Load the "libgcc" or equivalent in advance within the "fatal_signal_init"
which should ensure that subsequent calls to backtrace* do not call
malloc and are signal safe.

The typical backtrace will look similar to the one below:
 /lib64/libopenvswitch-3.1.so.0(backtrace_capture+0x1e) [0x7fc5db298dfe]
 /lib64/libopenvswitch-3.1.so.0(log_backtrace_at+0x57) [0x7fc5db2999e7]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_complete+0x7b) [0x7fc5db56247b]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_propose_commit_block+0x8d) [0x7fc5db563a8d]
 ovsdb-server(+0xa661) [0x562cfce2e661]
 ovsdb-server(+0x7e39) [0x562cfce2be39]
 /lib64/libc.so.6(+0x27b4a) [0x7fc5db048b4a]
 /lib64/libc.so.6(__libc_start_main+0x8b) [0x7fc5db048c0b]
 ovsdb-server(+0x8c35) [0x562cfce2cc35]

backtrace.h elaborates on how to effectively get the line information
associated with the addresses presented in the backtrace.

[0]
backtrace() and backtrace_symbols_fd() don't call malloc() explicitly,
but they are part of libgcc, which gets loaded dynamically when first
used.  Dynamic loading usually triggers a call to malloc(3).  If you
need certain calls to these two functions to not allocate memory (in
signal handlers, for example), you need to make sure libgcc is loaded
beforehand

Reported-at: https://bugzilla.redhat.com/2177760
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-30 09:34:12 +02:00
+								#if defined(HAVE_UNWIND) || defined(HAVE_BACKTRACE)
-												fatal-signal: Catch SIGSEGV and print backtrace.

The patch catches the SIGSEGV signal and prints the backtrace
using libunwind at the monitor daemon. This makes debugging easier
when there is no debug symbol package or gdb installed on production
systems.

The patch works when the ovs-vswitchd compiles even without debug symbol
(no -g option), because the object files still have function symbols.
For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
 |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
 |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
 |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
 |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
 |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
 |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
    (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only
get init function plus offset value. This is still useful when trying
to see if two bugs have the same root cause, Example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
 |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
 |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
 |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
	(Segmentation fault), core dumped, restarting

Most C library functions are not async-signal-safe, meaning that
it is not safe to call them from a signal handler, for example
printf() or fflush(). To be async-signal-safe, the handler only
collects the stack info using libunwind, which is signal-safe, and
issues 'write' to the pipe, where the monitor thread reads and
prints to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-09-27 10:22:55 -07:00
+								#include "daemon-private.h"
 								#endif
-												backtrace: Extend the backtrace functionality.

Use the backtrace functions that are provided by libc; this allows us
to get backtrace that is independent of the current memory map of the
process.  Which in turn can be used for debugging/tracing purpose.
The backtrace is not 100% accurate due to various optimizations, most
notably "-fomit-frame-pointer" and LTO.  This might result that the
line in source file doesn't correspond to the real line.  However, it
should be able to pinpoint at least the function where the backtrace
was called.

The implementation is determined during compilation based on available
libraries.  Libunwind has higher priority if both methods are available
to keep the compatibility with current behavior.

The backtrace is not marked as signal safe however the backtrace manual
page gives more detailed explanation why it might be the case [0].
Load the "libgcc" or equivalent in advance within the "fatal_signal_init"
which should ensure that subsequent calls to backtrace* do not call
malloc and are signal safe.

The typical backtrace will look similar to the one below:
 /lib64/libopenvswitch-3.1.so.0(backtrace_capture+0x1e) [0x7fc5db298dfe]
 /lib64/libopenvswitch-3.1.so.0(log_backtrace_at+0x57) [0x7fc5db2999e7]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_complete+0x7b) [0x7fc5db56247b]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_propose_commit_block+0x8d) [0x7fc5db563a8d]
 ovsdb-server(+0xa661) [0x562cfce2e661]
 ovsdb-server(+0x7e39) [0x562cfce2be39]
 /lib64/libc.so.6(+0x27b4a) [0x7fc5db048b4a]
 /lib64/libc.so.6(__libc_start_main+0x8b) [0x7fc5db048c0b]
 ovsdb-server(+0x8c35) [0x562cfce2cc35]

backtrace.h elaborates on how to effectively get the line information
associated with the addresses presented in the backtrace.

[0]
backtrace() and backtrace_symbols_fd() don't call malloc() explicitly,
but they are part of libgcc, which gets loaded dynamically when first
used.  Dynamic loading usually triggers a call to malloc(3).  If you
need certain calls to these two functions to not allocate memory (in
signal handlers, for example), you need to make sure libgcc is loaded
beforehand

Reported-at: https://bugzilla.redhat.com/2177760
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-30 09:34:12 +02:00
+								#ifdef HAVE_BACKTRACE
 								#include <execinfo.h>
 								#endif
-												Add fallback definition of SIG_ATOMIC_MAX

Android appears to lack SIG_ATOMIC_MAX which is only
used in fatal-signal.c.

Observed when compiling using the Android NDK r6b (Android API level 13).

Patch based on a suggestion by Ben Pfaff

											
										
										
											2011-09-22 21:24:12 +09:00
+								#ifndef SIG_ATOMIC_MAX
 								#define SIG_ATOMIC_MAX TYPE_MAXIMUM(sig_atomic_t)
 								#endif
-												vlog: Make client supply semicolon for VLOG_DEFINE_THIS_MODULE.

It's kind of odd for VLOG_DEFINE_THIS_MODULE to supply its own semicolon,
so this commit switches to the more common form.

											
										
										
											2010-10-19 14:47:01 -07:00
+								VLOG_DEFINE_THIS_MODULE(fatal_signal);
-												vlog: Introduce VLOG_DEFINE_THIS_MODULE for declaring vlog module in use.

Adding a macro to define the vlog module in use adds a level of
indirection, which makes it easier to change how the vlog module must be
defined.  A followup commit needs to do that, so getting these widespread
changes out of the way first should make that commit easier to review.

											
										
										
											2010-07-16 11:02:49 -07:00
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								/* Signals to catch. */
-												fatal-signal: Fatal signal handling for Windows.

Windows does not have a SIGHUP or SIGALRM. It does have
a SIGINT and SIGTERM. The documentation at msdn says that
SIGINT is not supported for win32 applications because
WIN32 operating systems generate a new thread to specifically
handle Ctrl+C.

This commit handles SIGTERM for Windows. The documentation also
states that nothing generates SIGTERM in Windows, but one can
use raise(SIGTERM) to manage it. The idea for handling SIGTERM
for Windows is to just have a place holder if there is need to
raise() a signal for some other purpose.

We use SIGALRM in timeval.c if we wake up from a sleep after
'deadline'. For Windows, print an error message and then
use SIGTERM.

There is an atexit() function for Windows, so we can call cleanup
functions during exit.

An upcoming commit separately handles Ctrl+C so that we can call
clean up functions for that use case.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 08:25:42 -08:00
+								#ifndef _WIN32
-												fatal-signal: Catch SIGSEGV and print backtrace.

The patch catches the SIGSEGV signal and prints the backtrace
using libunwind at the monitor daemon. This makes debugging easier
when there is no debug symbol package or gdb installed on production
systems.

The patch works when the ovs-vswitchd compiles even without debug symbol
(no -g option), because the object files still have function symbols.
For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
 |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
 |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
 |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
 |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
 |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
 |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
    (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only
get init function plus offset value. This is still useful when trying
to see if two bugs have the same root cause. Example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
 |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
 |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
 |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
	(Segmentation fault), core dumped, restarting

Most C library functions are not async-signal-safe, meaning that
it is not safe to call them from a signal handler, for example
printf() or fflush(). To be async-signal-safe, the handler only
collects the stack info using libunwind, which is signal-safe, and
issues 'write' to the pipe, where the monitor thread reads and
prints to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-09-27 10:22:55 -07:00
+								static const int fatal_signals[] = { SIGTERM, SIGINT, SIGHUP, SIGALRM,
 								                                     SIGSEGV };
-												fatal-signal: Fatal signal handling for Windows.

Windows does not have a SIGHUP or SIGALRM. It does have
a SIGINT and SIGTERM. The documentation at msdn says that
SIGINT is not supported for win32 applications because
WIN32 operating systems generate a new thread to specifically
handle Ctrl+C.

This commit handles SIGTERM for Windows. The documentation also
states that nothing generates SIGTERM in Windows, but one can
use raise(SIGTERM) to manage it. The idea for handling SIGTERM
for Windows is to just have a place holder if there is need to
raise() a signal for some other purpose.

We use SIGALRM in timeval.c if we wake up from a sleep after
'deadline'. For Windows, print an error message and then
use SIGTERM.

There is an atexit() function for Windows, so we can call cleanup
functions during exit.

An upcoming commit separately handles Ctrl+C so that we can call
clean up functions for that use case.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 08:25:42 -08:00
+								#else
 								static const int fatal_signals[] = { SIGTERM };
 								#endif
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
 								/* Hooks to call upon catching a signal */
 								struct hook {
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								    void (*hook_cb)(void *aux);
 								    void (*cancel_cb)(void *aux);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								    void *aux;
 								    bool run_at_exit;
 								};
 								#define MAX_HOOKS 32
 								static struct hook hooks[MAX_HOOKS];
 								static size_t n_hooks;
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								static int signal_fds[2];
 								static volatile sig_atomic_t stored_sig_nr = SIG_ATOMIC_MAX;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												poll-loop: Create Windows event handles for sockets automatically.

We currently have a poll_fd_wait_event(fd, wevent, events) function that
is used at places common to Windows and Linux where we have to wait on
sockets.  On Linux, 'wevent' is always set as zero. On Windows, for sockets,
when we send both 'fd' and 'wevent', we associate them with each other for
'events' and then wait on 'wevent'. Also on Windows, when we only send 'wevent'
to this function, we would simply wait for all events for that 'wevent'.

There is a disadvantage with this approach.
* Windows clients need to create a 'wevent' and then pass it along. This
means that at a lot of places where we create sockets, we also are forced
to create a 'wevent'.

With this commit, we pass the responsibility of creating a 'wevent' to
poll_fd_wait() in case of sockets. That way, a client using poll_fd_wait()
is only concerned about sockets and not about 'wevents'. There is a potential
disadvantage with this change in that we create events more often and that
may have a performance penalty. If that turns out to be the case, we will
eventually need to create a pool of wevents that can be re-used.

In Windows, there are cases where we want to wait on an event (not
associated with any sockets) and then control it using functions
like SetEvent() etc. For that purpose, introduce a new function
poll_wevent_wait(). For this function, the client needs to create an event
and then pass it along as an argument.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-By: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-27 13:30:49 -07:00
+								#ifdef _WIN32
 								static HANDLE wevent;
 								#endif
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								static struct ovs_mutex mutex;
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								static void call_hooks(int sig_nr);
-												fatal-signal: Handle SIGINT for Windows.

Ctrl+C signals are a special case for Windows and can
be handled by registering a handle through
SetConsoleCtrlHandler() routine. This is only useful
when we run it directly on console and not as services in
the background.

Once we get a Ctrl+C signal, we call the cleanup functions
and then exit.

One thing to know here is that MinGW terminal handles
Ctrl+C signal differently (and looks a little buggy. I see
it exiting the handler midway with some sort of timeout).
So this implementation is only useful when run on Windows
terminal. Since we only use MinGW for compilation and
eventually to run unit tests, it should be okay. (The unit
tests would ideally use windows services and not expect
Ctrl+C)

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 09:37:19 -08:00
+								#ifdef _WIN32
 								static BOOL WINAPI ConsoleHandlerRoutine(DWORD dwCtrlType);
 								#endif
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												fatal-signal: Don't share signal fds/handles with forked process.

The signal_fds pipe and wevent are a mechanism to wake up the process
after it received a signal and stored the number for the future
processing.  They are not intended for inter-process communication.
However, in the current code, descriptors are not closed on fork().

The main scenario where we use fork() is a monitor process.  Monitor
doesn't actually use poll loops and doesn't wait on the descriptor.
But when a child process is killed, it (child) sends a byte to itself,
then it wakes up due to POLLIN on the pipe and terminates itself after
processing all the callbacks.  The byte stays unread.  And the pipe is
still open in the monitor process.  When child dies, the monitor wakes
up and forks again.  New child inherits the same pipe that still
contains unread data.  This data is never read, so the child will
constantly wake itself up for no reason.

Interestingly enough raise(SIGSEGV) doesn't immediately kill the
process.  The execution continues until the end of a signal handler,
so we're still able to write a byte to a pipe even in this case.
Presumably because we don't have SA_NODEFER.

Fix the issue by re-creating the pipe/event on fork.  This way
every new child will have its own notification channel and will
not wake up any other processes.

There was already an attempt to fix the issue, but it didn't get a
follow up (see the reported-at tag).  This is an alternative solution.

Fixes: ff8decf1a318 ("daemon: Add support for process monitoring and restart.")
Reported-at: https://patchwork.ozlabs.org/project/openvswitch/patch/20221019093147.2072-1-lifengqi@inspur.com/
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-06-08 21:16:59 +02:00
+								/* Sets up a pipe or event handle that will be used to wake up the current
 								 * process after a signal is received, so it can be processed outside of the
 								 * signal handler context in fatal_signal_run().
 								 *
 								 * On non-Windows builds this fills 'signal_fds' via xpipe_nonblocking()
 								 * (presumably a non-blocking pipe whose creation failure is fatal -- confirm
 								 * against its definition).  On Windows it stores a new event object in
 								 * 'wevent' and reports the failure through VLOG_FATAL() if CreateEvent()
 								 * returns NULL. */
 								static void
 								fatal_signal_create_wakeup_events(void)
 								{
 								#ifndef _WIN32
 								    xpipe_nonblocking(signal_fds);
 								#else
 								    /* Unnamed, manual-reset event that starts out non-signaled. */
 								    wevent = CreateEvent(NULL, TRUE, FALSE, NULL);
 								    if (!wevent) {
 								        char *msg_buf = ovs_lasterror_to_string();
 								        VLOG_FATAL("Failed to create a event (%s).", msg_buf);
 								    }
 								#endif
 								}
 								/* Releases the wakeup pipe or event handle set up by
 								 * fatal_signal_create_wakeup_events().
 								 *
 								 * On non-Windows builds both ends of 'signal_fds' are closed and each slot
 								 * is overwritten with -1 so that a stale descriptor number is not kept
 								 * around.  On Windows 'wevent' is reset to the non-signaled state, its
 								 * handle is closed, and the variable is cleared to NULL.
 								 *
 								 * The return values of close(), ResetEvent() and CloseHandle() are not
 								 * checked. */
 								static void
 								fatal_signal_destroy_wakeup_events(void)
 								{
 								#ifndef _WIN32
 								    /* signal_fds[0] first, then signal_fds[1]. */
 								    close(signal_fds[0]);
 								    signal_fds[0] = -1;
 								    close(signal_fds[1]);
 								    signal_fds[1] = -1;
 								#else
 								    ResetEvent(wevent);
 								    CloseHandle(wevent);
 								    wevent = NULL;
 								#endif
 								}
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
+								/* Initializes the fatal signal handling module.  Calling this function is
 								 * optional, because calling any other function in the module will also
 								 * initialize it.  However, in a multithreaded program, the module must be
 								 * initialized while the process is still single-threaded. */
 								void
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								fatal_signal_init(void)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
 								    static bool inited = false;
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								    if (!inited) {
 								        size_t i;
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
+								        assert_single_threaded();
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								        inited = true;
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
-												Use "error-checking" mutexes in place of other kinds wherever possible.

We've seen a number of deadlocks in the tree since thread safety was
introduced.  So far, all of these are self-deadlocks, that is, a single
thread acquiring a lock and then attempting to re-acquire the same lock
recursively.  When this has happened, the process simply hung, and it was
somewhat difficult to find the cause.

POSIX "error-checking" mutexes check for this specific problem (and
others).  This commit switches from other types of mutexes to
error-checking mutexes everywhere that we can, that is, everywhere that
we're not using recursive mutexes.  This ought to help find problems more
quickly in the future.

There might be performance advantages to other kinds of mutexes in some
cases.  However, the existing mutex type choices were just guesses, so I'd
rather go for easy detection of errors until we know that other mutex
types actually perform better in specific cases.  Also, I did a quick
microbenchmark of glibc mutex types on my host and found that the
error checking mutexes weren't any slower than the other types, at least
when the mutex is uncontended.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2013-08-20 13:40:02 -07:00
+								        ovs_mutex_init_recursive(&mutex);
-												backtrace: Extend the backtrace functionality.

Use the backtrace functions that are provided by libc.  This allows us
to get a backtrace that is independent of the current memory map of the
process, which in turn can be used for debugging/tracing purposes.
The backtrace is not 100% accurate due to various optimizations, most
notably "-fomit-frame-pointer" and LTO.  As a result, the reported
line in the source file might not correspond to the real line.  However,
it should be able to pinpoint at least the function where the backtrace
was called.

The implementation is determined during compilation based on available
libraries.  Libunwind has higher priority if both methods are available
to keep the compatibility with current behavior.

The backtrace is not marked as signal safe however the backtrace manual
page gives more detailed explanation why it might be the case [0].
Load the "libgcc" or equivalent in advance within the "fatal_signal_init"
which should ensure that subsequent calls to backtrace* do not call
malloc and are signal safe.

The typical backtrace will look similar to the one below:
 /lib64/libopenvswitch-3.1.so.0(backtrace_capture+0x1e) [0x7fc5db298dfe]
 /lib64/libopenvswitch-3.1.so.0(log_backtrace_at+0x57) [0x7fc5db2999e7]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_complete+0x7b) [0x7fc5db56247b]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_propose_commit_block+0x8d) [0x7fc5db563a8d]
 ovsdb-server(+0xa661) [0x562cfce2e661]
 ovsdb-server(+0x7e39) [0x562cfce2be39]
 /lib64/libc.so.6(+0x27b4a) [0x7fc5db048b4a]
 /lib64/libc.so.6(__libc_start_main+0x8b) [0x7fc5db048c0b]
 ovsdb-server(+0x8c35) [0x562cfce2cc35]

backtrace.h elaborates on how to effectively get the line information
associated with the addresses presented in the backtrace.

[0]
backtrace() and backtrace_symbols_fd() don't call malloc() explicitly,
but they are part of libgcc, which gets loaded dynamically when first
used.  Dynamic loading usually triggers a call to malloc(3).  If you
need certain calls to these two functions to not allocate memory (in
signal handlers, for example), you need to make sure libgcc is loaded
beforehand

Reported-at: https://bugzilla.redhat.com/2177760
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-30 09:34:12 +02:00
 								        /* The dummy backtrace is needed.
 								         * See comment for send_backtrace_to_monitor(). */
 								        struct backtrace dummy_bt;
 								        backtrace_capture(&dummy_bt);
-												fatal-signal: Don't share signal fds/handles with forked process.

The signal_fds pipe and wevent are a mechanism to wake up the process
after it received a signal and stored the number for the future
processing.  They are not intended for inter-process communication.
However, in the current code, descriptors are not closed on fork().

The main scenario where we use fork() is a monitor process.  Monitor
doesn't actually use poll loops and doesn't wait on the descriptor.
But when a child process is killed, it (child) sends a byte to itself,
then it wakes up due to POLLIN on the pipe and terminates itself after
processing all the callbacks.  The byte stays unread.  And the pipe is
still open in the monitor process.  When child dies, the monitor wakes
up and forks again.  New child inherits the same pipe that still
contains unread data.  This data is never read, so the child will
constantly wake itself up for no reason.

Interestingly enough raise(SIGSEGV) doesn't immediately kill the
process.  The execution continues until the end of a signal handler,
so we're still able to write a byte to a pipe even in this case.
Presumably because we don't have SA_NODEFER.

Fix the issue by re-creating the pipe/event on fork.  This way
every new child will have its own notification channel and will
not wake up any other processes.

There was already an attempt to fix the issue, but it didn't get a
follow up (see the reported-at tag).  This is an alternative solution.

Fixes: ff8decf1a318 ("daemon: Add support for process monitoring and restart.")
Reported-at: https://patchwork.ozlabs.org/project/openvswitch/patch/20221019093147.2072-1-lifengqi@inspur.com/
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-06-08 21:16:59 +02:00
+								        fatal_signal_create_wakeup_events();
-												fatal-signal: Handle SIGINT for Windows.

Ctrl+C signals are a special case for Windows and can
be handled by registering a handle through
SetConsoleCtrlHandler() routine. This is only useful
when we run it directly on console and not as services in
the background.

Once we get a Ctrl+C signal, we call the cleanup functions
and then exit.

One thing to know here is that MinGW terminal handles
Ctrl+C signal differently (and looks a little buggy. I see
it exiting the handler midway with some sort of timeout).
So this implementation is only useful when run on Windows
terminal. Since we only use MinGW for compilation and
eventually to run unit tests, it should be okay. (The unit
tests would ideally use windows services and not expect
Ctrl+C)

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 09:37:19 -08:00
-												fatal-signal: Don't share signal fds/handles with forked process.

The signal_fds pipe and wevent are a mechanism to wake up the process
after it received a signal and stored the number for the future
processing.  They are not intended for inter-process communication.
However, in the current code, descriptors are not closed on fork().

The main scenario where we use fork() is a monitor process.  Monitor
doesn't actually use poll loops and doesn't wait on the descriptor.
But when a child process is killed, it (child) sends a byte to itself,
then it wakes up due to POLLIN on the pipe and terminates itself after
processing all the callbacks.  The byte stays unread.  And the pipe is
still open in the monitor process.  When child dies, the monitor wakes
up and forks again.  New child inherits the same pipe that still
contains unread data.  This data is never read, so the child will
constantly wake itself up for no reason.

Interestingly enough raise(SIGSEGV) doesn't immediately kill the
process.  The execution continues until the end of a signal handler,
so we're still able to write a byte to a pipe even in this case.
Presumably because we don't have SA_NODEFER.

Fix the issue by re-creating the pipe/event on fork.  This way
every new child will have its own notification channel and will
not wake up any other processes.

There was already an attempt to fix the issue, but it didn't get a
follow up (see the reported-at tag).  This is an alternative solution.

Fixes: ff8decf1a318 ("daemon: Add support for process monitoring and restart.")
Reported-at: https://patchwork.ozlabs.org/project/openvswitch/patch/20221019093147.2072-1-lifengqi@inspur.com/
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-06-08 21:16:59 +02:00
+								#ifdef _WIN32
-												fatal-signal: Handle SIGINT for Windows.

Ctrl+C signals are a special case for Windows and can
be handled by registering a handle through
SetConsoleCtrlHandler() routine. This is only useful
when we run it directly on console and not as services in
the background.

Once we get a Ctrl+C signal, we call the cleanup functions
and then exit.

One thing to know here is that MinGW terminal handles
Ctrl+C signal differently (and looks a little buggy. I see
it exiting the handler midway with some sort of timeout).
So this implementation is only useful when run on Windows
terminal. Since we only use MinGW for compilation and
eventually to run unit tests, it should be okay. (The unit
tests would ideally use windows services and not expect
Ctrl+C)

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 09:37:19 -08:00
+								        /* Register a function to handle Ctrl+C. */
 								        SetConsoleCtrlHandler(ConsoleHandlerRoutine, true);
-												fatal-signal: Fatal signal handling for Windows.

Windows does not have a SIGHUP or SIGALRM. It does have
a SIGINT and SIGTERM. The documentation at msdn says that
SIGINT is not supported for win32 applications because
WIN32 operating systems generate a new thread to specifically
handle Ctrl+C.

This commit handles SIGTERM for Windows. The documentation also
states that nothing generates SIGTERM in Windows, but one can
use raise(SIGTERM) to manage it. The idea for handling SIGTERM
for Windows is to just have a place holder if there is need to
raise() a signal for some other purpose.

We use SIGALRM in timeval.c if we wake up from a sleep after
'deadline'. For Windows, print an error message and then
use SIGTERM.

There is an atexit() function for Windows, so we can call cleanup
functions during exit.

An upcoming commit separately handles Ctrl+C so that we can call
clean up functions for that use case.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 08:25:42 -08:00
+								#endif
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								        for (i = 0; i < ARRAY_SIZE(fatal_signals); i++) {
 								            int sig_nr = fatal_signals[i];
-												fatal-signal: Fatal signal handling for Windows.

Windows does not have a SIGHUP or SIGALRM. It does have
a SIGINT and SIGTERM. The documentation at msdn says that
SIGINT is not supported for win32 applications because
WIN32 operating systems generate a new thread to specifically
handle Ctrl+C.

This commit handles SIGTERM for Windows. The documentation also
states that nothing generates SIGTERM in Windows, but one can
use raise(SIGTERM) to manage it. The idea for handling SIGTERM
for Windows is to just have a place holder if there is need to
raise() a signal for some other purpose.

We use SIGALRM in timeval.c if we wake up from a sleep after
'deadline'. For Windows, print an error message and then
use SIGTERM.

There is an atexit() function for Windows, so we can call cleanup
functions during exit.

An upcoming commit separately handles Ctrl+C so that we can call
clean up functions for that use case.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 08:25:42 -08:00
+								#ifndef _WIN32
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								            struct sigaction old_sa;
-												Log anything that could prevent a daemon from starting.

If a daemon doesn't start, we need to know why.  Being able to
consistently consult the log to find out is helpful.

											
										
										
											2011-03-31 16:23:50 -07:00
+								            xsigaction(sig_nr, NULL, &old_sa);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								            if (old_sa.sa_handler == SIG_DFL
 								                && signal(sig_nr, fatal_signal_handler) == SIG_ERR) {
-												Replace all uses of strerror() by ovs_strerror(), for thread safety.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-24 10:54:49 -07:00
+								                VLOG_FATAL("signal failed (%s)", ovs_strerror(errno));
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								            }
-												fatal-signal: Fatal signal handling for Windows.

Windows does not have a SIGHUP or SIGALRM. It does have
a SIGINT and SIGTERM. The documentation at msdn says that
SIGINT is not supported for win32 applications because
WIN32 operating systems generate a new thread to specifically
handle Ctrl+C.

This commit handles SIGTERM for Windows. The documentation also
states that nothing generates SIGTERM in Windows, but one can
use raise(SIGTERM) to manage it. The idea for handling SIGTERM
for Windows is to just have a place holder if there is need to
raise() a signal for some other purpose.

We use SIGALRM in timeval.c if we wake up from a sleep after
'deadline'. For Windows, print an error message and then
use SIGTERM.

There is an atexit() function for Windows, so we can call cleanup
functions during exit.

An upcoming commit separately handles Ctrl+C so that we can call
clean up functions for that use case.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 08:25:42 -08:00
+								#else
 								            if (signal(sig_nr, fatal_signal_handler) == SIG_ERR) {
 								                VLOG_FATAL("signal failed (%s)", ovs_strerror(errno));
 								            }
 								#endif
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								        }
-												daemon-windows: unlink pidfile before stopping the service.

When a OVS daemon is configured to run as a Windows service,
when the service is stopped by calling service_stop(), the
windows services manager does not give enough time to do
everything in the atexit handler. So call the exit handler
directly from service_stop().

Also add a test case for Windows services which checks for
the termination of the service by looking at pidfile cleaned
by the exit handler.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-28 15:07:31 -07:00
+								        atexit(fatal_signal_atexit_handler);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								    }
 								}
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
+								/* Registers 'hook_cb' to be called from inside poll_block() following a fatal
 								 * signal.  'hook_cb' does not need to be async-signal-safe.  In a
 								 * multithreaded program 'hook_cb' might be called from any thread, with
 								 * threads other than the one running 'hook_cb' in unknown states.
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occured and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								 *
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
+								 * If 'run_at_exit' is true, 'hook_cb' is also called during normal process
 								 * termination, e.g. when exit() is called or when main() returns.
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								 *
 								 * If the current process forks, fatal_signal_fork() may be called to clear the
 								 * parent process's fatal signal hooks, so that 'hook_cb' is only called when
 								 * the child terminates, not when the parent does.  When fatal_signal_fork() is
 								 * called, it calls the 'cancel_cb' function if it is nonnull, passing 'aux',
 								 * to notify that the hook has been canceled.  This allows the hook to free
 								 * memory, etc. */
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								void
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								fatal_signal_add_hook(void (*hook_cb)(void *aux), void (*cancel_cb)(void *aux),
 								                      void *aux, bool run_at_exit)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occured and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								    fatal_signal_init();
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_lock(&mutex);
-												Replace most uses of assert by ovs_assert.

This is a straight search-and-replace, except that I also removed #include
<assert.h> from each file where there were no assert calls left.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>

											
										
										
											2012-11-06 13:14:55 -08:00
+								    ovs_assert(n_hooks < MAX_HOOKS);
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								    hooks[n_hooks].hook_cb = hook_cb;
 								    hooks[n_hooks].cancel_cb = cancel_cb;
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occured and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								    hooks[n_hooks].aux = aux;
 								    hooks[n_hooks].run_at_exit = run_at_exit;
 								    n_hooks++;
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_unlock(&mutex);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												fatal-signal: Catch SIGSEGV and print backtrace.

The patch catches the SIGSEGV signal and prints the backtrace
using libunwind at the monitor daemon. This makes debugging easier
when there is no debug symbol package or gdb installed on production
systems.

The patch works when the ovs-vswitchd compiles even without debug symbol
(no -g option), because the object files still have function symbols.
For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
 |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
 |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
 |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
 |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
 |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
 |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
    (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only
get init function plus offset value. This is still useful when trying
to see if two bugs have the same root cause, Example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
 |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
 |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
 |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
	(Segmentation fault), core dumped, restarting

Most C library functions are not async-signal-safe, meaning that
it is not safe to call them from a signal handler, for example
printf() or fflush(). To be async-signal-safe, the handler only
collects the stack info using libunwind, which is signal-safe, and
issues 'write' to the pipe, where the monitor thread reads and
prints to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-09-27 10:22:55 -07:00
+								#ifdef HAVE_UNWIND
-												fatal-signal: Remove snprintf.

Function snprintf is not async-signal-safe.  Replace it with
our own implementation.  Example ovs-vswitchd.log output:
  2020-03-25T01:08:19.673Z|00050|memory|INFO|handlers:2 ports:3
  SIGSEGV detected, backtrace:
  0x4872d9         <fatal_signal_handler+0x49>
  0x7f4e2ab974b0   <killpg+0x40>
  0x7f4e2ac5d74d   <__poll+0x2d>
  0x531098         <time_poll+0x108>
  0x51aefc         <poll_block+0x8c>
  0x445ca9         <udpif_revalidator+0x289>
  0x5056fd         <ovsthread_wrapper+0x7d>
  0x7f4e2b65f6ba   <start_thread+0xca>
  0x7f4e2ac6941d   <clone+0x6d>
  0x0              <+0x0>

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/674901331
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-04-14 08:17:04 -07:00
/* Writes 'value' into 'str' as lowercase hexadecimal digits, most significant
 * digit first, without a "0x" prefix and without leading zeros (a 'value' of
 * 0 yields the single digit '0').
 *
 * No null terminator is written: only the digit bytes themselves are stored,
 * so the caller must terminate or pre-fill 'str' itself and must provide room
 * for up to 2 * sizeof(unsigned long long) digits.
 *
 * Returns the number of digits written.
 *
 * This exists because snprintf() is not async-signal-safe; this function
 * calls no library functions at all. */
static inline int
llong_to_hex_str(unsigned long long value, char *str)
{
    const char *digits = "0123456789abcdef";
    unsigned long long rest;
    int n_digits = 1;
    int pos;

    /* Count how many hex digits 'value' needs; there is always at least one. */
    for (rest = value / 16; rest > 0; rest /= 16) {
        n_digits++;
    }

    /* Fill the digits in from the least significant end backwards. */
    for (pos = n_digits - 1; pos >= 0; pos--) {
        str[pos] = digits[value % 16];
        value /= 16;
    }

    return n_digits;
}
-												fatal-signal: Catch SIGSEGV and print backtrace.

The patch catches the SIGSEGV signal and prints the backtrace
using libunwind at the monitor daemon. This makes debugging easier
when there is no debug symbol package or gdb installed on production
systems.

The patch works when the ovs-vswitchd compiles even without debug symbol
(no -g option), because the object files still have function symbols.
For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
 |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
 |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
 |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
 |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
 |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
 |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
    (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only
get init function plus offset value. This is still useful when trying
to see if two bugs have the same root cause, Example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
 |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
 |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
 |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
	(Segmentation fault), core dumped, restarting

Most C library functions are not async-signal-safe, meaning that
it is not safe to call them from a signal handler, for example
printf() or fflush(). To be async-signal-safe, the handler only
collects the stack info using libunwind, which is signal-safe, and
issues 'write' to the pipe, where the monitor thread reads and
prints to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-09-27 10:22:55 -07:00
+								/* Send the backtrace buffer to monitor thread.
 								 *
 								 * Note that this runs in the signal handling context, any system
 								 * library functions used here must be async-signal-safe.
 								 */
 								static inline void
-												backtrace: Extend the backtrace functionality.

Use the backtrace functions that are provided by libc; this allows us
to get a backtrace that is independent of the current memory map of the
process, which in turn can be used for debugging/tracing purposes.
The backtrace is not 100% accurate due to various optimizations, most
notably "-fomit-frame-pointer" and LTO.  This might result that the
line in source file doesn't correspond to the real line.  However, it
should be able to pinpoint at least the function where the backtrace
was called.

The implementation is determined during compilation based on available
libraries.  Libunwind has higher priority if both methods are available
to keep the compatibility with current behavior.

The backtrace is not marked as signal safe however the backtrace manual
page gives more detailed explanation why it might be the case [0].
Load the "libgcc" or equivalent in advance within the "fatal_signal_init"
which should ensure that subsequent calls to backtrace* do not call
malloc and are signal safe.

The typical backtrace will look similar to the one below:
 /lib64/libopenvswitch-3.1.so.0(backtrace_capture+0x1e) [0x7fc5db298dfe]
 /lib64/libopenvswitch-3.1.so.0(log_backtrace_at+0x57) [0x7fc5db2999e7]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_complete+0x7b) [0x7fc5db56247b]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_propose_commit_block+0x8d) [0x7fc5db563a8d]
 ovsdb-server(+0xa661) [0x562cfce2e661]
 ovsdb-server(+0x7e39) [0x562cfce2be39]
 /lib64/libc.so.6(+0x27b4a) [0x7fc5db048b4a]
 /lib64/libc.so.6(__libc_start_main+0x8b) [0x7fc5db048c0b]
 ovsdb-server(+0x8c35) [0x562cfce2cc35]

backtrace.h elaborates on how to effectively get the line information
associated with the addresses presented in the backtrace.

[0]
backtrace() and backtrace_symbols_fd() don't call malloc() explicitly,
but they are part of libgcc, which gets loaded dynamically when first
used.  Dynamic loading usually triggers a call to malloc(3).  If you
need certain calls to these two functions to not allocate memory (in
signal handlers, for example), you need to make sure libgcc is loaded
beforehand

Reported-at: https://bugzilla.redhat.com/2177760
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-30 09:34:12 +02:00
+								send_backtrace_to_monitor(void)
 								{
-												Avoid clobbered variable warning on ppc64le.

Since commit e2ed6fbeb1, Ci on ppc64le with Ubuntu 16.04.6 LTS throws
this error:

lib/fatal-signal.c: In function 'send_backtrace_to_monitor':
lib/fatal-signal.c:168:9: error: variable 'dep' might be clobbered by
'longjmp' or 'vfork' [-Werror=clobbered]
     int dep;

Declaring dep as a volatile int.

Signed-off-by: David Wilder <dwilder@us.ibm.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-10-08 12:40:19 -07:00
+								    /* volatile added to prevent a "clobbered" error on ppc64le with gcc */
 								    volatile int dep;
-												fatal-signal: Catch SIGSEGV and print backtrace.

The patch catches the SIGSEGV signal and prints the backtrace
using libunwind at the monitor daemon. This makes debugging easier
when there is no debug symbol package or gdb installed on production
systems.

The patch works when the ovs-vswitchd compiles even without debug symbol
(no -g option), because the object files still have function symbols.
For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
 |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
 |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
 |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
 |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
 |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
 |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
    (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only
get init function plus offset value. This is still useful when trying
to see if two bugs have the same root cause, Example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
 |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
 |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
 |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
	(Segmentation fault), core dumped, restarting

Most C library functions are not async-signal-safe, meaning that
it is not safe to call them from a signal handler, for example
printf() or fflush(). To be async-signal-safe, the handler only
collects the stack info using libunwind, which is signal-safe, and
issues 'write' to the pipe, where the monitor thread reads and
prints to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-09-27 10:22:55 -07:00
+								    struct unw_backtrace unw_bt[UNW_MAX_DEPTH];
 								    unw_cursor_t cursor;
 								    unw_context_t uc;
 								    if (daemonize_fd == -1) {
 								        return;
 								    }
 								    dep = 0;
 								    unw_getcontext(&uc);
 								    unw_init_local(&cursor, &uc);
 								    while (dep < UNW_MAX_DEPTH && unw_step(&cursor)) {
 								        memset(unw_bt[dep].func, 0, UNW_MAX_FUNCN);
 								        unw_get_reg(&cursor, UNW_REG_IP, &unw_bt[dep].ip);
 								        unw_get_proc_name(&cursor, unw_bt[dep].func, UNW_MAX_FUNCN,
 								                          &unw_bt[dep].offset);
-												trivial: Fix indentation.

Add extra space to fix indentation.

Signed-off-by: William Tu <u9012063@gmail.com>
Acked-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2020-03-20 13:54:50 -07:00
+								        dep++;
-												fatal-signal: Catch SIGSEGV and print backtrace.

The patch catches the SIGSEGV signal and prints the backtrace
using libunwind at the monitor daemon. This makes debugging easier
when there is no debug symbol package or gdb installed on production
systems.

The patch works when the ovs-vswitchd compiles even without debug symbol
(no -g option), because the object files still have function symbols.
For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
 |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
 |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
 |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
 |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
 |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
 |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
    (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only
get init function plus offset value. This is still useful when trying
to see if two bugs have the same root cause, Example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
 |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
 |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
 |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
	(Segmentation fault), core dumped, restarting

Most C library functions are not async-signal-safe, meaning that
it is not safe to call them from a signal handler, for example
printf() or fflush(). To be async-signal-safe, the handler only
collects the stack info using libunwind, which is signal-safe, and
issues 'write' to the pipe, where the monitor thread reads and
prints to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-09-27 10:22:55 -07:00
+								    }
-												fatal-signal: Log backtrace when no monitor daemon.

Currently the backtrace logging is only available when monitor
daemon is running.  This patch enables backtrace logging when
no monitor daemon exists.  At signal handling context, it detects
whether monitor daemon exists.  If not, write the backtrace directly
to the vlog fd.  Note that using VLOG_* macros doesn't work due to
their buffered I/O, so this patch directly issues a write() syscall to
the file descriptor.

For some system we stop using monitor daemon and use systemd to
monitor ovs-vswitchd, thus need this patch. Example of
ovs-vswitchd.log (note that there is no timestamp printed):
  2020-03-23T14:42:12.949Z|00049|memory|INFO|175332 kB peak resident
  2020-03-23T14:42:12.949Z|00050|memory|INFO|handlers:2 ports:3 reva
  SIGSEGV detected, backtrace:
  0x0000000000486969 <fatal_signal_handler+0x49>
  0x00007f7f5e57f4b0 <killpg+0x40>
  0x000000000047daa8 <pmd_thread_main+0x238>
  0x0000000000504edd <ovsthread_wrapper+0x7d>
  0x00007f7f5f0476ba <start_thread+0xca>
  0x00007f7f5e65141d <clone+0x6d>
  0x0000000000000000 <+0x0>

Acked-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-03-23 07:44:48 -07:00
+								    if (monitor) {
 								        ignore(write(daemonize_fd, unw_bt,
 								                     dep * sizeof(struct unw_backtrace)));
 								    } else {
 								        /* Since there is no monitor daemon running, write backtrace
-												fatal-signal: Remove snprintf.

Function snprintf is not async-signal-safe.  Replace it with
our own implementation.  Example ovs-vswitchd.log output:
  2020-03-25T01:08:19.673Z|00050|memory|INFO|handlers:2 ports:3
  SIGSEGV detected, backtrace:
  0x4872d9         <fatal_signal_handler+0x49>
  0x7f4e2ab974b0   <killpg+0x40>
  0x7f4e2ac5d74d   <__poll+0x2d>
  0x531098         <time_poll+0x108>
  0x51aefc         <poll_block+0x8c>
  0x445ca9         <udpif_revalidator+0x289>
  0x5056fd         <ovsthread_wrapper+0x7d>
  0x7f4e2b65f6ba   <start_thread+0xca>
  0x7f4e2ac6941d   <clone+0x6d>
  0x0              <+0x0>

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/674901331
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-04-14 08:17:04 -07:00
+								         * in current process.
-												fatal-signal: Log backtrace when no monitor daemon.

Currently the backtrace logging is only available when monitor
daemon is running.  This patch enables backtrace logging when
no monitor daemon exists.  At signal handling context, it detects
whether monitor daemon exists.  If not, write the backtrace directly
to the vlog fd.  Note that using VLOG_* macros doesn't work due to
their buffered I/O, so this patch directly issues a write() syscall to
the file descriptor.

For some system we stop using monitor daemon and use systemd to
monitor ovs-vswitchd, thus need this patch. Example of
ovs-vswitchd.log (note that there is no timestamp printed):
  2020-03-23T14:42:12.949Z|00049|memory|INFO|175332 kB peak resident
  2020-03-23T14:42:12.949Z|00050|memory|INFO|handlers:2 ports:3 reva
  SIGSEGV detected, backtrace:
  0x0000000000486969 <fatal_signal_handler+0x49>
  0x00007f7f5e57f4b0 <killpg+0x40>
  0x000000000047daa8 <pmd_thread_main+0x238>
  0x0000000000504edd <ovsthread_wrapper+0x7d>
  0x00007f7f5f0476ba <start_thread+0xca>
  0x00007f7f5e65141d <clone+0x6d>
  0x0000000000000000 <+0x0>

Acked-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-03-23 07:44:48 -07:00
+								         */
-												fatal-signal: Remove snprintf.

Function snprintf is not async-signal-safe.  Replace it with
our own implementation.  Example ovs-vswitchd.log output:
  2020-03-25T01:08:19.673Z|00050|memory|INFO|handlers:2 ports:3
  SIGSEGV detected, backtrace:
  0x4872d9         <fatal_signal_handler+0x49>
  0x7f4e2ab974b0   <killpg+0x40>
  0x7f4e2ac5d74d   <__poll+0x2d>
  0x531098         <time_poll+0x108>
  0x51aefc         <poll_block+0x8c>
  0x445ca9         <udpif_revalidator+0x289>
  0x5056fd         <ovsthread_wrapper+0x7d>
  0x7f4e2b65f6ba   <start_thread+0xca>
  0x7f4e2ac6941d   <clone+0x6d>
  0x0              <+0x0>

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/674901331
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-04-14 08:17:04 -07:00
+								        char ip_str[16], offset_str[6];
 								        char line[64], fn_name[UNW_MAX_FUNCN];
-												fatal-signal: Log backtrace when no monitor daemon.

Currently the backtrace logging is only available when monitor
daemon is running.  This patch enables backtrace logging when
no monitor daemon exists.  At signal handling context, it detects
whether monitor daemon exists.  If not, write the backtrace directly
to the vlog fd.  Note that using VLOG_* macros doesn't work due to
their buffered I/O, so this patch directly issues a write() syscall to
the file descriptor.

On some systems we no longer use the monitor daemon and instead use
systemd to monitor ovs-vswitchd, hence the need for this patch.  Example
ovs-vswitchd.log output (note that there is no timestamp printed):
  2020-03-23T14:42:12.949Z|00049|memory|INFO|175332 kB peak resident
  2020-03-23T14:42:12.949Z|00050|memory|INFO|handlers:2 ports:3 reva
  SIGSEGV detected, backtrace:
  0x0000000000486969 <fatal_signal_handler+0x49>
  0x00007f7f5e57f4b0 <killpg+0x40>
  0x000000000047daa8 <pmd_thread_main+0x238>
  0x0000000000504edd <ovsthread_wrapper+0x7d>
  0x00007f7f5f0476ba <start_thread+0xca>
  0x00007f7f5e65141d <clone+0x6d>
  0x0000000000000000 <+0x0>

Acked-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-03-23 07:44:48 -07:00
-												backtrace: Extend the backtrace functionality.

Use the backtrace functions that are provided by libc.  This allows us
to get a backtrace that is independent of the current memory map of the
process, which in turn can be used for debugging/tracing purposes.
The backtrace is not 100% accurate due to various optimizations, most
notably "-fomit-frame-pointer" and LTO.  As a result, the line in the
source file might not correspond to the real line.  However, it
should be able to pinpoint at least the function where the backtrace
was called.

The implementation is determined during compilation based on available
libraries.  Libunwind has higher priority if both methods are available
to keep the compatibility with current behavior.

The backtrace is not marked as signal safe however the backtrace manual
page gives more detailed explanation why it might be the case [0].
Load the "libgcc" or equivalent in advance within the "fatal_signal_init"
which should ensure that subsequent calls to backtrace* do not call
malloc and are signal safe.

The typical backtrace will look similar to the one below:
 /lib64/libopenvswitch-3.1.so.0(backtrace_capture+0x1e) [0x7fc5db298dfe]
 /lib64/libopenvswitch-3.1.so.0(log_backtrace_at+0x57) [0x7fc5db2999e7]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_complete+0x7b) [0x7fc5db56247b]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_propose_commit_block+0x8d) [0x7fc5db563a8d]
 ovsdb-server(+0xa661) [0x562cfce2e661]
 ovsdb-server(+0x7e39) [0x562cfce2be39]
 /lib64/libc.so.6(+0x27b4a) [0x7fc5db048b4a]
 /lib64/libc.so.6(__libc_start_main+0x8b) [0x7fc5db048c0b]
 ovsdb-server(+0x8c35) [0x562cfce2cc35]

backtrace.h elaborates on how to effectively get the line information
associated with the addresses presented in the backtrace.

[0]
backtrace() and backtrace_symbols_fd() don't call malloc() explicitly,
but they are part of libgcc, which gets loaded dynamically when first
used.  Dynamic loading usually triggers a call to malloc(3).  If you
need certain calls to these two functions to not allocate memory (in
signal handlers, for example), you need to make sure libgcc is loaded
beforehand

Reported-at: https://bugzilla.redhat.com/2177760
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-30 09:34:12 +02:00
+								        vlog_direct_write_to_log_file_unsafe(BACKTRACE_DUMP_MSG);
-												fatal-signal: Log backtrace when no monitor daemon.

Currently the backtrace logging is only available when monitor
daemon is running.  This patch enables backtrace logging when
no monitor daemon exists.  At signal handling context, it detects
whether monitor daemon exists.  If not, write directly the backtrace
to the vlog fd.  Note that using the VLOG_* macros doesn't work because
they use buffered I/O, so this patch directly issues a write() syscall to
the file descriptor.

On some systems we no longer use the monitor daemon and instead use
systemd to monitor ovs-vswitchd, hence the need for this patch.  Example
ovs-vswitchd.log output (note that there is no timestamp printed):
  2020-03-23T14:42:12.949Z|00049|memory|INFO|175332 kB peak resident
  2020-03-23T14:42:12.949Z|00050|memory|INFO|handlers:2 ports:3 reva
  SIGSEGV detected, backtrace:
  0x0000000000486969 <fatal_signal_handler+0x49>
  0x00007f7f5e57f4b0 <killpg+0x40>
  0x000000000047daa8 <pmd_thread_main+0x238>
  0x0000000000504edd <ovsthread_wrapper+0x7d>
  0x00007f7f5f0476ba <start_thread+0xca>
  0x00007f7f5e65141d <clone+0x6d>
  0x0000000000000000 <+0x0>

Acked-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-03-23 07:44:48 -07:00
 								        for (int i = 0; i < dep; i++) {
-												fatal-signal: Remove snprintf.

Function snprintf is not async-signal-safe.  Replace it with
our own implementation.  Example ovs-vswitchd.log output:
  2020-03-25T01:08:19.673Z|00050|memory|INFO|handlers:2 ports:3
  SIGSEGV detected, backtrace:
  0x4872d9         <fatal_signal_handler+0x49>
  0x7f4e2ab974b0   <killpg+0x40>
  0x7f4e2ac5d74d   <__poll+0x2d>
  0x531098         <time_poll+0x108>
  0x51aefc         <poll_block+0x8c>
  0x445ca9         <udpif_revalidator+0x289>
  0x5056fd         <ovsthread_wrapper+0x7d>
  0x7f4e2b65f6ba   <start_thread+0xca>
  0x7f4e2ac6941d   <clone+0x6d>
  0x0              <+0x0>

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/674901331
Tested-by: Yifeng Sun <pkusunyifeng@gmail.com>
Reviewed-by: Yifeng Sun <pkusunyifeng@gmail.com>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-04-14 08:17:04 -07:00
+								            memset(line, 0, sizeof line);
 								            memset(fn_name, 0, sizeof fn_name);
 								            memset(offset_str, 0, sizeof offset_str);
 								            memset(ip_str, ' ', sizeof ip_str);
 								            ip_str[sizeof(ip_str) - 1] = 0;
 								            llong_to_hex_str(unw_bt[i].ip, ip_str);
 								            llong_to_hex_str(unw_bt[i].offset, offset_str);
 								            strcat(line, "0x");
 								            strcat(line, ip_str);
 								            strcat(line, "<");
 								            memcpy(fn_name, unw_bt[i].func, UNW_MAX_FUNCN - 1);
 								            strcat(line, fn_name);
 								            strcat(line, "+0x");
 								            strcat(line, offset_str);
 								            strcat(line, ">\n");
-												fatal-signal: Fix clang error due to lock.

Due to not acquiring lock, clang reports:
  lib/vlog.c:618:12: error: reading variable 'log_fd' requires holding mutex
  'log_file_mutex' [-Werror,-Wthread-safety-analysis]
  return log_fd;

The patch fixes it by creating a function in vlog.c to write
directly to log file unsafely.

Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666165883
Fixes: ecd4a8fcdff2 ("fatal-signal: Log backtrace when no monitor daemon.")
Suggested-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Ilya Maximets <i.maximets@ovn.org>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-03-24 07:17:02 -07:00
+								            vlog_direct_write_to_log_file_unsafe(line);
-												fatal-signal: Log backtrace when no monitor daemon.

Currently the backtrace logging is only available when monitor
daemon is running.  This patch enables backtrace logging when
no monitor daemon exists.  At signal handling context, it detects
whether monitor daemon exists.  If not, write directly the backtrace
to the vlog fd.  Note that using the VLOG_* macros doesn't work because
they use buffered I/O, so this patch directly issues a write() syscall to
the file descriptor.

On some systems we no longer use the monitor daemon and instead use
systemd to monitor ovs-vswitchd, hence the need for this patch.  Example
ovs-vswitchd.log output (note that there is no timestamp printed):
  2020-03-23T14:42:12.949Z|00049|memory|INFO|175332 kB peak resident
  2020-03-23T14:42:12.949Z|00050|memory|INFO|handlers:2 ports:3 reva
  SIGSEGV detected, backtrace:
  0x0000000000486969 <fatal_signal_handler+0x49>
  0x00007f7f5e57f4b0 <killpg+0x40>
  0x000000000047daa8 <pmd_thread_main+0x238>
  0x0000000000504edd <ovsthread_wrapper+0x7d>
  0x00007f7f5f0476ba <start_thread+0xca>
  0x00007f7f5e65141d <clone+0x6d>
  0x0000000000000000 <+0x0>

Acked-by: Ben Pfaff <blp@ovn.org>
Signed-off-by: William Tu <u9012063@gmail.com>

											
										
										
											2020-03-23 07:44:48 -07:00
+								        }
 								    }
-												fatal-signal: Catch SIGSEGV and print backtrace.

The patch catches the SIGSEGV signal and prints the backtrace
using libunwind at the monitor daemon. This makes debugging easier
when there is no debug symbol package or gdb installed on production
systems.

The patch works when the ovs-vswitchd compiles even without debug symbol
(no -g option), because the object files still have function symbols.
For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
 |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
 |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
 |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
 |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
 |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
 |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
    (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only
get init function plus offset value. This is still useful when trying
to see if two bugs have the same root cause, Example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
 |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
 |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
 |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
	(Segmentation fault), core dumped, restarting

Most C library functions are not async-signal-safe, meaning that
it is not safe to call them from a signal handler, for example
printf() or fflush(). To be async-signal-safe, the handler only
collects the stack info using libunwind, which is signal-safe, and
issues 'write' to the pipe, where the monitor thread reads and
prints to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-09-27 10:22:55 -07:00
+								}
-												backtrace: Extend the backtrace functionality.

Use the backtrace functions that are provided by libc.  This allows us
to get a backtrace that is independent of the current memory map of the
process, which in turn can be used for debugging/tracing purposes.
The backtrace is not 100% accurate due to various optimizations, most
notably "-fomit-frame-pointer" and LTO.  As a result, the line in the
source file might not correspond to the real line.  However, it
should be able to pinpoint at least the function where the backtrace
was called.

The implementation is determined during compilation based on available
libraries.  Libunwind has higher priority if both methods are available
to keep the compatibility with current behavior.

The backtrace is not marked as signal safe however the backtrace manual
page gives more detailed explanation why it might be the case [0].
Load the "libgcc" or equivalent in advance within the "fatal_signal_init"
which should ensure that subsequent calls to backtrace* do not call
malloc and are signal safe.

The typical backtrace will look similar to the one below:
 /lib64/libopenvswitch-3.1.so.0(backtrace_capture+0x1e) [0x7fc5db298dfe]
 /lib64/libopenvswitch-3.1.so.0(log_backtrace_at+0x57) [0x7fc5db2999e7]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_complete+0x7b) [0x7fc5db56247b]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_propose_commit_block+0x8d) [0x7fc5db563a8d]
 ovsdb-server(+0xa661) [0x562cfce2e661]
 ovsdb-server(+0x7e39) [0x562cfce2be39]
 /lib64/libc.so.6(+0x27b4a) [0x7fc5db048b4a]
 /lib64/libc.so.6(__libc_start_main+0x8b) [0x7fc5db048c0b]
 ovsdb-server(+0x8c35) [0x562cfce2cc35]

backtrace.h elaborates on how to effectively get the line information
associated with the addresses presented in the backtrace.

[0]
backtrace() and backtrace_symbols_fd() don't call malloc() explicitly,
but they are part of libgcc, which gets loaded dynamically when first
used.  Dynamic loading usually triggers a call to malloc(3).  If you
need certain calls to these two functions to not allocate memory (in
signal handlers, for example), you need to make sure libgcc is loaded
beforehand

Reported-at: https://bugzilla.redhat.com/2177760
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-05-30 09:34:12 +02:00
+								#elif HAVE_BACKTRACE
 								/* Captures the current backtrace and hands it off for logging.
 								 *
 								 * When 'monitor' is set and 'daemonize_fd' is valid, the raw 'struct
 								 * backtrace' is written to 'daemonize_fd'.  Otherwise the dump header and
 								 * the symbolized frames are written straight to the log file descriptor,
 								 * or nothing is written if no log file descriptor is available.
 								 *
 								 * This runs in signal handling context, so any system library function
 								 * used here must be async-signal-safe.  backtrace() is only signal safe if
 								 * "libgcc" or an equivalent was loaded before the signal handler ran.  To
 								 * keep it safe, fatal_signal_init() should always call backtrace_capture(),
 								 * which ensures that "libgcc" or an equivalent is loaded.
 								 */
 								static inline void
 								send_backtrace_to_monitor(void)
 								{
 								    struct backtrace bt;
 								    int log_fd;

 								    backtrace_capture(&bt);

 								    /* Preferred path: ship the captured frames to the monitor. */
 								    if (monitor && daemonize_fd > -1) {
 								        ignore(write(daemonize_fd, &bt, sizeof bt));
 								        return;
 								    }

 								    /* No monitor: dump directly into the log file, if there is one. */
 								    log_fd = vlog_get_log_file_fd_unsafe();
 								    if (log_fd >= 0) {
 								        vlog_direct_write_to_log_file_unsafe(BACKTRACE_DUMP_MSG);
 								        backtrace_symbols_fd(bt.frames, bt.n_frames, log_fd);
 								    }
 								}
-												fatal-signal: Catch SIGSEGV and print backtrace.

The patch catches the SIGSEGV signal and prints the backtrace
using libunwind at the monitor daemon. This makes debugging easier
when there is no debug symbol package or gdb installed on production
systems.

The patch works when the ovs-vswitchd compiles even without debug symbol
(no -g option), because the object files still have function symbols.
For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
 |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
 |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
 |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
 |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
 |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
 |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
    (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only
get init function plus offset value. This is still useful when trying
to see if two bugs have the same root cause, Example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
 |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
 |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
 |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
	(Segmentation fault), core dumped, restarting

Most C library functions are not async-signal-safe, meaning that
it is not safe to call them from a signal handler, for example
printf() or fflush(). To be async-signal-safe, the handler only
collects the stack info using libunwind, which is signal-safe, and
issues 'write' to the pipe, where the monitor thread reads and
prints to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-09-27 10:22:55 -07:00
+								#else
 								/* Fallback used when none of the preceding preprocessor branches applies:
 								 * there is no backtrace to send, so this does nothing. */
 								static inline void
 								send_backtrace_to_monitor(void)
 								{
 								    /* Intentionally empty. */
 								}
 								#endif
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								/* Handles fatal signal number 'sig_nr'.
 								 *
 								 * Ordinarily this is the actual signal handler.  When other code needs to
 								 * handle one of our signals, however, it can register for that signal and, if
 								 * and when necessary, call this function to do fatal signal processing for it
 								 * and terminate the process.  Currently only timeval.c does this, for SIGALRM.
 								 * (It is not important whether the other code sets up its signal handler
 								 * before or after this file, because this file will only set up a signal
 								 * handler in the case where the signal has its default handling.)  */
 								void
 								fatal_signal_handler(int sig_nr)
 								{
-												fatal-signal: Fatal signal handling for Windows.

Windows does not have a SIGHUP or SIGALRM. It does have
a SIGINT and SIGTERM. The documentation at msdn says that
SIGINT is not supported for win32 applications because
WIN32 operating systems generate a new thread to specifically
handle Ctrl+C.

This commit handles SIGTERM for Windows. The documentation also
states that nothing generates SIGTERM in Windows, but one can
use raise(SIGTERM) to manage it. The idea for handling SIGTERM
for Windows is to just have a place holder if there is need to
raise() a signal for some other purpose.

We use SIGALRM in timeval.c if we wake up from a sleep after
'deadline'. For Windows, print an error message and then
use SIGTERM.

There is an atexit() function for Windows, so we can call cleanup
functions during exit.

An upcoming commit separately handles Ctrl+C so that we can call
clean up functions for that use case.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 08:25:42 -08:00
+								#ifndef _WIN32
-												fatal-signal: Catch SIGSEGV and print backtrace.

The patch catches the SIGSEGV signal and prints the backtrace
using libunwind at the monitor daemon. This makes debugging easier
when there is no debug symbol package or gdb installed on production
systems.

The patch works when the ovs-vswitchd compiles even without debug symbol
(no -g option), because the object files still have function symbols.
For example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <fatal_signal_handler+0x52>
 |daemon_unix(monitor)|WARN|0x00007fb4900734b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007fb49013974d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <time_poll+0x108>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <poll_block+0x8c>
 |daemon_unix(monitor)|WARN|0x000000000058630a <clean_thread_main+0x1aa>
 |daemon_unix(monitor)|WARN|0x00000000004ffd1d <ovsthread_wrapper+0x7d>
 |daemon_unix(monitor)|WARN|0x00007fb490b3b6ba <start_thread+0xca>
 |daemon_unix(monitor)|WARN|0x00007fb49014541d <clone+0x6d>
 |daemon_unix(monitor)|ERR|1 crashes: pid 122849 died, killed \
    (Segmentation fault), core dumped, restarting

However, if the object files' symbols are stripped, then we can only
get init function plus offset value. This is still useful when trying
to see if two bugs have the same root cause, Example:
 |daemon_unix(monitor)|WARN|SIGSEGV detected, backtrace:
 |daemon_unix(monitor)|WARN|0x0000000000482752 <_init+0x7d68a>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8cf4b0 <killpg+0x40>
 |daemon_unix(monitor)|WARN|0x00007f5f7c99574d <__poll+0x2d>
 |daemon_unix(monitor)|WARN|0x000000000052b348 <_init+0x126280>
 |daemon_unix(monitor)|WARN|0x00000000005153ec <_init+0x110324>
 |daemon_unix(monitor)|WARN|0x0000000000407439 <_init+0x2371>
 |daemon_unix(monitor)|WARN|0x00007f5f7c8ba830 <__libc_start_main+0xf0>
 |daemon_unix(monitor)|WARN|0x0000000000408329 <_init+0x3261>
 |daemon_unix(monitor)|ERR|1 crashes: pid 106155 died, killed \
	(Segmentation fault), core dumped, restarting

Most C library functions are not async-signal-safe, meaning that
it is not safe to call them from a signal handler, for example
printf() or fflush(). To be async-signal-safe, the handler only
collects the stack info using libunwind, which is signal-safe, and
issues 'write' to the pipe, where the monitor thread reads and
prints to ovs-vswitchd.log.

Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/590503433
Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>

											
										
										
											2019-09-27 10:22:55 -07:00
+								    if (sig_nr == SIGSEGV) {
 								        signal(sig_nr, SIG_DFL); /* Set it back immediately. */
 								        send_backtrace_to_monitor();
 								        raise(sig_nr);
 								    }
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								    ignore(write(signal_fds[1], "", 1));
-												fatal-signal: Fatal signal handling for Windows.

Windows does not have a SIGHUP or SIGALRM. It does have
a SIGINT and SIGTERM. The documentation at msdn says that
SIGINT is not supported for win32 applications because
WIN32 operating systems generate a new thread to specifically
handle Ctrl+C.

This commit handles SIGTERM for Windows. The documentation also
states that nothing generates SIGTERM in Windows, but one can
use raise(SIGTERM) to manage it. The idea for handling SIGTERM
for Windows is to just have a place holder if there is need to
raise() a signal for some other purpose.

We use SIGALRM in timeval.c if we wake up from a sleep after
'deadline'. For Windows, print an error message and then
use SIGTERM.

There is an atexit() function for Windows, so we can call cleanup
functions during exit.

An upcoming commit separately handles Ctrl+C so that we can call
clean up functions for that use case.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 08:25:42 -08:00
+								#else
 								    SetEvent(wevent);
 								#endif
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								    stored_sig_nr = sig_nr;
 								}
-												Make fatal signals cause an exit more promptly in special cases.

The fatal-signal library notices and records fatal signals (e.g. SIGTERM)
and terminates the process on the next trip through poll_block().  But
some special utilities do not always invoke poll_block() promptly, e.g.
"ovs-ofctl monitor" does not call poll_block() as long as OpenFlow messages
are available.  But these special cases seem like they are all likely to
call into functions that themselves block (those with "_block" in their
names).  So make a new rule that such functions should always call
fatal_signal_run(), either directly or through poll_block().  This commit
implements and documents that rule.

Bug #2625.

											
										
										
											2010-04-13 09:28:13 -07:00
+								/* Check whether a fatal signal has occurred and, if so, call the fatal signal
 								 * hooks and exit.
 								 *
 								 * This function is called automatically by poll_block(), but specialized
 								 * programs that may not always call poll_block() on a regular basis should
 								 * also call it periodically.  (Therefore, any function with "block" in its
 								 * name should call fatal_signal_run() each time it is called, either directly
 								 * or through poll_block(), because such functions can only be used by
 								 * specialized programs that can afford to block outside their main loop
 								 * around poll_block().)
 								 */
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								void
 								fatal_signal_run(void)
 								{
-												lib: Do not assume sig_atomic_t is int.

On FreeBSD sig_atomic_t is long, which causes the comparison in
fatal_signal_run to be true when no signal has been reported.

Signed-off-by: Ed Maste <emaste@freebsd.org>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-06-26 14:43:54 +00:00
+								    sig_atomic_t sig_nr;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												fatal-signal: Initialize library upon any call to public function.

Not calling fatal_signal_init() means that the signal handlers don't get
registered, so the process won't clean up on fatal signals.  Furthermore,
signal_fds[0] is then 0, which means that fatal-signal_wait() waits on
stdin, so if you are testing a program interactively and accidentally type
something on stdin then that program's CPU usage jumps to 100%.

Since poll_block() calls fatal_signal_wait() this seems like the most
reliable solution.

											
										
										
											2010-03-23 15:27:44 -07:00
+								    fatal_signal_init();
 								    sig_nr = stored_sig_nr;
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								    if (sig_nr != SIG_ATOMIC_MAX) {
-												signals: Make signal_name() thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-04-16 15:25:10 -07:00
+								        char namebuf[SIGNAL_NAME_BUFSIZE];
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								        ovs_mutex_lock(&mutex);
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
-												fatal-signal: Fatal signal handling for Windows.

Windows does not have a SIGHUP or SIGALRM. It does have
a SIGINT and SIGTERM. The documentation at msdn says that
SIGINT is not supported for win32 applications because
WIN32 operating systems generate a new thread to specifically
handle Ctrl+C.

This commit handles SIGTERM for Windows. The documentation also
states that nothing generates SIGTERM in Windows, but one can
use raise(SIGTERM) to manage it. The idea for handling SIGTERM
for Windows is to just have a place holder if there is need to
raise() a signal for some other purpose.

We use SIGALRM in timeval.c if we wake up from a sleep after
'deadline'. For Windows, print an error message and then
use SIGTERM.

There is an atexit() function for Windows, so we can call cleanup
functions during exit.

An upcoming commit separately handles Ctrl+C so that we can call
clean up functions for that use case.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 08:25:42 -08:00
+								#ifndef _WIN32
-												fatal-signal: Log when terminating due to a fatal signal.

This makes it easier to diagnose why and when a daemon exited.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2012-05-08 16:54:21 -07:00
+								        VLOG_WARN("terminating with signal %d (%s)",
-												signals: Make signal_name() thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-04-16 15:25:10 -07:00
+								                  (int)sig_nr, signal_name(sig_nr, namebuf, sizeof namebuf));
-												fatal-signal: Fatal signal handling for Windows.

Windows does not have a SIGHUP or SIGALRM. It does have
a SIGINT and SIGTERM. The documentation at msdn says that
SIGINT is not supported for win32 applications because
WIN32 operating systems generate a new thread to specifically
handle Ctrl+C.

This commit handles SIGTERM for Windows. The documentation also
states that nothing generates SIGTERM in Windows, but one can
use raise(SIGTERM) to manage it. The idea for handling SIGTERM
for Windows is to just have a place holder if there is need to
raise() a signal for some other purpose.

We use SIGALRM in timeval.c if we wake up from a sleep after
'deadline'. For Windows, print an error message and then
use SIGTERM.

There is an atexit() function for Windows, so we can call cleanup
functions during exit.

An upcoming commit separately handles Ctrl+C so that we can call
cleanup functions for that use case.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 08:25:42 -08:00
+								#else
 								        VLOG_WARN("terminating with signal %d", (int)sig_nr);
 								#endif
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								        call_hooks(sig_nr);
-												ovs-vsctl.at: Workaround lack of 'kill -l' on Windows.

Also, fflush(stderr) when we raise a signal. The test
this commit is changing would fail otherwise.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-17 12:35:32 -07:00
+								        fflush(stderr);
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
 								        /* Re-raise the signal with the default handling so that the program
 								         * termination status reflects that we were killed by this signal */
 								        signal(sig_nr, SIG_DFL);
 								        raise(sig_nr);
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								        ovs_mutex_unlock(&mutex);
-												Rename NOT_REACHED to OVS_NOT_REACHED

This allows other libraries to use util.h that has already
defined NOT_REACHED.

Signed-off-by: Harold Lim <haroldl@vmware.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-12-17 10:32:12 -08:00
+								        OVS_NOT_REACHED();
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
+								    }
 								}
 								void
 								fatal_signal_wait(void)
 								{
-												fatal-signal: Initialize library upon any call to public function.

Not calling fatal_signal_init() means that the signal handlers don't get
registered, so the process won't clean up on fatal signals.  Furthermore,
signal_fds[0] is then 0, which means that fatal-signal_wait() waits on
stdin, so if you are testing a program interactively and accidentally type
something on stdin then that program's CPU usage jumps to 100%.

Since poll_block() calls fatal_signal_wait() this seems like the most
reliable solution.

											
										
										
											2010-03-23 15:27:44 -07:00
+								    fatal_signal_init();
-												poll-loop: Create Windows event handles for sockets automatically.

We currently have a poll_fd_wait_event(fd, wevent, events) function that
is used at places common to Windows and Linux where we have to wait on
sockets.  On Linux, 'wevent' is always set as zero. On Windows, for sockets,
when we send both 'fd' and 'wevent', we associate them with each other for
'events' and then wait on 'wevent'. Also on Windows, when we only send 'wevent'
to this function, we would simply wait for all events for that 'wevent'.

There is a disadvantage with this approach.
* Windows clients need to create a 'wevent' and then pass it along. This
means that at a lot of places where we create sockets, we also are forced
to create a 'wevent'.

With this commit, we pass the responsibility of creating a 'wevent' to
poll_fd_wait() in case of sockets. That way, a client using poll_fd_wait()
is only concerned about sockets and not about 'wevents'. There is a potential
disadvantage with this change in that we create events more often and that
may have a performance penalty. If that turns out to be the case, we will
eventually need to create a pool of wevents that can be re-used.

In Windows, there are cases where we want to wait on an event (not
associated with any sockets) and then control it using functions
like SetEvent() etc. For that purpose, introduce a new function
poll_wevent_wait(). For this function, the client needs to create an event
and then pass it along as an argument.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-By: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-06-27 13:30:49 -07:00
+								#ifdef _WIN32
 								    poll_wevent_wait(wevent);
 								#else
 								    poll_fd_wait(signal_fds[0], POLLIN);
 								#endif
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												fatal-signal: SIGPIPE for Windows.

Windows does not have a SIGPIPE. We ignore SIGPIPE for
Linux. To compile on Windows, carve out a new function
to ignore SIGPIPE on Linux.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 10:44:46 -08:00
+								void
 								fatal_ignore_sigpipe(void)
 								{
 								    /* Sets the disposition of SIGPIPE to SIG_IGN.  When _WIN32 is defined
 								     * the call is compiled out and this function does nothing. */
 								#ifndef _WIN32
 								    signal(SIGPIPE, SIG_IGN);
 								#endif
 								}
-												daemon-windows: unlink pidfile before stopping the service.

When an OVS daemon is configured to run as a Windows service
and the service is stopped by calling service_stop(), the
Windows services manager does not give enough time to do
everything in the atexit handler. So call the exit handler
directly from service_stop().

Also add a test case for Windows services which checks for
the termination of the service by looking at pidfile cleaned
by the exit handler.

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-05-28 15:07:31 -07:00
+								void
 								fatal_signal_atexit_handler(void)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								    call_hooks(0);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
 								static void
 								call_hooks(int sig_nr)
 								{
 								    static volatile sig_atomic_t recurse = 0;
 								    if (!recurse) {
 								        size_t i;
 								        recurse = 1;
 								        for (i = 0; i < n_hooks; i++) {
 								            struct hook *h = &hooks[i];
 								            if (sig_nr || h->run_at_exit) {
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								                h->hook_cb(h->aux);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								            }
 								        }
 								    }
 								}
-												fatal-signal: Handle SIGINT for Windows.

Ctrl+C signals are a special case for Windows and can
be handled by registering a handle through
SetConsoleCtrlHandler() routine. This is only useful
when we run it directly on console and not as services in
the background.

Once we get a Ctrl+C signal, we call the cleanup functions
and then exit.

One thing to know here is that MinGW terminal handles
Ctrl+C signal differently (and looks a little buggy. I see
it exiting the handler midway with some sort of timeout).
So this implementation is only useful when run on Windows
terminal. Since we only use MinGW for compilation and
eventually to run unit tests, it should be okay. (The unit
tests would ideally use windows services and not expect
Ctrl+C)

Signed-off-by: Gurucharan Shetty <gshetty@nicira.com>
Acked-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2014-02-26 09:37:19 -08:00
 								#ifdef _WIN32
 								/* Console control handler, compiled only when _WIN32 is defined.
 								 *
 								 * Stores SIGINT in 'stored_sig_nr' and then sets 'wevent', the event that
 								 * fatal_signal_wait() passes to poll_wevent_wait().
 								 *
 								 * 'dwCtrlType' is not examined, so every control event delivered to this
 								 * handler is recorded as SIGINT.  Always returns true.
 								 *
 								 * NOTE(review): presumably installed with SetConsoleCtrlHandler(); the
 								 * registration is not visible in this part of the file -- confirm. */
 								BOOL WINAPI ConsoleHandlerRoutine(DWORD dwCtrlType)
 								{
 								    stored_sig_nr = SIGINT;
 								    SetEvent(wevent);
 								    return true;
 								}
 								#endif
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												Convert shash users that don't use the 'data' value to sset instead.

In each of the cases converted here, an shash was used simply to maintain
a set of strings, with the shash_nodes' 'data' values set to NULL.  This
commit converts them to use sset instead.

											
										
										
											2011-03-25 15:26:30 -07:00
+								/* Files to delete on exit.
 								 *
 								 * Names are added by fatal_signal_add_file_to_unlink() and removed by
 								 * fatal_signal_remove_file_to_unlink().  do_unlink_files() calls unlink()
 								 * on every name still in the set, and cancel_files() empties the set
 								 * without unlinking anything. */
 								static struct sset files = SSET_INITIALIZER(&files);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								/* Has a hook function been registered with fatal_signal_add_hook() (and not
 								 * cleared by fatal_signal_fork())? */
 								static bool added_hook;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								static void unlink_files(void *aux);
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								static void cancel_files(void *aux);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								static void do_unlink_files(void);
 								/* Registers 'file' to be unlinked when the program terminates via exit() or a
 								 * fatal signal. */
 								void
 								fatal_signal_add_file_to_unlink(const char *file)
 								{
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
+								    fatal_signal_init();
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_lock(&mutex);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								    if (!added_hook) {
 								        added_hook = true;
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								        fatal_signal_add_hook(unlink_files, cancel_files, NULL, true);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								    }
-												Convert shash users that don't use the 'data' value to sset instead.

In each of the cases converted here, an shash was used simply to maintain
a set of strings, with the shash_nodes' 'data' values set to NULL.  This
commit converts them to use sset instead.

											
										
										
											2011-03-25 15:26:30 -07:00
+								    sset_add(&files, file);
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_unlock(&mutex);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
 								/* Unregisters 'file' from being unlinked when the program terminates via
 								 * exit() or a fatal signal. */
 								void
 								fatal_signal_remove_file_to_unlink(const char *file)
 								{
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
+								    fatal_signal_init();
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_lock(&mutex);
-												Convert shash users that don't use the 'data' value to sset instead.

In each of the cases converted here, an shash was used simply to maintain
a set of strings, with the shash_nodes' 'data' values set to NULL.  This
commit converts them to use sset instead.

											
										
										
											2011-03-25 15:26:30 -07:00
+								    sset_find_and_delete(&files, file);
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_unlock(&mutex);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												fatal-signal: New function fatal_signal_unlink_file_now().

This is a helper function that combines two actions that callers commonly
wanted.  It will have an additional user in an upcoming commit.

											
										
										
											2009-09-21 12:37:20 -07:00
+								/* Like fatal_signal_remove_file_to_unlink(), but also unlinks 'file'.
 								 * Returns 0 if successful, otherwise a positive errno value. */
 								int
 								fatal_signal_unlink_file_now(const char *file)
 								{
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
+								    int error;
 								    fatal_signal_init();
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_lock(&mutex);
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
 								    error = unlink(file) ? errno : 0;
-												fatal-signal: New function fatal_signal_unlink_file_now().

This is a helper function that combines two actions that callers commonly
wanted.  It will have an additional user in an upcoming commit.

											
										
										
											2009-09-21 12:37:20 -07:00
+								    if (error) {
-												Replace all uses of strerror() by ovs_strerror(), for thread safety.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-06-24 10:54:49 -07:00
+								        VLOG_WARN("could not unlink \"%s\" (%s)", file, ovs_strerror(error));
-												fatal-signal: New function fatal_signal_unlink_file_now().

This is a helper function that combines two actions that callers commonly
wanted.  It will have an additional user in an upcoming commit.

											
										
										
											2009-09-21 12:37:20 -07:00
+								    }
 								    fatal_signal_remove_file_to_unlink(file);
-												clang: Add annotations for thread safety check.

This commit adds annotations for thread safety check. And the
check can be conducted by using -Wthread-safety flag in clang.

Co-authored-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-07-30 15:31:48 -07:00
+								    ovs_mutex_unlock(&mutex);
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
-												fatal-signal: New function fatal_signal_unlink_file_now().

This is a helper function that combines two actions that callers commonly
wanted.  It will have an additional user in an upcoming commit.

											
										
										
											2009-09-21 12:37:20 -07:00
+								    return error;
 								}
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								static void
-												Rename UNUSED macro to OVS_UNUSED to avoid naming conflict.

Requested by Jean Tourrilhes <jt@hpl.hp.com>.

											
										
										
											2010-02-11 10:59:47 -08:00
+								unlink_files(void *aux OVS_UNUSED)
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								{
-												treewide: Remove trailing whitespace

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Jesse Gross <jesse@nicira.com>

											
										
										
											2010-08-30 00:24:53 -07:00
+								    do_unlink_files();
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								}
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								static void
-												Merge "master" into "next".

The main change here is the need to update all of the uses of UNUSED in
the next branch to OVS_UNUSED as it is now spelled on "master".

											
										
										
											2010-02-11 11:11:23 -08:00
+								cancel_files(void *aux OVS_UNUSED)
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								{
-												Convert shash users that don't use the 'data' value to sset instead.

In each of the cases converted here, an shash was used simply to maintain
a set of strings, with the shash_nodes' 'data' values set to NULL.  This
commit converts them to use sset instead.

											
										
										
											2011-03-25 15:26:30 -07:00
+								    sset_clear(&files);
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								    added_hook = false;
 								}
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								static void
 								do_unlink_files(void)
 								{
-												Convert shash users that don't use the 'data' value to sset instead.

In each of the cases converted here, an shash was used simply to maintain
a set of strings, with the shash_nodes' 'data' values set to NULL.  This
commit converts them to use sset instead.

											
										
										
											2011-03-25 15:26:30 -07:00
+								    const char *file;
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
-												Convert shash users that don't use the 'data' value to sset instead.

In each of the cases converted here, an shash was used simply to maintain
a set of strings, with the shash_nodes' 'data' values set to NULL.  This
commit converts them to use sset instead.

											
										
										
											2011-03-25 15:26:30 -07:00
+								    SSET_FOR_EACH (file, &files) {
 								        unlink(file);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								    }
 								}
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								/* Clears all of the fatal signal hooks without executing them.  If any of the
 								 * hooks passed a 'cancel_cb' function to fatal_signal_add_hook(), then those
 								 * functions will be called, allowing them to free resources, etc.
 								 *
-												fatal-signal: Don't share signal fds/handles with forked process.

The signal_fds pipe and wevent are a mechanism to wake up the process
after it received a signal and stored the number for the future
processing.  They are not intended for inter-process communication.
However, in the current code, descriptors are not closed on fork().

The main scenario where we use fork() is a monitor process.  Monitor
doesn't actually use poll loops and doesn't wait on the descriptor.
But when a child process is killed, it (child) sends a byte to itself,
then it wakes up due to POLLIN on the pipe and terminates itself after
processing all the callbacks.  The byte stays unread.  And the pipe is
still open in the monitor process.  When child dies, the monitor wakes
up and forks again.  New child inherits the same pipe that still
contains unread data.  This data is never read, so the child will
constantly wake itself up for no reason.

Interestingly enough raise(SIGSEGV) doesn't immediately kill the
process.  The execution continues until the end of a signal handler,
so we're still able to write a byte to a pipe even in this case.
Presumably because we don't have SA_NODEFER.

Fix the issue by re-creating the pipe/event on fork.  This way
every new child will have its own notification channel and will
not wake up any other processes.

There was already an attempt to fix the issue, but it didn't get a
follow up (see the reported-at tag).  This is an alternative solution.

Fixes: ff8decf1a318 ("daemon: Add support for process monitoring and restart.")
Reported-at: https://patchwork.ozlabs.org/project/openvswitch/patch/20221019093147.2072-1-lifengqi@inspur.com/
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-06-08 21:16:59 +02:00
+								 * Also re-creates wake-up events, so signals in one of the processes do not
 								 * wake up the other one.
 								 *
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								 * Following a fork, one of the resulting processes can call this function to
 								 * allow it to terminate without calling the hooks registered before calling
 								 * this function.  New hooks registered after calling this function will take
 								 * effect normally. */
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								void
 								fatal_signal_fork(void)
 								{
 								    size_t i;
-												fatal-signal: Make thread-safe.

Signed-off-by: Ben Pfaff <blp@nicira.com>

											
										
										
											2013-05-09 10:54:04 -07:00
+								    assert_single_threaded();
-												fatal-signal: Don't share signal fds/handles with forked process.

The signal_fds pipe and wevent are a mechanism to wake up the process
after it received a signal and stored the number for the future
processing.  They are not intended for inter-process communication.
However, in the current code, descriptors are not closed on fork().

The main scenario where we use fork() is a monitor process.  Monitor
doesn't actually use poll loops and doesn't wait on the descriptor.
But when a child process is killed, it (child) sends a byte to itself,
then it wakes up due to POLLIN on the pipe and terminates itself after
processing all the callbacks.  The byte stays unread.  And the pipe is
still open in the monitor process.  When child dies, the monitor wakes
up and forks again.  New child inherits the same pipe that still
contains unread data.  This data is never read, so the child will
constantly wake itself up for no reason.

Interestingly enough raise(SIGSEGV) doesn't immediately kill the
process.  The execution continues until the end of a signal handler,
so we're still able to write a byte to a pipe even in this case.
Presumably because we don't have SA_NODEFER.

Fix the issue by re-creating the pipe/event on fork.  This way
every new child will have its own notification channel and will
not wake up any other processes.

There was already an attempt to fix the issue, but it didn't get a
follow up (see the reported-at tag).  This is an alternative solution.

Fixes: ff8decf1a318 ("daemon: Add support for process monitoring and restart.")
Reported-at: https://patchwork.ozlabs.org/project/openvswitch/patch/20221019093147.2072-1-lifengqi@inspur.com/
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>

											
										
										
											2023-06-08 21:16:59 +02:00
+								    fatal_signal_destroy_wakeup_events();
 								    fatal_signal_create_wakeup_events();
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								    for (i = 0; i < n_hooks; i++) {
 								        struct hook *h = &hooks[i];
 								        if (h->cancel_cb) {
 								            h->cancel_cb(h->aux);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								        }
 								    }
-												fatal-signal: After fork, clear hooks instead of disabling them.

Until now, fatal_signal_fork() has simply disabled all the fatal signal
callback hooks.  This worked fine, because a daemon process forked only
once and the parent didn't do much before it exited.

But upcoming commits will introduce a --monitor option, which requires
processes to fork multiple times.  Sometimes the parent process will fork,
then run for a while, then fork again.  It's not good to disable the
hooks in the child process in such a case, because that prevents e.g.
pidfiles from being removed at the child's exit.

So this commit changes the semantics of fatal_signal_fork() to just
clearing out hooks.  After hooks are cleared, new hooks can be added and
will be executed on process termination in the usual way.

This commit also introduces a cancellation callback function so that a
canceled hook can free resources.

											
										
										
											2010-01-15 15:28:14 -08:00
+								    n_hooks = 0;
-												fatal-signal: Run signal hooks outside of actual signal handlers.

Rather than running signal hooks directly from the actual signal
handler, simply record the fact that the signal occurred and run
the hook next time around the poll loop.  This allows significantly
more freedom as to what can actually be done in the signal hooks.

											
										
										
											2009-12-08 14:11:22 -08:00
 								    /* Raise any signals that we have already received with the default
 								     * handler. */
 								    if (stored_sig_nr != SIG_ATOMIC_MAX) {
 								        raise(stored_sig_nr);
-												Import from old repository commit 61ef2b42a9c4ba8e1600f15bb0236765edc2ad45.

											
										
										
											2009-07-08 13:19:16 -07:00
+								    }
 								}
-												process: block signals while spawning child processes

Between fork() and execvp() calls in the process_start()
function both child and parent processes share the same
file descriptors.  This means that, if a child process
received a signal during this time interval, then it could
potentially write data to a shared file descriptor.

One such example is fatal signal handler, where, if
child process received SIGTERM signal, then it would
write data into pipe.  Then a read event would occur
on the other end of the pipe where parent process is
listening and this would make the parent process incorrectly
believe that it was the one that received SIGTERM.
Also, since parent process never reads data from this
pipe, then this bug would make parent process to consume
100% CPU by immediately waking up from the event loop.

This patch will help to avoid this problem by blocking
signals until child closes all its file descriptors.

Signed-off-by: Ansis Atteka <aatteka@nicira.com>
Reported-by: Suganya Ramachandran <suganyar@vmware.com>
Issue: 1255110

											
										
										
											2014-05-23 14:15:28 -07:00
 								#ifndef _WIN32
 								/* Builds a signal set holding every entry of 'fatal_signals' and adds it to
 								 * the blocked set via xpthread_sigmask(SIG_BLOCK, ...).  The mask that was
 								 * in effect before the call is stored into '*prev_mask'. */
 								void
 								fatal_signal_block(sigset_t *prev_mask)
 								{
 								    sigset_t to_block;
 								    size_t idx;
 								    sigemptyset(&to_block);
 								    /* Collect each fatal signal number into the set to be blocked. */
 								    for (idx = 0; idx < ARRAY_SIZE(fatal_signals); idx++) {
 								        sigaddset(&to_block, fatal_signals[idx]);
 								    }
 								    xpthread_sigmask(SIG_BLOCK, &to_block, prev_mask);
 								}
 								#endif