2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-31 14:25:26 +00:00

backtrace: Extend the backtrace functionality.

Use the backtrace functions that is provided by libc, this allows us
to get backtrace that is independent of the current memory map of the
process.  Which in turn can be used for debugging/tracing purpose.
The backtrace is not 100% accurate due to various optimizations, most
notably "-fomit-frame-pointer" and LTO.  This might result that the
line in source file doesn't correspond to the real line.  However, it
should be able to pinpoint at least the function where the backtrace
was called.

The implementation is determined during compilation based on available
libraries.  Libunwind has higher priority if both methods are available
to keep the compatibility with current behavior.

The backtrace is not marked as signal safe however the backtrace manual
page gives more detailed explanation why it might be the case [0].
Load the "libgcc" or equivalent in advance within the "fatal_signal_init"
which should ensure that subsequent calls to backtrace* do not call
malloc and are signal safe.

The typical backtrace will look similar to the one below:
 /lib64/libopenvswitch-3.1.so.0(backtrace_capture+0x1e) [0x7fc5db298dfe]
 /lib64/libopenvswitch-3.1.so.0(log_backtrace_at+0x57) [0x7fc5db2999e7]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_complete+0x7b) [0x7fc5db56247b]
 /lib64/libovsdb-3.1.so.0(ovsdb_txn_propose_commit_block+0x8d) [0x7fc5db563a8d]
 ovsdb-server(+0xa661) [0x562cfce2e661]
 ovsdb-server(+0x7e39) [0x562cfce2be39]
 /lib64/libc.so.6(+0x27b4a) [0x7fc5db048b4a]
 /lib64/libc.so.6(__libc_start_main+0x8b) [0x7fc5db048c0b]
 ovsdb-server(+0x8c35) [0x562cfce2cc35]

backtrace.h elaborates on how to effectively get the line information
associated with the addressed presented in the backtrace.

[0]
backtrace() and backtrace_symbols_fd() don't call malloc() explicitly,
but they are part of libgcc, which gets loaded dynamically when first
used.  Dynamic loading usually triggers a call to malloc(3).  If you
need certain calls to these two functions to not allocate memory (in
signal handlers, for example), you need to make sure libgcc is loaded
beforehand

Reported-at: https://bugzilla.redhat.com/2177760
Signed-off-by: Ales Musil <amusil@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
Ales Musil
2023-05-30 09:34:12 +02:00
committed by Ilya Maximets
parent 474a179aff
commit 759a29dc2d
9 changed files with 244 additions and 72 deletions

View File

@@ -148,6 +148,9 @@ void vlog_set_syslog_target(const char *target);
/* Write directly to log file. */
void vlog_direct_write_to_log_file_unsafe(const char *s);
/* Return the current log file descriptor. */
int vlog_get_log_file_fd_unsafe(void);
/* Initialization. */
void vlog_init(void);
void vlog_enable_async(void);

View File

@@ -32,12 +32,27 @@ VLOG_DEFINE_THIS_MODULE(backtrace);
void
backtrace_capture(struct backtrace *b)
{
void *frames[BACKTRACE_MAX_FRAMES];
int i;
b->n_frames = backtrace(b->frames, BACKTRACE_MAX_FRAMES);
}
b->n_frames = backtrace(frames, BACKTRACE_MAX_FRAMES);
for (i = 0; i < b->n_frames; i++) {
b->frames[i] = (uintptr_t) frames[i];
void
backtrace_format(struct ds *ds, const struct backtrace *bt,
const char *delimiter)
{
if (bt->n_frames) {
char **symbols = backtrace_symbols(bt->frames, bt->n_frames);
if (!symbols) {
return;
}
for (int i = 0; i < bt->n_frames - 1; i++) {
ds_put_format(ds, "%s%s", symbols[i], delimiter);
}
ds_put_format(ds, "%s", symbols[bt->n_frames - 1]);
free(symbols);
}
}
@@ -47,23 +62,14 @@ backtrace_capture(struct backtrace *backtrace)
{
backtrace->n_frames = 0;
}
#endif
static char *
backtrace_format(const struct backtrace *b, struct ds *ds)
void
backtrace_format(struct ds *ds, const struct backtrace *bt OVS_UNUSED,
const char *delimiter OVS_UNUSED)
{
if (b->n_frames) {
int i;
ds_put_cstr(ds, " (backtrace:");
for (i = 0; i < b->n_frames; i++) {
ds_put_format(ds, " 0x%08"PRIxPTR, b->frames[i]);
}
ds_put_cstr(ds, ")");
}
return ds_cstr(ds);
ds_put_cstr(ds, "backtrace() is not supported!\n");
}
#endif
void
log_backtrace_at(const char *msg, const char *where)
@@ -77,41 +83,85 @@ log_backtrace_at(const char *msg, const char *where)
}
ds_put_cstr(&ds, where);
VLOG_ERR("%s", backtrace_format(&b, &ds));
ds_put_cstr(&ds, " backtrace:\n");
backtrace_format(&ds, &b, "\n");
VLOG_ERR("%s", ds_cstr_ro(&ds));
ds_destroy(&ds);
}
#if defined(HAVE_UNWIND) || defined(HAVE_BACKTRACE)
static bool
read_received_backtrace(int fd, void *dest, size_t len)
{
VLOG_DBG("%s fd %d", __func__, fd);
fcntl(fd, F_SETFL, O_NONBLOCK);
memset(dest, 0, len);
int byte_read = read(fd, dest, len);
if (byte_read < 0) {
VLOG_ERR("Read fd %d failed: %s", fd, ovs_strerror(errno));
}
return byte_read > 0;;
}
#else
static bool
read_received_backtrace(int fd OVS_UNUSED, void *dest OVS_UNUSED,
size_t len OVS_UNUSED)
{
return false;
}
#endif
#ifdef HAVE_UNWIND
void
log_received_backtrace(int fd) {
int byte_read;
log_received_backtrace(int fd)
{
struct unw_backtrace backtrace[UNW_MAX_DEPTH];
VLOG_WARN("%s fd %d", __func__, fd);
fcntl(fd, F_SETFL, O_NONBLOCK);
memset(backtrace, 0, UNW_MAX_BUF);
if (read_received_backtrace(fd, backtrace, UNW_MAX_BUF)) {
struct ds ds = DS_EMPTY_INITIALIZER;
ds_put_cstr(&ds, BACKTRACE_DUMP_MSG);
byte_read = read(fd, backtrace, UNW_MAX_BUF);
if (byte_read < 0) {
VLOG_ERR("Read fd %d failed: %s", fd,
ovs_strerror(errno));
} else if (byte_read > 0) {
VLOG_WARN("SIGSEGV detected, backtrace:");
for (int i = 0; i < UNW_MAX_DEPTH; i++) {
if (backtrace[i].func[0] == 0) {
break;
}
VLOG_WARN("0x%016"PRIxPTR" <%s+0x%"PRIxPTR">\n",
backtrace[i].ip,
backtrace[i].func,
backtrace[i].offset);
ds_put_format(&ds, "0x%016"PRIxPTR" <%s+0x%"PRIxPTR">\n",
backtrace[i].ip,
backtrace[i].func,
backtrace[i].offset);
}
VLOG_WARN("%s", ds_cstr_ro(&ds));
ds_destroy(&ds);
}
}
#else /* !HAVE_UNWIND */
#elif HAVE_BACKTRACE
void
log_received_backtrace(int daemonize_fd OVS_UNUSED) {
VLOG_WARN("Backtrace using libunwind not supported.");
log_received_backtrace(int fd)
{
struct backtrace bt;
if (read_received_backtrace(fd, &bt, sizeof bt)) {
struct ds ds = DS_EMPTY_INITIALIZER;
bt.n_frames = MIN(bt.n_frames, BACKTRACE_MAX_FRAMES);
ds_put_cstr(&ds, BACKTRACE_DUMP_MSG);
backtrace_format(&ds, &bt, "\n");
VLOG_WARN("%s", ds_cstr_ro(&ds));
ds_destroy(&ds);
}
}
#endif /* HAVE_UNWIND */
#else
void
log_received_backtrace(int daemonize_fd OVS_UNUSED)
{
VLOG_WARN("Backtrace using libunwind or backtrace() is not supported.");
}
#endif

View File

@@ -36,41 +36,53 @@
* log_backtrace_msg("your message"); <-- with a message
*
*
* A typical log will look like the following. The hex numbers listed after
* "backtrace" are the addresses of the backtrace.
* A typical backtrace will look like the following example:
* /lib64/libopenvswitch-3.1.so.0(backtrace_capture+0x1e) [0x7fc5db298dfe]
* /lib64/libopenvswitch-3.1.so.0(log_backtrace_at+0x57) [0x7fc5db2999e7]
* /lib64/libovsdb-3.1.so.0(ovsdb_txn_complete+0x7b) [0x7fc5db56247b]
* /lib64/libovsdb-3.1.so.0(ovsdb_txn_propose_commit_block+0x8d)
* [0x7fc5db563a8d]
* ovsdb-server(+0xa661) [0x562cfce2e661]
* ovsdb-server(+0x7e39) [0x562cfce2be39]
* /lib64/libc.so.6(+0x27b4a) [0x7fc5db048b4a]
* /lib64/libc.so.6(__libc_start_main+0x8b) [0x7fc5db048c0b]
* ovsdb-server(+0x8c35) [0x562cfce2cc35]
*
* 2014-03-13T23:18:11.979Z|00002|backtrace(revalidator_6)|ERR|lib/dpif-netdev.c:1312: (backtrace: 0x00521f57 0x00460365 0x00463ea4 0x0046470b 0x0043b32d 0x0043bac3 0x0043bae2 0x0043943b 0x004c22b3 0x2b5b3ac94e9a 0x2b5b3b4a33fd)
* GDB can be used to view the exact line of the code for particular backtrace.
* One thing to keep in mind is that the lines in source files might not
* 100% correspond with the backtrace due to various optimizations as LTO etc.
* (The effect can be seen in this example).
*
* The following bash command can be used to view backtrace in
* a more readable form.
* addr2line -p -e vswitchd/ovs-vswitchd <cut-and-paste back traces>
* Assuming that debuginfo for the library or binary is installed load it to
* GDB:
* $ gdb ovsdb-server
* (gdb) list *(+0x7e39)
* 0x7e39 is in main (ovsdb/ovsdb-server.c:278).
* (gdb) list *(+0xa661)
* 0xa661 is in commit_txn (ovsdb/ovsdb-server.c:1173)
*
* An typical run and output will look like:
* addr2line -p -e vswitchd/ovs-vswitchd 0x00521f57 0x00460365 0x00463ea4
* 0x0046470b 0x0043b32d 0x0043bac3 0x0043bae2 0x0043943b 0x004c22b3
* 0x2b5b3ac94e9a 0x2b5b3b4a33fd
* $ gdb /lib64/libovsdb-3.1.so.0
* (gdb) list *(ovsdb_txn_propose_commit_block+0x8d)
* 0x3aa8d is in ovsdb_txn_propose_commit_block (ovsdb/transaction.c:1328)
* (gdb) list *(ovsdb_txn_complete+0x7b)
* 0x3947b is in ovsdb_txn_complete (./include/openvswitch/list.h:321)
*
* openvswitch/lib/backtrace.c:33
* openvswitch/lib/dpif-netdev.c:1312
* openvswitch/lib/dpif.c:937
* openvswitch/lib/dpif.c:1258
* openvswitch/ofproto/ofproto-dpif-upcall.c:1440
* openvswitch/ofproto/ofproto-dpif-upcall.c:1595
* openvswitch/ofproto/ofproto-dpif-upcall.c:160
* openvswitch/ofproto/ofproto-dpif-upcall.c:717
* openvswitch/lib/ovs-thread.c:268
* ??:0
* ??:0
* $ gdb /lib64/libopenvswitch-3.1.so.0
* (gdb) list *(log_backtrace_at+0x57)
* 0x999e7 is in log_backtrace_at (lib/backtrace.c:77)
* (gdb) list *(backtrace_capture+0x1e)
* 0x98dfe is in backtrace_capture (lib/backtrace.c:35)
*/
#define log_backtrace() log_backtrace_at(NULL, OVS_SOURCE_LOCATOR);
#define log_backtrace_msg(msg) log_backtrace_at(msg, OVS_SOURCE_LOCATOR);
#define BACKTRACE_MAX_FRAMES 31
#define BACKTRACE_DUMP_MSG "SIGSEGV detected, backtrace:\n"
struct backtrace {
int n_frames;
uintptr_t frames[BACKTRACE_MAX_FRAMES];
void *frames[BACKTRACE_MAX_FRAMES];
};
#ifdef HAVE_UNWIND
@@ -88,6 +100,8 @@ struct unw_backtrace {
void backtrace_capture(struct backtrace *);
void log_backtrace_at(const char *msg, const char *where);
void backtrace_format(struct ds *, const struct backtrace *,
const char *delimiter);
void log_received_backtrace(int fd);
#endif /* backtrace.h */

View File

@@ -35,10 +35,14 @@
#include "openvswitch/type-props.h"
#ifdef HAVE_UNWIND
#if defined(HAVE_UNWIND) || defined(HAVE_BACKTRACE)
#include "daemon-private.h"
#endif
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
#endif
#ifndef SIG_ATOMIC_MAX
#define SIG_ATOMIC_MAX TYPE_MAXIMUM(sig_atomic_t)
#endif
@@ -94,6 +98,17 @@ fatal_signal_init(void)
inited = true;
ovs_mutex_init_recursive(&mutex);
/* The dummy backtrace is needed.
* See comment for send_backtrace_to_monitor(). */
struct backtrace dummy_bt;
backtrace_capture(&dummy_bt);
if (!dummy_bt.n_frames) {
VLOG_DBG("Capturing of dummy backtrace has failed.");
}
#ifndef _WIN32
xpipe_nonblocking(signal_fds);
#else
@@ -181,7 +196,8 @@ llong_to_hex_str(unsigned long long value, char *str)
* library functions used here must be async-signal-safe.
*/
static inline void
send_backtrace_to_monitor(void) {
send_backtrace_to_monitor(void)
{
/* volatile added to prevent a "clobbered" error on ppc64le with gcc */
volatile int dep;
struct unw_backtrace unw_bt[UNW_MAX_DEPTH];
@@ -211,11 +227,10 @@ send_backtrace_to_monitor(void) {
/* Since there is no monitor daemon running, write backtrace
* in current process.
*/
char str[] = "SIGSEGV detected, backtrace:\n";
char ip_str[16], offset_str[6];
char line[64], fn_name[UNW_MAX_FUNCN];
vlog_direct_write_to_log_file_unsafe(str);
vlog_direct_write_to_log_file_unsafe(BACKTRACE_DUMP_MSG);
for (int i = 0; i < dep; i++) {
memset(line, 0, sizeof line);
@@ -239,6 +254,36 @@ send_backtrace_to_monitor(void) {
}
}
}
#elif HAVE_BACKTRACE
/* Send the backtrace to monitor thread.
*
* Note that this runs in the signal handling context, any system
* library functions used here must be async-signal-safe.
* backtrace() is only signal safe if the "libgcc" or equivalent was loaded
* before the signal handler. In order to keep it safe the fatal_signal_init()
* should always call backtrace_capture which will ensure that "libgcc" or
* equivlent is loaded.
*/
static inline void
send_backtrace_to_monitor(void)
{
struct backtrace bt;
backtrace_capture(&bt);
if (monitor && daemonize_fd > -1) {
ignore(write(daemonize_fd, &bt, sizeof bt));
} else {
int log_fd = vlog_get_log_file_fd_unsafe();
if (log_fd < 0) {
return;
}
vlog_direct_write_to_log_file_unsafe(BACKTRACE_DUMP_MSG);
backtrace_symbols_fd(bt.frames, bt.n_frames, log_fd);
}
}
#else
static inline void
send_backtrace_to_monitor(void) {

View File

@@ -141,12 +141,8 @@ ovsdb_internal_error(struct ovsdb_error *inner_error,
backtrace_capture(&backtrace);
if (backtrace.n_frames) {
int i;
ds_put_cstr(&ds, " (backtrace:");
for (i = 0; i < backtrace.n_frames; i++) {
ds_put_format(&ds, " 0x%08"PRIxPTR, backtrace.frames[i]);
}
backtrace_format(&ds, &backtrace, ", ");
ds_put_char(&ds, ')');
}

View File

@@ -664,6 +664,13 @@ vlog_direct_write_to_log_file_unsafe(const char *s)
}
}
int
vlog_get_log_file_fd_unsafe(void)
OVS_NO_THREAD_SAFETY_ANALYSIS
{
return log_fd;
}
/* Returns 'false' if 'facility' is not a valid string. If 'facility'
* is a valid string, sets 'value' with the integer value of 'facility'
* and returns 'true'. */

View File

@@ -360,8 +360,12 @@ AC_DEFUN([OVS_CHECK_DBDIR],
dnl Defines HAVE_BACKTRACE if backtrace() is found.
AC_DEFUN([OVS_CHECK_BACKTRACE],
[AC_SEARCH_LIBS([backtrace], [execinfo ubacktrace],
[AC_DEFINE([HAVE_BACKTRACE], [1],
[Define to 1 if you have backtrace(3).])])])
[HAVE_BACKTRACE=yes], [HAVE_BACKTRACE=no])
if test "$HAVE_BACKTRACE" = "yes"; then
AC_DEFINE([HAVE_BACKTRACE], [1], [Define to 1 if you have backtrace(3).])
fi
AM_CONDITIONAL([HAVE_BACKTRACE], [test "$HAVE_BACKTRACE" = "yes"])
AC_SUBST([HAVE_BACKTRACE])])
dnl Defines HAVE_PERF_EVENT if linux/perf_event.h is found.
AC_DEFUN([OVS_CHECK_PERF_EVENT],

View File

@@ -2,6 +2,8 @@
HAVE_OPENSSL='@HAVE_OPENSSL@'
OPENSSL_SUPPORTS_SNI='@OPENSSL_SUPPORTS_SNI@'
HAVE_UNBOUND='@HAVE_UNBOUND@'
HAVE_BACKTRACE='@HAVE_BACKTRACE@'
HAVE_UNWIND='@HAVE_UNWIND@'
EGREP='@EGREP@'
PYTHON3='@PYTHON3@'
CFLAGS='@CFLAGS@'

View File

@@ -234,3 +234,54 @@ OVS_WAIT_UNTIL([sc query ovsdb-server | grep STATE | grep STOPPED > /dev/null 2>
AT_CHECK([sc delete ovsdb-server], [0], [[[SC]] DeleteService SUCCESS
])
AT_CLEANUP
AT_SETUP([backtrace without monitor])
AT_SKIP_IF([test "$HAVE_BACKTRACE" = "no" && test "$HAVE_UNWIND" = "no"])
AT_SKIP_IF([test "$IS_WIN32" = "yes"])
# This test intentionally causes SIGSEGV, so make Address Sanitizer ignore it.
ASAN_OPTIONS=$ASAN_OPTIONS:handle_segv=0; export ASAN_OPTIONS
# Skip it if UB Sanitizer is being used. There's no way to disable the
# SEGV check at runtime.
AT_SKIP_IF([test $TESTS_WITH_UBSAN = yes])
AT_CHECK([ovsdb-server --detach --no-chdir --pidfile --no-db \
--log-file --verbose=DBG], [0], [ignore], [ignore])
OVS_WAIT_UNTIL([test -s ovsdb-server.pid])
child=$(cat ovsdb-server.pid)
AT_CAPTURE_FILE([ovsdb-server.log])
AT_CHECK([kill -SEGV $child])
OVS_WAIT_UNTIL([grep -q "^SIGSEGV detected, backtrace:" ovsdb-server.log])
AT_CLEANUP
AT_SETUP([backtrace with monitor])
AT_SKIP_IF([test "$HAVE_BACKTRACE" = "no" && test "$HAVE_UNWIND" = "no"])
AT_SKIP_IF([test "$IS_WIN32" = "yes"])
# This test intentionally causes SIGSEGV, so make Address Sanitizer ignore it.
ASAN_OPTIONS=$ASAN_OPTIONS:handle_segv=0; export ASAN_OPTIONS
# Skip it if UB Sanitizer is being used. There's no way to disable the
# SEGV check at runtime.
AT_SKIP_IF([test $TESTS_WITH_UBSAN = yes])
on_exit 'kill $(cat *.pid)'
AT_CHECK([ovsdb-server --detach --monitor --no-chdir --pidfile --no-db \
--log-file --verbose=DBG], [0], [ignore], [ignore])
OVS_WAIT_UNTIL([test -s ovsdb-server.pid])
child=$(cat ovsdb-server.pid)
AT_CAPTURE_FILE([ovsdb-server.log])
AT_CHECK([kill -SEGV $child])
OVS_WAIT_UNTIL([grep -q "backtrace(monitor)|WARN|SIGSEGV detected, backtrace:" ovsdb-server.log])
OVS_WAIT_UNTIL([grep -q "daemon_unix(monitor)|ERR|1 crashes: pid .* died, killed (Segmentation fault)" ovsdb-server.log])
AT_CLEANUP