bind/lib/isc/netmgr/tlsdns.c

/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#include <unistd.h>
#include <uv.h>
#include <isc/atomic.h>
#include <isc/buffer.h>
#include <isc/condition.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/netmgr.h>
#include <isc/random.h>
#include <isc/refcount.h>
#include <isc/region.h>
#include <isc/result.h>
#include <isc/sockaddr.h>
#include <isc/thread.h>
#include <isc/util.h>
#include "netmgr-int.h"
#include "uv-compat.h"
#define TLSDNS_CLIENTS_PER_CONN 23
/*%<
*
* Maximum number of simultaneous handles in flight supported for a single
* connected TLSDNS socket. This value was chosen arbitrarily, and may be
* changed in the future.
*/
static void
dnslisten_readcb(isc_nmhandle_t *handle, isc_result_t eresult,
isc_region_t *region, void *arg);
static void
resume_processing(void *arg);
static void
tlsdns_close_direct(isc_nmsocket_t *sock);
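/*
 * Decode the two-byte, network-byte-order length prefix that frames
 * every DNS message carried over TCP or TLS.
 */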
static inline size_t
dnslen(unsigned char *base) {
return ((base[0] << 8) + (base[1]));
}
/*
* COMPAT CODE
*/
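/*
 * Allocate a zeroed netievent of the given type from the manager's
 * event pool; the caller fills in the remaining members before
 * enqueueing it on a network thread.
 */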
static void *
isc__nm_get_ievent(isc_nm_t *mgr, isc__netievent_type type) {
isc__netievent_storage_t *event = isc_mempool_get(mgr->evpool);
*event = (isc__netievent_storage_t){ .ni.type = type };
return (event);
}
/*
* Regular TCP buffer, should suffice in most cases.
*/
#define NM_REG_BUF 4096
/*
* Two full DNS packets with lengths.
* netmgr receives 64k at most so there's no risk
* of overrun.
*/
#define NM_BIG_BUF ((65535 + 2) * 2)
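/*
 * Make sure sock->buf can hold at least 'len' bytes: allocate a
 * regular-sized buffer on first use, or grow an existing (too small)
 * buffer straight to the maximum size.
 */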
static inline void
alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) {
REQUIRE(len <= NM_BIG_BUF);
if (sock->buf == NULL) {
/* We don't have the buffer at all */
size_t alloc_len = len < NM_REG_BUF ? NM_REG_BUF : NM_BIG_BUF;
sock->buf = isc_mem_allocate(sock->mgr->mctx, alloc_len);
sock->buf_size = alloc_len;
} else {
/* We have the buffer but it's too small */
sock->buf = isc_mem_reallocate(sock->mgr->mctx, sock->buf,
NM_BIG_BUF);
sock->buf_size = NM_BIG_BUF;
}
}
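/*
 * Called by libuv once the read-timeout timer handle has been fully
 * closed; mark the socket closed and finish tearing it down.
 */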
static void
timer_close_cb(uv_handle_t *handle) {
isc_nmsocket_t *sock = (isc_nmsocket_t *)uv_handle_get_data(handle);
REQUIRE(VALID_NMSOCK(sock));
atomic_store(&sock->closed, true);
tlsdns_close_direct(sock);
}
static void
dnstcp_readtimeout(uv_timer_t *timer) {
isc_nmsocket_t *sock =
(isc_nmsocket_t *)uv_handle_get_data((uv_handle_t *)timer);
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->tid == isc_nm_tid());
/* Close the TCP connection; its closure should fire ours. */
if (sock->outerhandle != NULL) {
isc_nmhandle_detach(&sock->outerhandle);
}
}
/*
* Accept callback for TCP-DNS connection.
*/
static isc_result_t
dnslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
isc_nmsocket_t *dnslistensock = (isc_nmsocket_t *)cbarg;
isc_nmsocket_t *dnssock = NULL;
isc_nmhandle_t *readhandle = NULL;
isc_nm_accept_cb_t accept_cb;
void *accept_cbarg;
REQUIRE(VALID_NMSOCK(dnslistensock));
REQUIRE(dnslistensock->type == isc_nm_tlsdnslistener);
if (result != ISC_R_SUCCESS) {
return (result);
}
accept_cb = dnslistensock->accept_cb;
accept_cbarg = dnslistensock->accept_cbarg;
if (accept_cb != NULL) {
result = accept_cb(handle, ISC_R_SUCCESS, accept_cbarg);
if (result != ISC_R_SUCCESS) {
return (result);
}
}
/* We need to create a 'wrapper' dnssocket for this connection */
dnssock = isc_mem_get(handle->sock->mgr->mctx, sizeof(*dnssock));
isc__nmsocket_init(dnssock, handle->sock->mgr, isc_nm_tlsdnssocket,
handle->sock->iface);
dnssock->extrahandlesize = dnslistensock->extrahandlesize;
isc__nmsocket_attach(dnslistensock, &dnssock->listener);
isc__nmsocket_attach(dnssock, &dnssock->self);
isc_nmhandle_attach(handle, &dnssock->outerhandle);
dnssock->peer = handle->sock->peer;
dnssock->read_timeout = atomic_load(&handle->sock->mgr->init);
dnssock->tid = isc_nm_tid();
dnssock->closehandle_cb = resume_processing;
uv_timer_init(&dnssock->mgr->workers[isc_nm_tid()].loop,
&dnssock->timer);
dnssock->timer.data = dnssock;
dnssock->timer_initialized = true;
uv_timer_start(&dnssock->timer, dnstcp_readtimeout,
dnssock->read_timeout, 0);
dnssock->timer_running = true;
/*
* Add a reference to handle to keep it from being freed by
* the caller. It will be detached in dnslisten_readcb() when
* the connection is closed or there is no more data to be read.
*/
isc_nmhandle_attach(handle, &readhandle);
isc_nm_read(readhandle, dnslisten_readcb, dnssock);
isc__nmsocket_detach(&dnssock);
return (ISC_R_SUCCESS);
}
/*
* Process a single packet from the incoming buffer.
*
* Return ISC_R_SUCCESS and attach 'handlep' to a handle if something
* was processed; return ISC_R_NOMORE if there isn't a full message
* to be processed.
*
* The caller will need to unreference the handle.
*/
static isc_result_t
processbuffer(isc_nmsocket_t *dnssock, isc_nmhandle_t **handlep) {
size_t len;
REQUIRE(VALID_NMSOCK(dnssock));
REQUIRE(dnssock->tid == isc_nm_tid());
REQUIRE(handlep != NULL && *handlep == NULL);
/*
* If we don't even have the length yet, we can't do
* anything.
*/
if (dnssock->buf_len < 2) {
return (ISC_R_NOMORE);
}
/*
* Process the first packet from the buffer, leaving
* the rest (if any) for later.
*/
len = dnslen(dnssock->buf);
if (len <= dnssock->buf_len - 2) {
isc_nmhandle_t *dnshandle = NULL;
isc_nmsocket_t *listener = NULL;
isc_nm_recv_cb_t cb = NULL;
void *cbarg = NULL;
if (atomic_load(&dnssock->client) &&
dnssock->statichandle != NULL) {
isc_nmhandle_attach(dnssock->statichandle, &dnshandle);
} else {
dnshandle = isc__nmhandle_get(dnssock, NULL, NULL);
}
listener = dnssock->listener;
if (listener != NULL) {
cb = listener->recv_cb;
cbarg = listener->recv_cbarg;
} else if (dnssock->recv_cb != NULL) {
cb = dnssock->recv_cb;
cbarg = dnssock->recv_cbarg;
/*
* We need to clear the read callback *before*
* calling it, because it might make another
* call to isc_nm_read() and set up a new callback.
*/
isc__nmsocket_clearcb(dnssock);
}
if (cb != NULL) {
cb(dnshandle, ISC_R_SUCCESS,
&(isc_region_t){ .base = dnssock->buf + 2,
.length = len },
cbarg);
}
len += 2;
dnssock->buf_len -= len;
if (len > 0) {
memmove(dnssock->buf, dnssock->buf + len,
dnssock->buf_len);
}
*handlep = dnshandle;
return (ISC_R_SUCCESS);
}
return (ISC_R_NOMORE);
}
/*
* We've got a read on our underlying socket. Check whether
* we have a complete DNS packet and, if so, call the callback.
*/
static void
dnslisten_readcb(isc_nmhandle_t *handle, isc_result_t eresult,
isc_region_t *region, void *arg) {
isc_nmsocket_t *dnssock = (isc_nmsocket_t *)arg;
unsigned char *base = NULL;
bool done = false;
size_t len;
REQUIRE(VALID_NMSOCK(dnssock));
REQUIRE(dnssock->tid == isc_nm_tid());
REQUIRE(VALID_NMHANDLE(handle));
if (!isc__nmsocket_active(dnssock) || atomic_load(&dnssock->closing) ||
dnssock->outerhandle == NULL ||
(dnssock->listener != NULL &&
!isc__nmsocket_active(dnssock->listener)) ||
atomic_load(&dnssock->mgr->closing))
{
if (eresult == ISC_R_SUCCESS) {
eresult = ISC_R_CANCELED;
}
}
if (region == NULL || eresult != ISC_R_SUCCESS) {
isc_nm_recv_cb_t cb = dnssock->recv_cb;
void *cbarg = dnssock->recv_cbarg;
/* Connection closed */
dnssock->result = eresult;
isc__nmsocket_clearcb(dnssock);
if (atomic_load(&dnssock->client) && cb != NULL) {
cb(dnssock->statichandle, eresult, NULL, cbarg);
}
if (dnssock->self != NULL) {
isc__nmsocket_detach(&dnssock->self);
}
if (dnssock->outerhandle != NULL) {
isc__nmsocket_clearcb(dnssock->outerhandle->sock);
isc_nmhandle_detach(&dnssock->outerhandle);
}
if (dnssock->listener != NULL) {
isc__nmsocket_detach(&dnssock->listener);
}
/*
* Server connections will hold two handle references when
* shut down, but client (tlsdnsconnect) connections have
* only one.
*/
if (!atomic_load(&dnssock->client)) {
isc_nmhandle_detach(&handle);
}
return;
}
base = region->base;
len = region->length;
if (dnssock->buf_len + len > dnssock->buf_size) {
alloc_dnsbuf(dnssock, dnssock->buf_len + len);
}
memmove(dnssock->buf + dnssock->buf_len, base, len);
dnssock->buf_len += len;
dnssock->read_timeout = (atomic_load(&dnssock->keepalive)
? atomic_load(&dnssock->mgr->keepalive)
: atomic_load(&dnssock->mgr->idle));
do {
isc_result_t result;
isc_nmhandle_t *dnshandle = NULL;
result = processbuffer(dnssock, &dnshandle);
if (result != ISC_R_SUCCESS) {
/*
* There wasn't anything in the buffer to process.
*/
return;
}
/*
* We have a packet: stop timeout timers
*/
if (dnssock->timer_initialized) {
uv_timer_stop(&dnssock->timer);
}
if (atomic_load(&dnssock->sequential) ||
dnssock->recv_cb == NULL) {
/*
* There are two reasons we might want to pause here:
* - We're in sequential mode and we've received
* a whole packet, so we're done until it's been
* processed; or
* - We no longer have a read callback.
*/
isc_nm_pauseread(dnssock->outerhandle);
done = true;
} else {
/*
* We're pipelining, so we now resume processing
* packets until the clients-per-connection limit
* is reached (as determined by the number of
* active handles on the socket). When the limit
* is reached, pause reading.
*/
if (atomic_load(&dnssock->ah) >=
TLSDNS_CLIENTS_PER_CONN) {
isc_nm_pauseread(dnssock->outerhandle);
done = true;
}
}
isc_nmhandle_detach(&dnshandle);
} while (!done);
}
/*
* isc_nm_listentlsdns() works exactly as isc_nm_listentcpdns(), but over a TLS socket.
*/
isc_result_t
isc_nm_listentlsdns(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb,
void *cbarg, isc_nm_accept_cb_t accept_cb,
void *accept_cbarg, size_t extrahandlesize, int backlog,
isc_quota_t *quota, SSL_CTX *sslctx,
isc_nmsocket_t **sockp) {
isc_nmsocket_t *dnslistensock = isc_mem_get(mgr->mctx,
sizeof(*dnslistensock));
isc_result_t result;
REQUIRE(VALID_NM(mgr));
REQUIRE(sslctx != NULL);
isc__nmsocket_init(dnslistensock, mgr, isc_nm_tlsdnslistener, iface);
dnslistensock->recv_cb = cb;
dnslistensock->recv_cbarg = cbarg;
dnslistensock->accept_cb = accept_cb;
dnslistensock->accept_cbarg = accept_cbarg;
dnslistensock->extrahandlesize = extrahandlesize;
result = isc_nm_listentls(mgr, iface, dnslisten_acceptcb, dnslistensock,
extrahandlesize, backlog, quota, sslctx,
&dnslistensock->outer);
if (result == ISC_R_SUCCESS) {
atomic_store(&dnslistensock->listening, true);
*sockp = dnslistensock;
return (ISC_R_SUCCESS);
} else {
atomic_store(&dnslistensock->closed, true);
isc__nmsocket_detach(&dnslistensock);
return (result);
}
}
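/*
 * Illustrative sketch only, not part of this file's logic: assuming a
 * caller already has 'mgr', 'iface', 'sslctx', 'quota' and the two
 * callbacks, a listener would be set up roughly like this (0 and 10
 * are example values for extrahandlesize and backlog):
 *
 *	isc_nmsocket_t *listener = NULL;
 *	isc_result_t result = isc_nm_listentlsdns(
 *		mgr, iface, read_cb, read_cbarg, accept_cb, accept_cbarg,
 *		0, 10, quota, sslctx, &listener);
 *	if (result != ISC_R_SUCCESS) {
 *		... handle the error ...
 *	}
 */
/*
 * Stop-listening event handler, run on the listener's network thread:
 * mark the listener closed, clear its callbacks, and stop and detach
 * the underlying TLS listener socket.
 */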
void
isc__nm_async_tlsdnsstop(isc__networker_t *worker, isc__netievent_t *ev0) {
isc__netievent_tlsdnsstop_t *ievent =
(isc__netievent_tlsdnsstop_t *)ev0;
isc_nmsocket_t *sock = ievent->sock;
UNUSED(worker);
REQUIRE(isc__nm_in_netthread());
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->type == isc_nm_tlsdnslistener);
REQUIRE(sock->tid == isc_nm_tid());
atomic_store(&sock->listening, false);
atomic_store(&sock->closed, true);
isc__nmsocket_clearcb(sock);
if (sock->outer != NULL) {
isc__nm_tls_stoplistening(sock->outer);
isc__nmsocket_detach(&sock->outer);
}
}
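/*
 * Stop-listening entry point: enqueues a stop event so that the
 * actual teardown runs on the listener's own network thread.
 */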
void
isc__nm_tlsdns_stoplistening(isc_nmsocket_t *sock) {
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->type == isc_nm_tlsdnslistener);
isc__netievent_tlsdnsstop_t *ievent =
isc__nm_get_ievent(sock->mgr, netievent_tlsdnsstop);
isc__nmsocket_attach(sock, &ievent->sock);
isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
(isc__netievent_t *)ievent);
}
void
isc_nm_tlsdns_sequential(isc_nmhandle_t *handle) {
REQUIRE(VALID_NMHANDLE(handle));
if (handle->sock->type != isc_nm_tlsdnssocket ||
handle->sock->outerhandle == NULL)
{
return;
}
/*
* We don't want pipelining on this connection. That means
* that we need to pause after reading each request, and
* resume only after the request has been processed. This
* is done in resume_processing(), which is the socket's
* closehandle_cb callback, called whenever a handle
* is released.
*/
isc_nm_pauseread(handle->sock->outerhandle);
atomic_store(&handle->sock->sequential, true);
}
void
isc_nm_tlsdns_keepalive(isc_nmhandle_t *handle, bool value) {
REQUIRE(VALID_NMHANDLE(handle));
if (handle->sock->type != isc_nm_tlsdnssocket ||
handle->sock->outerhandle == NULL)
{
return;
}
atomic_store(&handle->sock->keepalive, value);
atomic_store(&handle->sock->outerhandle->sock->keepalive, value);
}
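/*
 * closehandle_cb for TLSDNS sockets, called whenever a handle on the
 * socket is released: restart the read timer once nothing is active,
 * then either deliver already-buffered messages or resume reading,
 * honouring sequential mode and the clients-per-connection limit.
 */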
static void
resume_processing(void *arg) {
isc_nmsocket_t *sock = (isc_nmsocket_t *)arg;
isc_result_t result;
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->tid == isc_nm_tid());
if (sock->type != isc_nm_tlsdnssocket || sock->outerhandle == NULL) {
return;
}
if (atomic_load(&sock->ah) == 0) {
/* Nothing is active; sockets can timeout now */
if (sock->timer_initialized) {
uv_timer_start(&sock->timer, dnstcp_readtimeout,
sock->read_timeout, 0);
sock->timer_running = true;
}
}
/*
* For sequential sockets: Process what's in the buffer, or
* if there aren't any messages buffered, resume reading.
*/
if (atomic_load(&sock->sequential)) {
isc_nmhandle_t *handle = NULL;
result = processbuffer(sock, &handle);
if (result == ISC_R_SUCCESS) {
if (sock->timer_initialized) {
uv_timer_stop(&sock->timer);
}
isc_nmhandle_detach(&handle);
} else if (sock->outerhandle != NULL) {
isc_nm_resumeread(sock->outerhandle);
}
return;
}
/*
* For pipelined sockets: If we're under the clients-per-connection
* limit, resume processing until we reach the limit again.
*/
do {
isc_nmhandle_t *dnshandle = NULL;
result = processbuffer(sock, &dnshandle);
if (result != ISC_R_SUCCESS) {
/*
* Nothing in the buffer; resume reading.
*/
if (sock->outerhandle != NULL) {
isc_nm_resumeread(sock->outerhandle);
}
break;
}
if (sock->timer_initialized) {
uv_timer_stop(&sock->timer);
}
isc_nmhandle_detach(&dnshandle);
} while (atomic_load(&sock->ah) < TLSDNS_CLIENTS_PER_CONN);
}
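/*
 * Completion callback for the send on the outer handle: run the
 * caller's send callback, free the length-prefixed buffer and the
 * request, and drop the reference taken in isc__nm_async_tlsdnssend().
 */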
static void
tlsdnssend_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
isc__nm_uvreq_t *req = (isc__nm_uvreq_t *)cbarg;
REQUIRE(VALID_UVREQ(req));
UNUSED(handle);
req->cb.send(req->handle, result, req->cbarg);
isc_mem_put(req->sock->mgr->mctx, req->uvbuf.base, req->uvbuf.len);
isc__nm_uvreq_put(&req, req->handle->sock);
isc_nmhandle_detach(&handle);
}
/*
* The socket is closing, outerhandle has been detached, listener is
* inactive, or the netmgr is closing: any operation on it should abort
* with ISC_R_CANCELED.
*/
static bool
inactive(isc_nmsocket_t *sock) {
return (!isc__nmsocket_active(sock) || atomic_load(&sock->closing) ||
sock->outerhandle == NULL ||
(sock->listener != NULL &&
!isc__nmsocket_active(sock->listener)) ||
atomic_load(&sock->mgr->closing));
}
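/*
 * Send event handler, run on the socket's network thread: abort with
 * ISC_R_CANCELED if the socket has gone inactive, otherwise pass the
 * length-prefixed buffer down to the outer TLS handle.
 */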
void
isc__nm_async_tlsdnssend(isc__networker_t *worker, isc__netievent_t *ev0) {
isc__netievent_tlsdnssend_t *ievent =
(isc__netievent_tlsdnssend_t *)ev0;
isc__nm_uvreq_t *req = ievent->req;
isc_nmsocket_t *sock = ievent->sock;
isc_nmhandle_t *sendhandle = NULL;
isc_region_t r;
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(VALID_UVREQ(req));
REQUIRE(worker->id == sock->tid);
REQUIRE(sock->tid == isc_nm_tid());
REQUIRE(sock->type == isc_nm_tlsdnssocket);
if (inactive(sock)) {
req->cb.send(req->handle, ISC_R_CANCELED, req->cbarg);
isc_mem_put(sock->mgr->mctx, req->uvbuf.base, req->uvbuf.len);
isc__nm_uvreq_put(&req, req->handle->sock);
return;
}
r.base = (unsigned char *)req->uvbuf.base;
r.length = req->uvbuf.len;
isc_nmhandle_attach(sock->outerhandle, &sendhandle);
isc_nm_send(sendhandle, &r, tlsdnssend_cb, req);
}
/*
* isc__nm_tlsdns_send sends buf to a peer on a socket.
*/
void
isc__nm_tlsdns_send(isc_nmhandle_t *handle, isc_region_t *region,
isc_nm_cb_t cb, void *cbarg) {
isc__nm_uvreq_t *uvreq = NULL;
REQUIRE(VALID_NMHANDLE(handle));
isc_nmsocket_t *sock = handle->sock;
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->type == isc_nm_tlsdnssocket);
if (inactive(sock)) {
cb(handle, ISC_R_CANCELED, cbarg);
return;
}
uvreq = isc__nm_uvreq_get(sock->mgr, sock);
isc_nmhandle_attach(handle, &uvreq->handle);
uvreq->cb.send = cb;
uvreq->cbarg = cbarg;
uvreq->uvbuf.base = isc_mem_get(sock->mgr->mctx, region->length + 2);
uvreq->uvbuf.len = region->length + 2;
*(uint16_t *)uvreq->uvbuf.base = htons(region->length);
memmove(uvreq->uvbuf.base + 2, region->base, region->length);
isc__netievent_tlsdnssend_t *ievent = NULL;
ievent = isc__nm_get_ievent(sock->mgr, netievent_tlsdnssend);
ievent->req = uvreq;
isc__nmsocket_attach(sock, &ievent->sock);
isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
(isc__netievent_t *)ievent);
}
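/*
 * Tear the socket down on its own network thread.  This proceeds in
 * stages across repeated invocations: drop the self-reference, then
 * close the timer (whose close callback re-enters this function), and
 * finally detach the outer handle and listener and mark the socket
 * closed.
 */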
static void
tlsdns_close_direct(isc_nmsocket_t *sock) {
REQUIRE(sock->tid == isc_nm_tid());
if (sock->timer_running) {
uv_timer_stop(&sock->timer);
sock->timer_running = false;
}
/* We don't need atomics here; it all runs in a single network thread */
if (sock->self != NULL) {
isc__nmsocket_detach(&sock->self);
} else if (sock->timer_initialized) {
/*
* We need to fire the timer callback to clean it up,
* it will then call us again (via detach) so that we
* can finally close the socket.
*/
sock->timer_initialized = false;
uv_timer_stop(&sock->timer);
uv_close((uv_handle_t *)&sock->timer, timer_close_cb);
} else {
/*
* At this point we're certain that there are no external
* references, so we can close everything.
*/
if (sock->outerhandle != NULL) {
isc__nmsocket_clearcb(sock->outerhandle->sock);
isc_nmhandle_detach(&sock->outerhandle);
}
if (sock->listener != NULL) {
isc__nmsocket_detach(&sock->listener);
}
atomic_store(&sock->closed, true);
isc__nmsocket_prep_destroy(sock);
}
}
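/*
 * Close the TLSDNS socket: the first caller to flip 'closing' wins;
 * the close is performed directly when already on the socket's thread
 * and is otherwise enqueued as an event.
 */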
void
isc__nm_tlsdns_close(isc_nmsocket_t *sock) {
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->type == isc_nm_tlsdnssocket);
if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
true)) {
return;
}
if (sock->tid == isc_nm_tid()) {
tlsdns_close_direct(sock);
} else {
isc__netievent_tlsdnsclose_t *ievent =
isc__nm_get_ievent(sock->mgr, netievent_tlsdnsclose);
isc__nmsocket_attach(sock, &ievent->sock);
isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
(isc__netievent_t *)ievent);
}
}
void
isc__nm_async_tlsdnsclose(isc__networker_t *worker, isc__netievent_t *ev0) {
isc__netievent_tlsdnsclose_t *ievent =
(isc__netievent_tlsdnsclose_t *)ev0;
isc_nmsocket_t *sock = ievent->sock;
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->tid == isc_nm_tid());
UNUSED(worker);
tlsdns_close_direct(ievent->sock);
}
typedef struct {
isc_mem_t *mctx;
isc_nm_cb_t cb;
void *cbarg;
size_t extrahandlesize;
} tcpconnect_t;
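/*
 * Connect callback for the underlying TLS socket: wrap the new TLS
 * handle in a client-mode TLSDNS socket, start the read timer and an
 * initial read, and hand a handle for the new socket to the caller's
 * connect callback.
 */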
static void
tlsdnsconnect_cb(isc_nmhandle_t *handle, isc_result_t result, void *arg) {
tcpconnect_t *conn = (tcpconnect_t *)arg;
isc_nm_cb_t cb = conn->cb;
void *cbarg = conn->cbarg;
size_t extrahandlesize = conn->extrahandlesize;
isc_nmsocket_t *dnssock = NULL;
isc_nmhandle_t *readhandle = NULL;
REQUIRE(result != ISC_R_SUCCESS || VALID_NMHANDLE(handle));
isc_mem_putanddetach(&conn->mctx, conn, sizeof(*conn));
dnssock = isc_mem_get(handle->sock->mgr->mctx, sizeof(*dnssock));
isc__nmsocket_init(dnssock, handle->sock->mgr, isc_nm_tlsdnssocket,
handle->sock->iface);
dnssock->extrahandlesize = extrahandlesize;
isc_nmhandle_attach(handle, &dnssock->outerhandle);
dnssock->peer = handle->sock->peer;
dnssock->read_timeout = atomic_load(&handle->sock->mgr->init);
dnssock->tid = isc_nm_tid();
atomic_init(&dnssock->client, true);
readhandle = isc__nmhandle_get(dnssock, NULL, NULL);
if (result != ISC_R_SUCCESS) {
cb(readhandle, result, cbarg);
isc__nmsocket_detach(&dnssock);
isc_nmhandle_detach(&readhandle);
return;
}
INSIST(dnssock->statichandle != NULL);
INSIST(dnssock->statichandle == readhandle);
INSIST(readhandle->sock == dnssock);
INSIST(dnssock->recv_cb == NULL);
uv_timer_init(&dnssock->mgr->workers[isc_nm_tid()].loop,
&dnssock->timer);
dnssock->timer.data = dnssock;
dnssock->timer_initialized = true;
uv_timer_start(&dnssock->timer, dnstcp_readtimeout,
dnssock->read_timeout, 0);
dnssock->timer_running = true;
/*
* The connection is now established; we start reading immediately,
* before we've been asked to. We'll read and buffer at most one
* packet.
*/
isc_nm_read(handle, dnslisten_readcb, dnssock);
cb(readhandle, ISC_R_SUCCESS, cbarg);
/*
* The sock is now attached to the handle.
*/
isc__nmsocket_detach(&dnssock);
}
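/*
 * Establish an outgoing TLSDNS connection: create a client SSL_CTX,
 * initiate the TLS connection via isc_nm_tlsconnect(), and finish
 * setting up the TLSDNS socket in tlsdnsconnect_cb().
 */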
isc_result_t
isc_nm_tlsdnsconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer,
isc_nm_cb_t cb, void *cbarg, unsigned int timeout,
size_t extrahandlesize) {
isc_result_t result = ISC_R_SUCCESS;
tcpconnect_t *conn = isc_mem_get(mgr->mctx, sizeof(tcpconnect_t));
SSL_CTX *ctx = NULL;
*conn = (tcpconnect_t){ .cb = cb,
.cbarg = cbarg,
.extrahandlesize = extrahandlesize };
isc_mem_attach(mgr->mctx, &conn->mctx);
ctx = SSL_CTX_new(SSLv23_client_method());
result = isc_nm_tlsconnect(mgr, local, peer, tlsdnsconnect_cb, conn,
ctx, timeout, 0);
SSL_CTX_free(ctx);
if (result != ISC_R_SUCCESS) {
isc_mem_putanddetach(&conn->mctx, conn, sizeof(*conn));
}
return (result);
}
void
isc__nm_tlsdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
isc_nmsocket_t *sock = NULL;
isc__netievent_tlsdnsread_t *ievent = NULL;
isc_nmhandle_t *eventhandle = NULL;
REQUIRE(VALID_NMHANDLE(handle));
sock = handle->sock;
REQUIRE(sock->statichandle == handle);
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->recv_cb == NULL);
REQUIRE(sock->tid == isc_nm_tid());
REQUIRE(atomic_load(&sock->client));
if (inactive(sock)) {
cb(handle, ISC_R_NOTCONNECTED, NULL, cbarg);
return;
}
/*
* This MUST be done asynchronously, no matter which thread we're
* in. The callback function for isc_nm_read() often calls
* isc_nm_read() again; if we tried to do that synchronously
* we'd clash in processbuffer() and grow the stack indefinitely.
*/
ievent = isc__nm_get_ievent(sock->mgr, netievent_tlsdnsread);
isc__nmsocket_attach(sock, &ievent->sock);
sock->recv_cb = cb;
sock->recv_cbarg = cbarg;
sock->read_timeout = (atomic_load(&sock->keepalive)
? atomic_load(&sock->mgr->keepalive)
: atomic_load(&sock->mgr->idle));
/*
* Add a reference to the handle to keep it from being freed by
* the caller; it will be detached in isc__nm_async_tlsdnsread().
*/
isc_nmhandle_attach(handle, &eventhandle);
isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
(isc__netievent_t *)ievent);
}
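/*
 * Read event handler, run on the socket's network thread: report
 * ISC_R_NOTCONNECTED if the socket has gone inactive; otherwise
 * deliver a message that is already buffered, or restart the read
 * timer and resume reading on the outer handle.
 */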
void
isc__nm_async_tlsdnsread(isc__networker_t *worker, isc__netievent_t *ev0) {
isc_result_t result;
isc__netievent_tlsdnsread_t *ievent =
(isc__netievent_tlsdnsread_t *)ev0;
isc_nmsocket_t *sock = ievent->sock;
isc_nmhandle_t *handle = NULL, *newhandle = NULL;
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(worker->id == sock->tid);
REQUIRE(sock->tid == isc_nm_tid());
handle = sock->statichandle;
if (inactive(sock)) {
isc_nm_recv_cb_t cb = sock->recv_cb;
void *cbarg = sock->recv_cbarg;
isc__nmsocket_clearcb(sock);
if (cb != NULL) {
cb(handle, ISC_R_NOTCONNECTED, NULL, cbarg);
}
isc_nmhandle_detach(&handle);
return;
}
/*
* Maybe we have a packet already?
*/
result = processbuffer(sock, &newhandle);
if (result == ISC_R_SUCCESS) {
if (sock->timer_initialized) {
uv_timer_stop(&sock->timer);
}
isc_nmhandle_detach(&newhandle);
} else if (sock->outerhandle != NULL) {
/* Restart reading, wait for the callback */
if (sock->timer_initialized) {
uv_timer_start(&sock->timer, dnstcp_readtimeout,
sock->read_timeout, 0);
sock->timer_running = true;
}
isc_nm_resumeread(sock->outerhandle);
} else {
isc_nm_recv_cb_t cb = sock->recv_cb;
void *cbarg = sock->recv_cbarg;
isc__nmsocket_clearcb(sock);
cb(handle, ISC_R_NOTCONNECTED, NULL, cbarg);
}
isc_nmhandle_detach(&handle);
}
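/*
 * Cancel a pending read: enqueue a cancel event so that the
 * cancellation runs on the socket's own network thread.
 */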
void
isc__nm_tlsdns_cancelread(isc_nmhandle_t *handle) {
isc_nmsocket_t *sock = NULL;
isc__netievent_tlsdnscancel_t *ievent = NULL;
REQUIRE(VALID_NMHANDLE(handle));
sock = handle->sock;
REQUIRE(sock->type == isc_nm_tlsdnssocket);
ievent = isc__nm_get_ievent(sock->mgr, netievent_tlsdnscancel);
isc__nmsocket_attach(sock, &ievent->sock);
isc_nmhandle_attach(handle, &ievent->handle);
isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
(isc__netievent_t *)ievent);
}
void
isc__nm_async_tlsdnscancel(isc__networker_t *worker, isc__netievent_t *ev0) {
isc__netievent_tlsdnscancel_t *ievent =
(isc__netievent_tlsdnscancel_t *)ev0;
isc_nmsocket_t *sock = ievent->sock;
isc_nmhandle_t *handle = ievent->handle;
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(worker->id == sock->tid);
REQUIRE(sock->tid == isc_nm_tid());
if (atomic_load(&sock->client)) {
isc_nm_recv_cb_t cb;
void *cbarg = NULL;
cb = sock->recv_cb;
cbarg = sock->recv_cbarg;
isc__nmsocket_clearcb(sock);
if (cb != NULL) {
cb(handle, ISC_R_EOF, NULL, cbarg);
}
isc__nm_tcp_cancelread(sock->outerhandle);
}
}
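/*
 * Update the read timeout on this socket and on the underlying
 * handle, restarting the read timer if it is currently running.
 */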
void
isc__nm_tlsdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout) {
isc_nmsocket_t *sock = NULL;
REQUIRE(VALID_NMHANDLE(handle));
sock = handle->sock;
if (sock->outerhandle != NULL) {
isc__nm_tcp_settimeout(sock->outerhandle, timeout);
}
sock->read_timeout = timeout;
if (sock->timer_running) {
uv_timer_start(&sock->timer, dnstcp_readtimeout,
sock->read_timeout, 0);
}
}