From 16908ec3d912f0ca7e01edf95685eb91a52b4f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Thu, 16 Jan 2020 11:52:58 +0100 Subject: [PATCH 1/6] netmgr: don't send to an inactive (closing) udp socket We had a race in which n UDP socket could have been already closing by libuv but we still sent data to it. Mark socket as not-active when stopping listening and verify that socket is not active when trying to send data to it. --- lib/isc/netmgr/netmgr-int.h | 7 +++++++ lib/isc/netmgr/netmgr.c | 2 +- lib/isc/netmgr/udp.c | 17 +++++++++++++---- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 177adec3c6..f309ab5131 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -603,6 +603,13 @@ isc__nmsocket_prep_destroy(isc_nmsocket_t *sock); * if there are no remaining references or active handles. */ +bool +isc__nmsocket_active(isc_nmsocket_t *sock); +/*%< + * Check is socket 'sock' is active - by checking either sock->active or + * sock->parent->active; + */ + void isc__nm_async_closecb(isc__networker_t *worker, isc__netievent_t *ev0); /*%< diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 6a66bd5fdb..7943b1f5ba 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -681,7 +681,7 @@ isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event) { uv_async_send(&worker->async); } -static bool +bool isc__nmsocket_active(isc_nmsocket_t *sock) { REQUIRE(VALID_NMSOCK(sock)); if (sock->parent != NULL) { diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index 6b79d8e0bc..0bad9ceaf5 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -179,14 +179,18 @@ stop_udp_child(isc_nmsocket_t *sock) { static void stoplistening(isc_nmsocket_t *sock) { + INSIST(sock->type == isc_nm_udplistener); /* * Socket is already closing; there's nothing to do. */ - if (uv_is_closing((uv_handle_t *) &sock->uv_handle.udp)) { + if (!isc__nmsocket_active(sock)) { return; } - - INSIST(sock->type == isc_nm_udplistener); + /* + * Mark it inactive now so that all sends will be ignored + * and we won't try to stop listening again. + */ + atomic_store(&sock->active, false); for (int i = 0; i < sock->nchildren; i++) { isc__netievent_udpstop_t *event = NULL; @@ -375,6 +379,11 @@ isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, return (ISC_R_UNEXPECTED); } + if (!isc__nmsocket_active(sock)) { + isc_nmhandle_unref(handle); + return (ISC_R_CANCELED); + } + if (isc__nm_in_netthread()) { ntid = isc_nm_tid(); } else { @@ -425,7 +434,7 @@ isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0) { REQUIRE(worker->id == ievent->sock->tid); - if (atomic_load(&ievent->sock->active)) { + if (isc__nmsocket_active(ievent->sock)) { udp_send_direct(ievent->sock, ievent->req, &ievent->peer); } else { ievent->req->cb.send(ievent->req->handle, From f75a9e32be18788bec50d714f76f538dfe366b19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Thu, 16 Jan 2020 12:13:28 +0100 Subject: [PATCH 2/6] netmgr: fix a non-thread-safe access to libuv structures In tcp and udp stoplistening code we accessed libuv structures from a different thread, which caused a shutdown crash when named was under load. Also added additional DbC checks making sure we're in a proper thread when accessing uv_ functions. --- lib/isc/netmgr/netmgr-int.h | 4 ++-- lib/isc/netmgr/tcp.c | 2 +- lib/isc/netmgr/udp.c | 10 +++++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index f309ab5131..7dfa8872e6 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -606,8 +606,8 @@ isc__nmsocket_prep_destroy(isc_nmsocket_t *sock); bool isc__nmsocket_active(isc_nmsocket_t *sock); /*%< - * Check is socket 'sock' is active - by checking either sock->active or - * sock->parent->active; + * Determine whether 'sock' is active by checking 'sock->active' + * or, for child sockets, 'sock->parent->active'. */ void diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 17a97cda38..e0c566e176 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -419,7 +419,7 @@ stoplistening(isc_nmsocket_t *sock) { event = isc__nm_get_ievent(sock->mgr, netievent_tcpchildstop); isc_nmsocket_attach(&sock->children[i], &event->sock); - if (i == sock->tid) { + if (isc_nm_tid() == sock->children[i].tid) { isc__nm_async_tcpchildstop(&sock->mgr->workers[i], (isc__netievent_t *) event); isc__nm_put_ievent(sock->mgr, event); diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index 0bad9ceaf5..936fa963d9 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -122,6 +122,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { REQUIRE(sock->type == isc_nm_udpsocket); REQUIRE(sock->iface != NULL); REQUIRE(sock->parent != NULL); + REQUIRE(sock->tid == isc_nm_tid()); uv_udp_init(&worker->loop, &sock->uv_handle.udp); uv_handle_set_data(&sock->uv_handle.handle, NULL); @@ -164,7 +165,8 @@ udp_close_cb(uv_handle_t *handle) { static void stop_udp_child(isc_nmsocket_t *sock) { - INSIST(sock->type == isc_nm_udpsocket); + REQUIRE(sock->type == isc_nm_udpsocket); + REQUIRE(sock->tid == isc_nm_tid()); uv_udp_recv_stop(&sock->uv_handle.udp); uv_close((uv_handle_t *) &sock->uv_handle.udp, udp_close_cb); @@ -179,13 +181,15 @@ stop_udp_child(isc_nmsocket_t *sock) { static void stoplistening(isc_nmsocket_t *sock) { - INSIST(sock->type == isc_nm_udplistener); + REQUIRE(sock->type == isc_nm_udplistener); + /* * Socket is already closing; there's nothing to do. */ if (!isc__nmsocket_active(sock)) { return; } + /* * Mark it inactive now so that all sends will be ignored * and we won't try to stop listening again. @@ -195,7 +199,7 @@ stoplistening(isc_nmsocket_t *sock) { for (int i = 0; i < sock->nchildren; i++) { isc__netievent_udpstop_t *event = NULL; - if (i == sock->tid) { + if (isc_nm_tid() == sock->children[i].tid) { stop_udp_child(&sock->children[i]); continue; } From dcc0835a3af2774128674fd4de25461609223d9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Thu, 16 Jan 2020 11:53:31 +0100 Subject: [PATCH 3/6] cleanup properly if we fail to initialize ns_client structure If taskmgr is shutting down ns_client_setup will fail to create a task for the newly created client, we weren't cleaning up already created/attached things (memory context, server, clientmgr). --- lib/ns/client.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/ns/client.c b/lib/ns/client.c index ad99883a23..9ef8afa094 100644 --- a/lib/ns/client.c +++ b/lib/ns/client.c @@ -2333,6 +2333,16 @@ ns__client_setup(ns_client_t *client, ns_clientmgr_t *mgr, bool new) { isc_task_detach(&client->task); } + if (client->manager != NULL) { + clientmgr_detach(&client->manager); + } + if (client->mctx != NULL) { + isc_mem_detach(&client->mctx); + } + if (client->sctx != NULL) { + ns_server_detach(&client->sctx); + } + return (result); } From 8d6dc8613ab81e8f719ad6f150f59eb63d6192f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Fri, 17 Jan 2020 11:42:35 +0100 Subject: [PATCH 4/6] clean up some handle/client reference counting errors in error cases. We weren't consistent about who should unreference the handle in case of network error. Make it consistent so that it's always the client code responsibility to unreference the handle - either in the callback or right away if send function failed and the callback will never be called. --- lib/isc/netmgr/tcpdns.c | 3 +-- lib/isc/netmgr/udp.c | 8 +++++--- lib/ns/client.c | 12 +++++++++++- lib/ns/xfrout.c | 6 ++---- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index a15cb07b96..36f63c8189 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -470,8 +470,7 @@ isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region, REQUIRE(sock->type == isc_nm_tcpdnssocket); if (sock->outer == NULL) { - /* The socket is closed, just issue the callback */ - cb(handle, ISC_R_FAILURE, cbarg); + /* The socket is closed */ return (ISC_R_NOTCONNECTED); } diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index 936fa963d9..cb14425703 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -366,7 +366,11 @@ isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, /* * Simulate a firewall blocking UDP packets bigger than - * 'maxudp' bytes. + * 'maxudp' bytes, for testing purposes. + * + * The client would ordinarily have unreferenced the handle + * in the callback, but that won't happen in this case, so + * we need to do so here. */ if (maxudp != 0 && region->length > maxudp) { isc_nmhandle_unref(handle); @@ -379,12 +383,10 @@ isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, } else if (sock->type == isc_nm_udplistener) { psock = sock; } else { - isc_nmhandle_unref(handle); return (ISC_R_UNEXPECTED); } if (!isc__nmsocket_active(sock)) { - isc_nmhandle_unref(handle); return (ISC_R_CANCELED); } diff --git a/lib/ns/client.c b/lib/ns/client.c index 9ef8afa094..b60db14c80 100644 --- a/lib/ns/client.c +++ b/lib/ns/client.c @@ -381,8 +381,9 @@ ns_client_sendraw(ns_client_t *client, dns_message_t *message) { r.base[1] = client->message->id & 0xff; result = client_sendpkg(client, &buffer); - if (result == ISC_R_SUCCESS) + if (result == ISC_R_SUCCESS) { return; + } done: if (client->tcpbuf != NULL) { @@ -392,6 +393,7 @@ ns_client_sendraw(ns_client_t *client, dns_message_t *message) { } ns_client_drop(client, result); + isc_nmhandle_unref(client->handle); } void @@ -596,6 +598,10 @@ ns_client_send(ns_client_t *client) { isc_nmhandle_ref(client->handle); result = client_sendpkg(client, &tcpbuffer); + if (result != ISC_R_SUCCESS) { + /* We won't get a callback to clean it up */ + isc_nmhandle_unref(client->handle); + } switch (isc_sockaddr_pf(&client->peeraddr)) { case AF_INET: @@ -629,6 +635,10 @@ ns_client_send(ns_client_t *client) { isc_nmhandle_ref(client->handle); result = client_sendpkg(client, &buffer); + if (result != ISC_R_SUCCESS) { + /* We won't get a callback to clean it up */ + isc_nmhandle_unref(client->handle); + } switch (isc_sockaddr_pf(&client->peeraddr)) { case AF_INET: diff --git a/lib/ns/xfrout.c b/lib/ns/xfrout.c index 9c393c63ac..7ad4ef96ea 100644 --- a/lib/ns/xfrout.c +++ b/lib/ns/xfrout.c @@ -1663,8 +1663,6 @@ xfrout_senddone(isc_nmhandle_t *handle, isc_result_t result, void *arg) { xfrout_fail(xfr, result, "send"); } else if (xfr->end_of_stream == false) { sendstream(xfr); - /* Return now so we don't unref the handle */ - return; } else { /* End of zone transfer stream. */ uint64_t msecs, persec; @@ -1690,9 +1688,9 @@ xfrout_senddone(isc_nmhandle_t *handle, isc_result_t result, void *arg) { (unsigned int) persec); xfrout_ctx_destroy(&xfr); + /* We're done, unreference the handle */ + isc_nmhandle_unref(handle); } - - isc_nmhandle_unref(handle); } static void From 42f0e25a4c40b231c04d0493ebbc21105d1617c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Fri, 17 Jan 2020 12:07:34 +0100 Subject: [PATCH 5/6] calling isc__nm_udp_send() on a non-udp socket is not 'unexpected', it's a critical failure --- lib/isc/netmgr/udp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index cb14425703..d4f27a7c36 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -383,7 +383,8 @@ isc__nm_udp_send(isc_nmhandle_t *handle, isc_region_t *region, } else if (sock->type == isc_nm_udplistener) { psock = sock; } else { - return (ISC_R_UNEXPECTED); + INSIST(0); + ISC_UNREACHABLE(); } if (!isc__nmsocket_active(sock)) { From fd8788eb94f516d2c6d15ee023c867e938402abf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Fri, 17 Jan 2020 14:42:57 +0100 Subject: [PATCH 6/6] Fix possible race in socket destruction. When two threads unreferenced handles coming from one socket while the socket was being destructed we could get a use-after-free: Having handle H1 coming from socket S1, H2 coming from socket S2, S0 being a parent socket to S1 and S2: Thread A Thread B Unref handle H1 Unref handle H2 Remove H1 from S1 active handles Remove H2 from S2 active handles nmsocket_maybe_destroy(S1) nmsocket_maybe_destroy(S2) nmsocket_maybe_destroy(S0) nmsocket_maybe_destroy(S0) LOCK(S0->lock) Go through all children, figure out that we have no more active handles: sum of S0->children[i]->ah == 0 UNLOCK(S0->lock) destroy(S0) LOCK(S0->lock) - but S0 is already gone --- lib/isc/netmgr/netmgr.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 7943b1f5ba..d7cf3a36af 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -1158,7 +1158,7 @@ nmhandle_deactivate(isc_nmsocket_t *sock, isc_nmhandle_t *handle) { void isc_nmhandle_unref(isc_nmhandle_t *handle) { - isc_nmsocket_t *sock = NULL; + isc_nmsocket_t *sock = NULL, *tmp = NULL; REQUIRE(VALID_NMHANDLE(handle)); @@ -1199,8 +1199,14 @@ isc_nmhandle_unref(isc_nmhandle_t *handle) { } } + /* + * Temporarily reference the socket to ensure that it can't + * be deleted by another thread while we're deactivating the + * handle. + */ + isc_nmsocket_attach(sock, &tmp); nmhandle_deactivate(sock, handle); - nmsocket_maybe_destroy(sock); + isc_nmsocket_detach(&tmp); } void *