From 73707250088cecf3324a9bd105b5f52324e3485a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Tue, 11 Jan 2022 12:14:23 +0100 Subject: [PATCH] Fix the UDP recvmmsg support Previously, the netmgr/udp.c tried to detect the recvmmsg detection in libuv with #ifdef UV_UDP_ preprocessor macros. However, because the UV_UDP_ are not preprocessor macros, but enum members, the detection didn't work. Because the detection didn't work, the code didn't have access to the information when we received the final chunk of the recvmmsg and tried to free the uvbuf every time. Fortunately, the isc__nm_free_uvbuf() had a kludge that detected attempt to free in the middle of the receive buffer, so the code worked. However, libuv 1.37.0 changed the way the recvmmsg was enabled from implicit to explicit, and we checked for yet another enum member presence with preprocessor macro, so in fact libuv recvmmsg support was never enabled with libuv >= 1.37.0. This commit changes to the preprocessor macros to autoconf checks for declaration, so the detection now works again. On top of that, it's now possible to cleanup the alloc_cb and free_uvbuf functions because now, the information whether we can or cannot free the buffer is available to us. --- configure.ac | 3 +++ lib/isc/netmgr/netmgr-int.h | 48 +++++++++++++++++++++++++++++------ lib/isc/netmgr/netmgr.c | 45 +++++++++++++-------------------- lib/isc/netmgr/tcp.c | 9 +++++++ lib/isc/netmgr/tcpdns.c | 9 +++++++ lib/isc/netmgr/tlsdns.c | 34 ++++++++++++++++--------- lib/isc/netmgr/udp.c | 50 +++++++++++++++++++++++++++---------- 7 files changed, 137 insertions(+), 61 deletions(-) diff --git a/configure.ac b/configure.ac index 580095ef75..ce8117b15d 100644 --- a/configure.ac +++ b/configure.ac @@ -551,6 +551,9 @@ AC_MSG_CHECKING([for libuv]) PKG_CHECK_MODULES([LIBUV], [libuv >= 1.0.0], [], [AC_MSG_ERROR([libuv not found])]) +# libuv recvmmsg support +AC_CHECK_DECLS([UV_UDP_RECVMMSG, UV_UDP_MMSG_FREE, UV_UDP_MMSG_CHUNK], [], [], [[#include ]]) + # [pairwise: --enable-doh --with-libnghttp2=auto, --enable-doh --with-libnghttp2=yes, --disable-doh] AC_ARG_ENABLE([doh], [AS_HELP_STRING([--disable-doh], [enable DNS over HTTPS, requires libnghttp2 (default=yes)])], diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index d368dcafac..0b93c22903 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -46,19 +46,51 @@ /* Must be different from ISC_NETMGR_TID_UNKNOWN */ #define ISC_NETMGR_NON_INTERLOCKED -2 -#define ISC_NETMGR_TLSBUF_SIZE 65536 +/* + * Receive buffers + */ +#if HAVE_DECL_UV_UDP_MMSG_CHUNK +/* + * The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source, + * libuv will not receive more that 20 datagrams in a single recvmmsg call. + */ +#define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX) +#else +/* + * A single DNS message size + */ +#define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX +#endif /* - * New versions of libuv support recvmmsg on unices. - * Since recvbuf is only allocated per worker allocating a bigger one is not - * that wasteful. - * 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source, nothing - * will break if the original value changes. + * The TCP receive buffer can fit one maximum sized DNS message plus its size, + * the receive buffer here affects TCP, DoT and DoH. */ -#define ISC_NETMGR_RECVBUF_SIZE (20 * 65536) +#define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) +/* Pick the larger buffer */ +#define ISC_NETMGR_RECVBUF_SIZE \ + (ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \ + ? ISC_NETMGR_UDP_RECVBUF_SIZE \ + : ISC_NETMGR_TCP_RECVBUF_SIZE) + +/* + * Send buffer + */ #define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX) +/* + * Make sure our RECVBUF size is large enough + */ + +STATIC_ASSERT(ISC_NETMGR_UDP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE, + "UDP receive buffer size must be smaller or equal than worker " + "receive buffer size"); + +STATIC_ASSERT(ISC_NETMGR_TCP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE, + "TCP receive buffer size must be smaller or equal than worker " + "receive buffer size"); + /*% * Regular TCP buffer size. */ @@ -70,7 +102,7 @@ * most in TCPDNS or TLSDNS connections, so there's no risk of overrun * when using a buffer this size. */ -#define NM_BIG_BUF (65535 + 2) * 2 +#define NM_BIG_BUF ISC_NETMGR_TCP_RECVBUF_SIZE * 2 #if defined(SO_REUSEPORT_LB) || (defined(SO_REUSEPORT) && defined(__linux__)) #define HAVE_SO_REUSEPORT_LB 1 diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index e68ee9f368..0af8b1044d 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -1599,20 +1599,10 @@ isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf) { isc__networker_t *worker = NULL; REQUIRE(VALID_NMSOCK(sock)); - if (buf->base == NULL) { - /* Empty buffer: might happen in case of error. */ - return; - } - worker = &sock->mgr->workers[sock->tid]; - REQUIRE(worker->recvbuf_inuse); - if (sock->type == isc_nm_udpsocket && buf->base > worker->recvbuf && - buf->base <= worker->recvbuf + ISC_NETMGR_RECVBUF_SIZE) - { - /* Can happen in case of out-of-order recvmmsg in libuv1.36 */ - return; - } + worker = &sock->mgr->workers[sock->tid]; REQUIRE(buf->base == worker->recvbuf); + worker->recvbuf_inuse = false; } @@ -2187,7 +2177,7 @@ isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr) { } /*%< - * Allocator for read operations. Limited to size 2^16. + * Allocator callback for read operations. * * Note this doesn't actually allocate anything, it just assigns the * worker's receive buffer to a socket, and marks it as "in use". @@ -2199,35 +2189,34 @@ isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) { REQUIRE(VALID_NMSOCK(sock)); REQUIRE(isc__nm_in_netthread()); + /* + * The size provided by libuv is only suggested size, and it always + * defaults to 64 * 1024 in the current versions of libuv (see + * src/unix/udp.c and src/unix/stream.c). + */ + UNUSED(size); + + worker = &sock->mgr->workers[sock->tid]; + INSIST(!worker->recvbuf_inuse); + INSIST(worker->recvbuf != NULL); switch (sock->type) { case isc_nm_udpsocket: - REQUIRE(size <= ISC_NETMGR_RECVBUF_SIZE); - size = ISC_NETMGR_RECVBUF_SIZE; + buf->len = ISC_NETMGR_UDP_RECVBUF_SIZE; break; case isc_nm_tcpsocket: case isc_nm_tcpdnssocket: - break; case isc_nm_tlsdnssocket: - /* - * We need to limit the individual chunks to be read, so the - * BIO_write() will always succeed and the consumed before the - * next readcb is called. - */ - if (size >= ISC_NETMGR_TLSBUF_SIZE) { - size = ISC_NETMGR_TLSBUF_SIZE; - } + buf->len = ISC_NETMGR_TCP_RECVBUF_SIZE; break; default: INSIST(0); ISC_UNREACHABLE(); } - worker = &sock->mgr->workers[sock->tid]; - INSIST(!worker->recvbuf_inuse || sock->type == isc_nm_udpsocket); - + REQUIRE(buf->len <= ISC_NETMGR_RECVBUF_SIZE); buf->base = worker->recvbuf; - buf->len = size; + worker->recvbuf_inuse = true; } diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 012a9643fd..186bb83cf4 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -903,6 +903,15 @@ isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) { } free: + if (nread < 0) { + /* + * The buffer may be a null buffer on error. + */ + if (buf->base == NULL && buf->len == 0) { + return; + } + } + isc__nm_free_uvbuf(sock, buf); } diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c index c600982e52..6c840634f4 100644 --- a/lib/isc/netmgr/tcpdns.c +++ b/lib/isc/netmgr/tcpdns.c @@ -879,6 +879,15 @@ isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, isc__nm_process_sock_buffer(sock); free: + if (nread < 0) { + /* + * The buffer may be a null buffer on error. + */ + if (buf->base == NULL && buf->len == 0) { + return; + } + } + isc__nm_free_uvbuf(sock, buf); } diff --git a/lib/isc/netmgr/tlsdns.c b/lib/isc/netmgr/tlsdns.c index 830e464189..385a18aace 100644 --- a/lib/isc/netmgr/tlsdns.c +++ b/lib/isc/netmgr/tlsdns.c @@ -264,12 +264,12 @@ tlsdns_connect_cb(uv_connect_t *uvreq, int status) { /* * */ - r = BIO_new_bio_pair(&sock->tls.ssl_wbio, ISC_NETMGR_TLSBUF_SIZE, - &sock->tls.app_rbio, ISC_NETMGR_TLSBUF_SIZE); + r = BIO_new_bio_pair(&sock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE, + &sock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE); RUNTIME_CHECK(r == 1); - r = BIO_new_bio_pair(&sock->tls.ssl_rbio, ISC_NETMGR_TLSBUF_SIZE, - &sock->tls.app_wbio, ISC_NETMGR_TLSBUF_SIZE); + r = BIO_new_bio_pair(&sock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE, + &sock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE); RUNTIME_CHECK(r == 1); #if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO @@ -1003,8 +1003,8 @@ tls_cycle_input(isc_nmsocket_t *sock) { (void)SSL_peek(sock->tls.tls, &(char){ '\0' }, 0); int pending = SSL_pending(sock->tls.tls); - if (pending > ISC_NETMGR_TLSBUF_SIZE) { - pending = ISC_NETMGR_TLSBUF_SIZE; + if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) { + pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE; } if ((sock->buf_len + pending) > sock->buf_size) { @@ -1194,8 +1194,8 @@ tls_cycle_output(isc_nmsocket_t *sock) { break; } - if (pending > ISC_NETMGR_TLSBUF_SIZE) { - pending = ISC_NETMGR_TLSBUF_SIZE; + if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) { + pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE; } sock->tls.senddata.base = isc_mem_get(sock->mgr->mctx, pending); @@ -1381,6 +1381,16 @@ isc__nm_tlsdns_read_cb(uv_stream_t *stream, ssize_t nread, } free: async_tlsdns_cycle(sock); + + if (nread < 0) { + /* + * The buffer may be a null buffer on error. + */ + if (buf->base == NULL && buf->len == 0) { + return; + } + } + isc__nm_free_uvbuf(sock, buf); } @@ -1516,12 +1526,12 @@ accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) { csock->tls.tls = isc_tls_create(ssock->tls.ctx); RUNTIME_CHECK(csock->tls.tls != NULL); - r = BIO_new_bio_pair(&csock->tls.ssl_wbio, ISC_NETMGR_TLSBUF_SIZE, - &csock->tls.app_rbio, ISC_NETMGR_TLSBUF_SIZE); + r = BIO_new_bio_pair(&csock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE, + &csock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE); RUNTIME_CHECK(r == 1); - r = BIO_new_bio_pair(&csock->tls.ssl_rbio, ISC_NETMGR_TLSBUF_SIZE, - &csock->tls.app_wbio, ISC_NETMGR_TLSBUF_SIZE); + r = BIO_new_bio_pair(&csock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE, + &csock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE); RUNTIME_CHECK(r == 1); #if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index 8d0f190040..4cb3fdc429 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -431,7 +431,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { REQUIRE(sock->parent != NULL); REQUIRE(sock->tid == isc_nm_tid()); -#ifdef UV_UDP_RECVMMSG +#if HAVE_DECL_UV_UDP_RECVMMSG uv_init_flags |= UV_UDP_RECVMMSG; #endif r = uv_udp_init_ex(&worker->loop, &sock->uv_handle.udp, uv_init_flags); @@ -556,7 +556,6 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle); isc__nm_uvreq_t *req = NULL; uint32_t maxudp; - bool free_buf; isc_result_t result; isc_sockaddr_t sockaddr, *sa = NULL; @@ -564,19 +563,22 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, REQUIRE(sock->tid == isc_nm_tid()); REQUIRE(atomic_load(&sock->reading)); -#ifdef UV_UDP_MMSG_FREE - free_buf = ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE); -#elif UV_UDP_MMSG_CHUNK - free_buf = ((flags & UV_UDP_MMSG_CHUNK) == 0); + /* + * When using recvmmsg(2), if no errors occur, there will be a final + * callback with nrecv set to 0, addr set to NULL and the buffer + * pointing at the initially allocated data with the UV_UDP_MMSG_CHUNK + * flag cleared and the UV_UDP_MMSG_FREE flag set. + */ +#if HAVE_DECL_UV_UDP_MMSG_FREE + if ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE) { + INSIST(nrecv == 0); + INSIST(addr == NULL); + goto free; + } #else - free_buf = true; UNUSED(flags); #endif - /* - * Four possible reasons to return now without processing: - */ - /* * - If we're simulating a firewall blocking UDP packets * bigger than 'maxudp' bytes for testing purposes. @@ -640,9 +642,31 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf, sock->processing = false; free: - if (free_buf) { - isc__nm_free_uvbuf(sock, buf); +#if HAVE_DECL_UV_UDP_MMSG_CHUNK + /* + * When using recvmmsg(2), chunks will have the UV_UDP_MMSG_CHUNK flag + * set, those must not be freed. + */ + if ((flags & UV_UDP_MMSG_CHUNK) == UV_UDP_MMSG_CHUNK) { + return; } +#endif + + /* + * When using recvmmsg(2), if a UDP socket error occurs, nrecv will be < + * 0. In either scenario, the callee can now safely free the provided + * buffer. + */ + if (nrecv < 0) { + /* + * The buffer may be a null buffer on error. + */ + if (buf->base == NULL && buf->len == 0) { + return; + } + } + + isc__nm_free_uvbuf(sock, buf); } /*