2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-31 14:35:26 +00:00

Merge branch '3095-invalid-recvmmsg-detection' into 'main'

Fix the UDP recvmmsg support

Closes #3095

See merge request isc-projects/bind9!5713
This commit is contained in:
Ondřej Surý
2022-01-13 18:43:00 +00:00
9 changed files with 148 additions and 74 deletions

View File

@@ -1,3 +1,6 @@
5793. [bug] Correctly detect and enable UDP recvmmsg support
in all versions of libuv that support it. [GL #3095]
5792. [bug] Don't schedule zone events on ISC_R_SHUTTINGDOWN
event failures. [GL #3084]

View File

@@ -551,6 +551,11 @@ AC_MSG_CHECKING([for libuv])
PKG_CHECK_MODULES([LIBUV], [libuv >= 1.0.0], [],
[AC_MSG_ERROR([libuv not found])])
# libuv recvmmsg support
AC_CHECK_DECLS([UV_UDP_RECVMMSG, UV_UDP_MMSG_FREE, UV_UDP_MMSG_CHUNK], [], [], [[#include <uv.h>]])
AS_CASE([$host],
[*-musl],[AC_DEFINE([HAVE_DECL_UV_UDP_RECVMMSG], [0], [Disable recvmmsg support on systems with MUSL glibc])])
# [pairwise: --enable-doh --with-libnghttp2=auto, --enable-doh --with-libnghttp2=yes, --disable-doh]
AC_ARG_ENABLE([doh],
[AS_HELP_STRING([--disable-doh], [enable DNS over HTTPS, requires libnghttp2 (default=yes)])],

View File

@@ -57,3 +57,7 @@ Bug Fixes
- Using ``rndc`` on a busy recursive server could cause ``named`` to abort
with an assertion failure. This has been fixed. :gl:`#3079`
- With libuv >= 1.37.0, the recvmmsg support would not be enabled in ``named``,
reducing the maximum query-response performance. The recvmmsg support would
only be used with libuv versions 1.35.0 and 1.36.0. This has been fixed. :gl:`#3095`

View File

@@ -46,19 +46,51 @@
/* Must be different from ISC_NETMGR_TID_UNKNOWN */
#define ISC_NETMGR_NON_INTERLOCKED -2
#define ISC_NETMGR_TLSBUF_SIZE 65536
/*
* Receive buffers
*/
#if HAVE_DECL_UV_UDP_MMSG_CHUNK
/*
* The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source,
libuv will not receive more than 20 datagrams in a single recvmmsg call.
*/
#define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX)
#else
/*
* A single DNS message size
*/
#define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX
#endif
/*
* New versions of libuv support recvmmsg on unices.
* Since recvbuf is only allocated per worker allocating a bigger one is not
* that wasteful.
* 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source, nothing
* will break if the original value changes.
* The TCP receive buffer can fit one maximum sized DNS message plus its size,
* the receive buffer here affects TCP, DoT and DoH.
*/
#define ISC_NETMGR_RECVBUF_SIZE (20 * 65536)
#define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
/* Pick the larger buffer */
#define ISC_NETMGR_RECVBUF_SIZE \
(ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \
? ISC_NETMGR_UDP_RECVBUF_SIZE \
: ISC_NETMGR_TCP_RECVBUF_SIZE)
/*
* Send buffer
*/
#define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
/*
* Make sure our RECVBUF size is large enough
*/
STATIC_ASSERT(ISC_NETMGR_UDP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE,
"UDP receive buffer size must be smaller or equal than worker "
"receive buffer size");
STATIC_ASSERT(ISC_NETMGR_TCP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE,
"TCP receive buffer size must be smaller or equal than worker "
"receive buffer size");
/*%
* Regular TCP buffer size.
*/
@@ -70,7 +102,7 @@
* most in TCPDNS or TLSDNS connections, so there's no risk of overrun
* when using a buffer this size.
*/
#define NM_BIG_BUF (65535 + 2) * 2
#define NM_BIG_BUF ISC_NETMGR_TCP_RECVBUF_SIZE * 2
#if defined(SO_REUSEPORT_LB) || (defined(SO_REUSEPORT) && defined(__linux__))
#define HAVE_SO_REUSEPORT_LB 1

View File

@@ -1599,20 +1599,10 @@ isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf) {
isc__networker_t *worker = NULL;
REQUIRE(VALID_NMSOCK(sock));
if (buf->base == NULL) {
/* Empty buffer: might happen in case of error. */
return;
}
worker = &sock->mgr->workers[sock->tid];
REQUIRE(worker->recvbuf_inuse);
if (sock->type == isc_nm_udpsocket && buf->base > worker->recvbuf &&
buf->base <= worker->recvbuf + ISC_NETMGR_RECVBUF_SIZE)
{
/* Can happen in case of out-of-order recvmmsg in libuv1.36 */
return;
}
worker = &sock->mgr->workers[sock->tid];
REQUIRE(buf->base == worker->recvbuf);
worker->recvbuf_inuse = false;
}
@@ -2187,7 +2177,7 @@ isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr) {
}
/*%<
* Allocator for read operations. Limited to size 2^16.
* Allocator callback for read operations.
*
* Note this doesn't actually allocate anything, it just assigns the
* worker's receive buffer to a socket, and marks it as "in use".
@@ -2199,35 +2189,34 @@ isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) {
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(isc__nm_in_netthread());
/*
* The size provided by libuv is only suggested size, and it always
* defaults to 64 * 1024 in the current versions of libuv (see
* src/unix/udp.c and src/unix/stream.c).
*/
UNUSED(size);
worker = &sock->mgr->workers[sock->tid];
INSIST(!worker->recvbuf_inuse);
INSIST(worker->recvbuf != NULL);
switch (sock->type) {
case isc_nm_udpsocket:
REQUIRE(size <= ISC_NETMGR_RECVBUF_SIZE);
size = ISC_NETMGR_RECVBUF_SIZE;
buf->len = ISC_NETMGR_UDP_RECVBUF_SIZE;
break;
case isc_nm_tcpsocket:
case isc_nm_tcpdnssocket:
break;
case isc_nm_tlsdnssocket:
/*
* We need to limit the individual chunks to be read, so the
* BIO_write() will always succeed and the consumed before the
* next readcb is called.
*/
if (size >= ISC_NETMGR_TLSBUF_SIZE) {
size = ISC_NETMGR_TLSBUF_SIZE;
}
buf->len = ISC_NETMGR_TCP_RECVBUF_SIZE;
break;
default:
INSIST(0);
ISC_UNREACHABLE();
}
worker = &sock->mgr->workers[sock->tid];
INSIST(!worker->recvbuf_inuse || sock->type == isc_nm_udpsocket);
REQUIRE(buf->len <= ISC_NETMGR_RECVBUF_SIZE);
buf->base = worker->recvbuf;
buf->len = size;
worker->recvbuf_inuse = true;
}

View File

@@ -816,7 +816,6 @@ isc__nm_tcp_resumeread(isc_nmhandle_t *handle) {
isc__netievent_tcpstartread_t *ievent = NULL;
isc_nmsocket_t *sock = handle->sock;
isc__networker_t *worker = &sock->mgr->workers[sock->tid];
REQUIRE(sock->tid == isc_nm_tid());
@@ -838,18 +837,8 @@ isc__nm_tcp_resumeread(isc_nmhandle_t *handle) {
ievent = isc__nm_get_netievent_tcpstartread(sock->mgr, sock);
if (worker->recvbuf_inuse) {
/*
* If we happen to call the resumeread from inside the receive
* callback, the worker->recvbuf might still be in use, so we
* need to force enqueue the next read event.
*/
isc__nm_enqueue_ievent(worker, (isc__netievent_t *)ievent);
} else {
isc__nm_maybe_enqueue_ievent(worker,
(isc__netievent_t *)ievent);
}
isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
(isc__netievent_t *)ievent);
}
void
@@ -903,6 +892,15 @@ isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
}
free:
if (nread < 0) {
/*
* The buffer may be a null buffer on error.
*/
if (buf->base == NULL && buf->len == 0) {
return;
}
}
isc__nm_free_uvbuf(sock, buf);
}

View File

@@ -879,6 +879,15 @@ isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread,
isc__nm_process_sock_buffer(sock);
free:
if (nread < 0) {
/*
* The buffer may be a null buffer on error.
*/
if (buf->base == NULL && buf->len == 0) {
return;
}
}
isc__nm_free_uvbuf(sock, buf);
}

View File

@@ -264,12 +264,12 @@ tlsdns_connect_cb(uv_connect_t *uvreq, int status) {
/*
*
*/
r = BIO_new_bio_pair(&sock->tls.ssl_wbio, ISC_NETMGR_TLSBUF_SIZE,
&sock->tls.app_rbio, ISC_NETMGR_TLSBUF_SIZE);
r = BIO_new_bio_pair(&sock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
&sock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
r = BIO_new_bio_pair(&sock->tls.ssl_rbio, ISC_NETMGR_TLSBUF_SIZE,
&sock->tls.app_wbio, ISC_NETMGR_TLSBUF_SIZE);
r = BIO_new_bio_pair(&sock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
&sock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
#if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO
@@ -1003,8 +1003,8 @@ tls_cycle_input(isc_nmsocket_t *sock) {
(void)SSL_peek(sock->tls.tls, &(char){ '\0' }, 0);
int pending = SSL_pending(sock->tls.tls);
if (pending > ISC_NETMGR_TLSBUF_SIZE) {
pending = ISC_NETMGR_TLSBUF_SIZE;
if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) {
pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE;
}
if ((sock->buf_len + pending) > sock->buf_size) {
@@ -1194,8 +1194,8 @@ tls_cycle_output(isc_nmsocket_t *sock) {
break;
}
if (pending > ISC_NETMGR_TLSBUF_SIZE) {
pending = ISC_NETMGR_TLSBUF_SIZE;
if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) {
pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE;
}
sock->tls.senddata.base = isc_mem_get(sock->mgr->mctx, pending);
@@ -1381,6 +1381,16 @@ isc__nm_tlsdns_read_cb(uv_stream_t *stream, ssize_t nread,
}
free:
async_tlsdns_cycle(sock);
if (nread < 0) {
/*
* The buffer may be a null buffer on error.
*/
if (buf->base == NULL && buf->len == 0) {
return;
}
}
isc__nm_free_uvbuf(sock, buf);
}
@@ -1516,12 +1526,12 @@ accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) {
csock->tls.tls = isc_tls_create(ssock->tls.ctx);
RUNTIME_CHECK(csock->tls.tls != NULL);
r = BIO_new_bio_pair(&csock->tls.ssl_wbio, ISC_NETMGR_TLSBUF_SIZE,
&csock->tls.app_rbio, ISC_NETMGR_TLSBUF_SIZE);
r = BIO_new_bio_pair(&csock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
&csock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
r = BIO_new_bio_pair(&csock->tls.ssl_rbio, ISC_NETMGR_TLSBUF_SIZE,
&csock->tls.app_wbio, ISC_NETMGR_TLSBUF_SIZE);
r = BIO_new_bio_pair(&csock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
&csock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
#if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO

View File

@@ -431,7 +431,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
REQUIRE(sock->parent != NULL);
REQUIRE(sock->tid == isc_nm_tid());
#ifdef UV_UDP_RECVMMSG
#if HAVE_DECL_UV_UDP_RECVMMSG
uv_init_flags |= UV_UDP_RECVMMSG;
#endif
r = uv_udp_init_ex(&worker->loop, &sock->uv_handle.udp, uv_init_flags);
@@ -556,7 +556,6 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle);
isc__nm_uvreq_t *req = NULL;
uint32_t maxudp;
bool free_buf;
isc_result_t result;
isc_sockaddr_t sockaddr, *sa = NULL;
@@ -564,19 +563,22 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
REQUIRE(sock->tid == isc_nm_tid());
REQUIRE(atomic_load(&sock->reading));
#ifdef UV_UDP_MMSG_FREE
free_buf = ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE);
#elif UV_UDP_MMSG_CHUNK
free_buf = ((flags & UV_UDP_MMSG_CHUNK) == 0);
/*
* When using recvmmsg(2), if no errors occur, there will be a final
* callback with nrecv set to 0, addr set to NULL and the buffer
* pointing at the initially allocated data with the UV_UDP_MMSG_CHUNK
* flag cleared and the UV_UDP_MMSG_FREE flag set.
*/
#if HAVE_DECL_UV_UDP_MMSG_FREE
if ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE) {
INSIST(nrecv == 0);
INSIST(addr == NULL);
goto free;
}
#else
free_buf = true;
UNUSED(flags);
#endif
/*
* Four possible reasons to return now without processing:
*/
/*
* - If we're simulating a firewall blocking UDP packets
* bigger than 'maxudp' bytes for testing purposes.
@@ -640,9 +642,31 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
sock->processing = false;
free:
if (free_buf) {
isc__nm_free_uvbuf(sock, buf);
#if HAVE_DECL_UV_UDP_MMSG_CHUNK
/*
* When using recvmmsg(2), chunks will have the UV_UDP_MMSG_CHUNK flag
* set, those must not be freed.
*/
if ((flags & UV_UDP_MMSG_CHUNK) == UV_UDP_MMSG_CHUNK) {
return;
}
#endif
/*
* When using recvmmsg(2), if a UDP socket error occurs, nrecv will be <
* 0. In either scenario, the callee can now safely free the provided
* buffer.
*/
if (nrecv < 0) {
/*
* The buffer may be a null buffer on error.
*/
if (buf->base == NULL && buf->len == 0) {
return;
}
}
isc__nm_free_uvbuf(sock, buf);
}
/*