mirror of
https://gitlab.isc.org/isc-projects/bind9
synced 2025-08-31 14:35:26 +00:00
Merge branch '3095-invalid-recvmmsg-detection' into 'main'
Fix the UDP recvmmsg support Closes #3095 See merge request isc-projects/bind9!5713
This commit is contained in:
3
CHANGES
3
CHANGES
@@ -1,3 +1,6 @@
|
||||
5793. [bug] Correctly detect and enable UDP recvmmsg support
|
||||
in all versions of libuv that support it. [GL #3095]
|
||||
|
||||
5792. [bug] Don't schedule zone events on ISC_R_SHUTTINGDOWN
|
||||
event failures. [GL #3084]
|
||||
|
||||
|
@@ -551,6 +551,11 @@ AC_MSG_CHECKING([for libuv])
|
||||
PKG_CHECK_MODULES([LIBUV], [libuv >= 1.0.0], [],
|
||||
[AC_MSG_ERROR([libuv not found])])
|
||||
|
||||
# libuv recvmmsg support
|
||||
AC_CHECK_DECLS([UV_UDP_RECVMMSG, UV_UDP_MMSG_FREE, UV_UDP_MMSG_CHUNK], [], [], [[#include <uv.h>]])
|
||||
AS_CASE([$host],
|
||||
[*-musl],[AC_DEFINE([HAVE_DECL_UV_UDP_RECVMMSG], [0], [Disable recvmmsg support on systems with MUSL glibc])])
|
||||
|
||||
# [pairwise: --enable-doh --with-libnghttp2=auto, --enable-doh --with-libnghttp2=yes, --disable-doh]
|
||||
AC_ARG_ENABLE([doh],
|
||||
[AS_HELP_STRING([--disable-doh], [enable DNS over HTTPS, requires libnghttp2 (default=yes)])],
|
||||
|
@@ -57,3 +57,7 @@ Bug Fixes
|
||||
|
||||
- Using ``rndc`` on a busy recursive server could cause ``named`` to abort
|
||||
with an assertion failure. This has been fixed. :gl:`#3079`
|
||||
|
||||
- With libuv >= 1.37.0, the recvmmsg support would not be enabled in ``named``,
|
||||
reducing the maximum query-response performance. The recvmmsg support would
|
||||
be used only in libuv 1.35.0 and 1.36.0. This has been fixed. :gl:`#3095`
|
||||
|
@@ -46,19 +46,51 @@
|
||||
/* Must be different from ISC_NETMGR_TID_UNKNOWN */
|
||||
#define ISC_NETMGR_NON_INTERLOCKED -2
|
||||
|
||||
#define ISC_NETMGR_TLSBUF_SIZE 65536
|
||||
/*
|
||||
* Receive buffers
|
||||
*/
|
||||
#if HAVE_DECL_UV_UDP_MMSG_CHUNK
|
||||
/*
|
||||
* The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source,
|
||||
* libuv will not receive more than 20 datagrams in a single recvmmsg call.
|
||||
*/
|
||||
#define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX)
|
||||
#else
|
||||
/*
|
||||
* A single DNS message size
|
||||
*/
|
||||
#define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX
|
||||
#endif
|
||||
|
||||
/*
|
||||
* New versions of libuv support recvmmsg on unices.
|
||||
* Since recvbuf is only allocated per worker allocating a bigger one is not
|
||||
* that wasteful.
|
||||
* 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source, nothing
|
||||
* will break if the original value changes.
|
||||
* The TCP receive buffer can fit one maximum sized DNS message plus its size,
|
||||
* the receive buffer here affects TCP, DoT and DoH.
|
||||
*/
|
||||
#define ISC_NETMGR_RECVBUF_SIZE (20 * 65536)
|
||||
#define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
|
||||
|
||||
/* Pick the larger buffer */
|
||||
#define ISC_NETMGR_RECVBUF_SIZE \
|
||||
(ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \
|
||||
? ISC_NETMGR_UDP_RECVBUF_SIZE \
|
||||
: ISC_NETMGR_TCP_RECVBUF_SIZE)
|
||||
|
||||
/*
|
||||
* Send buffer
|
||||
*/
|
||||
#define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
|
||||
|
||||
/*
|
||||
* Make sure our RECVBUF size is large enough
|
||||
*/
|
||||
|
||||
STATIC_ASSERT(ISC_NETMGR_UDP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE,
|
||||
"UDP receive buffer size must be smaller or equal than worker "
|
||||
"receive buffer size");
|
||||
|
||||
STATIC_ASSERT(ISC_NETMGR_TCP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE,
|
||||
"TCP receive buffer size must be smaller or equal than worker "
|
||||
"receive buffer size");
|
||||
|
||||
/*%
|
||||
* Regular TCP buffer size.
|
||||
*/
|
||||
@@ -70,7 +102,7 @@
|
||||
* most in TCPDNS or TLSDNS connections, so there's no risk of overrun
|
||||
* when using a buffer this size.
|
||||
*/
|
||||
#define NM_BIG_BUF (65535 + 2) * 2
|
||||
#define NM_BIG_BUF ISC_NETMGR_TCP_RECVBUF_SIZE * 2
|
||||
|
||||
#if defined(SO_REUSEPORT_LB) || (defined(SO_REUSEPORT) && defined(__linux__))
|
||||
#define HAVE_SO_REUSEPORT_LB 1
|
||||
|
@@ -1599,20 +1599,10 @@ isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf) {
|
||||
isc__networker_t *worker = NULL;
|
||||
|
||||
REQUIRE(VALID_NMSOCK(sock));
|
||||
if (buf->base == NULL) {
|
||||
/* Empty buffer: might happen in case of error. */
|
||||
return;
|
||||
}
|
||||
worker = &sock->mgr->workers[sock->tid];
|
||||
|
||||
REQUIRE(worker->recvbuf_inuse);
|
||||
if (sock->type == isc_nm_udpsocket && buf->base > worker->recvbuf &&
|
||||
buf->base <= worker->recvbuf + ISC_NETMGR_RECVBUF_SIZE)
|
||||
{
|
||||
/* Can happen in case of out-of-order recvmmsg in libuv1.36 */
|
||||
return;
|
||||
}
|
||||
worker = &sock->mgr->workers[sock->tid];
|
||||
REQUIRE(buf->base == worker->recvbuf);
|
||||
|
||||
worker->recvbuf_inuse = false;
|
||||
}
|
||||
|
||||
@@ -2187,7 +2177,7 @@ isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr) {
|
||||
}
|
||||
|
||||
/*%<
|
||||
* Allocator for read operations. Limited to size 2^16.
|
||||
* Allocator callback for read operations.
|
||||
*
|
||||
* Note this doesn't actually allocate anything, it just assigns the
|
||||
* worker's receive buffer to a socket, and marks it as "in use".
|
||||
@@ -2199,35 +2189,34 @@ isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) {
|
||||
|
||||
REQUIRE(VALID_NMSOCK(sock));
|
||||
REQUIRE(isc__nm_in_netthread());
|
||||
/*
|
||||
* The size provided by libuv is only a suggested size, and it always
|
||||
* defaults to 64 * 1024 in the current versions of libuv (see
|
||||
* src/unix/udp.c and src/unix/stream.c).
|
||||
*/
|
||||
UNUSED(size);
|
||||
|
||||
worker = &sock->mgr->workers[sock->tid];
|
||||
INSIST(!worker->recvbuf_inuse);
|
||||
INSIST(worker->recvbuf != NULL);
|
||||
|
||||
switch (sock->type) {
|
||||
case isc_nm_udpsocket:
|
||||
REQUIRE(size <= ISC_NETMGR_RECVBUF_SIZE);
|
||||
size = ISC_NETMGR_RECVBUF_SIZE;
|
||||
buf->len = ISC_NETMGR_UDP_RECVBUF_SIZE;
|
||||
break;
|
||||
case isc_nm_tcpsocket:
|
||||
case isc_nm_tcpdnssocket:
|
||||
break;
|
||||
case isc_nm_tlsdnssocket:
|
||||
/*
|
||||
* We need to limit the individual chunks to be read, so the
|
||||
* BIO_write() will always succeed and the data is consumed before the
|
||||
* next readcb is called.
|
||||
*/
|
||||
if (size >= ISC_NETMGR_TLSBUF_SIZE) {
|
||||
size = ISC_NETMGR_TLSBUF_SIZE;
|
||||
}
|
||||
buf->len = ISC_NETMGR_TCP_RECVBUF_SIZE;
|
||||
break;
|
||||
default:
|
||||
INSIST(0);
|
||||
ISC_UNREACHABLE();
|
||||
}
|
||||
|
||||
worker = &sock->mgr->workers[sock->tid];
|
||||
INSIST(!worker->recvbuf_inuse || sock->type == isc_nm_udpsocket);
|
||||
|
||||
REQUIRE(buf->len <= ISC_NETMGR_RECVBUF_SIZE);
|
||||
buf->base = worker->recvbuf;
|
||||
buf->len = size;
|
||||
|
||||
worker->recvbuf_inuse = true;
|
||||
}
|
||||
|
||||
|
@@ -816,7 +816,6 @@ isc__nm_tcp_resumeread(isc_nmhandle_t *handle) {
|
||||
|
||||
isc__netievent_tcpstartread_t *ievent = NULL;
|
||||
isc_nmsocket_t *sock = handle->sock;
|
||||
isc__networker_t *worker = &sock->mgr->workers[sock->tid];
|
||||
|
||||
REQUIRE(sock->tid == isc_nm_tid());
|
||||
|
||||
@@ -838,18 +837,8 @@ isc__nm_tcp_resumeread(isc_nmhandle_t *handle) {
|
||||
|
||||
ievent = isc__nm_get_netievent_tcpstartread(sock->mgr, sock);
|
||||
|
||||
if (worker->recvbuf_inuse) {
|
||||
/*
|
||||
* If we happen to call the resumeread from inside the receive
|
||||
* callback, the worker->recvbuf might still be in use, so we
|
||||
* need to force enqueue the next read event.
|
||||
*/
|
||||
isc__nm_enqueue_ievent(worker, (isc__netievent_t *)ievent);
|
||||
|
||||
} else {
|
||||
isc__nm_maybe_enqueue_ievent(worker,
|
||||
(isc__netievent_t *)ievent);
|
||||
}
|
||||
isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
|
||||
(isc__netievent_t *)ievent);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -903,6 +892,15 @@ isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
|
||||
}
|
||||
|
||||
free:
|
||||
if (nread < 0) {
|
||||
/*
|
||||
* The buffer may be a null buffer on error.
|
||||
*/
|
||||
if (buf->base == NULL && buf->len == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
isc__nm_free_uvbuf(sock, buf);
|
||||
}
|
||||
|
||||
|
@@ -879,6 +879,15 @@ isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread,
|
||||
|
||||
isc__nm_process_sock_buffer(sock);
|
||||
free:
|
||||
if (nread < 0) {
|
||||
/*
|
||||
* The buffer may be a null buffer on error.
|
||||
*/
|
||||
if (buf->base == NULL && buf->len == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
isc__nm_free_uvbuf(sock, buf);
|
||||
}
|
||||
|
||||
|
@@ -264,12 +264,12 @@ tlsdns_connect_cb(uv_connect_t *uvreq, int status) {
|
||||
/*
|
||||
*
|
||||
*/
|
||||
r = BIO_new_bio_pair(&sock->tls.ssl_wbio, ISC_NETMGR_TLSBUF_SIZE,
|
||||
&sock->tls.app_rbio, ISC_NETMGR_TLSBUF_SIZE);
|
||||
r = BIO_new_bio_pair(&sock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
|
||||
&sock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
|
||||
RUNTIME_CHECK(r == 1);
|
||||
|
||||
r = BIO_new_bio_pair(&sock->tls.ssl_rbio, ISC_NETMGR_TLSBUF_SIZE,
|
||||
&sock->tls.app_wbio, ISC_NETMGR_TLSBUF_SIZE);
|
||||
r = BIO_new_bio_pair(&sock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
|
||||
&sock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
|
||||
RUNTIME_CHECK(r == 1);
|
||||
|
||||
#if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO
|
||||
@@ -1003,8 +1003,8 @@ tls_cycle_input(isc_nmsocket_t *sock) {
|
||||
(void)SSL_peek(sock->tls.tls, &(char){ '\0' }, 0);
|
||||
|
||||
int pending = SSL_pending(sock->tls.tls);
|
||||
if (pending > ISC_NETMGR_TLSBUF_SIZE) {
|
||||
pending = ISC_NETMGR_TLSBUF_SIZE;
|
||||
if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) {
|
||||
pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE;
|
||||
}
|
||||
|
||||
if ((sock->buf_len + pending) > sock->buf_size) {
|
||||
@@ -1194,8 +1194,8 @@ tls_cycle_output(isc_nmsocket_t *sock) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (pending > ISC_NETMGR_TLSBUF_SIZE) {
|
||||
pending = ISC_NETMGR_TLSBUF_SIZE;
|
||||
if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) {
|
||||
pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE;
|
||||
}
|
||||
|
||||
sock->tls.senddata.base = isc_mem_get(sock->mgr->mctx, pending);
|
||||
@@ -1381,6 +1381,16 @@ isc__nm_tlsdns_read_cb(uv_stream_t *stream, ssize_t nread,
|
||||
}
|
||||
free:
|
||||
async_tlsdns_cycle(sock);
|
||||
|
||||
if (nread < 0) {
|
||||
/*
|
||||
* The buffer may be a null buffer on error.
|
||||
*/
|
||||
if (buf->base == NULL && buf->len == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
isc__nm_free_uvbuf(sock, buf);
|
||||
}
|
||||
|
||||
@@ -1516,12 +1526,12 @@ accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) {
|
||||
csock->tls.tls = isc_tls_create(ssock->tls.ctx);
|
||||
RUNTIME_CHECK(csock->tls.tls != NULL);
|
||||
|
||||
r = BIO_new_bio_pair(&csock->tls.ssl_wbio, ISC_NETMGR_TLSBUF_SIZE,
|
||||
&csock->tls.app_rbio, ISC_NETMGR_TLSBUF_SIZE);
|
||||
r = BIO_new_bio_pair(&csock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
|
||||
&csock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
|
||||
RUNTIME_CHECK(r == 1);
|
||||
|
||||
r = BIO_new_bio_pair(&csock->tls.ssl_rbio, ISC_NETMGR_TLSBUF_SIZE,
|
||||
&csock->tls.app_wbio, ISC_NETMGR_TLSBUF_SIZE);
|
||||
r = BIO_new_bio_pair(&csock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
|
||||
&csock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
|
||||
RUNTIME_CHECK(r == 1);
|
||||
|
||||
#if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO
|
||||
|
@@ -431,7 +431,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
|
||||
REQUIRE(sock->parent != NULL);
|
||||
REQUIRE(sock->tid == isc_nm_tid());
|
||||
|
||||
#ifdef UV_UDP_RECVMMSG
|
||||
#if HAVE_DECL_UV_UDP_RECVMMSG
|
||||
uv_init_flags |= UV_UDP_RECVMMSG;
|
||||
#endif
|
||||
r = uv_udp_init_ex(&worker->loop, &sock->uv_handle.udp, uv_init_flags);
|
||||
@@ -556,7 +556,6 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
|
||||
isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle);
|
||||
isc__nm_uvreq_t *req = NULL;
|
||||
uint32_t maxudp;
|
||||
bool free_buf;
|
||||
isc_result_t result;
|
||||
isc_sockaddr_t sockaddr, *sa = NULL;
|
||||
|
||||
@@ -564,19 +563,22 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
|
||||
REQUIRE(sock->tid == isc_nm_tid());
|
||||
REQUIRE(atomic_load(&sock->reading));
|
||||
|
||||
#ifdef UV_UDP_MMSG_FREE
|
||||
free_buf = ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE);
|
||||
#elif UV_UDP_MMSG_CHUNK
|
||||
free_buf = ((flags & UV_UDP_MMSG_CHUNK) == 0);
|
||||
/*
|
||||
* When using recvmmsg(2), if no errors occur, there will be a final
|
||||
* callback with nrecv set to 0, addr set to NULL and the buffer
|
||||
* pointing at the initially allocated data with the UV_UDP_MMSG_CHUNK
|
||||
* flag cleared and the UV_UDP_MMSG_FREE flag set.
|
||||
*/
|
||||
#if HAVE_DECL_UV_UDP_MMSG_FREE
|
||||
if ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE) {
|
||||
INSIST(nrecv == 0);
|
||||
INSIST(addr == NULL);
|
||||
goto free;
|
||||
}
|
||||
#else
|
||||
free_buf = true;
|
||||
UNUSED(flags);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Four possible reasons to return now without processing:
|
||||
*/
|
||||
|
||||
/*
|
||||
* - If we're simulating a firewall blocking UDP packets
|
||||
* bigger than 'maxudp' bytes for testing purposes.
|
||||
@@ -640,9 +642,31 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
|
||||
sock->processing = false;
|
||||
|
||||
free:
|
||||
if (free_buf) {
|
||||
isc__nm_free_uvbuf(sock, buf);
|
||||
#if HAVE_DECL_UV_UDP_MMSG_CHUNK
|
||||
/*
|
||||
* When using recvmmsg(2), chunks will have the UV_UDP_MMSG_CHUNK flag
|
||||
* set, those must not be freed.
|
||||
*/
|
||||
if ((flags & UV_UDP_MMSG_CHUNK) == UV_UDP_MMSG_CHUNK) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* When using recvmmsg(2), if a UDP socket error occurs, nrecv will be <
|
||||
* 0. In either scenario, the callee can now safely free the provided
|
||||
* buffer.
|
||||
*/
|
||||
if (nrecv < 0) {
|
||||
/*
|
||||
* The buffer may be a null buffer on error.
|
||||
*/
|
||||
if (buf->base == NULL && buf->len == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
isc__nm_free_uvbuf(sock, buf);
|
||||
}
|
||||
|
||||
/*
|
||||
|
Reference in New Issue
Block a user