2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-29 13:38:26 +00:00

Fix the UDP recvmmsg support

Previously, netmgr/udp.c tried to detect recvmmsg support in
libuv with #ifdef UV_UDP_<foo> preprocessor macros.  However, because
the UV_UDP_<foo> are not preprocessor macros, but enum members, the
detection didn't work.  Because the detection didn't work, the code
didn't have access to the information when we received the final chunk
of the recvmmsg and tried to free the uvbuf every time.  Fortunately,
the isc__nm_free_uvbuf() had a kludge that detected an attempt to free
in the middle of the receive buffer, so the code worked.

However, libuv 1.37.0 changed the way the recvmmsg was enabled from
implicit to explicit, and we checked for the presence of yet another
enum member with a preprocessor macro, so in fact libuv recvmmsg support was
never enabled with libuv >= 1.37.0.

This commit changes the preprocessor macros to autoconf checks for the
declarations, so the detection now works again.  On top of that, it's now
possible to clean up the alloc_cb and free_uvbuf functions because now
the information about whether we can or cannot free the buffer is
available to us.
This commit is contained in:
Ondřej Surý 2022-01-11 12:14:23 +01:00
parent abe466e5b3
commit 7370725008
7 changed files with 137 additions and 61 deletions

View File

@ -551,6 +551,9 @@ AC_MSG_CHECKING([for libuv])
PKG_CHECK_MODULES([LIBUV], [libuv >= 1.0.0], [],
[AC_MSG_ERROR([libuv not found])])
# libuv recvmmsg support
AC_CHECK_DECLS([UV_UDP_RECVMMSG, UV_UDP_MMSG_FREE, UV_UDP_MMSG_CHUNK], [], [], [[#include <uv.h>]])
# [pairwise: --enable-doh --with-libnghttp2=auto, --enable-doh --with-libnghttp2=yes, --disable-doh]
AC_ARG_ENABLE([doh],
[AS_HELP_STRING([--disable-doh], [enable DNS over HTTPS, requires libnghttp2 (default=yes)])],

View File

@ -46,19 +46,51 @@
/* Must be different from ISC_NETMGR_TID_UNKNOWN */
#define ISC_NETMGR_NON_INTERLOCKED -2
#define ISC_NETMGR_TLSBUF_SIZE 65536
/*
* Receive buffers
*/
#if HAVE_DECL_UV_UDP_MMSG_CHUNK
/*
* The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source,
 * libuv will not receive more than 20 datagrams in a single recvmmsg call.
*/
#define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX)
#else
/*
* A single DNS message size
*/
#define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX
#endif
/*
* New versions of libuv support recvmmsg on unices.
* Since recvbuf is only allocated per worker allocating a bigger one is not
* that wasteful.
* 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source, nothing
* will break if the original value changes.
* The TCP receive buffer can fit one maximum sized DNS message plus its size,
* the receive buffer here affects TCP, DoT and DoH.
*/
#define ISC_NETMGR_RECVBUF_SIZE (20 * 65536)
#define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
/* Pick the larger buffer */
#define ISC_NETMGR_RECVBUF_SIZE \
(ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \
? ISC_NETMGR_UDP_RECVBUF_SIZE \
: ISC_NETMGR_TCP_RECVBUF_SIZE)
/*
* Send buffer
*/
#define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
/*
* Make sure our RECVBUF size is large enough
*/
STATIC_ASSERT(ISC_NETMGR_UDP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE,
"UDP receive buffer size must be smaller or equal than worker "
"receive buffer size");
STATIC_ASSERT(ISC_NETMGR_TCP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE,
"TCP receive buffer size must be smaller or equal than worker "
"receive buffer size");
/*%
* Regular TCP buffer size.
*/
@ -70,7 +102,7 @@
* most in TCPDNS or TLSDNS connections, so there's no risk of overrun
* when using a buffer this size.
*/
#define NM_BIG_BUF (65535 + 2) * 2
#define NM_BIG_BUF ISC_NETMGR_TCP_RECVBUF_SIZE * 2
#if defined(SO_REUSEPORT_LB) || (defined(SO_REUSEPORT) && defined(__linux__))
#define HAVE_SO_REUSEPORT_LB 1

View File

@ -1599,20 +1599,10 @@ isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf) {
isc__networker_t *worker = NULL;
REQUIRE(VALID_NMSOCK(sock));
if (buf->base == NULL) {
/* Empty buffer: might happen in case of error. */
return;
}
worker = &sock->mgr->workers[sock->tid];
REQUIRE(worker->recvbuf_inuse);
if (sock->type == isc_nm_udpsocket && buf->base > worker->recvbuf &&
buf->base <= worker->recvbuf + ISC_NETMGR_RECVBUF_SIZE)
{
/* Can happen in case of out-of-order recvmmsg in libuv1.36 */
return;
}
worker = &sock->mgr->workers[sock->tid];
REQUIRE(buf->base == worker->recvbuf);
worker->recvbuf_inuse = false;
}
@ -2187,7 +2177,7 @@ isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr) {
}
/*%<
* Allocator for read operations. Limited to size 2^16.
* Allocator callback for read operations.
*
* Note this doesn't actually allocate anything, it just assigns the
* worker's receive buffer to a socket, and marks it as "in use".
@ -2199,35 +2189,34 @@ isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) {
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(isc__nm_in_netthread());
/*
* The size provided by libuv is only suggested size, and it always
* defaults to 64 * 1024 in the current versions of libuv (see
* src/unix/udp.c and src/unix/stream.c).
*/
UNUSED(size);
worker = &sock->mgr->workers[sock->tid];
INSIST(!worker->recvbuf_inuse);
INSIST(worker->recvbuf != NULL);
switch (sock->type) {
case isc_nm_udpsocket:
REQUIRE(size <= ISC_NETMGR_RECVBUF_SIZE);
size = ISC_NETMGR_RECVBUF_SIZE;
buf->len = ISC_NETMGR_UDP_RECVBUF_SIZE;
break;
case isc_nm_tcpsocket:
case isc_nm_tcpdnssocket:
break;
case isc_nm_tlsdnssocket:
/*
* We need to limit the individual chunks to be read, so the
* BIO_write() will always succeed and be consumed before the
* next readcb is called.
*/
if (size >= ISC_NETMGR_TLSBUF_SIZE) {
size = ISC_NETMGR_TLSBUF_SIZE;
}
buf->len = ISC_NETMGR_TCP_RECVBUF_SIZE;
break;
default:
INSIST(0);
ISC_UNREACHABLE();
}
worker = &sock->mgr->workers[sock->tid];
INSIST(!worker->recvbuf_inuse || sock->type == isc_nm_udpsocket);
REQUIRE(buf->len <= ISC_NETMGR_RECVBUF_SIZE);
buf->base = worker->recvbuf;
buf->len = size;
worker->recvbuf_inuse = true;
}

View File

@ -903,6 +903,15 @@ isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
}
free:
if (nread < 0) {
/*
* The buffer may be a null buffer on error.
*/
if (buf->base == NULL && buf->len == 0) {
return;
}
}
isc__nm_free_uvbuf(sock, buf);
}

View File

@ -879,6 +879,15 @@ isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread,
isc__nm_process_sock_buffer(sock);
free:
if (nread < 0) {
/*
* The buffer may be a null buffer on error.
*/
if (buf->base == NULL && buf->len == 0) {
return;
}
}
isc__nm_free_uvbuf(sock, buf);
}

View File

@ -264,12 +264,12 @@ tlsdns_connect_cb(uv_connect_t *uvreq, int status) {
/*
*
*/
r = BIO_new_bio_pair(&sock->tls.ssl_wbio, ISC_NETMGR_TLSBUF_SIZE,
&sock->tls.app_rbio, ISC_NETMGR_TLSBUF_SIZE);
r = BIO_new_bio_pair(&sock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
&sock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
r = BIO_new_bio_pair(&sock->tls.ssl_rbio, ISC_NETMGR_TLSBUF_SIZE,
&sock->tls.app_wbio, ISC_NETMGR_TLSBUF_SIZE);
r = BIO_new_bio_pair(&sock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
&sock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
#if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO
@ -1003,8 +1003,8 @@ tls_cycle_input(isc_nmsocket_t *sock) {
(void)SSL_peek(sock->tls.tls, &(char){ '\0' }, 0);
int pending = SSL_pending(sock->tls.tls);
if (pending > ISC_NETMGR_TLSBUF_SIZE) {
pending = ISC_NETMGR_TLSBUF_SIZE;
if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) {
pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE;
}
if ((sock->buf_len + pending) > sock->buf_size) {
@ -1194,8 +1194,8 @@ tls_cycle_output(isc_nmsocket_t *sock) {
break;
}
if (pending > ISC_NETMGR_TLSBUF_SIZE) {
pending = ISC_NETMGR_TLSBUF_SIZE;
if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) {
pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE;
}
sock->tls.senddata.base = isc_mem_get(sock->mgr->mctx, pending);
@ -1381,6 +1381,16 @@ isc__nm_tlsdns_read_cb(uv_stream_t *stream, ssize_t nread,
}
free:
async_tlsdns_cycle(sock);
if (nread < 0) {
/*
* The buffer may be a null buffer on error.
*/
if (buf->base == NULL && buf->len == 0) {
return;
}
}
isc__nm_free_uvbuf(sock, buf);
}
@ -1516,12 +1526,12 @@ accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) {
csock->tls.tls = isc_tls_create(ssock->tls.ctx);
RUNTIME_CHECK(csock->tls.tls != NULL);
r = BIO_new_bio_pair(&csock->tls.ssl_wbio, ISC_NETMGR_TLSBUF_SIZE,
&csock->tls.app_rbio, ISC_NETMGR_TLSBUF_SIZE);
r = BIO_new_bio_pair(&csock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
&csock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
r = BIO_new_bio_pair(&csock->tls.ssl_rbio, ISC_NETMGR_TLSBUF_SIZE,
&csock->tls.app_wbio, ISC_NETMGR_TLSBUF_SIZE);
r = BIO_new_bio_pair(&csock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
&csock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
RUNTIME_CHECK(r == 1);
#if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO

View File

@ -431,7 +431,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
REQUIRE(sock->parent != NULL);
REQUIRE(sock->tid == isc_nm_tid());
#ifdef UV_UDP_RECVMMSG
#if HAVE_DECL_UV_UDP_RECVMMSG
uv_init_flags |= UV_UDP_RECVMMSG;
#endif
r = uv_udp_init_ex(&worker->loop, &sock->uv_handle.udp, uv_init_flags);
@ -556,7 +556,6 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle);
isc__nm_uvreq_t *req = NULL;
uint32_t maxudp;
bool free_buf;
isc_result_t result;
isc_sockaddr_t sockaddr, *sa = NULL;
@ -564,19 +563,22 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
REQUIRE(sock->tid == isc_nm_tid());
REQUIRE(atomic_load(&sock->reading));
#ifdef UV_UDP_MMSG_FREE
free_buf = ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE);
#elif UV_UDP_MMSG_CHUNK
free_buf = ((flags & UV_UDP_MMSG_CHUNK) == 0);
/*
* When using recvmmsg(2), if no errors occur, there will be a final
* callback with nrecv set to 0, addr set to NULL and the buffer
* pointing at the initially allocated data with the UV_UDP_MMSG_CHUNK
* flag cleared and the UV_UDP_MMSG_FREE flag set.
*/
#if HAVE_DECL_UV_UDP_MMSG_FREE
if ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE) {
INSIST(nrecv == 0);
INSIST(addr == NULL);
goto free;
}
#else
free_buf = true;
UNUSED(flags);
#endif
/*
* Four possible reasons to return now without processing:
*/
/*
* - If we're simulating a firewall blocking UDP packets
* bigger than 'maxudp' bytes for testing purposes.
@ -640,9 +642,31 @@ udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
sock->processing = false;
free:
if (free_buf) {
isc__nm_free_uvbuf(sock, buf);
#if HAVE_DECL_UV_UDP_MMSG_CHUNK
/*
* When using recvmmsg(2), chunks will have the UV_UDP_MMSG_CHUNK flag
* set, those must not be freed.
*/
if ((flags & UV_UDP_MMSG_CHUNK) == UV_UDP_MMSG_CHUNK) {
return;
}
#endif
/*
* When using recvmmsg(2), if a UDP socket error occurs, nrecv will be <
* 0. In either scenario, the callee can now safely free the provided
* buffer.
*/
if (nrecv < 0) {
/*
* The buffer may be a null buffer on error.
*/
if (buf->base == NULL && buf->len == 0) {
return;
}
}
isc__nm_free_uvbuf(sock, buf);
}
/*