From 5daaca714678e7d2fb40f7730899bfc59dee7f28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Mon, 5 Oct 2020 10:40:02 +0200 Subject: [PATCH 1/5] Add SO_REUSEPORT and SO_INCOMING_CPU helper functions The setting of SO_REUSE**** and SO_INCOMING_CPU has been moved into separate helper functions. --- lib/isc/netmgr/netmgr-int.h | 12 ++++++++ lib/isc/netmgr/netmgr.c | 58 ++++++++++++++++++++++++++++++++++++- lib/isc/netmgr/udp.c | 42 ++++----------------------- 3 files changed, 75 insertions(+), 37 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 1c30c6c15b..c565825385 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -838,3 +838,15 @@ isc__nm_socket_freebind(const uv_handle_t *handle); /*%< * Set the IP_FREEBIND (or equivalent) socket option on the uv_handle */ + +isc_result_t +isc__nm_socket_reuseport(uv_os_fd_t fd); +/*%< + * Set the SO_REUSEPORT (or equivalent) socket option on the fd + */ + +isc_result_t +isc__nm_socket_incoming_cpu(uv_os_fd_t fd); +/*%< + * Set the SO_INCOMING_CPU socket option on the fd if available + */ diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index d4e2bf29f5..85ce46c4f9 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -1625,12 +1625,68 @@ isc__nm_socket_freebind(const uv_handle_t *handle) { } #else UNUSED(handle); - UNUSED(fd); result = ISC_R_NOTIMPLEMENTED; #endif return (result); } +isc_result_t +isc__nm_socket_reuseport(uv_os_fd_t fd) { + /* + * This is SO_REUSE**** hell: + * + * Generally, the SO_REUSEADDR socket option allows reuse of + * local addresses. On Windows, it also allows a socket to + * forcibly bind to a port in use by another socket. + * + * On Linux, SO_REUSEPORT socket option allows sockets to be + * bound to an identical socket address. 
For UDP sockets, the + * use of this option can provide better distribution of + * incoming datagrams to multiple processes (or threads) as + * compared to the traditional technique of having multiple + * processes compete to receive datagrams on the same socket. + * + * On FreeBSD 12+, the same thing is achieved with SO_REUSEPORT_LB. + * + */ + isc_result_t result = ISC_R_NOTIMPLEMENTED; +#if defined(SO_REUSEADDR) + if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEADDR) == -1) { + return (ISC_R_FAILURE); + } else { + result = ISC_R_SUCCESS; + } +#endif +#if defined(SO_REUSEPORT_LB) + if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB) == -1) { + return (ISC_R_FAILURE); + } else { + result = ISC_R_SUCCESS; + } +#elif defined(SO_REUSEPORT) + if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) { + return (ISC_R_FAILURE); + } else { + result = ISC_R_SUCCESS; + } +#endif + return (result); +} + +isc_result_t +isc__nm_socket_incoming_cpu(uv_os_fd_t fd) { +#ifdef SO_INCOMING_CPU + if (setsockopt_on(fd, SOL_SOCKET, SO_INCOMING_CPU) == -1) { + return (ISC_R_FAILURE); + } else { + return (ISC_R_SUCCESS); + } +#else + UNUSED(fd); +#endif + return (ISC_R_NOTIMPLEMENTED); +} + #ifdef NETMGR_TRACE /* * Dump all active sockets in netmgr. 
We output to stderr diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index b575d80aff..83ab7b54c9 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -65,8 +65,8 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, nsock->extrahandlesize = extrahandlesize; for (size_t i = 0; i < mgr->nworkers; i++) { + isc_result_t result; uint16_t family = iface->addr.type.sa.sa_family; - int res = 0; isc__netievent_udplisten_t *ievent = NULL; isc_nmsocket_t *csock = &nsock->children[i]; @@ -82,46 +82,16 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, csock->fd = socket(family, SOCK_DGRAM, 0); RUNTIME_CHECK(csock->fd >= 0); - /* - * This is SO_REUSE**** hell: - * - * Generally, the SO_REUSEADDR socket option allows reuse of - * local addresses. On Windows, it also allows a socket to - * forcibly bind to a port in use by another socket. - * - * On Linux, SO_REUSEPORT socket option allows sockets to be - * bound to an identical socket address. For UDP sockets, the - * use of this option can provide better distribution of - * incoming datagrams to multiple processes (or threads) as - * compared to the traditional technique of having multiple - * processes compete to receive datagrams on the same socket. - * - * On FreeBSD, the same thing is achieved with SO_REUSEPORT_LB. 
- * - */ -#if defined(SO_REUSEADDR) - res = setsockopt(csock->fd, SOL_SOCKET, SO_REUSEADDR, - &(int){ 1 }, sizeof(int)); - RUNTIME_CHECK(res == 0); -#endif -#if defined(SO_REUSEPORT_LB) - res = setsockopt(csock->fd, SOL_SOCKET, SO_REUSEPORT_LB, - &(int){ 1 }, sizeof(int)); - RUNTIME_CHECK(res == 0); -#elif defined(SO_REUSEPORT) - res = setsockopt(csock->fd, SOL_SOCKET, SO_REUSEPORT, - &(int){ 1 }, sizeof(int)); - RUNTIME_CHECK(res == 0); -#endif + result = isc__nm_socket_reuseport(csock->fd); + RUNTIME_CHECK(result == ISC_R_SUCCESS || + result == ISC_R_NOTIMPLEMENTED); -#ifdef SO_INCOMING_CPU /* We don't check for the result, because SO_INCOMING_CPU can be * available without the setter on Linux kernel version 4.4, and * setting SO_INCOMING_CPU is just an optimization. */ - (void)setsockopt(csock->fd, SOL_SOCKET, SO_INCOMING_CPU, - &(int){ 1 }, sizeof(int)); -#endif + (void)isc__nm_socket_incoming_cpu(csock->fd); + ievent = isc__nm_get_ievent(mgr, netievent_udplisten); ievent->sock = csock; isc__nm_enqueue_ievent(&mgr->workers[i], From d685bbc82240b8f1c20bec748a5725844221c61d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Mon, 5 Oct 2020 10:51:40 +0200 Subject: [PATCH 2/5] Add helper function to enable DF (don't fragment) flag on UDP sockets This commit adds the isc__nm_socket_dontfrag() helper function. 
--- lib/isc/netmgr/netmgr-int.h | 6 ++++ lib/isc/netmgr/netmgr.c | 48 +++++++++++++++++++++++++ lib/isc/win32/include/isc/platform.h.in | 2 ++ 3 files changed, 56 insertions(+) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index c565825385..d6c4032dc6 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -850,3 +850,9 @@ isc__nm_socket_incoming_cpu(uv_os_fd_t fd); /*%< * Set the SO_INCOMING_CPU socket option on the fd if available */ + +isc_result_t +isc__nm_socket_dontfrag(uv_os_fd_t fd, sa_family_t sa_family); +/*%< + * Set the SO_IP_DONTFRAG (or equivalent) socket option of the fd if available + */ diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 85ce46c4f9..e39c882408 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -1687,6 +1687,54 @@ isc__nm_socket_incoming_cpu(uv_os_fd_t fd) { return (ISC_R_NOTIMPLEMENTED); } +isc_result_t +isc__nm_socket_dontfrag(uv_os_fd_t fd, sa_family_t sa_family) { + /* + * Set the Don't Fragment flag on IP packets + */ + if (sa_family == AF_INET6) { +#if defined(IPV6_DONTFRAG) + if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_DONTFRAG) == -1) { + return (ISC_R_FAILURE); + } else { + return (ISC_R_SUCCESS); + } +#elif defined(IPV6_MTU_DISCOVER) + if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER, + &(int){ IP_PMTUDISC_DO }, sizeof(int)) == -1) + { + return (ISC_R_FAILURE); + } else { + return (ISC_R_SUCCESS); + } +#else + UNUSED(fd); +#endif + } else if (sa_family == AF_INET) { +#if defined(IP_DONTFRAG) + if (setsockopt_on(fd, IPPROTO_IP, IP_DONTFRAG) == -1) { + return (ISC_R_FAILURE); + } else { + return (ISC_R_SUCCESS); + } +#elif defined(IP_MTU_DISCOVER) + if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, + &(int){ IP_PMTUDISC_DO }, sizeof(int)) == -1) + { + return (ISC_R_FAILURE); + } else { + return (ISC_R_SUCCESS); + } +#else + UNUSED(fd); +#endif + } else { + return (ISC_R_FAMILYNOSUPPORT); + } + + return (ISC_R_NOTIMPLEMENTED); +} + #ifdef 
NETMGR_TRACE /* * Dump all active sockets in netmgr. We output to stderr diff --git a/lib/isc/win32/include/isc/platform.h.in b/lib/isc/win32/include/isc/platform.h.in index d51197ff2a..a8b645007c 100644 --- a/lib/isc/win32/include/isc/platform.h.in +++ b/lib/isc/win32/include/isc/platform.h.in @@ -56,6 +56,8 @@ typedef uint32_t socklen_t; #undef MSG_TRUNC +typedef uint16_t sa_family_t; + /* * Set up a macro for importing and exporting from the DLL */ From 9dc01a636b45686714f29f8814d8dad422754c25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Mon, 5 Oct 2020 11:17:52 +0200 Subject: [PATCH 3/5] Refactor isc__nm_socket_freebind() to take fd and sa_family as args The isc__nm_socket_freebind() has been refactored to match other isc__nm_socket_...() helper functions and take uv_os_fd_t and sa_family_t as function arguments. --- lib/isc/netmgr/netmgr-int.h | 2 +- lib/isc/netmgr/netmgr.c | 34 ++++++++++++++-------------------- lib/isc/netmgr/tcp.c | 8 ++++++-- lib/isc/netmgr/udp.c | 6 ++++-- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index d6c4032dc6..72c76a5afd 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -834,7 +834,7 @@ isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid); */ isc_result_t -isc__nm_socket_freebind(const uv_handle_t *handle); +isc__nm_socket_freebind(uv_os_fd_t fd, sa_family_t sa_family); /*%< * Set the IP_FREEBIND (or equivalent) socket option on the uv_handle */ diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index e39c882408..02ad0b2c54 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -1584,50 +1584,44 @@ isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid) { setsockopt(socket, level, name, &(int){ 1 }, sizeof(int)) isc_result_t -isc__nm_socket_freebind(const uv_handle_t *handle) { +isc__nm_socket_freebind(uv_os_fd_t fd, sa_family_t sa_family) { /* * Set the 
IP_FREEBIND (or equivalent option) on the uv_handle. */ - isc_result_t result = ISC_R_SUCCESS; - uv_os_fd_t fd; - if (uv_fileno(handle, &fd) != 0) { - return (ISC_R_FAILURE); - } #ifdef IP_FREEBIND + UNUSED(sa_family); if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) { return (ISC_R_FAILURE); } + return (ISC_R_SUCCESS); #elif defined(IP_BINDANY) || defined(IPV6_BINDANY) - struct sockaddr_in sockfd; - - if (getsockname(fd, (struct sockaddr *)&sockfd, - &(socklen_t){ sizeof(sockfd) }) == -1) - { - return (ISC_R_FAILURE); - } + if (sa_family == AF_INET) { #if defined(IP_BINDANY) - if (sockfd.sin_family == AF_INET) { if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) { return (ISC_R_FAILURE); } - } + return (ISC_R_SUCCESS); #endif + } else if (sa_family == AF_INET6) { #if defined(IPV6_BINDANY) - if (sockfd.sin_family == AF_INET6) { if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) { return (ISC_R_FAILURE); } - } + return (ISC_R_SUCCESS); #endif + } + return (ISC_R_NOTIMPLEMENTED); #elif defined(SO_BINDANY) + UNUSED(sa_family); if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) { return (ISC_R_FAILURE); } + return (ISC_R_SUCCESS); #else - UNUSED(handle); - result = ISC_R_NOTIMPLEMENTED; + UNUSED(fd); + UNUSED(sa_family); + return (ISC_R_NOTIMPLEMENTED); #endif - return (result); } isc_result_t diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index fbfc5aebce..2540083702 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -318,6 +318,8 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { isc_nmsocket_t *sock = ievent->sock; struct sockaddr_storage sname; int r, flags = 0, snamelen = sizeof(sname); + sa_family_t sa_family; + uv_os_fd_t fd; REQUIRE(isc__nm_in_netthread()); REQUIRE(sock->type == isc_nm_tcplistener); @@ -334,14 +336,16 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]); - if (sock->iface->addr.type.sa.sa_family == 
AF_INET6) { + sa_family = sock->iface->addr.type.sa.sa_family; + if (sa_family == AF_INET6) { flags = UV_TCP_IPV6ONLY; } r = uv_tcp_bind(&sock->uv_handle.tcp, &sock->iface->addr.type.sa, flags); if (r == UV_EADDRNOTAVAIL && - isc__nm_socket_freebind(&sock->uv_handle.handle) == ISC_R_SUCCESS) + uv_fileno(&sock->uv_handle.handle, &fd) == 0 && + isc__nm_socket_freebind(fd, sa_family) == ISC_R_SUCCESS) { /* * Retry binding with IP_FREEBIND (or equivalent option) if the diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index 83ab7b54c9..e531857e1c 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -137,6 +137,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { isc_nmsocket_t *sock = ievent->sock; int r, uv_bind_flags = 0; int uv_init_flags = 0; + sa_family_t sa_family; REQUIRE(sock->type == isc_nm_udpsocket); REQUIRE(sock->iface != NULL); @@ -158,14 +159,15 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) { isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]); } - if (sock->iface->addr.type.sa.sa_family == AF_INET6) { + sa_family = sock->iface->addr.type.sa.sa_family; + if (sa_family == AF_INET6) { uv_bind_flags |= UV_UDP_IPV6ONLY; } r = uv_udp_bind(&sock->uv_handle.udp, &sock->parent->iface->addr.type.sa, uv_bind_flags); if (r == UV_EADDRNOTAVAIL && - isc__nm_socket_freebind(&sock->uv_handle.handle) == ISC_R_SUCCESS) + isc__nm_socket_freebind(sock->fd, sa_family) == ISC_R_SUCCESS) { /* * Retry binding with IP_FREEBIND (or equivalent option) if the From acb6ad9e3c1b1c873c0cd535262cbe7730b5b750 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Mon, 5 Oct 2020 12:25:19 +0200 Subject: [PATCH 4/5] Use uv_os_sock_t instead of uv_os_fd_t for sockets On POSIX based systems both uv_os_sock_t and uv_os_fd_t are both typedef to int. That's not true on Windows, where uv_os_sock_t is SOCKET and uv_os_fd_t is HANDLE and they differ in level of indirection. 
--- lib/isc/netmgr/netmgr-int.h | 8 ++++---- lib/isc/netmgr/netmgr.c | 8 ++++---- lib/isc/netmgr/tcp.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 72c76a5afd..1ea281e251 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -834,25 +834,25 @@ isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid); */ isc_result_t -isc__nm_socket_freebind(uv_os_fd_t fd, sa_family_t sa_family); +isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family); /*%< * Set the IP_FREEBIND (or equivalent) socket option on the uv_handle */ isc_result_t -isc__nm_socket_reuseport(uv_os_fd_t fd); +isc__nm_socket_reuseport(uv_os_sock_t fd); /*%< * Set the SO_REUSEPORT (or equivalent) socket option on the fd */ isc_result_t -isc__nm_socket_incoming_cpu(uv_os_fd_t fd); +isc__nm_socket_incoming_cpu(uv_os_sock_t fd); /*%< * Set the SO_INCOMING_CPU socket option on the fd if available */ isc_result_t -isc__nm_socket_dontfrag(uv_os_fd_t fd, sa_family_t sa_family); +isc__nm_socket_dontfrag(uv_os_sock_t fd, sa_family_t sa_family); /*%< * Set the SO_IP_DONTFRAG (or equivalent) socket option of the fd if available */ diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 02ad0b2c54..7567ee30ff 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -1584,7 +1584,7 @@ isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid) { setsockopt(socket, level, name, &(int){ 1 }, sizeof(int)) isc_result_t -isc__nm_socket_freebind(uv_os_fd_t fd, sa_family_t sa_family) { +isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) { /* * Set the IP_FREEBIND (or equivalent option) on the uv_handle. 
*/ @@ -1625,7 +1625,7 @@ isc__nm_socket_freebind(uv_os_fd_t fd, sa_family_t sa_family) { } isc_result_t -isc__nm_socket_reuseport(uv_os_fd_t fd) { +isc__nm_socket_reuseport(uv_os_sock_t fd) { /* * This is SO_REUSE**** hell: * @@ -1668,7 +1668,7 @@ isc__nm_socket_reuseport(uv_os_fd_t fd) { } isc_result_t -isc__nm_socket_incoming_cpu(uv_os_fd_t fd) { +isc__nm_socket_incoming_cpu(uv_os_sock_t fd) { #ifdef SO_INCOMING_CPU if (setsockopt_on(fd, SOL_SOCKET, SO_INCOMING_CPU) == -1) { return (ISC_R_FAILURE); @@ -1682,7 +1682,7 @@ isc__nm_socket_incoming_cpu(uv_os_fd_t fd) { } isc_result_t -isc__nm_socket_dontfrag(uv_os_fd_t fd, sa_family_t sa_family) { +isc__nm_socket_dontfrag(uv_os_sock_t fd, sa_family_t sa_family) { /* * Set the Don't Fragment flag on IP packets */ diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c index 2540083702..5f731ddfee 100644 --- a/lib/isc/netmgr/tcp.c +++ b/lib/isc/netmgr/tcp.c @@ -319,7 +319,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { struct sockaddr_storage sname; int r, flags = 0, snamelen = sizeof(sname); sa_family_t sa_family; - uv_os_fd_t fd; + uv_os_sock_t fd; REQUIRE(isc__nm_in_netthread()); REQUIRE(sock->type == isc_nm_tcplistener); @@ -344,7 +344,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) { r = uv_tcp_bind(&sock->uv_handle.tcp, &sock->iface->addr.type.sa, flags); if (r == UV_EADDRNOTAVAIL && - uv_fileno(&sock->uv_handle.handle, &fd) == 0 && + uv_fileno(&sock->uv_handle.handle, (uv_os_fd_t *)&fd) == 0 && isc__nm_socket_freebind(fd, sa_family) == ISC_R_SUCCESS) { /* From fd975a551dc23b18c2a63105c0b2bb9cf0015021 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Mon, 5 Oct 2020 13:14:04 +0200 Subject: [PATCH 5/5] Split reusing the addr/port and load-balancing socket options The SO_REUSEADDR, SO_REUSEPORT and SO_REUSEPORT_LB socket options have different meanings on different platforms. 
In this commit, we split the function into two separate functions: one that enables reuse of the address/port and one that enables load balancing. The libuv library already has multiplatform support for setting SO_REUSEADDR and SO_REUSEPORT that allows binding to the same address and port, but unfortunately, when used after the load-balancing socket options have already been set, it overrides the previous setting, so we need our own helper function to enable the SO_REUSEADDR/SO_REUSEPORT first and then enable the load-balancing socket option. --- lib/isc/netmgr/netmgr-int.h | 10 +++++-- lib/isc/netmgr/netmgr.c | 60 ++++++++++++++++++++++++------------- lib/isc/netmgr/udp.c | 6 +++- 3 files changed, 53 insertions(+), 23 deletions(-) diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h index 1ea281e251..ceeb5cbb27 100644 --- a/lib/isc/netmgr/netmgr-int.h +++ b/lib/isc/netmgr/netmgr-int.h @@ -840,9 +840,15 @@ isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family); */ isc_result_t -isc__nm_socket_reuseport(uv_os_sock_t fd); +isc__nm_socket_reuse(uv_os_sock_t fd); /*%< - * Set the SO_REUSEPORT (or equivalent) socket option on the fd + * Set the SO_REUSEADDR or SO_REUSEPORT (or equivalent) socket option on the fd + */ + +isc_result_t +isc__nm_socket_reuse_lb(uv_os_sock_t fd); +/*%< + * Set the SO_REUSEPORT_LB (or equivalent) socket option on the fd */ isc_result_t diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 7567ee30ff..28dc6f22e1 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -1625,46 +1625,66 @@ isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) { } isc_result_t -isc__nm_socket_reuseport(uv_os_sock_t fd) { +isc__nm_socket_reuse(uv_os_sock_t fd) { /* - * This is SO_REUSE**** hell: - * * Generally, the SO_REUSEADDR socket option allows reuse of - * local addresses. On Windows, it also allows a socket to - * forcibly bind to a port in use by another socket. + * local addresses. 
* - * On Linux, SO_REUSEPORT socket option allows sockets to be - * bound to an identical socket address. For UDP sockets, the - * use of this option can provide better distribution of - * incoming datagrams to multiple processes (or threads) as - * compared to the traditional technique of having multiple - * processes compete to receive datagrams on the same socket. + * On the BSDs, SO_REUSEPORT implies SO_REUSEADDR but with some + * additional refinements for programs that use multicast. * - * On FreeBSD 12+, the same thing is achieved with SO_REUSEPORT_LB. + * On Linux, SO_REUSEPORT has different semantics: it _shares_ the port + * rather than steal it from the current listener, so we don't use it + * here, but rather in isc__nm_socket_reuse_lb(). * + * On Windows, it also allows a socket to forcibly bind to a port in use + * by another socket. */ - isc_result_t result = ISC_R_NOTIMPLEMENTED; -#if defined(SO_REUSEADDR) + +#if defined(SO_REUSEPORT) && !defined(__linux__) + if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) { + return (ISC_R_FAILURE); + } + return (ISC_R_SUCCESS); +#elif defined(SO_REUSEADDR) if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEADDR) == -1) { return (ISC_R_FAILURE); - } else { - result = ISC_R_SUCCESS; } + return (ISC_R_SUCCESS); +#else + UNUSED(fd); + return (ISC_R_NOTIMPLEMENTED); #endif +} + +isc_result_t +isc__nm_socket_reuse_lb(uv_os_sock_t fd) { + /* + * On FreeBSD 12+, SO_REUSEPORT_LB socket option allows sockets to be + * bound to an identical socket address. For UDP sockets, the use of + * this option can provide better distribution of incoming datagrams to + * multiple processes (or threads) as compared to the traditional + * technique of having multiple processes compete to receive datagrams + * on the same socket. + * + * On Linux, the same thing is achieved simply with SO_REUSEPORT. 
+ */ #if defined(SO_REUSEPORT_LB) if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB) == -1) { return (ISC_R_FAILURE); } else { - result = ISC_R_SUCCESS; + return (ISC_R_SUCCESS); } -#elif defined(SO_REUSEPORT) +#elif defined(SO_REUSEPORT) && defined(__linux__) if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) { return (ISC_R_FAILURE); } else { - result = ISC_R_SUCCESS; + return (ISC_R_SUCCESS); } +#else + UNUSED(fd); + return (ISC_R_NOTIMPLEMENTED); #endif - return (result); } isc_result_t diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c index e531857e1c..7056a29bf2 100644 --- a/lib/isc/netmgr/udp.c +++ b/lib/isc/netmgr/udp.c @@ -82,7 +82,11 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb, csock->fd = socket(family, SOCK_DGRAM, 0); RUNTIME_CHECK(csock->fd >= 0); - result = isc__nm_socket_reuseport(csock->fd); + result = isc__nm_socket_reuse(csock->fd); + RUNTIME_CHECK(result == ISC_R_SUCCESS || + result == ISC_R_NOTIMPLEMENTED); + + result = isc__nm_socket_reuse_lb(csock->fd); RUNTIME_CHECK(result == ISC_R_SUCCESS || result == ISC_R_NOTIMPLEMENTED);