mirror of
https://gitlab.isc.org/isc-projects/bind9
synced 2025-08-29 13:38:26 +00:00
Merge branch 'ondrej/refactor-setsockopt' into 'main'
Refactor the setsockopt() code in network manager into helper functions. See merge request isc-projects/bind9!4227
This commit is contained in:
commit
d51f09a8d0
@ -834,7 +834,31 @@ isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid);
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
isc_result_t
|
isc_result_t
|
||||||
isc__nm_socket_freebind(const uv_handle_t *handle);
|
isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family);
|
||||||
/*%<
|
/*%<
|
||||||
* Set the IP_FREEBIND (or equivalent) socket option on the uv_handle
|
* Set the IP_FREEBIND (or equivalent) socket option on the fd
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
isc_result_t
|
||||||
|
isc__nm_socket_reuse(uv_os_sock_t fd);
|
||||||
|
/*%<
|
||||||
|
* Set the SO_REUSEADDR or SO_REUSEPORT (or equivalent) socket option on the fd
|
||||||
|
*/
|
||||||
|
|
||||||
|
isc_result_t
|
||||||
|
isc__nm_socket_reuse_lb(uv_os_sock_t fd);
|
||||||
|
/*%<
|
||||||
|
* Set the SO_REUSEPORT_LB (or equivalent) socket option on the fd
|
||||||
|
*/
|
||||||
|
|
||||||
|
isc_result_t
|
||||||
|
isc__nm_socket_incoming_cpu(uv_os_sock_t fd);
|
||||||
|
/*%<
|
||||||
|
* Set the SO_INCOMING_CPU socket option on the fd if available
|
||||||
|
*/
|
||||||
|
|
||||||
|
isc_result_t
|
||||||
|
isc__nm_socket_dontfrag(uv_os_sock_t fd, sa_family_t sa_family);
|
||||||
|
/*%<
|
||||||
|
* Set the IP_DONTFRAG / IPV6_DONTFRAG (or equivalent) socket option on the fd if available
|
||||||
|
*/
|
||||||
|
@ -1584,51 +1584,169 @@ isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid) {
|
|||||||
setsockopt(socket, level, name, &(int){ 1 }, sizeof(int))
|
setsockopt(socket, level, name, &(int){ 1 }, sizeof(int))
|
||||||
|
|
||||||
isc_result_t
|
isc_result_t
|
||||||
isc__nm_socket_freebind(const uv_handle_t *handle) {
|
isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) {
|
||||||
/*
|
/*
|
||||||
* Set the IP_FREEBIND (or equivalent option) on the uv_handle.
|
* Set the IP_FREEBIND (or equivalent option) on the fd.
|
||||||
*/
|
*/
|
||||||
isc_result_t result = ISC_R_SUCCESS;
|
|
||||||
uv_os_fd_t fd;
|
|
||||||
if (uv_fileno(handle, &fd) != 0) {
|
|
||||||
return (ISC_R_FAILURE);
|
|
||||||
}
|
|
||||||
#ifdef IP_FREEBIND
|
#ifdef IP_FREEBIND
|
||||||
|
UNUSED(sa_family);
|
||||||
if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) {
|
if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) {
|
||||||
return (ISC_R_FAILURE);
|
return (ISC_R_FAILURE);
|
||||||
}
|
}
|
||||||
|
return (ISC_R_SUCCESS);
|
||||||
#elif defined(IP_BINDANY) || defined(IPV6_BINDANY)
|
#elif defined(IP_BINDANY) || defined(IPV6_BINDANY)
|
||||||
struct sockaddr_in sockfd;
|
if (sa_family == AF_INET) {
|
||||||
|
|
||||||
if (getsockname(fd, (struct sockaddr *)&sockfd,
|
|
||||||
&(socklen_t){ sizeof(sockfd) }) == -1)
|
|
||||||
{
|
|
||||||
return (ISC_R_FAILURE);
|
|
||||||
}
|
|
||||||
#if defined(IP_BINDANY)
|
#if defined(IP_BINDANY)
|
||||||
if (sockfd.sin_family == AF_INET) {
|
|
||||||
if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) {
|
if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) {
|
||||||
return (ISC_R_FAILURE);
|
return (ISC_R_FAILURE);
|
||||||
}
|
}
|
||||||
}
|
return (ISC_R_SUCCESS);
|
||||||
#endif
|
#endif
|
||||||
|
} else if (sa_family == AF_INET6) {
|
||||||
#if defined(IPV6_BINDANY)
|
#if defined(IPV6_BINDANY)
|
||||||
if (sockfd.sin_family == AF_INET6) {
|
|
||||||
if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) {
|
if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) {
|
||||||
return (ISC_R_FAILURE);
|
return (ISC_R_FAILURE);
|
||||||
}
|
}
|
||||||
}
|
return (ISC_R_SUCCESS);
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
return (ISC_R_NOTIMPLEMENTED);
|
||||||
#elif defined(SO_BINDANY)
|
#elif defined(SO_BINDANY)
|
||||||
|
UNUSED(sa_family);
|
||||||
if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) {
|
if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) {
|
||||||
return (ISC_R_FAILURE);
|
return (ISC_R_FAILURE);
|
||||||
}
|
}
|
||||||
|
return (ISC_R_SUCCESS);
|
||||||
#else
|
#else
|
||||||
UNUSED(handle);
|
|
||||||
UNUSED(fd);
|
UNUSED(fd);
|
||||||
result = ISC_R_NOTIMPLEMENTED;
|
UNUSED(sa_family);
|
||||||
|
return (ISC_R_NOTIMPLEMENTED);
|
||||||
#endif
|
#endif
|
||||||
return (result);
|
}
|
||||||
|
|
||||||
|
isc_result_t
|
||||||
|
isc__nm_socket_reuse(uv_os_sock_t fd) {
|
||||||
|
/*
|
||||||
|
* Generally, the SO_REUSEADDR socket option allows reuse of
|
||||||
|
* local addresses.
|
||||||
|
*
|
||||||
|
* On the BSDs, SO_REUSEPORT implies SO_REUSEADDR but with some
|
||||||
|
* additional refinements for programs that use multicast.
|
||||||
|
*
|
||||||
|
* On Linux, SO_REUSEPORT has different semantics: it _shares_ the port
|
||||||
|
* rather than steal it from the current listener, so we don't use it
|
||||||
|
* here, but rather in isc__nm_socket_reuse_lb().
|
||||||
|
*
|
||||||
|
* On Windows, it also allows a socket to forcibly bind to a port in use
|
||||||
|
* by another socket.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined(SO_REUSEPORT) && !defined(__linux__)
|
||||||
|
if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
|
||||||
|
return (ISC_R_FAILURE);
|
||||||
|
}
|
||||||
|
return (ISC_R_SUCCESS);
|
||||||
|
#elif defined(SO_REUSEADDR)
|
||||||
|
if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEADDR) == -1) {
|
||||||
|
return (ISC_R_FAILURE);
|
||||||
|
}
|
||||||
|
return (ISC_R_SUCCESS);
|
||||||
|
#else
|
||||||
|
UNUSED(fd);
|
||||||
|
return (ISC_R_NOTIMPLEMENTED);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
isc_result_t
|
||||||
|
isc__nm_socket_reuse_lb(uv_os_sock_t fd) {
|
||||||
|
/*
|
||||||
|
* On FreeBSD 12+, SO_REUSEPORT_LB socket option allows sockets to be
|
||||||
|
* bound to an identical socket address. For UDP sockets, the use of
|
||||||
|
* this option can provide better distribution of incoming datagrams to
|
||||||
|
* multiple processes (or threads) as compared to the traditional
|
||||||
|
* technique of having multiple processes compete to receive datagrams
|
||||||
|
* on the same socket.
|
||||||
|
*
|
||||||
|
* On Linux, the same thing is achieved simply with SO_REUSEPORT.
|
||||||
|
*/
|
||||||
|
#if defined(SO_REUSEPORT_LB)
|
||||||
|
if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB) == -1) {
|
||||||
|
return (ISC_R_FAILURE);
|
||||||
|
} else {
|
||||||
|
return (ISC_R_SUCCESS);
|
||||||
|
}
|
||||||
|
#elif defined(SO_REUSEPORT) && defined(__linux__)
|
||||||
|
if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
|
||||||
|
return (ISC_R_FAILURE);
|
||||||
|
} else {
|
||||||
|
return (ISC_R_SUCCESS);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
UNUSED(fd);
|
||||||
|
return (ISC_R_NOTIMPLEMENTED);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
isc_result_t
|
||||||
|
isc__nm_socket_incoming_cpu(uv_os_sock_t fd) {
|
||||||
|
#ifdef SO_INCOMING_CPU
|
||||||
|
if (setsockopt_on(fd, SOL_SOCKET, SO_INCOMING_CPU) == -1) {
|
||||||
|
return (ISC_R_FAILURE);
|
||||||
|
} else {
|
||||||
|
return (ISC_R_SUCCESS);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
UNUSED(fd);
|
||||||
|
#endif
|
||||||
|
return (ISC_R_NOTIMPLEMENTED);
|
||||||
|
}
|
||||||
|
|
||||||
|
isc_result_t
|
||||||
|
isc__nm_socket_dontfrag(uv_os_sock_t fd, sa_family_t sa_family) {
|
||||||
|
/*
|
||||||
|
* Set the Don't Fragment flag on IP packets
|
||||||
|
*/
|
||||||
|
if (sa_family == AF_INET6) {
|
||||||
|
#if defined(IPV6_DONTFRAG)
|
||||||
|
if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_DONTFRAG) == -1) {
|
||||||
|
return (ISC_R_FAILURE);
|
||||||
|
} else {
|
||||||
|
return (ISC_R_SUCCESS);
|
||||||
|
}
|
||||||
|
#elif defined(IPV6_MTU_DISCOVER)
|
||||||
|
if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
|
||||||
|
&(int){ IP_PMTUDISC_DO }, sizeof(int)) == -1)
|
||||||
|
{
|
||||||
|
return (ISC_R_FAILURE);
|
||||||
|
} else {
|
||||||
|
return (ISC_R_SUCCESS);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
UNUSED(fd);
|
||||||
|
#endif
|
||||||
|
} else if (sa_family == AF_INET) {
|
||||||
|
#if defined(IP_DONTFRAG)
|
||||||
|
if (setsockopt_on(fd, IPPROTO_IP, IP_DONTFRAG) == -1) {
|
||||||
|
return (ISC_R_FAILURE);
|
||||||
|
} else {
|
||||||
|
return (ISC_R_SUCCESS);
|
||||||
|
}
|
||||||
|
#elif defined(IP_MTU_DISCOVER)
|
||||||
|
if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
|
||||||
|
&(int){ IP_PMTUDISC_DO }, sizeof(int)) == -1)
|
||||||
|
{
|
||||||
|
return (ISC_R_FAILURE);
|
||||||
|
} else {
|
||||||
|
return (ISC_R_SUCCESS);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
UNUSED(fd);
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
return (ISC_R_FAMILYNOSUPPORT);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (ISC_R_NOTIMPLEMENTED);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef NETMGR_TRACE
|
#ifdef NETMGR_TRACE
|
||||||
|
@ -318,6 +318,8 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
|
|||||||
isc_nmsocket_t *sock = ievent->sock;
|
isc_nmsocket_t *sock = ievent->sock;
|
||||||
struct sockaddr_storage sname;
|
struct sockaddr_storage sname;
|
||||||
int r, flags = 0, snamelen = sizeof(sname);
|
int r, flags = 0, snamelen = sizeof(sname);
|
||||||
|
sa_family_t sa_family;
|
||||||
|
uv_os_sock_t fd;
|
||||||
|
|
||||||
REQUIRE(isc__nm_in_netthread());
|
REQUIRE(isc__nm_in_netthread());
|
||||||
REQUIRE(sock->type == isc_nm_tcplistener);
|
REQUIRE(sock->type == isc_nm_tcplistener);
|
||||||
@ -334,14 +336,16 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
|
|||||||
|
|
||||||
isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]);
|
isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]);
|
||||||
|
|
||||||
if (sock->iface->addr.type.sa.sa_family == AF_INET6) {
|
sa_family = sock->iface->addr.type.sa.sa_family;
|
||||||
|
if (sa_family == AF_INET6) {
|
||||||
flags = UV_TCP_IPV6ONLY;
|
flags = UV_TCP_IPV6ONLY;
|
||||||
}
|
}
|
||||||
|
|
||||||
r = uv_tcp_bind(&sock->uv_handle.tcp, &sock->iface->addr.type.sa,
|
r = uv_tcp_bind(&sock->uv_handle.tcp, &sock->iface->addr.type.sa,
|
||||||
flags);
|
flags);
|
||||||
if (r == UV_EADDRNOTAVAIL &&
|
if (r == UV_EADDRNOTAVAIL &&
|
||||||
isc__nm_socket_freebind(&sock->uv_handle.handle) == ISC_R_SUCCESS)
|
uv_fileno(&sock->uv_handle.handle, (uv_os_fd_t *)&fd) == 0 &&
|
||||||
|
isc__nm_socket_freebind(fd, sa_family) == ISC_R_SUCCESS)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Retry binding with IP_FREEBIND (or equivalent option) if the
|
* Retry binding with IP_FREEBIND (or equivalent option) if the
|
||||||
|
@ -65,8 +65,8 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb,
|
|||||||
nsock->extrahandlesize = extrahandlesize;
|
nsock->extrahandlesize = extrahandlesize;
|
||||||
|
|
||||||
for (size_t i = 0; i < mgr->nworkers; i++) {
|
for (size_t i = 0; i < mgr->nworkers; i++) {
|
||||||
|
isc_result_t result;
|
||||||
uint16_t family = iface->addr.type.sa.sa_family;
|
uint16_t family = iface->addr.type.sa.sa_family;
|
||||||
int res = 0;
|
|
||||||
|
|
||||||
isc__netievent_udplisten_t *ievent = NULL;
|
isc__netievent_udplisten_t *ievent = NULL;
|
||||||
isc_nmsocket_t *csock = &nsock->children[i];
|
isc_nmsocket_t *csock = &nsock->children[i];
|
||||||
@ -82,46 +82,20 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb,
|
|||||||
csock->fd = socket(family, SOCK_DGRAM, 0);
|
csock->fd = socket(family, SOCK_DGRAM, 0);
|
||||||
RUNTIME_CHECK(csock->fd >= 0);
|
RUNTIME_CHECK(csock->fd >= 0);
|
||||||
|
|
||||||
/*
|
result = isc__nm_socket_reuse(csock->fd);
|
||||||
* This is SO_REUSE**** hell:
|
RUNTIME_CHECK(result == ISC_R_SUCCESS ||
|
||||||
*
|
result == ISC_R_NOTIMPLEMENTED);
|
||||||
* Generally, the SO_REUSEADDR socket option allows reuse of
|
|
||||||
* local addresses. On Windows, it also allows a socket to
|
result = isc__nm_socket_reuse_lb(csock->fd);
|
||||||
* forcibly bind to a port in use by another socket.
|
RUNTIME_CHECK(result == ISC_R_SUCCESS ||
|
||||||
*
|
result == ISC_R_NOTIMPLEMENTED);
|
||||||
* On Linux, SO_REUSEPORT socket option allows sockets to be
|
|
||||||
* bound to an identical socket address. For UDP sockets, the
|
|
||||||
* use of this option can provide better distribution of
|
|
||||||
* incoming datagrams to multiple processes (or threads) as
|
|
||||||
* compared to the traditional technique of having multiple
|
|
||||||
* processes compete to receive datagrams on the same socket.
|
|
||||||
*
|
|
||||||
* On FreeBSD, the same thing is achieved with SO_REUSEPORT_LB.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
#if defined(SO_REUSEADDR)
|
|
||||||
res = setsockopt(csock->fd, SOL_SOCKET, SO_REUSEADDR,
|
|
||||||
&(int){ 1 }, sizeof(int));
|
|
||||||
RUNTIME_CHECK(res == 0);
|
|
||||||
#endif
|
|
||||||
#if defined(SO_REUSEPORT_LB)
|
|
||||||
res = setsockopt(csock->fd, SOL_SOCKET, SO_REUSEPORT_LB,
|
|
||||||
&(int){ 1 }, sizeof(int));
|
|
||||||
RUNTIME_CHECK(res == 0);
|
|
||||||
#elif defined(SO_REUSEPORT)
|
|
||||||
res = setsockopt(csock->fd, SOL_SOCKET, SO_REUSEPORT,
|
|
||||||
&(int){ 1 }, sizeof(int));
|
|
||||||
RUNTIME_CHECK(res == 0);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SO_INCOMING_CPU
|
|
||||||
/* We don't check for the result, because SO_INCOMING_CPU can be
|
/* We don't check for the result, because SO_INCOMING_CPU can be
|
||||||
* available without the setter on Linux kernel version 4.4, and
|
* available without the setter on Linux kernel version 4.4, and
|
||||||
* setting SO_INCOMING_CPU is just an optimization.
|
* setting SO_INCOMING_CPU is just an optimization.
|
||||||
*/
|
*/
|
||||||
(void)setsockopt(csock->fd, SOL_SOCKET, SO_INCOMING_CPU,
|
(void)isc__nm_socket_incoming_cpu(csock->fd);
|
||||||
&(int){ 1 }, sizeof(int));
|
|
||||||
#endif
|
|
||||||
ievent = isc__nm_get_ievent(mgr, netievent_udplisten);
|
ievent = isc__nm_get_ievent(mgr, netievent_udplisten);
|
||||||
ievent->sock = csock;
|
ievent->sock = csock;
|
||||||
isc__nm_enqueue_ievent(&mgr->workers[i],
|
isc__nm_enqueue_ievent(&mgr->workers[i],
|
||||||
@ -167,6 +141,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
|
|||||||
isc_nmsocket_t *sock = ievent->sock;
|
isc_nmsocket_t *sock = ievent->sock;
|
||||||
int r, uv_bind_flags = 0;
|
int r, uv_bind_flags = 0;
|
||||||
int uv_init_flags = 0;
|
int uv_init_flags = 0;
|
||||||
|
sa_family_t sa_family;
|
||||||
|
|
||||||
REQUIRE(sock->type == isc_nm_udpsocket);
|
REQUIRE(sock->type == isc_nm_udpsocket);
|
||||||
REQUIRE(sock->iface != NULL);
|
REQUIRE(sock->iface != NULL);
|
||||||
@ -188,14 +163,15 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
|
|||||||
isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]);
|
isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (sock->iface->addr.type.sa.sa_family == AF_INET6) {
|
sa_family = sock->iface->addr.type.sa.sa_family;
|
||||||
|
if (sa_family == AF_INET6) {
|
||||||
uv_bind_flags |= UV_UDP_IPV6ONLY;
|
uv_bind_flags |= UV_UDP_IPV6ONLY;
|
||||||
}
|
}
|
||||||
|
|
||||||
r = uv_udp_bind(&sock->uv_handle.udp,
|
r = uv_udp_bind(&sock->uv_handle.udp,
|
||||||
&sock->parent->iface->addr.type.sa, uv_bind_flags);
|
&sock->parent->iface->addr.type.sa, uv_bind_flags);
|
||||||
if (r == UV_EADDRNOTAVAIL &&
|
if (r == UV_EADDRNOTAVAIL &&
|
||||||
isc__nm_socket_freebind(&sock->uv_handle.handle) == ISC_R_SUCCESS)
|
isc__nm_socket_freebind(sock->fd, sa_family) == ISC_R_SUCCESS)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* Retry binding with IP_FREEBIND (or equivalent option) if the
|
* Retry binding with IP_FREEBIND (or equivalent option) if the
|
||||||
|
@ -56,6 +56,8 @@ typedef uint32_t socklen_t;
|
|||||||
|
|
||||||
#undef MSG_TRUNC
|
#undef MSG_TRUNC
|
||||||
|
|
||||||
|
typedef uint16_t sa_family_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set up a macro for importing and exporting from the DLL
|
* Set up a macro for importing and exporting from the DLL
|
||||||
*/
|
*/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user