2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-31 06:25:31 +00:00

Distribute queries among threads even on platforms without lb sockets

On platforms without load-balancing socket all the queries would be
handle by a single thread.  Currently, the support for load-balanced
sockets is present in Linux with SO_REUSEPORT and FreeBSD 12 with
SO_REUSEPORT_LB.

This commit adds workaround for such platforms that:

1. setups single shared listening socket for all listening nmthreads for
   UDP, TCP and TCPDNS netmgr transports

2. Calls uv_udp_bind/uv_tcp_bind on the underlying socket just once and
   for rest of the nmthreads only copy the internal libuv flags (should
   be just UV_HANDLE_BOUND and optionally UV_HANDLE_IPV6).

3. start reading on UDP socket or listening on TCP socket

The load distribution among the nmthreads is uneven, but it's still
better than utilizing just one thread for processing all the incoming
queries
This commit is contained in:
Ondřej Surý
2020-12-02 15:37:18 +01:00
parent d0f5407825
commit 1d066e4bc5
6 changed files with 182 additions and 125 deletions

View File

@@ -378,27 +378,27 @@ isc_nm_tcpconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer,
return (result);
}
static isc_result_t
isc__nm_tcp_lb_socket(sa_family_t sa_family, uv_os_sock_t *sockp) {
static uv_os_sock_t
isc__nm_tcp_lb_socket(sa_family_t sa_family) {
isc_result_t result;
uv_os_sock_t sock;
result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock);
REQUIRE(result == ISC_R_SUCCESS);
RUNTIME_CHECK(result == ISC_R_SUCCESS);
(void)isc__nm_socket_incoming_cpu(sock);
/* FIXME: set mss */
result = isc__nm_socket_reuse(sock);
REQUIRE(result == ISC_R_SUCCESS || result == ISC_R_NOTIMPLEMENTED);
RUNTIME_CHECK(result == ISC_R_SUCCESS);
#if HAVE_SO_REUSEPORT_LB
result = isc__nm_socket_reuse_lb(sock);
REQUIRE(result == ISC_R_SUCCESS || result == ISC_R_NOTIMPLEMENTED);
RUNTIME_CHECK(result == ISC_R_SUCCESS);
#endif
*sockp = sock;
return (result);
return (sock);
}
isc_result_t
@@ -410,6 +410,7 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface,
isc_nmsocket_t *sock = NULL;
sa_family_t sa_family = iface->addr.type.sa.sa_family;
size_t children_size = 0;
uv_os_sock_t fd = -1;
REQUIRE(VALID_NM(mgr));
@@ -417,7 +418,11 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface,
isc__nmsocket_init(sock, mgr, isc_nm_tcplistener, iface);
sock->rchildren = 0;
#if defined(WIN32)
sock->nchildren = 1;
#else
sock->nchildren = mgr->nworkers;
#endif
children_size = sock->nchildren * sizeof(sock->children[0]);
sock->children = isc_mem_get(mgr->mctx, children_size);
memset(sock->children, 0, children_size);
@@ -426,6 +431,10 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface,
sock->tid = isc_random_uniform(mgr->nworkers);
sock->fd = -1;
#if !HAVE_SO_REUSEPORT_LB && !defined(WIN32)
fd = isc__nm_tcp_lb_socket(sa_family);
#endif
for (size_t i = 0; i < mgr->nworkers; i++) {
isc__netievent_tcplisten_t *ievent = NULL;
isc_nmsocket_t *csock = &sock->children[i];
@@ -444,9 +453,11 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface,
csock->pquota = quota;
isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock);
result = isc__nm_tcp_lb_socket(sa_family, &csock->fd);
REQUIRE(result == ISC_R_SUCCESS ||
result == ISC_R_NOTIMPLEMENTED);
#if HAVE_SO_REUSEPORT_LB || defined(WIN32)
csock->fd = isc__nm_tcp_lb_socket(sa_family);
#else
csock->fd = dup(fd);
#endif
REQUIRE(csock->fd >= 0);
ievent = isc__nm_get_netievent_tcplisten(mgr, csock);
@@ -454,6 +465,10 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface,
(isc__netievent_t *)ievent);
}
#if !HAVE_SO_REUSEPORT_LB && !defined(WIN32)
isc__nm_closesocket(fd);
#endif
LOCK(&sock->lock);
while (sock->rchildren != mgr->nworkers) {
WAIT(&sock->cond, &sock->lock);
@@ -479,11 +494,12 @@ isc_nm_listentcp(isc_nm_t *mgr, isc_nmiface_t *iface,
void
isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
isc__netievent_tcplisten_t *ievent = (isc__netievent_tcplisten_t *)ev0;
isc_nmiface_t *iface;
isc_nmiface_t *iface = NULL;
sa_family_t sa_family;
int r;
int flags = 0;
isc_nmsocket_t *sock = NULL;
isc_result_t result;
REQUIRE(VALID_NMSOCK(ievent->sock));
REQUIRE(ievent->sock->tid == isc_nm_tid());
@@ -502,14 +518,18 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
RUNTIME_CHECK(r == 0);
uv_handle_set_data(&sock->uv_handle.handle, sock);
/* This keeps the socket alive after everything else is gone */
isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });
r = uv_timer_init(&worker->loop, &sock->timer);
RUNTIME_CHECK(r == 0);
uv_handle_set_data((uv_handle_t *)&sock->timer, sock);
LOCK(&sock->parent->lock);
r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
if (r < 0) {
isc__nm_closesocket(sock->fd);
@@ -522,12 +542,29 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
flags = UV_TCP_IPV6ONLY;
}
#if HAVE_SO_REUSEPORT_LB || defined(WIN32)
r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
&sock->iface->addr.type.sa, flags);
if (r < 0 && r != UV_EINVAL) {
if (r < 0) {
isc__nm_incstats(sock->mgr, sock->statsindex[STATID_BINDFAIL]);
goto failure;
}
#else
if (sock->parent->fd == -1) {
r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
&sock->iface->addr.type.sa, flags);
if (r < 0) {
isc__nm_incstats(sock->mgr,
sock->statsindex[STATID_BINDFAIL]);
goto failure;
}
sock->parent->uv_handle.tcp.flags = sock->uv_handle.tcp.flags;
sock->parent->fd = sock->fd;
} else {
/* The socket is already bound, just copy the flags */
sock->uv_handle.tcp.flags = sock->parent->uv_handle.tcp.flags;
}
#endif
/*
* The callback will run in the same thread uv_listen() was called
@@ -535,7 +572,7 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
*/
r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog,
tcp_connection_cb);
if (r < 0) {
if (r != 0) {
isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
"uv_listen failed: %s",
@@ -546,27 +583,15 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
atomic_store(&sock->listening, true);
LOCK(&sock->parent->lock);
sock->parent->rchildren += 1;
if (sock->parent->result == ISC_R_DEFAULT) {
sock->parent->result = ISC_R_SUCCESS;
}
SIGNAL(&sock->parent->cond);
if (!atomic_load(&sock->parent->active)) {
WAIT(&sock->parent->scond, &sock->parent->lock);
}
INSIST(atomic_load(&sock->parent->active));
UNLOCK(&sock->parent->lock);
return;
failure:
sock->pquota = NULL;
result = isc__nm_uverr2result(r);
if (result != ISC_R_SUCCESS) {
sock->pquota = NULL;
}
LOCK(&sock->parent->lock);
sock->parent->rchildren += 1;
if (sock->parent->result == ISC_R_DEFAULT) {
sock->parent->result = isc__nm_uverr2result(r);
sock->parent->result = result;
}
SIGNAL(&sock->parent->cond);
if (!atomic_load(&sock->parent->active)) {
@@ -926,7 +951,7 @@ isc__nm_tcp_resumeread(isc_nmhandle_t *handle) {
static void
read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream);
isc__nm_uvreq_t *req;
isc__nm_uvreq_t *req = NULL;
REQUIRE(VALID_NMSOCK(sock));
REQUIRE(sock->tid == isc_nm_tid());
@@ -1027,7 +1052,7 @@ accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) {
isc_result_t result;
struct sockaddr_storage ss;
isc_sockaddr_t local;
isc_nmhandle_t *handle;
isc_nmhandle_t *handle = NULL;
REQUIRE(VALID_NMSOCK(ssock));
REQUIRE(ssock->tid == isc_nm_tid());