2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-31 14:35:26 +00:00

chg: dev: Increase the scalability in the ADB

This MR reduces lock contention and increases scalability in the ADB by:
 a) Using the SIEVE algorithm instead of classical LRU;
 b) Replacing the rwlocked isc_hashmap with an RCU cds_lfht table;
 c) Replacing the single LRU table per-object with per-loop LRU tables per-object.

Merge branch 'ondrej/use-urcu-lfht-for-ADB-tables' into 'main'

See merge request isc-projects/bind9!10645
This commit is contained in:
Ondřej Surý
2025-07-09 23:19:56 +02:00
13 changed files with 521 additions and 524 deletions

32
doc/dev/LRU.md Normal file
View File

@@ -0,0 +1,32 @@
<!--
Copyright (C) Internet Systems Consortium, Inc. ("ISC")
SPDX-License-Identifier: MPL-2.0
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, you can obtain one at https://mozilla.org/MPL/2.0/.
See the COPYRIGHT file distributed with this work for additional
information regarding copyright ownership.
-->
# Per-Loop LRU cleaning
Several compilation units now employ per-loop LRU lists. When combined
with other algorithms, this design allows LRU lists to be lock-free.
When a new entry is created, it is assigned to the currently-running loop
(`isc_tid()`), added to the loop's LRU list, and added to a global
lock-free (`cds_lfht`) hash table. Deletion of the entry by any loop will
first delete it from the hash table, then schedule it to be removed from
the LRU list by the entry's loop. If LRU cleaning happens in the meantime,
the entry is processed normally.
The badcache and unreachable primaries lists are very simple LRUs that don't
update the position of the entry in the list on a cache hit; they just
remove the old entry and insert a new one.
The ADB combines per-loop LRU lists with the SIEVE algorithm. On a
cache hit, SIEVE marks the entry as "visited". There is no need to
update the LRU list, so an off-loop cache hit is also lock-free.

File diff suppressed because it is too large Load Diff

View File

@@ -235,7 +235,8 @@ struct dns_adbaddrinfo {
****/
void
dns_adb_create(isc_mem_t *mem, dns_view_t *view, dns_adb_t **newadb);
dns_adb_create(isc_mem_t *mem, isc_loopmgr_t *loopmgr, dns_view_t *view,
dns_adb_t **newadb);
/*%<
* Create a new ADB.
*

View File

@@ -550,7 +550,7 @@ dns_view_createresolver(dns_view_t *view, isc_nm_t *netmgr,
}
isc_mem_create("ADB", &mctx);
dns_adb_create(mctx, view, &view->adb);
dns_adb_create(mctx, loopmgr, view, &view->adb);
isc_mem_detach(&mctx);
result = dns_requestmgr_create(view->mctx, loopmgr, view->dispatchmgr,

View File

@@ -123,16 +123,17 @@ isc_netmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, isc_nm_t **netgmrp);
* Creates a new network manager and starts it running when loopmgr is started.
*/
void
isc_netmgr_destroy(isc_nm_t **netmgrp);
/*%<
* Similar to isc_nm_detach(), but requires all other references to be gone.
*/
#if ISC_NETMGR_TRACE
#define isc_nm_ref(ptr) isc_nm__ref(ptr, __func__, __FILE__, __LINE__)
#define isc_nm_unref(ptr) isc_nm__unref(ptr, __func__, __FILE__, __LINE__)
#define isc_nm_attach(ptr, ptrp) \
isc_nm__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
#define isc_nm_detach(ptrp) isc_nm__detach(ptrp, __func__, __FILE__, __LINE__)
ISC_REFCOUNT_TRACE_DECL(isc_nm);
#else
ISC_REFCOUNT_DECL(isc_nm);
#endif
void
isc_nm_attach(isc_nm_t *mgr, isc_nm_t **dst);
void
isc_nm_detach(isc_nm_t **mgr0);
/*%<
* Attach/detach a network manager. When all references have been
* released, the network manager is shut down, freeing all resources.

View File

@@ -164,3 +164,6 @@
/*
 * Add 'entry' to 'sieve': expands to ISC_LIST_PREPEND on the sieve's
 * embedded 'list' member, using 'link' as the entry's list-link field.
 */
#define ISC_SIEVE_INSERT(sieve, entry, link) \
ISC_LIST_PREPEND((sieve).list, entry, link)
/*
 * Iterate over the entries of 'sieve': expands to ISC_LIST_FOREACH on the
 * sieve's embedded 'list' member, using 'link' as the entry's list-link
 * field.
 */
#define ISC_SIEVE_FOREACH(sieve, entry, link) \
ISC_LIST_FOREACH((sieve).list, entry, link)

View File

@@ -45,7 +45,7 @@ isc_managers_destroy(isc_mem_t **mctxp, isc_loopmgr_t **loopmgrp,
* The sequence of operations here is important:
*/
isc_netmgr_destroy(netmgrp);
isc_nm_detach(netmgrp);
isc_loopmgr_destroy(loopmgrp);
isc_mem_detach(mctxp);
}

View File

@@ -845,8 +845,8 @@ isc_mem_isovermem(isc_mem_t *ctx) {
if ((isc_mem_debugging & ISC_MEM_DEBUGUSAGE) != 0) {
fprintf(stderr,
"overmem mctx %p inuse %zu hi_water %zu\n", ctx,
inuse, hiwater);
"overmem %s mctx %p inuse %zu hi_water %zu\n",
ctx->name, ctx, inuse, hiwater);
}
atomic_store_relaxed(&ctx->stat[tid].is_overmem, true);
@@ -865,8 +865,8 @@ isc_mem_isovermem(isc_mem_t *ctx) {
if ((isc_mem_debugging & ISC_MEM_DEBUGUSAGE) != 0) {
fprintf(stderr,
"overmem mctx %p inuse %zu lo_water %zu\n", ctx,
inuse, lowater);
"overmem %s mctx %p inuse %zu lo_water %zu\n",
ctx->name, ctx, inuse, lowater);
}
atomic_store_relaxed(&ctx->stat[tid].is_overmem, false);
return false;

View File

@@ -244,62 +244,27 @@ isc_netmgr_create(isc_mem_t *mctx, isc_loopmgr_t *loopmgr, isc_nm_t **netmgrp) {
* Free the resources of the network manager.
*/
static void
nm_destroy(isc_nm_t **mgr0) {
REQUIRE(VALID_NM(*mgr0));
nm_destroy(isc_nm_t *netmgr) {
REQUIRE(VALID_NM(netmgr));
isc_nm_t *mgr = *mgr0;
*mgr0 = NULL;
isc_refcount_destroy(&netmgr->references);
isc_refcount_destroy(&mgr->references);
netmgr->magic = 0;
mgr->magic = 0;
if (mgr->stats != NULL) {
isc_stats_detach(&mgr->stats);
if (netmgr->stats != NULL) {
isc_stats_detach(&netmgr->stats);
}
isc_mem_cput(mgr->mctx, mgr->workers, mgr->nloops,
sizeof(mgr->workers[0]));
isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(*mgr));
isc_mem_cput(netmgr->mctx, netmgr->workers, netmgr->nloops,
sizeof(netmgr->workers[0]));
isc_mem_putanddetach(&netmgr->mctx, netmgr, sizeof(*netmgr));
}
void
isc_nm_attach(isc_nm_t *mgr, isc_nm_t **dst) {
REQUIRE(VALID_NM(mgr));
REQUIRE(dst != NULL && *dst == NULL);
isc_refcount_increment(&mgr->references);
*dst = mgr;
}
void
isc_nm_detach(isc_nm_t **mgr0) {
isc_nm_t *mgr = NULL;
REQUIRE(mgr0 != NULL);
REQUIRE(VALID_NM(*mgr0));
mgr = *mgr0;
*mgr0 = NULL;
if (isc_refcount_decrement(&mgr->references) == 1) {
nm_destroy(&mgr);
}
}
void
isc_netmgr_destroy(isc_nm_t **netmgrp) {
isc_nm_t *mgr = NULL;
REQUIRE(VALID_NM(*netmgrp));
mgr = *netmgrp;
*netmgrp = NULL;
REQUIRE(isc_refcount_decrement(&mgr->references) == 1);
nm_destroy(&mgr);
}
#if ISC_NETMGR_TRACE
ISC_REFCOUNT_TRACE_IMPL(isc_nm, nm_destroy)
#else
ISC_REFCOUNT_IMPL(isc_nm, nm_destroy);
#endif
void
isc_nm_maxudp(isc_nm_t *mgr, uint32_t maxudp) {

View File

@@ -245,7 +245,7 @@ teardown_test(void **state) {
isc_tlsctx_cache_detach(&tls_tlsctx_client_cache);
isc_tlsctx_free(&tls_listen_tlsctx);
isc_netmgr_destroy(&connect_nm);
isc_nm_detach(&connect_nm);
teardown_netmgr(state);
teardown_loopmgr(state);

View File

@@ -377,7 +377,7 @@ setup_test(void **state) {
static int
teardown_test(void **state ISC_ATTR_UNUSED) {
for (size_t i = 0; i < MAX_NM; i++) {
isc_netmgr_destroy(&nm[i]);
isc_nm_detach(&nm[i]);
assert_null(nm[i]);
}
isc_mem_cput(mctx, nm, MAX_NM, sizeof(nm[0]));

View File

@@ -226,10 +226,10 @@ teardown_netmgr_test(void **state ISC_ATTR_UNUSED) {
isc_tlsctx_free(&tcp_connect_tlsctx);
isc_tlsctx_free(&tcp_listen_tlsctx);
isc_netmgr_destroy(&connect_nm);
isc_nm_detach(&connect_nm);
assert_null(connect_nm);
isc_netmgr_destroy(&listen_nm);
isc_nm_detach(&listen_nm);
assert_null(listen_nm);
teardown_loopmgr(state);

View File

@@ -121,7 +121,7 @@ int
teardown_netmgr(void **state ISC_ATTR_UNUSED) {
REQUIRE(loopmgr != NULL);
isc_netmgr_destroy(&netmgr);
isc_nm_detach(&netmgr);
return 0;
}