2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-31 06:25:31 +00:00

Merge branch '4383-limit-tree-pruning-overhead' into 'v9.19.20-release'

Limit isc_async_run() overhead for tree pruning

See merge request isc-private/bind9!619
This commit is contained in:
Michał Kępień
2024-01-05 11:39:08 +00:00
5 changed files with 98 additions and 58 deletions

View File

@@ -1,4 +1,5 @@
6319. [placeholder]
6319. [func] Limit isc_async_run() overhead for RBTDB tree pruning.
[GL #4383]
6318. [placeholder]

View File

@@ -118,6 +118,12 @@ struct dns_rbtnode {
*/
ISC_LINK(dns_rbtnode_t) deadlink;
/*%
* This linked list is used to store nodes from which tree pruning can
* be started.
*/
ISC_LINK(dns_rbtnode_t) prunelink;
/*@{*/
/*!
* These values are used in the RBT DB implementation. The appropriate

View File

@@ -1520,6 +1520,7 @@ create_node(isc_mem_t *mctx, const dns_name_t *name, dns_rbtnode_t **nodep) {
};
ISC_LINK_INIT(node, deadlink);
ISC_LINK_INIT(node, prunelink);
isc_refcount_init(&node->references, 0);

View File

@@ -439,6 +439,7 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log) {
unsigned int i;
isc_result_t result;
char buf[DNS_NAME_FORMATSIZE];
dns_rbtnode_t *node = NULL;
dns_rbt_t **treep = NULL;
isc_time_t start;
@@ -461,8 +462,6 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log) {
* the overhead of unlinking all nodes here should be negligible.
*/
for (i = 0; i < rbtdb->node_lock_count; i++) {
dns_rbtnode_t *node = NULL;
node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
while (node != NULL) {
ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
@@ -470,6 +469,12 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log) {
}
}
node = ISC_LIST_HEAD(rbtdb->prunenodes);
while (node != NULL) {
ISC_LIST_UNLINK(rbtdb->prunenodes, node, prunelink);
node = ISC_LIST_HEAD(rbtdb->prunenodes);
}
rbtdb->quantum = (rbtdb->loop != NULL) ? 100 : 0;
for (;;) {
@@ -1148,16 +1153,26 @@ is_leaf(dns_rbtnode_t *node) {
node->left == NULL && node->right == NULL);
}
/*%
 * Queue 'node' as a starting point for tree pruning.
 *
 * A reference to 'node' is taken and the node is appended to
 * rbtdb->prunenodes.  prune_tree() is scheduled on rbtdb->loop only if the
 * list was empty on entry; when the list already holds nodes, a previously
 * queued prune_tree() run is expected to process this node as well, because
 * prune_tree() keeps going until rbtdb->prunenodes is empty.  This way the
 * number of isc_async_run() calls does not grow with the number of nodes
 * queued for pruning.
 *
 * When prune_tree() is scheduled, a database reference is attached and
 * passed as the callback argument; prune_tree() detaches it when done.
 *
 * Requires:
 * \li	The tree lock is held, as this function reads and updates
 *	rbtdb->prunenodes.
 * \li	'locktype' is isc_rwlocktype_write.
 * \li	'node' is not already linked into rbtdb->prunenodes.
 */
static void
send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
		   isc_rwlocktype_t locktype DNS__DB_FLARG) {
	/*
	 * Sample the list state before appending: a non-empty list means a
	 * prune_tree() run has already been queued and not yet drained it.
	 */
	bool pruning_queued = (ISC_LIST_HEAD(rbtdb->prunenodes) != NULL);

	INSIST(locktype == isc_rwlocktype_write);

	/* Keep the node referenced while it sits on the prune list. */
	dns__rbtdb_newref(rbtdb, node, locktype DNS__DB_FLARG_PASS);
	INSIST(!ISC_LINK_LINKED(node, prunelink));
	ISC_LIST_APPEND(rbtdb->prunenodes, node, prunelink);

	if (!pruning_queued) {
		dns_db_t *db = NULL;

		/* The callback owns this reference and releases it. */
		dns_db_attach((dns_db_t *)rbtdb, &db);
		isc_async_run(rbtdb->loop, prune_tree, db);
	}
}
/*%
@@ -1455,64 +1470,83 @@ restore_locks:
}
/*
* Prune the tree by recursively cleaning-up single leaves. In the worst
* case, the number of iterations is the number of tree levels, which is at
* most the maximum number of domain name labels, i.e., 127. In practice, this
* should be much smaller (only a few times), and even the worst case would be
* acceptable for a single event.
* Prune the tree by recursively cleaning up single leaves. Go through all
* nodes stored in the rbtdb->prunenodes list; for each of them, in the worst
* case, it will be necessary to traverse a number of tree levels equal to the
* maximum legal number of domain name labels (127); in practice, the number of
* tree levels to traverse will virtually always be much smaller (a few levels
* at most). While holding the tree lock throughout this entire operation is
* less than ideal, so is splitting the latter up by queueing a separate
* prune_tree() run for each node to start pruning from (as queueing requires
* allocating memory and can therefore potentially be exploited to exhaust
* available memory). Also note that actually freeing up the memory used by
* RBTDB nodes (which is what this function does) is essential to keeping cache
* memory use in check, so since the tree lock needs to be acquired anyway,
* freeing as many nodes as possible before the tree lock gets released is
* prudent.
*/
static void
prune_tree(void *arg) {
prune_t *prune = (prune_t *)arg;
dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)prune->db;
dns_rbtnode_t *node = prune->node;
dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)arg;
dns_rbtnode_t *node = NULL;
dns_rbtnode_t *parent = NULL;
unsigned int locknum;
isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_mem_put(rbtdb->common.mctx, prune, sizeof(*prune));
TREE_WRLOCK(&rbtdb->tree_lock, &tlocktype);
locknum = node->locknum;
NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
do {
parent = node->parent;
dns__rbtdb_decref(rbtdb, node, 0, &nlocktype, &tlocktype, true,
true DNS__DB_FILELINE);
if (parent != NULL && parent->down == NULL) {
/*
* node was the only down child of the parent and has
* just been removed. We'll then need to examine the
* parent. Keep the lock if possible; otherwise,
* release the old lock and acquire one for the parent.
*/
if (parent->locknum != locknum) {
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
&nlocktype);
locknum = parent->locknum;
NODE_WRLOCK(&rbtdb->node_locks[locknum].lock,
&nlocktype);
while ((node = ISC_LIST_HEAD(rbtdb->prunenodes)) != NULL) {
locknum = node->locknum;
NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
do {
if (ISC_LINK_LINKED(node, prunelink)) {
ISC_LIST_UNLINK(rbtdb->prunenodes, node,
prunelink);
}
/*
* We need to gain a reference to the node before
* decrementing it in the next iteration.
*/
if (ISC_LINK_LINKED(parent, deadlink)) {
ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
parent = node->parent;
dns__rbtdb_decref(rbtdb, node, 0, &nlocktype,
&tlocktype, true,
true DNS__DB_FILELINE);
if (parent != NULL && parent->down == NULL) {
/*
* node was the only down child of the parent
* and has just been removed. We'll then need
* to examine the parent. Keep the lock if
* possible; otherwise, release the old lock and
* acquire one for the parent.
*/
if (parent->locknum != locknum) {
NODE_UNLOCK(
&rbtdb->node_locks[locknum].lock,
&nlocktype);
locknum = parent->locknum;
NODE_WRLOCK(
&rbtdb->node_locks[locknum].lock,
&nlocktype);
}
/*
* We need to gain a reference to the node
* before decrementing it in the next iteration.
*/
if (ISC_LINK_LINKED(parent, deadlink)) {
ISC_LIST_UNLINK(
rbtdb->deadnodes[locknum],
parent, deadlink);
}
dns__rbtdb_newref(rbtdb, parent,
nlocktype DNS__DB_FILELINE);
} else {
parent = NULL;
}
dns__rbtdb_newref(rbtdb, parent,
nlocktype DNS__DB_FILELINE);
} else {
parent = NULL;
}
node = parent;
} while (node != NULL);
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
node = parent;
} while (node != NULL);
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
}
TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype);
dns_db_detach((dns_db_t **)&rbtdb);
@@ -3858,6 +3892,8 @@ dns__rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
ISC_LIST_INIT(rbtdb->deadnodes[i]);
}
ISC_LIST_INIT(rbtdb->prunenodes);
rbtdb->active = rbtdb->node_lock_count;
for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {

View File

@@ -296,6 +296,10 @@ struct dns_rbtdb {
*/
dns_rbtnodelist_t *deadnodes;
/* List of nodes from which recursive tree pruning can be started.
* Locked by tree_lock. */
dns_rbtnodelist_t prunenodes;
/*
* Heaps. These are used for TTL based expiry in a cache,
* or for zone resigning in a zone DB. hmctx is the memory
@@ -342,14 +346,6 @@ typedef struct {
isc_stdtime_t now;
} rbtdb_load_t;
/*%
 * Prune context: bundles the values that prune_tree() unpacks when it is
 * handed a prune_t as its callback argument.
 */
typedef struct {
/* Database the request belongs to; prune_tree() casts it to dns_rbtdb_t *. */
dns_db_t *db;
/* Node at which prune_tree() starts pruning. */
dns_rbtnode_t *node;
} prune_t;
extern dns_dbmethods_t dns__rbtdb_zonemethods;
extern dns_dbmethods_t dns__rbtdb_cachemethods;