2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-30 14:07:59 +00:00

Merge branch 'ondrej/restore-prune_tree-behaviour' into 'main'

Restore the parent cleaning logic in prune_tree()

See merge request isc-projects/bind9!8823
This commit is contained in:
Ondřej Surý
2024-03-06 12:04:39 +00:00

View File

@@ -1184,7 +1184,7 @@ dns__rbtdb_newref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
* The tree lock must be held for the result to be valid.
*/
static bool
is_leaf(dns_rbtnode_t *node) {
is_last_node_on_its_level(dns_rbtnode_t *node) {
return (node->parent != NULL && node->parent->down == node &&
node->left == NULL && node->right == NULL);
}
@@ -1231,7 +1231,7 @@ cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum DNS__DB_FLARG) {
continue;
}
if (is_leaf(node) && rbtdb->loop != NULL) {
if (is_last_node_on_its_level(node) && rbtdb->loop != NULL) {
send_to_prune_tree(
rbtdb, node,
isc_rwlocktype_write DNS__DB_FLARG_PASS);
@@ -1449,29 +1449,32 @@ dns__rbtdb_decref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
if (write_locked) {
/*
* If this node is the only one in the level it's in, deleting
* this node may recursively make its parent the only node in
* the parent level; if so, and if no one is currently using
* the parent node, this is almost the only opportunity to
* clean it up. But the recursive cleanup is not that trivial
* since the child and parent may be in different lock buckets,
* which would cause a lock order reversal problem. To avoid
* the trouble, we'll dispatch a separate event for batch
* cleaning. We need to check whether we're deleting the node
* as a result of pruning to avoid infinite dispatching.
* Note: pruning happens only when a loop has been set for the
* rbtdb. If the user of the rbtdb chooses not to set a loop,
* it's their responsibility to purge stale leaves (e.g. by
* periodic walk-through).
* If this node is the only one left on its RBTDB level,
* attempt pruning the RBTDB (i.e. deleting empty nodes that
* are ancestors of 'node' and are not interior nodes) starting
* from this node (see prune_tree()). The main reason this is
* not done immediately, but asynchronously, is that the
* ancestors of 'node' are almost guaranteed to belong to
* different node buckets and we don't want to do juggle locks
* right now.
*
* Since prune_tree() also calls dns__rbtdb_decref(), check the
* value of the 'pruning' parameter (which is only set to
* 'true' in the dns__rbtdb_decref() call present in
* prune_tree()) to prevent an infinite loop and to allow a
* node sent to prune_tree() to be deleted by the delete_node()
* call in the code branch below.
*/
if (!pruning && is_leaf(node) && rbtdb->loop != NULL) {
if (!pruning && is_last_node_on_its_level(node) &&
rbtdb->loop != NULL)
{
send_to_prune_tree(rbtdb, node,
*nlocktypep DNS__DB_FLARG_PASS);
no_reference = false;
} else {
/* We can now delete the node. */
/*
* The node can now be deleted.
*/
delete_node(rbtdb, node);
}
} else {
@@ -1494,10 +1497,27 @@ restore_locks:
}
/*
* Prune the tree by cleaning up single leaves. A single execution of this
* function cleans up a single node; if the parent of the latter becomes a
* single leaf on its own level as a result, the parent is then also sent to
* this function.
* Prune the RBTDB tree of trees. Start by attempting to delete a node that is
* the only one left on its RBTDB level (see the send_to_prune_tree() call in
* dns__rbtdb_decref()). Then, if the node has a parent (which can either
* exist on the same RBTDB level or on an upper RBTDB level), check whether the
* latter is an interior node (i.e. a node with a non-NULL 'down' pointer). If
* the parent node is not an interior node, attempt deleting the parent node as
* well and then move on to examining the parent node's parent, etc. Continue
* traversing the RBTDB tree until a node is encountered that is still an
* interior node after the previously-processed node gets deleted.
*
* It is acceptable for a node sent to this function to NOT be deleted in the
* process (e.g. if it gets reactivated in the meantime). Furthermore, node
* deletion is not a prerequisite for continuing RBTDB traversal.
*
* This function gets called once for every "starting node" and it continues
* traversing the RBTDB until the stop condition is met. In the worst case,
* the number of nodes processed by a single execution of this function is the
* number of tree levels, which is at most the maximum number of domain name
* labels (127); however, it should be much smaller in practice and deleting
* empty RBTDB nodes is critical to keeping the amount of memory used by the
* cache memory context within the configured limit anyway.
*/
static void
prune_tree(void *arg) {
@@ -1512,22 +1532,45 @@ prune_tree(void *arg) {
isc_mem_put(rbtdb->common.mctx, prune, sizeof(*prune));
TREE_WRLOCK(&rbtdb->tree_lock, &tlocktype);
parent = node->parent;
NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
dns__rbtdb_decref(rbtdb, node, 0, &nlocktype, &tlocktype, true,
true DNS__DB_FILELINE);
do {
parent = node->parent;
dns__rbtdb_decref(rbtdb, node, 0, &nlocktype, &tlocktype, true,
true DNS__DB_FILELINE);
/*
* Check whether the parent is an interior node. Note that it
* might have been one before the dns__rbtdb_decref() call on
* the previous line, but decrementing the reference count for
* 'node' could have caused 'node->parent->down' to become
* NULL.
*/
if (parent != NULL && parent->down == NULL) {
/*
* Keep the node lock if possible; otherwise, release
* the old lock and acquire one for the parent.
*/
if (parent->locknum != locknum) {
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
&nlocktype);
locknum = parent->locknum;
NODE_WRLOCK(&rbtdb->node_locks[locknum].lock,
&nlocktype);
}
/*
* We need to gain a reference to the parent node
* before decrementing it in the next iteration.
*/
dns__rbtdb_newref(rbtdb, parent,
nlocktype DNS__DB_FLARG_PASS);
} else {
parent = NULL;
}
node = parent;
} while (node != NULL);
NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
if (parent != NULL && is_leaf(parent)) {
NODE_WRLOCK(&rbtdb->node_locks[parent->locknum].lock,
&nlocktype);
send_to_prune_tree(rbtdb, parent, nlocktype);
NODE_UNLOCK(&rbtdb->node_locks[parent->locknum].lock,
&nlocktype);
}
TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype);
dns_db_detach((dns_db_t **)&rbtdb);