diff --git a/lib/dns/Makefile.am b/lib/dns/Makefile.am index 7f12f18110..759c637856 100644 --- a/lib/dns/Makefile.am +++ b/lib/dns/Makefile.am @@ -219,8 +219,12 @@ libdns_la_SOURCES = \ rbt.c \ rbt-cachedb.c \ rbt-zonedb.c \ - rbtdb_p.h \ rbtdb.c \ + rbtdb_p.h \ + qp-cachedb.c \ + qp-zonedb.c \ + qpdb_p.h \ + qpdb.c \ rcode.c \ rdata.c \ rdatalist.c \ diff --git a/lib/dns/db.c b/lib/dns/db.c index 0624b0d264..691b9cb259 100644 --- a/lib/dns/db.c +++ b/lib/dns/db.c @@ -62,7 +62,7 @@ struct dns_dbimplementation { */ #include "db_p.h" -#include "rbtdb_p.h" +#include "qpdb_p.h" unsigned int dns_pps = 0U; @@ -76,11 +76,11 @@ static void initialize(void) { isc_rwlock_init(&implock); - rbtimp.name = "rbt"; - rbtimp.create = dns__rbtdb_create; - rbtimp.mctx = NULL; - rbtimp.driverarg = NULL; - ISC_LINK_INIT(&rbtimp, link); + rbtimp = (dns_dbimplementation_t){ + .name = "rbt", + .create = dns__rbtdb_create, + .link = ISC_LINK_INITIALIZER, + }; ISC_LIST_INIT(implementations); ISC_LIST_APPEND(implementations, &rbtimp, link); diff --git a/lib/dns/include/dns/types.h b/lib/dns/include/dns/types.h index 94a4b09ff7..6f72afd635 100644 --- a/lib/dns/include/dns/types.h +++ b/lib/dns/include/dns/types.h @@ -146,6 +146,7 @@ typedef struct dns_request dns_request_t; typedef struct dns_requestmgr dns_requestmgr_t; typedef struct dns_resolver dns_resolver_t; typedef struct dns_rpsdb dns_rpsdb_t; +typedef struct dns_qpdata dns_qpdata_t; typedef struct dns_qpnode dns_qpnode_t; typedef uint8_t dns_secalg_t; typedef uint8_t dns_secproto_t; diff --git a/lib/dns/qp-cachedb.c b/lib/dns/qp-cachedb.c new file mode 100644 index 0000000000..f612aa1c22 --- /dev/null +++ b/lib/dns/qp-cachedb.c @@ -0,0 +1,1731 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/*! \file */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "db_p.h" +#include "qpdb_p.h" + +#define CHECK(op) \ + do { \ + result = (op); \ + if (result != ISC_R_SUCCESS) \ + goto failure; \ + } while (0) + +/*% + * Whether to rate-limit updating the LRU to avoid possible thread contention. + * Updating LRU requires write locking, so we don't do it every time the + * record is touched - only after some time passes. 
+ */ +#ifndef DNS_RBTDB_LIMITLRUUPDATE +#define DNS_RBTDB_LIMITLRUUPDATE 1 +#endif + +/*% Time after which we update LRU for glue records, 5 minutes */ +#define DNS_RBTDB_LRUUPDATE_GLUE 300 +/*% Time after which we update LRU for all other records, 10 minutes */ +#define DNS_RBTDB_LRUUPDATE_REGULAR 600 + +/*% + * Slab header attribute predicates. Each one reads header->attributes + * with an acquire load and tests a single DNS_SLABHEADERATTR_* bit. + */ +#define EXISTS(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NONEXISTENT) == 0) +#define NONEXISTENT(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NONEXISTENT) != 0) +#define NXDOMAIN(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NXDOMAIN) != 0) +#define STALE(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_STALE) != 0) +#define NEGATIVE(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NEGATIVE) != 0) +#define ZEROTTL(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_ZEROTTL) != 0) +#define ANCIENT(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_ANCIENT) != 0) +#define STATCOUNT(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_STATCOUNT) != 0) + +/*% + * Extra time for which an expired header may still be kept: zero for + * NXDOMAIN entries, otherwise the database's serve_stale_ttl. The 'rbtdb' + * argument is parenthesized so that any pointer expression can be passed. + */ +#define STALE_TTL(header, rbtdb) \ + (NXDOMAIN(header) ? 0 : (rbtdb)->common.serve_stale_ttl) + +/*% + * True while header->ttl is greater than 'now', or equal to 'now' for a + * header flagged ZEROTTL. Both arguments are evaluated more than once. + */ +#define ACTIVE(header, now) \ + (((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header))) + +/*% True if the database has a non-zero serve_stale_ttl. */ +#define KEEPSTALE(rbtdb) ((rbtdb)->common.serve_stale_ttl > 0) + +/*% + * Routines for LRU-based cache management. + */ + +/*% + * See if a given cache entry that is being reused needs to be updated + * in the LRU-list. From the LRU management point of view, this function is + * expected to return true for almost all cases. When used with threads, + * however, this may cause a non-negligible performance penalty because a + * writer lock will have to be acquired before updating the list. 
+ * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this + * function returns true if the entry has not been updated for some period of + * time. We differentiate the NS or glue address case and the others since + * experiments have shown that the former tends to be accessed relatively + * infrequently and the cost of cache miss is higher (e.g., a missing NS records + * may cause external queries at a higher level zone, involving more + * transactions). + * + * Caller must hold the node (read or write) lock. + */ +static bool +need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now) { + /* + * Headers flagged nonexistent, ancient or zero-TTL never get their + * LRU position refreshed. + */ + if (DNS_SLABHEADER_GETATTR(header, (DNS_SLABHEADERATTR_NONEXISTENT | + DNS_SLABHEADERATTR_ANCIENT | + DNS_SLABHEADERATTR_ZEROTTL)) != 0) + { + return (false); + } + +#if DNS_RBTDB_LIMITLRUUPDATE + if (header->type == dns_rdatatype_ns || + (header->trust == dns_trust_glue && + (header->type == dns_rdatatype_a || + header->type == dns_rdatatype_aaaa))) + { + /* + * NS and glue address records are updated if at least + * DNS_RBTDB_LRUUPDATE_GLUE seconds have passed since the + * previous update time. + */ + return (header->last_used + DNS_RBTDB_LRUUPDATE_GLUE <= now); + } + + /* + * Other records are updated if DNS_RBTDB_LRUUPDATE_REGULAR seconds + * have passed. + */ + return (header->last_used + DNS_RBTDB_LRUUPDATE_REGULAR <= now); +#else + /* Rate limiting is compiled out: always ask for an update. */ + UNUSED(now); + + return (true); +#endif /* if DNS_RBTDB_LIMITLRUUPDATE */ +} + +/*% + * Update the timestamp of a given cache entry and move it to the head + * of the corresponding LRU list. + * + * Caller must hold the node (write) lock. + * + * Note that we do NOT touch the heap here, as the TTL has not changed. + */ +static void +update_header(dns_rbtdb_t *rbtdb, dns_slabheader_t *header, isc_stdtime_t now) { + INSIST(IS_CACHE(rbtdb)); + + /* To be checked: can we really assume this? 
XXXMLG */ + INSIST(ISC_LINK_LINKED(header, link)); + + ISC_LIST_UNLINK(rbtdb->lru[RBTDB_HEADERNODE(header)->locknum], header, + link); + header->last_used = now; + ISC_LIST_PREPEND(rbtdb->lru[RBTDB_HEADERNODE(header)->locknum], header, + link); +} + +/* + * Locking + * + * If a routine is going to lock more than one lock in this module, then + * the locking must be done in the following order: + * + * Tree Lock + * + * Node Lock (Only one from the set may be locked at one time by + * any caller) + * + * Database Lock + * + * Failure to follow this hierarchy can result in deadlock. + */ + +/* + * Deleting Nodes + * + * For zone databases the node for the origin of the zone MUST NOT be deleted. + */ + +/* + * DB Routines + */ + +/*% + * Record the outcome of a cache lookup in the cache statistics, if the + * database has any attached. DNS_R_COVERINGNSEC bumps the covering-NSEC + * counter and is then counted as a hit, like the other positive and + * negative-cache results listed below; every other result is a miss. + */ +static void +update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) { + INSIST(IS_CACHE(rbtdb)); + + if (rbtdb->cachestats == NULL) { + return; + } + + switch (result) { + case DNS_R_COVERINGNSEC: + isc_stats_increment(rbtdb->cachestats, + dns_cachestatscounter_coveringnsec); + FALLTHROUGH; + case ISC_R_SUCCESS: + case DNS_R_CNAME: + case DNS_R_DNAME: + case DNS_R_DELEGATION: + case DNS_R_NCACHENXDOMAIN: + case DNS_R_NCACHENXRRSET: + isc_stats_increment(rbtdb->cachestats, + dns_cachestatscounter_hits); + break; + default: + isc_stats_increment(rbtdb->cachestats, + dns_cachestatscounter_misses); + } +} + +/*% + * Destroy every header chained below 'top' through the 'down' pointers + * and leave top->down set to NULL. 'top' itself is not destroyed. + */ +static void +clean_stale_headers(dns_slabheader_t *top) { + dns_slabheader_t *d = NULL, *down_next = NULL; + + for (d = top->down; d != NULL; d = down_next) { + /* Save the link before the header is destroyed. */ + down_next = d->down; + dns_slabheader_destroy(&d); + } + top->down = NULL; +} + +/*% + * Report the zone cut recorded in 'search' (search->zonecut and + * search->zonecut_header, both of which must be set) to the caller. + */ +static isc_result_t +setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep, + dns_name_t *foundname, dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + dns_name_t *zcname = NULL; + dns_typepair_t type; + dns_rbtnode_t *node = NULL; + + REQUIRE(search != NULL); + REQUIRE(search->zonecut != NULL); + REQUIRE(search->zonecut_header != NULL); + + /* + * The caller MUST NOT 
be holding any node locks. + */ + + node = search->zonecut; + type = search->zonecut_header->type; + + /* + * If we have to set foundname, we do it before anything else. + * If we were to set foundname after we had set nodep or bound the + * rdataset, then we'd have to undo that work if dns_name_copy() + * failed. By setting foundname first, there's nothing to undo if + * we have trouble. + */ + if (foundname != NULL && search->copy_name) { + zcname = dns_fixedname_name(&search->zonecut_name); + dns_name_copy(zcname, foundname); + } + if (nodep != NULL) { + /* + * Note that we don't have to increment the node's reference + * count here because we're going to use the reference we + * already have in the search block. + */ + *nodep = node; + search->need_cleanup = false; + } + if (rdataset != NULL) { + /* + * Bind the zone cut rdataset (and its signature, if wanted and + * present) while holding the node read lock. + */ + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + NODE_RDLOCK(&(search->rbtdb->node_locks[node->locknum].lock), + &nlocktype); + dns__rbtdb_bindrdataset(search->rbtdb, node, + search->zonecut_header, search->now, + isc_rwlocktype_read, + rdataset DNS__DB_FLARG_PASS); + if (sigrdataset != NULL && search->zonecut_sigheader != NULL) { + dns__rbtdb_bindrdataset( + search->rbtdb, node, search->zonecut_sigheader, + search->now, isc_rwlocktype_read, + sigrdataset DNS__DB_FLARG_PASS); + } + NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), + &nlocktype); + } + + /* The result code depends only on the type of the zone cut header. */ + if (type == dns_rdatatype_dname) { + return (DNS_R_DNAME); + } + return (DNS_R_DELEGATION); +} + +/*% + * Decide whether 'header' must be skipped because it is no longer active + * as of search->now. + * + * Returns false if the header is still active, or if it is stale but the + * serve-stale options in search->options allow it to be used; returns true + * if the caller should skip it. + * + * Unless it is destroyed here, an inactive header is recorded in + * '*header_prev'; for an active header '*header_prev' is left to the caller. + * '*nlocktypep' is updated if the node lock gets upgraded. + */ +static bool +check_stale_header(dns_rbtnode_t *node, dns_slabheader_t *header, + isc_rwlocktype_t *nlocktypep, isc_rwlock_t *lock, + rbtdb_search_t *search, dns_slabheader_t **header_prev) { + if (!ACTIVE(header, search->now)) { + dns_ttl_t stale = header->ttl + + STALE_TTL(header, search->rbtdb); + /* + * If this data is in the stale window keep it and if + * DNS_DBFIND_STALEOK is not set we tell the caller to + * skip this record. 
We skip the records with ZEROTTL + * (these records should not be cached anyway). + */ + + DNS_SLABHEADER_CLRATTR(header, DNS_SLABHEADERATTR_STALE_WINDOW); + if (!ZEROTTL(header) && KEEPSTALE(search->rbtdb) && + stale > search->now) + { + dns__rbtdb_mark(header, DNS_SLABHEADERATTR_STALE); + *header_prev = header; + /* + * If DNS_DBFIND_STALESTART is set then it means we + * failed to resolve the name during recursion; in + * this case we record the time at which the refresh + * failed. + */ + if ((search->options & DNS_DBFIND_STALESTART) != 0) { + atomic_store_release( + &header->last_refresh_fail_ts, + search->now); + } else if ((search->options & + DNS_DBFIND_STALEENABLED) != 0 && + search->now < + (atomic_load_acquire( + &header->last_refresh_fail_ts) + + search->rbtdb->serve_stale_refresh)) + { + /* + * We are still within 'stale-refresh-time' + * of the last refresh failure, so don't skip + * this stale entry but use it instead. + */ + DNS_SLABHEADER_SETATTR( + header, + DNS_SLABHEADERATTR_STALE_WINDOW); + return (false); + } else if ((search->options & + DNS_DBFIND_STALETIMEOUT) != 0) + { + /* + * We want stale RRset due to timeout, so we + * don't skip it. + */ + return (false); + } + /* Otherwise skip it unless stale data is acceptable. */ + return ((search->options & DNS_DBFIND_STALEOK) == 0); + } + + /* + * This rdataset is stale. If no one else is using the + * node, we can clean it up right now, otherwise we mark + * it as ancient, and the node as dirty, so it will get + * cleaned up later. + */ + if ((header->ttl < search->now - RBTDB_VIRTUAL) && + (*nlocktypep == isc_rwlocktype_write || + NODE_TRYUPGRADE(lock, nlocktypep) == ISC_R_SUCCESS)) + { + /* + * We update the node's status only when we can + * get write access; otherwise, we leave others + * to this work. Periodical cleaning will + * eventually take the job as the last resort. + * We won't downgrade the lock, since other + * rdatasets are probably stale, too. 
+ */ + + if (isc_refcount_current(&node->references) == 0) { + /* + * header->down can be non-NULL if the + * refcount has just decremented to 0 + * but dns__rbtdb_decref() has not + * performed clean_cache_node(), in + * which case we need to purge the stale + * headers first. + */ + clean_stale_headers(header); + /* Unlink the header from the node's list. */ + if (*header_prev != NULL) { + (*header_prev)->next = header->next; + } else { + node->data = header->next; + } + dns_slabheader_destroy(&header); + } else { + dns__rbtdb_mark(header, + DNS_SLABHEADERATTR_ANCIENT); + RBTDB_HEADERNODE(header)->dirty = 1; + *header_prev = header; + } + } else { + *header_prev = header; + } + return (true); + } + /* The header is still active. */ + return (false); +} + +/*% + * Callback handed to dns_rbt_findnode(); 'arg' is the rbtdb_search_t of + * the lookup in progress. Scans the headers at 'node' for an extant DNAME + * (and RRSIG DNAME). If a DNAME is found and its trust level is acceptable, + * the node and headers are recorded in the search block as the zone cut, a + * node reference is taken, and DNS_R_PARTIALMATCH is returned; otherwise + * DNS_R_CONTINUE is returned. + */ +static isc_result_t +cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, + void *arg DNS__DB_FLARG) { + rbtdb_search_t *search = arg; + dns_slabheader_t *header = NULL; + dns_slabheader_t *header_prev = NULL, *header_next = NULL; + dns_slabheader_t *dname_header = NULL, *sigdname_header = NULL; + isc_result_t result; + isc_rwlock_t *lock = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + REQUIRE(search->zonecut == NULL); + + /* + * Keep compiler silent. + */ + UNUSED(name); + + lock = &(search->rbtdb->node_locks[node->locknum].lock); + NODE_RDLOCK(lock, &nlocktype); + + /* + * Look for a DNAME or RRSIG DNAME rdataset. + */ + for (header = node->data; header != NULL; header = header_next) { + header_next = header->next; + if (check_stale_header(node, header, &nlocktype, lock, search, + &header_prev)) + { + /* Do nothing. 
*/ + } else if (header->type == dns_rdatatype_dname && + EXISTS(header) && !ANCIENT(header)) + { + dname_header = header; + header_prev = header; + } else if (header->type == DNS_SIGTYPE(dns_rdatatype_dname) && + EXISTS(header) && !ANCIENT(header)) + { + sigdname_header = header; + header_prev = header; + } else { + header_prev = header; + } + } + + /* + * Only use the DNAME if its trust level is not pending, or if the + * caller accepts pending data. + */ + if (dname_header != NULL && + (!DNS_TRUST_PENDING(dname_header->trust) || + (search->options & DNS_DBFIND_PENDINGOK) != 0)) + { + /* + * We increment the reference count on node to ensure that + * search->zonecut_header will still be valid later. + */ + dns__rbtdb_newref(search->rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + search->zonecut = node; + search->zonecut_header = dname_header; + search->zonecut_sigheader = sigdname_header; + search->need_cleanup = true; + result = DNS_R_PARTIALMATCH; + } else { + result = DNS_R_CONTINUE; + } + + NODE_UNLOCK(lock, &nlocktype); + + return (result); +} + +/*% + * Starting at 'node' and moving up through the ancestors recorded in + * search->chain.levels, look for the first node that has an extant NS + * rdataset. If one is found, set 'foundname' and '*nodep' when they are + * non-NULL, bind the NS rdataset (and RRSIG NS, if present) and return + * DNS_R_DELEGATION; a dns_name_concatenate() failure is returned as is. + * Returns ISC_R_NOTFOUND if no level has an NS rdataset. + */ +static isc_result_t +find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node, + dns_dbnode_t **nodep, dns_name_t *foundname, + dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + unsigned int i; + isc_result_t result = ISC_R_NOTFOUND; + dns_name_t name; + dns_rbtdb_t *rbtdb = NULL; + bool done; + + /* + * Caller must be holding the tree lock. + */ + + rbtdb = search->rbtdb; + i = search->chain.level_matches; + done = false; + do { + dns_slabheader_t *header = NULL; + dns_slabheader_t *header_prev = NULL, *header_next = NULL; + dns_slabheader_t *found = NULL, *foundsig = NULL; + isc_rwlock_t *lock = &rbtdb->node_locks[node->locknum].lock; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + NODE_RDLOCK(lock, &nlocktype); + + /* + * Look for NS and RRSIG NS rdatasets. + */ + for (header = node->data; header != NULL; header = header_next) + { + header_next = header->next; + if (check_stale_header(node, header, &nlocktype, lock, + search, &header_prev)) + { + /* Do nothing. 
*/ + } else if (EXISTS(header) && !ANCIENT(header)) { + /* + * We've found an extant rdataset. See if + * we're interested in it. + */ + if (header->type == dns_rdatatype_ns) { + found = header; + if (foundsig != NULL) { + break; + } + } else if (header->type == + DNS_SIGTYPE(dns_rdatatype_ns)) + { + foundsig = header; + if (found != NULL) { + break; + } + } + header_prev = header; + } else { + header_prev = header; + } + } + + if (found != NULL) { + /* + * If we have to set foundname, we do it before + * anything else. If we were to set foundname after + * we had set nodep or bound the rdataset, then we'd + * have to undo that work if dns_name_concatenate() + * failed. By setting foundname first, there's + * nothing to undo if we have trouble. + */ + if (foundname != NULL) { + dns_name_init(&name, NULL); + dns_rbt_namefromnode(node, &name); + dns_name_copy(&name, foundname); + /* + * Append the names of the remaining chain + * levels, innermost first, to build the + * full name of this node. + */ + while (i > 0) { + dns_rbtnode_t *level_node = + search->chain.levels[--i]; + dns_name_init(&name, NULL); + dns_rbt_namefromnode(level_node, &name); + result = dns_name_concatenate( + foundname, &name, foundname, + NULL); + if (result != ISC_R_SUCCESS) { + if (nodep != NULL) { + *nodep = NULL; + } + goto node_exit; + } + } + } + result = DNS_R_DELEGATION; + if (nodep != NULL) { + dns__rbtdb_newref(search->rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + dns__rbtdb_bindrdataset(search->rbtdb, node, found, + search->now, nlocktype, + rdataset DNS__DB_FLARG_PASS); + if (foundsig != NULL) { + dns__rbtdb_bindrdataset( + search->rbtdb, node, foundsig, + search->now, nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + } + /* + * Refresh the LRU position of the headers being + * returned. update_header() is called with the node + * write lock held, so upgrade first and re-check + * once it is held. + */ + if (need_headerupdate(found, search->now) || + (foundsig != NULL && + need_headerupdate(foundsig, search->now))) + { + if (nlocktype != isc_rwlocktype_write) { + NODE_FORCEUPGRADE(lock, &nlocktype); + POST(nlocktype); + } + if (need_headerupdate(found, search->now)) { + update_header(search->rbtdb, found, + search->now); + } + if (foundsig != NULL && + 
need_headerupdate(foundsig, search->now)) + { + update_header(search->rbtdb, foundsig, + search->now); + } + } + } + + node_exit: + NODE_UNLOCK(lock, &nlocktype); + + if (found == NULL && i > 0) { + i--; + node = search->chain.levels[i]; + } else { + done = true; + } + } while (!done); + + return (result); +} + +/* + * Look for a potentially covering NSEC in the cache where `name` + * is known not to exist. This uses the auxiliary NSEC tree to find + * the potential NSEC owner. If found, we update 'foundname', 'rdataset' + * and 'sigrdataset', store a referenced node in '*nodep' when 'nodep' is + * non-NULL, and return DNS_R_COVERINGNSEC. + * Otherwise, return ISC_R_NOTFOUND. + */ +static isc_result_t +find_coveringnsec(rbtdb_search_t *search, const dns_name_t *name, + dns_dbnode_t **nodep, isc_stdtime_t now, + dns_name_t *foundname, dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + dns_fixedname_t fprefix, forigin, ftarget, fixed; + dns_name_t *prefix = NULL, *origin = NULL; + dns_name_t *target = NULL, *fname = NULL; + dns_rbtnode_t *node = NULL; + dns_rbtnodechain_t chain; + isc_result_t result; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + isc_rwlock_t *lock = NULL; + dns_typepair_t matchtype, sigmatchtype; + dns_slabheader_t *found = NULL, *foundsig = NULL; + dns_slabheader_t *header = NULL; + dns_slabheader_t *header_next = NULL, *header_prev = NULL; + + /* + * Look for the node in the auxiliary tree. 
+ */ + dns_rbtnodechain_init(&chain); + target = dns_fixedname_initname(&ftarget); + result = dns_rbt_findnode(search->rbtdb->nsec, name, target, &node, + &chain, DNS_RBTFIND_EMPTYDATA, NULL, NULL); + if (result != DNS_R_PARTIALMATCH) { + dns_rbtnodechain_reset(&chain); + return (ISC_R_NOTFOUND); + } + + prefix = dns_fixedname_initname(&fprefix); + origin = dns_fixedname_initname(&forigin); + target = dns_fixedname_initname(&ftarget); + fname = dns_fixedname_initname(&fixed); + + matchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_nsec, 0); + sigmatchtype = DNS_SIGTYPE(dns_rdatatype_nsec); + + /* + * Extract predecessor from chain. + */ + result = dns_rbtnodechain_current(&chain, prefix, origin, NULL); + dns_rbtnodechain_reset(&chain); + if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) { + return (ISC_R_NOTFOUND); + } + + result = dns_name_concatenate(prefix, origin, target, NULL); + if (result != ISC_R_SUCCESS) { + return (ISC_R_NOTFOUND); + } + + /* + * Lookup the predecessor in the main tree. 
+ */ + node = NULL; + result = dns_rbt_findnode(search->rbtdb->tree, target, fname, &node, + NULL, DNS_RBTFIND_EMPTYDATA, NULL, NULL); + if (result != ISC_R_SUCCESS) { + return (ISC_R_NOTFOUND); + } + + /* + * Scan the predecessor's headers for an extant NSEC and RRSIG NSEC, + * stopping as soon as both have been seen. + */ + lock = &(search->rbtdb->node_locks[node->locknum].lock); + NODE_RDLOCK(lock, &nlocktype); + for (header = node->data; header != NULL; header = header_next) { + header_next = header->next; + if (check_stale_header(node, header, &nlocktype, lock, search, + &header_prev)) + { + continue; + } + if (NONEXISTENT(header) || DNS_TYPEPAIR_TYPE(header->type) == 0) + { + header_prev = header; + continue; + } + if (header->type == matchtype) { + found = header; + if (foundsig != NULL) { + break; + } + } else if (header->type == sigmatchtype) { + foundsig = header; + if (found != NULL) { + break; + } + } + header_prev = header; + } + if (found != NULL) { + dns__rbtdb_bindrdataset(search->rbtdb, node, found, now, + nlocktype, rdataset DNS__DB_FLARG_PASS); + if (foundsig != NULL) { + dns__rbtdb_bindrdataset(search->rbtdb, node, foundsig, + now, nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + } + /* + * 'nodep' is optional, as in the other lookup paths in this + * file: only take a node reference when there is somewhere + * to store it, so that no reference is leaked and a NULL + * 'nodep' is never dereferenced. + */ + if (nodep != NULL) { + dns__rbtdb_newref(search->rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + + dns_name_copy(fname, foundname); + + result = DNS_R_COVERINGNSEC; + } else { + result = ISC_R_NOTFOUND; + } + NODE_UNLOCK(lock, &nlocktype); + return (result); +} + +static isc_result_t +cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, + dns_rdatatype_t type, unsigned int options, isc_stdtime_t now, + dns_dbnode_t **nodep, dns_name_t *foundname, + dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + dns_rbtnode_t *node = NULL; + isc_result_t result; + rbtdb_search_t search; + bool cname_ok = true; + bool found_noqname = false; + bool all_negative = true; + bool empty_node; + isc_rwlock_t *lock = NULL; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + dns_slabheader_t *header = NULL; + 
dns_slabheader_t *header_prev = NULL, *header_next = NULL; + dns_slabheader_t *found = NULL, *nsheader = NULL; + dns_slabheader_t *foundsig = NULL, *nssig = NULL, *cnamesig = NULL; + dns_slabheader_t *update = NULL, *updatesig = NULL; + dns_slabheader_t *nsecheader = NULL, *nsecsig = NULL; + dns_typepair_t sigtype, negtype; + + UNUSED(version); + + REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); + REQUIRE(version == NULL); + + /* A 'now' of zero means "use the current time". */ + if (now == 0) { + now = isc_stdtime_now(); + } + + search = (rbtdb_search_t){ + .rbtdb = (dns_rbtdb_t *)db, + .serial = 1, + .options = options, + .now = now, + }; + dns_fixedname_init(&search.zonecut_name); + dns_rbtnodechain_init(&search.chain); + + TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype); + + /* + * Search down from the root of the tree. If, while going down, we + * encounter a callback node, cache_zonecut_callback() will search the + * rdatasets at the zone cut for a DNAME rdataset. + */ + result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node, + &search.chain, DNS_RBTFIND_EMPTYDATA, + cache_zonecut_callback, &search); + + if (result == DNS_R_PARTIALMATCH) { + /* + * If dns_rbt_findnode discovered a covering DNAME skip + * looking for a covering NSEC. 
+ */ + if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && + (search.zonecut_header == NULL || + search.zonecut_header->type != dns_rdatatype_dname)) + { + result = find_coveringnsec( + &search, name, nodep, now, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + if (result == DNS_R_COVERINGNSEC) { + goto tree_exit; + } + } + if (search.zonecut != NULL) { + result = setup_delegation( + &search, nodep, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + goto tree_exit; + } else { + find_ns: + result = find_deepest_zonecut( + &search, node, nodep, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + goto tree_exit; + } + } else if (result != ISC_R_SUCCESS) { + goto tree_exit; + } + + /* + * Certain DNSSEC types are not subject to CNAME matching + * (RFC4035, section 2.5 and RFC3007). + * + * We don't check for RRSIG, because we don't store RRSIG records + * directly. + */ + if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { + cname_ok = false; + } + + /* + * We now go looking for rdata... + */ + + lock = &(search.rbtdb->node_locks[node->locknum].lock); + NODE_RDLOCK(lock, &nlocktype); + + /* + * Reset the per-node search state before scanning the headers. + * NOTE(review): every 'goto find_ns' below ends in 'goto tree_exit', + * so control does not appear to come back here; the reset looks + * purely defensive - confirm. + */ + found = NULL; + foundsig = NULL; + sigtype = DNS_SIGTYPE(type); + negtype = DNS_TYPEPAIR_VALUE(0, type); + nsheader = NULL; + nsecheader = NULL; + nssig = NULL; + nsecsig = NULL; + cnamesig = NULL; + empty_node = true; + header_prev = NULL; + for (header = node->data; header != NULL; header = header_next) { + header_next = header->next; + if (check_stale_header(node, header, &nlocktype, lock, &search, + &header_prev)) + { + /* Do nothing. */ + } else if (EXISTS(header) && !ANCIENT(header)) { + /* + * We now know that there is at least one active + * non-stale rdataset at this node. 
+ */ + empty_node = false; + if (header->noqname != NULL && + header->trust == dns_trust_secure) + { + found_noqname = true; + } + if (!NEGATIVE(header)) { + all_negative = false; + } + + /* + * If we found a type we were looking for, remember + * it. + */ + if (header->type == type || + (type == dns_rdatatype_any && + DNS_TYPEPAIR_TYPE(header->type) != 0) || + (cname_ok && header->type == dns_rdatatype_cname)) + { + /* + * We've found the answer. + */ + found = header; + if (header->type == dns_rdatatype_cname && + cname_ok) + { + /* + * If we've already got the + * CNAME RRSIG, use it. + */ + if (cnamesig != NULL) { + foundsig = cnamesig; + } else { + sigtype = DNS_SIGTYPE( + dns_rdatatype_cname); + } + } + } else if (header->type == sigtype) { + /* + * We've found the RRSIG rdataset for our + * target type. Remember it. + */ + foundsig = header; + } else if (header->type == RDATATYPE_NCACHEANY || + header->type == negtype) + { + /* + * We've found a negative cache entry. + */ + found = header; + } else if (header->type == dns_rdatatype_ns) { + /* + * Remember a NS rdataset even if we're + * not specifically looking for it, because + * we might need it later. + */ + nsheader = header; + } else if (header->type == + DNS_SIGTYPE(dns_rdatatype_ns)) + { + /* + * If we need the NS rdataset, we'll also + * need its signature. + */ + nssig = header; + } else if (header->type == dns_rdatatype_nsec) { + /* + * Remember the NSEC rdataset in case we have + * to return a covering NODATA NSEC. + */ + nsecheader = header; + } else if (header->type == + DNS_SIGTYPE(dns_rdatatype_nsec)) + { + /* + * If we return the NSEC rdataset, we'll + * also return its signature. + */ + nsecsig = header; + } else if (cname_ok && + header->type == + DNS_SIGTYPE(dns_rdatatype_cname)) + { + /* + * If we get a CNAME match, we'll also need + * its signature. + */ + cnamesig = header; + } + header_prev = header; + } else { + header_prev = header; + } + } + + if (empty_node) { + /* + * We have an exact match for the name, but there are no + * extant rdatasets. That means that this node doesn't + * meaningfully exist, and that we really have a partial match. 
+ */ + NODE_UNLOCK(lock, &nlocktype); + if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) { + result = find_coveringnsec( + &search, name, nodep, now, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + if (result == DNS_R_COVERINGNSEC) { + goto tree_exit; + } + } + goto find_ns; + } + + /* + * If we didn't find what we were looking for, or what we found has + * a trust level that the caller's options do not accept... + */ + if (found == NULL || + (DNS_TRUST_ADDITIONAL(found->trust) && + ((options & DNS_DBFIND_ADDITIONALOK) == 0)) || + (found->trust == dns_trust_glue && + ((options & DNS_DBFIND_GLUEOK) == 0)) || + (DNS_TRUST_PENDING(found->trust) && + ((options & DNS_DBFIND_PENDINGOK) == 0))) + { + /* + * Return covering NODATA NSEC record. + */ + if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && + nsecheader != NULL) + { + if (nodep != NULL) { + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + dns__rbtdb_bindrdataset(search.rbtdb, node, nsecheader, + search.now, nlocktype, + rdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(nsecheader, search.now)) { + update = nsecheader; + } + if (nsecsig != NULL) { + dns__rbtdb_bindrdataset( + search.rbtdb, node, nsecsig, search.now, + nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(nsecsig, search.now)) { + updatesig = nsecsig; + } + } + result = DNS_R_COVERINGNSEC; + goto node_exit; + } + + /* + * This name was from a wild card. Look for a covering NSEC. + */ + if (found == NULL && (found_noqname || all_negative) && + (search.options & DNS_DBFIND_COVERINGNSEC) != 0) + { + NODE_UNLOCK(lock, &nlocktype); + result = find_coveringnsec( + &search, name, nodep, now, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + if (result == DNS_R_COVERINGNSEC) { + goto tree_exit; + } + goto find_ns; + } + + /* + * If there is an NS rdataset at this node, then this is the + * deepest zone cut. 
+ */ + if (nsheader != NULL) { + if (nodep != NULL) { + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + dns__rbtdb_bindrdataset(search.rbtdb, node, nsheader, + search.now, nlocktype, + rdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(nsheader, search.now)) { + update = nsheader; + } + if (nssig != NULL) { + dns__rbtdb_bindrdataset( + search.rbtdb, node, nssig, search.now, + nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(nssig, search.now)) { + updatesig = nssig; + } + } + result = DNS_R_DELEGATION; + goto node_exit; + } + + /* + * Go find the deepest zone cut. + */ + NODE_UNLOCK(lock, &nlocktype); + goto find_ns; + } + + /* + * We found what we were looking for, or we found a CNAME. + */ + + if (nodep != NULL) { + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + + if (NEGATIVE(found)) { + /* + * We found a negative cache entry. + */ + if (NXDOMAIN(found)) { + result = DNS_R_NCACHENXDOMAIN; + } else { + result = DNS_R_NCACHENXRRSET; + } + } else if (type != found->type && type != dns_rdatatype_any && + found->type == dns_rdatatype_cname) + { + /* + * We weren't doing an ANY query and we found a CNAME instead + * of the type we were looking for, so we need to indicate + * that result to the caller. + */ + result = DNS_R_CNAME; + } else { + /* + * An ordinary successful query! 
+ */ + result = ISC_R_SUCCESS; + } + + /* + * Bind the rdataset(s), except for a successful ANY query, where no + * single rdataset is returned. + */ + if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN || + result == DNS_R_NCACHENXRRSET) + { + dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now, + nlocktype, rdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(found, search.now)) { + update = found; + } + if (!NEGATIVE(found) && foundsig != NULL) { + dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig, + search.now, nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + if (need_headerupdate(foundsig, search.now)) { + updatesig = foundsig; + } + } + } + +node_exit: + /* + * Refresh the LRU position of the headers being returned. + * update_header() is called with the node write lock held, so + * upgrade first and re-check need_headerupdate() once it is held. + */ + if ((update != NULL || updatesig != NULL) && + nlocktype != isc_rwlocktype_write) + { + NODE_FORCEUPGRADE(lock, &nlocktype); + POST(nlocktype); + } + if (update != NULL && need_headerupdate(update, search.now)) { + update_header(search.rbtdb, update, search.now); + } + if (updatesig != NULL && need_headerupdate(updatesig, search.now)) { + update_header(search.rbtdb, updatesig, search.now); + } + + NODE_UNLOCK(lock, &nlocktype); + +tree_exit: + TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype); + + /* + * If we found a zonecut but aren't going to use it, we have to + * let go of it. 
+ */ + if (search.need_cleanup) { + node = search.zonecut; + INSIST(node != NULL); + lock = &(search.rbtdb->node_locks[node->locknum].lock); + + NODE_RDLOCK(lock, &nlocktype); + dns__rbtdb_decref(search.rbtdb, node, 0, &nlocktype, &tlocktype, + true, false DNS__DB_FLARG_PASS); + NODE_UNLOCK(lock, &nlocktype); + INSIST(tlocktype == isc_rwlocktype_none); + } + + dns_rbtnodechain_reset(&search.chain); + + /* Count this lookup in the cache hit/miss statistics. */ + update_cachestats(search.rbtdb, result); + return (result); +} + +static isc_result_t +cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options, + isc_stdtime_t now, dns_dbnode_t **nodep, + dns_name_t *foundname, dns_name_t *dcname, + dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + dns_rbtnode_t *node = NULL; + isc_rwlock_t *lock = NULL; + isc_result_t result; + rbtdb_search_t search; + dns_slabheader_t *header = NULL; + dns_slabheader_t *header_prev = NULL, *header_next = NULL; + dns_slabheader_t *found = NULL, *foundsig = NULL; + unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + bool dcnull = (dcname == NULL); + + REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); + + /* A 'now' of zero means "use the current time". */ + if (now == 0) { + now = isc_stdtime_now(); + } + + search = (rbtdb_search_t){ + .rbtdb = (dns_rbtdb_t *)db, + .serial = 1, + .options = options, + .now = now, + }; + dns_fixedname_init(&search.zonecut_name); + dns_rbtnodechain_init(&search.chain); + + /* Without a separate 'dcname', use 'foundname' in its place. */ + if (dcnull) { + dcname = foundname; + } + + if ((options & DNS_DBFIND_NOEXACT) != 0) { + rbtoptions |= DNS_RBTFIND_NOEXACT; + } + + TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype); + + /* + * Search down from the root of the tree. 
+ */ + result = dns_rbt_findnode(search.rbtdb->tree, name, dcname, &node, + &search.chain, rbtoptions, NULL, &search); + + if (result == DNS_R_PARTIALMATCH) { + result = find_deepest_zonecut(&search, node, nodep, foundname, + rdataset, + sigrdataset DNS__DB_FLARG_PASS); + goto tree_exit; + } else if (result != ISC_R_SUCCESS) { + goto tree_exit; + } else if (!dcnull) { + dns_name_copy(dcname, foundname); + } + + /* + * We now go looking for an NS rdataset at the node. + */ + + lock = &(search.rbtdb->node_locks[node->locknum].lock); + NODE_RDLOCK(lock, &nlocktype); + + for (header = node->data; header != NULL; header = header_next) { + header_next = header->next; + if (check_stale_header(node, header, &nlocktype, lock, &search, + &header_prev)) + { + /* + * The function dns_rbt_findnode found us the a matching + * node for 'name' and stored the result in 'dcname'. + * This is the deepest known zonecut in our database. + * However, this node may be stale and if serve-stale + * is not enabled (in other words 'stale-answer-enable' + * is set to no), this node may not be used as a + * zonecut we know about. If so, find the deepest + * zonecut from this node up and return that instead. + */ + NODE_UNLOCK(lock, &nlocktype); + result = find_deepest_zonecut( + &search, node, nodep, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + dns_name_copy(foundname, dcname); + goto tree_exit; + } else if (EXISTS(header) && !ANCIENT(header)) { + /* + * If we found a type we were looking for, remember + * it. + */ + if (header->type == dns_rdatatype_ns) { + /* + * Remember a NS rdataset even if we're + * not specifically looking for it, because + * we might need it later. + */ + found = header; + } else if (header->type == + DNS_SIGTYPE(dns_rdatatype_ns)) + { + /* + * If we need the NS rdataset, we'll also + * need its signature. + */ + foundsig = header; + } + header_prev = header; + } else { + header_prev = header; + } + } + + if (found == NULL) { + /* + * No NS records here. 
+ */ + NODE_UNLOCK(lock, &nlocktype); + result = find_deepest_zonecut(&search, node, nodep, foundname, + rdataset, + sigrdataset DNS__DB_FLARG_PASS); + goto tree_exit; + } + + if (nodep != NULL) { + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + + dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now, + nlocktype, rdataset DNS__DB_FLARG_PASS); + if (foundsig != NULL) { + dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig, + search.now, nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + } + + if (need_headerupdate(found, search.now) || + (foundsig != NULL && need_headerupdate(foundsig, search.now))) + { + if (nlocktype != isc_rwlocktype_write) { + NODE_FORCEUPGRADE(lock, &nlocktype); + POST(nlocktype); + } + if (need_headerupdate(found, search.now)) { + update_header(search.rbtdb, found, search.now); + } + if (foundsig != NULL && need_headerupdate(foundsig, search.now)) + { + update_header(search.rbtdb, foundsig, search.now); + } + } + + NODE_UNLOCK(lock, &nlocktype); + +tree_exit: + TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype); + + INSIST(!search.need_cleanup); + + dns_rbtnodechain_reset(&search.chain); + + if (result == DNS_R_DELEGATION) { + result = ISC_R_SUCCESS; + } + + return (result); +} + +static isc_result_t +cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, + dns_rdatatype_t type, dns_rdatatype_t covers, + isc_stdtime_t now, dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + dns_slabheader_t *header = NULL, *header_next = NULL; + dns_slabheader_t *found = NULL, *foundsig = NULL; + dns_typepair_t matchtype, sigmatchtype, negtype; + isc_result_t result; + isc_rwlock_t *lock = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(type != dns_rdatatype_any); + + UNUSED(version); + + result = ISC_R_SUCCESS; + + if 
(now == 0) { + now = isc_stdtime_now(); + } + + lock = &rbtdb->node_locks[rbtnode->locknum].lock; + NODE_RDLOCK(lock, &nlocktype); + + matchtype = DNS_TYPEPAIR_VALUE(type, covers); + negtype = DNS_TYPEPAIR_VALUE(0, type); + if (covers == 0) { + sigmatchtype = DNS_SIGTYPE(type); + } else { + sigmatchtype = 0; + } + + for (header = rbtnode->data; header != NULL; header = header_next) { + header_next = header->next; + if (!ACTIVE(header, now)) { + if ((header->ttl + STALE_TTL(header, rbtdb) < + now - RBTDB_VIRTUAL) && + (nlocktype == isc_rwlocktype_write || + NODE_TRYUPGRADE(lock, &nlocktype) == + ISC_R_SUCCESS)) + { + /* + * We update the node's status only when we + * can get write access. + * + * We don't check if refcurrent(rbtnode) == 0 + * and try to free like we do in cache_find(), + * because refcurrent(rbtnode) must be + * non-zero. This is so because 'node' is an + * argument to the function. + */ + dns__rbtdb_mark(header, + DNS_SLABHEADERATTR_ANCIENT); + RBTDB_HEADERNODE(header)->dirty = 1; + } + } else if (EXISTS(header) && !ANCIENT(header)) { + if (header->type == matchtype) { + found = header; + } else if (header->type == RDATATYPE_NCACHEANY || + header->type == negtype) + { + found = header; + } else if (header->type == sigmatchtype) { + foundsig = header; + } + } + } + if (found != NULL) { + dns__rbtdb_bindrdataset(rbtdb, rbtnode, found, now, nlocktype, + rdataset DNS__DB_FLARG_PASS); + if (!NEGATIVE(found) && foundsig != NULL) { + dns__rbtdb_bindrdataset(rbtdb, rbtnode, foundsig, now, + nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + } + } + + NODE_UNLOCK(lock, &nlocktype); + + if (found == NULL) { + return (ISC_R_NOTFOUND); + } + + if (NEGATIVE(found)) { + /* + * We found a negative cache entry. 
+ */ + if (NXDOMAIN(found)) { + result = DNS_R_NCACHENXDOMAIN; + } else { + result = DNS_R_NCACHENXRRSET; + } + } + + update_cachestats(rbtdb, result); + + return (result); +} + +static size_t +hashsize(dns_db_t *db) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + size_t size; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB(rbtdb)); + + TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); + size = dns_rbt_hashsize(rbtdb->tree); + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + + return (size); +} + +static isc_result_t +setcachestats(dns_db_t *db, isc_stats_t *stats) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ + REQUIRE(stats != NULL); + + isc_stats_attach(stats, &rbtdb->cachestats); + return (ISC_R_SUCCESS); +} + +static dns_stats_t * +getrrsetstats(dns_db_t *db) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ + + return (rbtdb->rrsetstats); +} + +static isc_result_t +setservestalettl(dns_db_t *db, dns_ttl_t ttl) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + /* currently no bounds checking. 0 means disable. */ + rbtdb->common.serve_stale_ttl = ttl; + return (ISC_R_SUCCESS); +} + +static isc_result_t +getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + *ttl = rbtdb->common.serve_stale_ttl; + return (ISC_R_SUCCESS); +} + +static isc_result_t +setservestalerefresh(dns_db_t *db, uint32_t interval) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + /* currently no bounds checking. 0 means disable. 
*/ + rbtdb->serve_stale_refresh = interval; + return (ISC_R_SUCCESS); +} + +static isc_result_t +getservestalerefresh(dns_db_t *db, uint32_t *interval) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(IS_CACHE(rbtdb)); + + *interval = rbtdb->serve_stale_refresh; + return (ISC_R_SUCCESS); +} + +static void +expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + dns_slabheader_t *header = data; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + + NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + dns__cacherbt_expireheader(header, &tlocktype, + dns_expire_flush DNS__DB_FLARG_PASS); + NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + INSIST(tlocktype == isc_rwlocktype_none); +} + +dns_dbmethods_t dns__rbtdb_cachemethods = { + .destroy = dns__rbtdb_destroy, + .currentversion = dns__rbtdb_currentversion, + .newversion = dns__rbtdb_newversion, + .attachversion = dns__rbtdb_attachversion, + .closeversion = dns__rbtdb_closeversion, + .findnode = dns__rbtdb_findnode, + .find = cache_find, + .findzonecut = cache_findzonecut, + .attachnode = dns__rbtdb_attachnode, + .detachnode = dns__rbtdb_detachnode, + .createiterator = dns__rbtdb_createiterator, + .findrdataset = cache_findrdataset, + .allrdatasets = dns__rbtdb_allrdatasets, + .addrdataset = dns__rbtdb_addrdataset, + .subtractrdataset = dns__rbtdb_subtractrdataset, + .deleterdataset = dns__rbtdb_deleterdataset, + .nodecount = dns__rbtdb_nodecount, + .setloop = dns__rbtdb_setloop, + .getoriginnode = dns__rbtdb_getoriginnode, + .getrrsetstats = getrrsetstats, + .setcachestats = setcachestats, + .hashsize = hashsize, + .setservestalettl = setservestalettl, + .getservestalettl = getservestalettl, + .setservestalerefresh = setservestalerefresh, + .getservestalerefresh = getservestalerefresh, + 
.locknode = dns__rbtdb_locknode, + .unlocknode = dns__rbtdb_unlocknode, + .expiredata = expiredata, + .deletedata = dns__rbtdb_deletedata, +}; + +/* + * Caller must hold the node (write) lock. + */ +void +dns__cacherbt_expireheader(dns_slabheader_t *header, + isc_rwlocktype_t *tlocktypep, + dns_expire_t reason DNS__DB_FLARG) { + dns__rbtdb_setttl(header, 0); + dns__rbtdb_mark(header, DNS_SLABHEADERATTR_ANCIENT); + RBTDB_HEADERNODE(header)->dirty = 1; + + if (isc_refcount_current(&RBTDB_HEADERNODE(header)->references) == 0) { + isc_rwlocktype_t nlocktype = isc_rwlocktype_write; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db; + + /* + * If no one else is using the node, we can clean it up now. + * We first need to gain a new reference to the node to meet a + * requirement of dns__rbtdb_decref(). + */ + dns__rbtdb_newref(rbtdb, RBTDB_HEADERNODE(header), + nlocktype DNS__DB_FLARG_PASS); + dns__rbtdb_decref(rbtdb, RBTDB_HEADERNODE(header), 0, + &nlocktype, tlocktypep, true, + false DNS__DB_FLARG_PASS); + + if (rbtdb->cachestats == NULL) { + return; + } + + switch (reason) { + case dns_expire_ttl: + isc_stats_increment(rbtdb->cachestats, + dns_cachestatscounter_deletettl); + break; + case dns_expire_lru: + isc_stats_increment(rbtdb->cachestats, + dns_cachestatscounter_deletelru); + break; + default: + break; + } + } +} + +static size_t +rdataset_size(dns_slabheader_t *header) { + if (!NONEXISTENT(header)) { + return (dns_rdataslab_size((unsigned char *)header, + sizeof(*header))); + } + + return (sizeof(*header)); +} + +static size_t +expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, + isc_rwlocktype_t *tlocktypep, + size_t purgesize DNS__DB_FLARG) { + dns_slabheader_t *header = NULL; + size_t purged = 0; + + for (header = ISC_LIST_TAIL(rbtdb->lru[locknum]); + header != NULL && header->last_used <= rbtdb->last_used && + purged <= purgesize; + header = ISC_LIST_TAIL(rbtdb->lru[locknum])) + { + size_t header_size = rdataset_size(header); + + /* + * Unlink the 
entry at this point to avoid checking it + * again even if it's currently used someone else and + * cannot be purged at this moment. This entry won't be + * referenced any more (so unlinking is safe) since the + * TTL will be reset to 0. + */ + ISC_LIST_UNLINK(rbtdb->lru[locknum], header, link); + dns__cacherbt_expireheader(header, tlocktypep, + dns_expire_lru DNS__DB_FLARG_PASS); + purged += header_size; + } + + return (purged); +} + +/*% + * Purge some expired and/or stale (i.e. unused for some period) cache entries + * due to an overmem condition. To recover from this condition quickly, + * we clean up entries up to the size of newly added rdata that triggered + * the overmem; this is accessible via newheader. + * + * The LRU lists tails are processed in LRU order to the nearest second. + * + * A write lock on the tree must be held. + */ +void +dns__cacherbt_overmem(dns_rbtdb_t *rbtdb, dns_slabheader_t *newheader, + isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) { + uint32_t locknum_start = rbtdb->lru_sweep++ % rbtdb->node_lock_count; + uint32_t locknum = locknum_start; + /* Size of added data, possible node and possible ENT node. */ + size_t purgesize = + rdataset_size(newheader) + + 2 * dns__rbtnode_getsize(RBTDB_HEADERNODE(newheader)); + size_t purged = 0; + isc_stdtime_t min_last_used = 0; + size_t max_passes = 8; + +again: + do { + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); + + purged += expire_lru_headers(rbtdb, locknum, tlocktypep, + purgesize - + purged DNS__DB_FLARG_PASS); + + /* + * Work out the oldest remaining last_used values of the list + * tails as we walk across the array of lru lists. 
+ */ + dns_slabheader_t *header = ISC_LIST_TAIL(rbtdb->lru[locknum]); + if (header != NULL && + (min_last_used == 0 || header->last_used < min_last_used)) + { + min_last_used = header->last_used; + } + NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); + locknum = (locknum + 1) % rbtdb->node_lock_count; + } while (locknum != locknum_start && purged <= purgesize); + + /* + * Update rbtdb->last_used if we have walked all the list tails and have + * not freed the required amount of memory. + */ + if (purged < purgesize) { + if (min_last_used != 0) { + rbtdb->last_used = min_last_used; + if (max_passes-- > 0) { + goto again; + } + } + } +} diff --git a/lib/dns/qp-zonedb.c b/lib/dns/qp-zonedb.c new file mode 100644 index 0000000000..6ebc44f56a --- /dev/null +++ b/lib/dns/qp-zonedb.c @@ -0,0 +1,2519 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/*! 
\file */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "db_p.h" +#include "qpdb_p.h" + +#define CHECK(op) \ + do { \ + result = (op); \ + if (result != ISC_R_SUCCESS) \ + goto failure; \ + } while (0) + +#define EXISTS(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NONEXISTENT) == 0) +#define NONEXISTENT(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NONEXISTENT) != 0) +#define IGNORE(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_IGNORE) != 0) +#define RESIGN(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_RESIGN) != 0) +#define ANCIENT(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_ANCIENT) != 0) + +#define RBTDB_ATTR_LOADED 0x01 +#define RBTDB_ATTR_LOADING 0x02 + +static isc_result_t +findnsec3node(dns_db_t *db, const dns_name_t *name, bool create, + dns_dbnode_t **nodep DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + + return (dns__rbtdb_findnodeintree(rbtdb, rbtdb->nsec3, name, create, + nodep DNS__DB_FLARG_PASS)); +} + +static isc_result_t +zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, + void *arg DNS__DB_FLARG) { + rbtdb_search_t *search = arg; + dns_slabheader_t *header = NULL, *header_next = NULL; + dns_slabheader_t *dname_header = NULL, *sigdname_header = NULL; + dns_slabheader_t *ns_header = NULL; + dns_slabheader_t *found = NULL; + isc_result_t result = DNS_R_CONTINUE; + dns_rbtnode_t *onode = NULL; 
+ isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + /* + * We only want to remember the topmost zone cut, since it's the one + * that counts, so we'll just continue if we've already found a + * zonecut. + */ + if (search->zonecut != NULL) { + return (result); + } + + onode = search->rbtdb->origin_node; + + NODE_RDLOCK(&(search->rbtdb->node_locks[node->locknum].lock), + &nlocktype); + + /* + * Look for an NS or DNAME rdataset active in our version. + */ + for (header = node->data; header != NULL; header = header_next) { + header_next = header->next; + if (header->type == dns_rdatatype_ns || + header->type == dns_rdatatype_dname || + header->type == DNS_SIGTYPE(dns_rdatatype_dname)) + { + do { + if (header->serial <= search->serial && + !IGNORE(header)) + { + /* + * Is this a "this rdataset doesn't + * exist" record? + */ + if (NONEXISTENT(header)) { + header = NULL; + } + break; + } else { + header = header->down; + } + } while (header != NULL); + if (header != NULL) { + if (header->type == dns_rdatatype_dname) { + dname_header = header; + } else if (header->type == + DNS_SIGTYPE(dns_rdatatype_dname)) + { + sigdname_header = header; + } else if (node != onode || + IS_STUB(search->rbtdb)) + { + /* + * We've found an NS rdataset that + * isn't at the origin node. We check + * that they're not at the origin node, + * because otherwise we'd erroneously + * treat the zone top as if it were + * a delegation. + */ + ns_header = header; + } + } + } + } + + /* + * Did we find anything? + */ + if (!IS_STUB(search->rbtdb) && ns_header != NULL) { + /* + * Note that NS has precedence over DNAME if both exist + * in a zone. Otherwise DNAME take precedence over NS. 
+ */ + found = ns_header; + search->zonecut_sigheader = NULL; + } else if (dname_header != NULL) { + found = dname_header; + search->zonecut_sigheader = sigdname_header; + } else if (ns_header != NULL) { + found = ns_header; + search->zonecut_sigheader = NULL; + } + + if (found != NULL) { + /* + * We increment the reference count on node to ensure that + * search->zonecut_header will still be valid later. + */ + dns__rbtdb_newref(search->rbtdb, node, + isc_rwlocktype_read DNS__DB_FLARG_PASS); + search->zonecut = node; + search->zonecut_header = found; + search->need_cleanup = true; + /* + * Since we've found a zonecut, anything beneath it is + * glue and is not subject to wildcard matching, so we + * may clear search->wild. + */ + search->wild = false; + if ((search->options & DNS_DBFIND_GLUEOK) == 0) { + /* + * If the caller does not want to find glue, then + * this is the best answer and the search should + * stop now. + */ + result = DNS_R_PARTIALMATCH; + } else { + dns_name_t *zcname = NULL; + + /* + * The search will continue beneath the zone cut. + * This may or may not be the best match. In case it + * is, we need to remember the node name. + */ + zcname = dns_fixedname_name(&search->zonecut_name); + dns_name_copy(name, zcname); + search->copy_name = true; + } + } else { + /* + * There is no zonecut at this node which is active in this + * version. + * + * If this is a "wild" node and the caller hasn't disabled + * wildcard matching, remember that we've seen a wild node + * in case we need to go searching for wildcard matches + * later on. 
+ */ + if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0) { + search->wild = true; + } + } + + NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), + &nlocktype); + + return (result); +} + +static isc_result_t +setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep, + dns_name_t *foundname, dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + dns_name_t *zcname = NULL; + dns_typepair_t type; + dns_rbtnode_t *node = NULL; + + REQUIRE(search != NULL); + REQUIRE(search->zonecut != NULL); + REQUIRE(search->zonecut_header != NULL); + + /* + * The caller MUST NOT be holding any node locks. + */ + + node = search->zonecut; + type = search->zonecut_header->type; + + /* + * If we have to set foundname, we do it before anything else. + * If we were to set foundname after we had set nodep or bound the + * rdataset, then we'd have to undo that work if dns_name_copy() + * failed. By setting foundname first, there's nothing to undo if + * we have trouble. + */ + if (foundname != NULL && search->copy_name) { + zcname = dns_fixedname_name(&search->zonecut_name); + dns_name_copy(zcname, foundname); + } + if (nodep != NULL) { + /* + * Note that we don't have to increment the node's reference + * count here because we're going to use the reference we + * already have in the search block. 
+ */ + *nodep = node; + search->need_cleanup = false; + } + if (rdataset != NULL) { + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + NODE_RDLOCK(&(search->rbtdb->node_locks[node->locknum].lock), + &nlocktype); + dns__rbtdb_bindrdataset(search->rbtdb, node, + search->zonecut_header, search->now, + isc_rwlocktype_read, + rdataset DNS__DB_FLARG_PASS); + if (sigrdataset != NULL && search->zonecut_sigheader != NULL) { + dns__rbtdb_bindrdataset( + search->rbtdb, node, search->zonecut_sigheader, + search->now, isc_rwlocktype_read, + sigrdataset DNS__DB_FLARG_PASS); + } + NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), + &nlocktype); + } + + if (type == dns_rdatatype_dname) { + return (DNS_R_DNAME); + } + return (DNS_R_DELEGATION); +} + +typedef enum { FORWARD, BACK } direction_t; + +/* + * Step backwards or forwards through the database until we find a + * node with data in it for the desired version. If 'nextname' is not NULL, + * and we found a predecessor or successor, save the name we found in it. + * Return true if we found a predecessor or successor. 
+ */
+static bool
+step(rbtdb_search_t *search, dns_rbtnodechain_t *chain, direction_t direction,
+     dns_name_t *nextname) {
+	dns_rbtdb_t *rbtdb = search->rbtdb;
+	dns_fixedname_t forigin;
+	dns_name_t *origin = dns_fixedname_initname(&forigin);
+	dns_name_t prefix;
+	isc_result_t result;
+
+	dns_name_init(&prefix, NULL);
+
+	for (;;) {
+		dns_rbtnode_t *here = NULL;
+		dns_slabheader_t *candidate = NULL;
+		isc_rwlock_t *nodelock = NULL;
+		isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
+
+		/* Where is the chain pointing right now? */
+		result = dns_rbtnodechain_current(chain, &prefix, origin,
+						  &here);
+		if (result != ISC_R_SUCCESS) {
+			return (false);
+		}
+
+		/*
+		 * Under the node read lock, look for any header that is
+		 * visible at the search serial, not ignored, and exists.
+		 */
+		nodelock = &(rbtdb->node_locks[here->locknum].lock);
+		NODE_RDLOCK(nodelock, &nlocktype);
+		candidate = here->data;
+		while (candidate != NULL &&
+		       !(candidate->serial <= search->serial &&
+			 !IGNORE(candidate) && EXISTS(candidate)))
+		{
+			candidate = candidate->next;
+		}
+		NODE_UNLOCK(nodelock, &nlocktype);
+
+		if (candidate != NULL) {
+			/* This node has data; 'result' is ISC_R_SUCCESS. */
+			break;
+		}
+
+		/* Nothing usable here: move one node in the asked direction. */
+		result = (direction == FORWARD)
+				 ? dns_rbtnodechain_next(chain, NULL, NULL)
+				 : dns_rbtnodechain_prev(chain, NULL, NULL);
+		if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) {
+			return (false);
+		}
+	}
+
+	/* Report the full name of the node we stopped on. */
+	result = dns_name_concatenate(&prefix, origin, nextname, NULL);
+	return (result == ISC_R_SUCCESS);
+}
+
+/*
+ * Use step() to find the successor to the current name, and then
+ * check to see whether it's a subdomain of the current name. If so,
+ * then this is an empty non-terminal in the currently active version
+ * of the database.
+ */ +static bool +activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain, + const dns_name_t *current) { + isc_result_t result; + dns_fixedname_t fnext; + dns_name_t *next = dns_fixedname_initname(&fnext); + + result = dns_rbtnodechain_next(chain, NULL, NULL); + if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) { + return (false); + } + return (step(search, chain, FORWARD, next) && + dns_name_issubdomain(next, current)); +} + +static bool +wildcard_blocked(rbtdb_search_t *search, const dns_name_t *qname, + dns_name_t *wname) { + isc_result_t result; + dns_fixedname_t fnext; + dns_fixedname_t fprev; + dns_name_t *next = NULL, *prev = NULL; + dns_name_t name; + dns_name_t rname; + dns_name_t tname; + dns_rbtnodechain_t chain; + bool check_next = false; + bool check_prev = false; + unsigned int n; + + dns_name_init(&name, NULL); + dns_name_init(&tname, NULL); + dns_name_init(&rname, NULL); + next = dns_fixedname_initname(&fnext); + prev = dns_fixedname_initname(&fprev); + + /* + * The qname seems to have matched a wildcard, but we + * need to find out if there's an empty nonterminal node + * between the wildcard level and the qname. + * + * search->chain should now be pointing at the predecessor + * of the searched-for name. We are using a local copy of the + * chain so as not to change the state of search->chain. + * step() will walk backward until we find a predecessor with + * data. + */ + chain = search->chain; + check_prev = step(search, &chain, BACK, prev); + + /* Now reset the chain and look for a successor with data. */ + chain = search->chain; + result = dns_rbtnodechain_next(&chain, NULL, NULL); + if (result == ISC_R_SUCCESS) { + check_next = step(search, &chain, FORWARD, next); + } + + if (!check_prev && !check_next) { + /* No predecessor or successor was found at all? */ + return (false); + } + + dns_name_clone(qname, &rname); + + /* + * Remove the wildcard label to find the terminal name. 
+ */ + n = dns_name_countlabels(wname); + dns_name_getlabelsequence(wname, 1, n - 1, &tname); + + do { + if ((check_prev && dns_name_issubdomain(prev, &rname)) || + (check_next && dns_name_issubdomain(next, &rname))) + { + return (true); + } + + /* + * Remove the leftmost label from the qname and check again. + */ + n = dns_name_countlabels(&rname); + dns_name_getlabelsequence(&rname, 1, n - 1, &rname); + } while (!dns_name_equal(&rname, &tname)); + + return (false); +} + +static isc_result_t +find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep, + const dns_name_t *qname) { + unsigned int i, j; + dns_rbtnode_t *node = NULL, *level_node = NULL, *wnode = NULL; + dns_slabheader_t *header = NULL; + isc_result_t result = ISC_R_NOTFOUND; + dns_name_t name; + dns_name_t *wname = NULL; + dns_fixedname_t fwname; + dns_rbtdb_t *rbtdb = NULL; + bool done, wild, active; + dns_rbtnodechain_t wchain; + + /* + * Caller must be holding the tree lock and MUST NOT be holding + * any node locks. + */ + + /* + * Examine each ancestor level. If the level's wild bit + * is set, then construct the corresponding wildcard name and + * search for it. If the wildcard node exists, and is active in + * this version, we're done. If not, then we next check to see + * if the ancestor is active in this version. If so, then there + * can be no possible wildcard match and again we're done. If not, + * continue the search. + */ + + rbtdb = search->rbtdb; + i = search->chain.level_matches; + done = false; + node = *nodep; + do { + isc_rwlock_t *lock = &rbtdb->node_locks[node->locknum].lock; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + NODE_RDLOCK(lock, &nlocktype); + + /* + * First we try to figure out if this node is active in + * the search's version. We do this now, even though we + * may not need the information, because it simplifies the + * locking and code flow. 
+ */ + for (header = node->data; header != NULL; header = header->next) + { + if (header->serial <= search->serial && + !IGNORE(header) && EXISTS(header) && + !ANCIENT(header)) + { + break; + } + } + if (header != NULL) { + active = true; + } else { + active = false; + } + + if (node->wild) { + wild = true; + } else { + wild = false; + } + + NODE_UNLOCK(lock, &nlocktype); + + if (wild) { + /* + * Construct the wildcard name for this level. + */ + dns_name_init(&name, NULL); + dns_rbt_namefromnode(node, &name); + wname = dns_fixedname_initname(&fwname); + result = dns_name_concatenate(dns_wildcardname, &name, + wname, NULL); + j = i; + while (result == ISC_R_SUCCESS && j != 0) { + j--; + level_node = search->chain.levels[j]; + dns_name_init(&name, NULL); + dns_rbt_namefromnode(level_node, &name); + result = dns_name_concatenate(wname, &name, + wname, NULL); + } + if (result != ISC_R_SUCCESS) { + break; + } + + wnode = NULL; + dns_rbtnodechain_init(&wchain); + result = dns_rbt_findnode( + rbtdb->tree, wname, NULL, &wnode, &wchain, + DNS_RBTFIND_EMPTYDATA, NULL, NULL); + if (result == ISC_R_SUCCESS) { + /* + * We have found the wildcard node. If it + * is active in the search's version, we're + * done. + */ + lock = &rbtdb->node_locks[wnode->locknum].lock; + NODE_RDLOCK(lock, &nlocktype); + for (header = wnode->data; header != NULL; + header = header->next) + { + if (header->serial <= search->serial && + !IGNORE(header) && EXISTS(header) && + !ANCIENT(header)) + { + break; + } + } + NODE_UNLOCK(lock, &nlocktype); + if (header != NULL || + activeempty(search, &wchain, wname)) + { + if (wildcard_blocked(search, qname, + wname)) + { + return (ISC_R_NOTFOUND); + } + /* + * The wildcard node is active! + * + * Note: result is still ISC_R_SUCCESS + * so we don't have to set it. + */ + *nodep = wnode; + break; + } + } else if (result != ISC_R_NOTFOUND && + result != DNS_R_PARTIALMATCH) + { + /* + * An error has occurred. Bail out. 
+ */ + break; + } + } + + if (active) { + /* + * The level node is active. Any wildcarding + * present at higher levels has no + * effect and we're done. + */ + result = ISC_R_NOTFOUND; + break; + } + + if (i > 0) { + i--; + node = search->chain.levels[i]; + } else { + done = true; + } + } while (!done); + + return (result); +} + +static bool +matchparams(dns_slabheader_t *header, rbtdb_search_t *search) { + dns_rdata_t rdata = DNS_RDATA_INIT; + dns_rdata_nsec3_t nsec3; + unsigned char *raw = NULL; + unsigned int rdlen, count; + isc_region_t region; + isc_result_t result; + + REQUIRE(header->type == dns_rdatatype_nsec3); + + raw = (unsigned char *)header + sizeof(*header); + count = raw[0] * 256 + raw[1]; /* count */ + raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH; + + while (count-- > 0) { + rdlen = raw[0] * 256 + raw[1]; + raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; + region.base = raw; + region.length = rdlen; + dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass, + dns_rdatatype_nsec3, ®ion); + raw += rdlen; + result = dns_rdata_tostruct(&rdata, &nsec3, NULL); + INSIST(result == ISC_R_SUCCESS); + if (nsec3.hash == search->rbtversion->hash && + nsec3.iterations == search->rbtversion->iterations && + nsec3.salt_length == search->rbtversion->salt_length && + memcmp(nsec3.salt, search->rbtversion->salt, + nsec3.salt_length) == 0) + { + return (true); + } + dns_rdata_reset(&rdata); + } + return (false); +} + +/* + * Find node of the NSEC/NSEC3 record that is 'name'. 
+ */ +static isc_result_t +previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search, + dns_name_t *name, dns_name_t *origin, + dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain, + bool *firstp) { /* + * Step back to the previous candidate node. On success '*nodep' is + * the node found and 'name'/'origin' hold its name. 'nsecchain' + * and 'firstp' are not touched when 'type' is NSEC3. + */ + dns_fixedname_t ftarget; + dns_name_t *target = NULL; + dns_rbtnode_t *nsecnode = NULL; + isc_result_t result; + + REQUIRE(nodep != NULL && *nodep == NULL); + REQUIRE(type == dns_rdatatype_nsec3 || firstp != NULL); + + if (type == dns_rdatatype_nsec3) { /* NSEC3: step the main search chain itself */ + result = dns_rbtnodechain_prev(&search->chain, NULL, NULL); + if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) { + return (result); + } + result = dns_rbtnodechain_current(&search->chain, name, origin, + nodep); + return (result); + } + + target = dns_fixedname_initname(&ftarget); + + for (;;) { /* NSEC: walk the auxiliary NSEC tree backwards */ + if (*firstp) { + /* + * Construct the name of the second node to check. + * It is the first node sought in the NSEC tree. + */ + *firstp = false; + dns_rbtnodechain_init(nsecchain); + result = dns_name_concatenate(name, origin, target, + NULL); + if (result != ISC_R_SUCCESS) { + return (result); + } + nsecnode = NULL; + result = dns_rbt_findnode( + search->rbtdb->nsec, target, NULL, &nsecnode, + nsecchain, DNS_RBTFIND_EMPTYDATA, NULL, NULL); + if (result == ISC_R_SUCCESS) { + /* + * Since this was the first loop, finding the + * name in the NSEC tree implies that the first + * node checked in the main tree had an + * unacceptable NSEC record. + * Try the previous node in the NSEC tree. + */ + result = dns_rbtnodechain_prev(nsecchain, name, + origin); + if (result == DNS_R_NEWORIGIN) { + result = ISC_R_SUCCESS; + } + } else if (result == ISC_R_NOTFOUND || + result == DNS_R_PARTIALMATCH) + { /* no exact match: use the name the chain is positioned at */ + result = dns_rbtnodechain_current( + nsecchain, name, origin, NULL); + if (result == ISC_R_NOTFOUND) { /* reported to the caller as "no more" */ + result = ISC_R_NOMORE; + } + } + } else { + /* + * This is a second or later trip through the auxiliary + * tree for the name of a third or earlier NSEC node in + * the main tree.
Previous trips through the NSEC tree + * must have found nodes in the main tree with NSEC + * records. Perhaps they lacked signature records. + */ + result = dns_rbtnodechain_prev(nsecchain, name, origin); + if (result == DNS_R_NEWORIGIN) { + result = ISC_R_SUCCESS; + } + } + if (result != ISC_R_SUCCESS) { + return (result); + } + + /* + * Construct the name to seek in the main tree. + */ + result = dns_name_concatenate(name, origin, target, NULL); + if (result != ISC_R_SUCCESS) { + return (result); + } + + *nodep = NULL; + result = dns_rbt_findnode(search->rbtdb->tree, target, NULL, + nodep, &search->chain, + DNS_RBTFIND_EMPTYDATA, NULL, NULL); + if (result == ISC_R_SUCCESS) { /* main-tree node located; hand it back */ + return (result); + } + + /* + * There should always be a node in the main tree with the + * same name as the node in the auxiliary NSEC tree, except for + * nodes in the auxiliary tree that are awaiting deletion. + */ + if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) { + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, + DNS_LOGMODULE_CACHE, ISC_LOG_ERROR, + "previous_closest_nsec(): %s", + isc_result_totext(result)); + return (DNS_R_BADDB); + } + } +} + +/* + * Find the NSEC/NSEC3 which is at or before the current point on the + * search chain. For NSEC3 records only NSEC3 records that match the + * current NSEC3PARAM record are considered. + * + * On success the owner name is written to 'foundname', the node is + * returned through 'nodep' when that is non-NULL, and the NSEC/NSEC3 + * (plus its RRSIG, when one was found) is bound to 'rdataset' + * ('sigrdataset'). An NSEC3 search that runs off the start of the + * tree restarts once from the last node; otherwise running off the + * start yields DNS_R_BADDB.
+ */ +static isc_result_t +find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep, + dns_name_t *foundname, dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset, dns_qp_t *tree, + bool secure DNS__DB_FLARG) { + dns_rbtnode_t *node = NULL, *prevnode = NULL; + dns_slabheader_t *header = NULL, *header_next = NULL; + dns_rbtnodechain_t nsecchain; + bool empty_node; + isc_result_t result; + dns_fixedname_t fname, forigin; + dns_name_t *name = NULL, *origin = NULL; + dns_rdatatype_t type; + dns_typepair_t sigtype; + bool wraps; + bool first = true; + bool need_sig = secure; + + if (tree == search->rbtdb->nsec3) { + type = dns_rdatatype_nsec3; + sigtype = DNS_SIGTYPE(dns_rdatatype_nsec3); + wraps = true; /* an NSEC3 search may wrap to the end of the tree */ + } else { + type = dns_rdatatype_nsec; + sigtype = DNS_SIGTYPE(dns_rdatatype_nsec); + wraps = false; + } + + /* + * Use the auxiliary tree only starting with the second node in the + * hope that the original node will be right much of the time. + */ + name = dns_fixedname_initname(&fname); + origin = dns_fixedname_initname(&forigin); +again: /* re-entered at most once, after an NSEC3 wrap-around */ + node = NULL; + prevnode = NULL; + result = dns_rbtnodechain_current(&search->chain, name, origin, &node); + if (result != ISC_R_SUCCESS) { + return (result); + } + do { + dns_slabheader_t *found = NULL, *foundsig = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + NODE_RDLOCK(&(search->rbtdb->node_locks[node->locknum].lock), + &nlocktype); + empty_node = true; + for (header = node->data; header != NULL; header = header_next) + { + header_next = header->next; + /* + * Look for an active, extant NSEC or RRSIG NSEC. + */ + do { + if (header->serial <= search->serial && + !IGNORE(header)) + { + /* + * Is this a "this rdataset doesn't + * exist" record? + */ + if (NONEXISTENT(header)) { + header = NULL; + } + break; + } else { + header = header->down; + } + } while (header != NULL); + if (header != NULL) { + /* + * We now know that there is at least one + * active rdataset at this node.
+ */ + empty_node = false; + if (header->type == type) { + found = header; + if (foundsig != NULL) { + break; + } + } else if (header->type == sigtype) { + foundsig = header; + if (found != NULL) { + break; + } + } + } + } + if (!empty_node) { + if (found != NULL && search->rbtversion->havensec3 && + found->type == dns_rdatatype_nsec3 && + !matchparams(found, search)) + { + empty_node = true; + found = NULL; + foundsig = NULL; + result = previous_closest_nsec( + type, search, name, origin, &prevnode, + NULL, NULL); + } else if (found != NULL && + (foundsig != NULL || !need_sig)) + { + /* + * We've found the right NSEC/NSEC3 record. + * + * Note: for this to really be the right + * NSEC record, it's essential that the NSEC + * records of any nodes obscured by a zone + * cut have been removed; we assume this is + * the case. + */ + result = dns_name_concatenate(name, origin, + foundname, NULL); + if (result == ISC_R_SUCCESS) { + if (nodep != NULL) { + dns__rbtdb_newref( + search->rbtdb, node, + isc_rwlocktype_read + DNS__DB_FLARG_PASS); + *nodep = node; + } + dns__rbtdb_bindrdataset( + search->rbtdb, node, found, + search->now, + isc_rwlocktype_read, + rdataset DNS__DB_FLARG_PASS); + if (foundsig != NULL) { + dns__rbtdb_bindrdataset( + search->rbtdb, node, + foundsig, search->now, + isc_rwlocktype_read, + sigrdataset + DNS__DB_FLARG_PASS); + } + } + } else if (found == NULL && foundsig == NULL) { + /* + * This node is active, but has no NSEC or + * RRSIG NSEC. That means it's glue or + * other obscured zone data that isn't + * relevant for our search. Treat the + * node as if it were empty and keep looking. + */ + empty_node = true; + result = previous_closest_nsec( + type, search, name, origin, &prevnode, + &nsecchain, &first); + } else { + /* + * We found an active node, but either the + * NSEC or the RRSIG NSEC is missing. This + * shouldn't happen. + */ + result = DNS_R_BADDB; + } + } else { + /* + * This node isn't active. We've got to keep + * looking. 
+ */ + result = previous_closest_nsec(type, search, name, + origin, &prevnode, + &nsecchain, &first); + } + NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), + &nlocktype); + node = prevnode; + prevnode = NULL; + } while (empty_node && result == ISC_R_SUCCESS); + + if (!first) { /* nsecchain was initialized by previous_closest_nsec() */ + dns_rbtnodechain_invalidate(&nsecchain); + } + + if (result == ISC_R_NOMORE && wraps) { /* ran off the start of the NSEC3 tree */ + result = dns_rbtnodechain_last(&search->chain, tree, NULL, + NULL); + if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { + wraps = false; /* only wrap once */ + goto again; + } + } + + /* + * If the result is ISC_R_NOMORE, then we got to the beginning of + * the database and didn't find a NSEC record. This shouldn't + * happen. + */ + if (result == ISC_R_NOMORE) { + result = DNS_R_BADDB; + } + + return (result); +} +/* + * Look up 'type' at 'name' in 'version' of the zone database (the current + * version when 'version' is NULL). Results produced in this function + * include ISC_R_SUCCESS, DNS_R_CNAME, DNS_R_GLUE, DNS_R_ZONECUT, + * DNS_R_NXRRSET, DNS_R_NXDOMAIN, DNS_R_EMPTYNAME, DNS_R_EMPTYWILD and + * DNS_R_BADDB; delegations are reported via setup_delegation(). + * 'now' is unused. + */ +static isc_result_t +zone_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, + dns_rdatatype_t type, unsigned int options, + isc_stdtime_t now ISC_ATTR_UNUSED, dns_dbnode_t **nodep, + dns_name_t *foundname, dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + dns_rbtnode_t *node = NULL; + isc_result_t result; + rbtdb_search_t search; + bool cname_ok = true; + bool close_version = false; + bool maybe_zonecut = false; + bool at_zonecut = false; + bool wild = false; + bool empty_node; + dns_slabheader_t *header = NULL, *header_next = NULL; + dns_slabheader_t *found = NULL, *nsecheader = NULL; + dns_slabheader_t *foundsig = NULL, *cnamesig = NULL, *nsecsig = NULL; + dns_typepair_t sigtype; + bool active; + isc_rwlock_t *lock = NULL; + dns_qp_t *tree = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); + INSIST(version == NULL || + ((dns_rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db); + + /* + * If the caller didn't supply a version, attach to the current + * version.
+ */ + if (version == NULL) { + dns__rbtdb_currentversion(db, &version); + close_version = true; + } + + search = (rbtdb_search_t){ + .rbtdb = (dns_rbtdb_t *)db, + .rbtversion = version, + .serial = ((dns_rbtdb_version_t *)version)->serial, + .options = options, + }; + dns_fixedname_init(&search.zonecut_name); + dns_rbtnodechain_init(&search.chain); + + TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype); + + /* + * Search down from the root of the tree. If, while going down, we + * encounter a callback node, zone_zonecut_callback() will search the + * rdatasets at the zone cut for active DNAME or NS rdatasets. + */ + tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 + : search.rbtdb->tree; + result = dns_rbt_findnode(tree, name, foundname, &node, &search.chain, + DNS_RBTFIND_EMPTYDATA, zone_zonecut_callback, + &search); + + if (result == DNS_R_PARTIALMATCH) { + partial_match: + if (search.zonecut != NULL) { + result = setup_delegation( + &search, nodep, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + goto tree_exit; + } + + if (search.wild) { + /* + * At least one of the levels in the search chain + * potentially has a wildcard. For each such level, + * we must see if there's a matching wildcard active + * in the current version. + */ + result = find_wildcard(&search, &node, name); + if (result == ISC_R_SUCCESS) { + dns_name_copy(name, foundname); + wild = true; + goto found; + } else if (result != ISC_R_NOTFOUND) { + goto tree_exit; + } + } + + active = false; + if ((options & DNS_DBFIND_FORCENSEC3) == 0) { + /* + * The NSEC3 tree won't have empty nodes, + * so it isn't necessary to check for them. + */ + dns_rbtnodechain_t chain = search.chain; + active = activeempty(&search, &chain, name); + } + + /* + * If we're here, then the name does not exist, is not + * beneath a zonecut, and there's no matching wildcard. 
+ */ + if ((search.rbtversion->secure && + !search.rbtversion->havensec3) || + (search.options & DNS_DBFIND_FORCENSEC3) != 0) + { + result = find_closest_nsec( + &search, nodep, foundname, rdataset, + sigrdataset, tree, + search.rbtversion->secure DNS__DB_FLARG_PASS); + if (result == ISC_R_SUCCESS) { + result = active ? DNS_R_EMPTYNAME + : DNS_R_NXDOMAIN; + } + } else { + result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN; + } + goto tree_exit; + } else if (result != ISC_R_SUCCESS) { + goto tree_exit; + } + +found: + /* + * We have found a node whose name is the desired name, or we + * have matched a wildcard. + */ + + if (search.zonecut != NULL) { + /* + * If we're beneath a zone cut, we don't want to look for + * CNAMEs because they're not legitimate zone glue. + */ + cname_ok = false; + } else { + /* + * The node may be a zone cut itself. If it might be one, + * make sure we check for it later. + * + * DS records live above the zone cut in ordinary zone so + * we want to ignore any referral. + * + * Stub zones don't have anything "above" the delegation so + * we always return a referral. + */ + if (node->find_callback && + ((node != search.rbtdb->origin_node && + !dns_rdatatype_atparent(type)) || + IS_STUB(search.rbtdb))) + { + maybe_zonecut = true; + } + } + + /* + * Certain DNSSEC types are not subject to CNAME matching + * (RFC4035, section 2.5 and RFC3007). + * + * We don't check for RRSIG, because we don't store RRSIG records + * directly. + */ + if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { + cname_ok = false; + } + + /* + * We now go looking for rdata... + */ + + lock = &search.rbtdb->node_locks[node->locknum].lock; + NODE_RDLOCK(lock, &nlocktype); + + found = NULL; + foundsig = NULL; + sigtype = DNS_SIGTYPE(type); + nsecheader = NULL; + nsecsig = NULL; + cnamesig = NULL; + empty_node = true; + for (header = node->data; header != NULL; header = header_next) { + header_next = header->next; + /* + * Look for an active, extant rdataset. 
+ */ + do { + if (header->serial <= search.serial && !IGNORE(header)) + { + /* + * Is this a "this rdataset doesn't + * exist" record? + */ + if (NONEXISTENT(header)) { + header = NULL; + } + break; + } else { + header = header->down; + } + } while (header != NULL); + if (header != NULL) { + /* + * We now know that there is at least one active + * rdataset at this node. + */ + empty_node = false; + + /* + * Do special zone cut handling, if requested. + */ + if (maybe_zonecut && header->type == dns_rdatatype_ns) { + /* + * We increment the reference count on node to + * ensure that search->zonecut_header will + * still be valid later. + */ + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + search.zonecut = node; + search.zonecut_header = header; + search.zonecut_sigheader = NULL; + search.need_cleanup = true; + maybe_zonecut = false; + at_zonecut = true; + /* + * It is not clear if KEY should still be + * allowed at the parent side of the zone + * cut or not. It is needed for RFC3007 + * validated updates. + */ + if ((search.options & DNS_DBFIND_GLUEOK) == 0 && + type != dns_rdatatype_nsec && + type != dns_rdatatype_key) + { + /* + * Glue is not OK, but any answer we + * could return would be glue. Return + * the delegation. + */ + found = NULL; + break; + } + if (found != NULL && foundsig != NULL) { + break; + } + } + + /* + * If the NSEC3 record doesn't match the chain + * we are using behave as if it isn't here. + */ + if (header->type == dns_rdatatype_nsec3 && + !matchparams(header, &search)) + { + NODE_UNLOCK(lock, &nlocktype); + goto partial_match; + } + /* + * If we found a type we were looking for, + * remember it. + */ + if (header->type == type || type == dns_rdatatype_any || + (header->type == dns_rdatatype_cname && cname_ok)) + { + /* + * We've found the answer! + */ + found = header; + if (header->type == dns_rdatatype_cname && + cname_ok) + { + /* + * We may be finding a CNAME instead + * of the desired type. 
+ * + * If we've already got the CNAME RRSIG, + * use it, otherwise change sigtype + * so that we find it. + */ + if (cnamesig != NULL) { + foundsig = cnamesig; + } else { + sigtype = DNS_SIGTYPE( + dns_rdatatype_cname); + } + } + /* + * If we've got all we need, end the search. + */ + if (!maybe_zonecut && foundsig != NULL) { + break; + } + } else if (header->type == sigtype) { + /* + * We've found the RRSIG rdataset for our + * target type. Remember it. + */ + foundsig = header; + /* + * If we've got all we need, end the search. + */ + if (!maybe_zonecut && found != NULL) { + break; + } + } else if (header->type == dns_rdatatype_nsec && + !search.rbtversion->havensec3) + { + /* + * Remember a NSEC rdataset even if we're + * not specifically looking for it, because + * we might need it later. + */ + nsecheader = header; + } else if (header->type == + DNS_SIGTYPE(dns_rdatatype_nsec) && + !search.rbtversion->havensec3) + { + /* + * If we need the NSEC rdataset, we'll also + * need its signature. + */ + nsecsig = header; + } else if (cname_ok && + header->type == + DNS_SIGTYPE(dns_rdatatype_cname)) + { + /* + * If we get a CNAME match, we'll also need + * its signature. + */ + cnamesig = header; + } + } + } + + if (empty_node) { + /* + * We have an exact match for the name, but there are no + * active rdatasets in the desired version. That means that + * this node doesn't exist in the desired version, and that + * we really have a partial match. + */ + if (!wild) { + NODE_UNLOCK(lock, &nlocktype); + goto partial_match; + } + } + + /* + * If we didn't find what we were looking for... + */ + if (found == NULL) { + if (search.zonecut != NULL) { + /* + * We were trying to find glue at a node beneath a + * zone cut, but didn't. + * + * Return the delegation. + */ + NODE_UNLOCK(lock, &nlocktype); + result = setup_delegation( + &search, nodep, foundname, rdataset, + sigrdataset DNS__DB_FLARG_PASS); + goto tree_exit; + } + /* + * The desired type doesn't exist. 
+ */ + result = DNS_R_NXRRSET; + if (search.rbtversion->secure && + !search.rbtversion->havensec3 && + (nsecheader == NULL || nsecsig == NULL)) + { + /* + * The zone is secure but there's no NSEC, + * or the NSEC has no signature! + */ + if (!wild) { + result = DNS_R_BADDB; + goto node_exit; + } + + NODE_UNLOCK(lock, &nlocktype); + result = find_closest_nsec( + &search, nodep, foundname, rdataset, + sigrdataset, search.rbtdb->tree, + search.rbtversion->secure DNS__DB_FLARG_PASS); + if (result == ISC_R_SUCCESS) { + result = DNS_R_EMPTYWILD; + } + goto tree_exit; + } + if (nodep != NULL) { + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + *nodep = node; + } + if ((search.rbtversion->secure && + !search.rbtversion->havensec3)) + { + dns__rbtdb_bindrdataset(search.rbtdb, node, nsecheader, + 0, nlocktype, + rdataset DNS__DB_FLARG_PASS); + if (nsecsig != NULL) { + dns__rbtdb_bindrdataset( + search.rbtdb, node, nsecsig, 0, + nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + } + } + if (wild) { + foundname->attributes.wildcard = true; + } + goto node_exit; + } + + /* + * We found what we were looking for, or we found a CNAME. + */ + + if (type != found->type && type != dns_rdatatype_any && + found->type == dns_rdatatype_cname) + { + /* + * We weren't doing an ANY query and we found a CNAME instead + * of the type we were looking for, so we need to indicate + * that result to the caller. + */ + result = DNS_R_CNAME; + } else if (search.zonecut != NULL) { + /* + * If we're beneath a zone cut, we must indicate that the + * result is glue, unless we're actually at the zone cut + * and the type is NSEC or KEY. + */ + if (search.zonecut == node) { + /* + * It is not clear if KEY should still be + * allowed at the parent side of the zone + * cut or not. It is needed for RFC3007 + * validated updates. 
+ */ + if (type == dns_rdatatype_nsec || + type == dns_rdatatype_nsec3 || + type == dns_rdatatype_key) + { + result = ISC_R_SUCCESS; + } else if (type == dns_rdatatype_any) { + result = DNS_R_ZONECUT; + } else { + result = DNS_R_GLUE; + } + } else { + result = DNS_R_GLUE; + } + } else { + /* + * An ordinary successful query! + */ + result = ISC_R_SUCCESS; + } + + if (nodep != NULL) { + if (!at_zonecut) { + dns__rbtdb_newref(search.rbtdb, node, + nlocktype DNS__DB_FLARG_PASS); + } else { + search.need_cleanup = false; + } + *nodep = node; + } + + if (type != dns_rdatatype_any) { + dns__rbtdb_bindrdataset(search.rbtdb, node, found, 0, nlocktype, + rdataset DNS__DB_FLARG_PASS); + if (foundsig != NULL) { + dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig, 0, + nlocktype, + sigrdataset DNS__DB_FLARG_PASS); + } + } + + if (wild) { + foundname->attributes.wildcard = true; + } + +node_exit: + NODE_UNLOCK(lock, &nlocktype); + +tree_exit: + TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype); + + /* + * If we found a zonecut but aren't going to use it, we have to + * let go of it. 
+ */ + if (search.need_cleanup) { + node = search.zonecut; + INSIST(node != NULL); + lock = &(search.rbtdb->node_locks[node->locknum].lock); + + NODE_RDLOCK(lock, &nlocktype); + dns__rbtdb_decref(search.rbtdb, node, 0, &nlocktype, &tlocktype, + true, false DNS__DB_FLARG_PASS); + NODE_UNLOCK(lock, &nlocktype); + INSIST(tlocktype == isc_rwlocktype_none); + } + + if (close_version) { /* we attached to the current version ourselves */ + dns__rbtdb_closeversion(db, &version, false DNS__DB_FLARG_PASS); + } + + dns_rbtnodechain_reset(&search.chain); + + return (result); +} +/* + * Bind the rdataset of 'type'/'covers' at 'node', as seen in 'version' + * (the current version when NULL), to 'rdataset'. When 'covers' is 0 the + * matching RRSIG, if any, is bound to 'sigrdataset'. 'type' must not be + * ANY. Returns ISC_R_SUCCESS or ISC_R_NOTFOUND. + */ +static isc_result_t +zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, + dns_rdatatype_t type, dns_rdatatype_t covers, + isc_stdtime_t now, dns_rdataset_t *rdataset, + dns_rdataset_t *sigrdataset DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + dns_slabheader_t *header = NULL, *header_next = NULL; + dns_slabheader_t *found = NULL, *foundsig = NULL; + uint32_t serial; + dns_rbtdb_version_t *rbtversion = version; + bool close_version = false; + dns_typepair_t matchtype, sigmatchtype; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(type != dns_rdatatype_any); + INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); + + if (rbtversion == NULL) { + dns__rbtdb_currentversion( + db, (dns_dbversion_t **)(void *)(&rbtversion)); + close_version = true; + } + serial = rbtversion->serial; + now = 0; /* the caller's value is discarded; 0 is passed when binding */ + + NODE_RDLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + matchtype = DNS_TYPEPAIR_VALUE(type, covers); + if (covers == 0) { /* plain type: also look for its RRSIG */ + sigmatchtype = DNS_SIGTYPE(type); + } else { + sigmatchtype = 0; + } + + for (header = rbtnode->data; header != NULL; header = header_next) { + header_next = header->next; + do { + if (header->serial <= serial && !IGNORE(header)) { + /* + * Is this a "this rdataset doesn't + * exist" record?
+ */ + if (NONEXISTENT(header)) { + header = NULL; + } + break; + } else { + header = header->down; + } + } while (header != NULL); + if (header != NULL) { + /* + * We have an active, extant rdataset. If it's a + * type we're looking for, remember it. + */ + if (header->type == matchtype) { + found = header; + if (foundsig != NULL) { + break; + } + } else if (header->type == sigmatchtype) { + foundsig = header; + if (found != NULL) { + break; + } + } + } + } + if (found != NULL) { + dns__rbtdb_bindrdataset(rbtdb, rbtnode, found, now, + isc_rwlocktype_read, + rdataset DNS__DB_FLARG_PASS); + if (foundsig != NULL) { + dns__rbtdb_bindrdataset(rbtdb, rbtnode, foundsig, now, + isc_rwlocktype_read, + sigrdataset DNS__DB_FLARG_PASS); + } + } + + NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + if (close_version) { + dns__rbtdb_closeversion( + db, (dns_dbversion_t **)(void *)(&rbtversion), + false DNS__DB_FLARG_PASS); + } + + if (found == NULL) { + return (ISC_R_NOTFOUND); + } + + return (ISC_R_SUCCESS); +} + +/* + * Return true if an rdataset of 'type' at 'node' creates a delegation + * point: DNAME always does; NS does unless 'node' is the origin node of + * a database that is not a stub. + */ +static bool +delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, dns_typepair_t type) { + if (type == dns_rdatatype_dname) { + return (true); + } + if (type != dns_rdatatype_ns) { + return (false); + } + return (node != rbtdb->origin_node || IS_STUB(rbtdb)); +} + +/* + * Load a non-NSEC3 node into the main tree and, when 'hasnsec' is true, + * also into the auxiliary NSEC tree. + */ +static isc_result_t +loadnode(dns_rbtdb_t *rbtdb, const dns_name_t *name, dns_rbtnode_t **nodep, + bool hasnsec) { + isc_result_t noderesult, nsecresult, tmpresult; + dns_rbtnode_t *nsecnode = NULL, *node = NULL; + + noderesult = dns_rbt_addnode(rbtdb->tree, name, &node); + if (!hasnsec) { + goto done; + } + if (noderesult == ISC_R_EXISTS) { + /* + * Add a node to the auxiliary NSEC tree for an old node + * just now getting an NSEC record.
+ */ + if (node->nsec == DNS_DB_NSEC_HAS_NSEC) { + goto done; + } + } else if (noderesult != ISC_R_SUCCESS) { + goto done; + } + + /* + * Build the auxiliary tree for NSECs as we go. + * This tree speeds searches for closest NSECs that would otherwise + * need to examine many irrelevant nodes in large TLDs. + * + * Add nodes to the auxiliary tree after corresponding nodes have + * been added to the main tree. + */ + nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode); + if (nsecresult == ISC_R_SUCCESS) { + nsecnode->nsec = DNS_DB_NSEC_NSEC; + node->nsec = DNS_DB_NSEC_HAS_NSEC; + goto done; + } + + if (nsecresult == ISC_R_EXISTS) { +#if 1 /* 0 */ + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, + DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, + "addnode: NSEC node already exists"); +#endif /* if 1 */ + node->nsec = DNS_DB_NSEC_HAS_NSEC; + goto done; + } + + if (noderesult == ISC_R_SUCCESS) { + /* + * Remove the node we just added above. + */ + tmpresult = dns_rbt_deletenode(rbtdb->tree, node, false); + if (tmpresult != ISC_R_SUCCESS) { + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, + DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, + "loading_addrdataset: " + "dns_rbt_deletenode: %s after " + "dns_rbt_addnode(NSEC): %s", + isc_result_totext(tmpresult), + isc_result_totext(noderesult)); + } + } + + /* + * Set the error condition to be returned. 
+ */ + noderesult = nsecresult; + +done: + if (noderesult == ISC_R_SUCCESS || noderesult == ISC_R_EXISTS) { + *nodep = node; + } + + return (noderesult); +} + +static isc_result_t +loading_addrdataset(void *arg, const dns_name_t *name, + dns_rdataset_t *rdataset DNS__DB_FLARG) { + rbtdb_load_t *loadctx = arg; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)loadctx->db; + dns_rbtnode_t *node = NULL; + isc_result_t result; + isc_region_t region; + dns_slabheader_t *newheader = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + REQUIRE(rdataset->rdclass == rbtdb->common.rdclass); + + /* + * SOA records are only allowed at top of zone. + */ + if (rdataset->type == dns_rdatatype_soa && + !dns_name_equal(name, &rbtdb->common.origin)) + { + return (DNS_R_NOTZONETOP); + } + + if (rdataset->type != dns_rdatatype_nsec3 && + rdataset->covers != dns_rdatatype_nsec3) + { + dns__zonerbt_addwildcards(rbtdb, name, false); + } + + if (dns_name_iswildcard(name)) { + /* + * NS record owners cannot legally be wild cards. + */ + if (rdataset->type == dns_rdatatype_ns) { + return (DNS_R_INVALIDNS); + } + /* + * NSEC3 record owners cannot legally be wild cards. 
+ */ + if (rdataset->type == dns_rdatatype_nsec3) { + return (DNS_R_INVALIDNSEC3); + } + result = dns__zonerbt_wildcardmagic(rbtdb, name, false); + if (result != ISC_R_SUCCESS) { + return (result); + } + } + + /* + * Pick the tree: NSEC3 records and the RRSIGs covering them go into + * the nsec3 tree; everything else goes through loadnode(), which + * also maintains the auxiliary NSEC tree for NSEC records. + */ + if (rdataset->type == dns_rdatatype_nsec3 || + rdataset->covers == dns_rdatatype_nsec3) + { + result = dns_rbt_addnode(rbtdb->nsec3, name, &node); + if (result == ISC_R_SUCCESS) { + node->nsec = DNS_DB_NSEC_NSEC3; + } + } else if (rdataset->type == dns_rdatatype_nsec) { + result = loadnode(rbtdb, name, &node, true); + } else { + result = loadnode(rbtdb, name, &node, false); + } + if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) { + return (result); + } + if (result == ISC_R_SUCCESS) { + /* + * Newly created node: assign its lock bucket. + */ + node->locknum = node->hashval % rbtdb->node_lock_count; + } + + /* + * Build the slab, reserving room in front of the rdata for the + * slab header that is filled in below. + */ + result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, + &region, sizeof(dns_slabheader_t)); + if (result != ISC_R_SUCCESS) { + return (result); + } + newheader = (dns_slabheader_t *)region.base; + *newheader = (dns_slabheader_t){ + .type = DNS_TYPEPAIR_VALUE(rdataset->type, rdataset->covers), + .ttl = rdataset->ttl + loadctx->now, + .trust = rdataset->trust, + .node = node, + .serial = 1, + .count = 1, + }; + + dns_slabheader_reset(newheader, (dns_db_t *)rbtdb, node); + dns_slabheader_setownercase(newheader, name); + + if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) { + /* + * The resign time is kept as the 64-bit time shifted right by + * one, with the dropped low bit stored in resign_lsb. + */ + DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_RESIGN); + newheader->resign = + (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >> + 1); + newheader->resign_lsb = rdataset->resign & 0x1; + } + + NODE_WRLOCK(&rbtdb->node_locks[node->locknum].lock, &nlocktype); + result = dns__rbtdb_add(rbtdb, node, name, rbtdb->current_version, + newheader, DNS_DBADD_MERGE, true, NULL, + 0 DNS__DB_FLARG_PASS); + NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock, &nlocktype); + + if (result == ISC_R_SUCCESS && + delegating_type(rbtdb, node, rdataset->type)) + { + node->find_callback = 1; + } else if (result == DNS_R_UNCHANGED) { +
result = ISC_R_SUCCESS; + } + + return (result); +} +/* + * Begin loading zone data: allocate a load context, mark the database as + * loading (it must be neither loaded nor loading already) and install + * loading_addrdataset() as the 'add' callback with the context as its + * private argument. Always returns ISC_R_SUCCESS. + */ +static isc_result_t +beginload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) { + rbtdb_load_t *loadctx = NULL; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(DNS_CALLBACK_VALID(callbacks)); + REQUIRE(VALID_RBTDB(rbtdb)); + + loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx)); /* released in endload() */ + + loadctx->db = db; + loadctx->now = 0; + + RWLOCK(&rbtdb->lock, isc_rwlocktype_write); + + REQUIRE((rbtdb->attributes & + (RBTDB_ATTR_LOADED | RBTDB_ATTR_LOADING)) == 0); + rbtdb->attributes |= RBTDB_ATTR_LOADING; + + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); + + callbacks->add = loading_addrdataset; + callbacks->add_private = loadctx; + + return (ISC_R_SUCCESS); +} +/* + * Finish a load started by beginload(): flip the database from loading to + * loaded, call dns__rbtdb_setsecure() for the current version when an + * origin node exists, clear the callbacks and free the load context. + */ +static isc_result_t +endload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) { + rbtdb_load_t *loadctx = NULL; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(DNS_CALLBACK_VALID(callbacks)); + loadctx = callbacks->add_private; + REQUIRE(loadctx != NULL); + REQUIRE(loadctx->db == db); + + RWLOCK(&rbtdb->lock, isc_rwlocktype_write); + + REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0); + REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0); + + rbtdb->attributes &= ~RBTDB_ATTR_LOADING; + rbtdb->attributes |= RBTDB_ATTR_LOADED; + + /* + * If there's a KEY rdataset at the zone origin containing a + * zone key, we consider the zone secure.
+ */ + if (rbtdb->origin_node != NULL) { + dns_dbversion_t *version = rbtdb->current_version; + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); /* dropped before calling dns__rbtdb_setsecure() */ + dns__rbtdb_setsecure(db, version, rbtdb->origin_node); + } else { + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); + } + + callbacks->add = NULL; + callbacks->add_private = NULL; + + isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx)); + + return (ISC_R_SUCCESS); +} +/* + * Return the 'secure' flag of the current version, read while holding + * the database lock for reading. + */ +static bool +issecure(dns_db_t *db) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + bool secure; + + REQUIRE(VALID_RBTDB(rbtdb)); + + RWLOCK(&rbtdb->lock, isc_rwlocktype_read); + secure = rbtdb->current_version->secure; + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); + + return (secure); +} +/* + * Copy the NSEC3 parameters recorded in 'version' (the current version + * when NULL) into whichever output pointers are non-NULL. Returns + * ISC_R_NOTFOUND when the version has no NSEC3 parameters ('havensec3' + * is false) and ISC_R_SUCCESS otherwise. When both 'salt' and + * 'salt_length' are given, '*salt_length' must on entry be at least the + * stored salt length; on return it holds the stored length. + */ +static isc_result_t +getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash, + uint8_t *flags, uint16_t *iterations, unsigned char *salt, + size_t *salt_length) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + isc_result_t result = ISC_R_NOTFOUND; + dns_rbtdb_version_t *rbtversion = version; + + REQUIRE(VALID_RBTDB(rbtdb)); + INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); + + RWLOCK(&rbtdb->lock, isc_rwlocktype_read); + if (rbtversion == NULL) { + rbtversion = rbtdb->current_version; + } + + if (rbtversion->havensec3) { + if (hash != NULL) { + *hash = rbtversion->hash; + } + if (salt != NULL && salt_length != NULL) { /* the salt is only copied when its buffer size is known */ + REQUIRE(*salt_length >= rbtversion->salt_length); + memmove(salt, rbtversion->salt, + rbtversion->salt_length); + } + if (salt_length != NULL) { + *salt_length = rbtversion->salt_length; + } + if (iterations != NULL) { + *iterations = rbtversion->iterations; + } + if (flags != NULL) { + *flags = rbtversion->flags; + } + result = ISC_R_SUCCESS; + } + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); + + return (result); +} +/* + * Report the 'records' and 'xfrsize' counters of 'version' (the current + * version when NULL) through SET_IF_NOT_NULL(). Always returns + * ISC_R_SUCCESS. + */ +static isc_result_t +getsize(dns_db_t *db, dns_dbversion_t *version, uint64_t *records, + uint64_t *xfrsize) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + isc_result_t result = ISC_R_SUCCESS; +
dns_rbtdb_version_t *rbtversion = version; + + REQUIRE(VALID_RBTDB(rbtdb)); + INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); + + RWLOCK(&rbtdb->lock, isc_rwlocktype_read); + if (rbtversion == NULL) { + rbtversion = rbtdb->current_version; + } + + RWLOCK(&rbtversion->rwlock, isc_rwlocktype_read); /* the counters are read under the version's own lock */ + SET_IF_NOT_NULL(records, rbtversion->records); + + SET_IF_NOT_NULL(xfrsize, rbtversion->xfrsize); + RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_read); + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); + + return (result); +} +/* + * Set the re-signing time of the slab header behind 'rdataset' to + * 'resign', or clear it when 'resign' is 0, keeping the per-bucket + * resign heap consistent. Not valid for cache databases; 'rdataset' + * must be a slab rdataset. Always returns ISC_R_SUCCESS. + */ +static isc_result_t +setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_slabheader_t *header, oldheader; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(!IS_CACHE(rbtdb)); + REQUIRE(rdataset != NULL); + REQUIRE(rdataset->methods == &dns_rdataslab_rdatasetmethods); + + header = dns_slabheader_fromrdataset(rdataset); + + NODE_WRLOCK(&rbtdb->node_locks[RBTDB_HEADERNODE(header)->locknum].lock, + &nlocktype); + + oldheader = *header; /* snapshot, compared against the updated header via rbtdb->sooner() */ + + /* + * Only break the heap invariant (by adjusting resign and resign_lsb) + * if we are going to be restoring it by calling isc_heap_increased + * or isc_heap_decreased.
+ */ + if (resign != 0) { + header->resign = (isc_stdtime_t)(dns_time64_from32(resign) >> + 1); + header->resign_lsb = resign & 0x1; + } + if (header->heap_index != 0) { + INSIST(RESIGN(header)); + if (resign == 0) { + isc_heap_delete( + rbtdb->heaps[RBTDB_HEADERNODE(header)->locknum], + header->heap_index); + header->heap_index = 0; + header->heap = NULL; + } else if (rbtdb->sooner(header, &oldheader)) { + isc_heap_increased( + rbtdb->heaps[RBTDB_HEADERNODE(header)->locknum], + header->heap_index); + } else if (rbtdb->sooner(&oldheader, header)) { + isc_heap_decreased( + rbtdb->heaps[RBTDB_HEADERNODE(header)->locknum], + header->heap_index); + } + } else if (resign != 0) { + DNS_SLABHEADER_SETATTR(header, DNS_SLABHEADERATTR_RESIGN); + dns__zonerbt_resigninsert( + rbtdb, RBTDB_HEADERNODE(header)->locknum, header); + } + NODE_UNLOCK(&rbtdb->node_locks[RBTDB_HEADERNODE(header)->locknum].lock, + &nlocktype); + return (ISC_R_SUCCESS); +} + +static isc_result_t +getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, + dns_name_t *foundname DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_slabheader_t *header = NULL, *this = NULL; + unsigned int i; + isc_result_t result = ISC_R_NOTFOUND; + unsigned int locknum = 0; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB(rbtdb)); + + TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); + + for (i = 0; i < rbtdb->node_lock_count; i++) { + NODE_RDLOCK(&rbtdb->node_locks[i].lock, &nlocktype); + + /* + * Find for the earliest signing time among all of the + * heaps, each of which is covered by a different bucket + * lock. + */ + this = isc_heap_element(rbtdb->heaps[i], 1); + if (this == NULL) { + /* Nothing found; unlock and try the next heap. 
*/ + NODE_UNLOCK(&rbtdb->node_locks[i].lock, &nlocktype); + continue; + } + + if (header == NULL) { + /* + * Found a signing time: retain the bucket lock and + * preserve the lock number so we can unlock it + * later. + */ + header = this; + locknum = i; + nlocktype = isc_rwlocktype_none; + } else if (rbtdb->sooner(this, header)) { + /* + * Found an earlier signing time; release the + * previous bucket lock and retain this one instead. + */ + NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, + &nlocktype); + header = this; + locknum = i; + } else { + /* + * Earliest signing time in this heap isn't + * an improvement; unlock and try the next heap. + */ + NODE_UNLOCK(&rbtdb->node_locks[i].lock, &nlocktype); + } + } + + if (header != NULL) { + nlocktype = isc_rwlocktype_read; + /* + * Found something; pass back the answer and unlock + * the bucket. + */ + dns__rbtdb_bindrdataset(rbtdb, RBTDB_HEADERNODE(header), header, + 0, isc_rwlocktype_read, + rdataset DNS__DB_FLARG_PASS); + + if (foundname != NULL) { + dns_rbt_fullnamefromnode(RBTDB_HEADERNODE(header), + foundname); + } + + NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); + + result = ISC_R_SUCCESS; + } + + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + + return (result); +} + +static isc_result_t +setgluecachestats(dns_db_t *db, isc_stats_t *stats) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(!IS_CACHE(rbtdb) && !IS_STUB(rbtdb)); + REQUIRE(stats != NULL); + + isc_stats_attach(stats, &rbtdb->gluecachestats); + return (ISC_R_SUCCESS); +} + +static dns_glue_t * +new_gluelist(isc_mem_t *mctx, dns_name_t *name) { + dns_glue_t *glue = isc_mem_get(mctx, sizeof(*glue)); + *glue = (dns_glue_t){ 0 }; + dns_name_t *gluename = dns_fixedname_initname(&glue->fixedname); + + isc_mem_attach(mctx, &glue->mctx); + dns_name_copy(name, gluename); + + return (glue); +} + +static isc_result_t +glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype, + dns_rdataset_t 
*unused DNS__DB_FLARG) { + dns_glue_additionaldata_ctx_t *ctx = NULL; + isc_result_t result; + dns_fixedname_t fixedname_a; + dns_name_t *name_a = NULL; + dns_rdataset_t rdataset_a, sigrdataset_a; + dns_rbtnode_t *node_a = NULL; + dns_fixedname_t fixedname_aaaa; + dns_name_t *name_aaaa = NULL; + dns_rdataset_t rdataset_aaaa, sigrdataset_aaaa; + dns_rbtnode_t *node_aaaa = NULL; + dns_glue_t *glue = NULL; + + UNUSED(unused); + + /* + * NS records want addresses in additional records. + */ + INSIST(qtype == dns_rdatatype_a); + + ctx = (dns_glue_additionaldata_ctx_t *)arg; + + name_a = dns_fixedname_initname(&fixedname_a); + dns_rdataset_init(&rdataset_a); + dns_rdataset_init(&sigrdataset_a); + + name_aaaa = dns_fixedname_initname(&fixedname_aaaa); + dns_rdataset_init(&rdataset_aaaa); + dns_rdataset_init(&sigrdataset_aaaa); + + result = zone_find(ctx->db, name, ctx->version, dns_rdatatype_a, + DNS_DBFIND_GLUEOK, 0, (dns_dbnode_t **)&node_a, + name_a, &rdataset_a, + &sigrdataset_a DNS__DB_FLARG_PASS); + if (result == DNS_R_GLUE) { + glue = new_gluelist(ctx->db->mctx, name_a); + + dns_rdataset_init(&glue->rdataset_a); + dns_rdataset_init(&glue->sigrdataset_a); + dns_rdataset_init(&glue->rdataset_aaaa); + dns_rdataset_init(&glue->sigrdataset_aaaa); + + dns_rdataset_clone(&rdataset_a, &glue->rdataset_a); + if (dns_rdataset_isassociated(&sigrdataset_a)) { + dns_rdataset_clone(&sigrdataset_a, + &glue->sigrdataset_a); + } + } + + result = zone_find(ctx->db, name, ctx->version, dns_rdatatype_aaaa, + DNS_DBFIND_GLUEOK, 0, (dns_dbnode_t **)&node_aaaa, + name_aaaa, &rdataset_aaaa, + &sigrdataset_aaaa DNS__DB_FLARG_PASS); + if (result == DNS_R_GLUE) { + if (glue == NULL) { + glue = new_gluelist(ctx->db->mctx, name_aaaa); + + dns_rdataset_init(&glue->rdataset_a); + dns_rdataset_init(&glue->sigrdataset_a); + dns_rdataset_init(&glue->rdataset_aaaa); + dns_rdataset_init(&glue->sigrdataset_aaaa); + } else { + INSIST(node_a == node_aaaa); + INSIST(dns_name_equal(name_a, name_aaaa)); + } 
+ + dns_rdataset_clone(&rdataset_aaaa, &glue->rdataset_aaaa); + if (dns_rdataset_isassociated(&sigrdataset_aaaa)) { + dns_rdataset_clone(&sigrdataset_aaaa, + &glue->sigrdataset_aaaa); + } + } + + /* + * If the currently processed NS record is in-bailiwick, mark any glue + * RRsets found for it with DNS_RDATASETATTR_REQUIRED. Note that for + * simplicity, glue RRsets for all in-bailiwick NS records are marked + * this way, even though dns_message_rendersection() only checks the + * attributes for the first rdataset associated with the first name + * added to the ADDITIONAL section. + */ + if (glue != NULL && dns_name_issubdomain(name, ctx->nodename)) { + if (dns_rdataset_isassociated(&glue->rdataset_a)) { + glue->rdataset_a.attributes |= + DNS_RDATASETATTR_REQUIRED; + } + if (dns_rdataset_isassociated(&glue->rdataset_aaaa)) { + glue->rdataset_aaaa.attributes |= + DNS_RDATASETATTR_REQUIRED; + } + } + + if (glue != NULL) { + glue->next = ctx->glue_list; + ctx->glue_list = glue; + } + + result = ISC_R_SUCCESS; + + if (dns_rdataset_isassociated(&rdataset_a)) { + dns_rdataset_disassociate(&rdataset_a); + } + if (dns_rdataset_isassociated(&sigrdataset_a)) { + dns_rdataset_disassociate(&sigrdataset_a); + } + + if (dns_rdataset_isassociated(&rdataset_aaaa)) { + dns_rdataset_disassociate(&rdataset_aaaa); + } + if (dns_rdataset_isassociated(&sigrdataset_aaaa)) { + dns_rdataset_disassociate(&sigrdataset_aaaa); + } + + if (node_a != NULL) { + dns__db_detachnode(ctx->db, + (dns_dbnode_t *)&node_a DNS__DB_FLARG_PASS); + } + if (node_aaaa != NULL) { + dns__db_detachnode( + ctx->db, (dns_dbnode_t *)&node_aaaa DNS__DB_FLARG_PASS); + } + + return (result); +} + +#define IS_REQUIRED_GLUE(r) (((r)->attributes & DNS_RDATASETATTR_REQUIRED) != 0) + +static void +addglue_to_message(dns_glue_t *ge, dns_message_t *msg) { + for (; ge != NULL; ge = ge->next) { + dns_name_t *name = NULL; + dns_rdataset_t *rdataset_a = NULL; + dns_rdataset_t *sigrdataset_a = NULL; + dns_rdataset_t *rdataset_aaaa 
= NULL; + dns_rdataset_t *sigrdataset_aaaa = NULL; + dns_name_t *gluename = dns_fixedname_name(&ge->fixedname); + bool prepend_name = false; + + dns_message_gettempname(msg, &name); + + dns_name_copy(gluename, name); + + if (dns_rdataset_isassociated(&ge->rdataset_a)) { + dns_message_gettemprdataset(msg, &rdataset_a); + } + + if (dns_rdataset_isassociated(&ge->sigrdataset_a)) { + dns_message_gettemprdataset(msg, &sigrdataset_a); + } + + if (dns_rdataset_isassociated(&ge->rdataset_aaaa)) { + dns_message_gettemprdataset(msg, &rdataset_aaaa); + } + + if (dns_rdataset_isassociated(&ge->sigrdataset_aaaa)) { + dns_message_gettemprdataset(msg, &sigrdataset_aaaa); + } + + if (rdataset_a != NULL) { + dns_rdataset_clone(&ge->rdataset_a, rdataset_a); + ISC_LIST_APPEND(name->list, rdataset_a, link); + if (IS_REQUIRED_GLUE(rdataset_a)) { + prepend_name = true; + } + } + + if (sigrdataset_a != NULL) { + dns_rdataset_clone(&ge->sigrdataset_a, sigrdataset_a); + ISC_LIST_APPEND(name->list, sigrdataset_a, link); + } + + if (rdataset_aaaa != NULL) { + dns_rdataset_clone(&ge->rdataset_aaaa, rdataset_aaaa); + ISC_LIST_APPEND(name->list, rdataset_aaaa, link); + if (IS_REQUIRED_GLUE(rdataset_aaaa)) { + prepend_name = true; + } + } + if (sigrdataset_aaaa != NULL) { + dns_rdataset_clone(&ge->sigrdataset_aaaa, + sigrdataset_aaaa); + ISC_LIST_APPEND(name->list, sigrdataset_aaaa, link); + } + + dns_message_addname(msg, name, DNS_SECTION_ADDITIONAL); + + /* + * When looking for required glue, dns_message_rendersection() + * only processes the first rdataset associated with the first + * name added to the ADDITIONAL section. dns_message_addname() + * performs an append on the list of names in a given section, + * so if any glue record was marked as required, we need to + * move the name it is associated with to the beginning of the + * list for the ADDITIONAL section or else required glue might + * not be rendered. 
+ */ + if (prepend_name) { + ISC_LIST_UNLINK(msg->sections[DNS_SECTION_ADDITIONAL], + name, link); + ISC_LIST_PREPEND(msg->sections[DNS_SECTION_ADDITIONAL], + name, link); + } + } +} + +static dns_glue_t * +newglue(dns_rbtdb_t *rbtdb, dns_rbtdb_version_t *rbtversion, + dns_rbtnode_t *node, dns_rdataset_t *rdataset) { + dns_fixedname_t nodename; + dns_glue_additionaldata_ctx_t ctx = { + .db = (dns_db_t *)rbtdb, + .version = (dns_dbversion_t *)rbtversion, + .nodename = dns_fixedname_initname(&nodename), + }; + + /* + * Get the owner name of the NS RRset - it will be necessary for + * identifying required glue in glue_nsdname_cb() (by + * determining which NS records in the delegation are + * in-bailiwick). + */ + dns__rbtdb_nodefullname((dns_db_t *)rbtdb, node, ctx.nodename); + + (void)dns_rdataset_additionaldata(rdataset, dns_rootname, + glue_nsdname_cb, &ctx); + + return (ctx.glue_list); +} + +static isc_result_t +addglue(dns_db_t *db, dns_dbversion_t *version, dns_rdataset_t *rdataset, + dns_message_t *msg) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtdb_version_t *rbtversion = version; + dns_rbtnode_t *node = (dns_rbtnode_t *)rdataset->slab.node; + dns_slabheader_t *header = dns_slabheader_fromrdataset(rdataset); + + REQUIRE(rdataset->type == dns_rdatatype_ns); + REQUIRE(rbtdb == (dns_rbtdb_t *)rdataset->slab.db); + REQUIRE(rbtdb == rbtversion->rbtdb); + REQUIRE(!IS_CACHE(rbtdb) && !IS_STUB(rbtdb)); + + rcu_read_lock(); + + dns_glue_t *glue = rcu_dereference(header->glue_list); + if (glue == NULL) { + /* No cached glue was found in the table. Get new glue. */ + glue = newglue(rbtdb, rbtversion, node, rdataset); + + /* Cache the glue or (void *)-1 if no glue was found. */ + dns_glue_t *old_glue = rcu_cmpxchg_pointer( + &header->glue_list, NULL, (glue) ? 
glue : (void *)-1); + if (old_glue != NULL) { + /* Somebody else was faster */ + dns__rbtdb_freeglue(glue); + glue = old_glue; + } else if (glue != NULL) { + cds_wfs_push(&rbtversion->glue_stack, + &header->wfs_node); + } + } + + /* We have a cached result. Add it to the message and return. */ + + if (rbtdb->gluecachestats != NULL) { + isc_stats_increment( + rbtdb->gluecachestats, + (glue == (void *)-1) + ? dns_gluecachestatscounter_hits_absent + : dns_gluecachestatscounter_hits_present); + } + + /* + * (void *)-1 is a special value that means no glue is present in the + * zone. + */ + if (glue != (void *)-1) { + addglue_to_message(glue, msg); + } + + rcu_read_unlock(); + + return (ISC_R_SUCCESS); +} + +dns_dbmethods_t dns__rbtdb_zonemethods = { + .destroy = dns__rbtdb_destroy, + .beginload = beginload, + .endload = endload, + .currentversion = dns__rbtdb_currentversion, + .newversion = dns__rbtdb_newversion, + .attachversion = dns__rbtdb_attachversion, + .closeversion = dns__rbtdb_closeversion, + .findnode = dns__rbtdb_findnode, + .find = zone_find, + .attachnode = dns__rbtdb_attachnode, + .detachnode = dns__rbtdb_detachnode, + .createiterator = dns__rbtdb_createiterator, + .findrdataset = zone_findrdataset, + .allrdatasets = dns__rbtdb_allrdatasets, + .addrdataset = dns__rbtdb_addrdataset, + .subtractrdataset = dns__rbtdb_subtractrdataset, + .deleterdataset = dns__rbtdb_deleterdataset, + .issecure = issecure, + .nodecount = dns__rbtdb_nodecount, + .setloop = dns__rbtdb_setloop, + .getoriginnode = dns__rbtdb_getoriginnode, + .getnsec3parameters = getnsec3parameters, + .findnsec3node = findnsec3node, + .setsigningtime = setsigningtime, + .getsigningtime = getsigningtime, + .getsize = getsize, + .setgluecachestats = setgluecachestats, + .locknode = dns__rbtdb_locknode, + .unlocknode = dns__rbtdb_unlocknode, + .addglue = addglue, + .deletedata = dns__rbtdb_deletedata, +}; + +void +dns__zonerbt_resigninsert(dns_rbtdb_t *rbtdb, int idx, + dns_slabheader_t *newheader) { 
+ INSIST(!IS_CACHE(rbtdb)); + INSIST(newheader->heap_index == 0); + INSIST(!ISC_LINK_LINKED(newheader, link)); + + isc_heap_insert(rbtdb->heaps[idx], newheader); + newheader->heap = rbtdb->heaps[idx]; +} + +void +dns__zonerbt_resigndelete(dns_rbtdb_t *rbtdb, dns_rbtdb_version_t *version, + dns_slabheader_t *header DNS__DB_FLARG) { + /* + * Remove the old header from the heap + */ + if (header != NULL && header->heap_index != 0) { + isc_heap_delete(rbtdb->heaps[RBTDB_HEADERNODE(header)->locknum], + header->heap_index); + header->heap_index = 0; + if (version != NULL) { + dns__rbtdb_newref( + rbtdb, RBTDB_HEADERNODE(header), + isc_rwlocktype_write DNS__DB_FLARG_PASS); + ISC_LIST_APPEND(version->resigned_list, header, link); + } + } +} + +isc_result_t +dns__zonerbt_wildcardmagic(dns_rbtdb_t *rbtdb, const dns_name_t *name, + bool lock) { + isc_result_t result; + dns_name_t foundname; + dns_offsets_t offsets; + unsigned int n; + dns_rbtnode_t *node = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + dns_name_init(&foundname, offsets); + n = dns_name_countlabels(name); + INSIST(n >= 2); + n--; + dns_name_getlabelsequence(name, 1, n, &foundname); + result = dns_rbt_addnode(rbtdb->tree, &foundname, &node); + if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) { + return (result); + } + if (result == ISC_R_SUCCESS) { + node->nsec = DNS_DB_NSEC_NORMAL; + } + node->find_callback = 1; + if (lock) { + NODE_WRLOCK(&rbtdb->node_locks[node->locknum].lock, &nlocktype); + } + node->wild = 1; + if (lock) { + NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock, &nlocktype); + } + return (ISC_R_SUCCESS); +} + +isc_result_t +dns__zonerbt_addwildcards(dns_rbtdb_t *rbtdb, const dns_name_t *name, + bool lock) { + isc_result_t result; + dns_name_t foundname; + dns_offsets_t offsets; + unsigned int n, l, i; + + dns_name_init(&foundname, offsets); + n = dns_name_countlabels(name); + l = dns_name_countlabels(&rbtdb->common.origin); + i = l + 1; + while (i < n) { + dns_rbtnode_t 
*node = NULL; + dns_name_getlabelsequence(name, n - i, i, &foundname); + if (dns_name_iswildcard(&foundname)) { + result = dns__zonerbt_wildcardmagic(rbtdb, &foundname, + lock); + if (result != ISC_R_SUCCESS) { + return (result); + } + result = dns_rbt_addnode(rbtdb->tree, &foundname, + &node); + if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) { + return (result); + } + if (result == ISC_R_SUCCESS) { + node->nsec = DNS_DB_NSEC_NORMAL; + } + } + i++; + } + return (ISC_R_SUCCESS); +} diff --git a/lib/dns/qpdb.c b/lib/dns/qpdb.c new file mode 100644 index 0000000000..e10024316e --- /dev/null +++ b/lib/dns/qpdb.c @@ -0,0 +1,4884 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/*! 
\file */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "db_p.h" +#include "qpdb_p.h" + +#define CHECK(op) \ + do { \ + result = (op); \ + if (result != ISC_R_SUCCESS) \ + goto failure; \ + } while (0) + +#define EXISTS(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NONEXISTENT) == 0) +#define NONEXISTENT(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NONEXISTENT) != 0) +#define IGNORE(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_IGNORE) != 0) +#define NXDOMAIN(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NXDOMAIN) != 0) +#define STALE(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_STALE) != 0) +#define STALE_WINDOW(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_STALE_WINDOW) != 0) +#define RESIGN(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_RESIGN) != 0) +#define OPTOUT(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_OPTOUT) != 0) +#define NEGATIVE(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_NEGATIVE) != 0) +#define PREFETCH(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_PREFETCH) != 0) +#define CASESET(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_CASESET) != 0) +#define ZEROTTL(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + 
DNS_SLABHEADERATTR_ZEROTTL) != 0) +#define ANCIENT(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_ANCIENT) != 0) +#define STATCOUNT(header) \ + ((atomic_load_acquire(&(header)->attributes) & \ + DNS_SLABHEADERATTR_STATCOUNT) != 0) + +#define STALE_TTL(header, rbtdb) \ + (NXDOMAIN(header) ? 0 : rbtdb->common.serve_stale_ttl) + +#define ACTIVE(header, now) \ + (((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header))) + +#define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */ + +#define EXPIREDOK(rbtiterator) \ + (((rbtiterator)->common.options & DNS_DB_EXPIREDOK) != 0) + +#define STALEOK(rbtiterator) \ + (((rbtiterator)->common.options & DNS_DB_STALEOK) != 0) + +#define KEEPSTALE(rbtdb) ((rbtdb)->common.serve_stale_ttl > 0) + +#define RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, iterator) \ + ((iterator)->current == &(iterator)->nsec3chain && \ + (iterator)->node == (rbtdb)->nsec3_origin_node) + +/*% + * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps). + * There is a tradeoff issue about configuring this value: if this is too + * small, it may cause heavier contention between threads; if this is too large, + * LRU purge algorithm won't work well (entries tend to be purged prematurely). + * The default value should work well for most environments, but this can + * also be configurable at compilation time via the + * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than + * 1 due to the assumption of dns__cacherbt_overmem(). 
+ */ +#ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT +#if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 +#error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1" +#else /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */ +#define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT +#endif /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */ +#else /* ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT */ +#define DEFAULT_CACHE_NODE_LOCK_COUNT 17 +#endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */ + +/* + * This defines the number of headers that we try to expire each time the + * expire_ttl_headers() is run. The number should be small enough, so the + * TTL-based header expiration doesn't take too long, but it should be large + * enough, so we expire enough headers if their TTL is clustered. + */ +#define DNS_RBTDB_EXPIRE_TTL_COUNT 10 + +/* QP methods */ +static void +qp_attach(void *uctx, void *pval, uint32_t ival); +static void +qp_detach(void *uctx, void *pval, uint32_t ival); +static size_t +qp_makekey(dns_qpkey_t key, void *uctx, void *pval, uint32_t ival); +static void +qp_triename(void *uctx, char *buf, size_t size); + +static dns_qpmethods_t qpmethods = { + qp_attach, + qp_detach, + qp_makekey, + qp_triename, +}; + +static void +qp_attach(void *uctx ISC_ATTR_UNUSED, void *pval, + uint32_t ival ISC_ATTR_UNUSED) { + dns_qpdata_t *data = pval; + dns_qpdata_ref(data); +} + +static void +qp_detach(void *uctx ISC_ATTR_UNUSED, void *pval, + uint32_t ival ISC_ATTR_UNUSED) { + dns_qpdata_t *data = pval; + dns_qpdata_detach(&data); +} + +static size_t +qp_makekey(dns_qpkey_t key, void *uctx ISC_ATTR_UNUSED, void *pval, + uint32_t ival ISC_ATTR_UNUSED) { + dns_qpdata_t *data = pval; + return (dns_qpkey_fromname(key, data->name)); +} + +static void +qp_triename(void *uctx, char *buf, size_t size) { + UNUSED(uctx); + snprintf(buf, size, "qpdb-lite"); +} + +static void +prune_tree(void *arg); +static void +free_gluetable(dns_rbtdb_version_t *version); + +static void 
+rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG); +static isc_result_t +rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG); +static isc_result_t +rdatasetiter_next(dns_rdatasetiter_t *iterator DNS__DB_FLARG); +static void +rdatasetiter_current(dns_rdatasetiter_t *iterator, + dns_rdataset_t *rdataset DNS__DB_FLARG); + +static dns_rdatasetitermethods_t rdatasetiter_methods = { + rdatasetiter_destroy, rdatasetiter_first, rdatasetiter_next, + rdatasetiter_current +}; + +typedef struct rbtdb_rdatasetiter { + dns_rdatasetiter_t common; + dns_slabheader_t *current; +} rbtdb_rdatasetiter_t; + +/* + * Note that these iterators, unless created with either DNS_DB_NSEC3ONLY or + * DNS_DB_NONSEC3, will transparently move between the last node of the + * "regular" RBT ("chain" field) and the root node of the NSEC3 RBT + * ("nsec3chain" field) of the database in question, as if the latter was a + * successor to the former in lexical order. The "current" field always holds + * the address of either "chain" or "nsec3chain", depending on which RBT is + * being traversed at given time. 
/*
 * Database iterator state.
 *
 * If 'paused' is true, then the tree lock is not being held.
 */
typedef struct rbtdb_dbiterator {
	dns_dbiterator_t common;
	/* True while the iterator is not holding the tree lock. */
	bool paused;
	bool new_origin;
	/* NOTE(review): presumably the tree-lock mode currently held - confirm. */
	isc_rwlocktype_t tree_locked;
	isc_result_t result;
	dns_fixedname_t name;
	dns_fixedname_t origin;
	/* Chain for the "regular" tree. */
	dns_rbtnodechain_t chain;
	/* Chain for the NSEC3 tree. */
	dns_rbtnodechain_t nsec3chain;
	/* Address of either 'chain' or 'nsec3chain': the tree being walked. */
	dns_rbtnodechain_t *current;
	dns_rbtnode_t *node;
	/*
	 * NOTE(review): presumably chosen from the DNS_DB_NSEC3ONLY /
	 * DNS_DB_NONSEC3 creation options, with 'full' meaning both trees
	 * are traversed - confirm against the iterator constructor.
	 */
	enum { full, nonsec3, nsec3only } nsec3mode;
} rbtdb_dbiterator_t;
+ */ +static atomic_uint_fast16_t init_count = 0; + +/* + * Locking + * + * If a routine is going to lock more than one lock in this module, then + * the locking must be done in the following order: + * + * Tree Lock + * + * Node Lock (Only one from the set may be locked at one time by + * any caller) + * + * Database Lock + * + * Failure to follow this hierarchy can result in deadlock. + */ + +/* + * Deleting Nodes + * + * For zone databases the node for the origin of the zone MUST NOT be deleted. + */ + +/* + * DB Routines + */ + +static void +update_rrsetstats(dns_stats_t *stats, const dns_typepair_t htype, + const uint_least16_t hattributes, const bool increment) { + dns_rdatastatstype_t statattributes = 0; + dns_rdatastatstype_t base = 0; + dns_rdatastatstype_t type; + dns_slabheader_t *header = &(dns_slabheader_t){ + .type = htype, + .attributes = hattributes, + }; + + if (!EXISTS(header) || !STATCOUNT(header)) { + return; + } + + if (NEGATIVE(header)) { + if (NXDOMAIN(header)) { + statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN; + } else { + statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET; + base = DNS_TYPEPAIR_COVERS(header->type); + } + } else { + base = DNS_TYPEPAIR_TYPE(header->type); + } + + if (STALE(header)) { + statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE; + } + if (ANCIENT(header)) { + statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT; + } + + type = DNS_RDATASTATSTYPE_VALUE(base, statattributes); + if (increment) { + dns_rdatasetstats_increment(stats, type); + } else { + dns_rdatasetstats_decrement(stats, type); + } +} + +void +dns__rbtdb_setttl(dns_slabheader_t *header, dns_ttl_t newttl) { + dns_ttl_t oldttl = header->ttl; + + header->ttl = newttl; + + if (header->db == NULL || !dns_db_iscache(header->db)) { + return; + } + + /* + * This is a cache. Adjust the heaps if necessary. 
+ */ + if (header->heap == NULL || header->heap_index == 0 || newttl == oldttl) + { + return; + } + + if (newttl < oldttl) { + isc_heap_increased(header->heap, header->heap_index); + } else { + isc_heap_decreased(header->heap, header->heap_index); + } + + if (newttl == 0) { + isc_heap_delete(header->heap, header->heap_index); + } +} + +static bool +prio_type(dns_typepair_t type) { + switch (type) { + case dns_rdatatype_soa: + case DNS_SIGTYPE(dns_rdatatype_soa): + case dns_rdatatype_a: + case DNS_SIGTYPE(dns_rdatatype_a): + case dns_rdatatype_aaaa: + case DNS_SIGTYPE(dns_rdatatype_aaaa): + case dns_rdatatype_nsec: + case DNS_SIGTYPE(dns_rdatatype_nsec): + case dns_rdatatype_nsec3: + case DNS_SIGTYPE(dns_rdatatype_nsec3): + case dns_rdatatype_ns: + case DNS_SIGTYPE(dns_rdatatype_ns): + case dns_rdatatype_ds: + case DNS_SIGTYPE(dns_rdatatype_ds): + case dns_rdatatype_cname: + case DNS_SIGTYPE(dns_rdatatype_cname): + return (true); + } + return (false); +} + +/*% + * These functions allow the heap code to rank the priority of each + * element. It returns true if v1 happens "sooner" than v2. + */ +static bool +ttl_sooner(void *v1, void *v2) { + dns_slabheader_t *h1 = v1; + dns_slabheader_t *h2 = v2; + + return (h1->ttl < h2->ttl); +} + +/*% + * Return which RRset should be resigned sooner. If the RRsets have the + * same signing time, prefer the other RRset over the SOA RRset. + */ +static bool +resign_sooner(void *v1, void *v2) { + dns_slabheader_t *h1 = v1; + dns_slabheader_t *h2 = v2; + + return (h1->resign < h2->resign || + (h1->resign == h2->resign && h1->resign_lsb < h2->resign_lsb) || + (h1->resign == h2->resign && h1->resign_lsb == h2->resign_lsb && + h2->type == DNS_SIGTYPE(dns_rdatatype_soa))); +} + +/*% + * This function sets the heap index into the header. 
+ */ +static void +set_index(void *what, unsigned int idx) { + dns_slabheader_t *h = what; + + h->heap_index = idx; +} + +static void +free_rbtdb(dns_rbtdb_t *rbtdb, bool log) { + unsigned int i; + char buf[DNS_NAME_FORMATSIZE]; + dns_qp_t **treep = NULL; + + REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions)); + REQUIRE(rbtdb->future_version == NULL); + + if (rbtdb->current_version != NULL) { + isc_refcount_decrementz(&rbtdb->current_version->references); + + isc_refcount_destroy(&rbtdb->current_version->references); + UNLINK(rbtdb->open_versions, rbtdb->current_version, link); + cds_wfs_destroy(&rbtdb->current_version->glue_stack); + isc_rwlock_destroy(&rbtdb->current_version->rwlock); + isc_mem_put(rbtdb->common.mctx, rbtdb->current_version, + sizeof(*rbtdb->current_version)); + } + + /* + * We assume the number of remaining dead nodes is reasonably small; + * the overhead of unlinking all nodes here should be negligible. + */ + for (i = 0; i < rbtdb->node_lock_count; i++) { + dns_rbtnode_t *node = NULL; + + node = ISC_LIST_HEAD(rbtdb->deadnodes[i]); + while (node != NULL) { + ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink); + node = ISC_LIST_HEAD(rbtdb->deadnodes[i]); + } + } + + rbtdb->quantum = (rbtdb->loop != NULL) ? 
100 : 0; + + for (;;) { + /* + * pick the next tree to (start to) destroy + */ + treep = &rbtdb->tree; + if (*treep == NULL) { + treep = &rbtdb->nsec; + if (*treep == NULL) { + treep = &rbtdb->nsec3; + /* + * we're finished after clear cutting + */ + if (*treep == NULL) { + break; + } + } + } + + dns_qp_destroy(treep); + INSIST(*treep == NULL); + } + + if (log) { + if (dns_name_dynamic(&rbtdb->common.origin)) { + dns_name_format(&rbtdb->common.origin, buf, + sizeof(buf)); + } else { + strlcpy(buf, "", sizeof(buf)); + } + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, + DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), + "done free_rbtdb(%s)", buf); + } + if (dns_name_dynamic(&rbtdb->common.origin)) { + dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx); + } + for (i = 0; i < rbtdb->node_lock_count; i++) { + isc_refcount_destroy(&rbtdb->node_locks[i].references); + NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock); + } + + /* + * Clean up LRU / re-signing order lists. + */ + if (rbtdb->lru != NULL) { + for (i = 0; i < rbtdb->node_lock_count; i++) { + INSIST(ISC_LIST_EMPTY(rbtdb->lru[i])); + } + isc_mem_cput(rbtdb->common.mctx, rbtdb->lru, + rbtdb->node_lock_count, + sizeof(dns_slabheaderlist_t)); + } + /* + * Clean up dead node buckets. + */ + if (rbtdb->deadnodes != NULL) { + for (i = 0; i < rbtdb->node_lock_count; i++) { + INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i])); + } + isc_mem_cput(rbtdb->common.mctx, rbtdb->deadnodes, + rbtdb->node_lock_count, sizeof(dns_rbtnodelist_t)); + } + /* + * Clean up heap objects. 
+ */ + if (rbtdb->heaps != NULL) { + for (i = 0; i < rbtdb->node_lock_count; i++) { + isc_heap_destroy(&rbtdb->heaps[i]); + } + isc_mem_cput(rbtdb->hmctx, rbtdb->heaps, rbtdb->node_lock_count, + sizeof(isc_heap_t *)); + } + + if (rbtdb->rrsetstats != NULL) { + dns_stats_detach(&rbtdb->rrsetstats); + } + if (rbtdb->cachestats != NULL) { + isc_stats_detach(&rbtdb->cachestats); + } + if (rbtdb->gluecachestats != NULL) { + isc_stats_detach(&rbtdb->gluecachestats); + } + + isc_mem_cput(rbtdb->common.mctx, rbtdb->node_locks, + rbtdb->node_lock_count, sizeof(db_nodelock_t)); + TREE_DESTROYLOCK(&rbtdb->tree_lock); + isc_refcount_destroy(&rbtdb->common.references); + if (rbtdb->loop != NULL) { + isc_loop_detach(&rbtdb->loop); + } + + isc_rwlock_destroy(&rbtdb->lock); + rbtdb->common.magic = 0; + rbtdb->common.impmagic = 0; + isc_mem_detach(&rbtdb->hmctx); + + if (rbtdb->common.update_listeners != NULL) { + INSIST(!cds_lfht_destroy(rbtdb->common.update_listeners, NULL)); + } + + isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb)); +} + +void +dns__rbtdb_destroy(dns_db_t *arg) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)arg; + bool want_free = false; + unsigned int i; + unsigned int inactive = 0; + + /* XXX check for open versions here */ + + if (rbtdb->soanode != NULL) { + dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode); + } + if (rbtdb->nsnode != NULL) { + dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode); + } + + /* + * The current version's glue table needs to be freed early + * so the nodes are dereferenced before we check the active + * node count below. + */ + if (rbtdb->current_version != NULL) { + free_gluetable(rbtdb->current_version); + } + + /* + * Even though there are no external direct references, there still + * may be nodes in use. 
+ */ + for (i = 0; i < rbtdb->node_lock_count; i++) { + isc_rwlocktype_t nodelock = isc_rwlocktype_none; + NODE_WRLOCK(&rbtdb->node_locks[i].lock, &nodelock); + rbtdb->node_locks[i].exiting = true; + if (isc_refcount_current(&rbtdb->node_locks[i].references) == 0) + { + inactive++; + } + NODE_UNLOCK(&rbtdb->node_locks[i].lock, &nodelock); + } + + if (inactive != 0) { + RWLOCK(&rbtdb->lock, isc_rwlocktype_write); + rbtdb->active -= inactive; + if (rbtdb->active == 0) { + want_free = true; + } + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); + if (want_free) { + char buf[DNS_NAME_FORMATSIZE]; + if (dns_name_dynamic(&rbtdb->common.origin)) { + dns_name_format(&rbtdb->common.origin, buf, + sizeof(buf)); + } else { + strlcpy(buf, "", sizeof(buf)); + } + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, + DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), + "calling free_rbtdb(%s)", buf); + free_rbtdb(rbtdb, true); + } + } +} + +void +dns__rbtdb_currentversion(dns_db_t *db, dns_dbversion_t **versionp) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtdb_version_t *version = NULL; + + REQUIRE(VALID_RBTDB(rbtdb)); + + RWLOCK(&rbtdb->lock, isc_rwlocktype_read); + version = rbtdb->current_version; + isc_refcount_increment(&version->references); + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); + + *versionp = (dns_dbversion_t *)version; +} + +static dns_rbtdb_version_t * +allocate_version(isc_mem_t *mctx, uint32_t serial, unsigned int references, + bool writer) { + dns_rbtdb_version_t *version = isc_mem_get(mctx, sizeof(*version)); + *version = (dns_rbtdb_version_t){ + .serial = serial, + .writer = writer, + .changed_list = ISC_LIST_INITIALIZER, + .resigned_list = ISC_LIST_INITIALIZER, + .link = ISC_LINK_INITIALIZER, + }; + + cds_wfs_init(&version->glue_stack); + + isc_refcount_init(&version->references, references); + + return (version); +} + +isc_result_t +dns__rbtdb_newversion(dns_db_t *db, dns_dbversion_t **versionp) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtdb_version_t 
*version = NULL; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(versionp != NULL && *versionp == NULL); + REQUIRE(rbtdb->future_version == NULL); + + RWLOCK(&rbtdb->lock, isc_rwlocktype_write); + RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */ + version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1, + true); + version->rbtdb = rbtdb; + version->commit_ok = true; + version->secure = rbtdb->current_version->secure; + version->havensec3 = rbtdb->current_version->havensec3; + if (version->havensec3) { + version->flags = rbtdb->current_version->flags; + version->iterations = rbtdb->current_version->iterations; + version->hash = rbtdb->current_version->hash; + version->salt_length = rbtdb->current_version->salt_length; + memmove(version->salt, rbtdb->current_version->salt, + version->salt_length); + } else { + version->flags = 0; + version->iterations = 0; + version->hash = 0; + version->salt_length = 0; + memset(version->salt, 0, sizeof(version->salt)); + } + isc_rwlock_init(&version->rwlock); + RWLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read); + version->records = rbtdb->current_version->records; + version->xfrsize = rbtdb->current_version->xfrsize; + RWUNLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read); + rbtdb->next_serial++; + rbtdb->future_version = version; + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); + + *versionp = version; + + return (ISC_R_SUCCESS); +} + +void +dns__rbtdb_attachversion(dns_db_t *db, dns_dbversion_t *source, + dns_dbversion_t **targetp) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtdb_version_t *rbtversion = source; + + REQUIRE(VALID_RBTDB(rbtdb)); + INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb); + + isc_refcount_increment(&rbtversion->references); + + *targetp = rbtversion; +} + +static rbtdb_changed_t * +add_changed(dns_slabheader_t *header, + dns_rbtdb_version_t *version DNS__DB_FLARG) { + rbtdb_changed_t *changed = NULL; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db; + + /* + 
* Caller must be holding the node lock if its reference must be + * protected by the lock. + */ + + changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed)); + + RWLOCK(&rbtdb->lock, isc_rwlocktype_write); + + REQUIRE(version->writer); + + if (changed != NULL) { + dns_rbtnode_t *node = (dns_rbtnode_t *)header->node; + uint_fast32_t refs = isc_refcount_increment(&node->references); +#if DNS_DB_NODETRACE + fprintf(stderr, + "incr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", + func, file, line, node, refs + 1); +#else + UNUSED(refs); +#endif + changed->node = node; + changed->dirty = false; + ISC_LIST_INITANDAPPEND(version->changed_list, changed, link); + } else { + version->commit_ok = false; + } + + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); + + return (changed); +} + +static void +rollback_node(dns_rbtnode_t *node, uint32_t serial) { + dns_slabheader_t *header = NULL, *dcurrent = NULL; + bool make_dirty = false; + + /* + * Caller must hold the node lock. + */ + + /* + * We set the IGNORE attribute on rdatasets with serial number + * 'serial'. When the reference count goes to zero, these rdatasets + * will be cleaned up; until that time, they will be ignored. + */ + for (header = node->data; header != NULL; header = header->next) { + if (header->serial == serial) { + DNS_SLABHEADER_SETATTR(header, + DNS_SLABHEADERATTR_IGNORE); + make_dirty = true; + } + for (dcurrent = header->down; dcurrent != NULL; + dcurrent = dcurrent->down) + { + if (dcurrent->serial == serial) { + DNS_SLABHEADER_SETATTR( + dcurrent, DNS_SLABHEADERATTR_IGNORE); + make_dirty = true; + } + } + } + if (make_dirty) { + node->dirty = 1; + } +} + +void +dns__rbtdb_mark(dns_slabheader_t *header, uint_least16_t flag) { + uint_least16_t attributes = atomic_load_acquire(&header->attributes); + uint_least16_t newattributes = 0; + dns_stats_t *stats = NULL; + + /* + * If we are already ancient there is nothing to do. 
+ */ + do { + if ((attributes & flag) != 0) { + return; + } + newattributes = attributes | flag; + } while (!atomic_compare_exchange_weak_acq_rel( + &header->attributes, &attributes, newattributes)); + + /* + * Decrement and increment the stats counter for the appropriate + * RRtype. + */ + stats = dns_db_getrrsetstats(header->db); + if (stats != NULL) { + update_rrsetstats(stats, header->type, attributes, false); + update_rrsetstats(stats, header->type, newattributes, true); + } +} + +static void +mark_ancient(dns_slabheader_t *header) { + dns__rbtdb_setttl(header, 0); + dns__rbtdb_mark(header, DNS_SLABHEADERATTR_ANCIENT); + RBTDB_HEADERNODE(header)->dirty = 1; +} + +static void +clean_stale_headers(dns_slabheader_t *top) { + dns_slabheader_t *d = NULL, *down_next = NULL; + + for (d = top->down; d != NULL; d = down_next) { + down_next = d->down; + dns_slabheader_destroy(&d); + } + top->down = NULL; +} + +static void +clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) { + dns_slabheader_t *current = NULL, *top_prev = NULL, *top_next = NULL; + + /* + * Caller must be holding the node lock. + */ + + for (current = node->data; current != NULL; current = top_next) { + top_next = current->next; + clean_stale_headers(current); + /* + * If current is nonexistent, ancient, or stale and + * we are not keeping stale, we can clean it up. + */ + if (NONEXISTENT(current) || ANCIENT(current) || + (STALE(current) && !KEEPSTALE(rbtdb))) + { + if (top_prev != NULL) { + top_prev->next = current->next; + } else { + node->data = current->next; + } + dns_slabheader_destroy(¤t); + } else { + top_prev = current; + } + } + node->dirty = 0; +} + +static void +clean_zone_node(dns_rbtnode_t *node, uint32_t least_serial) { + dns_slabheader_t *current = NULL, *dcurrent = NULL; + dns_slabheader_t *down_next = NULL, *dparent = NULL; + dns_slabheader_t *top_prev = NULL, *top_next = NULL; + bool still_dirty = false; + + /* + * Caller must be holding the node lock. 
+ */ + REQUIRE(least_serial != 0); + + for (current = node->data; current != NULL; current = top_next) { + top_next = current->next; + + /* + * First, we clean up any instances of multiple rdatasets + * with the same serial number, or that have the IGNORE + * attribute. + */ + dparent = current; + for (dcurrent = current->down; dcurrent != NULL; + dcurrent = down_next) + { + down_next = dcurrent->down; + INSIST(dcurrent->serial <= dparent->serial); + if (dcurrent->serial == dparent->serial || + IGNORE(dcurrent)) + { + if (down_next != NULL) { + down_next->next = dparent; + } + dparent->down = down_next; + dns_slabheader_destroy(&dcurrent); + } else { + dparent = dcurrent; + } + } + + /* + * We've now eliminated all IGNORE datasets with the possible + * exception of current, which we now check. + */ + if (IGNORE(current)) { + down_next = current->down; + if (down_next == NULL) { + if (top_prev != NULL) { + top_prev->next = current->next; + } else { + node->data = current->next; + } + dns_slabheader_destroy(¤t); + /* + * current no longer exists, so we can + * just continue with the loop. + */ + continue; + } else { + /* + * Pull up current->down, making it the new + * current. + */ + if (top_prev != NULL) { + top_prev->next = down_next; + } else { + node->data = down_next; + } + down_next->next = top_next; + dns_slabheader_destroy(¤t); + current = down_next; + } + } + + /* + * We now try to find the first down node less than the + * least serial. + */ + dparent = current; + for (dcurrent = current->down; dcurrent != NULL; + dcurrent = down_next) + { + down_next = dcurrent->down; + if (dcurrent->serial < least_serial) { + break; + } + dparent = dcurrent; + } + + /* + * If there is a such an rdataset, delete it and any older + * versions. 
+ */ + if (dcurrent != NULL) { + do { + down_next = dcurrent->down; + INSIST(dcurrent->serial <= least_serial); + dns_slabheader_destroy(&dcurrent); + dcurrent = down_next; + } while (dcurrent != NULL); + dparent->down = NULL; + } + + /* + * Note. The serial number of 'current' might be less than + * least_serial too, but we cannot delete it because it is + * the most recent version, unless it is a NONEXISTENT + * rdataset. + */ + if (current->down != NULL) { + still_dirty = true; + top_prev = current; + } else { + /* + * If this is a NONEXISTENT rdataset, we can delete it. + */ + if (NONEXISTENT(current)) { + if (top_prev != NULL) { + top_prev->next = current->next; + } else { + node->data = current->next; + } + dns_slabheader_destroy(¤t); + } else { + top_prev = current; + } + } + } + if (!still_dirty) { + node->dirty = 0; + } +} + +/* + * tree_lock(write) must be held. + */ +static void +delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) { + dns_rbtnode_t *nsecnode = NULL; + dns_fixedname_t fname; + dns_name_t *name = NULL; + isc_result_t result = ISC_R_UNEXPECTED; + + INSIST(!ISC_LINK_LINKED(node, deadlink)); + + if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) { + char printname[DNS_NAME_FORMATSIZE]; + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, + DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), + "delete_node(): %p %s (bucket %d)", node, + dns_rbt_formatnodename(node, printname, + sizeof(printname)), + node->locknum); + } + + switch (node->nsec) { + case DNS_DB_NSEC_NORMAL: + result = dns_rbt_deletenode(rbtdb->tree, node, false); + break; + case DNS_DB_NSEC_HAS_NSEC: + /* + * Though this may be wasteful, it has to be done before + * node is deleted. + */ + name = dns_fixedname_initname(&fname); + dns_rbt_fullnamefromnode(node, name); + /* + * Delete the corresponding node from the auxiliary NSEC + * tree before deleting from the main tree. 
+ */ + nsecnode = NULL; + result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode, + NULL, DNS_RBTFIND_EMPTYDATA, NULL, + NULL); + if (result != ISC_R_SUCCESS) { + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, + DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, + "delete_node: " + "dns_rbt_findnode(nsec): %s", + isc_result_totext(result)); + } else { + result = dns_rbt_deletenode(rbtdb->nsec, nsecnode, + false); + if (result != ISC_R_SUCCESS) { + isc_log_write( + dns_lctx, DNS_LOGCATEGORY_DATABASE, + DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, + "delete_node(): " + "dns_rbt_deletenode(nsecnode): %s", + isc_result_totext(result)); + } + } + result = dns_rbt_deletenode(rbtdb->tree, node, false); + break; + case DNS_DB_NSEC_NSEC: + result = dns_rbt_deletenode(rbtdb->nsec, node, false); + break; + case DNS_DB_NSEC_NSEC3: + result = dns_rbt_deletenode(rbtdb->nsec3, node, false); + break; + } + if (result != ISC_R_SUCCESS) { + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, + DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, + "delete_node(): " + "dns_rbt_deletenode: %s", + isc_result_totext(result)); + } +} + +/* + * Caller must be holding the node lock. 
+ */ +void +dns__rbtdb_newref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, + isc_rwlocktype_t nlocktype DNS__DB_FLARG) { + uint_fast32_t refs; + + if (nlocktype == isc_rwlocktype_write && + ISC_LINK_LINKED(node, deadlink)) + { + ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node, + deadlink); + } + + refs = isc_refcount_increment0(&node->references); +#if DNS_DB_NODETRACE + fprintf(stderr, "incr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", + func, file, line, node, refs + 1); +#else + UNUSED(refs); +#endif + + if (refs == 0) { + /* this is the first reference to the node */ + refs = isc_refcount_increment0( + &rbtdb->node_locks[node->locknum].references); +#if DNS_DB_NODETRACE + fprintf(stderr, + "incr:nodelock:%s:%s:%u:%p:%p->references = " + "%" PRIuFAST32 "\n", + func, file, line, node, + &rbtdb->node_locks[node->locknum], refs + 1); +#else + UNUSED(refs); +#endif + } +} + +/*% + * The tree lock must be held for the result to be valid. + */ +static bool +is_leaf(dns_rbtnode_t *node) { + return (node->parent != NULL && node->parent->down == node && + node->left == NULL && node->right == NULL); +} + +static void +send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, + isc_rwlocktype_t nlocktype DNS__DB_FLARG) { + prune_t *prune = isc_mem_get(rbtdb->common.mctx, sizeof(*prune)); + *prune = (prune_t){ .node = node }; + + dns_db_attach((dns_db_t *)rbtdb, &prune->db); + dns__rbtdb_newref(rbtdb, node, nlocktype DNS__DB_FLARG_PASS); + + isc_async_run(rbtdb->loop, prune_tree, prune); +} + +/*% + * Clean up dead nodes. These are nodes which have no references, and + * have no data. They are dead but we could not or chose not to delete + * them when we deleted all the data at that node because we did not want + * to wait for the tree write lock. + * + * The caller must hold a tree write lock and bucketnum'th node (write) lock. 
+ */
+static void
+cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum DNS__DB_FLARG) {
+	dns_rbtnode_t *node = NULL;
+	int budget = 10; /* XXXJT: should be adjustable */
+
+	/*
+	 * Each pass takes the current head of the bucket's dead-node list
+	 * and spends one unit of the budget, whatever happens to the node.
+	 */
+	for (node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
+	     node != NULL && budget > 0;
+	     node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]), budget--)
+	{
+		ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
+
+		/*
+		 * The node may have been reactivated without a tree write
+		 * lock, in which case it could not be taken off the list
+		 * at that time; having unlinked it now is all that is
+		 * needed.
+		 */
+		if (isc_refcount_current(&node->references) != 0 ||
+		    node->data != NULL)
+		{
+			continue;
+		}
+
+		if (is_leaf(node) && rbtdb->loop != NULL) {
+			/* Leave the removal to the asynchronous pruner. */
+			send_to_prune_tree(
+				rbtdb, node,
+				isc_rwlocktype_write DNS__DB_FLARG_PASS);
+			continue;
+		}
+
+		if (node->data == NULL) {
+			if (node->down == NULL) {
+				/*
+				 * Not an interior node and not needing to
+				 * be reactivated.
+				 */
+				delete_node(rbtdb, node);
+			} else {
+				/*
+				 * An interior node without data.  Put it
+				 * back at the tail of the list so it can
+				 * be cleaned up when node->down becomes
+				 * NULL.
+				 */
+				ISC_LIST_APPEND(rbtdb->deadnodes[bucketnum],
+						node, deadlink);
+			}
+		}
+	}
+}
+
+/*
+ * This function is assumed to be called when a node is newly referenced
+ * and can be in the deadnode list. In that case the node must be retrieved
+ * from the list because it is going to be used. In addition, if the caller
+ * happens to hold a write lock on the tree, it's a good chance to purge dead
+ * nodes.
+ * Note: while a new reference is gained in multiple places, there are only very
+ * few cases where the node can be in the deadnode list (only empty nodes can
+ * have been added to the list).
+ */
+static void
+reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
+		isc_rwlocktype_t tlocktype DNS__DB_FLARG) {
+	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
+	isc_rwlock_t *bucketlock = &rbtdb->node_locks[node->locknum].lock;
+	bool maybe_cleanup;
+	bool need_write;
+
+	POST(nlocktype);
+
+	NODE_RDLOCK(bucketlock, &nlocktype);
+
+	/*
+	 * Dead nodes in this bucket can only be purged when the caller
+	 * holds the tree write lock; if there are any, the node lock is
+	 * upgraded below to perform the cleanup.
+	 */
+	maybe_cleanup = (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
+			 tlocktype == isc_rwlocktype_write);
+
+	need_write = (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup);
+	if (need_write) {
+		/*
+		 * Upgrade the lock, then re-test whether the node still
+		 * has to be unlinked.
+		 */
+		NODE_FORCEUPGRADE(bucketlock, &nlocktype);
+		POST(nlocktype);
+
+		if (ISC_LINK_LINKED(node, deadlink)) {
+			ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node,
+					deadlink);
+		}
+
+		if (maybe_cleanup) {
+			cleanup_dead_nodes(rbtdb,
+					   node->locknum DNS__DB_FILELINE);
+		}
+	}
+
+	dns__rbtdb_newref(rbtdb, node, nlocktype DNS__DB_FLARG_PASS);
+
+	NODE_UNLOCK(bucketlock, &nlocktype);
+}
+
+/*
+ * Caller must be holding the node lock; either the read or write lock.
+ * Note that the lock must be held even when node references are
+ * atomically modified; in that case the decrement operation itself does not
+ * have to be protected, but we must avoid a race condition where multiple
+ * threads are decreasing the reference to zero simultaneously and at least
+ * one of them is going to free the node.
+ *
+ * This function returns true if and only if the node reference decreases
+ * to zero.
+ *
+ * NOTE: Decrementing the reference count of a node to zero does not mean it
+ * will be immediately freed.
+ */
+/*
+ * Notes on the arguments, as they are used in the body below:
+ *
+ * - 'least_serial' is handed to clean_zone_node() when a dirty node of a
+ *   non-cache database is cleaned; 0 means the caller does not know it, in
+ *   which case rbtdb->least_serial is read under the database read lock.
+ * - '*nlocktypep' must not be isc_rwlocktype_none; a read lock is upgraded
+ *   with NODE_FORCEUPGRADE() once the fast path has been ruled out.
+ * - '*tlocktypep' is the caller's tree lock state.  With no tree lock a
+ *   write trylock is attempted; with a read lock an upgrade is attempted
+ *   only if 'tryupgrade' is true; a write lock is used as is.
+ * - 'pruning' is true when called from prune_tree(); a leaf is then deleted
+ *   directly rather than being dispatched to prune_tree() again.
+ *
+ * The value returned is false when other references remain or when the
+ * node has been handed to send_to_prune_tree() (which takes a new
+ * reference), and true otherwise.
+ */
+bool
+dns__rbtdb_decref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
+		  uint32_t least_serial, isc_rwlocktype_t *nlocktypep,
+		  isc_rwlocktype_t *tlocktypep, bool tryupgrade,
+		  bool pruning DNS__DB_FLARG) {
+	isc_result_t result;
+	/* Was any tree lock (read or write) already held on entry? */
+	bool locked = *tlocktypep != isc_rwlocktype_none;
+	/* Set once the tree write lock is known to be held. */
+	bool write_locked = false;
+	db_nodelock_t *nodelock = NULL;
+	int bucket = node->locknum;
+	bool no_reference = true;
+	uint_fast32_t refs;
+
+	REQUIRE(*nlocktypep != isc_rwlocktype_none);
+
+	nodelock = &rbtdb->node_locks[bucket];
+
+/*
+ * A node is kept if it has data, if it has a 'down' pointer (only
+ * considered when 'l', the tree-lock-held flag, is true), or if it is the
+ * origin or NSEC3 origin node of the database.
+ */
+#define KEEP_NODE(n, r, l)                                  \
+	((n)->data != NULL || ((l) && (n)->down != NULL) || \
+	 (n) == (r)->origin_node || (n) == (r)->nsec3_origin_node)
+
+	/* Handle easy and typical case first. */
+	if (!node->dirty && KEEP_NODE(node, rbtdb, locked)) {
+		refs = isc_refcount_decrement(&node->references);
+#if DNS_DB_NODETRACE
+		fprintf(stderr,
+			"decr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n",
+			func, file, line, node, refs - 1);
+#else
+		UNUSED(refs);
+#endif
+		if (refs == 1) {
+			/*
+			 * That was the last node reference, so the lock
+			 * bucket loses its reference for this node too.
+			 */
+			refs = isc_refcount_decrement(&nodelock->references);
+#if DNS_DB_NODETRACE
+			fprintf(stderr,
+				"decr:nodelock:%s:%s:%u:%p:%p->references = "
+				"%" PRIuFAST32 "\n",
+				func, file, line, node, nodelock, refs - 1);
+#else
+			UNUSED(refs);
+#endif
+			return (true);
+		} else {
+			return (false);
+		}
+	}
+
+	/* Upgrade the lock? */
+	if (*nlocktypep == isc_rwlocktype_read) {
+		NODE_FORCEUPGRADE(&nodelock->lock, nlocktypep);
+	}
+
+	refs = isc_refcount_decrement(&node->references);
+#if DNS_DB_NODETRACE
+	fprintf(stderr, "decr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n",
+		func, file, line, node, refs - 1);
+#else
+	UNUSED(refs);
+#endif
+	if (refs > 1) {
+		/* Other references remain; nothing to clean up yet. */
+		return (false);
+	}
+
+	if (node->dirty) {
+		if (IS_CACHE(rbtdb)) {
+			clean_cache_node(rbtdb, node);
+		} else {
+			if (least_serial == 0) {
+				/*
+				 * Caller doesn't know the least serial.
+				 * Get it.
+				 */
+				RWLOCK(&rbtdb->lock, isc_rwlocktype_read);
+				least_serial = rbtdb->least_serial;
+				RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read);
+			}
+			clean_zone_node(node, least_serial);
+		}
+	}
+
+	/*
+	 * Attempt to switch to a write lock on the tree.  If this fails,
+	 * we will add this node to a linked list of nodes in this locking
+	 * bucket which we will free later.
+	 *
+	 * Locking hierarchy notwithstanding, we don't need to free
+	 * the node lock before acquiring the tree write lock because
+	 * we only do a trylock.
+	 */
+	/* We are allowed to upgrade the tree lock */
+	switch (*tlocktypep) {
+	case isc_rwlocktype_write:
+		result = ISC_R_SUCCESS;
+		break;
+	case isc_rwlocktype_read:
+		if (tryupgrade) {
+			result = TREE_TRYUPGRADE(&rbtdb->tree_lock, tlocktypep);
+		} else {
+			result = ISC_R_LOCKBUSY;
+		}
+		break;
+	case isc_rwlocktype_none:
+		result = TREE_TRYWRLOCK(&rbtdb->tree_lock, tlocktypep);
+		break;
+	default:
+		UNREACHABLE();
+	}
+	RUNTIME_CHECK(result == ISC_R_SUCCESS || result == ISC_R_LOCKBUSY);
+	if (result == ISC_R_SUCCESS) {
+		write_locked = true;
+	}
+
+	/* The node is unreferenced now, so drop the bucket's reference. */
+	refs = isc_refcount_decrement(&nodelock->references);
+#if DNS_DB_NODETRACE
+	fprintf(stderr,
+		"decr:nodelock:%s:%s:%u:%p:%p->references = %" PRIuFAST32 "\n",
+		func, file, line, node, nodelock, refs - 1);
+#else
+	UNUSED(refs);
+#endif
+
+	if (KEEP_NODE(node, rbtdb, (locked || write_locked))) {
+		goto restore_locks;
+	}
+
+#undef KEEP_NODE
+
+	if (write_locked) {
+		/*
+		 * If this node is the only one in the level it's in, deleting
+		 * this node may recursively make its parent the only node in
+		 * the parent level; if so, and if no one is currently using
+		 * the parent node, this is almost the only opportunity to
+		 * clean it up.  But the recursive cleanup is not that trivial
+		 * since the child and parent may be in different lock buckets,
+		 * which would cause a lock order reversal problem.  To avoid
+		 * the trouble, we'll dispatch a separate event for batch
+		 * cleaning.  We need to check whether we're deleting the node
+		 * as a result of pruning to avoid infinite dispatching.
+		 * Note: pruning happens only when a loop has been set for the
+		 * rbtdb.  If the user of the rbtdb chooses not to set a loop,
+		 * it's their responsibility to purge stale leaves (e.g. by
+		 * periodic walk-through).
+		 */
+
+		if (!pruning && is_leaf(node) && rbtdb->loop != NULL) {
+			send_to_prune_tree(rbtdb, node,
+					   *nlocktypep DNS__DB_FLARG_PASS);
+			/* The prune request now holds a node reference. */
+			no_reference = false;
+		} else {
+			/* We can now delete the node. */
+
+			delete_node(rbtdb, node);
+		}
+	} else {
+		/*
+		 * No tree write lock: park the node on this bucket's
+		 * dead-node list so that it can be deleted later.
+		 */
+		INSIST(node->data == NULL);
+		if (!ISC_LINK_LINKED(node, deadlink)) {
+			ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
+					deadlink);
+		}
+	}
+
+restore_locks:
+	/*
+	 * Release the tree write lock only if it was acquired in this
+	 * function, i.e. no tree lock was held on entry.  A tree lock that
+	 * the caller already held is not released here.
+	 */
+	if (!locked && write_locked) {
+		TREE_UNLOCK(&rbtdb->tree_lock, tlocktypep);
+	}
+
+	return (no_reference);
+}
+
+/*
+ * Prune the tree by cleaning up single leaves. A single execution of this
+ * function cleans up a single node; if the parent of the latter becomes a
+ * single leaf on its own level as a result, the parent is then also sent to
+ * this function.
+ */ +static void +prune_tree(void *arg) { + prune_t *prune = (prune_t *)arg; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)prune->db; + dns_rbtnode_t *node = prune->node; + dns_rbtnode_t *parent = NULL; + unsigned int locknum = node->locknum; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + isc_mem_put(rbtdb->common.mctx, prune, sizeof(*prune)); + + TREE_WRLOCK(&rbtdb->tree_lock, &tlocktype); + + parent = node->parent; + + NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); + dns__rbtdb_decref(rbtdb, node, 0, &nlocktype, &tlocktype, true, + true DNS__DB_FILELINE); + NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); + + if (parent != NULL && is_leaf(parent)) { + NODE_WRLOCK(&rbtdb->node_locks[parent->locknum].lock, + &nlocktype); + send_to_prune_tree(rbtdb, parent, nlocktype); + NODE_UNLOCK(&rbtdb->node_locks[parent->locknum].lock, + &nlocktype); + } + + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + + dns_db_detach((dns_db_t **)&rbtdb); +} + +static void +make_least_version(dns_rbtdb_t *rbtdb, dns_rbtdb_version_t *version, + rbtdb_changedlist_t *cleanup_list) { + /* + * Caller must be holding the database lock. + */ + + rbtdb->least_serial = version->serial; + *cleanup_list = version->changed_list; + ISC_LIST_INIT(version->changed_list); +} + +static void +cleanup_nondirty(dns_rbtdb_version_t *version, + rbtdb_changedlist_t *cleanup_list) { + rbtdb_changed_t *changed = NULL, *next_changed = NULL; + + /* + * If the changed record is dirty, then + * an update created multiple versions of + * a given rdataset. We keep this list + * until we're the least open version, at + * which point it's safe to get rid of any + * older versions. + * + * If the changed record isn't dirty, then + * we don't need it anymore since we're + * committing and not rolling back. + * + * The caller must be holding the database lock. 
+ */ + for (changed = HEAD(version->changed_list); changed != NULL; + changed = next_changed) + { + next_changed = NEXT(changed, link); + if (!changed->dirty) { + UNLINK(version->changed_list, changed, link); + APPEND(*cleanup_list, changed, link); + } + } +} + +void +dns__rbtdb_setsecure(dns_db_t *db, dns_rbtdb_version_t *version, + dns_dbnode_t *origin) { + dns_rdataset_t keyset; + dns_rdataset_t nsecset, signsecset; + bool haszonekey = false; + bool hasnsec = false; + isc_result_t result; + + dns_rdataset_init(&keyset); + result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey, + 0, 0, &keyset, NULL); + if (result == ISC_R_SUCCESS) { + result = dns_rdataset_first(&keyset); + while (result == ISC_R_SUCCESS) { + dns_rdata_t keyrdata = DNS_RDATA_INIT; + dns_rdataset_current(&keyset, &keyrdata); + if (dns_zonekey_iszonekey(&keyrdata)) { + haszonekey = true; + break; + } + result = dns_rdataset_next(&keyset); + } + dns_rdataset_disassociate(&keyset); + } + if (!haszonekey) { + version->secure = false; + version->havensec3 = false; + return; + } + + dns_rdataset_init(&nsecset); + dns_rdataset_init(&signsecset); + result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec, 0, + 0, &nsecset, &signsecset); + if (result == ISC_R_SUCCESS) { + if (dns_rdataset_isassociated(&signsecset)) { + hasnsec = true; + dns_rdataset_disassociate(&signsecset); + } + dns_rdataset_disassociate(&nsecset); + } + + setnsec3parameters(db, version); + + /* + * Do we have a valid NSEC/NSEC3 chain? + */ + if (version->havensec3 || hasnsec) { + version->secure = true; + } else { + version->secure = false; + } +} + +/*%< + * Walk the origin node looking for NSEC3PARAM records. + * Cache the nsec3 parameters. 
+ */ +static void +setnsec3parameters(dns_db_t *db, dns_rbtdb_version_t *version) { + dns_rbtnode_t *node = NULL; + dns_rdata_nsec3param_t nsec3param; + dns_rdata_t rdata = DNS_RDATA_INIT; + isc_region_t region; + isc_result_t result; + dns_slabheader_t *header = NULL, *header_next = NULL; + unsigned char *raw; /* RDATASLAB */ + unsigned int count, length; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); + version->havensec3 = false; + node = rbtdb->origin_node; + NODE_RDLOCK(&(rbtdb->node_locks[node->locknum].lock), &nlocktype); + for (header = node->data; header != NULL; header = header_next) { + header_next = header->next; + do { + if (header->serial <= version->serial && + !IGNORE(header)) + { + if (NONEXISTENT(header)) { + header = NULL; + } + break; + } else { + header = header->down; + } + } while (header != NULL); + + if (header != NULL && + (header->type == dns_rdatatype_nsec3param)) + { + /* + * Find A NSEC3PARAM with a supported algorithm. 
+ */ + raw = dns_slabheader_raw(header); + count = raw[0] * 256 + raw[1]; /* count */ + raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH; + while (count-- > 0U) { + length = raw[0] * 256 + raw[1]; + raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH; + region.base = raw; + region.length = length; + raw += length; + dns_rdata_fromregion( + &rdata, rbtdb->common.rdclass, + dns_rdatatype_nsec3param, ®ion); + result = dns_rdata_tostruct(&rdata, &nsec3param, + NULL); + INSIST(result == ISC_R_SUCCESS); + dns_rdata_reset(&rdata); + + if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG && + !dns_nsec3_supportedhash(nsec3param.hash)) + { + continue; + } + + if (nsec3param.flags != 0) { + continue; + } + + memmove(version->salt, nsec3param.salt, + nsec3param.salt_length); + version->hash = nsec3param.hash; + version->salt_length = nsec3param.salt_length; + version->iterations = nsec3param.iterations; + version->flags = nsec3param.flags; + version->havensec3 = true; + /* + * Look for a better algorithm than the + * unknown test algorithm. 
+ */ + if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG) { + goto unlock; + } + } + } + } +unlock: + NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock), &nlocktype); + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); +} + +static void +cleanup_dead_nodes_callback(void *arg) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)arg; + bool again = false; + unsigned int locknum; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + TREE_WRLOCK(&rbtdb->tree_lock, &tlocktype); + for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) { + NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); + cleanup_dead_nodes(rbtdb, locknum DNS__DB_FILELINE); + if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL) { + again = true; + } + NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); + } + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + if (again) { + isc_async_run(rbtdb->loop, cleanup_dead_nodes_callback, rbtdb); + } else { + dns_db_detach((dns_db_t **)&rbtdb); + } +} + +void +dns__rbtdb_closeversion(dns_db_t *db, dns_dbversion_t **versionp, + bool commit DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtdb_version_t *version = NULL, *cleanup_version = NULL; + dns_rbtdb_version_t *least_greater = NULL; + bool rollback = false; + rbtdb_changedlist_t cleanup_list; + dns_slabheaderlist_t resigned_list; + rbtdb_changed_t *changed = NULL, *next_changed = NULL; + uint32_t serial, least_serial; + dns_rbtnode_t *rbtnode = NULL; + dns_slabheader_t *header = NULL; + + REQUIRE(VALID_RBTDB(rbtdb)); + version = (dns_rbtdb_version_t *)*versionp; + INSIST(version->rbtdb == rbtdb); + + ISC_LIST_INIT(cleanup_list); + ISC_LIST_INIT(resigned_list); + + if (isc_refcount_decrement(&version->references) > 1) { + /* typical and easy case first */ + if (commit) { + RWLOCK(&rbtdb->lock, isc_rwlocktype_read); + INSIST(!version->writer); + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); + } + goto end; + } + + /* + * Update the zone's secure 
status in version before making + * it the current version. + */ + if (version->writer && commit && !IS_CACHE(rbtdb)) { + dns__rbtdb_setsecure(db, version, rbtdb->origin_node); + } + + RWLOCK(&rbtdb->lock, isc_rwlocktype_write); + serial = version->serial; + if (version->writer) { + if (commit) { + unsigned int cur_ref; + dns_rbtdb_version_t *cur_version = NULL; + + INSIST(version->commit_ok); + INSIST(version == rbtdb->future_version); + /* + * The current version is going to be replaced. + * Release the (likely last) reference to it from the + * DB itself and unlink it from the open list. + */ + cur_version = rbtdb->current_version; + cur_ref = isc_refcount_decrement( + &cur_version->references); + if (cur_ref == 1) { + (void)isc_refcount_current( + &cur_version->references); + if (cur_version->serial == rbtdb->least_serial) + { + INSIST(EMPTY( + cur_version->changed_list)); + } + UNLINK(rbtdb->open_versions, cur_version, link); + } + if (EMPTY(rbtdb->open_versions)) { + /* + * We're going to become the least open + * version. + */ + make_least_version(rbtdb, version, + &cleanup_list); + } else { + /* + * Some other open version is the + * least version. We can't cleanup + * records that were changed in this + * version because the older versions + * may still be in use by an open + * version. + * + * We can, however, discard the + * changed records for things that + * we've added that didn't exist in + * prior versions. + */ + cleanup_nondirty(version, &cleanup_list); + } + /* + * If the (soon to be former) current version + * isn't being used by anyone, we can clean + * it up. + */ + if (cur_ref == 1) { + cleanup_version = cur_version; + APPENDLIST(version->changed_list, + cleanup_version->changed_list, link); + } + /* + * Become the current version. 
+ */ + version->writer = false; + rbtdb->current_version = version; + rbtdb->current_serial = version->serial; + rbtdb->future_version = NULL; + + /* + * Keep the current version in the open list, and + * gain a reference for the DB itself (see the DB + * creation function below). This must be the only + * case where we need to increment the counter from + * zero and need to use isc_refcount_increment0(). + */ + INSIST(isc_refcount_increment0(&version->references) == + 0); + PREPEND(rbtdb->open_versions, rbtdb->current_version, + link); + resigned_list = version->resigned_list; + ISC_LIST_INIT(version->resigned_list); + } else { + /* + * We're rolling back this transaction. + */ + cleanup_list = version->changed_list; + ISC_LIST_INIT(version->changed_list); + resigned_list = version->resigned_list; + ISC_LIST_INIT(version->resigned_list); + rollback = true; + cleanup_version = version; + rbtdb->future_version = NULL; + } + } else { + if (version != rbtdb->current_version) { + /* + * There are no external or internal references + * to this version and it can be cleaned up. + */ + cleanup_version = version; + + /* + * Find the version with the least serial + * number greater than ours. + */ + least_greater = PREV(version, link); + if (least_greater == NULL) { + least_greater = rbtdb->current_version; + } + + INSIST(version->serial < least_greater->serial); + /* + * Is this the least open version? + */ + if (version->serial == rbtdb->least_serial) { + /* + * Yes. Install the new least open + * version. + */ + make_least_version(rbtdb, least_greater, + &cleanup_list); + } else { + /* + * Add any unexecuted cleanups to + * those of the least greater version. 
+ */ + APPENDLIST(least_greater->changed_list, + version->changed_list, link); + } + } else if (version->serial == rbtdb->least_serial) { + INSIST(EMPTY(version->changed_list)); + } + UNLINK(rbtdb->open_versions, version, link); + } + least_serial = rbtdb->least_serial; + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); + + if (cleanup_version != NULL) { + isc_refcount_destroy(&cleanup_version->references); + INSIST(EMPTY(cleanup_version->changed_list)); + free_gluetable(cleanup_version); + cds_wfs_destroy(&cleanup_version->glue_stack); + isc_rwlock_destroy(&cleanup_version->rwlock); + isc_mem_put(rbtdb->common.mctx, cleanup_version, + sizeof(*cleanup_version)); + } + + /* + * Commit/rollback re-signed headers. + */ + for (header = HEAD(resigned_list); header != NULL; + header = HEAD(resigned_list)) + { + isc_rwlock_t *lock = NULL; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + ISC_LIST_UNLINK(resigned_list, header, link); + + lock = &rbtdb->node_locks[RBTDB_HEADERNODE(header)->locknum] + .lock; + NODE_WRLOCK(lock, &nlocktype); + if (rollback && !IGNORE(header)) { + dns__zonerbt_resigninsert( + rbtdb, RBTDB_HEADERNODE(header)->locknum, + header); + } + dns__rbtdb_decref(rbtdb, RBTDB_HEADERNODE(header), least_serial, + &nlocktype, &tlocktype, true, + false DNS__DB_FLARG_PASS); + NODE_UNLOCK(lock, &nlocktype); + INSIST(tlocktype == isc_rwlocktype_none); + } + + if (!EMPTY(cleanup_list)) { + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + + if (rbtdb->loop == NULL) { + /* + * We acquire a tree write lock here in order to make + * sure that stale nodes will be removed in + * dns__rbtdb_decref(). If we didn't have the lock, + * those nodes could miss the chance to be removed + * until the server stops. The write lock is + * expensive, but this should be rare enough + * to justify the cost. 
/*
 * Find the node for 'name' in 'tree', optionally creating it.
 *
 * 'tree' must be either rbtdb->tree or rbtdb->nsec3.
 *
 * The lookup is done under the tree read lock.  If it does not return
 * ISC_R_SUCCESS and 'create' is false, the lookup result is returned,
 * except that DNS_R_PARTIALMATCH is reported as ISC_R_NOTFOUND.  If
 * 'create' is true, the tree lock is upgraded and the node is added;
 * ISC_R_EXISTS from the add is treated as success.
 *
 * On success the node is reactivated and stored in '*nodep'.  On failure
 * '*nodep' is left untouched.
 */
isc_result_t
dns__rbtdb_findnodeintree(dns_rbtdb_t *rbtdb, dns_qp_t *tree,
			  const dns_name_t *name, bool create,
			  dns_dbnode_t **nodep DNS__DB_FLARG) {
	dns_rbtnode_t *node = NULL;
	/*
	 * NOTE(review): 'nodename' is initialized and filled in below but
	 * never read in this function.
	 */
	dns_name_t nodename;
	isc_result_t result;
	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;

	INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);

	dns_name_init(&nodename, NULL);
	TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype);
	result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
				  DNS_RBTFIND_EMPTYDATA, NULL, NULL);
	if (result != ISC_R_SUCCESS) {
		if (!create) {
			/* Callers only see "not found", never a partial match. */
			if (result == DNS_R_PARTIALMATCH) {
				result = ISC_R_NOTFOUND;
			}
			goto unlock;
		}
		/*
		 * Try to upgrade the lock and if that fails unlock then relock.
		 */
		TREE_FORCEUPGRADE(&rbtdb->tree_lock, &tlocktype);
		node = NULL;
		result = dns_rbt_addnode(tree, name, &node);
		if (result == ISC_R_SUCCESS) {
			dns_rbt_namefromnode(node, &nodename);
			/* Assign the new node to a node-lock bucket by hash. */
			node->locknum = node->hashval % rbtdb->node_lock_count;
			if (tree == rbtdb->tree) {
				dns__zonerbt_addwildcards(rbtdb, name, true);

				if (dns_name_iswildcard(name)) {
					result = dns__zonerbt_wildcardmagic(
						rbtdb, name, true);
					/*
					 * On failure we return without setting
					 * '*nodep'; the node added above is
					 * not removed here.
					 */
					if (result != ISC_R_SUCCESS) {
						goto unlock;
					}
				}
			}
			if (tree == rbtdb->nsec3) {
				node->nsec = DNS_DB_NSEC_NSEC3;
			}
		} else if (result == ISC_R_EXISTS) {
			/*
			 * The node is already there (presumably added by
			 * someone else while the lock was being upgraded);
			 * use it.
			 */
			result = ISC_R_SUCCESS;
		} else {
			goto unlock;
		}
	}

	if (tree == rbtdb->nsec3) {
		INSIST(node->nsec == DNS_DB_NSEC_NSEC3);
	}

	reactivate_node(rbtdb, node, tlocktype DNS__DB_FLARG_PASS);

	*nodep = (dns_dbnode_t *)node;
unlock:
	TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype);

	return (result);
}
*/ + + /* + * Mark header stale or ancient if the RRset is no longer active. + */ + if (!ACTIVE(header, now)) { + dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, rbtdb); + /* + * If this data is in the stale window keep it and if + * DNS_DBFIND_STALEOK is not set we tell the caller to + * skip this record. We skip the records with ZEROTTL + * (these records should not be cached anyway). + */ + + if (KEEPSTALE(rbtdb) && stale_ttl > now) { + stale = true; + } else { + /* + * We are not keeping stale, or it is outside the + * stale window. Mark ancient, i.e. ready for cleanup. + */ + ancient = true; + } + } + + rdataset->methods = &dns_rdataslab_rdatasetmethods; + rdataset->rdclass = rbtdb->common.rdclass; + rdataset->type = DNS_TYPEPAIR_TYPE(header->type); + rdataset->covers = DNS_TYPEPAIR_COVERS(header->type); + rdataset->ttl = header->ttl - now; + rdataset->trust = header->trust; + + if (NEGATIVE(header)) { + rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE; + } + if (NXDOMAIN(header)) { + rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN; + } + if (OPTOUT(header)) { + rdataset->attributes |= DNS_RDATASETATTR_OPTOUT; + } + if (PREFETCH(header)) { + rdataset->attributes |= DNS_RDATASETATTR_PREFETCH; + } + + if (stale && !ancient) { + dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, rbtdb); + if (stale_ttl > now) { + rdataset->ttl = stale_ttl - now; + } else { + rdataset->ttl = 0; + } + if (STALE_WINDOW(header)) { + rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW; + } + rdataset->attributes |= DNS_RDATASETATTR_STALE; + } else if (IS_CACHE(rbtdb) && !ACTIVE(header, now)) { + rdataset->attributes |= DNS_RDATASETATTR_ANCIENT; + rdataset->ttl = header->ttl; + } + + rdataset->count = atomic_fetch_add_relaxed(&header->count, 1); + + rdataset->slab.db = (dns_db_t *)rbtdb; + rdataset->slab.node = (dns_dbnode_t *)node; + rdataset->slab.raw = dns_slabheader_raw(header); + rdataset->slab.iter_pos = NULL; + rdataset->slab.iter_count = 0; + + /* + * Add noqname 
proof. + */ + rdataset->slab.noqname = header->noqname; + if (header->noqname != NULL) { + rdataset->attributes |= DNS_RDATASETATTR_NOQNAME; + } + rdataset->slab.closest = header->closest; + if (header->closest != NULL) { + rdataset->attributes |= DNS_RDATASETATTR_CLOSEST; + } + + /* + * Copy out re-signing information. + */ + if (RESIGN(header)) { + rdataset->attributes |= DNS_RDATASETATTR_RESIGN; + rdataset->resign = (header->resign << 1) | header->resign_lsb; + } else { + rdataset->resign = 0; + } +} + +void +dns__rbtdb_attachnode(dns_db_t *db, dns_dbnode_t *source, + dns_dbnode_t **targetp DNS__DB_FLARG) { + REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); + REQUIRE(targetp != NULL && *targetp == NULL); + + dns_rbtnode_t *node = (dns_rbtnode_t *)source; + uint_fast32_t refs = isc_refcount_increment(&node->references); + +#if DNS_DB_NODETRACE + fprintf(stderr, "incr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", + func, file, line, node, refs + 1); +#else + UNUSED(refs); +#endif + + *targetp = source; +} + +void +dns__rbtdb_detachnode(dns_db_t *db, dns_dbnode_t **targetp DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *node = NULL; + bool want_free = false; + bool inactive = false; + db_nodelock_t *nodelock = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(targetp != NULL && *targetp != NULL); + + node = (dns_rbtnode_t *)(*targetp); + nodelock = &rbtdb->node_locks[node->locknum]; + + NODE_RDLOCK(&nodelock->lock, &nlocktype); + + if (dns__rbtdb_decref(rbtdb, node, 0, &nlocktype, &tlocktype, true, + false DNS__DB_FLARG_PASS)) + { + if (isc_refcount_current(&nodelock->references) == 0 && + nodelock->exiting) + { + inactive = true; + } + } + + NODE_UNLOCK(&nodelock->lock, &nlocktype); + INSIST(tlocktype == isc_rwlocktype_none); + + *targetp = NULL; + + if (inactive) { + RWLOCK(&rbtdb->lock, isc_rwlocktype_write); + rbtdb->active--; + if 
(rbtdb->active == 0) { + want_free = true; + } + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); + if (want_free) { + char buf[DNS_NAME_FORMATSIZE]; + if (dns_name_dynamic(&rbtdb->common.origin)) { + dns_name_format(&rbtdb->common.origin, buf, + sizeof(buf)); + } else { + strlcpy(buf, "", sizeof(buf)); + } + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, + DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), + "calling free_rbtdb(%s)", buf); + free_rbtdb(rbtdb, true); + } + } +} + +isc_result_t +dns__rbtdb_createiterator(dns_db_t *db, unsigned int options, + dns_dbiterator_t **iteratorp) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + rbtdb_dbiterator_t *rbtdbiter = NULL; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE((options & (DNS_DB_NSEC3ONLY | DNS_DB_NONSEC3)) != + (DNS_DB_NSEC3ONLY | DNS_DB_NONSEC3)); + + rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter)); + + rbtdbiter->common.methods = &dbiterator_methods; + rbtdbiter->common.db = NULL; + dns_db_attach(db, &rbtdbiter->common.db); + rbtdbiter->common.relative_names = ((options & DNS_DB_RELATIVENAMES) != + 0); + rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC; + rbtdbiter->paused = true; + rbtdbiter->tree_locked = isc_rwlocktype_none; + rbtdbiter->result = ISC_R_SUCCESS; + dns_fixedname_init(&rbtdbiter->name); + dns_fixedname_init(&rbtdbiter->origin); + rbtdbiter->node = NULL; + if ((options & DNS_DB_NSEC3ONLY) != 0) { + rbtdbiter->nsec3mode = nsec3only; + } else if ((options & DNS_DB_NONSEC3) != 0) { + rbtdbiter->nsec3mode = nonsec3; + } else { + rbtdbiter->nsec3mode = full; + } + dns_rbtnodechain_init(&rbtdbiter->chain); + dns_rbtnodechain_init(&rbtdbiter->nsec3chain); + if (rbtdbiter->nsec3mode == nsec3only) { + rbtdbiter->current = &rbtdbiter->nsec3chain; + } else { + rbtdbiter->current = &rbtdbiter->chain; + } + + *iteratorp = (dns_dbiterator_t *)rbtdbiter; + + return (ISC_R_SUCCESS); +} + +isc_result_t +dns__rbtdb_allrdatasets(dns_db_t *db, dns_dbnode_t *node, + dns_dbversion_t *version, unsigned int options, 
+ isc_stdtime_t now, + dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + dns_rbtdb_version_t *rbtversion = version; + rbtdb_rdatasetiter_t *iterator = NULL; + uint_fast32_t refs; + + REQUIRE(VALID_RBTDB(rbtdb)); + + iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator)); + + if ((db->attributes & DNS_DBATTR_CACHE) == 0) { + now = 0; + if (rbtversion == NULL) { + dns__rbtdb_currentversion( + db, (dns_dbversion_t **)(void *)(&rbtversion)); + } else { + INSIST(rbtversion->rbtdb == rbtdb); + + (void)isc_refcount_increment(&rbtversion->references); + } + } else { + if (now == 0) { + now = isc_stdtime_now(); + } + rbtversion = NULL; + } + + iterator->common.magic = DNS_RDATASETITER_MAGIC; + iterator->common.methods = &rdatasetiter_methods; + iterator->common.db = db; + iterator->common.node = node; + iterator->common.version = (dns_dbversion_t *)rbtversion; + iterator->common.options = options; + iterator->common.now = now; + + refs = isc_refcount_increment(&rbtnode->references); +#if DNS_DB_NODETRACE + fprintf(stderr, "incr:node:%s:%s:%u:%p->references = %" PRIuFAST32 "\n", + func, file, line, node, refs + 1); +#else + UNUSED(refs); +#endif + + iterator->current = NULL; + + *iteratorp = (dns_rdatasetiter_t *)iterator; + + return (ISC_R_SUCCESS); +} + +static bool +cname_and_other_data(dns_rbtnode_t *node, uint32_t serial) { + dns_slabheader_t *header = NULL, *header_next = NULL; + bool cname = false, other_data = false; + dns_rdatatype_t rdtype; + + /* + * The caller must hold the node lock. + */ + + /* + * Look for CNAME and "other data" rdatasets active in our version. + */ + for (header = node->data; header != NULL; header = header_next) { + header_next = header->next; + if (!prio_type(header->type)) { + /* + * CNAME is in the priority list, so if we are done + * with the priority list, we know there will not be + * CNAME, so we are safe to skip the rest of the types. 
+ */ + return (false); + } + if (header->type == dns_rdatatype_cname) { + /* + * Look for an active extant CNAME. + */ + do { + if (header->serial <= serial && !IGNORE(header)) + { + /* + * Is this a "this rdataset doesn't + * exist" record? + */ + if (NONEXISTENT(header)) { + header = NULL; + } + break; + } else { + header = header->down; + } + } while (header != NULL); + if (header != NULL) { + cname = true; + } + } else { + /* + * Look for active extant "other data". + * + * "Other data" is any rdataset whose type is not + * KEY, NSEC, SIG or RRSIG. + */ + rdtype = DNS_TYPEPAIR_TYPE(header->type); + if (rdtype != dns_rdatatype_key && + rdtype != dns_rdatatype_sig && + rdtype != dns_rdatatype_nsec && + rdtype != dns_rdatatype_rrsig) + { + /* + * Is it active and extant? + */ + do { + if (header->serial <= serial && + !IGNORE(header)) + { + /* + * Is this a "this rdataset + * doesn't exist" record? + */ + if (NONEXISTENT(header)) { + header = NULL; + } + break; + } else { + header = header->down; + } + } while (header != NULL); + if (header != NULL) { + other_data = true; + } + } + } + if (cname && other_data) { + return (true); + } + } + + return (false); +} + +static uint64_t +recordsize(dns_slabheader_t *header, unsigned int namelen) { + return (dns_rdataslab_rdatasize((unsigned char *)header, + sizeof(*header)) + + sizeof(dns_ttl_t) + sizeof(dns_rdatatype_t) + + sizeof(dns_rdataclass_t) + namelen); +} + +static void +update_recordsandxfrsize(bool add, dns_rbtdb_version_t *rbtversion, + dns_slabheader_t *header, unsigned int namelen) { + unsigned char *hdr = (unsigned char *)header; + size_t hdrsize = sizeof(*header); + + RWLOCK(&rbtversion->rwlock, isc_rwlocktype_write); + if (add) { + rbtversion->records += dns_rdataslab_count(hdr, hdrsize); + rbtversion->xfrsize += recordsize(header, namelen); + } else { + rbtversion->records -= dns_rdataslab_count(hdr, hdrsize); + rbtversion->xfrsize -= recordsize(header, namelen); + } + RWUNLOCK(&rbtversion->rwlock, 
isc_rwlocktype_write); +} + +isc_result_t +dns__rbtdb_add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, + const dns_name_t *nodename, dns_rbtdb_version_t *rbtversion, + dns_slabheader_t *newheader, unsigned int options, bool loading, + dns_rdataset_t *addedrdataset, isc_stdtime_t now DNS__DB_FLARG) { + rbtdb_changed_t *changed = NULL; + dns_slabheader_t *topheader = NULL, *topheader_prev = NULL; + dns_slabheader_t *header = NULL, *sigheader = NULL; + dns_slabheader_t *prioheader = NULL; + unsigned char *merged = NULL; + isc_result_t result; + bool header_nx; + bool newheader_nx; + bool merge; + dns_rdatatype_t rdtype, covers; + dns_typepair_t negtype = 0, sigtype; + dns_trust_t trust; + int idx; + + if ((options & DNS_DBADD_MERGE) != 0) { + REQUIRE(rbtversion != NULL); + merge = true; + } else { + merge = false; + } + + if ((options & DNS_DBADD_FORCE) != 0) { + trust = dns_trust_ultimate; + } else { + trust = newheader->trust; + } + + if (rbtversion != NULL && !loading) { + /* + * We always add a changed record, even if no changes end up + * being made to this node, because it's harmless and + * simplifies the code. + */ + changed = add_changed(newheader, rbtversion DNS__DB_FLARG_PASS); + if (changed == NULL) { + dns_slabheader_destroy(&newheader); + return (ISC_R_NOMEMORY); + } + } + + newheader_nx = NONEXISTENT(newheader) ? true : false; + if (rbtversion == NULL && !newheader_nx) { + rdtype = DNS_TYPEPAIR_TYPE(newheader->type); + covers = DNS_TYPEPAIR_COVERS(newheader->type); + sigtype = DNS_SIGTYPE(covers); + if (NEGATIVE(newheader)) { + /* + * We're adding a negative cache entry. + */ + if (covers == dns_rdatatype_any) { + /* + * If we're adding an negative cache entry + * which covers all types (NXDOMAIN, + * NODATA(QTYPE=ANY)), + * + * We make all other data ancient so that the + * only rdataset that can be found at this + * node is the negative cache entry. 
+ */ + for (topheader = rbtnode->data; + topheader != NULL; + topheader = topheader->next) + { + mark_ancient(topheader); + } + goto find_header; + } + /* + * Otherwise look for any RRSIGs of the given + * type so they can be marked ancient later. + */ + for (topheader = rbtnode->data; topheader != NULL; + topheader = topheader->next) + { + if (topheader->type == sigtype) { + sigheader = topheader; + } + } + negtype = DNS_TYPEPAIR_VALUE(covers, 0); + } else { + /* + * We're adding something that isn't a + * negative cache entry. Look for an extant + * non-ancient NXDOMAIN/NODATA(QTYPE=ANY) negative + * cache entry. If we're adding an RRSIG, also + * check for an extant non-ancient NODATA ncache + * entry which covers the same type as the RRSIG. + */ + for (topheader = rbtnode->data; topheader != NULL; + topheader = topheader->next) + { + if ((topheader->type == RDATATYPE_NCACHEANY) || + (newheader->type == sigtype && + topheader->type == + DNS_TYPEPAIR_VALUE(0, covers))) + { + break; + } + } + if (topheader != NULL && EXISTS(topheader) && + ACTIVE(topheader, now)) + { + /* + * Found one. + */ + if (trust < topheader->trust) { + /* + * The NXDOMAIN/NODATA(QTYPE=ANY) + * is more trusted. + */ + dns_slabheader_destroy(&newheader); + if (addedrdataset != NULL) { + dns__rbtdb_bindrdataset( + rbtdb, rbtnode, + topheader, now, + isc_rwlocktype_write, + addedrdataset + DNS__DB_FLARG_PASS); + } + return (DNS_R_UNCHANGED); + } + /* + * The new rdataset is better. Expire the + * ncache entry. + */ + mark_ancient(topheader); + topheader = NULL; + goto find_header; + } + negtype = DNS_TYPEPAIR_VALUE(0, rdtype); + } + } + + for (topheader = rbtnode->data; topheader != NULL; + topheader = topheader->next) + { + if (prio_type(topheader->type)) { + prioheader = topheader; + } + if (topheader->type == newheader->type || + topheader->type == negtype) + { + break; + } + topheader_prev = topheader; + } + +find_header: + /* + * If header isn't NULL, we've found the right type. 
There may be + * IGNORE rdatasets between the top of the chain and the first real + * data. We skip over them. + */ + header = topheader; + while (header != NULL && IGNORE(header)) { + header = header->down; + } + if (header != NULL) { + header_nx = NONEXISTENT(header) ? true : false; + + /* + * Deleting an already non-existent rdataset has no effect. + */ + if (header_nx && newheader_nx) { + dns_slabheader_destroy(&newheader); + return (DNS_R_UNCHANGED); + } + + /* + * Trying to add an rdataset with lower trust to a cache + * DB has no effect, provided that the cache data isn't + * stale. If the cache data is stale, new lower trust + * data will supersede it below. Unclear what the best + * policy is here. + */ + if (rbtversion == NULL && trust < header->trust && + (ACTIVE(header, now) || header_nx)) + { + dns_slabheader_destroy(&newheader); + if (addedrdataset != NULL) { + dns__rbtdb_bindrdataset( + rbtdb, rbtnode, header, now, + isc_rwlocktype_write, + addedrdataset DNS__DB_FLARG_PASS); + } + return (DNS_R_UNCHANGED); + } + + /* + * Don't merge if a nonexistent rdataset is involved. + */ + if (merge && (header_nx || newheader_nx)) { + merge = false; + } + + /* + * If 'merge' is true, we'll try to create a new rdataset + * that is the union of 'newheader' and 'header'. + */ + if (merge) { + unsigned int flags = 0; + INSIST(rbtversion->serial >= header->serial); + merged = NULL; + result = ISC_R_SUCCESS; + + if ((options & DNS_DBADD_EXACT) != 0) { + flags |= DNS_RDATASLAB_EXACT; + } + /* + * TTL use here is irrelevant to the cache; + * merge is only done with zonedbs. 
+ */ + if ((options & DNS_DBADD_EXACTTTL) != 0 && + newheader->ttl != header->ttl) + { + result = DNS_R_NOTEXACT; + } else if (newheader->ttl != header->ttl) { + flags |= DNS_RDATASLAB_FORCE; + } + if (result == ISC_R_SUCCESS) { + result = dns_rdataslab_merge( + (unsigned char *)header, + (unsigned char *)newheader, + (unsigned int)(sizeof(*newheader)), + rbtdb->common.mctx, + rbtdb->common.rdclass, + (dns_rdatatype_t)header->type, flags, + &merged); + } + if (result == ISC_R_SUCCESS) { + /* + * If 'header' has the same serial number as + * we do, we could clean it up now if we knew + * that our caller had no references to it. + * We don't know this, however, so we leave it + * alone. It will get cleaned up when + * clean_zone_node() runs. + */ + dns_slabheader_destroy(&newheader); + newheader = (dns_slabheader_t *)merged; + dns_slabheader_reset(newheader, + (dns_db_t *)rbtdb, + (dns_dbnode_t *)rbtnode); + dns_slabheader_copycase(newheader, header); + if (loading && RESIGN(newheader) && + RESIGN(header) && + resign_sooner(header, newheader)) + { + newheader->resign = header->resign; + newheader->resign_lsb = + header->resign_lsb; + } + } else { + dns_slabheader_destroy(&newheader); + return (result); + } + } + /* + * Don't replace existing NS, A and AAAA RRsets in the + * cache if they are already exist. This prevents named + * being locked to old servers. Don't lower trust of + * existing record if the update is forced. Nothing + * special to be done w.r.t stale data; it gets replaced + * normally further down. + */ + if (IS_CACHE(rbtdb) && ACTIVE(header, now) && + header->type == dns_rdatatype_ns && !header_nx && + !newheader_nx && header->trust >= newheader->trust && + dns_rdataslab_equalx((unsigned char *)header, + (unsigned char *)newheader, + (unsigned int)(sizeof(*newheader)), + rbtdb->common.rdclass, + (dns_rdatatype_t)header->type)) + { + /* + * Honour the new ttl if it is less than the + * older one. 
+ */ + if (header->ttl > newheader->ttl) { + dns__rbtdb_setttl(header, newheader->ttl); + } + if (header->last_used != now) { + ISC_LIST_UNLINK( + rbtdb->lru[RBTDB_HEADERNODE(header) + ->locknum], + header, link); + header->last_used = now; + ISC_LIST_PREPEND( + rbtdb->lru[RBTDB_HEADERNODE(header) + ->locknum], + header, link); + } + if (header->noqname == NULL && + newheader->noqname != NULL) + { + header->noqname = newheader->noqname; + newheader->noqname = NULL; + } + if (header->closest == NULL && + newheader->closest != NULL) + { + header->closest = newheader->closest; + newheader->closest = NULL; + } + dns_slabheader_destroy(&newheader); + if (addedrdataset != NULL) { + dns__rbtdb_bindrdataset( + rbtdb, rbtnode, header, now, + isc_rwlocktype_write, + addedrdataset DNS__DB_FLARG_PASS); + } + return (ISC_R_SUCCESS); + } + + /* + * If we have will be replacing a NS RRset force its TTL + * to be no more than the current NS RRset's TTL. This + * ensures the delegations that are withdrawn are honoured. + */ + if (IS_CACHE(rbtdb) && ACTIVE(header, now) && + header->type == dns_rdatatype_ns && !header_nx && + !newheader_nx && header->trust <= newheader->trust) + { + if (newheader->ttl > header->ttl) { + newheader->ttl = header->ttl; + } + } + if (IS_CACHE(rbtdb) && ACTIVE(header, now) && + (options & DNS_DBADD_PREFETCH) == 0 && + (header->type == dns_rdatatype_a || + header->type == dns_rdatatype_aaaa || + header->type == dns_rdatatype_ds || + header->type == DNS_SIGTYPE(dns_rdatatype_ds)) && + !header_nx && !newheader_nx && + header->trust >= newheader->trust && + dns_rdataslab_equal((unsigned char *)header, + (unsigned char *)newheader, + (unsigned int)(sizeof(*newheader)))) + { + /* + * Honour the new ttl if it is less than the + * older one. 
+ */ + if (header->ttl > newheader->ttl) { + dns__rbtdb_setttl(header, newheader->ttl); + } + if (header->last_used != now) { + ISC_LIST_UNLINK( + rbtdb->lru[RBTDB_HEADERNODE(header) + ->locknum], + header, link); + header->last_used = now; + ISC_LIST_PREPEND( + rbtdb->lru[RBTDB_HEADERNODE(header) + ->locknum], + header, link); + } + if (header->noqname == NULL && + newheader->noqname != NULL) + { + header->noqname = newheader->noqname; + newheader->noqname = NULL; + } + if (header->closest == NULL && + newheader->closest != NULL) + { + header->closest = newheader->closest; + newheader->closest = NULL; + } + dns_slabheader_destroy(&newheader); + if (addedrdataset != NULL) { + dns__rbtdb_bindrdataset( + rbtdb, rbtnode, header, now, + isc_rwlocktype_write, + addedrdataset DNS__DB_FLARG_PASS); + } + return (ISC_R_SUCCESS); + } + INSIST(rbtversion == NULL || + rbtversion->serial >= topheader->serial); + if (loading) { + newheader->down = NULL; + idx = RBTDB_HEADERNODE(newheader)->locknum; + if (IS_CACHE(rbtdb)) { + if (ZEROTTL(newheader)) { + newheader->last_used = + rbtdb->last_used + 1; + ISC_LIST_APPEND(rbtdb->lru[idx], + newheader, link); + } else { + ISC_LIST_PREPEND(rbtdb->lru[idx], + newheader, link); + } + INSIST(rbtdb->heaps != NULL); + isc_heap_insert(rbtdb->heaps[idx], newheader); + newheader->heap = rbtdb->heaps[idx]; + } else if (RESIGN(newheader)) { + dns__zonerbt_resigninsert(rbtdb, idx, + newheader); + /* + * Don't call resigndelete, we don't need + * to reverse the delete. The free_slabheader + * call below will clean up the heap entry. + */ + } + + /* + * There are no other references to 'header' when + * loading, so we MAY clean up 'header' now. + * Since we don't generate changed records when + * loading, we MUST clean up 'header' now. 
+ */ + if (topheader_prev != NULL) { + topheader_prev->next = newheader; + } else { + rbtnode->data = newheader; + } + newheader->next = topheader->next; + if (rbtversion != NULL && !header_nx) { + update_recordsandxfrsize(false, rbtversion, + header, + nodename->length); + } + dns_slabheader_destroy(&header); + } else { + idx = RBTDB_HEADERNODE(newheader)->locknum; + if (IS_CACHE(rbtdb)) { + INSIST(rbtdb->heaps != NULL); + isc_heap_insert(rbtdb->heaps[idx], newheader); + newheader->heap = rbtdb->heaps[idx]; + if (ZEROTTL(newheader)) { + newheader->last_used = + rbtdb->last_used + 1; + ISC_LIST_APPEND(rbtdb->lru[idx], + newheader, link); + } else { + ISC_LIST_PREPEND(rbtdb->lru[idx], + newheader, link); + } + } else if (RESIGN(newheader)) { + dns__zonerbt_resigninsert(rbtdb, idx, + newheader); + dns__zonerbt_resigndelete( + rbtdb, rbtversion, + header DNS__DB_FLARG_PASS); + } + if (topheader_prev != NULL) { + topheader_prev->next = newheader; + } else { + rbtnode->data = newheader; + } + newheader->next = topheader->next; + newheader->down = topheader; + topheader->next = newheader; + rbtnode->dirty = 1; + if (changed != NULL) { + changed->dirty = true; + } + if (rbtversion == NULL) { + mark_ancient(header); + if (sigheader != NULL) { + mark_ancient(sigheader); + } + } + if (rbtversion != NULL && !header_nx) { + update_recordsandxfrsize(false, rbtversion, + header, + nodename->length); + } + } + } else { + /* + * No non-IGNORED rdatasets of the given type exist at + * this node. + */ + + /* + * If we're trying to delete the type, don't bother. 
+ */ + if (newheader_nx) { + dns_slabheader_destroy(&newheader); + return (DNS_R_UNCHANGED); + } + + idx = RBTDB_HEADERNODE(newheader)->locknum; + if (IS_CACHE(rbtdb)) { + isc_heap_insert(rbtdb->heaps[idx], newheader); + newheader->heap = rbtdb->heaps[idx]; + if (ZEROTTL(newheader)) { + ISC_LIST_APPEND(rbtdb->lru[idx], newheader, + link); + } else { + ISC_LIST_PREPEND(rbtdb->lru[idx], newheader, + link); + } + } else if (RESIGN(newheader)) { + dns__zonerbt_resigninsert(rbtdb, idx, newheader); + dns__zonerbt_resigndelete(rbtdb, rbtversion, + header DNS__DB_FLARG_PASS); + } + + if (topheader != NULL) { + /* + * We have an list of rdatasets of the given type, + * but they're all marked IGNORE. We simply insert + * the new rdataset at the head of the list. + * + * Ignored rdatasets cannot occur during loading, so + * we INSIST on it. + */ + INSIST(!loading); + INSIST(rbtversion == NULL || + rbtversion->serial >= topheader->serial); + if (topheader_prev != NULL) { + topheader_prev->next = newheader; + } else { + rbtnode->data = newheader; + } + newheader->next = topheader->next; + newheader->down = topheader; + topheader->next = newheader; + rbtnode->dirty = 1; + if (changed != NULL) { + changed->dirty = true; + } + } else { + /* + * No rdatasets of the given type exist at the node. + */ + INSIST(newheader->down == NULL); + + if (prio_type(newheader->type)) { + /* This is a priority type, prepend it */ + newheader->next = rbtnode->data; + rbtnode->data = newheader; + } else if (prioheader != NULL) { + /* Append after the priority headers */ + newheader->next = prioheader->next; + prioheader->next = newheader; + } else { + /* There were no priority headers */ + newheader->next = rbtnode->data; + rbtnode->data = newheader; + } + } + } + + if (rbtversion != NULL && !newheader_nx) { + update_recordsandxfrsize(true, rbtversion, newheader, + nodename->length); + } + + /* + * Check if the node now contains CNAME and other data. 
+ */ + if (rbtversion != NULL && + cname_and_other_data(rbtnode, rbtversion->serial)) + { + return (DNS_R_CNAMEANDOTHER); + } + + if (addedrdataset != NULL) { + dns__rbtdb_bindrdataset(rbtdb, rbtnode, newheader, now, + isc_rwlocktype_write, + addedrdataset DNS__DB_FLARG_PASS); + } + + return (ISC_R_SUCCESS); +} + +static bool +delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, dns_typepair_t type) { + if (IS_CACHE(rbtdb)) { + if (type == dns_rdatatype_dname) { + return (true); + } else { + return (false); + } + } else if (type == dns_rdatatype_dname || + (type == dns_rdatatype_ns && + (node != rbtdb->origin_node || IS_STUB(rbtdb)))) + { + return (true); + } + return (false); +} + +static isc_result_t +addnoqname(isc_mem_t *mctx, dns_slabheader_t *newheader, + dns_rdataset_t *rdataset) { + isc_result_t result; + dns_slabheader_proof_t *noqname = NULL; + dns_name_t name = DNS_NAME_INITEMPTY; + dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = DNS_RDATASET_INIT; + isc_region_t r1, r2; + + result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + + result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } + + result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } + + noqname = isc_mem_get(mctx, sizeof(*noqname)); + *noqname = (dns_slabheader_proof_t){ + .neg = r1.base, + .negsig = r2.base, + .type = neg.type, + .name = DNS_NAME_INITEMPTY, + }; + dns_name_dup(&name, mctx, &noqname->name); + newheader->noqname = noqname; + +cleanup: + dns_rdataset_disassociate(&neg); + dns_rdataset_disassociate(&negsig); + + return (result); +} + +static isc_result_t +addclosest(isc_mem_t *mctx, dns_slabheader_t *newheader, + dns_rdataset_t *rdataset) { + isc_result_t result; + dns_slabheader_proof_t *closest = NULL; + dns_name_t name = DNS_NAME_INITEMPTY; + dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = 
DNS_RDATASET_INIT; + isc_region_t r1, r2; + + result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig); + RUNTIME_CHECK(result == ISC_R_SUCCESS); + + result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } + + result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } + + closest = isc_mem_get(mctx, sizeof(*closest)); + *closest = (dns_slabheader_proof_t){ + .neg = r1.base, + .negsig = r2.base, + .name = DNS_NAME_INITEMPTY, + .type = neg.type, + }; + dns_name_dup(&name, mctx, &closest->name); + newheader->closest = closest; + +cleanup: + dns_rdataset_disassociate(&neg); + dns_rdataset_disassociate(&negsig); + return (result); +} + +static void +expire_ttl_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, + isc_rwlocktype_t *tlocktypep, isc_stdtime_t now, + bool cache_is_overmem DNS__DB_FLARG); + +isc_result_t +dns__rbtdb_addrdataset(dns_db_t *db, dns_dbnode_t *node, + dns_dbversion_t *version, isc_stdtime_t now, + dns_rdataset_t *rdataset, unsigned int options, + dns_rdataset_t *addedrdataset DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + dns_rbtdb_version_t *rbtversion = version; + isc_region_t region; + dns_slabheader_t *newheader = NULL; + isc_result_t result; + bool delegating; + bool newnsec; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + bool cache_is_overmem = false; + dns_fixedname_t fixed; + dns_name_t *name = NULL; + + REQUIRE(VALID_RBTDB(rbtdb)); + INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); + + if (!IS_CACHE(rbtdb)) { + /* + * SOA records are only allowed at top of zone. 
+ */ + if (rdataset->type == dns_rdatatype_soa && + node != rbtdb->origin_node) + { + return (DNS_R_NOTZONETOP); + } + TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); + REQUIRE(((rbtnode->nsec == DNS_DB_NSEC_NSEC3 && + (rdataset->type == dns_rdatatype_nsec3 || + rdataset->covers == dns_rdatatype_nsec3)) || + (rbtnode->nsec != DNS_DB_NSEC_NSEC3 && + rdataset->type != dns_rdatatype_nsec3 && + rdataset->covers != dns_rdatatype_nsec3))); + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + } + + if (rbtversion == NULL) { + if (now == 0) { + now = isc_stdtime_now(); + } + } else { + now = 0; + } + + result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, + ®ion, sizeof(dns_slabheader_t)); + if (result != ISC_R_SUCCESS) { + return (result); + } + + name = dns_fixedname_initname(&fixed); + dns__rbtdb_nodefullname(db, node, name); + dns_rdataset_getownercase(rdataset, name); + + newheader = (dns_slabheader_t *)region.base; + *newheader = (dns_slabheader_t){ + .type = DNS_TYPEPAIR_VALUE(rdataset->type, rdataset->covers), + .trust = rdataset->trust, + .last_used = now, + .node = rbtnode, + }; + + dns_slabheader_reset(newheader, db, node); + dns__rbtdb_setttl(newheader, rdataset->ttl + now); + if (rdataset->ttl == 0U) { + DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_ZEROTTL); + } + atomic_init(&newheader->count, + atomic_fetch_add_relaxed(&init_count, 1)); + if (rbtversion != NULL) { + newheader->serial = rbtversion->serial; + now = 0; + + if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) { + DNS_SLABHEADER_SETATTR(newheader, + DNS_SLABHEADERATTR_RESIGN); + newheader->resign = + (isc_stdtime_t)(dns_time64_from32( + rdataset->resign) >> + 1); + newheader->resign_lsb = rdataset->resign & 0x1; + } + } else { + newheader->serial = 1; + if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0) { + DNS_SLABHEADER_SETATTR(newheader, + DNS_SLABHEADERATTR_PREFETCH); + } + if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0) { + 
DNS_SLABHEADER_SETATTR(newheader, + DNS_SLABHEADERATTR_NEGATIVE); + } + if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) { + DNS_SLABHEADER_SETATTR(newheader, + DNS_SLABHEADERATTR_NXDOMAIN); + } + if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0) { + DNS_SLABHEADER_SETATTR(newheader, + DNS_SLABHEADERATTR_OPTOUT); + } + if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) { + result = addnoqname(rbtdb->common.mctx, newheader, + rdataset); + if (result != ISC_R_SUCCESS) { + dns_slabheader_destroy(&newheader); + return (result); + } + } + if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) { + result = addclosest(rbtdb->common.mctx, newheader, + rdataset); + if (result != ISC_R_SUCCESS) { + dns_slabheader_destroy(&newheader); + return (result); + } + } + } + + /* + * If we're adding a delegation type (e.g. NS or DNAME for a zone, + * just DNAME for the cache), then we need to set the callback bit + * on the node. + */ + if (delegating_type(rbtdb, rbtnode, rdataset->type)) { + delegating = true; + } else { + delegating = false; + } + + /* + * Add to the auxiliary NSEC tree if we're adding an NSEC record. + */ + TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); + if (rbtnode->nsec != DNS_DB_NSEC_HAS_NSEC && + rdataset->type == dns_rdatatype_nsec) + { + newnsec = true; + } else { + newnsec = false; + } + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + + /* + * If we're adding a delegation type, adding to the auxiliary NSEC + * tree, or the DB is a cache in an overmem state, hold an + * exclusive lock on the tree. In the latter case the lock does + * not necessarily have to be acquired but it will help purge + * ancient entries more effectively. 
+ */ + if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx)) { + cache_is_overmem = true; + } + if (delegating || newnsec || cache_is_overmem) { + TREE_WRLOCK(&rbtdb->tree_lock, &tlocktype); + } + + if (cache_is_overmem) { + dns__cacherbt_overmem(rbtdb, newheader, + &tlocktype DNS__DB_FLARG_PASS); + } + + NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + if (rbtdb->rrsetstats != NULL) { + DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_STATCOUNT); + update_rrsetstats(rbtdb->rrsetstats, newheader->type, + atomic_load_acquire(&newheader->attributes), + true); + } + + if (IS_CACHE(rbtdb)) { + if (tlocktype == isc_rwlocktype_write) { + cleanup_dead_nodes(rbtdb, + rbtnode->locknum DNS__DB_FLARG_PASS); + } + + expire_ttl_headers(rbtdb, rbtnode->locknum, &tlocktype, now, + cache_is_overmem DNS__DB_FLARG_PASS); + + /* + * If we've been holding a write lock on the tree just for + * cleaning, we can release it now. However, we still need the + * node lock. + */ + if (tlocktype == isc_rwlocktype_write && !delegating && + !newnsec) + { + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + } + } + + result = ISC_R_SUCCESS; + if (newnsec) { + dns_rbtnode_t *nsecnode = NULL; + + result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode); + if (result == ISC_R_SUCCESS) { + nsecnode->nsec = DNS_DB_NSEC_NSEC; + rbtnode->nsec = DNS_DB_NSEC_HAS_NSEC; + } else if (result == ISC_R_EXISTS) { + rbtnode->nsec = DNS_DB_NSEC_HAS_NSEC; + result = ISC_R_SUCCESS; + } + } + + if (result == ISC_R_SUCCESS) { + result = dns__rbtdb_add(rbtdb, rbtnode, name, rbtversion, + newheader, options, false, + addedrdataset, now DNS__DB_FLARG_PASS); + } + if (result == ISC_R_SUCCESS && delegating) { + rbtnode->find_callback = 1; + } + + NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + if (tlocktype != isc_rwlocktype_none) { + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + } + INSIST(tlocktype == isc_rwlocktype_none); + + /* + * Update the zone's secure status. 
If version is non-NULL + * this is deferred until dns__rbtdb_closeversion() is called. + */ + if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) { + dns__rbtdb_setsecure(db, version, rbtdb->origin_node); + } + + return (result); +} + +isc_result_t +dns__rbtdb_subtractrdataset(dns_db_t *db, dns_dbnode_t *node, + dns_dbversion_t *version, dns_rdataset_t *rdataset, + unsigned int options, + dns_rdataset_t *newrdataset DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + dns_rbtdb_version_t *rbtversion = version; + dns_fixedname_t fname; + dns_name_t *nodename = dns_fixedname_initname(&fname); + dns_slabheader_t *topheader = NULL, *topheader_prev = NULL; + dns_slabheader_t *header = NULL, *newheader = NULL; + unsigned char *subresult = NULL; + isc_region_t region; + isc_result_t result; + rbtdb_changed_t *changed = NULL; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb); + + if (!IS_CACHE(rbtdb)) { + TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); + REQUIRE(((rbtnode->nsec == DNS_DB_NSEC_NSEC3 && + (rdataset->type == dns_rdatatype_nsec3 || + rdataset->covers == dns_rdatatype_nsec3)) || + (rbtnode->nsec != DNS_DB_NSEC_NSEC3 && + rdataset->type != dns_rdatatype_nsec3 && + rdataset->covers != dns_rdatatype_nsec3))); + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + } + + dns__rbtdb_nodefullname(db, node, nodename); + + result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx, + ®ion, sizeof(dns_slabheader_t)); + if (result != ISC_R_SUCCESS) { + return (result); + } + + newheader = (dns_slabheader_t *)region.base; + dns_slabheader_reset(newheader, db, node); + dns__rbtdb_setttl(newheader, rdataset->ttl); + newheader->type = DNS_TYPEPAIR_VALUE(rdataset->type, rdataset->covers); + atomic_init(&newheader->attributes, 0); + newheader->serial = 
rbtversion->serial; + newheader->trust = 0; + newheader->noqname = NULL; + newheader->closest = NULL; + atomic_init(&newheader->count, + atomic_fetch_add_relaxed(&init_count, 1)); + newheader->last_used = 0; + newheader->node = rbtnode; + newheader->db = (dns_db_t *)rbtdb; + if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) { + DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_RESIGN); + newheader->resign = + (isc_stdtime_t)(dns_time64_from32(rdataset->resign) >> + 1); + newheader->resign_lsb = rdataset->resign & 0x1; + } else { + newheader->resign = 0; + newheader->resign_lsb = 0; + } + + NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + changed = add_changed(newheader, rbtversion DNS__DB_FLARG_PASS); + if (changed == NULL) { + dns_slabheader_destroy(&newheader); + NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, + &nlocktype); + return (ISC_R_NOMEMORY); + } + + for (topheader = rbtnode->data; topheader != NULL; + topheader = topheader->next) + { + if (topheader->type == newheader->type) { + break; + } + topheader_prev = topheader; + } + /* + * If header isn't NULL, we've found the right type. There may be + * IGNORE rdatasets between the top of the chain and the first real + * data. We skip over them. 
+ */ + header = topheader; + while (header != NULL && IGNORE(header)) { + header = header->down; + } + if (header != NULL && EXISTS(header)) { + unsigned int flags = 0; + subresult = NULL; + result = ISC_R_SUCCESS; + if ((options & DNS_DBSUB_EXACT) != 0) { + flags |= DNS_RDATASLAB_EXACT; + if (newheader->ttl != header->ttl) { + result = DNS_R_NOTEXACT; + } + } + if (result == ISC_R_SUCCESS) { + result = dns_rdataslab_subtract( + (unsigned char *)header, + (unsigned char *)newheader, + (unsigned int)(sizeof(*newheader)), + rbtdb->common.mctx, rbtdb->common.rdclass, + (dns_rdatatype_t)header->type, flags, + &subresult); + } + if (result == ISC_R_SUCCESS) { + dns_slabheader_destroy(&newheader); + newheader = (dns_slabheader_t *)subresult; + dns_slabheader_reset(newheader, db, node); + dns_slabheader_copycase(newheader, header); + if (RESIGN(header)) { + DNS_SLABHEADER_SETATTR( + newheader, DNS_SLABHEADERATTR_RESIGN); + newheader->resign = header->resign; + newheader->resign_lsb = header->resign_lsb; + dns__zonerbt_resigninsert( + rbtdb, rbtnode->locknum, newheader); + } + /* + * We have to set the serial since the rdataslab + * subtraction routine copies the reserved portion of + * header, not newheader. + */ + newheader->serial = rbtversion->serial; + /* + * XXXJT: dns_rdataslab_subtract() copied the pointers + * to additional info. We need to clear these fields + * to avoid having duplicated references. + */ + update_recordsandxfrsize(true, rbtversion, newheader, + nodename->length); + } else if (result == DNS_R_NXRRSET) { + /* + * This subtraction would remove all of the rdata; + * add a nonexistent header instead. 
+ */ + dns_slabheader_destroy(&newheader); + newheader = dns_slabheader_new((dns_db_t *)rbtdb, + (dns_dbnode_t *)rbtnode); + dns__rbtdb_setttl(newheader, 0); + newheader->type = topheader->type; + atomic_init(&newheader->attributes, + DNS_SLABHEADERATTR_NONEXISTENT); + newheader->serial = rbtversion->serial; + } else { + dns_slabheader_destroy(&newheader); + goto unlock; + } + + /* + * If we're here, we want to link newheader in front of + * topheader. + */ + INSIST(rbtversion->serial >= topheader->serial); + update_recordsandxfrsize(false, rbtversion, header, + nodename->length); + if (topheader_prev != NULL) { + topheader_prev->next = newheader; + } else { + rbtnode->data = newheader; + } + newheader->next = topheader->next; + newheader->down = topheader; + topheader->next = newheader; + rbtnode->dirty = 1; + changed->dirty = true; + dns__zonerbt_resigndelete(rbtdb, rbtversion, + header DNS__DB_FLARG_PASS); + } else { + /* + * The rdataset doesn't exist, so we don't need to do anything + * to satisfy the deletion request. + */ + dns_slabheader_destroy(&newheader); + if ((options & DNS_DBSUB_EXACT) != 0) { + result = DNS_R_NOTEXACT; + } else { + result = DNS_R_UNCHANGED; + } + } + + if (result == ISC_R_SUCCESS && newrdataset != NULL) { + dns__rbtdb_bindrdataset(rbtdb, rbtnode, newheader, 0, + isc_rwlocktype_write, + newrdataset DNS__DB_FLARG_PASS); + } + + if (result == DNS_R_NXRRSET && newrdataset != NULL && + (options & DNS_DBSUB_WANTOLD) != 0) + { + dns__rbtdb_bindrdataset(rbtdb, rbtnode, header, 0, + isc_rwlocktype_write, + newrdataset DNS__DB_FLARG_PASS); + } + +unlock: + NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + /* + * Update the zone's secure status. If version is non-NULL + * this is deferred until dns__rbtdb_closeversion() is called. 
+ */ + if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) { + RWLOCK(&rbtdb->lock, isc_rwlocktype_read); + version = rbtdb->current_version; + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); + dns__rbtdb_setsecure(db, version, rbtdb->origin_node); + } + + return (result); +} + +isc_result_t +dns__rbtdb_deleterdataset(dns_db_t *db, dns_dbnode_t *node, + dns_dbversion_t *version, dns_rdatatype_t type, + dns_rdatatype_t covers DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + dns_rbtdb_version_t *rbtversion = version; + dns_fixedname_t fname; + dns_name_t *nodename = dns_fixedname_initname(&fname); + isc_result_t result; + dns_slabheader_t *newheader = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB(rbtdb)); + INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb); + + if (type == dns_rdatatype_any) { + return (ISC_R_NOTIMPLEMENTED); + } + if (type == dns_rdatatype_rrsig && covers == 0) { + return (ISC_R_NOTIMPLEMENTED); + } + + newheader = dns_slabheader_new(db, node); + newheader->type = DNS_TYPEPAIR_VALUE(type, covers); + dns__rbtdb_setttl(newheader, 0); + atomic_init(&newheader->attributes, DNS_SLABHEADERATTR_NONEXISTENT); + if (rbtversion != NULL) { + newheader->serial = rbtversion->serial; + } + + dns__rbtdb_nodefullname(db, node, nodename); + + NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + result = dns__rbtdb_add(rbtdb, rbtnode, nodename, rbtversion, newheader, + DNS_DBADD_FORCE, false, NULL, + 0 DNS__DB_FLARG_PASS); + NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + /* + * Update the zone's secure status. If version is non-NULL + * this is deferred until dns__rbtdb_closeversion() is called. 
+ */ + if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) { + RWLOCK(&rbtdb->lock, isc_rwlocktype_read); + version = rbtdb->current_version; + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_read); + dns__rbtdb_setsecure(db, version, rbtdb->origin_node); + } + + return (result); +} + +unsigned int +dns__rbtdb_nodecount(dns_db_t *db, dns_dbtree_t tree) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + unsigned int count; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB(rbtdb)); + + TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); + switch (tree) { + case dns_dbtree_main: + count = dns_rbt_nodecount(rbtdb->tree); + break; + case dns_dbtree_nsec: + count = dns_rbt_nodecount(rbtdb->nsec); + break; + case dns_dbtree_nsec3: + count = dns_rbt_nodecount(rbtdb->nsec3); + break; + default: + UNREACHABLE(); + } + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + + return (count); +} + +void +dns__rbtdb_setloop(dns_db_t *db, isc_loop_t *loop) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + + REQUIRE(VALID_RBTDB(rbtdb)); + + RWLOCK(&rbtdb->lock, isc_rwlocktype_write); + if (rbtdb->loop != NULL) { + isc_loop_detach(&rbtdb->loop); + } + if (loop != NULL) { + isc_loop_attach(loop, &rbtdb->loop); + } + RWUNLOCK(&rbtdb->lock, isc_rwlocktype_write); +} + +isc_result_t +dns__rbtdb_getoriginnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *onode = NULL; + isc_result_t result = ISC_R_SUCCESS; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(nodep != NULL && *nodep == NULL); + + /* Note that the access to origin_node doesn't require a DB lock */ + onode = (dns_rbtnode_t *)rbtdb->origin_node; + if (onode != NULL) { + dns__rbtdb_newref(rbtdb, onode, + isc_rwlocktype_none DNS__DB_FLARG_PASS); + *nodep = rbtdb->origin_node; + } else { + INSIST(IS_CACHE(rbtdb)); + result = ISC_R_NOTFOUND; + } + + return (result); +} + +void +dns__rbtdb_locknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) { + 
dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + + RWLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, type); +} + +void +dns__rbtdb_unlocknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + + RWUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, type); +} + +isc_result_t +dns__rbtdb_nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; + isc_result_t result; + isc_rwlocktype_t tlocktype = isc_rwlocktype_none; + + REQUIRE(VALID_RBTDB(rbtdb)); + REQUIRE(node != NULL); + REQUIRE(name != NULL); + + TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); + result = dns_rbt_fullnamefromnode(rbtnode, name); + TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); + + return (result); +} + +isc_result_t +dns__rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type, + dns_rdataclass_t rdclass, unsigned int argc, char *argv[], + void *driverarg ISC_ATTR_UNUSED, dns_db_t **dbp) { + dns_rbtdb_t *rbtdb = NULL; + isc_result_t result; + int i; + isc_mem_t *hmctx = mctx; + + rbtdb = isc_mem_get(mctx, sizeof(*rbtdb)); + *rbtdb = (dns_rbtdb_t){ + .common.origin = DNS_NAME_INITEMPTY, + .common.rdclass = rdclass, + .current_serial = 1, + .least_serial = 1, + .next_serial = 2, + .open_versions = ISC_LIST_INITIALIZER, + }; + + isc_refcount_init(&rbtdb->common.references, 1); + + /* + * If argv[0] exists, it points to a memory context to use for heap + */ + if (argc != 0) { + hmctx = (isc_mem_t *)argv[0]; + } + + if (type == dns_dbtype_cache) { + rbtdb->common.methods = &dns__rbtdb_cachemethods; + rbtdb->common.attributes |= DNS_DBATTR_CACHE; + } else if (type == dns_dbtype_stub) { + rbtdb->common.methods = &dns__rbtdb_zonemethods; + rbtdb->common.attributes |= DNS_DBATTR_STUB; + } else { + rbtdb->common.methods = &dns__rbtdb_zonemethods; + } + + 
isc_rwlock_init(&rbtdb->lock); + TREE_INITLOCK(&rbtdb->tree_lock); + + /* + * Initialize node_lock_count in a generic way to support future + * extension which allows the user to specify this value on creation. + * Note that when specified for a cache DB it must be larger than 1 + * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT. + */ + if (rbtdb->node_lock_count == 0) { + if (IS_CACHE(rbtdb)) { + rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT; + } else { + rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT; + } + } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) { + result = ISC_R_RANGE; + goto cleanup_tree_lock; + } + INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH)); + rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count * + sizeof(db_nodelock_t)); + + rbtdb->common.update_listeners = cds_lfht_new(16, 16, 0, 0, NULL); + + if (IS_CACHE(rbtdb)) { + dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats); + rbtdb->lru = isc_mem_get(mctx, + rbtdb->node_lock_count * + sizeof(dns_slabheaderlist_t)); + for (i = 0; i < (int)rbtdb->node_lock_count; i++) { + ISC_LIST_INIT(rbtdb->lru[i]); + } + } + + /* + * Create the heaps. + */ + rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count * + sizeof(isc_heap_t *)); + for (i = 0; i < (int)rbtdb->node_lock_count; i++) { + rbtdb->heaps[i] = NULL; + } + + rbtdb->sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner; + for (i = 0; i < (int)rbtdb->node_lock_count; i++) { + isc_heap_create(hmctx, rbtdb->sooner, set_index, 0, + &rbtdb->heaps[i]); + } + + /* + * Create deadnode lists. 
+ */ + rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count * + sizeof(dns_rbtnodelist_t)); + for (i = 0; i < (int)rbtdb->node_lock_count; i++) { + ISC_LIST_INIT(rbtdb->deadnodes[i]); + } + + rbtdb->active = rbtdb->node_lock_count; + + for (i = 0; i < (int)(rbtdb->node_lock_count); i++) { + NODE_INITLOCK(&rbtdb->node_locks[i].lock); + isc_refcount_init(&rbtdb->node_locks[i].references, 0); + rbtdb->node_locks[i].exiting = false; + } + + /* + * Attach to the mctx. The database will persist so long as there + * are references to it, and attaching to the mctx ensures that our + * mctx won't disappear out from under us. + */ + isc_mem_attach(mctx, &rbtdb->common.mctx); + isc_mem_attach(hmctx, &rbtdb->hmctx); + + /* + * Make a copy of the origin name. + */ + result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin); + if (result != ISC_R_SUCCESS) { + free_rbtdb(rbtdb, false); + return (result); + } + + /* + * Make the qp tries. + */ + dns_qp_create(mctx, &qpmethods, rbtdb, &rbtdb->tree); + dns_qp_create(mctx, &qpmethods, rbtdb, &rbtdb->nsec); + dns_qp_create(mctx, &qpmethods, rbtdb, &rbtdb->nsec3); + + /* + * In order to set the node callback bit correctly in zone databases, + * we need to know if the node has the origin name of the zone. + * In loading_addrdataset() we could simply compare the new name + * to the origin name, but this is expensive. Also, we don't know the + * node name in dns__rbtdb_addrdataset(), so we need another way of + * knowing the zone's top. + * + * We now explicitly create a node for the zone's origin, and then + * we simply remember the node's address. This is safe, because + * the top-of-zone node can never be deleted, nor can its address + * change. 
+ */ + if (!IS_CACHE(rbtdb)) { + result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin, + &rbtdb->origin_node); + if (result != ISC_R_SUCCESS) { + INSIST(result != ISC_R_EXISTS); + free_rbtdb(rbtdb, false); + return (result); + } + INSIST(rbtdb->origin_node != NULL); + rbtdb->origin_node->nsec = DNS_DB_NSEC_NORMAL; + /* + * Add an apex node to the NSEC3 tree so that NSEC3 searches + * return partial matches when there is only a single NSEC3 + * record in the tree. + */ + result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin, + &rbtdb->nsec3_origin_node); + if (result != ISC_R_SUCCESS) { + INSIST(result != ISC_R_EXISTS); + free_rbtdb(rbtdb, false); + return (result); + } + INSIST(result == ISC_R_SUCCESS); + INSIST(rbtdb->nsec3_origin_node != NULL); + rbtdb->nsec3_origin_node->nsec = DNS_DB_NSEC_NSEC3; + } + + /* + * Version Initialization. + */ + rbtdb->current_version = allocate_version(mctx, 1, 1, false); + rbtdb->current_version->rbtdb = rbtdb; + isc_rwlock_init(&rbtdb->current_version->rwlock); + + /* + * Keep the current version in the open list so that list operation + * won't happen in normal lookup operations. 
+ */ + PREPEND(rbtdb->open_versions, rbtdb->current_version, link); + + rbtdb->common.magic = DNS_DB_MAGIC; + rbtdb->common.impmagic = RBTDB_MAGIC; + + *dbp = (dns_db_t *)rbtdb; + + return (ISC_R_SUCCESS); + +cleanup_tree_lock: + TREE_DESTROYLOCK(&rbtdb->tree_lock); + isc_rwlock_destroy(&rbtdb->lock); + isc_mem_put(mctx, rbtdb, sizeof(*rbtdb)); + return (result); +} + +/* + * Rdataset Iterator Methods + */ + +static void +rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) { + rbtdb_rdatasetiter_t *rbtiterator = NULL; + + rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp); + + if (rbtiterator->common.version != NULL) { + dns__rbtdb_closeversion(rbtiterator->common.db, + &rbtiterator->common.version, + false DNS__DB_FLARG_PASS); + } + dns__db_detachnode(rbtiterator->common.db, + &rbtiterator->common.node DNS__DB_FLARG_PASS); + isc_mem_put(rbtiterator->common.db->mctx, rbtiterator, + sizeof(*rbtiterator)); + + *iteratorp = NULL; +} + +static bool +iterator_active(dns_rbtdb_t *rbtdb, rbtdb_rdatasetiter_t *rbtiterator, + dns_slabheader_t *header) { + dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, rbtdb); + + /* + * Is this a "this rdataset doesn't exist" record? + */ + if (NONEXISTENT(header)) { + return (false); + } + + /* + * If this is a zone or this header still active then return it. + */ + if (!IS_CACHE(rbtdb) || ACTIVE(header, rbtiterator->common.now)) { + return (true); + } + + /* + * If we are not returning stale records or the rdataset is + * too old don't return it. 
+ */ + if (!STALEOK(rbtiterator) || (rbtiterator->common.now > stale_ttl)) { + return (false); + } + return (true); +} + +static isc_result_t +rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG) { + rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); + dns_rbtnode_t *rbtnode = rbtiterator->common.node; + dns_rbtdb_version_t *rbtversion = rbtiterator->common.version; + dns_slabheader_t *header = NULL, *top_next = NULL; + uint32_t serial = IS_CACHE(rbtdb) ? 1 : rbtversion->serial; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + NODE_RDLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + for (header = rbtnode->data; header != NULL; header = top_next) { + top_next = header->next; + do { + if (EXPIREDOK(rbtiterator)) { + if (!NONEXISTENT(header)) { + break; + } + header = header->down; + } else if (header->serial <= serial && !IGNORE(header)) + { + if (!iterator_active(rbtdb, rbtiterator, + header)) + { + header = NULL; + } + break; + } else { + header = header->down; + } + } while (header != NULL); + if (header != NULL) { + break; + } + } + + NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + rbtiterator->current = header; + + if (header == NULL) { + return (ISC_R_NOMORE); + } + + return (ISC_R_SUCCESS); +} + +static isc_result_t +rdatasetiter_next(dns_rdatasetiter_t *iterator DNS__DB_FLARG) { + rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); + dns_rbtnode_t *rbtnode = rbtiterator->common.node; + dns_rbtdb_version_t *rbtversion = rbtiterator->common.version; + dns_slabheader_t *header = NULL, *top_next = NULL; + uint32_t serial = IS_CACHE(rbtdb) ? 
1 : rbtversion->serial; + dns_typepair_t type, negtype; + dns_rdatatype_t rdtype, covers; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + bool expiredok = EXPIREDOK(rbtiterator); + + header = rbtiterator->current; + if (header == NULL) { + return (ISC_R_NOMORE); + } + + NODE_RDLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + type = header->type; + rdtype = DNS_TYPEPAIR_TYPE(header->type); + if (NEGATIVE(header)) { + covers = DNS_TYPEPAIR_COVERS(header->type); + negtype = DNS_TYPEPAIR_VALUE(covers, 0); + } else { + negtype = DNS_TYPEPAIR_VALUE(0, rdtype); + } + + /* + * Find the start of the header chain for the next type + * by walking back up the list. + */ + top_next = header->next; + while (top_next != NULL && + (top_next->type == type || top_next->type == negtype)) + { + top_next = top_next->next; + } + if (expiredok) { + /* + * Keep walking down the list if possible or + * start the next type. + */ + header = header->down != NULL ? header->down : top_next; + } else { + header = top_next; + } + for (; header != NULL; header = top_next) { + top_next = header->next; + do { + if (expiredok) { + if (!NONEXISTENT(header)) { + break; + } + header = header->down; + } else if (header->serial <= serial && !IGNORE(header)) + { + if (!iterator_active(rbtdb, rbtiterator, + header)) + { + header = NULL; + } + break; + } else { + header = header->down; + } + } while (header != NULL); + if (header != NULL) { + break; + } + /* + * Find the start of the header chain for the next type + * by walking back up the list. 
+ */ + while (top_next != NULL && + (top_next->type == type || top_next->type == negtype)) + { + top_next = top_next->next; + } + } + + NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + rbtiterator->current = header; + + if (header == NULL) { + return (ISC_R_NOMORE); + } + + return (ISC_R_SUCCESS); +} + +static void +rdatasetiter_current(dns_rdatasetiter_t *iterator, + dns_rdataset_t *rdataset DNS__DB_FLARG) { + rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db); + dns_rbtnode_t *rbtnode = rbtiterator->common.node; + dns_slabheader_t *header = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + + header = rbtiterator->current; + REQUIRE(header != NULL); + + NODE_RDLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); + + dns__rbtdb_bindrdataset(rbtdb, rbtnode, header, rbtiterator->common.now, + isc_rwlocktype_read, + rdataset DNS__DB_FLARG_PASS); + + NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); +} + +/* + * Database Iterator Methods + */ + +static void +reference_iter_node(rbtdb_dbiterator_t *rbtdbiter DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; + dns_rbtnode_t *node = rbtdbiter->node; + + if (node == NULL) { + return; + } + + INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none); + reactivate_node(rbtdb, node, rbtdbiter->tree_locked DNS__DB_FLARG_PASS); +} + +static void +dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; + dns_rbtnode_t *node = rbtdbiter->node; + isc_rwlock_t *lock = NULL; + isc_rwlocktype_t nlocktype = isc_rwlocktype_none; + isc_rwlocktype_t tlocktype = rbtdbiter->tree_locked; + + if (node == NULL) { + return; + } + + REQUIRE(tlocktype != isc_rwlocktype_write); + + lock = &rbtdb->node_locks[node->locknum].lock; + NODE_RDLOCK(lock, &nlocktype); + dns__rbtdb_decref(rbtdb, node, 0, &nlocktype, 
&rbtdbiter->tree_locked, + false, false DNS__DB_FLARG_PASS); + NODE_UNLOCK(lock, &nlocktype); + + INSIST(rbtdbiter->tree_locked == tlocktype); + + rbtdbiter->node = NULL; +} + +static void +resume_iteration(rbtdb_dbiterator_t *rbtdbiter) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; + + REQUIRE(rbtdbiter->paused); + REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none); + + TREE_RDLOCK(&rbtdb->tree_lock, &rbtdbiter->tree_locked); + + rbtdbiter->paused = false; +} + +static void +dbiterator_destroy(dns_dbiterator_t **iteratorp DNS__DB_FLARG) { + rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp); + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db; + dns_db_t *db = NULL; + + if (rbtdbiter->tree_locked == isc_rwlocktype_read) { + TREE_UNLOCK(&rbtdb->tree_lock, &rbtdbiter->tree_locked); + } + INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none); + + dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); + + dns_db_attach(rbtdbiter->common.db, &db); + dns_db_detach(&rbtdbiter->common.db); + + dns_rbtnodechain_reset(&rbtdbiter->chain); + dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); + isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter)); + dns_db_detach(&db); + + *iteratorp = NULL; +} + +static isc_result_t +dbiterator_first(dns_dbiterator_t *iterator DNS__DB_FLARG) { + isc_result_t result; + rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; + dns_name_t *name = NULL, *origin = NULL; + + if (rbtdbiter->result != ISC_R_SUCCESS && + rbtdbiter->result != ISC_R_NOTFOUND && + rbtdbiter->result != DNS_R_PARTIALMATCH && + rbtdbiter->result != ISC_R_NOMORE) + { + return (rbtdbiter->result); + } + + if (rbtdbiter->paused) { + resume_iteration(rbtdbiter); + } + + dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); + + name = dns_fixedname_name(&rbtdbiter->name); + origin = dns_fixedname_name(&rbtdbiter->origin); + dns_rbtnodechain_reset(&rbtdbiter->chain); + 
dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); + + switch (rbtdbiter->nsec3mode) { + case nsec3only: + rbtdbiter->current = &rbtdbiter->nsec3chain; + result = dns_rbtnodechain_first(rbtdbiter->current, + rbtdb->nsec3, name, origin); + break; + case nonsec3: + rbtdbiter->current = &rbtdbiter->chain; + result = dns_rbtnodechain_first(rbtdbiter->current, rbtdb->tree, + name, origin); + break; + case full: + rbtdbiter->current = &rbtdbiter->chain; + result = dns_rbtnodechain_first(rbtdbiter->current, rbtdb->tree, + name, origin); + if (result == ISC_R_NOTFOUND) { + rbtdbiter->current = &rbtdbiter->nsec3chain; + result = dns_rbtnodechain_first( + rbtdbiter->current, rbtdb->nsec3, name, origin); + } + break; + default: + UNREACHABLE(); + } + + if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { + result = dns_rbtnodechain_current(rbtdbiter->current, NULL, + NULL, &rbtdbiter->node); + + /* If we're in the NSEC3 tree, skip the origin */ + if (RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) { + rbtdbiter->node = NULL; + result = dns_rbtnodechain_next(rbtdbiter->current, name, + origin); + if (result == ISC_R_SUCCESS || + result == DNS_R_NEWORIGIN) + { + result = dns_rbtnodechain_current( + rbtdbiter->current, NULL, NULL, + &rbtdbiter->node); + } + } + if (result == ISC_R_SUCCESS) { + rbtdbiter->new_origin = true; + reference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); + } + } else { + INSIST(result == ISC_R_NOTFOUND); + result = ISC_R_NOMORE; /* The tree is empty. 
*/ + } + + rbtdbiter->result = result; + + if (result != ISC_R_SUCCESS) { + ENSURE(!rbtdbiter->paused); + } + + return (result); +} + +static isc_result_t +dbiterator_last(dns_dbiterator_t *iterator DNS__DB_FLARG) { + isc_result_t result; + rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; + dns_name_t *name = NULL, *origin = NULL; + + if (rbtdbiter->result != ISC_R_SUCCESS && + rbtdbiter->result != ISC_R_NOTFOUND && + rbtdbiter->result != DNS_R_PARTIALMATCH && + rbtdbiter->result != ISC_R_NOMORE) + { + return (rbtdbiter->result); + } + + if (rbtdbiter->paused) { + resume_iteration(rbtdbiter); + } + + dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); + + name = dns_fixedname_name(&rbtdbiter->name); + origin = dns_fixedname_name(&rbtdbiter->origin); + dns_rbtnodechain_reset(&rbtdbiter->chain); + dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); + + switch (rbtdbiter->nsec3mode) { + case nsec3only: + rbtdbiter->current = &rbtdbiter->nsec3chain; + result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->nsec3, + name, origin); + break; + case nonsec3: + rbtdbiter->current = &rbtdbiter->chain; + result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree, + name, origin); + break; + case full: + rbtdbiter->current = &rbtdbiter->nsec3chain; + result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->nsec3, + name, origin); + if (result == ISC_R_NOTFOUND) { + rbtdbiter->current = &rbtdbiter->chain; + result = dns_rbtnodechain_last( + rbtdbiter->current, rbtdb->tree, name, origin); + } + break; + default: + UNREACHABLE(); + } + + if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) { + result = dns_rbtnodechain_current(rbtdbiter->current, NULL, + NULL, &rbtdbiter->node); + if (RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) { + /* + * NSEC3 tree only has an origin node. 
+ */ + rbtdbiter->node = NULL; + switch (rbtdbiter->nsec3mode) { + case nsec3only: + result = ISC_R_NOMORE; + break; + case nonsec3: + case full: + rbtdbiter->current = &rbtdbiter->chain; + result = dns_rbtnodechain_last( + rbtdbiter->current, rbtdb->tree, name, + origin); + if (result == ISC_R_SUCCESS || + result == DNS_R_NEWORIGIN) + { + result = dns_rbtnodechain_current( + rbtdbiter->current, NULL, NULL, + &rbtdbiter->node); + } + break; + default: + UNREACHABLE(); + } + } + if (result == ISC_R_SUCCESS) { + rbtdbiter->new_origin = true; + reference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); + } + } else { + INSIST(result == ISC_R_NOTFOUND); + result = ISC_R_NOMORE; /* The tree is empty. */ + } + + rbtdbiter->result = result; + + return (result); +} + +static isc_result_t +dbiterator_seek(dns_dbiterator_t *iterator, + const dns_name_t *name DNS__DB_FLARG) { + isc_result_t result, tresult; + rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; + dns_name_t *iname = NULL, *origin = NULL; + + if (rbtdbiter->result != ISC_R_SUCCESS && + rbtdbiter->result != ISC_R_NOTFOUND && + rbtdbiter->result != DNS_R_PARTIALMATCH && + rbtdbiter->result != ISC_R_NOMORE) + { + return (rbtdbiter->result); + } + + if (rbtdbiter->paused) { + resume_iteration(rbtdbiter); + } + + dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); + + iname = dns_fixedname_name(&rbtdbiter->name); + origin = dns_fixedname_name(&rbtdbiter->origin); + dns_rbtnodechain_reset(&rbtdbiter->chain); + dns_rbtnodechain_reset(&rbtdbiter->nsec3chain); + + switch (rbtdbiter->nsec3mode) { + case nsec3only: + rbtdbiter->current = &rbtdbiter->nsec3chain; + result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, + &rbtdbiter->node, rbtdbiter->current, + DNS_RBTFIND_EMPTYDATA, NULL, NULL); + break; + case nonsec3: + rbtdbiter->current = &rbtdbiter->chain; + result = dns_rbt_findnode(rbtdb->tree, name, NULL, + &rbtdbiter->node, rbtdbiter->current, + 
DNS_RBTFIND_EMPTYDATA, NULL, NULL); + break; + case full: + /* + * Stay on main chain if not found on either chain. + */ + rbtdbiter->current = &rbtdbiter->chain; + result = dns_rbt_findnode(rbtdb->tree, name, NULL, + &rbtdbiter->node, rbtdbiter->current, + DNS_RBTFIND_EMPTYDATA, NULL, NULL); + if (result == DNS_R_PARTIALMATCH) { + dns_rbtnode_t *node = NULL; + tresult = dns_rbt_findnode( + rbtdb->nsec3, name, NULL, &node, + &rbtdbiter->nsec3chain, DNS_RBTFIND_EMPTYDATA, + NULL, NULL); + if (tresult == ISC_R_SUCCESS) { + rbtdbiter->node = node; + rbtdbiter->current = &rbtdbiter->nsec3chain; + result = tresult; + } + } + break; + default: + UNREACHABLE(); + } + + if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) { + tresult = dns_rbtnodechain_current(rbtdbiter->current, iname, + origin, NULL); + if (tresult == ISC_R_SUCCESS) { + rbtdbiter->new_origin = true; + reference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); + } else { + result = tresult; + rbtdbiter->node = NULL; + } + } else { + rbtdbiter->node = NULL; + } + + rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ? 
ISC_R_SUCCESS + : result; + + return (result); +} + +static isc_result_t +dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG) { + isc_result_t result; + rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; + dns_name_t *name = NULL, *origin = NULL; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; + + REQUIRE(rbtdbiter->node != NULL); + + if (rbtdbiter->result != ISC_R_SUCCESS) { + return (rbtdbiter->result); + } + + if (rbtdbiter->paused) { + resume_iteration(rbtdbiter); + } + + dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); + + name = dns_fixedname_name(&rbtdbiter->name); + origin = dns_fixedname_name(&rbtdbiter->origin); + result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin); + if (rbtdbiter->current == &rbtdbiter->nsec3chain && + (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN)) + { + /* + * If we're in the NSEC3 tree, it's empty or we've + * reached the origin, then we're done with it. + */ + result = dns_rbtnodechain_current(rbtdbiter->current, NULL, + NULL, &rbtdbiter->node); + if (result == ISC_R_NOTFOUND || + RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) + { + rbtdbiter->node = NULL; + result = ISC_R_NOMORE; + } + } + if (result == ISC_R_NOMORE && rbtdbiter->nsec3mode != nsec3only && + &rbtdbiter->nsec3chain == rbtdbiter->current) + { + rbtdbiter->current = &rbtdbiter->chain; + dns_rbtnodechain_reset(rbtdbiter->current); + result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree, + name, origin); + if (result == ISC_R_NOTFOUND) { + result = ISC_R_NOMORE; + } + } + + if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) { + rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN); + result = dns_rbtnodechain_current(rbtdbiter->current, NULL, + NULL, &rbtdbiter->node); + } + + if (result == ISC_R_SUCCESS) { + reference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); + } + + rbtdbiter->result = result; + + return (result); +} + +static isc_result_t +dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG) { + 
isc_result_t result; + rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; + dns_name_t *name = NULL, *origin = NULL; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; + + REQUIRE(rbtdbiter->node != NULL); + + if (rbtdbiter->result != ISC_R_SUCCESS) { + return (rbtdbiter->result); + } + + if (rbtdbiter->paused) { + resume_iteration(rbtdbiter); + } + + name = dns_fixedname_name(&rbtdbiter->name); + origin = dns_fixedname_name(&rbtdbiter->origin); + result = dns_rbtnodechain_next(rbtdbiter->current, name, origin); + if (result == ISC_R_NOMORE && rbtdbiter->nsec3mode != nonsec3 && + &rbtdbiter->chain == rbtdbiter->current) + { + rbtdbiter->current = &rbtdbiter->nsec3chain; + dns_rbtnodechain_reset(rbtdbiter->current); + result = dns_rbtnodechain_first(rbtdbiter->current, + rbtdb->nsec3, name, origin); + if (result == ISC_R_NOTFOUND) { + result = ISC_R_NOMORE; + } + } + + dereference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); + + if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) { + /* + * If we've just started the NSEC3 tree, + * skip over the origin. 
+ */ + rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN); + result = dns_rbtnodechain_current(rbtdbiter->current, NULL, + NULL, &rbtdbiter->node); + if (RBTDBITER_NSEC3_ORIGIN_NODE(rbtdb, rbtdbiter)) { + rbtdbiter->node = NULL; + result = dns_rbtnodechain_next(rbtdbiter->current, name, + origin); + if (result == ISC_R_SUCCESS || + result == DNS_R_NEWORIGIN) + { + result = dns_rbtnodechain_current( + rbtdbiter->current, NULL, NULL, + &rbtdbiter->node); + } + } + } + if (result == ISC_R_SUCCESS) { + reference_iter_node(rbtdbiter DNS__DB_FLARG_PASS); + } + + rbtdbiter->result = result; + + return (result); +} + +static isc_result_t +dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep, + dns_name_t *name DNS__DB_FLARG) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; + rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; + dns_rbtnode_t *node = rbtdbiter->node; + isc_result_t result; + dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name); + dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin); + + REQUIRE(rbtdbiter->result == ISC_R_SUCCESS); + REQUIRE(rbtdbiter->node != NULL); + + if (rbtdbiter->paused) { + resume_iteration(rbtdbiter); + } + + if (name != NULL) { + if (rbtdbiter->common.relative_names) { + origin = NULL; + } + result = dns_name_concatenate(nodename, origin, name, NULL); + if (result != ISC_R_SUCCESS) { + return (result); + } + if (rbtdbiter->common.relative_names && rbtdbiter->new_origin) { + result = DNS_R_NEWORIGIN; + } + } else { + result = ISC_R_SUCCESS; + } + + dns__rbtdb_newref(rbtdb, node, isc_rwlocktype_none DNS__DB_FLARG_PASS); + + *nodep = rbtdbiter->node; + + return (result); +} + +static isc_result_t +dbiterator_pause(dns_dbiterator_t *iterator) { + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db; + rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; + + if (rbtdbiter->result != ISC_R_SUCCESS && + rbtdbiter->result != ISC_R_NOTFOUND && + rbtdbiter->result != DNS_R_PARTIALMATCH && 
+ rbtdbiter->result != ISC_R_NOMORE) + { + return (rbtdbiter->result); + } + + if (rbtdbiter->paused) { + return (ISC_R_SUCCESS); + } + + rbtdbiter->paused = true; + + if (rbtdbiter->tree_locked == isc_rwlocktype_read) { + TREE_UNLOCK(&rbtdb->tree_lock, &rbtdbiter->tree_locked); + } + INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none); + + return (ISC_R_SUCCESS); +} + +static isc_result_t +dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) { + rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator; + dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin); + + if (rbtdbiter->result != ISC_R_SUCCESS) { + return (rbtdbiter->result); + } + + dns_name_copy(origin, name); + return (ISC_R_SUCCESS); +} + +void +dns__rbtdb_freeglue(dns_glue_t *glue_list) { + if (glue_list == (void *)-1) { + return; + } + + dns_glue_t *glue = glue_list; + while (glue != NULL) { + dns_glue_t *next = glue->next; + + if (dns_rdataset_isassociated(&glue->rdataset_a)) { + dns_rdataset_disassociate(&glue->rdataset_a); + } + if (dns_rdataset_isassociated(&glue->sigrdataset_a)) { + dns_rdataset_disassociate(&glue->sigrdataset_a); + } + + if (dns_rdataset_isassociated(&glue->rdataset_aaaa)) { + dns_rdataset_disassociate(&glue->rdataset_aaaa); + } + if (dns_rdataset_isassociated(&glue->sigrdataset_aaaa)) { + dns_rdataset_disassociate(&glue->sigrdataset_aaaa); + } + + dns_rdataset_invalidate(&glue->rdataset_a); + dns_rdataset_invalidate(&glue->sigrdataset_a); + dns_rdataset_invalidate(&glue->rdataset_aaaa); + dns_rdataset_invalidate(&glue->sigrdataset_aaaa); + + isc_mem_putanddetach(&glue->mctx, glue, sizeof(*glue)); + + glue = next; + } +} + +static void +free_gluelist_rcu(struct rcu_head *rcu_head) { + dns_glue_t *glue = caa_container_of(rcu_head, dns_glue_t, rcu_head); + + dns__rbtdb_freeglue(glue); +} + +static void +free_gluetable(dns_rbtdb_version_t *rbtversion) { + struct cds_wfs_head *head = __cds_wfs_pop_all(&rbtversion->glue_stack); + struct cds_wfs_node *node = 
NULL, *next = NULL; + + rcu_read_lock(); + cds_wfs_for_each_blocking_safe(head, node, next) { + dns_slabheader_t *header = + caa_container_of(node, dns_slabheader_t, wfs_node); + dns_glue_t *glue = rcu_xchg_pointer(&header->glue_list, NULL); + + call_rcu(&glue->rcu_head, free_gluelist_rcu); + } + rcu_read_unlock(); +} + +void +dns__rbtdb_deletedata(dns_db_t *db ISC_ATTR_UNUSED, + dns_dbnode_t *node ISC_ATTR_UNUSED, void *data) { + dns_slabheader_t *header = data; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db; + + if (header->heap != NULL && header->heap_index != 0) { + isc_heap_delete(header->heap, header->heap_index); + } + + if (IS_CACHE(rbtdb)) { + update_rrsetstats(rbtdb->rrsetstats, header->type, + atomic_load_acquire(&header->attributes), + false); + + if (ISC_LINK_LINKED(header, link)) { + int idx = RBTDB_HEADERNODE(header)->locknum; + INSIST(IS_CACHE(rbtdb)); + ISC_LIST_UNLINK(rbtdb->lru[idx], header, link); + } + + if (header->noqname != NULL) { + dns_slabheader_freeproof(db->mctx, &header->noqname); + } + if (header->closest != NULL) { + dns_slabheader_freeproof(db->mctx, &header->closest); + } + } else { + if (header->glue_list) { + dns__rbtdb_freeglue(header->glue_list); + } + } +} + +/* + * Caller must be holding the node write lock. 
+ */
+/*
+ * Expire headers from the TTL heap of lock bucket 'locknum'.
+ *
+ * Looks at the top element of rbtdb->heaps[locknum] at most
+ * DNS_RBTDB_EXPIRE_TTL_COUNT times and passes each eligible header to
+ * dns__cacherbt_expireheader() with reason dns_expire_ttl.  Stops early
+ * when the heap is empty, or when the top header's TTL (plus its stale
+ * TTL, unless 'cache_is_overmem' is set) is not below
+ * 'now - RBTDB_VIRTUAL'.
+ */
+static void
+expire_ttl_headers(dns_rbtdb_t *rbtdb, unsigned int locknum,
+		   isc_rwlocktype_t *tlocktypep, isc_stdtime_t now,
+		   bool cache_is_overmem DNS__DB_FLARG) {
+	isc_heap_t *heap = rbtdb->heaps[locknum];
+
+	for (size_t i = 0; i < DNS_RBTDB_EXPIRE_TTL_COUNT; i++) {
+		dns_slabheader_t *header = isc_heap_element(heap, 1);
+
+		if (header == NULL) {
+			/* No headers left on this TTL heap; exit cleaning */
+			return;
+		}
+
+		dns_ttl_t ttl = header->ttl;
+
+		if (!cache_is_overmem) {
+			/* Only account for stale TTL if cache is not overmem */
+			ttl += STALE_TTL(header, rbtdb);
+		}
+
+		if (ttl >= now - RBTDB_VIRTUAL) {
+			/*
+			 * The header at the top of this TTL heap is not yet
+			 * eligible for expiry, so none of the other headers on
+			 * the same heap can be eligible for expiry, either;
+			 * exit cleaning.
+			 */
+			return;
+		}
+
+		dns__cacherbt_expireheader(header, tlocktypep,
+					   dns_expire_ttl DNS__DB_FLARG_PASS);
+	}
+}
+
+/*
+ * Allocate and initialize a new qpdata node for 'name' from
+ * rbtdb->common.mctx.
+ *
+ * The node starts with one reference and holds its own attachment to the
+ * memory context.  A copy of 'name' is stored in the node's embedded
+ * fixed name 'fn', and the 'name' member points at that copy.  The lock
+ * bucket is the name hash modulo rbtdb->node_lock_count.
+ */
+dns_qpdata_t *
+dns_qpdata_create(dns_rbtdb_t *rbtdb, const dns_name_t *name) {
+	dns_qpdata_t *newdata = isc_mem_get(rbtdb->common.mctx,
+					    sizeof(*newdata));
+	*newdata = (dns_qpdata_t){
+		.references = ISC_REFCOUNT_INITIALIZER(1),
+	};
+	newdata->hashval = dns_name_hash(name);
+	newdata->locknum = newdata->hashval % rbtdb->node_lock_count;
+	newdata->name = dns_fixedname_initname(&newdata->fn);
+	dns_name_copy(name, newdata->name);
+	isc_mem_attach(rbtdb->common.mctx, &newdata->mctx);
+
+	ISC_LINK_INIT(newdata, deadlink);
+	ISC_LINK_INIT(newdata, prunelink);
+
+#ifdef DNS_DB_NODETRACE
+	/*
+	 * Report the address of the new node (not of the caller's name) so
+	 * that this line pairs up with later reference traces for the node.
+	 */
+	fprintf(stderr, "dns_qpdata_create:%s:%s:%d:%p->references = 1\n",
+		__func__, __FILE__, __LINE__ + 1, (void *)newdata);
+#endif
+	return (newdata);
+}
+
+/*
+ * Release a qpdata node: return its memory to data->mctx and drop the
+ * node's attachment to that context.
+ */
+void
+dns_qpdata_destroy(dns_qpdata_t *data) {
+	/*
+	 * data->name points into data->fn (see dns_qpdata_create()); it was
+	 * never dynamically allocated, so it must not be handed to
+	 * dns_name_free().  Its storage is released with the node itself.
+	 */
+	isc_mem_putanddetach(&data->mctx, data, sizeof(*data));
+}
+
+#ifdef DNS_DB_NODETRACE
+ISC_REFCOUNT_TRACE_IMPL(dns_qpdata, dns_qpdata_destroy);
+#else
+ISC_REFCOUNT_IMPL(dns_qpdata, dns_qpdata_destroy);
+#endif
diff --git 
a/lib/dns/qpdb_p.h b/lib/dns/qpdb_p.h new file mode 100644 index 0000000000..92964ecbec --- /dev/null +++ b/lib/dns/qpdb_p.h @@ -0,0 +1,601 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +/*% + * Note that "impmagic" is not the first four bytes of the struct, so + * ISC_MAGIC_VALID cannot be used. + */ +#define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4') +#define VALID_RBTDB(rbtdb) \ + ((rbtdb) != NULL && (rbtdb)->common.impmagic == RBTDB_MAGIC) + +#define RBTDB_HEADERNODE(h) ((dns_rbtnode_t *)((h)->node)) + +/* + * Allow clients with a virtual time of up to 5 minutes in the past to see + * records that would otherwise have expired. + */ +#define RBTDB_VIRTUAL 300 + +/***** +***** Module Info +*****/ + +/*! \file + * \brief + * DNS RBTDB Implementation (that actually uses qp tries) + */ + +ISC_LANG_BEGINDECLS + +/*% + * This is the structure that is used for each node in the qp trie of trees. + * For now it is a copy of the dns_rbtnode structure. + */ +struct dns_qpdata { + unsigned int magic; + /*@{*/ + /*! + * The following bitfields add up to a total bitwidth of 32. + * The range of values necessary for each item is indicated. + * + * In each case below the "range" indicated is what's _necessary_ for + * the bitfield to hold, not what it actually _can_ hold. + * + * Note: Tree lock must be held before modifying these + * bit-fields. 
+ * + * Note: The two "unsigned int :0;" unnamed bitfields on either + * side of the bitfields below are scaffolding that border the + * set of bitfields which are accessed after acquiring the tree + * lock. Please don't insert any other bitfield members between + * the unnamed bitfields unless they should also be accessed + * after acquiring the tree lock. + */ + unsigned int : 0; /* start of bitfields c/o tree lock */ + unsigned int is_root : 1; /*%< range is 0..1 */ + unsigned int color : 1; /*%< range is 0..1 */ + unsigned int find_callback : 1; /*%< range is 0..1 */ + bool absolute : 1; /*%< node with absolute DNS name */ + unsigned int nsec : 2; /*%< range is 0..3 */ + unsigned int namelen : 8; /*%< range is 1..255 */ + unsigned int offsetlen : 8; /*%< range is 1..128 */ + unsigned int oldnamelen : 8; /*%< range is 1..255 */ + unsigned int : 0; /* end of bitfields c/o tree lock */ + /*@}*/ + + /*% + * These are needed for hashing. The 'uppernode' points to the + * node's superdomain node in the parent subtree, so that it can + * be reached from a child that was found by a hash lookup. + */ + unsigned int hashval; + dns_rbtnode_t *uppernode; + dns_rbtnode_t *hashnext; + + dns_rbtnode_t *parent; + dns_rbtnode_t *left; + dns_rbtnode_t *right; + dns_rbtnode_t *down; + + dns_fixedname_t fn; + dns_name_t *name; + isc_mem_t *mctx; + + /*% + * Used for LRU cache. This linked list is used to mark nodes which + * have no data any longer, but we cannot unlink at that exact moment + * because we did not or could not obtain a write lock on the tree. + */ + ISC_LINK(dns_rbtdbnode_t) deadlink; + + /*% + * This linked list is used to store nodes from which tree pruning can + * be started. + */ + ISC_LINK(dns_rbtdbnode_t) prunelink; + + /*@{*/ + /*! + * These values are used in the RBT DB implementation. The appropriate + * node lock must be held before accessing them. 
+ * + * Note: The two "unsigned int :0;" unnamed bitfields on either + * side of the bitfields below are scaffolding that border the + * set of bitfields which are accessed after acquiring the node + * lock. Please don't insert any other bitfield members between + * the unnamed bitfields unless they should also be accessed + * after acquiring the node lock. + * + * NOTE: Do not merge these fields into bitfields above, as + * they'll all be put in the same qword that could be accessed + * without the node lock as it shares the qword with other + * members. Leave these members here so that they occupy a + * separate region of memory. + */ + void *data; + uint8_t : 0; /* start of bitfields c/o node lock */ + uint8_t dirty : 1; + uint8_t wild : 1; + uint8_t : 0; /* end of bitfields c/o node lock */ + uint16_t locknum; /* note that this is not in the bitfield */ + isc_refcount_t references; + /*@}*/ +}; + +typedef struct rbtdb_changed { + dns_rbtnode_t *node; + bool dirty; + ISC_LINK(struct rbtdb_changed) link; +} rbtdb_changed_t; + +typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t; + +struct dns_rbtdb_version { + /* Not locked */ + uint32_t serial; + dns_rbtdb_t *rbtdb; + /* + * Protected in the refcount routines. + * XXXJT: should we change the lock policy based on the refcount + * performance? + */ + isc_refcount_t references; + /* Locked by database lock. */ + bool writer; + bool commit_ok; + rbtdb_changedlist_t changed_list; + dns_slabheaderlist_t resigned_list; + ISC_LINK(dns_rbtdb_version_t) link; + bool secure; + bool havensec3; + /* NSEC3 parameters */ + dns_hash_t hash; + uint8_t flags; + uint16_t iterations; + uint8_t salt_length; + unsigned char salt[DNS_NSEC3_SALTSIZE]; + + /* + * records and xfrsize are covered by rwlock. + */ + isc_rwlock_t rwlock; + uint64_t records; + uint64_t xfrsize; + + struct cds_wfs_stack glue_stack; +}; + +typedef ISC_LIST(dns_rbtdb_version_t) rbtdb_versionlist_t; + +struct dns_rbtdb { + /* Unlocked. 
*/ + dns_db_t common; + /* Locks the data in this struct */ + isc_rwlock_t lock; + /* Locks the tree structure (prevents nodes appearing/disappearing) */ + isc_rwlock_t tree_lock; + /* Locks for individual tree nodes */ + unsigned int node_lock_count; + db_nodelock_t *node_locks; + dns_rbtnode_t *origin_node; + dns_rbtnode_t *nsec3_origin_node; + dns_stats_t *rrsetstats; /* cache DB only */ + isc_stats_t *cachestats; /* cache DB only */ + isc_stats_t *gluecachestats; /* zone DB only */ + /* Locked by lock. */ + unsigned int active; + unsigned int attributes; + uint32_t current_serial; + uint32_t least_serial; + uint32_t next_serial; + dns_rbtdb_version_t *current_version; + dns_rbtdb_version_t *future_version; + rbtdb_versionlist_t open_versions; + isc_loop_t *loop; + dns_dbnode_t *soanode; + dns_dbnode_t *nsnode; + + /* + * The time after a failed lookup, where stale answers from cache + * may be used directly in a DNS response without attempting a + * new iterative lookup. + */ + uint32_t serve_stale_refresh; + + /* + * This is an array of linked lists used to implement the LRU cache. + * There will be node_lock_count linked lists here. Nodes in bucket 1 + * will be placed on the linked list lru[1]. + */ + dns_slabheaderlist_t *lru; + + /* + * Start point % node_lock_count for next LRU cleanup. + */ + atomic_uint lru_sweep; + + /* + * When performing LRU cleaning limit cleaning to headers that were + * last used at or before this. + */ + _Atomic(isc_stdtime_t) last_used; + + /*% + * Temporary storage for stale cache nodes and dynamically deleted + * nodes that await being cleaned up. + */ + dns_rbtnodelist_t *deadnodes; + + /* + * Heaps. These are used for TTL based expiry in a cache, + * or for zone resigning in a zone DB. hmctx is the memory + * context to use for the heap (which differs from the main + * database memory context in the case of a cache). + */ + isc_mem_t *hmctx; + isc_heap_t **heaps; + isc_heapcompare_t sooner; + + /* Locked by tree_lock. 
*/ + dns_qp_t *tree; + dns_qp_t *nsec; + dns_qp_t *nsec3; + + /* Unlocked */ + unsigned int quantum; +}; + +/*% + * Search Context + */ +typedef struct { + dns_rbtdb_t *rbtdb; + dns_rbtdb_version_t *rbtversion; + uint32_t serial; + unsigned int options; + dns_rbtnodechain_t chain; + bool copy_name; + bool need_cleanup; + bool wild; + dns_rbtnode_t *zonecut; + dns_slabheader_t *zonecut_header; + dns_slabheader_t *zonecut_sigheader; + dns_fixedname_t zonecut_name; + isc_stdtime_t now; +} rbtdb_search_t; + +/*% + * Load Context + */ +typedef struct { + dns_db_t *db; + isc_stdtime_t now; +} rbtdb_load_t; + +/*% + * Prune context + */ +typedef struct { + dns_db_t *db; + dns_rbtnode_t *node; +} prune_t; + +extern dns_dbmethods_t dns__rbtdb_zonemethods; +extern dns_dbmethods_t dns__rbtdb_cachemethods; + +/* + * Common DB implementation methods shared by both cache and zone RBT + * databases: + */ + +isc_result_t +dns__rbtdb_create(isc_mem_t *mctx, const dns_name_t *base, dns_dbtype_t type, + dns_rdataclass_t rdclass, unsigned int argc, char *argv[], + void *driverarg, dns_db_t **dbp); +/*%< + * Create a new database of type "rbt". Called via dns_db_create(); + * see documentation for that function for more details. + * + * If argv[0] is set, it points to a valid memory context to be used for + * allocation of heap memory. Generally this is used for cache databases + * only. + * + * Requires: + * + * \li argc == 0 or argv[0] is a valid memory context. + */ + +void +dns__rbtdb_destroy(dns_db_t *arg); +/*%< + * Implement dns_db_destroy() for RBT databases, see documentation + * for that function for more details. 
+ */ + +void +dns__rbtdb_currentversion(dns_db_t *db, dns_dbversion_t **versionp); +isc_result_t +dns__rbtdb_newversion(dns_db_t *db, dns_dbversion_t **versionp); +void +dns__rbtdb_attachversion(dns_db_t *db, dns_dbversion_t *source, + dns_dbversion_t **targetp); +void +dns__rbtdb_closeversion(dns_db_t *db, dns_dbversion_t **versionp, + bool commit DNS__DB_FLARG); +/*%< + * Implement the dns_db_currentversion(), _newversion(), + * _attachversion() and _closeversion() methods for RBT databases; + * see documentation of those functions for more details. + */ + +isc_result_t +dns__rbtdb_findnode(dns_db_t *db, const dns_name_t *name, bool create, + dns_dbnode_t **nodep DNS__DB_FLARG); +isc_result_t +dns__rbtdb_findnodeintree(dns_rbtdb_t *rbtdb, dns_qp_t *tree, + const dns_name_t *name, bool create, + dns_dbnode_t **nodep DNS__DB_FLARG); +/*%< + * Implement the dns_db_findnode() and _findnodeintree() methods for + * RBT databases; see documentation of those functions for more details. + */ + +void +dns__rbtdb_attachnode(dns_db_t *db, dns_dbnode_t *source, + dns_dbnode_t **targetp DNS__DB_FLARG); +void +dns__rbtdb_detachnode(dns_db_t *db, dns_dbnode_t **targetp DNS__DB_FLARG); +/*%< + * Implement the dns_db_attachnode() and _detachnode() methods for + * RBT databases; see documentation of those functions for more details. + */ + +isc_result_t +dns__rbtdb_createiterator(dns_db_t *db, unsigned int options, + dns_dbiterator_t **iteratorp); +/*%< + * Implement dns_db_createiterator() for RBT databases; see documentation of + * that function for more details. + */ + +isc_result_t +dns__rbtdb_allrdatasets(dns_db_t *db, dns_dbnode_t *node, + dns_dbversion_t *version, unsigned int options, + isc_stdtime_t now, + dns_rdatasetiter_t **iteratorp DNS__DB_FLARG); +/*%< + * Implement dns_db_allrdatasets() for RBT databases; see documentation of + * that function for more details. 
+ */ +isc_result_t +dns__rbtdb_addrdataset(dns_db_t *db, dns_dbnode_t *node, + dns_dbversion_t *version, isc_stdtime_t now, + dns_rdataset_t *rdataset, unsigned int options, + dns_rdataset_t *addedrdataset DNS__DB_FLARG); +isc_result_t +dns__rbtdb_subtractrdataset(dns_db_t *db, dns_dbnode_t *node, + dns_dbversion_t *version, dns_rdataset_t *rdataset, + unsigned int options, + dns_rdataset_t *newrdataset DNS__DB_FLARG); +isc_result_t +dns__rbtdb_deleterdataset(dns_db_t *db, dns_dbnode_t *node, + dns_dbversion_t *version, dns_rdatatype_t type, + dns_rdatatype_t covers DNS__DB_FLARG); +/*%< + * Implement the dns_db_addrdataset(), _subtractrdataset() and + * _deleterdataset() methods for RBT databases; see documentation of + * those functions for more details. + */ + +unsigned int +dns__rbtdb_nodecount(dns_db_t *db, dns_dbtree_t tree); +/*%< + * Implement dns_db_nodecount() for RBT databases; see documentation of + * that function for more details. + */ + +void +dns__rbtdb_setloop(dns_db_t *db, isc_loop_t *loop); +/*%< + * Implement dns_db_setloop() for RBT databases; see documentation of + * that function for more details. + */ + +isc_result_t +dns__rbtdb_getoriginnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG); +/*%< + * Implement dns_db_getoriginnode() for RBT databases; see documentation of + * that function for more details. + */ + +void +dns__rbtdb_deletedata(dns_db_t *db ISC_ATTR_UNUSED, + dns_dbnode_t *node ISC_ATTR_UNUSED, void *data); +/*%< + * Implement dns_db_deletedata() for RBT databases; see documentation of + * that function for more details. + */ + +void +dns__rbtdb_locknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type); +void +dns__rbtdb_unlocknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type); +/*%< + * Implement the dns_db_locknode() and _unlocknode() methods for + * RBT databases; see documentation of those functions for more details. 
+ */ + +/*% + * Functions used for the RBT implementation which are defined and + * used in rbtdb.c but may also be called from rbt-zonedb.c or + * rbt-cachedb.c: + */ +void +dns__rbtdb_bindrdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, + dns_slabheader_t *header, isc_stdtime_t now, + isc_rwlocktype_t locktype, + dns_rdataset_t *rdataset DNS__DB_FLARG); + +isc_result_t +dns__rbtdb_nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name); + +void +dns__rbtdb_freeglue(dns_glue_t *glue_list); + +void +dns__rbtdb_newref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, + isc_rwlocktype_t locktype DNS__DB_FLARG); +/*%< + * Increment the reference counter to a node in an RBT database. + * If the caller holds a node lock then its lock type is specified + * as 'locktype'. If the node is write-locked, then the node can + * be removed from the dead nodes list. If not, the list can be + * cleaned up later. + */ + +bool +dns__rbtdb_decref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, + uint32_t least_serial, isc_rwlocktype_t *nlocktypep, + isc_rwlocktype_t *tlocktypep, bool tryupgrade, + bool pruning DNS__DB_FLARG); +/*%< + * Decrement the reference counter to a node in an RBT database. + * 'nlocktypep' and 'tlocktypep' are pointers to the current status + * of the node lock and tree lock. + * + * If references go to 0, the node will be cleaned up, which may + * necessitate upgrading the locks. + */ + +isc_result_t +dns__rbtdb_add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, + const dns_name_t *nodename, dns_rbtdb_version_t *rbtversion, + dns_slabheader_t *newheader, unsigned int options, bool loading, + dns_rdataset_t *addedrdataset, isc_stdtime_t now DNS__DB_FLARG); +/*%< + * Add a slab header 'newheader' to a node in an RBT database. + * The caller must have the node write-locked. + */ + +void +dns__rbtdb_setsecure(dns_db_t *db, dns_rbtdb_version_t *version, + dns_dbnode_t *origin); +/*%< + * Update the secure status for an RBT database version 'version'. 
+ * The version will be marked secure if it is fully signed and + * contains a complete NSEC/NSEC3 chain. + */ + +void +dns__rbtdb_mark(dns_slabheader_t *header, uint_least16_t flag); +/*%< + * Set attribute 'flag' in a slab header 'header' - for example, + * DNS_SLABHEADERATTR_STALE or DNS_SLABHEADERATTR_ANCIENT - and, + * in a cache database, update the rrset stats accordingly. + */ + +void +dns__rbtdb_setttl(dns_slabheader_t *header, dns_ttl_t newttl); +/*%< + * Set the TTL in a slab header 'header'. In a cache database, + * also update the TTL heap accordingly. + */ + +/* + * Functions specific to zone databases that are also called from rbtdb.c. + */ +void +dns__zonerbt_resigninsert(dns_rbtdb_t *rbtdb, int idx, + dns_slabheader_t *newheader); +void +dns__zonerbt_resigndelete(dns_rbtdb_t *rbtdb, dns_rbtdb_version_t *version, + dns_slabheader_t *header DNS__DB_FLARG); +/*%< + * Insert/delete a node from the zone database's resigning heap. + */ + +isc_result_t +dns__zonerbt_wildcardmagic(dns_rbtdb_t *rbtdb, const dns_name_t *name, + bool lock); +/*%< + * Add the necessary magic for the wildcard name 'name' + * to be found in 'rbtdb'. + * + * In order for wildcard matching to work correctly in + * zone_find(), we must ensure that a node for the wildcarding + * level exists in the database, and has its 'find_callback' + * and 'wild' bits set. + * + * E.g. if the wildcard name is "*.sub.example." then we + * must ensure that "sub.example." exists and is marked as + * a wildcard level. + * + * The tree must be write-locked. + */ +isc_result_t +dns__zonerbt_addwildcards(dns_rbtdb_t *rbtdb, const dns_name_t *name, + bool lock); +/*%< + * If 'name' is or contains a wildcard name, create a node for it in the + * database. The tree must be write-locked. 
+ */ + +/* + * Cache-specific functions that are called from rbtdb.c + */ +void +dns__cacherbt_expireheader(dns_slabheader_t *header, + isc_rwlocktype_t *tlocktypep, + dns_expire_t reason DNS__DB_FLARG); +void +dns__cacherbt_overmem(dns_rbtdb_t *rbtdb, dns_slabheader_t *newheader, + isc_rwlocktype_t *tlocktypep DNS__DB_FLARG); + +/* + * Create a new qpdata node. + */ +dns_qpdata_t * +dns_qpdata_create(dns_rbtdb_t *rbtdb, const dns_name_t *name); + +/* + * Destroy a qpdata node. + */ +void +dns_qpdata_destroy(dns_qpdata_t *qpdata); + +#ifdef DNS_DB_NODETRACE +#define dns_qpdata_ref(ptr) dns_qpdata__ref(ptr, __func__, __FILE__, __LINE__) +#define dns_qpdata_unref(ptr) \ + dns_qpdata__unref(ptr, __func__, __FILE__, __LINE__) +#define dns_qpdata_attach(ptr, ptrp) \ + dns_qpdata__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define dns_qpdata_detach(ptrp) \ + dns_qpdata__detach(ptrp, __func__, __FILE__, __LINE__) +ISC_REFCOUNT_TRACE_DECL(dns_qpdata); +#else +ISC_REFCOUNT_DECL(dns_qpdata); +#endif + +ISC_LANG_ENDDECLS