2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-09-03 16:15:27 +00:00

dns/rbt.c: Implement incremental hash table resizing

Originally, the hash table used in the RBT database would be resized when
it reached a certain number of elements (defined by overcommit).  This was
causing resolution brownouts for busy resolvers, because the rehashing
could take several seconds to complete.  This was mitigated by
pre-allocating the hash table in the RBT database used for caching to be
large enough as determined by max-cache-size.  The downside of this
solution was that the pre-allocated hash table could take a significant
chunk of the memory even when the resolver cache would be otherwise
empty, because the default value for max-cache-size is 90% of available
memory.

Implement incremental resizing[1] to perform the rehashing gradually:

 1. During the resize, allocate the new hash table, but keep the old
    table unchanged.
 2. In each lookup or delete operation, check both tables.
 3. Perform insertion operations only in the new table.
 4. At each insertion also move r elements from the old table to the new
    table.
 5. When all elements are removed from the old table, deallocate it.

To ensure that the old table is completely copied over before the new
table itself needs to be enlarged, it is necessary to increase the
size of the table by a factor of at least (r + 1)/r during resizing.

In our implementation r is equal to 1: each insertion moves the contents
of one non-empty bucket of the old table to the new table.

The downside of this approach is that both the old table and the new
table could stay in memory for a long time when there are no new
insertions into the hash table for prolonged periods, because the
incremental rehashing happens only during insertions.

The upside of this approach is that it's no longer necessary to
pre-allocate a large hash table: the RBT hash table rehashing no longer
causes resolution brownouts, and thus we can allocate the memory only
as needed.

1. https://en.m.wikipedia.org/wiki/Hash_table#Dynamic_resizing
This commit is contained in:
Ondřej Surý
2021-10-07 18:41:02 +02:00
parent 0590d71977
commit 8c819ec366
10 changed files with 318 additions and 371 deletions

View File

@@ -533,56 +533,28 @@ hashsize(dns_db_t *db) {
* determine which implementation of dns_db_*() function to call. * determine which implementation of dns_db_*() function to call.
*/ */
static dns_dbmethods_t sampledb_methods = { static dns_dbmethods_t sampledb_methods = {
attach, attach, detach, beginload,
detach, endload, dump, currentversion,
beginload, newversion, attachversion, closeversion,
endload, findnode, find, findzonecut,
dump, attachnode, detachnode, expirenode,
currentversion, printnode, createiterator, findrdataset,
newversion, allrdatasets, addrdataset, subtractrdataset,
attachversion, deleterdataset, issecure, nodecount,
closeversion, ispersistent, overmem, settask,
findnode, getoriginnode, transfernode, getnsec3parameters,
find, findnsec3node, setsigningtime, getsigningtime,
findzonecut, resigned, isdnssec, getrrsetstats,
attachnode,
detachnode,
expirenode,
printnode,
createiterator,
findrdataset,
allrdatasets,
addrdataset,
subtractrdataset,
deleterdataset,
issecure,
nodecount,
ispersistent,
overmem,
settask,
getoriginnode,
transfernode,
getnsec3parameters,
findnsec3node,
setsigningtime,
getsigningtime,
resigned,
isdnssec,
getrrsetstats,
NULL, /* rpz_attach */ NULL, /* rpz_attach */
NULL, /* rpz_ready */ NULL, /* rpz_ready */
findnodeext, findnodeext, findext, setcachestats,
findext, hashsize, NULL, /* nodefullname */
setcachestats, NULL, /* getsize */
hashsize, NULL, /* setservestalettl */
NULL, /* nodefullname */ NULL, /* getservestalettl */
NULL, /* getsize */ NULL, /* setservestalerefresh */
NULL, /* setservestalettl */ NULL, /* getservestalerefresh */
NULL, /* getservestalettl */ NULL, /* setgluecachestats */
NULL, /* setservestalerefresh */
NULL, /* getservestalerefresh */
NULL, /* setgluecachestats */
NULL /* adjusthashsize */
}; };
/* Auxiliary driver functions. */ /* Auxiliary driver functions. */

View File

@@ -879,8 +879,6 @@ dns_cache_setcachesize(dns_cache_t *cache, size_t size) {
*/ */
isc_mem_setwater(cache->mctx, water, cache, hiwater, lowater); isc_mem_setwater(cache->mctx, water, cache, hiwater, lowater);
} }
dns_db_adjusthashsize(cache->db, size);
} }
size_t size_t

View File

@@ -824,17 +824,6 @@ dns_db_hashsize(dns_db_t *db) {
return ((db->methods->hashsize)(db)); return ((db->methods->hashsize)(db));
} }
isc_result_t
dns_db_adjusthashsize(dns_db_t *db, size_t size) {
REQUIRE(DNS_DB_VALID(db));
if (db->methods->adjusthashsize != NULL) {
return ((db->methods->adjusthashsize)(db, size));
}
return (ISC_R_NOTIMPLEMENTED);
}
void void
dns_db_settask(dns_db_t *db, isc_task_t *task) { dns_db_settask(dns_db_t *db, isc_task_t *task) {
REQUIRE(DNS_DB_VALID(db)); REQUIRE(DNS_DB_VALID(db));

View File

@@ -970,7 +970,6 @@ static dns_dbmethods_t rpsdb_db_methods = {
NULL, /* setservestalerefresh */ NULL, /* setservestalerefresh */
NULL, /* getservestalerefresh */ NULL, /* getservestalerefresh */
NULL, /* setgluecachestats */ NULL, /* setgluecachestats */
NULL /* adjusthashsize */
}; };
static dns_rdatasetmethods_t rpsdb_rdataset_methods = { static dns_rdatasetmethods_t rpsdb_rdataset_methods = {

View File

@@ -184,7 +184,6 @@ typedef struct dns_dbmethods {
isc_result_t (*setservestalerefresh)(dns_db_t *db, uint32_t interval); isc_result_t (*setservestalerefresh)(dns_db_t *db, uint32_t interval);
isc_result_t (*getservestalerefresh)(dns_db_t *db, uint32_t *interval); isc_result_t (*getservestalerefresh)(dns_db_t *db, uint32_t *interval);
isc_result_t (*setgluecachestats)(dns_db_t *db, isc_stats_t *stats); isc_result_t (*setgluecachestats)(dns_db_t *db, isc_stats_t *stats);
isc_result_t (*adjusthashsize)(dns_db_t *db, size_t size);
} dns_dbmethods_t; } dns_dbmethods_t;
typedef isc_result_t (*dns_dbcreatefunc_t)(isc_mem_t *mctx, typedef isc_result_t (*dns_dbcreatefunc_t)(isc_mem_t *mctx,
@@ -1378,24 +1377,6 @@ dns_db_hashsize(dns_db_t *db);
* 0 if not implemented. * 0 if not implemented.
*/ */
isc_result_t
dns_db_adjusthashsize(dns_db_t *db, size_t size);
/*%<
* For database implementations using a hash table, adjust the size of
* the hash table to store objects with a maximum total memory footprint
* of 'size' bytes. If 'size' is set to 0, it means no finite limit is
* requested.
*
* Requires:
*
* \li 'db' is a valid database.
* \li 'size' is maximum memory footprint of the database in bytes
*
* Returns:
* \li #ISC_R_SUCCESS The registration succeeded
* \li #ISC_R_NOMEMORY Out of memory
*/
void void
dns_db_settask(dns_db_t *db, isc_task_t *task); dns_db_settask(dns_db_t *db, isc_task_t *task);
/*%< /*%<

View File

@@ -670,17 +670,6 @@ dns_rbt_hashsize(dns_rbt_t *rbt);
* \li rbt is a valid rbt manager. * \li rbt is a valid rbt manager.
*/ */
isc_result_t
dns_rbt_adjusthashsize(dns_rbt_t *rbt, size_t size);
/*%<
* Adjust the number of buckets in the 'rbt' hash table, according to the
* expected maximum size of the rbt database.
*
* Requires:
* \li rbt is a valid rbt manager.
* \li size is expected maximum memory footprint of rbt.
*/
void void
dns_rbt_destroy(dns_rbt_t **rbtp); dns_rbt_destroy(dns_rbt_t **rbtp);
isc_result_t isc_result_t

View File

@@ -59,10 +59,12 @@
#define CHAIN_MAGIC ISC_MAGIC('0', '-', '0', '-') #define CHAIN_MAGIC ISC_MAGIC('0', '-', '0', '-')
#define VALID_CHAIN(chain) ISC_MAGIC_VALID(chain, CHAIN_MAGIC) #define VALID_CHAIN(chain) ISC_MAGIC_VALID(chain, CHAIN_MAGIC)
#define RBT_HASH_NO_BITS 0
#define RBT_HASH_MIN_BITS 4 #define RBT_HASH_MIN_BITS 4
#define RBT_HASH_MAX_BITS 32 #define RBT_HASH_MAX_BITS 32
#define RBT_HASH_OVERCOMMIT 3 #define RBT_HASH_OVERCOMMIT 3
#define RBT_HASH_BUCKETSIZE 4096 /* FIXME: What would be a good value here? */
#define RBT_HASH_NEXTTABLE(hindex) ((hindex == 0) ? 1 : 0)
#ifdef RBT_MEM_TEST #ifdef RBT_MEM_TEST
#undef RBT_HASH_SIZE #undef RBT_HASH_SIZE
@@ -87,10 +89,10 @@ struct dns_rbt {
void (*data_deleter)(void *, void *); void (*data_deleter)(void *, void *);
void *deleter_arg; void *deleter_arg;
unsigned int nodecount; unsigned int nodecount;
uint16_t hashbits; uint8_t hashbits[2];
uint16_t maxhashbits; dns_rbtnode_t **hashtable[2];
dns_rbtnode_t **hashtable; uint8_t hindex;
void *mmap_location; uint32_t hiter;
}; };
#define RED 0 #define RED 0
@@ -234,8 +236,10 @@ dns__rbtnode_getdistance(dns_rbtnode_t *node) {
static isc_result_t static isc_result_t
create_node(isc_mem_t *mctx, const dns_name_t *name, dns_rbtnode_t **nodep); create_node(isc_mem_t *mctx, const dns_name_t *name, dns_rbtnode_t **nodep);
static isc_result_t static inline void
inithash(dns_rbt_t *rbt); hashtable_new(dns_rbt_t *rbt, uint8_t index, uint8_t bits);
static inline void
hashtable_free(dns_rbt_t *rbt, uint8_t index);
static inline void static inline void
hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name); hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name);
@@ -246,9 +250,17 @@ unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *node);
static uint32_t static uint32_t
rehash_bits(dns_rbt_t *rbt, size_t newcount); rehash_bits(dns_rbt_t *rbt, size_t newcount);
static void static void
rehash(dns_rbt_t *rbt, uint32_t newbits); hashtable_rehash(dns_rbt_t *rbt, uint32_t newbits);
static void
hashtable_rehash_one(dns_rbt_t *rbt);
static void static void
maybe_rehash(dns_rbt_t *rbt, size_t size); maybe_rehash(dns_rbt_t *rbt, size_t size);
static inline bool
rehashing_in_progress(dns_rbt_t *rbt);
#define TRY_NEXTTABLE(hindex, rbt) \
(ISC_LIKELY(hindex == rbt->hindex) && \
ISC_UNLIKELY(rehashing_in_progress(rbt)))
static inline void static inline void
rotate_left(dns_rbtnode_t *node, dns_rbtnode_t **rootp); rotate_left(dns_rbtnode_t *node, dns_rbtnode_t **rootp);
@@ -302,7 +314,6 @@ dns__rbtnode_namelen(dns_rbtnode_t *node) {
isc_result_t isc_result_t
dns_rbt_create(isc_mem_t *mctx, dns_rbtdeleter_t deleter, void *deleter_arg, dns_rbt_create(isc_mem_t *mctx, dns_rbtdeleter_t deleter, void *deleter_arg,
dns_rbt_t **rbtp) { dns_rbt_t **rbtp) {
isc_result_t result;
dns_rbt_t *rbt; dns_rbt_t *rbt;
REQUIRE(mctx != NULL); REQUIRE(mctx != NULL);
@@ -310,23 +321,14 @@ dns_rbt_create(isc_mem_t *mctx, dns_rbtdeleter_t deleter, void *deleter_arg,
REQUIRE(deleter == NULL ? deleter_arg == NULL : 1); REQUIRE(deleter == NULL ? deleter_arg == NULL : 1);
rbt = isc_mem_get(mctx, sizeof(*rbt)); rbt = isc_mem_get(mctx, sizeof(*rbt));
*rbt = (dns_rbt_t){
.data_deleter = deleter,
.deleter_arg = deleter_arg,
};
rbt->mctx = NULL;
isc_mem_attach(mctx, &rbt->mctx); isc_mem_attach(mctx, &rbt->mctx);
rbt->data_deleter = deleter;
rbt->deleter_arg = deleter_arg;
rbt->root = NULL;
rbt->nodecount = 0;
rbt->hashtable = NULL;
rbt->hashbits = 0;
rbt->maxhashbits = RBT_HASH_MAX_BITS;
rbt->mmap_location = NULL;
result = inithash(rbt); hashtable_new(rbt, 0, RBT_HASH_MIN_BITS);
if (result != ISC_R_SUCCESS) {
isc_mem_putanddetach(&rbt->mctx, rbt, sizeof(*rbt));
return (result);
}
rbt->magic = RBT_MAGIC; rbt->magic = RBT_MAGIC;
@@ -360,11 +362,11 @@ dns_rbt_destroy2(dns_rbt_t **rbtp, unsigned int quantum) {
INSIST(rbt->nodecount == 0); INSIST(rbt->nodecount == 0);
rbt->mmap_location = NULL; if (rbt->hashtable[0] != NULL) {
hashtable_free(rbt, 0);
if (rbt->hashtable != NULL) { }
size_t size = HASHSIZE(rbt->hashbits) * sizeof(dns_rbtnode_t *); if (rbt->hashtable[1] != NULL) {
isc_mem_put(rbt->mctx, rbt->hashtable, size); hashtable_free(rbt, 1);
} }
rbt->magic = 0; rbt->magic = 0;
@@ -384,37 +386,11 @@ size_t
dns_rbt_hashsize(dns_rbt_t *rbt) { dns_rbt_hashsize(dns_rbt_t *rbt) {
REQUIRE(VALID_RBT(rbt)); REQUIRE(VALID_RBT(rbt));
return (1 << rbt->hashbits); uint8_t hashbits = (rbt->hashbits[0] > rbt->hashbits[1])
} ? rbt->hashbits[0]
: rbt->hashbits[1];
isc_result_t return (1 << hashbits);
dns_rbt_adjusthashsize(dns_rbt_t *rbt, size_t size) {
REQUIRE(VALID_RBT(rbt));
if (size > 0) {
/*
* Setting a new, finite size limit was requested for the RBT.
* Estimate how many hash table slots are needed for the
* requested size and how many bits would be needed to index
* those hash table slots, then rehash the RBT if necessary.
* Note that the hash table can only grow, it is not shrunk if
* the requested size limit is lower than the current one.
*/
size_t newsize = size / RBT_HASH_BUCKETSIZE;
rbt->maxhashbits = rehash_bits(rbt, newsize);
maybe_rehash(rbt, newsize);
} else {
/*
* Setting an infinite size limit was requested for the RBT.
* Increase the maximum allowed number of hash table slots to
* 2^32, which enables the hash table to grow as nodes are
* added to the RBT without immediately preallocating 2^32 hash
* table slots.
*/
rbt->maxhashbits = RBT_HASH_MAX_BITS;
}
return (ISC_R_SUCCESS);
} }
static inline isc_result_t static inline isc_result_t
@@ -831,6 +807,7 @@ dns_rbt_findnode(dns_rbt_t *rbt, const dns_name_t *name, dns_name_t *foundname,
unsigned int common_labels; unsigned int common_labels;
unsigned int hlabels = 0; unsigned int hlabels = 0;
int order; int order;
uint8_t hindex;
REQUIRE(VALID_RBT(rbt)); REQUIRE(VALID_RBT(rbt));
REQUIRE(dns_name_isabsolute(name)); REQUIRE(dns_name_isabsolute(name));
@@ -913,6 +890,7 @@ dns_rbt_findnode(dns_rbt_t *rbt, const dns_name_t *name, dns_name_t *foundname,
dns_rbtnode_t *up_current; dns_rbtnode_t *up_current;
unsigned int nlabels; unsigned int nlabels;
unsigned int tlabels = 1; unsigned int tlabels = 1;
uint32_t hashval;
uint32_t hash; uint32_t hash;
/* /*
@@ -936,6 +914,7 @@ dns_rbt_findnode(dns_rbt_t *rbt, const dns_name_t *name, dns_name_t *foundname,
dns_name_init(&hash_name, NULL); dns_name_init(&hash_name, NULL);
hashagain: hashagain:
hindex = rbt->hindex;
/* /*
* Compute the hash over the full absolute * Compute the hash over the full absolute
* name. Look for the smallest suffix match at * name. Look for the smallest suffix match at
@@ -947,30 +926,33 @@ dns_rbt_findnode(dns_rbt_t *rbt, const dns_name_t *name, dns_name_t *foundname,
dns_name_getlabelsequence(name, nlabels - tlabels, dns_name_getlabelsequence(name, nlabels - tlabels,
hlabels + tlabels, hlabels + tlabels,
&hash_name); &hash_name);
hash = dns_name_fullhash(&hash_name, false); hashval = dns_name_fullhash(&hash_name, false);
dns_name_getlabelsequence(search_name, dns_name_getlabelsequence(search_name,
nlabels - tlabels, tlabels, nlabels - tlabels, tlabels,
&hash_name); &hash_name);
nexttable:
/* /*
* Walk all the nodes in the hash bucket pointed * Walk all the nodes in the hash bucket pointed
* by the computed hash value. * by the computed hash value.
*/ */
for (hnode = rbt->hashtable[hash_32(hash,
rbt->hashbits)]; hash = hash_32(hashval, rbt->hashbits[hindex]);
hnode != NULL; hnode = hnode->hashnext)
for (hnode = rbt->hashtable[hindex][hash];
hnode != NULL; hnode = HASHNEXT(hnode))
{ {
dns_name_t hnode_name; dns_name_t hnode_name;
if (ISC_LIKELY(hash != HASHVAL(hnode))) { if (ISC_LIKELY(hashval != HASHVAL(hnode))) {
continue; continue;
} }
/* /*
* This checks that the hashed label * This checks that the hashed label sequence
* sequence being looked up is at the * being looked up is at the same tree level, so
* same tree level, so that we don't * that we don't match a labelsequence from some
* match a labelsequence from some other * other subdomain.
* subdomain.
*/ */
if (ISC_LIKELY(get_upper_node(hnode) != if (ISC_LIKELY(get_upper_node(hnode) !=
up_current)) { up_current)) {
@@ -1007,6 +989,14 @@ dns_rbt_findnode(dns_rbt_t *rbt, const dns_name_t *name, dns_name_t *foundname,
} }
} }
if (TRY_NEXTTABLE(hindex, rbt)) {
/*
* Rehashing in progress, check the other table
*/
hindex = RBT_HASH_NEXTTABLE(rbt->hindex);
goto nexttable;
}
if (tlabels++ < nlabels) { if (tlabels++ < nlabels) {
goto hashagain; goto hashagain;
} }
@@ -1634,30 +1624,43 @@ hash_add_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name) {
HASHVAL(node) = dns_name_fullhash(name, false); HASHVAL(node) = dns_name_fullhash(name, false);
hash = hash_32(HASHVAL(node), rbt->hashbits); hash = hash_32(HASHVAL(node), rbt->hashbits[rbt->hindex]);
HASHNEXT(node) = rbt->hashtable[hash]; HASHNEXT(node) = rbt->hashtable[rbt->hindex][hash];
rbt->hashtable[hash] = node; rbt->hashtable[rbt->hindex][hash] = node;
} }
/* /*
* Initialize hash table * Initialize hash table
*/ */
static isc_result_t static inline void
inithash(dns_rbt_t *rbt) { hashtable_new(dns_rbt_t *rbt, uint8_t index, uint8_t bits) {
size_t size; size_t size;
rbt->hashbits = RBT_HASH_MIN_BITS; REQUIRE(rbt->hashbits[index] == RBT_HASH_NO_BITS);
size = HASHSIZE(rbt->hashbits) * sizeof(dns_rbtnode_t *); REQUIRE(rbt->hashtable[index] == NULL);
rbt->hashtable = isc_mem_get(rbt->mctx, size); REQUIRE(bits >= RBT_HASH_MIN_BITS);
memset(rbt->hashtable, 0, size); REQUIRE(bits < RBT_HASH_MAX_BITS);
return (ISC_R_SUCCESS); rbt->hashbits[index] = bits;
size = HASHSIZE(rbt->hashbits[index]) * sizeof(dns_rbtnode_t *);
rbt->hashtable[index] = isc_mem_get(rbt->mctx, size);
memset(rbt->hashtable[index], 0, size);
}
static inline void
hashtable_free(dns_rbt_t *rbt, uint8_t index) {
size_t size = HASHSIZE(rbt->hashbits[index]) * sizeof(dns_rbtnode_t *);
isc_mem_put(rbt->mctx, rbt->hashtable[index], size);
rbt->hashbits[index] = RBT_HASH_NO_BITS;
rbt->hashtable[index] = NULL;
} }
static uint32_t static uint32_t
rehash_bits(dns_rbt_t *rbt, size_t newcount) { rehash_bits(dns_rbt_t *rbt, size_t newcount) {
uint32_t newbits = rbt->hashbits; uint32_t newbits = rbt->hashbits[rbt->hindex];
while (newcount >= HASHSIZE(newbits) && newbits < RBT_HASH_MAX_BITS) { while (newcount >= HASHSIZE(newbits) && newbits < RBT_HASH_MAX_BITS) {
newbits += 1; newbits += 1;
@@ -1670,47 +1673,85 @@ rehash_bits(dns_rbt_t *rbt, size_t newcount) {
* Rebuild the hashtable to reduce the load factor * Rebuild the hashtable to reduce the load factor
*/ */
static void static void
rehash(dns_rbt_t *rbt, uint32_t newbits) { hashtable_rehash(dns_rbt_t *rbt, uint32_t newbits) {
uint32_t oldbits; uint8_t oldindex = rbt->hindex;
size_t oldsize; uint32_t oldbits = rbt->hashbits[oldindex];
dns_rbtnode_t **oldtable; uint8_t newindex = RBT_HASH_NEXTTABLE(oldindex);
size_t newsize;
REQUIRE(rbt->hashbits <= rbt->maxhashbits); REQUIRE(rbt->hashbits[oldindex] >= RBT_HASH_MIN_BITS);
REQUIRE(newbits <= rbt->maxhashbits); REQUIRE(rbt->hashbits[oldindex] <= RBT_HASH_MAX_BITS);
REQUIRE(rbt->hashtable[oldindex] != NULL);
oldbits = rbt->hashbits; REQUIRE(newbits <= RBT_HASH_MAX_BITS);
oldsize = HASHSIZE(oldbits); REQUIRE(rbt->hashbits[newindex] == RBT_HASH_NO_BITS);
oldtable = rbt->hashtable; REQUIRE(rbt->hashtable[newindex] == NULL);
rbt->hashbits = newbits; REQUIRE(newbits > oldbits);
newsize = HASHSIZE(rbt->hashbits);
rbt->hashtable = isc_mem_get(rbt->mctx,
newsize * sizeof(dns_rbtnode_t *));
memset(rbt->hashtable, 0, newsize * sizeof(dns_rbtnode_t *));
for (size_t i = 0; i < oldsize; i++) { hashtable_new(rbt, newindex, newbits);
dns_rbtnode_t *node;
dns_rbtnode_t *nextnode; rbt->hindex = newindex;
for (node = oldtable[i]; node != NULL; node = nextnode) {
uint32_t hash = hash_32(HASHVAL(node), rbt->hashbits); hashtable_rehash_one(rbt);
nextnode = HASHNEXT(node); }
HASHNEXT(node) = rbt->hashtable[hash];
rbt->hashtable[hash] = node; static void
} hashtable_rehash_one(dns_rbt_t *rbt) {
dns_rbtnode_t **newtable = rbt->hashtable[rbt->hindex];
uint32_t oldsize =
HASHSIZE(rbt->hashbits[RBT_HASH_NEXTTABLE(rbt->hindex)]);
dns_rbtnode_t **oldtable =
rbt->hashtable[RBT_HASH_NEXTTABLE(rbt->hindex)];
dns_rbtnode_t *node = NULL;
dns_rbtnode_t *nextnode;
/* Find first non-empty node */
while (rbt->hiter < oldsize && oldtable[rbt->hiter] == NULL) {
rbt->hiter++;
} }
isc_mem_put(rbt->mctx, oldtable, oldsize * sizeof(dns_rbtnode_t *)); /* Rehashing complete */
if (rbt->hiter == oldsize) {
hashtable_free(rbt, RBT_HASH_NEXTTABLE(rbt->hindex));
rbt->hiter = 0;
return;
}
/* Move the first non-empty node from old hashtable to new hashtable */
for (node = oldtable[rbt->hiter]; node != NULL; node = nextnode) {
uint32_t hash = hash_32(HASHVAL(node),
rbt->hashbits[rbt->hindex]);
nextnode = HASHNEXT(node);
HASHNEXT(node) = newtable[hash];
newtable[hash] = node;
}
oldtable[rbt->hiter] = NULL;
rbt->hiter++;
} }
static void static void
maybe_rehash(dns_rbt_t *rbt, size_t newcount) { maybe_rehash(dns_rbt_t *rbt, size_t newcount) {
uint32_t newbits = rehash_bits(rbt, newcount); uint32_t newbits = rehash_bits(rbt, newcount);
if (rbt->hashbits < newbits && newbits <= rbt->maxhashbits) {
rehash(rbt, newbits); if (rbt->hashbits[rbt->hindex] < newbits &&
newbits <= RBT_HASH_MAX_BITS) {
hashtable_rehash(rbt, newbits);
} }
} }
static inline bool
rehashing_in_progress(dns_rbt_t *rbt) {
return (rbt->hashtable[RBT_HASH_NEXTTABLE(rbt->hindex)] != NULL);
}
static inline bool
hashtable_is_overcommited(dns_rbt_t *rbt) {
return (rbt->nodecount >=
(HASHSIZE(rbt->hashbits[rbt->hindex]) * RBT_HASH_OVERCOMMIT));
}
/* /*
* Add a node to the hash table. Rehash the hashtable if the node count * Add a node to the hash table. Rehash the hashtable if the node count
* rises above a critical level. * rises above a critical level.
@@ -1719,7 +1760,11 @@ static inline void
hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name) { hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name) {
REQUIRE(DNS_RBTNODE_VALID(node)); REQUIRE(DNS_RBTNODE_VALID(node));
if (rbt->nodecount >= (HASHSIZE(rbt->hashbits) * RBT_HASH_OVERCOMMIT)) { if (ISC_UNLIKELY(rehashing_in_progress(rbt))) {
/* Rehash in progress */
hashtable_rehash_one(rbt);
} else if (ISC_UNLIKELY(hashtable_is_overcommited(rbt))) {
/* Rehash requested */
maybe_rehash(rbt, rbt->nodecount); maybe_rehash(rbt, rbt->nodecount);
} }
@@ -1730,24 +1775,45 @@ hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name) {
* Remove a node from the hash table * Remove a node from the hash table
*/ */
static inline void static inline void
unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *node) { unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *dnode) {
uint32_t bucket; uint32_t hash;
dns_rbtnode_t *bucket_node; uint8_t hindex = rbt->hindex;
dns_rbtnode_t *hnode;
REQUIRE(DNS_RBTNODE_VALID(node)); REQUIRE(DNS_RBTNODE_VALID(dnode));
bucket = hash_32(HASHVAL(node), rbt->hashbits); /*
bucket_node = rbt->hashtable[bucket]; * The node could be either in:
* a) current table: no rehashing in progress, or
* b) current table: the node has been already moved, or
* c) other table: the node hasn't been moved yet.
*/
nexttable:
hash = hash_32(HASHVAL(dnode), rbt->hashbits[hindex]);
if (bucket_node == node) { hnode = rbt->hashtable[hindex][hash];
rbt->hashtable[bucket] = HASHNEXT(node);
if (hnode == dnode) {
rbt->hashtable[hindex][hash] = HASHNEXT(hnode);
return;
} else { } else {
while (HASHNEXT(bucket_node) != node) { for (; hnode != NULL; hnode = HASHNEXT(hnode)) {
INSIST(HASHNEXT(bucket_node) != NULL); if (HASHNEXT(hnode) == dnode) {
bucket_node = HASHNEXT(bucket_node); HASHNEXT(hnode) = HASHNEXT(dnode);
return;
}
} }
HASHNEXT(bucket_node) = HASHNEXT(node);
} }
if (TRY_NEXTTABLE(hindex, rbt)) {
/* Rehashing in progress, delete from the other table */
hindex = RBT_HASH_NEXTTABLE(hindex);
goto nexttable;
}
/* We haven't found any matching node, this should not be possible. */
INSIST(0);
ISC_UNREACHABLE();
} }
static inline void static inline void
@@ -2532,7 +2598,6 @@ print_dot_helper(dns_rbtnode_t *node, unsigned int *nodecount,
fprintf(f, "\"node%u\":f1 -> \"node%u\":f1 [penwidth=5];\n", fprintf(f, "\"node%u\":f1 -> \"node%u\":f1 [penwidth=5];\n",
*nodecount, d); *nodecount, d);
} }
if (RIGHT(node) != NULL) { if (RIGHT(node) != NULL) {
fprintf(f, "\"node%u\":f2 -> \"node%u\":f1;\n", *nodecount, r); fprintf(f, "\"node%u\":f2 -> \"node%u\":f1;\n", *nodecount, r);
} }
@@ -2597,7 +2662,8 @@ dns_rbtnodechain_current(dns_rbtnodechain_t *chain, dns_name_t *name,
INSIST(dns_name_isabsolute(name)); INSIST(dns_name_isabsolute(name));
/* /*
* This is cheaper than dns_name_getlabelsequence(). * This is cheaper than
* dns_name_getlabelsequence().
*/ */
name->labels--; name->labels--;
name->length--; name->length--;
@@ -2643,10 +2709,10 @@ dns_rbtnodechain_prev(dns_rbtnodechain_t *chain, dns_name_t *name,
predecessor = current; predecessor = current;
} else { } else {
/* /*
* No left links, so move toward the root. If at any point on * No left links, so move toward the root. If at any
* the way there the link from parent to child is a right * point on the way there the link from parent to child
* link, then the parent is the previous node, at least * is a right link, then the parent is the previous
* for this level. * node, at least for this level.
*/ */
while (!IS_ROOT(current)) { while (!IS_ROOT(current)) {
previous = current; previous = current;
@@ -2666,16 +2732,17 @@ dns_rbtnodechain_prev(dns_rbtnodechain_t *chain, dns_name_t *name,
*/ */
if (DOWN(predecessor) != NULL) { if (DOWN(predecessor) != NULL) {
/* /*
* The predecessor is really down at least one level. * The predecessor is really down at least one
* Go down and as far right as possible, and repeat * level. Go down and as far right as possible,
* as long as the rightmost node has a down pointer. * and repeat as long as the rightmost node has
* a down pointer.
*/ */
do { do {
/* /*
* XXX DCL Need to do something about origins * XXX DCL Need to do something about
* here. See whether to go down, and if so * origins here. See whether to go down,
* whether it is truly what Bob calls a * and if so whether it is truly what
* new origin. * Bob calls a new origin.
*/ */
ADD_LEVEL(chain, predecessor); ADD_LEVEL(chain, predecessor);
predecessor = DOWN(predecessor); predecessor = DOWN(predecessor);
@@ -2696,18 +2763,19 @@ dns_rbtnodechain_prev(dns_rbtnodechain_t *chain, dns_name_t *name,
} else if (chain->level_count > 0) { } else if (chain->level_count > 0) {
/* /*
* Dang, didn't find a predecessor in this level. * Dang, didn't find a predecessor in this level.
* Got to the root of this level without having traversed * Got to the root of this level without having
* any right links. Ascend the tree one level; the * traversed any right links. Ascend the tree one
* node that points to this tree is the predecessor. * level; the node that points to this tree is the
* predecessor.
*/ */
INSIST(chain->level_count > 0 && IS_ROOT(current)); INSIST(chain->level_count > 0 && IS_ROOT(current));
predecessor = chain->levels[--chain->level_count]; predecessor = chain->levels[--chain->level_count];
/* XXX DCL probably needs work on the concept */ /* XXX DCL probably needs work on the concept */
/* /*
* Don't declare an origin change when the new origin is "." * Don't declare an origin change when the new origin is
* at the top level tree, because "." is declared as the origin * "." at the top level tree, because "." is declared as
* for the second level tree. * the origin for the second level tree.
*/ */
if (origin != NULL && if (origin != NULL &&
(chain->level_count > 0 || OFFSETLEN(predecessor) > 1)) { (chain->level_count > 0 || OFFSETLEN(predecessor) > 1)) {
@@ -2750,9 +2818,9 @@ dns_rbtnodechain_down(dns_rbtnodechain_t *chain, dns_name_t *name,
if (DOWN(current) != NULL) { if (DOWN(current) != NULL) {
/* /*
* Don't declare an origin change when the new origin is "." * Don't declare an origin change when the new origin is
* at the second level tree, because "." is already declared * "." at the second level tree, because "." is already
* as the origin for the top level tree. * declared as the origin for the top level tree.
*/ */
if (chain->level_count > 0 || OFFSETLEN(current) > 1) { if (chain->level_count > 0 || OFFSETLEN(current) > 1) {
new_origin = true; new_origin = true;
@@ -2772,12 +2840,12 @@ dns_rbtnodechain_down(dns_rbtnodechain_t *chain, dns_name_t *name,
chain->end = successor; chain->end = successor;
/* /*
* It is not necessary to use dns_rbtnodechain_current like * It is not necessary to use dns_rbtnodechain_current
* the other functions because this function will never * like the other functions because this function will
* find a node in the topmost level. This is because the * never find a node in the topmost level. This is
* root level will never be more than one name, and everything * because the root level will never be more than one
* in the megatree is a successor to that node, down at * name, and everything in the megatree is a successor
* the second level or below. * to that node, down at the second level or below.
*/ */
if (name != NULL) { if (name != NULL) {
@@ -2862,14 +2930,14 @@ dns_rbtnodechain_next(dns_rbtnodechain_t *chain, dns_name_t *name,
current = chain->end; current = chain->end;
/* /*
* If there is a level below this node, the next node is the leftmost * If there is a level below this node, the next node is the
* node of the next level. * leftmost node of the next level.
*/ */
if (DOWN(current) != NULL) { if (DOWN(current) != NULL) {
/* /*
* Don't declare an origin change when the new origin is "." * Don't declare an origin change when the new origin is
* at the second level tree, because "." is already declared * "." at the second level tree, because "." is already
* as the origin for the top level tree. * declared as the origin for the top level tree.
*/ */
if (chain->level_count > 0 || OFFSETLEN(current) > 1) { if (chain->level_count > 0 || OFFSETLEN(current) > 1) {
new_origin = true; new_origin = true;
@@ -2885,13 +2953,14 @@ dns_rbtnodechain_next(dns_rbtnodechain_t *chain, dns_name_t *name,
successor = current; successor = current;
} else if (RIGHT(current) == NULL) { } else if (RIGHT(current) == NULL) {
/* /*
* The successor is up, either in this level or a previous one. * The successor is up, either in this level or a
* Head back toward the root of the tree, looking for any path * previous one. Head back toward the root of the tree,
* that was via a left link; the successor is the node that has * looking for any path that was via a left link; the
* that left link. In the event the root of the level is * successor is the node that has that left link. In
* reached without having traversed any left links, ascend one * the event the root of the level is reached without
* level and look for either a right link off the point of * having traversed any left links, ascend one level and
* ascent, or search for a left link upward again, repeating * look for either a right link off the point of ascent,
* or search for a left link upward again, repeating
* ascends until either case is true. * ascends until either case is true.
*/ */
do { do {
@@ -2907,21 +2976,25 @@ dns_rbtnodechain_next(dns_rbtnodechain_t *chain, dns_name_t *name,
if (successor == NULL) { if (successor == NULL) {
/* /*
* Reached the root without having traversed * Reached the root without having
* any left pointers, so this level is done. * traversed any left pointers, so this
* level is done.
*/ */
if (chain->level_count == 0) { if (chain->level_count == 0) {
/* /*
* If the tree we are iterating over * If the tree we are iterating
* was modified since this chain was * over was modified since this
* initialized in a way that caused * chain was initialized in a
* node splits to occur, "current" may * way that caused node splits
* now be pointing to a root node which * to occur, "current" may now
* appears to be at level 0, but still * be pointing to a root node
* has a parent. If that happens, * which appears to be at level
* abort. Otherwise, we are done * 0, but still has a parent. If
* looking for a successor as we really * that happens, abort.
* reached the root node on level 0. * Otherwise, we are done
* looking for a successor as we
* really reached the root node
* on level 0.
*/ */
INSIST(PARENT(current) == NULL); INSIST(PARENT(current) == NULL);
break; break;
@@ -2949,20 +3022,21 @@ dns_rbtnodechain_next(dns_rbtnodechain_t *chain, dns_name_t *name,
if (successor != NULL) { if (successor != NULL) {
/* /*
* If we determine that the current node is the successor to * If we determine that the current node is the
* itself, we will run into an infinite loop, so abort instead. * successor to itself, we will run into an infinite
* loop, so abort instead.
*/ */
INSIST(chain->end != successor); INSIST(chain->end != successor);
chain->end = successor; chain->end = successor;
/* /*
* It is not necessary to use dns_rbtnodechain_current like * It is not necessary to use dns_rbtnodechain_current
* the other functions because this function will never * like the other functions because this function will
* find a node in the topmost level. This is because the * never find a node in the topmost level. This is
* root level will never be more than one name, and everything * because the root level will never be more than one
* in the megatree is a successor to that node, down at * name, and everything in the megatree is a successor
* the second level or below. * to that node, down at the second level or below.
*/ */
if (name != NULL) { if (name != NULL) {

View File

@@ -7592,22 +7592,6 @@ hashsize(dns_db_t *db) {
return (size); return (size);
} }
/*
 * dns_db method: forward a hash-size adjustment request to the RBT tree
 * backing this database.
 *
 * 'db' must be a valid RBT database (checked with VALID_RBTDB).  The
 * call to dns_rbt_adjusthashsize() is made while holding the tree lock
 * in write mode, and its result is returned to the caller unchanged.
 */
static isc_result_t
adjusthashsize(dns_db_t *db, size_t size) {
	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
	isc_result_t status;

	REQUIRE(VALID_RBTDB(rbtdb));

	/* The tree is handed to the resize routine under the write lock. */
	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
	status = dns_rbt_adjusthashsize(rbtdb->tree, size);
	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);

	return (status);
}
static void static void
settask(dns_db_t *db, isc_task_t *task) { settask(dns_db_t *db, isc_task_t *task) {
dns_rbtdb_t *rbtdb; dns_rbtdb_t *rbtdb;
@@ -8037,8 +8021,7 @@ static dns_dbmethods_t zone_methods = { attach,
NULL, /* getservestalettl */ NULL, /* getservestalettl */
NULL, /* setservestalerefresh */ NULL, /* setservestalerefresh */
NULL, /* getservestalerefresh */ NULL, /* getservestalerefresh */
setgluecachestats, setgluecachestats };
adjusthashsize };
static dns_dbmethods_t cache_methods = { attach, static dns_dbmethods_t cache_methods = { attach,
detach, detach,
@@ -8088,8 +8071,7 @@ static dns_dbmethods_t cache_methods = { attach,
getservestalettl, getservestalettl,
setservestalerefresh, setservestalerefresh,
getservestalerefresh, getservestalerefresh,
NULL, NULL };
adjusthashsize };
isc_result_t isc_result_t
dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type, dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,

View File

@@ -1262,33 +1262,20 @@ settask(dns_db_t *db, isc_task_t *task) {
} }
static dns_dbmethods_t sdb_methods = { static dns_dbmethods_t sdb_methods = {
attach, attach, detach,
detach, beginload, endload,
beginload, dump, currentversion,
endload, newversion, attachversion,
dump, closeversion, NULL, /* findnode */
currentversion, NULL, /* find */
newversion, findzonecut, attachnode,
attachversion, detachnode, expirenode,
closeversion, printnode, createiterator,
NULL, /* findnode */ findrdataset, allrdatasets,
NULL, /* find */ addrdataset, subtractrdataset,
findzonecut, deleterdataset, issecure,
attachnode, nodecount, ispersistent,
detachnode, overmem, settask,
expirenode,
printnode,
createiterator,
findrdataset,
allrdatasets,
addrdataset,
subtractrdataset,
deleterdataset,
issecure,
nodecount,
ispersistent,
overmem,
settask,
getoriginnode, /* getoriginnode */ getoriginnode, /* getoriginnode */
NULL, /* transfernode */ NULL, /* transfernode */
NULL, /* getnsec3parameters */ NULL, /* getnsec3parameters */
@@ -1300,8 +1287,7 @@ static dns_dbmethods_t sdb_methods = {
NULL, /* getrrsetstats */ NULL, /* getrrsetstats */
NULL, /* rpz_attach */ NULL, /* rpz_attach */
NULL, /* rpz_ready */ NULL, /* rpz_ready */
findnodeext, findnodeext, findext,
findext,
NULL, /* setcachestats */ NULL, /* setcachestats */
NULL, /* hashsize */ NULL, /* hashsize */
NULL, /* nodefullname */ NULL, /* nodefullname */
@@ -1311,7 +1297,6 @@ static dns_dbmethods_t sdb_methods = {
NULL, /* setservestalerefresh */ NULL, /* setservestalerefresh */
NULL, /* getservestalerefresh */ NULL, /* getservestalerefresh */
NULL, /* setgluecachestats */ NULL, /* setgluecachestats */
NULL /* adjusthashsize */
}; };
static isc_result_t static isc_result_t

View File

@@ -1234,56 +1234,34 @@ getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
} }
static dns_dbmethods_t sdlzdb_methods = { static dns_dbmethods_t sdlzdb_methods = {
attach, attach, detach, beginload,
detach, endload, dump, currentversion,
beginload, newversion, attachversion, closeversion,
endload, findnode, find, findzonecut,
dump, attachnode, detachnode, expirenode,
currentversion, printnode, createiterator, findrdataset,
newversion, allrdatasets, addrdataset, subtractrdataset,
attachversion, deleterdataset, issecure, nodecount,
closeversion, ispersistent, overmem, settask,
findnode, getoriginnode, NULL, /* transfernode */
find, NULL, /* getnsec3parameters */
findzonecut, NULL, /* findnsec3node */
attachnode, NULL, /* setsigningtime */
detachnode, NULL, /* getsigningtime */
expirenode, NULL, /* resigned */
printnode, NULL, /* isdnssec */
createiterator, NULL, /* getrrsetstats */
findrdataset, NULL, /* rpz_attach */
allrdatasets, NULL, /* rpz_ready */
addrdataset, findnodeext, findext, NULL, /* setcachestats */
subtractrdataset, NULL, /* hashsize */
deleterdataset, NULL, /* nodefullname */
issecure, NULL, /* getsize */
nodecount, NULL, /* setservestalettl */
ispersistent, NULL, /* getservestalettl */
overmem, NULL, /* setservestalerefresh */
settask, NULL, /* getservestalerefresh */
getoriginnode, NULL, /* setgluecachestats */
NULL, /* transfernode */
NULL, /* getnsec3parameters */
NULL, /* findnsec3node */
NULL, /* setsigningtime */
NULL, /* getsigningtime */
NULL, /* resigned */
NULL, /* isdnssec */
NULL, /* getrrsetstats */
NULL, /* rpz_attach */
NULL, /* rpz_ready */
findnodeext,
findext,
NULL, /* setcachestats */
NULL, /* hashsize */
NULL, /* nodefullname */
NULL, /* getsize */
NULL, /* setservestalettl */
NULL, /* getservestalettl */
NULL, /* setservestalerefresh */
NULL, /* getservestalerefresh */
NULL, /* setgluecachestats */
NULL /* adjusthashsize */
}; };
/* /*