diff --git a/CHANGES b/CHANGES index f0bba520c2..a39958671f 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,10 @@ +4277. [performance] Improve performance of the RBT, the central zone + data structure: The aux hashtable was improved, + the hash function was updated to produce a more + uniform mapping, uppernode was added to + dns_rbtnode, and other cleanups and performance + improvements were made. [RT #41165] + 4276. [protocol] Add support for SMIMEA. [RT #40513] 4275. [performance] Lazily initialize dns_compress->table only when diff --git a/bin/tests/system/dyndb/driver/db.c b/bin/tests/system/dyndb/driver/db.c index 6799d3b2e2..317674b393 100644 --- a/bin/tests/system/dyndb/driver/db.c +++ b/bin/tests/system/dyndb/driver/db.c @@ -568,7 +568,7 @@ setcachestats(dns_db_t *db, isc_stats_t *stats) { return (dns_db_setcachestats(sampledb->rbtdb, stats)); } -static unsigned int +static size_t hashsize(dns_db_t *db) { sampledb_t *sampledb = (sampledb_t *) db; diff --git a/lib/dns/acache.c b/lib/dns/acache.c index 8d59323126..178d2469c3 100644 --- a/lib/dns/acache.c +++ b/lib/dns/acache.c @@ -472,8 +472,7 @@ finddbent(dns_acache_t *acache, dns_db_t *db, dbentry_t **dbentryp) { * The caller must be holding the acache lock. 
*/ - bucket = isc_hash_calc((const unsigned char *)&db, - sizeof(db), ISC_TRUE) % DBBUCKETS; + bucket = isc_hash_function(&db, sizeof(db), ISC_TRUE, NULL) % DBBUCKETS; for (dbentry = ISC_LIST_HEAD(acache->dbbucket[bucket]); dbentry != NULL; @@ -1264,8 +1263,7 @@ dns_acache_setdb(dns_acache_t *acache, dns_db_t *db) { dbentry->db = NULL; dns_db_attach(db, &dbentry->db); - bucket = isc_hash_calc((const unsigned char *)&db, - sizeof(db), ISC_TRUE) % DBBUCKETS; + bucket = isc_hash_function(&db, sizeof(db), ISC_TRUE, NULL) % DBBUCKETS; ISC_LIST_APPEND(acache->dbbucket[bucket], dbentry, link); @@ -1353,8 +1351,8 @@ dns_acache_putdb(dns_acache_t *acache, dns_db_t *db) { INSIST(ISC_LIST_EMPTY(dbentry->originlist) && ISC_LIST_EMPTY(dbentry->referlist)); - bucket = isc_hash_calc((const unsigned char *)&db, - sizeof(db), ISC_TRUE) % DBBUCKETS; + bucket = isc_hash_function(&db, sizeof(db), ISC_TRUE, NULL) % DBBUCKETS; + ISC_LIST_UNLINK(acache->dbbucket[bucket], dbentry, link); dns_db_detach(&dbentry->db); diff --git a/lib/dns/cache.c b/lib/dns/cache.c index 22ffdb54a6..83da390b47 100644 --- a/lib/dns/cache.c +++ b/lib/dns/cache.c @@ -1406,7 +1406,7 @@ dns_cache_dumpstats(dns_cache_t *cache, FILE *fp) { "cache records deleted due to TTL expiration"); fprintf(fp, "%20u %s\n", dns_db_nodecount(cache->db), "cache database nodes"); - fprintf(fp, "%20u %s\n", dns_db_hashsize(cache->db), + fprintf(fp, "%20zu %s\n", dns_db_hashsize(cache->db), "cache database hash buckets"); fprintf(fp, "%20u %s\n", (unsigned int) isc_mem_total(cache->mctx), diff --git a/lib/dns/db.c b/lib/dns/db.c index 7e4f35719b..e59abacbff 100644 --- a/lib/dns/db.c +++ b/lib/dns/db.c @@ -875,12 +875,12 @@ dns_db_nodecount(dns_db_t *db) { return ((db->methods->nodecount)(db)); } -unsigned int +size_t dns_db_hashsize(dns_db_t *db) { REQUIRE(DNS_DB_VALID(db)); if (db->methods->hashsize == NULL) - return (ISC_R_NOTIMPLEMENTED); + return (0); return ((db->methods->hashsize)(db)); } diff --git a/lib/dns/include/dns/db.h 
b/lib/dns/include/dns/db.h index 4679d639ee..9715249f9a 100644 --- a/lib/dns/include/dns/db.h +++ b/lib/dns/include/dns/db.h @@ -194,7 +194,7 @@ typedef struct dns_dbmethods { dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset); isc_result_t (*setcachestats)(dns_db_t *db, isc_stats_t *stats); - unsigned int (*hashsize)(dns_db_t *db); + size_t (*hashsize)(dns_db_t *db); } dns_dbmethods_t; typedef isc_result_t @@ -1380,7 +1380,7 @@ dns_db_nodecount(dns_db_t *db); * \li The number of nodes in the database */ -unsigned int +size_t dns_db_hashsize(dns_db_t *db); /*%< * For database implementations using a hash table, report the @@ -1392,7 +1392,7 @@ dns_db_hashsize(dns_db_t *db); * * Returns: * \li The number of buckets in the database's hash table, or - * ISC_R_NOTIMPLEMENTED. + * 0 if not implemented. */ void diff --git a/lib/dns/include/dns/rbt.h b/lib/dns/include/dns/rbt.h index 19f2d4d480..013cd74eb5 100644 --- a/lib/dns/include/dns/rbt.h +++ b/lib/dns/include/dns/rbt.h @@ -88,6 +88,7 @@ struct dns_rbtnode { dns_rbtnode_t *right; dns_rbtnode_t *down; #ifdef DNS_RBT_USEHASH + dns_rbtnode_t *uppernode; dns_rbtnode_t *hashnext; #endif @@ -657,7 +658,7 @@ dns_rbt_nodecount(dns_rbt_t *rbt); * \li rbt is a valid rbt manager. */ -unsigned int +size_t dns_rbt_hashsize(dns_rbt_t *rbt); /*%< * Obtain the current number of buckets in the 'rbt' hash table. diff --git a/lib/dns/name.c b/lib/dns/name.c index 6e1726ab44..80ebe5a767 100644 --- a/lib/dns/name.c +++ b/lib/dns/name.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -478,42 +479,10 @@ dns_name_internalwildcard(const dns_name_t *name) { return (ISC_FALSE); } -static inline unsigned int -name_hash(dns_name_t *name, isc_boolean_t case_sensitive) { - unsigned int length; - const unsigned char *s; - unsigned int h = 0; - unsigned char c; - - length = name->length; - if (length > 16) - length = 16; - - /* - * This hash function is similar to the one Ousterhout - * uses in Tcl. 
- */ - s = name->ndata; - if (case_sensitive) { - while (length > 0) { - h += ( h << 3 ) + *s; - s++; - length--; - } - } else { - while (length > 0) { - c = maptolower[*s]; - h += ( h << 3 ) + c; - s++; - length--; - } - } - - return (h); -} - unsigned int dns_name_hash(dns_name_t *name, isc_boolean_t case_sensitive) { + unsigned int length; + /* * Provide a hash value for 'name'. */ @@ -522,7 +491,12 @@ dns_name_hash(dns_name_t *name, isc_boolean_t case_sensitive) { if (name->labels == 0) return (0); - return (name_hash(name, case_sensitive)); + length = name->length; + if (length > 16) + length = 16; + + return (isc_hash_function_reverse(name->ndata, length, + case_sensitive, NULL)); } unsigned int @@ -535,19 +509,17 @@ dns_name_fullhash(dns_name_t *name, isc_boolean_t case_sensitive) { if (name->labels == 0) return (0); - return (isc_hash_calc((const unsigned char *)name->ndata, - name->length, case_sensitive)); + return (isc_hash_function_reverse(name->ndata, name->length, + case_sensitive, NULL)); } unsigned int dns_fullname_hash(dns_name_t *name, isc_boolean_t case_sensitive) { /* * This function was deprecated due to the breakage of the name space - * convention. We only keep this internally to provide binary backward + * convention. We only keep this internally to provide binary backward * compatibility. 
*/ - REQUIRE(VALID_NAME(name)); - return (dns_name_fullhash(name, case_sensitive)); } @@ -567,7 +539,8 @@ dns_name_hashbylabel(dns_name_t *name, isc_boolean_t case_sensitive) { if (name->labels == 0) return (0); else if (name->labels == 1) - return (name_hash(name, case_sensitive)); + return (isc_hash_function_reverse(name->ndata, name->length, + case_sensitive, NULL)); SETUP_OFFSETS(name, offsets, odata); DNS_NAME_INIT(&tname, NULL); @@ -579,7 +552,8 @@ dns_name_hashbylabel(dns_name_t *name, isc_boolean_t case_sensitive) { tname.length = name->length - offsets[i]; else tname.length = offsets[i + 1] - offsets[i]; - h += name_hash(&tname, case_sensitive); + h += isc_hash_function_reverse(tname.ndata, tname.length, + case_sensitive, NULL); } return (h); @@ -637,12 +611,15 @@ dns_name_fullcompare(const dns_name_t *name1, const dns_name_t *name2, ldiff = l1 - l2; } + offsets1 += l1; + offsets2 += l2; + while (l > 0) { l--; - l1--; - l2--; - label1 = &name1->ndata[offsets1[l1]]; - label2 = &name2->ndata[offsets2[l2]]; + offsets1--; + offsets2--; + label1 = &name1->ndata[*offsets1]; + label2 = &name2->ndata[*offsets2]; count1 = *label1++; count2 = *label2++; @@ -658,16 +635,41 @@ dns_name_fullcompare(const dns_name_t *name1, const dns_name_t *name2, else count = count2; - while (count > 0) { - chdiff = (int)maptolower[*label1] - - (int)maptolower[*label2]; + while (count > 3) { + chdiff = (int)maptolower[label1[0]] - + (int)maptolower[label2[0]]; + if (chdiff != 0) { + *orderp = chdiff; + goto done; + } + chdiff = (int)maptolower[label1[1]] - + (int)maptolower[label2[1]]; + if (chdiff != 0) { + *orderp = chdiff; + goto done; + } + chdiff = (int)maptolower[label1[2]] - + (int)maptolower[label2[2]]; + if (chdiff != 0) { + *orderp = chdiff; + goto done; + } + chdiff = (int)maptolower[label1[3]] - + (int)maptolower[label2[3]]; + if (chdiff != 0) { + *orderp = chdiff; + goto done; + } + count -= 4; + label1 += 4; + label2 += 4; + } + while (count-- > 0) { + chdiff = 
(int)maptolower[*label1++] - (int)maptolower[*label2++]; if (chdiff != 0) { *orderp = chdiff; goto done; } - count--; - label1++; - label2++; } if (cdiff != 0) { *orderp = cdiff; @@ -683,11 +685,12 @@ dns_name_fullcompare(const dns_name_t *name1, const dns_name_t *name2, namereln = dns_namereln_subdomain; else namereln = dns_namereln_equal; + *nlabelsp = nlabels; + return (namereln); done: *nlabelsp = nlabels; - - if (nlabels > 0 && namereln == dns_namereln_none) + if (nlabels > 0) namereln = dns_namereln_commonancestor; return (namereln); @@ -749,16 +752,31 @@ dns_name_equal(const dns_name_t *name1, const dns_name_t *name2) { label1 = name1->ndata; label2 = name2->ndata; - while (l > 0) { - l--; + while (l-- > 0) { count = *label1++; if (count != *label2++) return (ISC_FALSE); INSIST(count <= 63); /* no bitstring support */ - while (count > 0) { - count--; + while (count > 3) { + c = maptolower[label1[0]]; + if (c != maptolower[label2[0]]) + return (ISC_FALSE); + c = maptolower[label1[1]]; + if (c != maptolower[label2[1]]) + return (ISC_FALSE); + c = maptolower[label1[2]]; + if (c != maptolower[label2[2]]) + return (ISC_FALSE); + c = maptolower[label1[3]]; + if (c != maptolower[label2[3]]) + return (ISC_FALSE); + count -= 4; + label1 += 4; + label2 += 4; + } + while (count-- > 0) { c = maptolower[*label1++]; if (c != maptolower[*label2++]) return (ISC_FALSE); diff --git a/lib/dns/rbt.c b/lib/dns/rbt.c index 10b8c498b4..a414e678bf 100644 --- a/lib/dns/rbt.c +++ b/lib/dns/rbt.c @@ -78,15 +78,15 @@ #endif struct dns_rbt { - unsigned int magic; - isc_mem_t * mctx; - dns_rbtnode_t * root; - void (*data_deleter)(void *, void *); - void * deleter_arg; - unsigned int nodecount; - unsigned int hashsize; - dns_rbtnode_t ** hashtable; - void * mmap_location; + unsigned int magic; + isc_mem_t * mctx; + dns_rbtnode_t * root; + void (*data_deleter)(void *, void *); + void * deleter_arg; + unsigned int nodecount; + size_t hashsize; + dns_rbtnode_t ** hashtable; + void * 
mmap_location; }; #define RED 0 @@ -208,6 +208,9 @@ getdata(dns_rbtnode_t *node, file_header_t *header) { #define LEFT(node) ((node)->left) #define RIGHT(node) ((node)->right) #define DOWN(node) ((node)->down) +#ifdef DNS_RBT_USEHASH +#define UPPERNODE(node) ((node)->uppernode) +#endif /* DNS_RBT_USEHASH */ #define DATA(node) ((node)->data) #define IS_EMPTY(node) ((node)->data == NULL) #define HASHNEXT(node) ((node)->hashnext) @@ -345,6 +348,39 @@ hexdump(const char *desc, unsigned char *data, size_t size) { } #endif /* DEBUG */ +#ifdef DNS_RBT_USEHASH + +/* Upper node is the parent of the root of the passed node's + * subtree. The passed node must not be NULL. + */ +static inline dns_rbtnode_t * +get_upper_node(dns_rbtnode_t *node) { + return (UPPERNODE(node)); +} + +static void +fixup_uppernodes_helper(dns_rbtnode_t *node, dns_rbtnode_t *uppernode) { + if (node == NULL) + return; + + UPPERNODE(node) = uppernode; + + fixup_uppernodes_helper(LEFT(node), uppernode); + fixup_uppernodes_helper(RIGHT(node), uppernode); + fixup_uppernodes_helper(DOWN(node), node); +} + +/* + * This function is used to fixup uppernode members of all dns_rbtnodes + * after deserialization. + */ +static void +fixup_uppernodes(dns_rbt_t *rbt) { + fixup_uppernodes_helper(rbt->root, NULL); +} + +#else + /* The passed node must not be NULL. 
*/ static inline dns_rbtnode_t * get_subtree_root(dns_rbtnode_t *node) { @@ -370,6 +406,8 @@ get_upper_node(dns_rbtnode_t *node) { return (PARENT(root)); } +#endif /* DNS_RBT_USEHASH */ + size_t dns__rbtnode_getdistance(dns_rbtnode_t *node) { size_t nodes = 1; @@ -421,11 +459,9 @@ treefix(dns_rbt_t *rbt, void *base, size_t size, dns_rbtdatafixer_t datafixer, void *fixer_arg, isc_uint64_t *crc); -static isc_result_t -deletetree(dns_rbt_t *rbt, dns_rbtnode_t *node); - static void -deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, dns_rbtnode_t **nodep); +deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, isc_boolean_t unhash, + dns_rbtnode_t **nodep); static void printnodename(dns_rbtnode_t *node, isc_boolean_t quoted, FILE *f); @@ -898,13 +934,19 @@ dns_rbt_deserialize_tree(void *base_address, size_t filesize, goto cleanup; } + if (header->nodecount != rbt->nodecount) { + result = ISC_R_INVALIDFILE; + goto cleanup; + } + +#ifdef DNS_RBT_USEHASH + fixup_uppernodes(rbt); +#endif /* DNS_RBT_USEHASH */ + *rbtp = rbt; if (originp != NULL) *originp = rbt->root; - if (header->nodecount != rbt->nodecount) - result = ISC_R_INVALIDFILE; - cleanup: if (result != ISC_R_SUCCESS && rbt != NULL) { rbt->root = NULL; @@ -976,7 +1018,7 @@ dns_rbt_destroy2(dns_rbt_t **rbtp, unsigned int quantum) { rbt = *rbtp; - deletetreeflat(rbt, quantum, &rbt->root); + deletetreeflat(rbt, quantum, ISC_FALSE, &rbt->root); if (rbt->root != NULL) return (ISC_R_QUOTA); @@ -1003,7 +1045,7 @@ dns_rbt_nodecount(dns_rbt_t *rbt) { return (rbt->nodecount); } -unsigned int +size_t dns_rbt_hashsize(dns_rbt_t *rbt) { REQUIRE(VALID_RBT(rbt)); @@ -1098,6 +1140,9 @@ dns_rbt_addnode(dns_rbt_t *rbt, dns_name_t *name, dns_rbtnode_t **nodep) { if (result == ISC_R_SUCCESS) { rbt->nodecount++; new_current->is_root = 1; +#ifdef DNS_RBT_USEHASH + UPPERNODE(new_current) = NULL; +#endif /* DNS_RBT_USEHASH */ rbt->root = new_current; *nodep = new_current; hash_node(rbt, new_current, name); @@ -1277,7 +1322,10 @@ 
dns_rbt_addnode(dns_rbt_t *rbt, dns_name_t *name, dns_rbtnode_t **nodep) { PARENT(current) = new_current; DOWN(new_current) = current; root = &DOWN(new_current); - +#ifdef DNS_RBT_USEHASH + UPPERNODE(new_current) = UPPERNODE(current); + UPPERNODE(current) = new_current; +#endif /* DNS_RBT_USEHASH */ ADD_LEVEL(&chain, new_current); LEFT(current) = NULL; @@ -1334,6 +1382,12 @@ dns_rbt_addnode(dns_rbt_t *rbt, dns_name_t *name, dns_rbtnode_t **nodep) { result = create_node(rbt->mctx, add_name, &new_current); if (result == ISC_R_SUCCESS) { +#ifdef DNS_RBT_USEHASH + if (*root == NULL) + UPPERNODE(new_current) = current; + else + UPPERNODE(new_current) = PARENT(*root); +#endif /* DNS_RBT_USEHASH */ addonlevel(new_current, current, order, root); rbt->nodecount++; *nodep = new_current; @@ -1475,12 +1529,6 @@ dns_rbt_findnode(dns_rbt_t *rbt, dns_name_t *name, dns_name_t *foundname, unsigned int tlabels = 1; unsigned int hash; - /* - * If there is no hash table, hashing can't be done. - */ - if (rbt->hashtable == NULL) - goto nohash; - /* * The case of current != current_root, that * means a left or right pointer was followed, @@ -1495,7 +1543,7 @@ dns_rbt_findnode(dns_rbt_t *rbt, dns_name_t *name, dns_name_t *foundname, /* * current_root is the root of the current level, so - * it's parent is the same as it's "up" pointer. + * its parent is the same as its "up" pointer. */ up_current = PARENT(current_root); dns_name_init(&hash_name, NULL); @@ -1579,8 +1627,8 @@ dns_rbt_findnode(dns_rbt_t *rbt, dns_name_t *name, dns_name_t *foundname, current = NULL; continue; - nohash: -#endif /* DNS_RBT_USEHASH */ +#else /* DNS_RBT_USEHASH */ + /* * Standard binary search tree movement. */ @@ -1589,6 +1637,8 @@ dns_rbt_findnode(dns_rbt_t *rbt, dns_name_t *name, dns_name_t *foundname, else current = RIGHT(current); +#endif /* DNS_RBT_USEHASH */ + } else { /* * The names have some common suffix labels. 
@@ -2019,10 +2069,10 @@ dns_rbt_deletenode(dns_rbt_t *rbt, dns_rbtnode_t *node, isc_boolean_t recurse) INSIST(rbt->nodecount != 0); if (DOWN(node) != NULL) { - if (recurse) - RUNTIME_CHECK(deletetree(rbt, DOWN(node)) - == ISC_R_SUCCESS); - else { + if (recurse) { + PARENT(DOWN(node)) = NULL; + deletetreeflat(rbt, 0, ISC_TRUE, &DOWN(node)); + } else { if (DATA(node) != NULL && rbt->data_deleter != NULL) rbt->data_deleter(DATA(node), rbt->deleter_arg); DATA(node) = NULL; @@ -2034,6 +2084,7 @@ dns_rbt_deletenode(dns_rbt_t *rbt, dns_rbtnode_t *node, isc_boolean_t recurse) * by itself on a single level, so join_nodes() could * be used to collapse the tree (with all the caveats * of the comment at the start of this function). + * But join_nodes() function has now been removed. */ return (ISC_R_SUCCESS); } @@ -2269,12 +2320,14 @@ rehash(dns_rbt_t *rbt, unsigned int newcount) { unsigned int oldsize; dns_rbtnode_t **oldtable; dns_rbtnode_t *node; + dns_rbtnode_t *nextnode; unsigned int hash; unsigned int i; oldsize = rbt->hashsize; oldtable = rbt->hashtable; do { + INSIST((rbt->hashsize * 2 + 1) > rbt->hashsize); rbt->hashsize = rbt->hashsize * 2 + 1; } while (newcount >= (rbt->hashsize * 3)); rbt->hashtable = isc_mem_get(rbt->mctx, @@ -2285,19 +2338,15 @@ rehash(dns_rbt_t *rbt, unsigned int newcount) { return; } - INSIST(rbt->hashsize > 0); - for (i = 0; i < rbt->hashsize; i++) rbt->hashtable[i] = NULL; for (i = 0; i < oldsize; i++) { - node = oldtable[i]; - while (node != NULL) { + for (node = oldtable[i]; node != NULL; node = nextnode) { hash = HASHVAL(node) % rbt->hashsize; - oldtable[i] = HASHNEXT(node); + nextnode = HASHNEXT(node); HASHNEXT(node) = rbt->hashtable[hash]; rbt->hashtable[hash] = node; - node = oldtable[i]; } } @@ -2321,19 +2370,17 @@ unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *node) { REQUIRE(DNS_RBTNODE_VALID(node)); - if (rbt->hashtable != NULL) { - bucket = HASHVAL(node) % rbt->hashsize; - bucket_node = rbt->hashtable[bucket]; + bucket = HASHVAL(node) 
% rbt->hashsize; + bucket_node = rbt->hashtable[bucket]; - if (bucket_node == node) - rbt->hashtable[bucket] = HASHNEXT(node); - else { - while (HASHNEXT(bucket_node) != node) { - INSIST(HASHNEXT(bucket_node) != NULL); - bucket_node = HASHNEXT(bucket_node); - } - HASHNEXT(bucket_node) = HASHNEXT(node); + if (bucket_node == node) { + rbt->hashtable[bucket] = HASHNEXT(node); + } else { + while (HASHNEXT(bucket_node) != node) { + INSIST(HASHNEXT(bucket_node) != NULL); + bucket_node = HASHNEXT(bucket_node); } + HASHNEXT(bucket_node) = HASHNEXT(node); } } #endif /* DNS_RBT_USEHASH */ @@ -2747,64 +2794,6 @@ deletefromlevel(dns_rbtnode_t *delete, dns_rbtnode_t **rootp) { } } -/* - * This should only be used on the root of a tree, because no color fixup - * is done at all. - * - * NOTE: No root pointer maintenance is done, because the function is only - * used for two cases: - * + deleting everything DOWN from a node that is itself being deleted, and - * + deleting the entire tree of trees from dns_rbt_destroy. - * In each case, the root pointer is no longer relevant, so there - * is no need for a root parameter to this function. - * - * If the function is ever intended to be used to delete something where - * a pointer needs to be told that this tree no longer exists, - * this function would need to adjusted accordingly. 
- */ -static isc_result_t -deletetree(dns_rbt_t *rbt, dns_rbtnode_t *node) { - isc_result_t result = ISC_R_SUCCESS; - - REQUIRE(VALID_RBT(rbt)); - - if (node == NULL) - return (result); - - if (LEFT(node) != NULL) { - result = deletetree(rbt, LEFT(node)); - if (result != ISC_R_SUCCESS) - goto done; - LEFT(node) = NULL; - } - if (RIGHT(node) != NULL) { - result = deletetree(rbt, RIGHT(node)); - if (result != ISC_R_SUCCESS) - goto done; - RIGHT(node) = NULL; - } - if (DOWN(node) != NULL) { - result = deletetree(rbt, DOWN(node)); - if (result != ISC_R_SUCCESS) - goto done; - DOWN(node) = NULL; - } - done: - if (result != ISC_R_SUCCESS) - return (result); - - if (DATA(node) != NULL && rbt->data_deleter != NULL) - rbt->data_deleter(DATA(node), rbt->deleter_arg); - - unhash_node(rbt, node); -#if DNS_RBT_USEMAGIC - node->magic = 0; -#endif - - freenode(rbt, &node); - return (result); -} - static void freenode(dns_rbt_t *rbt, dns_rbtnode_t **nodep) { dns_rbtnode_t *node = *nodep; @@ -2818,57 +2807,55 @@ freenode(dns_rbt_t *rbt, dns_rbtnode_t **nodep) { } static void -deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, dns_rbtnode_t **nodep) { - dns_rbtnode_t *parent; - dns_rbtnode_t *node = *nodep; +deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, isc_boolean_t unhash, + dns_rbtnode_t **nodep) +{ + dns_rbtnode_t *root = *nodep; - REQUIRE(VALID_RBT(rbt)); + while (root != NULL) { + /* + * If there is a left, right or down node, walk into it + * and iterate. + */ + if (LEFT(root) != NULL) { + dns_rbtnode_t *node = root; + root = LEFT(root); + LEFT(node) = NULL; + } else if (RIGHT(root) != NULL) { + dns_rbtnode_t *node = root; + root = RIGHT(root); + RIGHT(node) = NULL; + } else if (DOWN(root) != NULL) { + dns_rbtnode_t *node = root; + root = DOWN(root); + DOWN(node) = NULL; + } else { + /* + * There are no left, right or down nodes, so we + * can free this one and go back to its parent. 
+ */ + dns_rbtnode_t *node = root; + root = PARENT(root); - again: - if (node == NULL) { - *nodep = NULL; - return; - } - - traverse: - if (LEFT(node) != NULL) { - node = LEFT(node); - goto traverse; - } - if (DOWN(node) != NULL) { - node = DOWN(node); - goto traverse; - } - - if (DATA(node) != NULL && rbt->data_deleter != NULL) - rbt->data_deleter(DATA(node), rbt->deleter_arg); - - /* - * Note: we don't call unhash_node() here as we are destroying - * the complete rbt tree. - */ + if (DATA(node) != NULL && rbt->data_deleter != NULL) + rbt->data_deleter(DATA(node), + rbt->deleter_arg); + if (unhash) + unhash_node(rbt, node); + /* + * Note: unhash_node() is skipped (unhash is false) + * when destroying the complete RBT tree. + */ #if DNS_RBT_USEMAGIC - node->magic = 0; + node->magic = 0; #endif - parent = PARENT(node); - if (RIGHT(node) != NULL) - PARENT(RIGHT(node)) = parent; - if (parent != NULL) { - if (LEFT(parent) == node) - LEFT(parent) = RIGHT(node); - else if (DOWN(parent) == node) - DOWN(parent) = RIGHT(node); - } else - parent = RIGHT(node); - - freenode(rbt, &node); - - node = parent; - if (quantum != 0 && --quantum == 0) { - *nodep = node; - return; + freenode(rbt, &node); + if (quantum != 0 && --quantum == 0) + break; + } } - goto again; + + *nodep = root; } static size_t diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index 0cd1e0e960..f39c76feed 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -7729,20 +7729,20 @@ nodecount(dns_db_t *db) { return (count); } -static unsigned int +static size_t hashsize(dns_db_t *db) { dns_rbtdb_t *rbtdb; - unsigned int count; + size_t size; rbtdb = (dns_rbtdb_t *)db; REQUIRE(VALID_RBTDB(rbtdb)); RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); - count = dns_rbt_hashsize(rbtdb->tree); + size = dns_rbt_hashsize(rbtdb->tree); RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); - return (count); + return (size); } static void diff --git a/lib/dns/tests/dnstest.c b/lib/dns/tests/dnstest.c index bc08e2c9ba..577f0ab4de 100644 --- 
a/lib/dns/tests/dnstest.c +++ b/lib/dns/tests/dnstest.c @@ -57,6 +57,7 @@ isc_socketmgr_t *socketmgr = NULL; dns_zonemgr_t *zonemgr = NULL; isc_boolean_t app_running = ISC_FALSE; int ncpus; +isc_boolean_t debug_mem_record = ISC_TRUE; static isc_boolean_t hash_active = ISC_FALSE, dst_active = ISC_FALSE; @@ -115,7 +116,8 @@ dns_test_begin(FILE *logfile, isc_boolean_t start_managers) { if (start_managers) CHECK(isc_app_start()); - isc_mem_debugging |= ISC_MEM_DEBUGRECORD; + if (debug_mem_record) + isc_mem_debugging |= ISC_MEM_DEBUGRECORD; CHECK(isc_mem_create(0, 0, &mctx)); CHECK(isc_entropy_create(mctx, &ectx)); diff --git a/lib/dns/tests/dnstest.h b/lib/dns/tests/dnstest.h index ba749738af..679a544729 100644 --- a/lib/dns/tests/dnstest.h +++ b/lib/dns/tests/dnstest.h @@ -50,6 +50,7 @@ extern isc_socketmgr_t *socketmgr; extern dns_zonemgr_t *zonemgr; extern isc_boolean_t app_running; extern int ncpus; +extern isc_boolean_t debug_mem_record; isc_result_t dns_test_begin(FILE *logfile, isc_boolean_t create_managers); diff --git a/lib/dns/tests/rbt_test.c b/lib/dns/tests/rbt_test.c index daf9419776..f2e054c746 100644 --- a/lib/dns/tests/rbt_test.c +++ b/lib/dns/tests/rbt_test.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -58,6 +59,8 @@ #include #include +#include +#include typedef struct { dns_rbt_t *rbt; @@ -1305,6 +1308,131 @@ ATF_TC_BODY(rbt_insert_and_remove, tc) { dns_test_end(); } +ATF_TC(benchmark); +ATF_TC_HEAD(benchmark, tc) { + atf_tc_set_md_var(tc, "descr", "Benchmark RBT implementation"); +} + +static dns_fixedname_t *fnames; +static dns_name_t **names; +static int *values; + +static void * +find_thread(void *arg) { + dns_rbt_t *mytree; + isc_result_t result; + dns_rbtnode_t *node; + unsigned int j, i; + unsigned int start = 0; + + mytree = (dns_rbt_t *) arg; + while (start == 0) + start = random() % 4000000; + + /* Query 32 million random names from it in each thread */ + for (j = 0; j < 8; j++) { + for (i = start; i != start 
- 1; i = (i + 1) % 4000000) { + node = NULL; + result = dns_rbt_findnode(mytree, names[i], NULL, + &node, NULL, + DNS_RBTFIND_EMPTYDATA, + NULL, NULL); + ATF_CHECK_EQ(result, ISC_R_SUCCESS); + ATF_REQUIRE(node != NULL); + ATF_CHECK_EQ(values[i], (intptr_t) node->data); + } + } + + return (NULL); +} + +ATF_TC_BODY(benchmark, tc) { + isc_result_t result; + char namestr[sizeof("name18446744073709551616.example.org.")]; + unsigned int r; + dns_rbt_t *mytree; + dns_rbtnode_t *node; + unsigned int i; + unsigned int maxvalue = 1000000; + isc_time_t ts1, ts2; + double t; + unsigned int nthreads; + pthread_t threads[32]; + + UNUSED(tc); + + srandom(time(NULL)); + + debug_mem_record = ISC_FALSE; + + result = dns_test_begin(NULL, ISC_TRUE); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + fnames = (dns_fixedname_t *) malloc(4000000 * sizeof(dns_fixedname_t)); + names = (dns_name_t **) malloc(4000000 * sizeof(dns_name_t *)); + values = (int *) malloc(4000000 * sizeof(int)); + + for (i = 0; i < 4000000; i++) { + r = ((unsigned long) random()) % maxvalue; + snprintf(namestr, sizeof(namestr), "name%u.example.org.", r); + build_name_from_str(namestr, &fnames[i]); + names[i] = dns_fixedname_name(&fnames[i]); + values[i] = r; + } + + /* Create a tree. */ + mytree = NULL; + result = dns_rbt_create(mctx, NULL, NULL, &mytree); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + /* Insert test data into the tree. 
*/ + for (i = 0; i < maxvalue; i++) { + snprintf(namestr, sizeof(namestr), "name%u.example.org.", i); + node = NULL; + result = insert_helper(mytree, namestr, &node); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + node->data = (void *) (intptr_t) i; + } + + result = isc_time_now(&ts1); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + nthreads = ISC_MIN(isc_os_ncpus(), 32); + nthreads = ISC_MAX(nthreads, 1); + for (i = 0; i < nthreads; i++) { + int s; + + s = pthread_create(&threads[i], NULL, find_thread, mytree); + + ATF_REQUIRE(s == 0); + } + + for (i = 0; i < nthreads; i++) { + int s; + + s = pthread_join(threads[i], NULL); + + ATF_REQUIRE(s == 0); + } + + result = isc_time_now(&ts2); + ATF_REQUIRE_EQ(result, ISC_R_SUCCESS); + + t = isc_time_microdiff(&ts2, &ts1); + + printf("%u findnode calls, %f seconds, %f calls/second\n", + nthreads * 8 * 4000000, t / 1000000.0, + (nthreads * 8 * 4000000) / (t / 1000000.0)); + + free(values); + free(names); + free(fnames); + + dns_rbt_destroy(&mytree); + + dns_test_end(); +} + /* * Main */ @@ -1318,6 +1446,7 @@ ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, rbt_remove); ATF_TP_ADD_TC(tp, rbt_remove_empty); ATF_TP_ADD_TC(tp, rbt_insert_and_remove); + ATF_TP_ADD_TC(tp, benchmark); return (atf_no_error()); } diff --git a/lib/isc/hash.c b/lib/isc/hash.c index 00d869261d..95b573db10 100644 --- a/lib/isc/hash.c +++ b/lib/isc/hash.c @@ -403,3 +403,142 @@ isc__hash_setvec(const isc_uint16_t *vec) { p[i] = vec[i]; } } + +static unsigned int fnv_offset_basis; +static isc_once_t fnv_once = ISC_ONCE_INIT; + +static void +fnv_initialize(void) { + /* + * This function should not leave fnv_offset_basis set to + * 0. Also, after this function has been called, if it is called + * again, it should not change fnv_offset_basis. 
+ */ + while (fnv_offset_basis == 0) { + isc_random_get(&fnv_offset_basis); + } +} + +unsigned int +isc_hash_function(const void *data, size_t length, + isc_boolean_t case_sensitive, + unsigned int *previous_hashp) +{ + unsigned int hval; + const unsigned char *bp; + const unsigned char *be; + + RUNTIME_CHECK(isc_once_do(&fnv_once, fnv_initialize) == ISC_R_SUCCESS); + + hval = previous_hashp != NULL ? *previous_hashp : fnv_offset_basis; + + bp = (const unsigned char *) data; + be = bp + length; + + /* + * Fowler-Noll-Vo FNV-1a hash function. + * + * NOTE: A random fnv_offset_basis is used by default to avoid + * collision attacks as the hash function is reversible. This + * makes the mapping non-deterministic, but the distribution in + * the domain is still uniform. + */ + + if (case_sensitive) { + while (bp < be - 4) { + hval ^= (unsigned int) bp[0]; + hval *= 16777619; + hval ^= (unsigned int) bp[1]; + hval *= 16777619; + hval ^= (unsigned int) bp[2]; + hval *= 16777619; + hval ^= (unsigned int) bp[3]; + hval *= 16777619; + bp += 4; + } + while (bp < be) { + hval ^= (unsigned int) *bp++; + hval *= 16777619; + } + } else { + while (bp < be - 4) { + hval ^= (unsigned int) maptolower[bp[0]]; + hval *= 16777619; + hval ^= (unsigned int) maptolower[bp[1]]; + hval *= 16777619; + hval ^= (unsigned int) maptolower[bp[2]]; + hval *= 16777619; + hval ^= (unsigned int) maptolower[bp[3]]; + hval *= 16777619; + bp += 4; + } + while (bp < be) { + hval ^= (unsigned int) maptolower[*bp++]; + hval *= 16777619; + } + } + + return (hval); +} + +unsigned int +isc_hash_function_reverse(const void *data, size_t length, + isc_boolean_t case_sensitive, + unsigned int *previous_hashp) +{ + unsigned int hval; + const unsigned char *bp; + const unsigned char *be; + + RUNTIME_CHECK(isc_once_do(&fnv_once, fnv_initialize) == ISC_R_SUCCESS); + + hval = previous_hashp != NULL ? 
*previous_hashp : fnv_offset_basis; + + bp = (const unsigned char *) data; + be = bp + length; + + /* + * Fowler-Noll-Vo FNV-1a hash function. + * + * NOTE: A random fnv_offset_basis is used by default to avoid + * collision attacks as the hash function is reversible. This + * makes the mapping non-deterministic, but the distribution in + * the domain is still uniform. + */ + + if (case_sensitive) { + while (be >= bp + 4) { + be -= 4; + hval ^= (unsigned int) be[3]; + hval *= 16777619; + hval ^= (unsigned int) be[2]; + hval *= 16777619; + hval ^= (unsigned int) be[1]; + hval *= 16777619; + hval ^= (unsigned int) be[0]; + hval *= 16777619; + } + while (--be >= bp) { + hval ^= (unsigned int) *be; + hval *= 16777619; + } + } else { + while (be >= bp + 4) { + be -= 4; + hval ^= (unsigned int) maptolower[be[3]]; + hval *= 16777619; + hval ^= (unsigned int) maptolower[be[2]]; + hval *= 16777619; + hval ^= (unsigned int) maptolower[be[1]]; + hval *= 16777619; + hval ^= (unsigned int) maptolower[be[0]]; + hval *= 16777619; + } + while (--be >= bp) { + hval ^= (unsigned int) maptolower[*be]; + hval *= 16777619; + } + } + + return (hval); +} diff --git a/lib/isc/include/isc/hash.h b/lib/isc/include/isc/hash.h index a53677ece5..fe344a0159 100644 --- a/lib/isc/include/isc/hash.h +++ b/lib/isc/include/isc/hash.h @@ -198,6 +198,44 @@ isc__hash_setvec(const isc_uint16_t *vec); * doing before using this function. */ +unsigned int +isc_hash_function(const void *data, size_t length, + isc_boolean_t case_sensitive, + unsigned int *previous_hashp); +unsigned int +isc_hash_function_reverse(const void *data, size_t length, + isc_boolean_t case_sensitive, + unsigned int *previous_hashp); +/*!< + * \brief Calculate a hash over data. + * + * This hash function is useful for hashtables. The hash function is + * opaque and not important to the caller. 
The returned hash values are + * non-deterministic and will have a different mapping every time a + * process using this library is run, but will have a uniform + * distribution. + * + * isc_hash_function() calculates the hash from start to end over the + * input data. isc_hash_function_reverse() calculates the hash from the + * end to the start over the input data. The difference in order is + * useful in incremental hashing. + * + * This is a new variant of isc_hash_calc() and will supersede + * isc_hash_calc() eventually. + * + * 'data' is the data to be hashed. + * + * 'length' is the size of the data to be hashed. + * + * 'case_sensitive' specifies whether the hash key should be treated as + * case-sensitive. It should typically be ISC_FALSE if the hash key + * is a DNS name. + * + * 'previous_hashp' is a pointer to a previous hash value returned by + * this function. It can be used to perform incremental hashing. NULL + * must be passed on the first call. + */ + ISC_LANG_ENDDECLS #endif /* ISC_HASH_H */ diff --git a/lib/isc/sockaddr.c b/lib/isc/sockaddr.c index d6a5849ed5..eda89be72e 100644 --- a/lib/isc/sockaddr.c +++ b/lib/isc/sockaddr.c @@ -205,7 +205,6 @@ isc_sockaddr_hash(const isc_sockaddr_t *sockaddr, isc_boolean_t address_only) { unsigned int length = 0; const unsigned char *s = NULL; unsigned int h = 0; - unsigned int g; unsigned int p = 0; const struct in6_addr *in6; @@ -239,12 +238,9 @@ isc_sockaddr_hash(const isc_sockaddr_t *sockaddr, isc_boolean_t address_only) { p = 0; } - h = isc_hash_calc(s, length, ISC_TRUE); - if (!address_only) { - g = isc_hash_calc((const unsigned char *)&p, sizeof(p), - ISC_TRUE); - h = h ^ g; /* XXX: we should concatenate h and p first */ - } + h = isc_hash_function(s, length, ISC_TRUE, NULL); + if (!address_only) + h = isc_hash_function(&p, sizeof(p), ISC_TRUE, &h); return (h); } diff --git a/lib/isc/win32/libisc.def.in b/lib/isc/win32/libisc.def.in index d982c5db27..791856fd74 100644 --- 
a/lib/isc/win32/libisc.def.in +++ b/lib/isc/win32/libisc.def.in @@ -261,6 +261,8 @@ isc_hash_ctxcreate isc_hash_ctxdetach isc_hash_ctxinit isc_hash_destroy +isc_hash_function +isc_hash_function_reverse isc_hash_init isc_heap_create isc_heap_decreased