2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-29 13:38:26 +00:00

Switch symtab to use fxhash hashing

This merge request resolves some performance regressions introduced
with the change from isc_symtab_t to isc_hashmap_t.

The key improvements are:

1. Using a faster hash function than both isc_hashmap_t and
   isc_symtab_t. The previous implementation used SipHash, but the
   hashflood resistance properties of SipHash are unneeded for config
   parsing.
2. Shrinking the initial size of the isc_hashmap_t used inside
   isc_symtab_t. Symtab is mainly used for config parsing, and the
   when used that way it will have between 1 and ~50 keys, but the
   previous implementation initialized a map with 128 slots.
   By initializing a smaller map, we speed up mallocs and optimize for
   the typical case of few config keys.
3. Slight optimization of the string matching in the hashmap, so that
   the tail is handled in a single load + comparison, instead of byte
   by byte.
   Of the three improvements, this is the least important.
This commit is contained in:
alessio 2025-02-27 07:37:04 +01:00
parent d2214cb704
commit e1e10adc3a
4 changed files with 118 additions and 20 deletions

View File

@ -26,6 +26,7 @@ libisc_la_HEADERS = \
include/isc/file.h \ include/isc/file.h \
include/isc/formatcheck.h \ include/isc/formatcheck.h \
include/isc/fuzz.h \ include/isc/fuzz.h \
include/isc/fxhash.h \
include/isc/getaddresses.h \ include/isc/getaddresses.h \
include/isc/hash.h \ include/isc/hash.h \
include/isc/hashmap.h \ include/isc/hashmap.h \

View File

@ -131,8 +131,12 @@ isc__ascii_load8(const uint8_t *ptr) {
* Compare `len` bytes at `a` and `b` for case-insensitive equality * Compare `len` bytes at `a` and `b` for case-insensitive equality
*/ */
static inline bool static inline bool
isc_ascii_lowerequal(const uint8_t *a, const uint8_t *b, unsigned int len) { isc_ascii_lowerequal(const uint8_t *restrict a, const uint8_t *restrict b,
unsigned int len) {
uint64_t a8 = 0, b8 = 0; uint64_t a8 = 0, b8 = 0;
if (len >= 8) {
const uint8_t *a_tail = a + len - 8;
const uint8_t *b_tail = b + len - 8;
while (len >= 8) { while (len >= 8) {
a8 = isc_ascii_tolower8(isc__ascii_load8(a)); a8 = isc_ascii_tolower8(isc__ascii_load8(a));
b8 = isc_ascii_tolower8(isc__ascii_load8(b)); b8 = isc_ascii_tolower8(isc__ascii_load8(b));
@ -143,6 +147,12 @@ isc_ascii_lowerequal(const uint8_t *a, const uint8_t *b, unsigned int len) {
a += 8; a += 8;
b += 8; b += 8;
} }
a8 = isc_ascii_tolower8(isc__ascii_load8(a_tail));
b8 = isc_ascii_tolower8(isc__ascii_load8(b_tail));
return a8 == b8;
}
while (len-- > 0) { while (len-- > 0) {
if (isc_ascii_tolower(*a++) != isc_ascii_tolower(*b++)) { if (isc_ascii_tolower(*a++) != isc_ascii_tolower(*b++)) {
return false; return false;

View File

@ -0,0 +1,89 @@
#pragma once
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the Software), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
/* The constant K from Rust's fxhash */
#define K 0x9e3779b97f4a7c15ull
static inline size_t
rotate_left(size_t x, unsigned int n) {
return (x << n) | (x >> (sizeof(size_t) * 8 - n));
}
static inline size_t
fx_add_to_hash(size_t hash, size_t i) {
return rotate_left(hash, 5) ^ i * K;
}
/*
* Beware: this implementation will use an approximate conversion to lowercase.
* This is ok as fxhash is already not hash-flooding resistant and we use it
* only for config parsing.
*/
static inline size_t
fx_hash_bytes(size_t initial_hash, const uint8_t *restrict bytes, size_t len,
bool case_sensitive) {
size_t hash = initial_hash;
size_t case_mask = case_sensitive
? -1ull
: (0b11011111 * 0x0101010101010101ull);
while (len >= sizeof(size_t)) {
size_t value;
memmove(&value, bytes, sizeof(size_t));
hash = fx_add_to_hash(hash, value & case_mask);
bytes += sizeof(size_t);
len -= sizeof(size_t);
}
/* Will be ignored if sizeof(size_t) <= 4 */
if (len >= 4) {
uint32_t value;
memmove(&value, bytes, sizeof(uint32_t));
hash = fx_add_to_hash(hash, value & case_mask);
bytes += 4;
len -= 4;
}
/* Will be ignored if sizeof(size_t) <= 2 */
if (len >= 2) {
uint16_t value;
memmove(&value, bytes, sizeof(uint16_t));
hash = fx_add_to_hash(hash, value & case_mask);
bytes += 2;
len -= 2;
}
if (len >= 1) {
hash = fx_add_to_hash(hash, bytes[0] & case_mask);
}
return hash;
}

View File

@ -16,6 +16,7 @@
#include <stdbool.h> #include <stdbool.h>
#include <isc/ascii.h> #include <isc/ascii.h>
#include <isc/fxhash.h>
#include <isc/hash.h> #include <isc/hash.h>
#include <isc/hashmap.h> #include <isc/hashmap.h>
#include <isc/magic.h> #include <isc/magic.h>
@ -31,9 +32,9 @@ typedef struct elt {
isc_symvalue_t value; isc_symvalue_t value;
} elt_t; } elt_t;
/* 7 bits means 128 entries at creation, which matches the common use of /* 4 bits means 16 entries at creation, which matches the common use of
* symtab */ * symtab */
#define ISC_SYMTAB_INIT_HASH_BITS 7 #define ISC_SYMTAB_INIT_HASH_BITS 4
#define SYMTAB_MAGIC ISC_MAGIC('S', 'y', 'm', 'T') #define SYMTAB_MAGIC ISC_MAGIC('S', 'y', 'm', 'T')
#define VALID_SYMTAB(st) ISC_MAGIC_VALID(st, SYMTAB_MAGIC) #define VALID_SYMTAB(st) ISC_MAGIC_VALID(st, SYMTAB_MAGIC)
@ -122,7 +123,7 @@ elt__match(void *node, const void *key0, bool case_sensitive) {
if (case_sensitive) { if (case_sensitive) {
return memcmp(elt->key, key->key, key->size) == 0; return memcmp(elt->key, key->key, key->size) == 0;
} else { } else {
return isc_ascii_lowercmp(elt->key, key->key, key->size) == 0; return isc_ascii_lowerequal(elt->key, key->key, key->size);
} }
} }
@ -136,14 +137,11 @@ elt_match_nocase(void *node, const void *key) {
return elt__match(node, key, false); return elt__match(node, key, false);
} }
static uint32_t static inline uint32_t
elt_hash(elt_t *elt, bool case_sensitive) { elt_hash(elt_t *restrict elt, bool case_sensitive) {
isc_hash32_t hash; const uint8_t *ptr = elt->key;
size_t len = elt->size;
isc_hash32_init(&hash); return fx_hash_bytes(0, ptr, len, case_sensitive);
isc_hash32_hash(&hash, elt->key, elt->size, case_sensitive);
isc_hash32_hash(&hash, &elt->type, sizeof(elt->type), false);
return isc_hash32_finalize(&hash);
} }
isc_result_t isc_result_t