/*
 * Source: mirror of https://gitlab.isc.org/isc-projects/bind9
 * (synced 2025-08-22 01:59:26 +00:00)
 * File: bind/lib/dns/badcache.c — 513 lines, 12 KiB, C
 *
 * Commit a5f5f68502 by Ondřej Surý, 2023-03-31 15:02:06 +02:00:
 * "Refactor isc_time_now() to return time, and not result"
 *
 *   isc_time_now() and isc_time_now_hires() were used inconsistently
 *   throughout the code — either with a status check, without one, or
 *   via the TIME_NOW() macro with RUNTIME_CHECK() on failure.
 *
 *   Refactor isc_time_now() and isc_time_now_hires() to always fail hard
 *   when getting the current time fails, and to return the isc_time_t
 *   value directly instead of passing a result pointer as an argument.
 */

/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
/*! \file */
#include <inttypes.h>
#include <stdbool.h>
#include <isc/atomic.h>
#include <isc/buffer.h>
#include <isc/hash.h>
#include <isc/log.h>
#include <isc/mem.h>
#include <isc/mutex.h>
#include <isc/rwlock.h>
#include <isc/string.h>
#include <isc/time.h>
#include <isc/util.h>
#include <dns/badcache.h>
#include <dns/fixedname.h>
#include <dns/name.h>
#include <dns/rdatatype.h>
#include <dns/types.h>
/* One cached "bad" <name, type> record; entries form per-bucket chains. */
typedef struct dns_bcentry dns_bcentry_t;

/*
 * A bad cache: a resizable hash table of dns_bcentry_t chains.
 *
 * Locking discipline (as used throughout this file): bc->lock is taken
 * in read mode for normal bucket operations (add/find/flushname), with
 * the per-bucket mutex tlocks[hash] protecting the individual chain;
 * bc->lock is taken in write mode for whole-table operations
 * (resize/flush/flushtree/print/destroy).
 */
struct dns_badcache {
	unsigned int magic;		/* BADCACHE_MAGIC while valid */
	isc_rwlock_t lock;		/* table-wide reader/writer lock */
	isc_mem_t *mctx;		/* memory context owning all allocations */
	isc_mutex_t *tlocks;		/* one mutex per hash bucket */
	dns_bcentry_t **table;		/* hash buckets: chains of entries */
	atomic_uint_fast32_t count;	/* live entry count (lock-free reads) */
	atomic_uint_fast32_t sweep;	/* rotating bucket index for lazy sweeping */
	unsigned int minsize;		/* table never shrinks below this */
	unsigned int size;		/* current number of buckets */
};

#define BADCACHE_MAGIC	  ISC_MAGIC('B', 'd', 'C', 'a')
#define VALID_BADCACHE(m) ISC_MAGIC_VALID(m, BADCACHE_MAGIC)

/*
 * A single cache entry. The owner name is stored in the embedded
 * fixedname (fname), with 'name' pointing into it.
 */
struct dns_bcentry {
	dns_bcentry_t *next;		/* next entry in this bucket's chain */
	dns_rdatatype_t type;		/* RR type this entry applies to */
	isc_time_t expire;		/* absolute expiry time */
	uint32_t flags;			/* caller-defined flags */
	unsigned int hashval;		/* full hash of name (pre-modulo) */
	dns_fixedname_t fname;		/* storage for the owner name */
	dns_name_t *name;		/* points at fname's name */
};

/* Grows/shrinks bc->table based on bc->count; defined below. */
static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now);
/*
 * Create a new bad cache with 'size' hash buckets, allocated from 'mctx'.
 * 'size' also becomes the floor below which the table never shrinks.
 * On success, *bcp owns the cache; free it with dns_badcache_destroy().
 */
isc_result_t
dns_badcache_init(isc_mem_t *mctx, unsigned int size, dns_badcache_t **bcp) {
	dns_badcache_t *cache = NULL;

	REQUIRE(bcp != NULL && *bcp == NULL);
	REQUIRE(mctx != NULL);

	cache = isc_mem_get(mctx, sizeof(*cache));
	*cache = (dns_badcache_t){
		.minsize = size,
		.size = size,
	};
	isc_mem_attach(mctx, &cache->mctx);
	isc_rwlock_init(&cache->lock);

	/* Zeroed bucket array plus one mutex per bucket. */
	cache->table = isc_mem_getx(cache->mctx,
				    size * sizeof(cache->table[0]),
				    ISC_MEM_ZERO);
	cache->tlocks = isc_mem_getx(cache->mctx,
				     size * sizeof(cache->tlocks[0]),
				     ISC_MEM_ZERO);
	for (unsigned int i = 0; i < size; i++) {
		isc_mutex_init(&cache->tlocks[i]);
	}

	cache->magic = BADCACHE_MAGIC;
	*bcp = cache;
	return (ISC_R_SUCCESS);
}
/*
 * Tear down a bad cache: flush all entries, destroy every lock, and
 * return all memory to the cache's memory context. *bcp is set to NULL.
 */
void
dns_badcache_destroy(dns_badcache_t **bcp) {
	dns_badcache_t *cache = NULL;

	REQUIRE(bcp != NULL && *bcp != NULL);

	cache = *bcp;
	*bcp = NULL;

	/* Drop every cached entry before tearing the structure down. */
	dns_badcache_flush(cache);

	cache->magic = 0;
	isc_rwlock_destroy(&cache->lock);
	for (unsigned int i = 0; i < cache->size; i++) {
		isc_mutex_destroy(&cache->tlocks[i]);
	}
	isc_mem_put(cache->mctx, cache->table,
		    sizeof(cache->table[0]) * cache->size);
	isc_mem_put(cache->mctx, cache->tlocks,
		    sizeof(cache->tlocks[0]) * cache->size);
	isc_mem_putanddetach(&cache->mctx, cache, sizeof(dns_badcache_t));
}
/*
 * Resize the hash table under the write lock: double (+1) when the entry
 * count exceeds 8x the bucket count, halve when it has dropped below 2x
 * (but never below minsize). Entries already expired at '*now' are freed
 * instead of being rehashed. Caller must NOT hold bc->lock or any tlock.
 */
static void
badcache_resize(dns_badcache_t *bc, isc_time_t *now) {
	dns_bcentry_t **newtable, *bad, *next;
	isc_mutex_t *newlocks;
	unsigned int newsize, i;
	bool grow;

	RWLOCK(&bc->lock, isc_rwlocktype_write);

	/*
	 * XXXWPK we will have a thundering herd problem here,
	 * as all threads will wait on the RWLOCK when there's
	 * a need to resize badcache.
	 * However, it happens so rarely it should not be a
	 * performance issue. This is because we double the
	 * size every time we grow it, and we don't shrink
	 * unless the number of entries really shrunk. In a
	 * high load situation, the number of badcache entries
	 * will eventually stabilize.
	 */
	/* Re-check the thresholds under the write lock. */
	if (atomic_load_relaxed(&bc->count) > bc->size * 8) {
		grow = true;
	} else if (atomic_load_relaxed(&bc->count) < bc->size * 2 &&
		   bc->size > bc->minsize)
	{
		grow = false;
	} else {
		/* Someone resized it already, bail. */
		RWUNLOCK(&bc->lock, isc_rwlocktype_write);
		return;
	}

	if (grow) {
		newsize = bc->size * 2 + 1;
	} else {
		newsize = (bc->size - 1) / 2;
#ifdef __clang_analyzer__
		/*
		 * XXXWPK there's a bug in clang static analyzer -
		 * `value % newsize` is considered undefined even though
		 * we check if newsize is larger than 0. This helps.
		 */
		newsize += 1;
#endif
	}
	RUNTIME_CHECK(newsize > 0);

	newtable = isc_mem_getx(bc->mctx, sizeof(dns_bcentry_t *) * newsize,
				ISC_MEM_ZERO);
	newlocks = isc_mem_get(bc->mctx, sizeof(isc_mutex_t) * newsize);

	/* Copy existing mutexes */
	for (i = 0; i < newsize && i < bc->size; i++) {
		newlocks[i] = bc->tlocks[i];
	}
	/* Initialize additional mutexes if we're growing */
	for (i = bc->size; i < newsize; i++) {
		isc_mutex_init(&newlocks[i]);
	}
	/* Destroy extra mutexes if we're shrinking */
	for (i = newsize; i < bc->size; i++) {
		isc_mutex_destroy(&bc->tlocks[i]);
	}

	/* Rehash live entries into newtable; free entries expired at 'now'. */
	for (i = 0; atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++) {
		for (bad = bc->table[i]; bad != NULL; bad = next) {
			next = bad->next;
			if (isc_time_compare(&bad->expire, now) < 0) {
				isc_mem_put(bc->mctx, bad, sizeof(*bad));
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				bad->next = newtable[bad->hashval % newsize];
				newtable[bad->hashval % newsize] = bad;
			}
		}
		bc->table[i] = NULL;
	}

	/* Swap in the new arrays and release the old ones. */
	isc_mem_put(bc->mctx, bc->tlocks, sizeof(isc_mutex_t) * bc->size);
	bc->tlocks = newlocks;
	isc_mem_put(bc->mctx, bc->table, sizeof(*bc->table) * bc->size);
	bc->size = newsize;
	bc->table = newtable;

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}
/*
 * Add (or refresh) a bad cache entry for <name, type>.
 *
 * If a matching entry already exists, its expiry is refreshed to
 * '*expire'; its flags are updated too, but only when 'update' is true.
 * Otherwise a new entry is created with the given expiry and flags.
 * Expired entries encountered on the bucket chain are pruned in passing.
 * May trigger a table resize (taking the write lock) after releasing
 * the bucket lock.
 */
void
dns_badcache_add(dns_badcache_t *bc, const dns_name_t *name,
		 dns_rdatatype_t type, bool update, uint32_t flags,
		 isc_time_t *expire) {
	unsigned int hashval, hash;
	dns_bcentry_t *bad, *prev, *next;
	isc_time_t now;
	bool resize = false;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);
	REQUIRE(expire != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	now = isc_time_now();
	hashval = dns_name_hash(name);
	hash = hashval % bc->size;
	LOCK(&bc->tlocks[hash]);
	prev = NULL;
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		next = bad->next;
		if (bad->type == type && dns_name_equal(name, bad->name)) {
			if (update) {
				bad->expire = *expire;
				bad->flags = flags;
			}
			break;
		}
		/* Prune entries that expired before 'now' as we walk. */
		if (isc_time_compare(&bad->expire, &now) < 0) {
			if (prev == NULL) {
				bc->table[hash] = bad->next;
			} else {
				prev->next = bad->next;
			}
			isc_mem_put(bc->mctx, bad, sizeof(*bad));
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = bad;
		}
	}
	if (bad == NULL) {
		unsigned int count;
		bad = isc_mem_get(bc->mctx, sizeof(*bad));
		*bad = (dns_bcentry_t){ .type = type,
					.hashval = hashval,
					.expire = *expire,
					.flags = flags,
					.next = bc->table[hash] };
		/*
		 * The name is stored in the entry's embedded fixedname.
		 * (A leftover isc_buffer pointing past the end of the
		 * allocation was removed here; it was never used.)
		 */
		bad->name = dns_fixedname_initname(&bad->fname);
		dns_name_copy(name, bad->name);
		bc->table[hash] = bad;
		count = atomic_fetch_add_relaxed(&bc->count, 1);
		/* Same grow/shrink thresholds badcache_resize() rechecks. */
		if ((count > bc->size * 8) ||
		    (count < bc->size * 2 && bc->size > bc->minsize))
		{
			resize = true;
		}
	} else {
		/* Existing entry: always refresh the expiry. */
		bad->expire = *expire;
	}
	UNLOCK(&bc->tlocks[hash]);
	RWUNLOCK(&bc->lock, isc_rwlocktype_read);

	if (resize) {
		badcache_resize(bc, &now);
	}
}
/*
 * Look up <name, type> in the cache as of '*now'. Returns true and sets
 * *flagp (if non-NULL) when a live entry is found; returns false
 * otherwise. Expired entries on the searched chain are pruned, and one
 * additional bucket is opportunistically swept per call.
 */
bool
dns_badcache_find(dns_badcache_t *bc, const dns_name_t *name,
		  dns_rdatatype_t type, uint32_t *flagp, isc_time_t *now) {
	dns_bcentry_t *bad, *prev, *next;
	bool answer = false;
	unsigned int i;
	unsigned int hash;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);
	REQUIRE(now != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	/*
	 * XXXMUKS: dns_name_equal() is expensive as it does a
	 * octet-by-octet comparison, and it can be made better in two
	 * ways here. First, lowercase the names (use
	 * dns_name_downcase() instead of dns_name_copy() in
	 * dns_badcache_add()) so that dns_name_caseequal() can be used
	 * which the compiler will emit as SIMD instructions. Second,
	 * don't put multiple copies of the same name in the chain (or
	 * multiple names will have to be matched for equality), but use
	 * name->link to store the type specific part.
	 */
	/* Empty cache: skip the lookup but still do the slow sweep below. */
	if (atomic_load_relaxed(&bc->count) == 0) {
		goto skip;
	}

	hash = dns_name_hash(name) % bc->size;
	prev = NULL;
	LOCK(&bc->tlocks[hash]);
	for (bad = bc->table[hash]; bad != NULL; bad = next) {
		next = bad->next;
		/*
		 * Search the hash list. Clean out expired records as we go.
		 */
		if (isc_time_compare(&bad->expire, now) < 0) {
			if (prev != NULL) {
				prev->next = bad->next;
			} else {
				bc->table[hash] = bad->next;
			}
			isc_mem_put(bc->mctx, bad, sizeof(*bad));
			atomic_fetch_sub(&bc->count, 1);
			continue;
		}
		if (bad->type == type && dns_name_equal(name, bad->name)) {
			if (flagp != NULL) {
				*flagp = bad->flags;
			}
			answer = true;
			break;
		}
		prev = bad;
	}
	UNLOCK(&bc->tlocks[hash]);
skip:

	/*
	 * Slow sweep to clean out stale records.
	 */
	/* Rotate through buckets; trylock so we never block the lookup. */
	i = atomic_fetch_add(&bc->sweep, 1) % bc->size;
	if (isc_mutex_trylock(&bc->tlocks[i]) == ISC_R_SUCCESS) {
		/* Only the chain head is examined per sweep step. */
		bad = bc->table[i];
		if (bad != NULL && isc_time_compare(&bad->expire, now) < 0) {
			bc->table[i] = bad->next;
			isc_mem_put(bc->mctx, bad, sizeof(*bad));
			atomic_fetch_sub_relaxed(&bc->count, 1);
		}
		UNLOCK(&bc->tlocks[i]);
	}

	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
	return (answer);
}
/*
 * Remove and free every entry in the cache. The bucket array and locks
 * are retained; only the entries are released.
 */
void
dns_badcache_flush(dns_badcache_t *bc) {
	RWLOCK(&bc->lock, isc_rwlocktype_write);
	REQUIRE(VALID_BADCACHE(bc));

	/* Free each bucket's chain; stop early once the count hits zero. */
	for (unsigned int bucket = 0;
	     atomic_load_relaxed(&bc->count) > 0 && bucket < bc->size;
	     bucket++)
	{
		dns_bcentry_t *bad = bc->table[bucket];
		while (bad != NULL) {
			dns_bcentry_t *gone = bad;
			bad = bad->next;
			isc_mem_put(bc->mctx, gone, sizeof(*gone));
			atomic_fetch_sub_relaxed(&bc->count, 1);
		}
		bc->table[bucket] = NULL;
	}
	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}
/*
 * Remove every entry whose owner name equals 'name' (any type).
 * Entries that have expired are removed from the bucket as well.
 */
void
dns_badcache_flushname(dns_badcache_t *bc, const dns_name_t *name) {
	dns_bcentry_t *cur = NULL, *prev = NULL;
	isc_time_t now;
	unsigned int bucket;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	RWLOCK(&bc->lock, isc_rwlocktype_read);

	now = isc_time_now();
	bucket = dns_name_hash(name) % bc->size;
	LOCK(&bc->tlocks[bucket]);

	cur = bc->table[bucket];
	while (cur != NULL) {
		dns_bcentry_t *follow = cur->next;
		bool expired = isc_time_compare(&cur->expire, &now) < 0;

		if (expired || dns_name_equal(name, cur->name)) {
			/* Unlink and free this entry. */
			if (prev == NULL) {
				bc->table[bucket] = follow;
			} else {
				prev->next = follow;
			}
			isc_mem_put(bc->mctx, cur, sizeof(*cur));
			atomic_fetch_sub_relaxed(&bc->count, 1);
		} else {
			prev = cur;
		}
		cur = follow;
	}

	UNLOCK(&bc->tlocks[bucket]);
	RWUNLOCK(&bc->lock, isc_rwlocktype_read);
}
/*
 * Remove every entry at or below 'name' in the DNS hierarchy.
 * Expired entries are removed regardless of name.
 */
void
dns_badcache_flushtree(dns_badcache_t *bc, const dns_name_t *name) {
	isc_time_t now;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(name != NULL);

	/*
	 * We write lock the tree to avoid relocking every node
	 * individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);

	now = isc_time_now();

	for (unsigned int i = 0;
	     atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++)
	{
		dns_bcentry_t *prev = NULL;
		dns_bcentry_t *bad = bc->table[i];

		while (bad != NULL) {
			dns_bcentry_t *follow = bad->next;
			bool stale = isc_time_compare(&bad->expire, &now) < 0;

			if (stale || dns_name_issubdomain(bad->name, name)) {
				if (prev == NULL) {
					bc->table[i] = follow;
				} else {
					prev->next = follow;
				}
				isc_mem_put(bc->mctx, bad, sizeof(*bad));
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				prev = bad;
			}
			bad = follow;
		}
	}

	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}
/*
 * Dump the cache to 'fp' under the heading 'cachename', one line per
 * live entry ("; name/type [ttl N]", TTL in milliseconds). Entries that
 * have already expired are pruned instead of being printed.
 */
void
dns_badcache_print(dns_badcache_t *bc, const char *cachename, FILE *fp) {
	char namebuf[DNS_NAME_FORMATSIZE];
	char typebuf[DNS_RDATATYPE_FORMATSIZE];
	isc_time_t now;

	REQUIRE(VALID_BADCACHE(bc));
	REQUIRE(cachename != NULL);
	REQUIRE(fp != NULL);

	/*
	 * We write lock the tree to avoid relocking every node
	 * individually.
	 */
	RWLOCK(&bc->lock, isc_rwlocktype_write);
	fprintf(fp, ";\n; %s\n;\n", cachename);
	now = isc_time_now();

	for (unsigned int i = 0;
	     atomic_load_relaxed(&bc->count) > 0 && i < bc->size; i++)
	{
		dns_bcentry_t *prev = NULL;
		dns_bcentry_t *bad = bc->table[i];

		while (bad != NULL) {
			dns_bcentry_t *follow = bad->next;

			if (isc_time_compare(&bad->expire, &now) < 0) {
				/* Expired: unlink and free, do not print. */
				if (prev != NULL) {
					prev->next = follow;
				} else {
					bc->table[i] = follow;
				}
				isc_mem_put(bc->mctx, bad, sizeof(*bad));
				atomic_fetch_sub_relaxed(&bc->count, 1);
			} else {
				uint64_t ttl_ms;

				prev = bad;
				dns_name_format(bad->name, namebuf,
						sizeof(namebuf));
				dns_rdatatype_format(bad->type, typebuf,
						     sizeof(typebuf));
				ttl_ms = isc_time_microdiff(&bad->expire,
							    &now) /
					 1000;
				fprintf(fp,
					"; %s/%s [ttl "
					"%" PRIu64 "]\n",
					namebuf, typebuf, ttl_ms);
			}
			bad = follow;
		}
	}
	RWUNLOCK(&bc->lock, isc_rwlocktype_write);
}