2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-09-01 23:25:38 +00:00

fix: dev: Reduce the false sharing in dns_qpcache and dns_qpzone

Instead of having many node_lock_count * sizeof(<member>) arrays, pack all
the members into a qpcache_bucket_t that is cacheline aligned to prevent
false sharing between RWLocks.

Merge branch 'ondrej/prevent-nodelock-false-sharing' into 'main'

See merge request isc-projects/bind9!10072
This commit is contained in:
Ondřej Surý
2025-02-04 22:24:20 +00:00
4 changed files with 215 additions and 226 deletions

View File

@@ -14,6 +14,7 @@
/*! \file */ /*! \file */
#include <inttypes.h> #include <inttypes.h>
#include <stdalign.h>
#include <stdbool.h> #include <stdbool.h>
#include <sys/mman.h> #include <sys/mman.h>
@@ -21,15 +22,13 @@
#include <isc/async.h> #include <isc/async.h>
#include <isc/atomic.h> #include <isc/atomic.h>
#include <isc/file.h> #include <isc/file.h>
#include <isc/hash.h>
#include <isc/hashmap.h>
#include <isc/heap.h> #include <isc/heap.h>
#include <isc/hex.h> #include <isc/hex.h>
#include <isc/log.h> #include <isc/log.h>
#include <isc/loop.h> #include <isc/loop.h>
#include <isc/mem.h> #include <isc/mem.h>
#include <isc/mutex.h> #include <isc/mutex.h>
#include <isc/once.h> #include <isc/os.h>
#include <isc/queue.h> #include <isc/queue.h>
#include <isc/random.h> #include <isc/random.h>
#include <isc/refcount.h> #include <isc/refcount.h>
@@ -217,6 +216,41 @@ struct qpcnode {
isc_queue_node_t deadlink; isc_queue_node_t deadlink;
}; };
/*%
 * One bucket structure will be created for each loop, and
 * nodes in the database will be evenly distributed among buckets
 * to reduce contention between threads.
 *
 * Each bucket groups the per-bucket lock with the data that is indexed
 * by the same bucket number (a node's 'locknum'): the dead-node queue,
 * the LRU list and the TTL heap.
 */
typedef struct qpcache_bucket {
/*%
 * Temporary storage for stale cache nodes and dynamically
 * deleted nodes that await being cleaned up.
 */
isc_queue_t deadnodes;
/* Per-bucket lock. */
isc_rwlock_t lock;
/*
 * Linked list used to implement LRU cache cleaning.
 */
dns_slabheaderlist_t lru;
/*
 * The heap is used for TTL based expiry. Note that qpcache->hmctx
 * is the memory context to use for heap memory; this differs from
 * the main database memory context, which is qpcache->common.mctx.
 */
isc_heap_t *heap;
/*
 * Padding to prevent false sharing between locks.
 *
 * The array length is ISC_OS_CACHELINE_SIZE minus the combined size
 * of 'lru', 'heap' and 'lock' taken modulo ISC_OS_CACHELINE_SIZE, so
 * it is always between 1 and ISC_OS_CACHELINE_SIZE bytes; when the
 * combined size is already an exact multiple of the cache line size,
 * a full extra cache line of padding is added (never a zero-length
 * array).
 *
 * NOTE(review): sizeof(isc_queue_t) for 'deadnodes' is not part of
 * the sum above. Presumably isc_queue_t is itself sized to a whole
 * number of cache lines, otherwise sizeof(qpcache_bucket_t) will not
 * be a multiple of ISC_OS_CACHELINE_SIZE -- confirm against the
 * isc_queue_t definition.
 */
uint8_t __padding[ISC_OS_CACHELINE_SIZE -
(sizeof(dns_slabheaderlist_t) + sizeof(isc_heap_t *) +
sizeof(isc_rwlock_t)) %
ISC_OS_CACHELINE_SIZE];
} qpcache_bucket_t;
typedef struct qpcache qpcache_t; typedef struct qpcache qpcache_t;
struct qpcache { struct qpcache {
/* Unlocked. */ /* Unlocked. */
@@ -245,10 +279,6 @@ struct qpcache {
*/ */
isc_refcount_t references; isc_refcount_t references;
/* Locks for individual tree nodes */
unsigned int node_lock_count;
isc_rwlock_t *node_locks;
dns_stats_t *rrsetstats; dns_stats_t *rrsetstats;
isc_stats_t *cachestats; isc_stats_t *cachestats;
@@ -262,13 +292,6 @@ struct qpcache {
*/ */
uint32_t serve_stale_refresh; uint32_t serve_stale_refresh;
/*
* This is an array of linked lists used to implement the LRU cache.
* There will be node_lock_count linked lists here. Nodes in bucket 1
* will be placed on the linked list lru[1].
*/
dns_slabheaderlist_t *lru;
/* /*
* Start point % node_lock_count for next LRU cleanup. * Start point % node_lock_count for next LRU cleanup.
*/ */
@@ -280,24 +303,14 @@ struct qpcache {
*/ */
_Atomic(isc_stdtime_t) last_used; _Atomic(isc_stdtime_t) last_used;
/*%
* Temporary storage for stale cache nodes and dynamically deleted
* nodes that await being cleaned up.
*/
isc_queue_t *deadnodes;
/*
* Heaps. These are used for TTL based expiry in a cache,
* or for zone resigning in a zone DB. hmctx is the memory
* context to use for the heap (which differs from the main
* database memory context in the case of a cache).
*/
isc_mem_t *hmctx;
isc_heap_t **heaps;
/* Locked by tree_lock. */ /* Locked by tree_lock. */
dns_qp_t *tree; dns_qp_t *tree;
dns_qp_t *nsec; dns_qp_t *nsec;
isc_mem_t *hmctx; /* Memory context for the heaps */
size_t buckets_count;
qpcache_bucket_t buckets[]; /* attribute((counted_by(buckets_count))) */
}; };
#ifdef DNS_DB_NODETRACE #ifdef DNS_DB_NODETRACE
@@ -542,9 +555,11 @@ update_header(qpcache_t *qpdb, dns_slabheader_t *header, isc_stdtime_t now) {
/* To be checked: can we really assume this? XXXMLG */ /* To be checked: can we really assume this? XXXMLG */
INSIST(ISC_LINK_LINKED(header, link)); INSIST(ISC_LINK_LINKED(header, link));
ISC_LIST_UNLINK(qpdb->lru[HEADERNODE(header)->locknum], header, link); ISC_LIST_UNLINK(qpdb->buckets[HEADERNODE(header)->locknum].lru, header,
link);
header->last_used = now; header->last_used = now;
ISC_LIST_PREPEND(qpdb->lru[HEADERNODE(header)->locknum], header, link); ISC_LIST_PREPEND(qpdb->buckets[HEADERNODE(header)->locknum].lru, header,
link);
} }
/* /*
@@ -773,7 +788,7 @@ qpcnode_release(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t *nlocktypep,
* erefs (but NOT references!), upgrade the node lock, * erefs (but NOT references!), upgrade the node lock,
* decrement erefs again, and see if it's still zero. * decrement erefs again, and see if it's still zero.
*/ */
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum]; isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
qpcnode_erefs_increment(qpdb, node, *nlocktypep, qpcnode_erefs_increment(qpdb, node, *nlocktypep,
*tlocktypep DNS__DB_FLARG_PASS); *tlocktypep DNS__DB_FLARG_PASS);
NODE_FORCEUPGRADE(nlock, nlocktypep); NODE_FORCEUPGRADE(nlock, nlocktypep);
@@ -833,8 +848,9 @@ qpcnode_release(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t *nlocktypep,
*tlocktypep DNS__DB_FLARG_PASS); *tlocktypep DNS__DB_FLARG_PASS);
isc_queue_node_init(&node->deadlink); isc_queue_node_init(&node->deadlink);
if (!isc_queue_enqueue_entry(&qpdb->deadnodes[node->locknum], if (!isc_queue_enqueue_entry(
node, deadlink)) &qpdb->buckets[node->locknum].deadnodes, node,
deadlink))
{ {
/* Queue was empty, trigger new cleaning */ /* Queue was empty, trigger new cleaning */
isc_loop_t *loop = isc_loop_get(qpdb->loopmgr, isc_loop_t *loop = isc_loop_get(qpdb->loopmgr,
@@ -1161,7 +1177,8 @@ setup_delegation(qpc_search_t *search, dns_dbnode_t **nodep,
} }
if (rdataset != NULL) { if (rdataset != NULL) {
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &search->qpdb->node_locks[node->locknum]; isc_rwlock_t *nlock =
&search->qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
bindrdataset(search->qpdb, node, search->zonecut_header, bindrdataset(search->qpdb, node, search->zonecut_header,
search->now, nlocktype, tlocktype, search->now, nlocktype, tlocktype,
@@ -1299,7 +1316,7 @@ check_zonecut(qpcnode_t *node, void *arg DNS__DB_FLARG) {
REQUIRE(search->zonecut == NULL); REQUIRE(search->zonecut == NULL);
nlock = &search->qpdb->node_locks[node->locknum]; nlock = &search->qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
/* /*
@@ -1372,7 +1389,7 @@ find_deepest_zonecut(qpc_search_t *search, qpcnode_t *node,
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
dns_qpchain_node(&search->chain, i, NULL, (void **)&node, NULL); dns_qpchain_node(&search->chain, i, NULL, (void **)&node, NULL);
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
@@ -1522,7 +1539,7 @@ find_coveringnsec(qpc_search_t *search, const dns_name_t *name,
} }
dns_name_copy(&node->name, fname); dns_name_copy(&node->name, fname);
nlock = &search->qpdb->node_locks[node->locknum]; nlock = &search->qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
for (header = node->data; header != NULL; header = header_next) { for (header = node->data; header != NULL; header = header_next) {
header_next = header->next; header_next = header->next;
@@ -1700,7 +1717,7 @@ qpcache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
* We now go looking for rdata... * We now go looking for rdata...
*/ */
nlock = &search.qpdb->node_locks[node->locknum]; nlock = &search.qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
/* /*
@@ -2004,7 +2021,7 @@ tree_exit:
if (search.need_cleanup) { if (search.need_cleanup) {
node = search.zonecut; node = search.zonecut;
INSIST(node != NULL); INSIST(node != NULL);
nlock = &search.qpdb->node_locks[node->locknum]; nlock = &search.qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
qpcnode_release(search.qpdb, node, &nlocktype, &tlocktype, qpcnode_release(search.qpdb, node, &nlocktype, &tlocktype,
@@ -2088,7 +2105,7 @@ qpcache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options,
* We now go looking for an NS rdataset at the node. * We now go looking for an NS rdataset at the node.
*/ */
nlock = &search.qpdb->node_locks[node->locknum]; nlock = &search.qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
for (header = node->data; header != NULL; header = header_next) { for (header = node->data; header != NULL; header = header_next) {
@@ -2210,7 +2227,7 @@ qpcache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
now = isc_stdtime_now(); now = isc_stdtime_now();
} }
nlock = &qpdb->node_locks[qpnode->locknum]; nlock = &qpdb->buckets[qpnode->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
matchtype = DNS_TYPEPAIR_VALUE(type, covers); matchtype = DNS_TYPEPAIR_VALUE(type, covers);
@@ -2355,7 +2372,7 @@ expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) {
dns_slabheader_t *header = data; dns_slabheader_t *header = data;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlocktype_t tlocktype = isc_rwlocktype_none; isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[qpnode->locknum]; isc_rwlock_t *nlock = &qpdb->buckets[qpnode->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
expireheader(header, &nlocktype, &tlocktype, expireheader(header, &nlocktype, &tlocktype,
@@ -2381,10 +2398,10 @@ expire_lru_headers(qpcache_t *qpdb, unsigned int locknum,
dns_slabheader_t *header = NULL; dns_slabheader_t *header = NULL;
size_t purged = 0; size_t purged = 0;
for (header = ISC_LIST_TAIL(qpdb->lru[locknum]); for (header = ISC_LIST_TAIL(qpdb->buckets[locknum].lru);
header != NULL && header->last_used <= qpdb->last_used && header != NULL && header->last_used <= qpdb->last_used &&
purged <= purgesize; purged <= purgesize;
header = ISC_LIST_TAIL(qpdb->lru[locknum])) header = ISC_LIST_TAIL(qpdb->buckets[locknum].lru))
{ {
size_t header_size = rdataset_size(header); size_t header_size = rdataset_size(header);
@@ -2395,7 +2412,7 @@ expire_lru_headers(qpcache_t *qpdb, unsigned int locknum,
* referenced any more (so unlinking is safe) since the * referenced any more (so unlinking is safe) since the
* TTL will be reset to 0. * TTL will be reset to 0.
*/ */
ISC_LIST_UNLINK(qpdb->lru[locknum], header, link); ISC_LIST_UNLINK(qpdb->buckets[locknum].lru, header, link);
expireheader(header, nlocktypep, tlocktypep, expireheader(header, nlocktypep, tlocktypep,
dns_expire_lru DNS__DB_FLARG_PASS); dns_expire_lru DNS__DB_FLARG_PASS);
purged += header_size; purged += header_size;
@@ -2417,7 +2434,7 @@ expire_lru_headers(qpcache_t *qpdb, unsigned int locknum,
static void static void
overmem(qpcache_t *qpdb, dns_slabheader_t *newheader, overmem(qpcache_t *qpdb, dns_slabheader_t *newheader,
isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) { isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) {
uint32_t locknum_start = qpdb->lru_sweep++ % qpdb->node_lock_count; uint32_t locknum_start = qpdb->lru_sweep++ % qpdb->buckets_count;
uint32_t locknum = locknum_start; uint32_t locknum = locknum_start;
size_t purgesize, purged = 0; size_t purgesize, purged = 0;
isc_stdtime_t min_last_used = 0; isc_stdtime_t min_last_used = 0;
@@ -2437,7 +2454,7 @@ overmem(qpcache_t *qpdb, dns_slabheader_t *newheader,
again: again:
do { do {
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[locknum]; isc_rwlock_t *nlock = &qpdb->buckets[locknum].lock;
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
purged += expire_lru_headers( purged += expire_lru_headers(
@@ -2448,14 +2465,15 @@ again:
* Work out the oldest remaining last_used values of the list * Work out the oldest remaining last_used values of the list
* tails as we walk across the array of lru lists. * tails as we walk across the array of lru lists.
*/ */
dns_slabheader_t *header = ISC_LIST_TAIL(qpdb->lru[locknum]); dns_slabheader_t *header =
ISC_LIST_TAIL(qpdb->buckets[locknum].lru);
if (header != NULL && if (header != NULL &&
(min_last_used == 0 || header->last_used < min_last_used)) (min_last_used == 0 || header->last_used < min_last_used))
{ {
min_last_used = header->last_used; min_last_used = header->last_used;
} }
NODE_UNLOCK(nlock, &nlocktype); NODE_UNLOCK(nlock, &nlocktype);
locknum = (locknum + 1) % qpdb->node_lock_count; locknum = (locknum + 1) % qpdb->buckets_count;
} while (locknum != locknum_start && purged <= purgesize); } while (locknum != locknum_start && purged <= purgesize);
/* /*
@@ -2527,40 +2545,15 @@ qpcache__destroy(qpcache_t *qpdb) {
if (dns_name_dynamic(&qpdb->common.origin)) { if (dns_name_dynamic(&qpdb->common.origin)) {
dns_name_free(&qpdb->common.origin, qpdb->common.mctx); dns_name_free(&qpdb->common.origin, qpdb->common.mctx);
} }
for (i = 0; i < qpdb->node_lock_count; i++) { for (i = 0; i < qpdb->buckets_count; i++) {
NODE_DESTROYLOCK(&qpdb->node_locks[i]); NODE_DESTROYLOCK(&qpdb->buckets[i].lock);
}
/* INSIST(ISC_LIST_EMPTY(qpdb->buckets[i].lru));
* Clean up LRU / re-signing order lists.
*/
if (qpdb->lru != NULL) {
for (i = 0; i < qpdb->node_lock_count; i++) {
INSIST(ISC_LIST_EMPTY(qpdb->lru[i]));
}
isc_mem_cput(qpdb->common.mctx, qpdb->lru,
qpdb->node_lock_count,
sizeof(dns_slabheaderlist_t));
}
/*
* Clean up dead node buckets.
*/
for (i = 0; i < qpdb->node_lock_count; i++) {
INSIST(isc_queue_empty(&qpdb->deadnodes[i]));
isc_queue_destroy(&qpdb->deadnodes[i]);
}
isc_mem_cput(qpdb->common.mctx, qpdb->deadnodes, qpdb->node_lock_count,
sizeof(qpdb->deadnodes[0]));
/* INSIST(isc_queue_empty(&qpdb->buckets[i].deadnodes));
* Clean up heap objects. isc_queue_destroy(&qpdb->buckets[i].deadnodes);
*/
if (qpdb->heaps != NULL) { isc_heap_destroy(&qpdb->buckets[i].heap);
for (i = 0; i < qpdb->node_lock_count; i++) {
isc_heap_destroy(&qpdb->heaps[i]);
}
isc_mem_cput(qpdb->hmctx, qpdb->heaps, qpdb->node_lock_count,
sizeof(isc_heap_t *));
} }
if (qpdb->rrsetstats != NULL) { if (qpdb->rrsetstats != NULL) {
@@ -2570,8 +2563,6 @@ qpcache__destroy(qpcache_t *qpdb) {
isc_stats_detach(&qpdb->cachestats); isc_stats_detach(&qpdb->cachestats);
} }
isc_mem_cput(qpdb->common.mctx, qpdb->node_locks, qpdb->node_lock_count,
sizeof(qpdb->node_locks[0]));
TREE_DESTROYLOCK(&qpdb->tree_lock); TREE_DESTROYLOCK(&qpdb->tree_lock);
isc_refcount_destroy(&qpdb->references); isc_refcount_destroy(&qpdb->references);
isc_refcount_destroy(&qpdb->common.references); isc_refcount_destroy(&qpdb->common.references);
@@ -2581,7 +2572,9 @@ qpcache__destroy(qpcache_t *qpdb) {
qpdb->common.impmagic = 0; qpdb->common.impmagic = 0;
isc_mem_detach(&qpdb->hmctx); isc_mem_detach(&qpdb->hmctx);
isc_mem_putanddetach(&qpdb->common.mctx, qpdb, sizeof(*qpdb)); isc_mem_putanddetach(&qpdb->common.mctx, qpdb,
sizeof(*qpdb) + qpdb->buckets_count *
sizeof(qpdb->buckets[0]));
} }
static void static void
@@ -2603,18 +2596,19 @@ cleanup_deadnodes(void *arg) {
uint16_t locknum = isc_tid(); uint16_t locknum = isc_tid();
isc_rwlocktype_t tlocktype = isc_rwlocktype_none; isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[locknum]; isc_rwlock_t *nlock = &qpdb->buckets[locknum].lock;
qpcnode_t *qpnode = NULL, *qpnext = NULL; qpcnode_t *qpnode = NULL, *qpnext = NULL;
isc_queue_t deadnodes; isc_queue_t deadnodes;
INSIST(locknum < qpdb->node_lock_count); INSIST(locknum < qpdb->buckets_count);
isc_queue_init(&deadnodes); isc_queue_init(&deadnodes);
TREE_WRLOCK(&qpdb->tree_lock, &tlocktype); TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
RUNTIME_CHECK(isc_queue_splice(&deadnodes, &qpdb->deadnodes[locknum])); RUNTIME_CHECK(isc_queue_splice(&deadnodes,
&qpdb->buckets[locknum].deadnodes));
isc_queue_for_each_entry_safe(&deadnodes, qpnode, qpnext, deadlink) { isc_queue_for_each_entry_safe(&deadnodes, qpnode, qpnext, deadlink) {
qpcnode_release(qpdb, qpnode, &nlocktype, &tlocktype, false); qpcnode_release(qpdb, qpnode, &nlocktype, &tlocktype, false);
} }
@@ -2636,7 +2630,7 @@ static void
reactivate_node(qpcache_t *qpdb, qpcnode_t *node, reactivate_node(qpcache_t *qpdb, qpcnode_t *node,
isc_rwlocktype_t tlocktype ISC_ATTR_UNUSED DNS__DB_FLARG) { isc_rwlocktype_t tlocktype ISC_ATTR_UNUSED DNS__DB_FLARG) {
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum]; isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
qpcnode_acquire(qpdb, node, nlocktype, tlocktype DNS__DB_FLARG_PASS); qpcnode_acquire(qpdb, node, nlocktype, tlocktype DNS__DB_FLARG_PASS);
@@ -2649,11 +2643,9 @@ new_qpcnode(qpcache_t *qpdb, const dns_name_t *name) {
*newdata = (qpcnode_t){ *newdata = (qpcnode_t){
.name = DNS_NAME_INITEMPTY, .name = DNS_NAME_INITEMPTY,
.references = ISC_REFCOUNT_INITIALIZER(1), .references = ISC_REFCOUNT_INITIALIZER(1),
.locknum = isc_random_uniform(qpdb->node_lock_count), .locknum = isc_random_uniform(qpdb->buckets_count),
}; };
INSIST(newdata->locknum < qpdb->node_lock_count);
isc_mem_attach(qpdb->common.mctx, &newdata->mctx); isc_mem_attach(qpdb->common.mctx, &newdata->mctx);
dns_name_dupwithoffsets(name, newdata->mctx, &newdata->name); dns_name_dupwithoffsets(name, newdata->mctx, &newdata->name);
@@ -2728,7 +2720,7 @@ qpcache_detachnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) {
node = (qpcnode_t *)(*nodep); node = (qpcnode_t *)(*nodep);
*nodep = NULL; *nodep = NULL;
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
/* /*
* We can't destroy qpcache while holding a nodelock, so * We can't destroy qpcache while holding a nodelock, so
@@ -3022,11 +3014,13 @@ find_header:
} }
if (header->last_used != now) { if (header->last_used != now) {
ISC_LIST_UNLINK( ISC_LIST_UNLINK(
qpdb->lru[HEADERNODE(header)->locknum], qpdb->buckets[HEADERNODE(header)->locknum]
.lru,
header, link); header, link);
header->last_used = now; header->last_used = now;
ISC_LIST_PREPEND( ISC_LIST_PREPEND(
qpdb->lru[HEADERNODE(header)->locknum], qpdb->buckets[HEADERNODE(header)->locknum]
.lru,
header, link); header, link);
} }
if (header->noqname == NULL && if (header->noqname == NULL &&
@@ -3084,11 +3078,13 @@ find_header:
} }
if (header->last_used != now) { if (header->last_used != now) {
ISC_LIST_UNLINK( ISC_LIST_UNLINK(
qpdb->lru[HEADERNODE(header)->locknum], qpdb->buckets[HEADERNODE(header)->locknum]
.lru,
header, link); header, link);
header->last_used = now; header->last_used = now;
ISC_LIST_PREPEND( ISC_LIST_PREPEND(
qpdb->lru[HEADERNODE(header)->locknum], qpdb->buckets[HEADERNODE(header)->locknum]
.lru,
header, link); header, link);
} }
if (header->noqname == NULL && if (header->noqname == NULL &&
@@ -3117,15 +3113,14 @@ find_header:
idx = HEADERNODE(newheader)->locknum; idx = HEADERNODE(newheader)->locknum;
if (ZEROTTL(newheader)) { if (ZEROTTL(newheader)) {
newheader->last_used = qpdb->last_used + 1; newheader->last_used = qpdb->last_used + 1;
ISC_LIST_APPEND(qpdb->lru[idx], newheader, ISC_LIST_APPEND(qpdb->buckets[idx].lru,
link); newheader, link);
} else { } else {
ISC_LIST_PREPEND(qpdb->lru[idx], newheader, ISC_LIST_PREPEND(qpdb->buckets[idx].lru,
link); newheader, link);
} }
INSIST(qpdb->heaps != NULL); isc_heap_insert(qpdb->buckets[idx].heap, newheader);
isc_heap_insert(qpdb->heaps[idx], newheader); newheader->heap = qpdb->buckets[idx].heap;
newheader->heap = qpdb->heaps[idx];
/* /*
* There are no other references to 'header' when * There are no other references to 'header' when
@@ -3142,16 +3137,15 @@ find_header:
dns_slabheader_destroy(&header); dns_slabheader_destroy(&header);
} else { } else {
idx = HEADERNODE(newheader)->locknum; idx = HEADERNODE(newheader)->locknum;
INSIST(qpdb->heaps != NULL); isc_heap_insert(qpdb->buckets[idx].heap, newheader);
isc_heap_insert(qpdb->heaps[idx], newheader); newheader->heap = qpdb->buckets[idx].heap;
newheader->heap = qpdb->heaps[idx];
if (ZEROTTL(newheader)) { if (ZEROTTL(newheader)) {
newheader->last_used = qpdb->last_used + 1; newheader->last_used = qpdb->last_used + 1;
ISC_LIST_APPEND(qpdb->lru[idx], newheader, ISC_LIST_APPEND(qpdb->buckets[idx].lru,
link); newheader, link);
} else { } else {
ISC_LIST_PREPEND(qpdb->lru[idx], newheader, ISC_LIST_PREPEND(qpdb->buckets[idx].lru,
link); newheader, link);
} }
if (topheader_prev != NULL) { if (topheader_prev != NULL) {
topheader_prev->next = newheader; topheader_prev->next = newheader;
@@ -3181,12 +3175,14 @@ find_header:
} }
idx = HEADERNODE(newheader)->locknum; idx = HEADERNODE(newheader)->locknum;
isc_heap_insert(qpdb->heaps[idx], newheader); isc_heap_insert(qpdb->buckets[idx].heap, newheader);
newheader->heap = qpdb->heaps[idx]; newheader->heap = qpdb->buckets[idx].heap;
if (ZEROTTL(newheader)) { if (ZEROTTL(newheader)) {
ISC_LIST_APPEND(qpdb->lru[idx], newheader, link); ISC_LIST_APPEND(qpdb->buckets[idx].lru, newheader,
link);
} else { } else {
ISC_LIST_PREPEND(qpdb->lru[idx], newheader, link); ISC_LIST_PREPEND(qpdb->buckets[idx].lru, newheader,
link);
} }
if (topheader != NULL) { if (topheader != NULL) {
@@ -3470,7 +3466,7 @@ qpcache_addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
overmem(qpdb, newheader, &tlocktype DNS__DB_FLARG_PASS); overmem(qpdb, newheader, &tlocktype DNS__DB_FLARG_PASS);
} }
nlock = &qpdb->node_locks[qpnode->locknum]; nlock = &qpdb->buckets[qpnode->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
@@ -3557,7 +3553,7 @@ qpcache_deleterdataset(dns_db_t *db, dns_dbnode_t *node,
setttl(newheader, 0); setttl(newheader, 0);
atomic_init(&newheader->attributes, DNS_SLABHEADERATTR_NONEXISTENT); atomic_init(&newheader->attributes, DNS_SLABHEADERATTR_NONEXISTENT);
nlock = &qpdb->node_locks[qpnode->locknum]; nlock = &qpdb->buckets[qpnode->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
result = add(qpdb, qpnode, NULL, newheader, DNS_DBADD_FORCE, false, result = add(qpdb, qpnode, NULL, newheader, DNS_DBADD_FORCE, false,
NULL, 0, nlocktype, NULL, 0, nlocktype,
@@ -3596,7 +3592,7 @@ locknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) {
qpcache_t *qpdb = (qpcache_t *)db; qpcache_t *qpdb = (qpcache_t *)db;
qpcnode_t *qpnode = (qpcnode_t *)node; qpcnode_t *qpnode = (qpcnode_t *)node;
RWLOCK(&qpdb->node_locks[qpnode->locknum], type); RWLOCK(&qpdb->buckets[qpnode->locknum].lock, type);
} }
static void static void
@@ -3604,7 +3600,7 @@ unlocknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) {
qpcache_t *qpdb = (qpcache_t *)db; qpcache_t *qpdb = (qpcache_t *)db;
qpcnode_t *qpnode = (qpcnode_t *)node; qpcnode_t *qpnode = (qpcnode_t *)node;
RWUNLOCK(&qpdb->node_locks[qpnode->locknum], type); RWUNLOCK(&qpdb->buckets[qpnode->locknum].lock, type);
} }
isc_result_t isc_result_t
@@ -3616,12 +3612,15 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
isc_mem_t *hmctx = mctx; isc_mem_t *hmctx = mctx;
isc_loop_t *loop = isc_loop(); isc_loop_t *loop = isc_loop();
int i; int i;
isc_loopmgr_t *loopmgr = isc_loop_getloopmgr(loop);
size_t nloops = isc_loopmgr_nloops(loopmgr);
/* This database implementation only supports cache semantics */ /* This database implementation only supports cache semantics */
REQUIRE(type == dns_dbtype_cache); REQUIRE(type == dns_dbtype_cache);
REQUIRE(loop != NULL); REQUIRE(loop != NULL);
qpdb = isc_mem_get(mctx, sizeof(*qpdb)); qpdb = isc_mem_get(mctx,
sizeof(*qpdb) + nloops * sizeof(qpdb->buckets[0]));
*qpdb = (qpcache_t){ *qpdb = (qpcache_t){
.common.methods = &qpdb_cachemethods, .common.methods = &qpdb_cachemethods,
.common.origin = DNS_NAME_INITEMPTY, .common.origin = DNS_NAME_INITEMPTY,
@@ -3630,6 +3629,7 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
.common.references = 1, .common.references = 1,
.loopmgr = isc_loop_getloopmgr(loop), .loopmgr = isc_loop_getloopmgr(loop),
.references = 1, .references = 1,
.buckets_count = nloops,
}; };
/* /*
@@ -3642,38 +3642,19 @@ dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
isc_rwlock_init(&qpdb->lock); isc_rwlock_init(&qpdb->lock);
TREE_INITLOCK(&qpdb->tree_lock); TREE_INITLOCK(&qpdb->tree_lock);
qpdb->node_lock_count = isc_loopmgr_nloops(qpdb->loopmgr); qpdb->buckets_count = isc_loopmgr_nloops(qpdb->loopmgr);
qpdb->node_locks = isc_mem_cget(mctx, qpdb->node_lock_count,
sizeof(qpdb->node_locks[0]));
dns_rdatasetstats_create(mctx, &qpdb->rrsetstats); dns_rdatasetstats_create(mctx, &qpdb->rrsetstats);
qpdb->lru = isc_mem_cget(mctx, qpdb->node_lock_count, for (i = 0; i < (int)qpdb->buckets_count; i++) {
sizeof(dns_slabheaderlist_t)); ISC_LIST_INIT(qpdb->buckets[i].lru);
for (i = 0; i < (int)qpdb->node_lock_count; i++) {
ISC_LIST_INIT(qpdb->lru[i]);
}
/* qpdb->buckets[i].heap = NULL;
* Create the heaps.
*/
qpdb->heaps = isc_mem_cget(hmctx, qpdb->node_lock_count,
sizeof(isc_heap_t *));
for (i = 0; i < (int)qpdb->node_lock_count; i++) {
isc_heap_create(hmctx, ttl_sooner, set_index, 0, isc_heap_create(hmctx, ttl_sooner, set_index, 0,
&qpdb->heaps[i]); &qpdb->buckets[i].heap);
}
/* isc_queue_init(&qpdb->buckets[i].deadnodes);
* Create deadnode lists.
*/
qpdb->deadnodes = isc_mem_cget(mctx, qpdb->node_lock_count,
sizeof(qpdb->deadnodes[0]));
for (i = 0; i < (int)(qpdb->node_lock_count); i++) {
isc_queue_init(&qpdb->deadnodes[i]);
}
for (i = 0; i < (int)(qpdb->node_lock_count); i++) { NODE_INITLOCK(&qpdb->buckets[i].lock);
NODE_INITLOCK(&qpdb->node_locks[i]);
} }
/* /*
@@ -3756,7 +3737,7 @@ rdatasetiter_first(dns_rdatasetiter_t *it DNS__DB_FLARG) {
qpcnode_t *qpnode = (qpcnode_t *)iterator->common.node; qpcnode_t *qpnode = (qpcnode_t *)iterator->common.node;
dns_slabheader_t *header = NULL, *top_next = NULL; dns_slabheader_t *header = NULL, *top_next = NULL;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[qpnode->locknum]; isc_rwlock_t *nlock = &qpdb->buckets[qpnode->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
@@ -3802,7 +3783,7 @@ rdatasetiter_next(dns_rdatasetiter_t *it DNS__DB_FLARG) {
dns_typepair_t type, negtype; dns_typepair_t type, negtype;
dns_rdatatype_t rdtype, covers; dns_rdatatype_t rdtype, covers;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[qpnode->locknum]; isc_rwlock_t *nlock = &qpdb->buckets[qpnode->locknum].lock;
bool expiredok = EXPIREDOK(iterator); bool expiredok = EXPIREDOK(iterator);
header = iterator->current; header = iterator->current;
@@ -3890,7 +3871,7 @@ rdatasetiter_current(dns_rdatasetiter_t *it,
qpcnode_t *qpnode = (qpcnode_t *)iterator->common.node; qpcnode_t *qpnode = (qpcnode_t *)iterator->common.node;
dns_slabheader_t *header = NULL; dns_slabheader_t *header = NULL;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[qpnode->locknum]; isc_rwlock_t *nlock = &qpdb->buckets[qpnode->locknum].lock;
header = iterator->current; header = iterator->current;
REQUIRE(header != NULL); REQUIRE(header != NULL);
@@ -3934,7 +3915,7 @@ dereference_iter_node(qpc_dbit_t *qpdbiter DNS__DB_FLARG) {
REQUIRE(tlocktype != isc_rwlocktype_write); REQUIRE(tlocktype != isc_rwlocktype_write);
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
qpcnode_release(qpdb, node, &nlocktype, &qpdbiter->tree_locked, qpcnode_release(qpdb, node, &nlocktype, &qpdbiter->tree_locked,
false DNS__DB_FLARG_PASS); false DNS__DB_FLARG_PASS);
@@ -4252,7 +4233,7 @@ deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED,
if (ISC_LINK_LINKED(header, link)) { if (ISC_LINK_LINKED(header, link)) {
int idx = HEADERNODE(header)->locknum; int idx = HEADERNODE(header)->locknum;
ISC_LIST_UNLINK(qpdb->lru[idx], header, link); ISC_LIST_UNLINK(qpdb->buckets[idx].lru, header, link);
} }
if (header->noqname != NULL) { if (header->noqname != NULL) {
@@ -4270,7 +4251,7 @@ static void
expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum, expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep, isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG) { isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG) {
isc_heap_t *heap = qpdb->heaps[locknum]; isc_heap_t *heap = qpdb->buckets[locknum].heap;
for (size_t i = 0; i < DNS_QPDB_EXPIRE_TTL_COUNT; i++) { for (size_t i = 0; i < DNS_QPDB_EXPIRE_TTL_COUNT; i++) {
dns_slabheader_t *header = isc_heap_element(heap, 1); dns_slabheader_t *header = isc_heap_element(heap, 1);

View File

@@ -14,6 +14,7 @@
/*! \file */ /*! \file */
#include <inttypes.h> #include <inttypes.h>
#include <stdalign.h>
#include <stdbool.h> #include <stdbool.h>
#include <sys/mman.h> #include <sys/mman.h>
@@ -27,7 +28,7 @@
#include <isc/loop.h> #include <isc/loop.h>
#include <isc/mem.h> #include <isc/mem.h>
#include <isc/mutex.h> #include <isc/mutex.h>
#include <isc/once.h> #include <isc/os.h>
#include <isc/random.h> #include <isc/random.h>
#include <isc/refcount.h> #include <isc/refcount.h>
#include <isc/result.h> #include <isc/result.h>
@@ -90,7 +91,7 @@
#define QPDB_ATTR_LOADED 0x01 #define QPDB_ATTR_LOADED 0x01
#define QPDB_ATTR_LOADING 0x02 #define QPDB_ATTR_LOADING 0x02
#define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */ #define DEFAULT_BUCKETS_COUNT 17 /*%< Should be prime. */
#define QPDBITER_NSEC3_ORIGIN_NODE(qpdb, iterator) \ #define QPDBITER_NSEC3_ORIGIN_NODE(qpdb, iterator) \
((iterator)->current == &(iterator)->nsec3iter && \ ((iterator)->current == &(iterator)->nsec3iter && \
@@ -184,6 +185,15 @@ struct qpznode {
void *data; void *data;
}; };
/*
 * Per-bucket state for the zone database. The only member besides the
 * padding is the bucket's rwlock; the padding spaces consecutive array
 * elements apart so that neighbouring locks do not share a cache line.
 *
 * NOTE(review): the struct tag is 'qpcache_bucket' although the typedef
 * is 'qpzone_bucket_t'. This looks like a copy-and-paste leftover from
 * the cache implementation; confirm that nothing refers to the tag
 * before renaming it to 'qpzone_bucket'.
 */
typedef struct qpcache_bucket {
/* Per-bucket lock. */
isc_rwlock_t lock;
/*
 * Padding to prevent false sharing between locks.
 *
 * The length is ISC_OS_CACHELINE_SIZE minus sizeof(isc_rwlock_t)
 * modulo ISC_OS_CACHELINE_SIZE, i.e. between 1 and
 * ISC_OS_CACHELINE_SIZE bytes, rounding the structure size up to a
 * multiple of the cache line size.
 */
uint8_t __padding[ISC_OS_CACHELINE_SIZE -
(sizeof(isc_rwlock_t)) % ISC_OS_CACHELINE_SIZE];
} qpzone_bucket_t;
struct qpzonedb { struct qpzonedb {
/* Unlocked. */ /* Unlocked. */
dns_db_t common; dns_db_t common;
@@ -207,15 +217,10 @@ struct qpzonedb {
*/ */
isc_refcount_t references; isc_refcount_t references;
/* Locks for tree nodes */
int node_lock_count;
isc_rwlock_t *node_locks;
qpznode_t *origin; qpznode_t *origin;
qpznode_t *nsec3_origin; qpznode_t *nsec3_origin;
isc_stats_t *gluecachestats; isc_stats_t *gluecachestats;
/* Locked by lock. */ /* Locked by lock. */
unsigned int active;
unsigned int attributes; unsigned int attributes;
uint32_t current_serial; uint32_t current_serial;
uint32_t least_serial; uint32_t least_serial;
@@ -233,6 +238,9 @@ struct qpzonedb {
dns_qpmulti_t *tree; /* Main QP trie for data storage */ dns_qpmulti_t *tree; /* Main QP trie for data storage */
dns_qpmulti_t *nsec; /* NSEC nodes only */ dns_qpmulti_t *nsec; /* NSEC nodes only */
dns_qpmulti_t *nsec3; /* NSEC3 nodes only */ dns_qpmulti_t *nsec3; /* NSEC3 nodes only */
size_t buckets_count;
qpzone_bucket_t buckets[]; /* attribute((counted_by(buckets_count))) */
}; };
#ifdef DNS_DB_NODETRACE #ifdef DNS_DB_NODETRACE
@@ -499,8 +507,8 @@ free_db_rcu(struct rcu_head *rcu_head) {
if (dns_name_dynamic(&qpdb->common.origin)) { if (dns_name_dynamic(&qpdb->common.origin)) {
dns_name_free(&qpdb->common.origin, qpdb->common.mctx); dns_name_free(&qpdb->common.origin, qpdb->common.mctx);
} }
for (int i = 0; i < qpdb->node_lock_count; i++) { for (size_t i = 0; i < qpdb->buckets_count; i++) {
NODE_DESTROYLOCK(&qpdb->node_locks[i]); NODE_DESTROYLOCK(&qpdb->buckets[i].lock);
} }
isc_heap_destroy(&qpdb->heap); isc_heap_destroy(&qpdb->heap);
@@ -509,8 +517,6 @@ free_db_rcu(struct rcu_head *rcu_head) {
isc_stats_detach(&qpdb->gluecachestats); isc_stats_detach(&qpdb->gluecachestats);
} }
isc_mem_cput(qpdb->common.mctx, qpdb->node_locks, qpdb->node_lock_count,
sizeof(qpdb->node_locks[0]));
if (qpdb->loop != NULL) { if (qpdb->loop != NULL) {
isc_loop_detach(&qpdb->loop); isc_loop_detach(&qpdb->loop);
} }
@@ -526,7 +532,9 @@ free_db_rcu(struct rcu_head *rcu_head) {
INSIST(!cds_lfht_destroy(qpdb->common.update_listeners, NULL)); INSIST(!cds_lfht_destroy(qpdb->common.update_listeners, NULL));
} }
isc_mem_putanddetach(&qpdb->common.mctx, qpdb, sizeof(*qpdb)); isc_mem_putanddetach(&qpdb->common.mctx, qpdb,
sizeof(*qpdb) + qpdb->buckets_count *
sizeof(qpdb->buckets[0]));
} }
static void static void
@@ -587,10 +595,11 @@ new_qpznode(qpzonedb_t *qpdb, const dns_name_t *name) {
*newdata = (qpznode_t){ *newdata = (qpznode_t){
.name = DNS_NAME_INITEMPTY, .name = DNS_NAME_INITEMPTY,
.references = ISC_REFCOUNT_INITIALIZER(1), .references = ISC_REFCOUNT_INITIALIZER(1),
.locknum = isc_random_uniform(qpdb->buckets_count),
}; };
newdata->locknum = dns_name_hash(name) % qpdb->node_lock_count;
dns_name_dupwithoffsets(name, qpdb->common.mctx, &newdata->name);
isc_mem_attach(qpdb->common.mctx, &newdata->mctx); isc_mem_attach(qpdb->common.mctx, &newdata->mctx);
dns_name_dupwithoffsets(name, qpdb->common.mctx, &newdata->name);
#if DNS_DB_NODETRACE #if DNS_DB_NODETRACE
fprintf(stderr, "new_qpznode:%s:%s:%d:%p->references = 1\n", __func__, fprintf(stderr, "new_qpznode:%s:%s:%d:%p->references = 1\n", __func__,
@@ -627,12 +636,14 @@ dns__qpzone_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
isc_result_t result; isc_result_t result;
dns_qp_t *qp = NULL; dns_qp_t *qp = NULL;
qpdb = isc_mem_get(mctx, sizeof(*qpdb)); qpdb = isc_mem_get(mctx,
sizeof(*qpdb) + DEFAULT_BUCKETS_COUNT *
sizeof(qpdb->buckets[0]));
*qpdb = (qpzonedb_t){ *qpdb = (qpzonedb_t){
.common.origin = DNS_NAME_INITEMPTY, .common.origin = DNS_NAME_INITEMPTY,
.common.rdclass = rdclass, .common.rdclass = rdclass,
.common.references = ISC_REFCOUNT_INITIALIZER(1), .common.references = ISC_REFCOUNT_INITIALIZER(1),
.node_lock_count = DEFAULT_NODE_LOCK_COUNT, .buckets_count = DEFAULT_BUCKETS_COUNT,
.current_serial = 1, .current_serial = 1,
.least_serial = 1, .least_serial = 1,
.next_serial = 2, .next_serial = 2,
@@ -647,17 +658,12 @@ dns__qpzone_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
isc_rwlock_init(&qpdb->lock); isc_rwlock_init(&qpdb->lock);
qpdb->node_locks = isc_mem_cget(mctx, qpdb->node_lock_count,
sizeof(qpdb->node_locks[0]));
qpdb->common.update_listeners = cds_lfht_new(16, 16, 0, 0, NULL); qpdb->common.update_listeners = cds_lfht_new(16, 16, 0, 0, NULL);
isc_heap_create(mctx, resign_sooner, set_index, 0, &qpdb->heap); isc_heap_create(mctx, resign_sooner, set_index, 0, &qpdb->heap);
qpdb->active = qpdb->node_lock_count; for (size_t i = 0; i < qpdb->buckets_count; i++) {
NODE_INITLOCK(&qpdb->buckets[i].lock);
for (int i = 0; i < qpdb->node_lock_count; i++) {
NODE_INITLOCK(&qpdb->node_locks[i]);
} }
/* /*
@@ -943,7 +949,7 @@ qpznode_release(qpzonedb_t *qpdb, qpznode_t *node, uint32_t least_serial,
* erefs (but NOT references!), upgrade the node lock, * erefs (but NOT references!), upgrade the node lock,
* decrement erefs again, and see if it's still zero. * decrement erefs again, and see if it's still zero.
*/ */
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum]; isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
qpznode_erefs_increment(qpdb, node DNS__DB_FLARG_PASS); qpznode_erefs_increment(qpdb, node DNS__DB_FLARG_PASS);
NODE_FORCEUPGRADE(nlock, nlocktypep); NODE_FORCEUPGRADE(nlock, nlocktypep);
if (!qpznode_erefs_decrement(qpdb, node DNS__DB_FLARG_PASS)) { if (!qpznode_erefs_decrement(qpdb, node DNS__DB_FLARG_PASS)) {
@@ -1037,7 +1043,7 @@ setnsec3parameters(dns_db_t *db, qpz_version_t *version) {
version->havensec3 = false; version->havensec3 = false;
node = qpdb->origin; node = qpdb->origin;
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
for (header = node->data; header != NULL; header = header_next) { for (header = node->data; header != NULL; header = header_next) {
header_next = header->next; header_next = header->next;
@@ -1512,7 +1518,7 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp,
ISC_LIST_UNLINK(resigned_list, header, link); ISC_LIST_UNLINK(resigned_list, header, link);
nlock = &qpdb->node_locks[HEADERNODE(header)->locknum]; nlock = &qpdb->buckets[HEADERNODE(header)->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
if (rollback && !IGNORE(header)) { if (rollback && !IGNORE(header)) {
resigninsert(qpdb, header); resigninsert(qpdb, header);
@@ -1535,7 +1541,7 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp,
next_changed = NEXT(changed, link); next_changed = NEXT(changed, link);
node = changed->node; node = changed->node;
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
if (rollback) { if (rollback) {
@@ -1579,7 +1585,7 @@ qpzone_findrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
} }
serial = version->serial; serial = version->serial;
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
matchtype = DNS_TYPEPAIR_VALUE(type, covers); matchtype = DNS_TYPEPAIR_VALUE(type, covers);
@@ -2197,7 +2203,7 @@ loading_addrdataset(void *arg, const dns_name_t *name,
newheader->resign_lsb = rdataset->resign & 0x1; newheader->resign_lsb = rdataset->resign & 0x1;
} }
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
result = add(qpdb, node, name, qpdb->current_version, newheader, result = add(qpdb, node, name, qpdb->current_version, newheader,
DNS_DBADD_MERGE, true, NULL, 0 DNS__DB_FLARG_PASS); DNS_DBADD_MERGE, true, NULL, 0 DNS__DB_FLARG_PASS);
@@ -2406,7 +2412,7 @@ setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
header = dns_slabheader_fromrdataset(rdataset); header = dns_slabheader_fromrdataset(rdataset);
nlock = &qpdb->node_locks[HEADERNODE(header)->locknum]; nlock = &qpdb->buckets[HEADERNODE(header)->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
oldheader = *header; oldheader = *header;
@@ -2467,7 +2473,7 @@ getsigningtime(dns_db_t *db, isc_stdtime_t *resign, dns_name_t *foundname,
RWUNLOCK(&qpdb->lock, isc_rwlocktype_read); RWUNLOCK(&qpdb->lock, isc_rwlocktype_read);
again: again:
nlock = &qpdb->node_locks[locknum]; nlock = &qpdb->buckets[locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
@@ -2662,7 +2668,8 @@ qpzone_setup_delegation(qpz_search_t *search, dns_dbnode_t **nodep,
} }
if (rdataset != NULL) { if (rdataset != NULL) {
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &search->qpdb->node_locks[node->locknum]; isc_rwlock_t *nlock =
&search->qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
bindrdataset(search->qpdb, node, search->zonecut_header, bindrdataset(search->qpdb, node, search->zonecut_header,
rdataset DNS__DB_FLARG_PASS); rdataset DNS__DB_FLARG_PASS);
@@ -2702,7 +2709,7 @@ step(qpz_search_t *search, dns_qpiter_t *it, direction_t direction,
result = dns_qpiter_current(it, nodename, (void **)&node, NULL); result = dns_qpiter_current(it, nodename, (void **)&node, NULL);
while (result == ISC_R_SUCCESS) { while (result == ISC_R_SUCCESS) {
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum]; isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
@@ -2855,7 +2862,7 @@ find_wildcard(qpz_search_t *search, qpznode_t **nodep,
dns_qpchain_node(&search->chain, i, NULL, (void **)&node, NULL); dns_qpchain_node(&search->chain, i, NULL, (void **)&node, NULL);
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
/* /*
* First we try to figure out if this node is active in * First we try to figure out if this node is active in
@@ -2899,7 +2906,7 @@ find_wildcard(qpz_search_t *search, qpznode_t **nodep,
* is active in the search's version, we're * is active in the search's version, we're
* done. * done.
*/ */
nlock = &qpdb->node_locks[wnode->locknum]; nlock = &qpdb->buckets[wnode->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
for (header = wnode->data; header != NULL; for (header = wnode->data; header != NULL;
header = header->next) header = header->next)
@@ -3082,7 +3089,8 @@ again:
do { do {
dns_slabheader_t *found = NULL, *foundsig = NULL; dns_slabheader_t *found = NULL, *foundsig = NULL;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &search->qpdb->node_locks[node->locknum]; isc_rwlock_t *nlock =
&search->qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
empty_node = true; empty_node = true;
for (header = node->data; header != NULL; header = header_next) for (header = node->data; header != NULL; header = header_next)
@@ -3225,7 +3233,7 @@ qpzone_check_zonecut(qpznode_t *node, void *arg DNS__DB_FLARG) {
dns_slabheader_t *found = NULL; dns_slabheader_t *found = NULL;
isc_result_t result = DNS_R_CONTINUE; isc_result_t result = DNS_R_CONTINUE;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &search->qpdb->node_locks[node->locknum]; isc_rwlock_t *nlock = &search->qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
@@ -3491,7 +3499,7 @@ found:
* have matched a wildcard. * have matched a wildcard.
*/ */
nlock = &search.qpdb->node_locks[node->locknum]; nlock = &search.qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
if (search.zonecut != NULL) { if (search.zonecut != NULL) {
@@ -3853,7 +3861,7 @@ tree_exit:
if (search.need_cleanup) { if (search.need_cleanup) {
node = search.zonecut; node = search.zonecut;
INSIST(node != NULL); INSIST(node != NULL);
nlock = &search.qpdb->node_locks[node->locknum]; nlock = &search.qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
qpznode_release(search.qpdb, node, 0, qpznode_release(search.qpdb, node, 0,
@@ -3929,20 +3937,18 @@ detachnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) {
node = (qpznode_t *)(*nodep); node = (qpznode_t *)(*nodep);
*nodep = NULL; *nodep = NULL;
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
/* /*
* We can't destroy qpzonedb while holding a nodelock, so * qpzone_destroy() uses call_rcu() API to destroy the node locks,
* we need to reference it before acquiring the lock * so it is safe to call it in the middle of NODE_LOCK.
* and release it afterward.
*/ */
qpzonedb_ref(qpdb);
rcu_read_lock();
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
qpznode_release(qpdb, node, 0, &nlocktype DNS__DB_FLARG_PASS); qpznode_release(qpdb, node, 0, &nlocktype DNS__DB_FLARG_PASS);
NODE_UNLOCK(nlock, &nlocktype); NODE_UNLOCK(nlock, &nlocktype);
rcu_read_unlock();
qpzonedb_detach(&qpdb);
} }
static unsigned int static unsigned int
@@ -4009,7 +4015,7 @@ locknode(dns_db_t *db, dns_dbnode_t *dbnode, isc_rwlocktype_t type) {
qpzonedb_t *qpdb = (qpzonedb_t *)db; qpzonedb_t *qpdb = (qpzonedb_t *)db;
qpznode_t *node = (qpznode_t *)dbnode; qpznode_t *node = (qpznode_t *)dbnode;
RWLOCK(&qpdb->node_locks[node->locknum], type); RWLOCK(&qpdb->buckets[node->locknum].lock, type);
} }
static void static void
@@ -4017,7 +4023,7 @@ unlocknode(dns_db_t *db, dns_dbnode_t *dbnode, isc_rwlocktype_t type) {
qpzonedb_t *qpdb = (qpzonedb_t *)db; qpzonedb_t *qpdb = (qpzonedb_t *)db;
qpznode_t *node = (qpznode_t *)dbnode; qpznode_t *node = (qpznode_t *)dbnode;
RWUNLOCK(&qpdb->node_locks[node->locknum], type); RWUNLOCK(&qpdb->buckets[node->locknum].lock, type);
} }
static void static void
@@ -4063,7 +4069,7 @@ rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG) {
qpz_version_t *version = (qpz_version_t *)qrditer->common.version; qpz_version_t *version = (qpz_version_t *)qrditer->common.version;
dns_slabheader_t *header = NULL, *top_next = NULL; dns_slabheader_t *header = NULL, *top_next = NULL;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum]; isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
@@ -4107,7 +4113,7 @@ rdatasetiter_next(dns_rdatasetiter_t *iterator DNS__DB_FLARG) {
dns_typepair_t type, negtype; dns_typepair_t type, negtype;
dns_rdatatype_t rdtype; dns_rdatatype_t rdtype;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum]; isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
header = qrditer->current; header = qrditer->current;
if (header == NULL) { if (header == NULL) {
@@ -4177,7 +4183,7 @@ rdatasetiter_current(dns_rdatasetiter_t *iterator,
qpznode_t *node = (qpznode_t *)qrditer->common.node; qpznode_t *node = (qpznode_t *)qrditer->common.node;
dns_slabheader_t *header = NULL; dns_slabheader_t *header = NULL;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none; isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum]; isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
header = qrditer->current; header = qrditer->current;
REQUIRE(header != NULL); REQUIRE(header != NULL);
@@ -4216,7 +4222,7 @@ dereference_iter_node(qpdb_dbiterator_t *iter DNS__DB_FLARG) {
} }
iter->node = NULL; iter->node = NULL;
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
qpznode_release(qpdb, node, 0, &nlocktype DNS__DB_FLARG_PASS); qpznode_release(qpdb, node, 0, &nlocktype DNS__DB_FLARG_PASS);
@@ -4718,7 +4724,7 @@ qpzone_addrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
* (Note: node lock must be acquired after starting * (Note: node lock must be acquired after starting
* the QPDB transaction and released before committing.) * the QPDB transaction and released before committing.)
*/ */
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
@@ -4822,7 +4828,7 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
newheader->resign_lsb = 0; newheader->resign_lsb = 0;
} }
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
changed = add_changed(newheader, version DNS__DB_FLARG_PASS); changed = add_changed(newheader, version DNS__DB_FLARG_PASS);
@@ -4986,7 +4992,7 @@ qpzone_deleterdataset(dns_db_t *db, dns_dbnode_t *dbnode,
dns_name_copy(&node->name, nodename); dns_name_copy(&node->name, nodename);
nlock = &qpdb->node_locks[node->locknum]; nlock = &qpdb->buckets[node->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype); NODE_WRLOCK(nlock, &nlocktype);
result = add(qpdb, node, nodename, version, newheader, DNS_DBADD_FORCE, result = add(qpdb, node, nodename, version, newheader, DNS_DBADD_FORCE,
false, NULL, 0 DNS__DB_FLARG_PASS); false, NULL, 0 DNS__DB_FLARG_PASS);
@@ -5005,7 +5011,7 @@ nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) {
REQUIRE(node != NULL); REQUIRE(node != NULL);
REQUIRE(name != NULL); REQUIRE(name != NULL);
nlock = &qpdb->node_locks[qpnode->locknum]; nlock = &qpdb->buckets[qpnode->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype); NODE_RDLOCK(nlock, &nlocktype);
dns_name_copy(&qpnode->name, name); dns_name_copy(&qpnode->name, name);

View File

@@ -20,9 +20,11 @@ STATIC_ASSERT(sizeof(struct __cds_wfcq_head) <= ISC_OS_CACHELINE_SIZE,
typedef struct isc_queue { typedef struct isc_queue {
struct __cds_wfcq_head head; struct __cds_wfcq_head head;
uint8_t __padding[ISC_OS_CACHELINE_SIZE - uint8_t __padding_head[ISC_OS_CACHELINE_SIZE -
sizeof(struct __cds_wfcq_head)]; sizeof(struct __cds_wfcq_head)];
struct cds_wfcq_tail tail; struct cds_wfcq_tail tail;
uint8_t __padding_tail[ISC_OS_CACHELINE_SIZE -
sizeof(struct __cds_wfcq_head)];
} isc_queue_t; } isc_queue_t;
typedef struct cds_wfcq_node isc_queue_node_t; typedef struct cds_wfcq_node isc_queue_node_t;

View File

@@ -101,21 +101,22 @@ const char *ownercase_vectors[12][2] = {
static bool static bool
ownercase_test_one(const char *str1, const char *str2) { ownercase_test_one(const char *str1, const char *str2) {
isc_result_t result; isc_result_t result;
isc_rwlock_t node_locks[1]; uint8_t qpdb_s[sizeof(qpzonedb_t) + sizeof(qpzone_bucket_t)];
qpzonedb_t qpdb = { qpzonedb_t *qpdb = (qpzonedb_t *)&qpdb_s;
*qpdb = (qpzonedb_t){
.common.methods = &qpdb_zonemethods, .common.methods = &qpdb_zonemethods,
.common.mctx = mctx, .common.mctx = mctx,
.node_locks = node_locks, .buckets_count = 1,
}; };
qpznode_t node = { .locknum = 0 }; qpznode_t node = { .locknum = 0 };
dns_slabheader_t header = { dns_slabheader_t header = {
.node = (dns_dbnode_t *)&node, .node = (dns_dbnode_t *)&node,
.db = (dns_db_t *)&qpdb, .db = (dns_db_t *)qpdb,
}; };
unsigned char *raw = (unsigned char *)(&header) + sizeof(header); unsigned char *raw = (unsigned char *)(&header) + sizeof(header);
dns_rdataset_t rdataset = { dns_rdataset_t rdataset = {
.magic = DNS_RDATASET_MAGIC, .magic = DNS_RDATASET_MAGIC,
.slab = { .db = (dns_db_t *)&qpdb, .slab = { .db = (dns_db_t *)qpdb,
.node = (dns_dbnode_t *)&node, .node = (dns_dbnode_t *)&node,
.raw = raw, .raw = raw,
}, },
@@ -126,9 +127,8 @@ ownercase_test_one(const char *str1, const char *str2) {
dns_name_t *name1 = dns_fixedname_initname(&fname1); dns_name_t *name1 = dns_fixedname_initname(&fname1);
dns_name_t *name2 = dns_fixedname_initname(&fname2); dns_name_t *name2 = dns_fixedname_initname(&fname2);
memset(node_locks, 0, sizeof(node_locks));
/* Minimal initialization of the mock objects */ /* Minimal initialization of the mock objects */
NODE_INITLOCK(&qpdb.node_locks[0]); NODE_INITLOCK(&qpdb->buckets[0].lock);
isc_buffer_constinit(&b, str1, strlen(str1)); isc_buffer_constinit(&b, str1, strlen(str1));
isc_buffer_add(&b, strlen(str1)); isc_buffer_add(&b, strlen(str1));
@@ -148,7 +148,7 @@ ownercase_test_one(const char *str1, const char *str2) {
/* Retrieve the case to name2 */ /* Retrieve the case to name2 */
dns_rdataset_getownercase(&rdataset, name2); dns_rdataset_getownercase(&rdataset, name2);
NODE_DESTROYLOCK(&qpdb.node_locks[0]); NODE_DESTROYLOCK(&qpdb->buckets[0].lock);
return dns_name_caseequal(name1, name2); return dns_name_caseequal(name1, name2);
} }
@@ -169,21 +169,22 @@ ISC_RUN_TEST_IMPL(ownercase) {
ISC_RUN_TEST_IMPL(setownercase) { ISC_RUN_TEST_IMPL(setownercase) {
isc_result_t result; isc_result_t result;
isc_rwlock_t node_locks[1]; uint8_t qpdb_s[sizeof(qpzonedb_t) + sizeof(qpzone_bucket_t)];
qpzonedb_t qpdb = { qpzonedb_t *qpdb = (qpzonedb_t *)&qpdb_s;
*qpdb = (qpzonedb_t){
.common.methods = &qpdb_zonemethods, .common.methods = &qpdb_zonemethods,
.common.mctx = mctx, .common.mctx = mctx,
.node_locks = node_locks, .buckets_count = 1,
}; };
qpznode_t node = { .locknum = 0 }; qpznode_t node = { .locknum = 0 };
dns_slabheader_t header = { dns_slabheader_t header = {
.node = (dns_dbnode_t *)&node, .node = (dns_dbnode_t *)&node,
.db = (dns_db_t *)&qpdb, .db = (dns_db_t *)qpdb,
}; };
unsigned char *raw = (unsigned char *)(&header) + sizeof(header); unsigned char *raw = (unsigned char *)(&header) + sizeof(header);
dns_rdataset_t rdataset = { dns_rdataset_t rdataset = {
.magic = DNS_RDATASET_MAGIC, .magic = DNS_RDATASET_MAGIC,
.slab = { .db = (dns_db_t *)&qpdb, .slab = { .db = (dns_db_t *)qpdb,
.node = (dns_dbnode_t *)&node, .node = (dns_dbnode_t *)&node,
.raw = raw, .raw = raw,
}, },
@@ -199,8 +200,7 @@ ISC_RUN_TEST_IMPL(setownercase) {
UNUSED(state); UNUSED(state);
/* Minimal initialization of the mock objects */ /* Minimal initialization of the mock objects */
memset(node_locks, 0, sizeof(node_locks)); NODE_INITLOCK(&qpdb->buckets[0].lock);
NODE_INITLOCK(&qpdb.node_locks[0]);
isc_buffer_constinit(&b, str1, strlen(str1)); isc_buffer_constinit(&b, str1, strlen(str1));
isc_buffer_add(&b, strlen(str1)); isc_buffer_add(&b, strlen(str1));
@@ -217,7 +217,7 @@ ISC_RUN_TEST_IMPL(setownercase) {
/* Retrieve the case to name2 */ /* Retrieve the case to name2 */
dns_rdataset_getownercase(&rdataset, name2); dns_rdataset_getownercase(&rdataset, name2);
NODE_DESTROYLOCK(&qpdb.node_locks[0]); NODE_DESTROYLOCK(&qpdb->buckets[0].lock);
assert_true(dns_name_caseequal(name1, name2)); assert_true(dns_name_caseequal(name1, name2));
} }