2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-31 06:25:31 +00:00

Reduce false sharing in dns_qpzone

Instead of having many node_lock_count * sizeof(<member>) arrays, pack
all the members into a qpzone_bucket_t that is cacheline aligned and have
a single array of those.

(cherry picked from commit 6dcc398726)
This commit is contained in:
Ondřej Surý
2025-02-04 16:50:27 +01:00
parent a9f4e3369a
commit c6c03a6b11
2 changed files with 79 additions and 66 deletions

View File

@@ -14,6 +14,7 @@
/*! \file */
#include <inttypes.h>
#include <stdalign.h>
#include <stdbool.h>
#include <sys/mman.h>
@@ -91,7 +92,7 @@
#define QPDB_ATTR_LOADED 0x01
#define QPDB_ATTR_LOADING 0x02
#define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
#define DEFAULT_BUCKETS_COUNT 17 /*%< Should be prime. */
#define QPDBITER_NSEC3_ORIGIN_NODE(qpdb, iterator) \
((iterator)->current == &(iterator)->nsec3iter && \
@@ -185,6 +186,15 @@ struct qpznode {
void *data;
};
typedef struct qpcache_bucket {
/* Per-bucket lock. */
isc_rwlock_t lock;
/* Padding to prevent false sharing between locks. */
uint8_t __padding[ISC_OS_CACHELINE_SIZE -
(sizeof(isc_rwlock_t)) % ISC_OS_CACHELINE_SIZE];
} qpzone_bucket_t;
struct qpzonedb {
/* Unlocked. */
dns_db_t common;
@@ -208,15 +218,10 @@ struct qpzonedb {
*/
isc_refcount_t references;
/* Locks for tree nodes */
int node_lock_count;
isc_rwlock_t *node_locks;
qpznode_t *origin;
qpznode_t *nsec3_origin;
isc_stats_t *gluecachestats;
/* Locked by lock. */
unsigned int active;
unsigned int attributes;
uint32_t current_serial;
uint32_t least_serial;
@@ -234,6 +239,9 @@ struct qpzonedb {
dns_qpmulti_t *tree; /* Main QP trie for data storage */
dns_qpmulti_t *nsec; /* NSEC nodes only */
dns_qpmulti_t *nsec3; /* NSEC3 nodes only */
size_t buckets_count;
qpzone_bucket_t buckets[]; /* attribute((counted_by(buckets_count))) */
};
#ifdef DNS_DB_NODETRACE
@@ -431,8 +439,8 @@ free_db_rcu(struct rcu_head *rcu_head) {
if (dns_name_dynamic(&qpdb->common.origin)) {
dns_name_free(&qpdb->common.origin, qpdb->common.mctx);
}
for (int i = 0; i < qpdb->node_lock_count; i++) {
NODE_DESTROYLOCK(&qpdb->node_locks[i]);
for (size_t i = 0; i < qpdb->buckets_count; i++) {
NODE_DESTROYLOCK(&qpdb->buckets[i].lock);
}
isc_heap_destroy(&qpdb->heap);
@@ -441,8 +449,6 @@ free_db_rcu(struct rcu_head *rcu_head) {
isc_stats_detach(&qpdb->gluecachestats);
}
isc_mem_cput(qpdb->common.mctx, qpdb->node_locks, qpdb->node_lock_count,
sizeof(qpdb->node_locks[0]));
if (qpdb->loop != NULL) {
isc_loop_detach(&qpdb->loop);
}
@@ -458,7 +464,9 @@ free_db_rcu(struct rcu_head *rcu_head) {
INSIST(!cds_lfht_destroy(qpdb->common.update_listeners, NULL));
}
isc_mem_putanddetach(&qpdb->common.mctx, qpdb, sizeof(*qpdb));
isc_mem_putanddetach(&qpdb->common.mctx, qpdb,
sizeof(*qpdb) + qpdb->buckets_count *
sizeof(qpdb->buckets[0]));
}
static void
@@ -520,7 +528,7 @@ new_qpznode(qpzonedb_t *qpdb, const dns_name_t *name) {
.name = DNS_NAME_INITEMPTY,
.references = ISC_REFCOUNT_INITIALIZER(1),
};
newdata->locknum = dns_name_hash(name) % qpdb->node_lock_count;
newdata->locknum = dns_name_hash(name) % qpdb->buckets_count;
dns_name_dupwithoffsets(name, qpdb->common.mctx, &newdata->name);
isc_mem_attach(qpdb->common.mctx, &newdata->mctx);
@@ -559,12 +567,14 @@ dns__qpzone_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
isc_result_t result;
dns_qp_t *qp = NULL;
qpdb = isc_mem_get(mctx, sizeof(*qpdb));
qpdb = isc_mem_get(mctx,
sizeof(*qpdb) + DEFAULT_BUCKETS_COUNT *
sizeof(qpdb->buckets[0]));
*qpdb = (qpzonedb_t){
.common.origin = DNS_NAME_INITEMPTY,
.common.rdclass = rdclass,
.common.references = ISC_REFCOUNT_INITIALIZER(1),
.node_lock_count = DEFAULT_NODE_LOCK_COUNT,
.buckets_count = DEFAULT_BUCKETS_COUNT,
.current_serial = 1,
.least_serial = 1,
.next_serial = 2,
@@ -579,17 +589,12 @@ dns__qpzone_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
isc_rwlock_init(&qpdb->lock);
qpdb->node_locks = isc_mem_cget(mctx, qpdb->node_lock_count,
sizeof(qpdb->node_locks[0]));
qpdb->common.update_listeners = cds_lfht_new(16, 16, 0, 0, NULL);
isc_heap_create(mctx, resign_sooner, set_index, 0, &qpdb->heap);
qpdb->active = qpdb->node_lock_count;
for (int i = 0; i < qpdb->node_lock_count; i++) {
NODE_INITLOCK(&qpdb->node_locks[i]);
for (size_t i = 0; i < qpdb->buckets_count; i++) {
NODE_INITLOCK(&qpdb->buckets[i].lock);
}
/*
@@ -875,7 +880,7 @@ qpznode_release(qpzonedb_t *qpdb, qpznode_t *node, uint32_t least_serial,
* erefs (but NOT references!), upgrade the node lock,
* decrement erefs again, and see if it's still zero.
*/
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum];
isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
qpznode_erefs_increment(qpdb, node DNS__DB_FLARG_PASS);
NODE_FORCEUPGRADE(nlock, nlocktypep);
if (!qpznode_erefs_decrement(qpdb, node DNS__DB_FLARG_PASS)) {
@@ -969,7 +974,7 @@ setnsec3parameters(dns_db_t *db, qpz_version_t *version) {
version->havensec3 = false;
node = qpdb->origin;
nlock = &qpdb->node_locks[node->locknum];
nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
for (header = node->data; header != NULL; header = header_next) {
header_next = header->next;
@@ -1443,7 +1448,7 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp,
ISC_LIST_UNLINK(resigned_list, header, link);
nlock = &qpdb->node_locks[HEADERNODE(header)->locknum];
nlock = &qpdb->buckets[HEADERNODE(header)->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype);
if (rollback && !IGNORE(header)) {
resigninsert(qpdb, header);
@@ -1466,7 +1471,7 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp,
next_changed = NEXT(changed, link);
node = changed->node;
nlock = &qpdb->node_locks[node->locknum];
nlock = &qpdb->buckets[node->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype);
if (rollback) {
@@ -1509,7 +1514,7 @@ findrdataset(dns_db_t *db, dns_dbnode_t *dbnode, dns_dbversion_t *dbversion,
}
serial = version->serial;
nlock = &qpdb->node_locks[node->locknum];
nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
matchtype = DNS_TYPEPAIR_VALUE(type, covers);
@@ -2127,7 +2132,7 @@ loading_addrdataset(void *arg, const dns_name_t *name,
newheader->resign_lsb = rdataset->resign & 0x1;
}
nlock = &qpdb->node_locks[node->locknum];
nlock = &qpdb->buckets[node->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype);
result = add(qpdb, node, name, qpdb->current_version, newheader,
DNS_DBADD_MERGE, true, NULL, 0 DNS__DB_FLARG_PASS);
@@ -2336,7 +2341,7 @@ setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
header = dns_slabheader_fromrdataset(rdataset);
nlock = &qpdb->node_locks[HEADERNODE(header)->locknum];
nlock = &qpdb->buckets[HEADERNODE(header)->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype);
oldheader = *header;
@@ -2397,7 +2402,7 @@ getsigningtime(dns_db_t *db, isc_stdtime_t *resign, dns_name_t *foundname,
RWUNLOCK(&qpdb->lock, isc_rwlocktype_read);
again:
nlock = &qpdb->node_locks[locknum];
nlock = &qpdb->buckets[locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
@@ -2592,7 +2597,8 @@ setup_delegation(qpz_search_t *search, dns_dbnode_t **nodep,
}
if (rdataset != NULL) {
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &search->qpdb->node_locks[node->locknum];
isc_rwlock_t *nlock =
&search->qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
bindrdataset(search->qpdb, node, search->zonecut_header,
search->now, rdataset DNS__DB_FLARG_PASS);
@@ -2632,7 +2638,7 @@ step(qpz_search_t *search, dns_qpiter_t *it, direction_t direction,
result = dns_qpiter_current(it, nodename, (void **)&node, NULL);
while (result == ISC_R_SUCCESS) {
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum];
isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
NODE_RDLOCK(nlock, &nlocktype);
@@ -2785,7 +2791,7 @@ find_wildcard(qpz_search_t *search, qpznode_t **nodep,
dns_qpchain_node(&search->chain, i, NULL, (void **)&node, NULL);
nlock = &qpdb->node_locks[node->locknum];
nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
/*
* First we try to figure out if this node is active in
@@ -2829,7 +2835,7 @@ find_wildcard(qpz_search_t *search, qpznode_t **nodep,
* is active in the search's version, we're
* done.
*/
nlock = &qpdb->node_locks[wnode->locknum];
nlock = &qpdb->buckets[wnode->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
for (header = wnode->data; header != NULL;
header = header->next)
@@ -3012,7 +3018,8 @@ again:
do {
dns_slabheader_t *found = NULL, *foundsig = NULL;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &search->qpdb->node_locks[node->locknum];
isc_rwlock_t *nlock =
&search->qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
empty_node = true;
for (header = node->data; header != NULL; header = header_next)
@@ -3155,7 +3162,7 @@ check_zonecut(qpznode_t *node, void *arg DNS__DB_FLARG) {
dns_slabheader_t *found = NULL;
isc_result_t result = DNS_R_CONTINUE;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &search->qpdb->node_locks[node->locknum];
isc_rwlock_t *nlock = &search->qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
@@ -3423,7 +3430,7 @@ found:
* have matched a wildcard.
*/
nlock = &search.qpdb->node_locks[node->locknum];
nlock = &search.qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
if (search.zonecut != NULL) {
@@ -3777,7 +3784,7 @@ tree_exit:
if (search.need_cleanup) {
node = search.zonecut;
INSIST(node != NULL);
nlock = &search.qpdb->node_locks[node->locknum];
nlock = &search.qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
qpznode_release(search.qpdb, node, 0,
@@ -3852,7 +3859,7 @@ detachnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) {
node = (qpznode_t *)(*nodep);
*nodep = NULL;
nlock = &qpdb->node_locks[node->locknum];
nlock = &qpdb->buckets[node->locknum].lock;
/*
* We can't destroy qpzonedb while holding a nodelock, so
@@ -3934,7 +3941,7 @@ locknode(dns_db_t *db, dns_dbnode_t *dbnode, isc_rwlocktype_t type) {
qpzonedb_t *qpdb = (qpzonedb_t *)db;
qpznode_t *node = (qpznode_t *)dbnode;
RWLOCK(&qpdb->node_locks[node->locknum], type);
RWLOCK(&qpdb->buckets[node->locknum].lock, type);
}
static void
@@ -3942,7 +3949,7 @@ unlocknode(dns_db_t *db, dns_dbnode_t *dbnode, isc_rwlocktype_t type) {
qpzonedb_t *qpdb = (qpzonedb_t *)db;
qpznode_t *node = (qpznode_t *)dbnode;
RWUNLOCK(&qpdb->node_locks[node->locknum], type);
RWUNLOCK(&qpdb->buckets[node->locknum].lock, type);
}
static void
@@ -3988,7 +3995,7 @@ rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG) {
qpz_version_t *version = qrditer->common.version;
dns_slabheader_t *header = NULL, *top_next = NULL;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum];
isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
@@ -4032,7 +4039,7 @@ rdatasetiter_next(dns_rdatasetiter_t *iterator DNS__DB_FLARG) {
dns_typepair_t type, negtype;
dns_rdatatype_t rdtype;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum];
isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
header = qrditer->current;
if (header == NULL) {
@@ -4102,7 +4109,7 @@ rdatasetiter_current(dns_rdatasetiter_t *iterator,
qpznode_t *node = qrditer->common.node;
dns_slabheader_t *header = NULL;
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
isc_rwlock_t *nlock = &qpdb->node_locks[node->locknum];
isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
header = qrditer->current;
REQUIRE(header != NULL);
@@ -4142,7 +4149,7 @@ dereference_iter_node(qpdb_dbiterator_t *iter DNS__DB_FLARG) {
}
iter->node = NULL;
nlock = &qpdb->node_locks[node->locknum];
nlock = &qpdb->buckets[node->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
qpznode_release(qpdb, node, 0, &nlocktype DNS__DB_FLARG_PASS);
@@ -4642,7 +4649,7 @@ addrdataset(dns_db_t *db, dns_dbnode_t *dbnode, dns_dbversion_t *dbversion,
* (Note: node lock must be acquired after starting
* the QPDB transaction and released before committing.)
*/
nlock = &qpdb->node_locks[node->locknum];
nlock = &qpdb->buckets[node->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype);
@@ -4745,7 +4752,7 @@ subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode, dns_dbversion_t *dbversion,
newheader->resign_lsb = 0;
}
nlock = &qpdb->node_locks[node->locknum];
nlock = &qpdb->buckets[node->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype);
changed = add_changed(newheader, version DNS__DB_FLARG_PASS);
@@ -4907,7 +4914,7 @@ deleterdataset(dns_db_t *db, dns_dbnode_t *dbnode, dns_dbversion_t *dbversion,
dns_name_copy(&node->name, nodename);
nlock = &qpdb->node_locks[node->locknum];
nlock = &qpdb->buckets[node->locknum].lock;
NODE_WRLOCK(nlock, &nlocktype);
result = add(qpdb, node, nodename, version, newheader, DNS_DBADD_FORCE,
false, NULL, 0 DNS__DB_FLARG_PASS);
@@ -4926,7 +4933,7 @@ nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) {
REQUIRE(node != NULL);
REQUIRE(name != NULL);
nlock = &qpdb->node_locks[qpnode->locknum];
nlock = &qpdb->buckets[qpnode->locknum].lock;
NODE_RDLOCK(nlock, &nlocktype);
dns_name_copy(&qpnode->name, name);

View File

@@ -101,21 +101,25 @@ const char *ownercase_vectors[12][2] = {
static bool
ownercase_test_one(const char *str1, const char *str2) {
isc_result_t result;
isc_rwlock_t node_locks[1];
qpzonedb_t qpdb = {
uint8_t qpdb_s[sizeof(qpzonedb_t) + sizeof(qpzone_bucket_t)];
qpzonedb_t *qpdb = (qpzonedb_t *)&qpdb_s;
*qpdb = (qpzonedb_t){
.common.methods = &qpdb_zonemethods,
.common.mctx = mctx,
.node_locks = node_locks,
.buckets_count = 1,
};
qpznode_t node = { .locknum = 0 };
dns_slabheader_t header = {
.node = &node,
.db = (dns_db_t *)&qpdb,
.node = (dns_dbnode_t *)&node,
.db = (dns_db_t *)qpdb,
};
unsigned char *raw = (unsigned char *)(&header) + sizeof(header);
dns_rdataset_t rdataset = {
.magic = DNS_RDATASET_MAGIC,
.slab = { .db = (dns_db_t *)&qpdb, .node = &node, .raw = raw },
.slab = { .db = (dns_db_t *)qpdb,
.node = (dns_dbnode_t *)&node,
.raw = raw,
},
.methods = &dns_rdataslab_rdatasetmethods,
};
isc_buffer_t b;
@@ -123,9 +127,8 @@ ownercase_test_one(const char *str1, const char *str2) {
dns_name_t *name1 = dns_fixedname_initname(&fname1);
dns_name_t *name2 = dns_fixedname_initname(&fname2);
memset(node_locks, 0, sizeof(node_locks));
/* Minimal initialization of the mock objects */
NODE_INITLOCK(&qpdb.node_locks[0]);
NODE_INITLOCK(&qpdb->buckets[0].lock);
isc_buffer_constinit(&b, str1, strlen(str1));
isc_buffer_add(&b, strlen(str1));
@@ -145,7 +148,7 @@ ownercase_test_one(const char *str1, const char *str2) {
/* Retrieve the case to name2 */
dns_rdataset_getownercase(&rdataset, name2);
NODE_DESTROYLOCK(&qpdb.node_locks[0]);
NODE_DESTROYLOCK(&qpdb->buckets[0].lock);
return dns_name_caseequal(name1, name2);
}
@@ -166,21 +169,25 @@ ISC_RUN_TEST_IMPL(ownercase) {
ISC_RUN_TEST_IMPL(setownercase) {
isc_result_t result;
isc_rwlock_t node_locks[1];
qpzonedb_t qpdb = {
uint8_t qpdb_s[sizeof(qpzonedb_t) + sizeof(qpzone_bucket_t)];
qpzonedb_t *qpdb = (qpzonedb_t *)&qpdb_s;
*qpdb = (qpzonedb_t){
.common.methods = &qpdb_zonemethods,
.common.mctx = mctx,
.node_locks = node_locks,
.buckets_count = 1,
};
qpznode_t node = { .locknum = 0 };
dns_slabheader_t header = {
.node = &node,
.db = (dns_db_t *)&qpdb,
.node = (dns_dbnode_t *)&node,
.db = (dns_db_t *)qpdb,
};
unsigned char *raw = (unsigned char *)(&header) + sizeof(header);
dns_rdataset_t rdataset = {
.magic = DNS_RDATASET_MAGIC,
.slab = { .db = (dns_db_t *)&qpdb, .node = &node, .raw = raw },
.slab = { .db = (dns_db_t *)qpdb,
.node = (dns_dbnode_t *)&node,
.raw = raw,
},
.methods = &dns_rdataslab_rdatasetmethods,
};
const char *str1 =
@@ -193,8 +200,7 @@ ISC_RUN_TEST_IMPL(setownercase) {
UNUSED(state);
/* Minimal initialization of the mock objects */
memset(node_locks, 0, sizeof(node_locks));
NODE_INITLOCK(&qpdb.node_locks[0]);
NODE_INITLOCK(&qpdb->buckets[0].lock);
isc_buffer_constinit(&b, str1, strlen(str1));
isc_buffer_add(&b, strlen(str1));
@@ -211,7 +217,7 @@ ISC_RUN_TEST_IMPL(setownercase) {
/* Retrieve the case to name2 */
dns_rdataset_getownercase(&rdataset, name2);
NODE_DESTROYLOCK(&qpdb.node_locks[0]);
NODE_DESTROYLOCK(&qpdb->buckets[0].lock);
assert_true(dns_name_caseequal(name1, name2));
}