2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-30 22:15:20 +00:00

Adaptive memory allocation strategy for qp-tries

qp-tries allocate their nodes (twigs) in chunks to reduce allocator
pressure and improve memory locality. The choice of chunk size presents
a tradeoff: larger chunks benefit qp-tries with many values (as seen
in large zones and resolvers) but waste memory in smaller use cases.

Previously, our fixed chunk size of 2^10 twigs meant that even an
empty qp-trie would consume 12KB of memory, while reducing this size
would negatively impact resolver performance.

This commit implements an adaptive chunking strategy that:
 - Tracks the size of the most recently allocated chunk.
 - Doubles the chunk size for each new allocation until reaching a
   predefined maximum.

This approach effectively balances memory efficiency for small tries
while maintaining the performance benefits of larger chunk sizes for
bigger data structures.

This commit also splits the callback that frees qpmultis into two
phases: one that frees the underlying qp-trie, and one that reclaims
the qpmulti memory. In order to prevent races between the qpmulti
destructor and chunk garbage-collection jobs, the second phase is
protected by reference counting.

(cherry picked from commit 70b1777d8a)
This commit is contained in:
alessio
2025-03-09 09:13:16 +01:00
committed by Alessio Podda
parent 39ad2016c1
commit d21f63884a
12 changed files with 161 additions and 48 deletions

View File

@@ -34,6 +34,7 @@
#include <isc/serial.h>
#include <isc/string.h>
#include <isc/time.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/callbacks.h>
@@ -1083,6 +1084,7 @@ cleanup(void) {
}
isc_mem_destroy(&mctx);
}
rcu_barrier();
}
int

View File

@@ -25,6 +25,7 @@
#include <isc/mem.h>
#include <isc/result.h>
#include <isc/string.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/callbacks.h>
@@ -560,6 +561,8 @@ main(int argc, char **argv) {
}
isc_mem_destroy(&mctx);
rcu_barrier();
fflush(stdout);
if (ferror(stdout)) {
fprintf(stderr, "write error\n");

View File

@@ -23,6 +23,7 @@
#include <isc/mem.h>
#include <isc/result.h>
#include <isc/string.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/callbacks.h>
@@ -465,6 +466,8 @@ main(int argc, char **argv) {
}
isc_mem_destroy(&mctx);
rcu_barrier();
fflush(stdout);
if (ferror(stdout)) {
fprintf(stderr, "write error\n");

View File

@@ -60,6 +60,7 @@
#include <isc/string.h>
#include <isc/tid.h>
#include <isc/time.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/db.h>
@@ -4175,5 +4176,7 @@ main(int argc, char *argv[]) {
}
isc_mutex_destroy(&namelock);
rcu_barrier();
return vresult == ISC_R_SUCCESS ? 0 : 1;
}

View File

@@ -33,6 +33,7 @@
#include <isc/stdio.h>
#include <isc/string.h>
#include <isc/time.h>
#include <isc/urcu.h>
#include <isc/util.h>
#include <dns/db.h>
@@ -342,5 +343,7 @@ main(int argc, char *argv[]) {
}
isc_mem_destroy(&mctx);
rcu_barrier();
return result == ISC_R_SUCCESS ? 0 : 1;
}

View File

@@ -156,5 +156,7 @@ cleanup:
isc_mem_destroy(&mctx);
}
rcu_barrier();
return result != ISC_R_SUCCESS ? 1 : 0;
}

View File

@@ -89,6 +89,12 @@
#include <dns/name.h>
#include <dns/types.h>
/*%
* How many bytes a qp-trie might allocate as part of an insert. Needed for
* overmem checks.
*/
#define QP_SAFETY_MARGIN ((1ul << 12ul) * 12)
/*%
* A `dns_qp_t` supports single-threaded read/write access.
*/
@@ -306,7 +312,6 @@ typedef struct dns_qp_memusage {
size_t hold; /*%< nodes retained for readers */
size_t free; /*%< nodes to be reclaimed */
size_t node_size; /*%< in bytes */
size_t chunk_size; /*%< nodes per chunk */
size_t chunk_count; /*%< allocated chunks */
size_t bytes; /*%< total memory in chunks and metadata */
bool fragmented; /*%< trie needs compaction */

View File

@@ -96,6 +96,19 @@ static atomic_uint_fast64_t rollback_time;
#define TRACE(...)
#endif
#if DNS_QPMULTI_TRACE
ISC_REFCOUNT_STATIC_TRACE_DECL(dns_qpmulti);
#define dns_qpmulti_ref(ptr) dns_qpmulti__ref(ptr, __func__, __FILE__, __LINE__)
#define dns_qpmulti_unref(ptr) \
dns_qpmulti__unref(ptr, __func__, __FILE__, __LINE__)
#define dns_qpmulti_attach(ptr, ptrp) \
dns_qpmulti__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
#define dns_qpmulti_detach(ptrp) \
dns_qpmulti__detach(ptrp, __func__, __FILE__, __LINE__)
#else
ISC_REFCOUNT_STATIC_DECL(dns_qpmulti);
#endif
/***********************************************************************
*
* converting DNS names to trie keys
@@ -427,7 +440,7 @@ write_protect(dns_qp_t *qp, dns_qpchunk_t chunk) {
#else
#define chunk_get_raw(qp) isc_mem_allocate(qp->mctx, QP_CHUNK_BYTES)
#define chunk_get_raw(qp, size) isc_mem_allocate(qp->mctx, size)
#define chunk_free_raw(qp, ptr) isc_mem_free(qp->mctx, ptr)
#define chunk_shrink_raw(qp, ptr, size) isc_mem_reallocate(qp->mctx, ptr, size)
@@ -456,6 +469,22 @@ cells_immutable(dns_qp_t *qp, dns_qpref_t ref) {
}
}
/*
 * Find the smallest power of two that is >= both `size` and
 * `prev_capacity`, clamped to the configured chunk size range
 * [2^QP_CHUNK_LOG_MIN, 2^QP_CHUNK_LOG_MAX].
 *
 * `size` is the number of twigs the new chunk must be able to hold;
 * `prev_capacity` is the previous chunk's capacity (the caller passes
 * it already doubled, which yields exponential chunk growth up to the
 * maximum).
 */
static dns_qpcell_t
next_capacity(uint32_t prev_capacity, uint32_t size) {
/*
 * Unfortunately builtin_clz is undefined for 0. We work around this
 * issue by flooring the request size at 2, which guarantees that
 * `size - 1u` below is nonzero.
 */
size = ISC_MAX3(size, prev_capacity, 2u);
/* ceil(log2(size)): bit width of (size - 1). */
uint32_t log2 = 32u - __builtin_clz(size - 1u);
return 1U << ISC_CLAMP(log2, QP_CHUNK_LOG_MIN, QP_CHUNK_LOG_MAX);
}
/*
* Create a fresh bump chunk and allocate some twigs from it.
*/
@@ -464,9 +493,15 @@ chunk_alloc(dns_qp_t *qp, dns_qpchunk_t chunk, dns_qpweight_t size) {
INSIST(qp->base->ptr[chunk] == NULL);
INSIST(qp->usage[chunk].used == 0);
INSIST(qp->usage[chunk].free == 0);
INSIST(qp->chunk_capacity <= QP_CHUNK_SIZE);
qp->base->ptr[chunk] = chunk_get_raw(qp);
qp->usage[chunk] = (qp_usage_t){ .exists = true, .used = size };
qp->chunk_capacity = next_capacity(qp->chunk_capacity * 2u, size);
qp->base->ptr[chunk] =
chunk_get_raw(qp, qp->chunk_capacity * sizeof(dns_qpnode_t));
qp->usage[chunk] = (qp_usage_t){ .exists = true,
.used = size,
.capacity = qp->chunk_capacity };
qp->used_count += size;
qp->bump = chunk;
qp->fender = 0;
@@ -546,7 +581,7 @@ alloc_twigs(dns_qp_t *qp, dns_qpweight_t size) {
dns_qpchunk_t chunk = qp->bump;
dns_qpcell_t cell = qp->usage[chunk].used;
if (cell + size <= QP_CHUNK_SIZE) {
if (cell + size <= qp->usage[chunk].capacity) {
qp->usage[chunk].used += size;
qp->used_count += size;
return make_ref(chunk, cell);
@@ -699,38 +734,47 @@ reclaim_chunks_cb(struct rcu_head *arg) {
REQUIRE(QPMULTI_VALID(multi));
LOCK(&multi->mutex);
dns_qp_t *qp = &multi->writer;
REQUIRE(QP_VALID(qp));
unsigned int nfree = 0;
isc_nanosecs_t start = isc_time_monotonic();
/*
* If chunk_max is zero, chunks have already been freed.
*/
if (qp->chunk_max != 0) {
unsigned int free = 0;
isc_nanosecs_t start = isc_time_monotonic();
for (unsigned int i = 0; i < rcuctx->count; i++) {
dns_qpchunk_t chunk = rcuctx->chunk[i];
if (qp->usage[chunk].snapshot) {
/* cleanup when snapshot is destroyed */
qp->usage[chunk].snapfree = true;
} else {
chunk_free(qp, chunk);
nfree++;
INSIST(QP_VALID(qp));
for (unsigned int i = 0; i < rcuctx->count; i++) {
dns_qpchunk_t chunk = rcuctx->chunk[i];
if (qp->usage[chunk].snapshot) {
/* clean up when snapshot is destroyed */
qp->usage[chunk].snapfree = true;
} else {
chunk_free(qp, chunk);
free++;
}
}
isc_nanosecs_t time = isc_time_monotonic() - start;
recycle_time += time;
if (free > 0) {
LOG_STATS("qp reclaim" PRItime "free %u chunks", time,
free);
LOG_STATS(
"qp reclaim leaf %u live %u used %u free %u "
"hold %u",
qp->leaf_count, qp->used_count - qp->free_count,
qp->used_count, qp->free_count, qp->hold_count);
}
}
UNLOCK(&multi->mutex);
dns_qpmulti_detach(&multi);
isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
isc_nanosecs_t time = isc_time_monotonic() - start;
recycle_time += time;
if (nfree > 0) {
LOG_STATS("qp reclaim" PRItime "free %u chunks", time, nfree);
LOG_STATS("qp reclaim leaf %u live %u used %u free %u hold %u",
qp->leaf_count, qp->used_count - qp->free_count,
qp->used_count, qp->free_count, qp->hold_count);
}
UNLOCK(&multi->mutex);
}
/*
@@ -775,6 +819,11 @@ reclaim_chunks(dns_qpmulti_t *multi) {
}
}
/*
* Reference the qpmulti object to keep it from being
* freed until reclaim_chunks_cb() runs.
*/
dns_qpmulti_ref(multi);
call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb);
LOG_STATS("qp will reclaim %u chunks", count);
@@ -1027,12 +1076,13 @@ dns_qp_memusage(dns_qp_t *qp) {
.hold = qp->hold_count,
.free = qp->free_count,
.node_size = sizeof(dns_qpnode_t),
.chunk_size = QP_CHUNK_SIZE,
.fragmented = QP_NEEDGC(qp),
};
size_t chunk_usage_bytes = 0;
for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
if (qp->base->ptr[chunk] != NULL) {
chunk_usage_bytes += qp->usage[chunk].capacity;
memusage.chunk_count += 1;
}
}
@@ -1041,7 +1091,7 @@ dns_qp_memusage(dns_qp_t *qp) {
* XXXFANF does not subtract chunks that have been shrunk,
* and does not count unreclaimed dns_qpbase_t objects
*/
memusage.bytes = memusage.chunk_count * QP_CHUNK_BYTES +
memusage.bytes = chunk_usage_bytes +
qp->chunk_max * sizeof(qp->base->ptr[0]) +
qp->chunk_max * sizeof(qp->usage[0]);
@@ -1059,7 +1109,7 @@ dns_qpmulti_memusage(dns_qpmulti_t *multi) {
dns_qp_memusage_t memusage = dns_qp_memusage(qp);
if (qp->transaction_mode == QP_UPDATE) {
memusage.bytes -= QP_CHUNK_BYTES;
memusage.bytes -= qp->usage[qp->bump].capacity;
memusage.bytes += qp->usage[qp->bump].used *
sizeof(dns_qpnode_t);
}
@@ -1444,12 +1494,12 @@ dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx,
REQUIRE(qpmp != NULL && *qpmp == NULL);
dns_qpmulti_t *multi = isc_mem_get(mctx, sizeof(*multi));
*multi = (dns_qpmulti_t){
.magic = QPMULTI_MAGIC,
.reader_ref = INVALID_REF,
};
*multi = (dns_qpmulti_t){ .magic = QPMULTI_MAGIC,
.reader_ref = INVALID_REF,
.references = ISC_REFCOUNT_INITIALIZER(1) };
isc_mutex_init(&multi->mutex);
ISC_LIST_INIT(multi->snapshots);
/*
* Do not waste effort allocating a bump chunk that will be thrown
* away when a transaction is opened. dns_qpmulti_update() always
@@ -1469,11 +1519,13 @@ destroy_guts(dns_qp_t *qp) {
if (qp->chunk_max == 0) {
return;
}
for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
if (qp->base->ptr[chunk] != NULL) {
chunk_free(qp, chunk);
}
}
qp->chunk_max = 0;
ENSURE(qp->used_count == 0);
ENSURE(qp->free_count == 0);
ENSURE(isc_refcount_current(&qp->base->refcount) == 1);
@@ -1499,7 +1551,26 @@ dns_qp_destroy(dns_qp_t **qptp) {
}
static void
qpmulti_destroy_cb(struct rcu_head *arg) {
qpmulti_free_mem(dns_qpmulti_t *multi) {
REQUIRE(QPMULTI_VALID(multi));
/* reassure thread sanitizer */
LOCK(&multi->mutex);
dns_qp_t *qp = &multi->writer;
UNLOCK(&multi->mutex);
isc_mutex_destroy(&multi->mutex);
isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi));
}
#if QPMULTI_TRACE
ISC_REFCOUNT_STATIC_TRACE_IMPL(dns_qpmulti, qpmulti_free_mem)
#else
ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem)
#endif
static void
qpmulti_destroy_guts_cb(struct rcu_head *arg) {
qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head);
REQUIRE(QPRCU_VALID(rcuctx));
/* only nonzero for reclaim_chunks_cb() */
@@ -1518,10 +1589,9 @@ qpmulti_destroy_cb(struct rcu_head *arg) {
UNLOCK(&multi->mutex);
isc_mutex_destroy(&multi->mutex);
dns_qpmulti_detach(&multi);
isc_mem_putanddetach(&rcuctx->mctx, rcuctx,
STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count));
isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi));
}
void
@@ -1547,7 +1617,7 @@ dns_qpmulti_destroy(dns_qpmulti_t **qpmp) {
.multi = multi,
};
isc_mem_attach(qp->mctx, &rcuctx->mctx);
call_rcu(&rcuctx->rcu_head, qpmulti_destroy_cb);
call_rcu(&rcuctx->rcu_head, qpmulti_destroy_guts_cb);
}
/***********************************************************************

View File

@@ -19,6 +19,10 @@
#pragma once
#include <isc/refcount.h>
#include <dns/qp.h>
/***********************************************************************
*
* interior node basics
@@ -139,22 +143,29 @@ enum {
* size to make the allocator work harder.
*/
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
#define QP_CHUNK_LOG 7
#define QP_CHUNK_LOG_MIN 6
#define QP_CHUNK_LOG_MAX 7
#else
#define QP_CHUNK_LOG 10
#define QP_CHUNK_LOG_MIN 6
#define QP_CHUNK_LOG_MAX 10
#endif
STATIC_ASSERT(6 <= QP_CHUNK_LOG && QP_CHUNK_LOG <= 20,
"qp-trie chunk size is unreasonable");
STATIC_ASSERT(6 <= QP_CHUNK_LOG_MIN && QP_CHUNK_LOG_MIN <= QP_CHUNK_LOG_MAX,
"qp-trie min chunk size is unreasonable");
STATIC_ASSERT(6 <= QP_CHUNK_LOG_MAX && QP_CHUNK_LOG_MAX <= 20,
"qp-trie max chunk size is unreasonable");
#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG)
#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG_MAX)
#define QP_CHUNK_BYTES (QP_CHUNK_SIZE * sizeof(dns_qpnode_t))
STATIC_ASSERT(QP_SAFETY_MARGIN >= QP_CHUNK_BYTES,
"qp-trie safety margin too small");
/*
* We need a bitfield this big to count how much of a chunk is in use:
* it needs to count from 0 up to and including `1 << QP_CHUNK_LOG`.
* it needs to count from 0 up to and including `1 << QP_CHUNK_LOG_MAX`.
*/
#define QP_USAGE_BITS (QP_CHUNK_LOG + 1)
#define QP_USAGE_BITS (QP_CHUNK_LOG_MAX + 1)
/*
* A chunk needs to be compacted if it is less full than this threshold.
@@ -266,6 +277,8 @@ ref_cell(dns_qpref_t ref) {
typedef struct qp_usage {
/*% the allocation point, increases monotonically */
dns_qpcell_t used : QP_USAGE_BITS;
/*% the actual size of the allocation */
dns_qpcell_t capacity : QP_USAGE_BITS;
/*% count of nodes no longer needed, also monotonic */
dns_qpcell_t free : QP_USAGE_BITS;
/*% qp->base->ptr[chunk] != NULL */
@@ -320,6 +333,7 @@ typedef struct qp_rcuctx {
struct rcu_head rcu_head;
isc_mem_t *mctx;
dns_qpmulti_t *multi;
ISC_LINK(struct qp_rcuctx) link;
dns_qpchunk_t count;
dns_qpchunk_t chunk[];
} qp_rcuctx_t;
@@ -477,6 +491,8 @@ struct dns_qp {
dns_qpcell_t used_count, free_count;
/*% free cells that cannot be recovered right now */
dns_qpcell_t hold_count;
/*% capacity of last allocated chunk, for exponential chunk growth */
dns_qpcell_t chunk_capacity;
/*% what kind of transaction was most recently started [MT] */
enum { QP_NONE, QP_WRITE, QP_UPDATE } transaction_mode : 2;
/*% compact the entire trie [MT] */
@@ -521,6 +537,8 @@ struct dns_qpmulti {
dns_qp_t *rollback;
/*% all snapshots of this trie */
ISC_LIST(dns_qpsnap_t) snapshots;
/*% refcount for memory reclamation */
isc_refcount_t references;
};
/***********************************************************************

View File

@@ -2460,7 +2460,7 @@ overmem(qpcache_t *qpdb, dns_slabheader_t *newheader,
*/
purgesize = 2 * (sizeof(qpcnode_t) +
dns_name_size(&HEADERNODE(newheader)->name)) +
rdataset_size(newheader) + 12288;
rdataset_size(newheader) + QP_SAFETY_MARGIN;
again:
do {
isc_rwlocktype_t nlocktype = isc_rwlocktype_none;

View File

@@ -79,6 +79,8 @@
#define ISC_CLAMP(v, x, y) ((v) < (x) ? (x) : ((v) > (y) ? (y) : (v)))
#define ISC_MAX3(a, b, c) ISC_MAX(ISC_MAX((a), (b)), (c))
/*%
* The UNCONST() macro can be used to omit warnings produced by certain
* compilers when operating with pointers declared with the const type qual-

View File

@@ -446,6 +446,8 @@ static void
mem_shutdown(void) {
bool empty;
rcu_barrier();
isc__mem_checkdestroyed();
LOCK(&contextslock);