diff --git a/bin/dnssec/dnssec-cds.c b/bin/dnssec/dnssec-cds.c index 811bcb2747..b80452deb9 100644 --- a/bin/dnssec/dnssec-cds.c +++ b/bin/dnssec/dnssec-cds.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -1083,6 +1084,7 @@ cleanup(void) { } isc_mem_destroy(&mctx); } + rcu_barrier(); } int diff --git a/bin/dnssec/dnssec-dsfromkey.c b/bin/dnssec/dnssec-dsfromkey.c index 13d4e5c190..f0adc4b0d1 100644 --- a/bin/dnssec/dnssec-dsfromkey.c +++ b/bin/dnssec/dnssec-dsfromkey.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -560,6 +561,8 @@ main(int argc, char **argv) { } isc_mem_destroy(&mctx); + rcu_barrier(); + fflush(stdout); if (ferror(stdout)) { fprintf(stderr, "write error\n"); diff --git a/bin/dnssec/dnssec-importkey.c b/bin/dnssec/dnssec-importkey.c index f21172a6eb..0d11d95e25 100644 --- a/bin/dnssec/dnssec-importkey.c +++ b/bin/dnssec/dnssec-importkey.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -465,6 +466,8 @@ main(int argc, char **argv) { } isc_mem_destroy(&mctx); + rcu_barrier(); + fflush(stdout); if (ferror(stdout)) { fprintf(stderr, "write error\n"); diff --git a/bin/dnssec/dnssec-signzone.c b/bin/dnssec/dnssec-signzone.c index c71d4160d2..2c9b843df5 100644 --- a/bin/dnssec/dnssec-signzone.c +++ b/bin/dnssec/dnssec-signzone.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include @@ -4175,5 +4176,7 @@ main(int argc, char *argv[]) { } isc_mutex_destroy(&namelock); + rcu_barrier(); + return vresult == ISC_R_SUCCESS ? 0 : 1; } diff --git a/bin/dnssec/dnssec-verify.c b/bin/dnssec/dnssec-verify.c index f71f0059b8..74157cd918 100644 --- a/bin/dnssec/dnssec-verify.c +++ b/bin/dnssec/dnssec-verify.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -342,5 +343,7 @@ main(int argc, char *argv[]) { } isc_mem_destroy(&mctx); + rcu_barrier(); + return result == ISC_R_SUCCESS ? 
0 : 1; } diff --git a/bin/tests/system/makejournal.c b/bin/tests/system/makejournal.c index ec1d1b8e44..844ecf7940 100644 --- a/bin/tests/system/makejournal.c +++ b/bin/tests/system/makejournal.c @@ -156,5 +156,7 @@ cleanup: isc_mem_destroy(&mctx); } + rcu_barrier(); + return result != ISC_R_SUCCESS ? 1 : 0; } diff --git a/lib/dns/include/dns/qp.h b/lib/dns/include/dns/qp.h index 16eca977d9..d72730c80a 100644 --- a/lib/dns/include/dns/qp.h +++ b/lib/dns/include/dns/qp.h @@ -89,6 +89,12 @@ #include #include +/*% + * How many bytes a qp-trie might allocate as part of an insert. Needed for + * overmem checks. + */ +#define QP_SAFETY_MARGIN ((1ul << 12ul) * 12) + /*% * A `dns_qp_t` supports single-threaded read/write access. */ @@ -306,7 +312,6 @@ typedef struct dns_qp_memusage { size_t hold; /*%< nodes retained for readers */ size_t free; /*%< nodes to be reclaimed */ size_t node_size; /*%< in bytes */ - size_t chunk_size; /*%< nodes per chunk */ size_t chunk_count; /*%< allocated chunks */ size_t bytes; /*%< total memory in chunks and metadata */ bool fragmented; /*%< trie needs compaction */ diff --git a/lib/dns/qp.c b/lib/dns/qp.c index ea81aada34..07080f5bd2 100644 --- a/lib/dns/qp.c +++ b/lib/dns/qp.c @@ -96,6 +96,19 @@ static atomic_uint_fast64_t rollback_time; #define TRACE(...) 
#endif +#if DNS_QPMULTI_TRACE +ISC_REFCOUNT_STATIC_TRACE_DECL(dns_qpmulti); +#define dns_qpmulti_ref(ptr) dns_qpmulti__ref(ptr, __func__, __FILE__, __LINE__) +#define dns_qpmulti_unref(ptr) \ + dns_qpmulti__unref(ptr, __func__, __FILE__, __LINE__) +#define dns_qpmulti_attach(ptr, ptrp) \ + dns_qpmulti__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define dns_qpmulti_detach(ptrp) \ + dns_qpmulti__detach(ptrp, __func__, __FILE__, __LINE__) +#else +ISC_REFCOUNT_STATIC_DECL(dns_qpmulti); +#endif + /*********************************************************************** * * converting DNS names to trie keys @@ -427,7 +440,7 @@ write_protect(dns_qp_t *qp, dns_qpchunk_t chunk) { #else -#define chunk_get_raw(qp) isc_mem_allocate(qp->mctx, QP_CHUNK_BYTES) +#define chunk_get_raw(qp, size) isc_mem_allocate(qp->mctx, size) #define chunk_free_raw(qp, ptr) isc_mem_free(qp->mctx, ptr) #define chunk_shrink_raw(qp, ptr, size) isc_mem_reallocate(qp->mctx, ptr, size) @@ -456,6 +469,22 @@ cells_immutable(dns_qp_t *qp, dns_qpref_t ref) { } } +/* + * Find the next power that is both bigger than size and prev_capacity, + * but still within the chunk min and max sizes. + */ +static dns_qpcell_t +next_capacity(uint32_t prev_capacity, uint32_t size) { + /* + * Unfortunately builtin_clz is undefined for 0. We work around this + * issue by flooring the request size at 2. + */ + size = ISC_MAX3(size, prev_capacity, 2u); + uint32_t log2 = 32u - __builtin_clz(size - 1u); + + return 1U << ISC_CLAMP(log2, QP_CHUNK_LOG_MIN, QP_CHUNK_LOG_MAX); +} + /* * Create a fresh bump chunk and allocate some twigs from it. 
*/ @@ -464,9 +493,15 @@ chunk_alloc(dns_qp_t *qp, dns_qpchunk_t chunk, dns_qpweight_t size) { INSIST(qp->base->ptr[chunk] == NULL); INSIST(qp->usage[chunk].used == 0); INSIST(qp->usage[chunk].free == 0); + INSIST(qp->chunk_capacity <= QP_CHUNK_SIZE); - qp->base->ptr[chunk] = chunk_get_raw(qp); - qp->usage[chunk] = (qp_usage_t){ .exists = true, .used = size }; + qp->chunk_capacity = next_capacity(qp->chunk_capacity * 2u, size); + qp->base->ptr[chunk] = + chunk_get_raw(qp, qp->chunk_capacity * sizeof(dns_qpnode_t)); + + qp->usage[chunk] = (qp_usage_t){ .exists = true, + .used = size, + .capacity = qp->chunk_capacity }; qp->used_count += size; qp->bump = chunk; qp->fender = 0; @@ -546,7 +581,7 @@ alloc_twigs(dns_qp_t *qp, dns_qpweight_t size) { dns_qpchunk_t chunk = qp->bump; dns_qpcell_t cell = qp->usage[chunk].used; - if (cell + size <= QP_CHUNK_SIZE) { + if (cell + size <= qp->usage[chunk].capacity) { qp->usage[chunk].used += size; qp->used_count += size; return make_ref(chunk, cell); @@ -699,38 +734,47 @@ reclaim_chunks_cb(struct rcu_head *arg) { REQUIRE(QPMULTI_VALID(multi)); LOCK(&multi->mutex); - dns_qp_t *qp = &multi->writer; - REQUIRE(QP_VALID(qp)); - unsigned int nfree = 0; - isc_nanosecs_t start = isc_time_monotonic(); + /* + * If chunk_max is zero, chunks have already been freed. 
+ */ + if (qp->chunk_max != 0) { + unsigned int free = 0; + isc_nanosecs_t start = isc_time_monotonic(); - for (unsigned int i = 0; i < rcuctx->count; i++) { - dns_qpchunk_t chunk = rcuctx->chunk[i]; - if (qp->usage[chunk].snapshot) { - /* cleanup when snapshot is destroyed */ - qp->usage[chunk].snapfree = true; - } else { - chunk_free(qp, chunk); - nfree++; + INSIST(QP_VALID(qp)); + + for (unsigned int i = 0; i < rcuctx->count; i++) { + dns_qpchunk_t chunk = rcuctx->chunk[i]; + if (qp->usage[chunk].snapshot) { + /* clean up when snapshot is destroyed */ + qp->usage[chunk].snapfree = true; + } else { + chunk_free(qp, chunk); + free++; + } + } + + isc_nanosecs_t time = isc_time_monotonic() - start; + recycle_time += time; + + if (free > 0) { + LOG_STATS("qp reclaim" PRItime "free %u chunks", time, + free); + LOG_STATS( + "qp reclaim leaf %u live %u used %u free %u " + "hold %u", + qp->leaf_count, qp->used_count - qp->free_count, + qp->used_count, qp->free_count, qp->hold_count); } } + UNLOCK(&multi->mutex); + + dns_qpmulti_detach(&multi); isc_mem_putanddetach(&rcuctx->mctx, rcuctx, STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count)); - - isc_nanosecs_t time = isc_time_monotonic() - start; - recycle_time += time; - - if (nfree > 0) { - LOG_STATS("qp reclaim" PRItime "free %u chunks", time, nfree); - LOG_STATS("qp reclaim leaf %u live %u used %u free %u hold %u", - qp->leaf_count, qp->used_count - qp->free_count, - qp->used_count, qp->free_count, qp->hold_count); - } - - UNLOCK(&multi->mutex); } /* @@ -775,6 +819,11 @@ reclaim_chunks(dns_qpmulti_t *multi) { } } + /* + * Reference the qpmulti object to keep it from being + * freed until reclaim_chunks_cb() runs. 
+ */ + dns_qpmulti_ref(multi); call_rcu(&rcuctx->rcu_head, reclaim_chunks_cb); LOG_STATS("qp will reclaim %u chunks", count); @@ -1027,12 +1076,13 @@ dns_qp_memusage(dns_qp_t *qp) { .hold = qp->hold_count, .free = qp->free_count, .node_size = sizeof(dns_qpnode_t), - .chunk_size = QP_CHUNK_SIZE, .fragmented = QP_NEEDGC(qp), }; + size_t chunk_usage_bytes = 0; for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { if (qp->base->ptr[chunk] != NULL) { + chunk_usage_bytes += qp->usage[chunk].capacity; memusage.chunk_count += 1; } } @@ -1041,7 +1091,7 @@ dns_qp_memusage(dns_qp_t *qp) { * XXXFANF does not subtract chunks that have been shrunk, * and does not count unreclaimed dns_qpbase_t objects */ - memusage.bytes = memusage.chunk_count * QP_CHUNK_BYTES + + memusage.bytes = chunk_usage_bytes + qp->chunk_max * sizeof(qp->base->ptr[0]) + qp->chunk_max * sizeof(qp->usage[0]); @@ -1059,7 +1109,7 @@ dns_qpmulti_memusage(dns_qpmulti_t *multi) { dns_qp_memusage_t memusage = dns_qp_memusage(qp); if (qp->transaction_mode == QP_UPDATE) { - memusage.bytes -= QP_CHUNK_BYTES; + memusage.bytes -= qp->usage[qp->bump].capacity; memusage.bytes += qp->usage[qp->bump].used * sizeof(dns_qpnode_t); } @@ -1444,12 +1494,12 @@ dns_qpmulti_create(isc_mem_t *mctx, const dns_qpmethods_t *methods, void *uctx, REQUIRE(qpmp != NULL && *qpmp == NULL); dns_qpmulti_t *multi = isc_mem_get(mctx, sizeof(*multi)); - *multi = (dns_qpmulti_t){ - .magic = QPMULTI_MAGIC, - .reader_ref = INVALID_REF, - }; + *multi = (dns_qpmulti_t){ .magic = QPMULTI_MAGIC, + .reader_ref = INVALID_REF, + .references = ISC_REFCOUNT_INITIALIZER(1) }; isc_mutex_init(&multi->mutex); ISC_LIST_INIT(multi->snapshots); + /* * Do not waste effort allocating a bump chunk that will be thrown * away when a transaction is opened. 
dns_qpmulti_update() always @@ -1469,11 +1519,13 @@ destroy_guts(dns_qp_t *qp) { if (qp->chunk_max == 0) { return; } + for (dns_qpchunk_t chunk = 0; chunk < qp->chunk_max; chunk++) { if (qp->base->ptr[chunk] != NULL) { chunk_free(qp, chunk); } } + qp->chunk_max = 0; ENSURE(qp->used_count == 0); ENSURE(qp->free_count == 0); ENSURE(isc_refcount_current(&qp->base->refcount) == 1); @@ -1499,7 +1551,26 @@ dns_qp_destroy(dns_qp_t **qptp) { } static void -qpmulti_destroy_cb(struct rcu_head *arg) { +qpmulti_free_mem(dns_qpmulti_t *multi) { + REQUIRE(QPMULTI_VALID(multi)); + + /* reassure thread sanitizer */ + LOCK(&multi->mutex); + dns_qp_t *qp = &multi->writer; + UNLOCK(&multi->mutex); + + isc_mutex_destroy(&multi->mutex); + isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi)); +} + +#if DNS_QPMULTI_TRACE +ISC_REFCOUNT_STATIC_TRACE_IMPL(dns_qpmulti, qpmulti_free_mem) +#else +ISC_REFCOUNT_STATIC_IMPL(dns_qpmulti, qpmulti_free_mem) +#endif + +static void +qpmulti_destroy_guts_cb(struct rcu_head *arg) { qp_rcuctx_t *rcuctx = caa_container_of(arg, qp_rcuctx_t, rcu_head); REQUIRE(QPRCU_VALID(rcuctx)); /* only nonzero for reclaim_chunks_cb() */ @@ -1518,10 +1589,9 @@ UNLOCK(&multi->mutex); - isc_mutex_destroy(&multi->mutex); + dns_qpmulti_detach(&multi); isc_mem_putanddetach(&rcuctx->mctx, rcuctx, STRUCT_FLEX_SIZE(rcuctx, chunk, rcuctx->count)); - isc_mem_putanddetach(&qp->mctx, multi, sizeof(*multi)); } void @@ -1547,7 +1617,7 @@ dns_qpmulti_destroy(dns_qpmulti_t **qpmp) { .multi = multi, }; isc_mem_attach(qp->mctx, &rcuctx->mctx); - call_rcu(&rcuctx->rcu_head, qpmulti_destroy_cb); + call_rcu(&rcuctx->rcu_head, qpmulti_destroy_guts_cb); } /*********************************************************************** diff --git a/lib/dns/qp_p.h b/lib/dns/qp_p.h index d7c65131e7..d61c4a0007 100644 --- a/lib/dns/qp_p.h +++ b/lib/dns/qp_p.h @@ -19,6 +19,10 @@ #pragma once +#include + +#include + 
/*********************************************************************** * * interior node basics @@ -139,22 +143,29 @@ enum { * size to make the allocator work harder. */ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -#define QP_CHUNK_LOG 7 +#define QP_CHUNK_LOG_MIN 6 +#define QP_CHUNK_LOG_MAX 7 #else -#define QP_CHUNK_LOG 10 +#define QP_CHUNK_LOG_MIN 6 +#define QP_CHUNK_LOG_MAX 10 #endif -STATIC_ASSERT(6 <= QP_CHUNK_LOG && QP_CHUNK_LOG <= 20, - "qp-trie chunk size is unreasonable"); +STATIC_ASSERT(6 <= QP_CHUNK_LOG_MIN && QP_CHUNK_LOG_MIN <= QP_CHUNK_LOG_MAX, + "qp-trie min chunk size is unreasonable"); +STATIC_ASSERT(6 <= QP_CHUNK_LOG_MAX && QP_CHUNK_LOG_MAX <= 20, + "qp-trie max chunk size is unreasonable"); -#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG) +#define QP_CHUNK_SIZE (1U << QP_CHUNK_LOG_MAX) #define QP_CHUNK_BYTES (QP_CHUNK_SIZE * sizeof(dns_qpnode_t)) +STATIC_ASSERT(QP_SAFETY_MARGIN >= QP_CHUNK_BYTES, + "qp-trie safety margin too small"); + /* * We need a bitfield this big to count how much of a chunk is in use: - * it needs to count from 0 up to and including `1 << QP_CHUNK_LOG`. + * it needs to count from 0 up to and including `1 << QP_CHUNK_LOG_MAX`. */ -#define QP_USAGE_BITS (QP_CHUNK_LOG + 1) +#define QP_USAGE_BITS (QP_CHUNK_LOG_MAX + 1) /* * A chunk needs to be compacted if it is less full than this threshold. 
@@ -266,6 +277,8 @@ ref_cell(dns_qpref_t ref) { typedef struct qp_usage { /*% the allocation point, increases monotonically */ dns_qpcell_t used : QP_USAGE_BITS; + /*% the actual size of the allocation */ + dns_qpcell_t capacity : QP_USAGE_BITS; /*% count of nodes no longer needed, also monotonic */ dns_qpcell_t free : QP_USAGE_BITS; /*% qp->base->ptr[chunk] != NULL */ @@ -320,6 +333,7 @@ typedef struct qp_rcuctx { struct rcu_head rcu_head; isc_mem_t *mctx; dns_qpmulti_t *multi; + ISC_LINK(struct qp_rcuctx) link; dns_qpchunk_t count; dns_qpchunk_t chunk[]; } qp_rcuctx_t; @@ -477,6 +491,8 @@ struct dns_qp { dns_qpcell_t used_count, free_count; /*% free cells that cannot be recovered right now */ dns_qpcell_t hold_count; + /*% capacity of last allocated chunk, for exponential chunk growth */ + dns_qpcell_t chunk_capacity; /*% what kind of transaction was most recently started [MT] */ enum { QP_NONE, QP_WRITE, QP_UPDATE } transaction_mode : 2; /*% compact the entire trie [MT] */ @@ -521,6 +537,8 @@ struct dns_qpmulti { dns_qp_t *rollback; /*% all snapshots of this trie */ ISC_LIST(dns_qpsnap_t) snapshots; + /*% refcount for memory reclamation */ + isc_refcount_t references; }; /*********************************************************************** diff --git a/lib/dns/qpcache.c b/lib/dns/qpcache.c index abb1888d17..4956841829 100644 --- a/lib/dns/qpcache.c +++ b/lib/dns/qpcache.c @@ -2460,7 +2460,7 @@ overmem(qpcache_t *qpdb, dns_slabheader_t *newheader, */ purgesize = 2 * (sizeof(qpcnode_t) + dns_name_size(&HEADERNODE(newheader)->name)) + - rdataset_size(newheader) + 12288; + rdataset_size(newheader) + QP_SAFETY_MARGIN; again: do { isc_rwlocktype_t nlocktype = isc_rwlocktype_none; diff --git a/lib/isc/include/isc/util.h b/lib/isc/include/isc/util.h index 5eec716ea2..6128688a36 100644 --- a/lib/isc/include/isc/util.h +++ b/lib/isc/include/isc/util.h @@ -79,6 +79,8 @@ #define ISC_CLAMP(v, x, y) ((v) < (x) ? (x) : ((v) > (y) ? 
(y) : (v))) +#define ISC_MAX3(a, b, c) ISC_MAX(ISC_MAX((a), (b)), (c)) + /*% * The UNCONST() macro can be used to omit warnings produced by certain * compilers when operating with pointers declared with the const type qual- diff --git a/lib/isc/mem.c b/lib/isc/mem.c index 592d945b44..85b0b89d1c 100644 --- a/lib/isc/mem.c +++ b/lib/isc/mem.c @@ -446,6 +446,8 @@ static void mem_shutdown(void) { bool empty; + rcu_barrier(); + isc__mem_checkdestroyed(); LOCK(&contextslock);