2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-29 13:38:26 +00:00

Some minor qp-trie improvements

Adjust the dns_qp_memusage() and dns_qp_compact() functions
to be more informative and flexible about handling fragmentation.

Avoid wasting space in runt chunks.

Switch from twigs_mutable() to cells_immutable() because that is the
sense we usually want.

Drop the redundant evacuate() function and rename evacuate_twigs() to
evacuate(). Move some chunk test functions closer to their point of
use.

Clarify compact_recursive(). Some small cleanups to comments.

Use isc_time_monotonic() for qp-trie timing stats.

Use #define constants to control debug logging.

Set up DNS name label offsets in dns_qpkey_fromname() so it is easier
to use in cases where the name is not fully hydrated.
This commit is contained in:
Tony Finch 2023-01-06 19:10:19 +00:00
parent 4b09c9a6ae
commit 549854f63b
5 changed files with 163 additions and 117 deletions

View File

@ -147,9 +147,10 @@ typedef uint8_t dns_qpkey_t[512];
* responsible for the leaf values that are stored in the trie. The
* methods are provided for a whole trie when the trie is created.
*
* The qp-trie is also given a context pointer that is passed to the
* methods, so the methods know about the trie's context as well as a
* particular leaf value.
* When you create a qp-trie, you provide a context pointer that is
* passed to the methods. The context pointer can tell the methods
* something about the trie as a whole, in addition to a particular
* leaf's values.
*
* The `attach` and `detach` methods adjust reference counts on value
* objects. They support copy-on-write and safe memory reclamation
@ -198,6 +199,7 @@ typedef struct dns_qp_memusage {
size_t chunk_size; /*%< nodes per chunk */
size_t chunk_count; /*%< allocated chunks */
size_t bytes; /*%< total memory in chunks and metadata */
bool fragmented; /*%< trie needs compaction */
} dns_qp_memusage_t;
/***********************************************************************
@ -266,20 +268,23 @@ dns_qpmulti_destroy(dns_qpmulti_t **qpmp);
*/
void
dns_qp_compact(dns_qp_t *qp);
dns_qp_compact(dns_qp_t *qp, bool all);
/*%<
* Defragment the entire qp-trie and release unused memory.
* Defragment the qp-trie and release unused memory.
*
* When modifications make a trie too fragmented, it is automatically
* compacted. Automatic compaction avoids compacting chunks that are not
* fragmented to save time, but this function compacts the entire trie to
* defragment it as much as possible.
* compacted. However, automatic compaction is limited when a
* multithreaded trie has lots of immutable memory from past
* transactions, and lightweight write transactions do not do
* any automatic compaction at all.
*
* This function can be used with a single-threaded qp-trie and during a
* transaction on a multi-threaded trie.
*
* Requires:
* \li `qp` is a pointer to a valid qp-trie
* \li `all` is true, to compact the whole trie
* \li `all` is false, to save time by not compacting
* chunks that are not fragmented
*/
void
@ -305,6 +310,19 @@ dns_qp_memusage(dns_qp_t *qp);
* \li a `dns_qp_memusage_t` structure described above
*/
dns_qp_memusage_t
dns_qpmulti_memusage(dns_qpmulti_t *multi);
/*%<
* Get the memory counters from a multi-threaded qp-trie outside the
* context of a transaction.
*
* Requires:
* \li `multi` is a pointer to a valid dns_qpmulti_t
*
* Returns:
* \li a `dns_qp_memusage_t` structure described above
*/
/***********************************************************************
*
* functions - search, modify

View File

@ -38,6 +38,7 @@
#include <isc/types.h>
#include <isc/util.h>
#include <dns/fixedname.h>
#include <dns/log.h>
#include <dns/name.h>
#include <dns/qp.h>
@ -45,6 +46,13 @@
#include "qp_p.h"
#ifndef DNS_QP_LOG_STATS
#define DNS_QP_LOG_STATS 1
#endif
#ifndef DNS_QP_TRACE
#define DNS_QP_TRACE 0
#endif
/*
* very basic garbage collector statistics
*
@ -55,7 +63,10 @@ static atomic_uint_fast64_t compact_time;
static atomic_uint_fast64_t recycle_time;
static atomic_uint_fast64_t rollback_time;
#if 1
/* for LOG_STATS() format strings */
#define PRItime " %" PRIu64 " ns "
#if DNS_QP_LOG_STATS
#define LOG_STATS(...) \
isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_QP, \
ISC_LOG_DEBUG(1), __VA_ARGS__)
@ -65,7 +76,7 @@ static atomic_uint_fast64_t rollback_time;
#define PRItime " %" PRIu64 " us "
#if 0
#if DNS_QP_TRACE
/*
* TRACE is generally used in allocation-related functions so it doesn't
* trace very high-frequency ops
@ -200,11 +211,17 @@ initialize_bits_for_byte(void) {
size_t
dns_qpkey_fromname(dns_qpkey_t key, const dns_name_t *name) {
size_t len, label;
dns_fixedname_t fixed;
REQUIRE(ISC_MAGIC_VALID(name, DNS_NAME_MAGIC));
REQUIRE(name->offsets != NULL);
REQUIRE(name->labels > 0);
if (name->offsets == NULL) {
dns_name_t *clone = dns_fixedname_initname(&fixed);
dns_name_clone(name, clone);
name = clone;
}
len = 0;
label = name->labels;
while (label-- > 0) {
@ -367,29 +384,13 @@ clone_array(isc_mem_t *mctx, void *oldp, size_t oldsz, size_t newsz,
* allocator
*/
/*
* How many cells are actually in use in a chunk?
*/
static inline qp_cell_t
chunk_usage(dns_qp_t *qp, qp_chunk_t chunk) {
return (qp->usage[chunk].used - qp->usage[chunk].free);
}
/*
* Is this chunk wasting space?
*/
static inline qp_cell_t
chunk_fragmented(dns_qp_t *qp, qp_chunk_t chunk) {
return (qp->usage[chunk].free > QP_MAX_FREE);
}
/*
* We can mutate a chunk if it was allocated in the current generation.
* This might not be true for the `bump` chunk when it is reused.
*/
static inline bool
chunk_mutable(dns_qp_t *qp, qp_chunk_t chunk) {
return (qp->usage[chunk].generation == qp->generation);
chunk_immutable(dns_qp_t *qp, qp_chunk_t chunk) {
return (qp->usage[chunk].generation != qp->generation);
}
/*
@ -397,13 +398,13 @@ chunk_mutable(dns_qp_t *qp, qp_chunk_t chunk) {
* it can have an immutable prefix and a mutable suffix.
*/
static inline bool
twigs_mutable(dns_qp_t *qp, qp_ref_t ref) {
cells_immutable(dns_qp_t *qp, qp_ref_t ref) {
qp_chunk_t chunk = ref_chunk(ref);
qp_cell_t cell = ref_cell(ref);
if (chunk == qp->bump) {
return (cell >= qp->fender);
return (cell < qp->fender);
} else {
return (chunk_mutable(qp, chunk));
return (chunk_immutable(qp, chunk));
}
}
@ -519,6 +520,8 @@ alloc_twigs(dns_qp_t *qp, qp_weight_t size) {
* zero them to ensure that there isn't a spurious double detach when
* the chunk is later recycled.
*
* Returns true if the twigs were immediately destroyed.
*
* NOTE: the caller is responsible for attaching or detaching any
* leaves as required.
*/
@ -531,13 +534,13 @@ free_twigs(dns_qp_t *qp, qp_ref_t twigs, qp_weight_t size) {
ENSURE(qp->free_count <= qp->used_count);
ENSURE(qp->usage[chunk].free <= qp->usage[chunk].used);
if (twigs_mutable(qp, twigs)) {
zero_twigs(ref_ptr(qp, twigs), size);
return (true);
} else {
if (cells_immutable(qp, twigs)) {
qp->hold_count += size;
ENSURE(qp->free_count >= qp->hold_count);
return (false);
} else {
zero_twigs(ref_ptr(qp, twigs), size);
return (true);
}
}
@ -560,6 +563,14 @@ attach_twigs(dns_qp_t *qp, qp_node_t *twigs, qp_weight_t size) {
* chunk reclamation
*/
/*
* Is any of this chunk still in use?
*/
static inline qp_cell_t
chunk_usage(dns_qp_t *qp, qp_chunk_t chunk) {
return (qp->usage[chunk].used - qp->usage[chunk].free);
}
/*
* When a chunk is being recycled after a long-running read transaction,
* or after a rollback, we need to detach any leaves that remain.
@ -600,8 +611,6 @@ chunk_free(dns_qp_t *qp, qp_chunk_t chunk) {
*/
static void
recycle(dns_qp_t *qp) {
isc_time_t t0, t1;
uint64_t time;
unsigned int live = 0;
unsigned int keep = 0;
unsigned int free = 0;
@ -610,23 +619,22 @@ recycle(dns_qp_t *qp) {
(qp->free_count - qp->hold_count),
(qp->free_count - qp->hold_count) / QP_CHUNK_SIZE);
isc_time_now_hires(&t0);
isc_nanosecs_t start = isc_time_monotonic();
for (qp_chunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
if (qp->base[chunk] == NULL) {
continue;
} else if (chunk == qp->bump || chunk_usage(qp, chunk) > 0) {
} else if (chunk_usage(qp, chunk) > 0 || chunk == qp->bump) {
live++;
} else if (chunk_mutable(qp, chunk) || qp->hold_count == 0) {
} else if (chunk_immutable(qp, chunk) && qp->hold_count > 0) {
keep++;
} else {
chunk_free(qp, chunk);
free++;
} else {
keep++;
}
}
isc_time_now_hires(&t1);
time = isc_time_microdiff(&t1, &t0);
isc_nanosecs_t time = isc_time_monotonic() - start;
atomic_fetch_add_relaxed(&recycle_time, time);
LOG_STATS("qp recycle" PRItime "live %u keep %u free %u chunks", time,
@ -646,9 +654,12 @@ recycle(dns_qp_t *qp) {
* or for garbage collection. We don't update the node in place
* because `compact_recursive()` does not ensure the node itself is
* mutable until after it discovers evacuation was necessary.
*
* If free_twigs() could not immediately destroy the old twigs, we have
* to re-attach to any leaves.
*/
static qp_ref_t
evacuate_twigs(dns_qp_t *qp, qp_node_t *n) {
evacuate(dns_qp_t *qp, qp_node_t *n) {
qp_weight_t size = branch_twigs_size(n);
qp_ref_t old_ref = branch_twigs_ref(n);
qp_ref_t new_ref = alloc_twigs(qp, size);
@ -664,73 +675,74 @@ evacuate_twigs(dns_qp_t *qp, qp_node_t *n) {
}
/*
* Evacuate the node's twigs and update the node in place.
* Immutable nodes need copy-on-write. As we walk down the trie finding
* the right place to modify, make_twigs_mutable() is called to ensure
* that shared nodes on the path from the root are copied to a mutable
* chunk.
*/
static void
evacuate(dns_qp_t *qp, qp_node_t *n) {
*n = make_node(branch_index(n), evacuate_twigs(qp, n));
static inline void
make_twigs_mutable(dns_qp_t *qp, qp_node_t *n) {
if (cells_immutable(qp, branch_twigs_ref(n))) {
*n = make_node(branch_index(n), evacuate(qp, n));
}
}
/*
* Compact the trie by traversing the whole thing recursively, copying
* bottom-up as required. The aim is to avoid evacuation as much as
* possible, but when parts of the trie are shared, we need to evacuate
* possible, but when parts of the trie are immutable, we need to evacuate
* the paths from the root to the parts of the trie that occupy
* fragmented chunks.
*
* Without the "should we evacuate?" check, the algorithm will leave
* the trie unchanged. If the twigs are all leaves, the loop changes
* nothing, so we will return this node's original ref. If all of the
* twigs that are branches did not need moving, again, the loop
* changes nothing. So the evacuation check is the only place that the
* algorithm introduces ref changes, that then bubble up through the
* logic inside the loop.
* Without the QP_MIN_USED check, the algorithm will leave the trie
* unchanged. If the children are all leaves, the loop changes nothing,
* so we will return this node's original ref. If all of the children
* that are branches did not need moving, again, the loop changes
* nothing. So the evacuation check is the only place that the
* algorithm introduces ref changes, that then bubble up towards the
* root through the logic inside the loop.
*/
static qp_ref_t
compact_recursive(dns_qp_t *qp, qp_node_t *parent) {
qp_ref_t ref = branch_twigs_ref(parent);
/* should we evacuate the twigs? */
if (chunk_fragmented(qp, ref_chunk(ref)) || qp->compact_all) {
ref = evacuate_twigs(qp, parent);
}
bool mutable = twigs_mutable(qp, ref);
qp_weight_t size = branch_twigs_size(parent);
qp_ref_t twigs_ref = branch_twigs_ref(parent);
qp_chunk_t chunk = ref_chunk(twigs_ref);
if (qp->compact_all ||
(chunk != qp->bump && chunk_usage(qp, chunk) < QP_MIN_USED))
{
twigs_ref = evacuate(qp, parent);
}
bool immutable = cells_immutable(qp, twigs_ref);
for (qp_weight_t pos = 0; pos < size; pos++) {
qp_node_t *child = ref_ptr(qp, ref) + pos;
qp_node_t *child = ref_ptr(qp, twigs_ref) + pos;
if (!is_branch(child)) {
continue;
}
qp_ref_t old_ref = branch_twigs_ref(child);
qp_ref_t new_ref = compact_recursive(qp, child);
if (old_ref == new_ref) {
qp_ref_t old_grandtwigs = branch_twigs_ref(child);
qp_ref_t new_grandtwigs = compact_recursive(qp, child);
if (old_grandtwigs == new_grandtwigs) {
continue;
}
if (!mutable) {
ref = evacuate_twigs(qp, parent);
if (immutable) {
twigs_ref = evacuate(qp, parent);
/* the twigs have moved */
child = ref_ptr(qp, ref) + pos;
mutable = true;
child = ref_ptr(qp, twigs_ref) + pos;
immutable = false;
}
*child = make_node(branch_index(child), new_ref);
*child = make_node(branch_index(child), new_grandtwigs);
}
return (ref);
return (twigs_ref);
}
static void
compact(dns_qp_t *qp) {
isc_time_t t0, t1;
uint64_t time;
LOG_STATS("qp compact before leaf %u live %u used %u free %u hold %u",
qp->leaf_count, qp->used_count - qp->free_count,
qp->used_count, qp->free_count, qp->hold_count);
isc_time_now_hires(&t0);
isc_nanosecs_t start = isc_time_monotonic();
/*
* Reset the bump chunk if it is fragmented.
*/
if (chunk_fragmented(qp, qp->bump)) {
if (qp->usage[qp->bump].free > QP_MAX_FREE) {
alloc_reset(qp);
}
@ -740,8 +752,7 @@ compact(dns_qp_t *qp) {
}
qp->compact_all = false;
isc_time_now_hires(&t1);
time = isc_time_microdiff(&t1, &t0);
isc_nanosecs_t time = isc_time_monotonic() - start;
atomic_fetch_add_relaxed(&compact_time, time);
LOG_STATS("qp compact" PRItime
@ -751,9 +762,9 @@ compact(dns_qp_t *qp) {
}
void
dns_qp_compact(dns_qp_t *qp) {
dns_qp_compact(dns_qp_t *qp, bool all) {
REQUIRE(QP_VALID(qp));
qp->compact_all = true;
qp->compact_all = all;
compact(qp);
recycle(qp);
}
@ -780,8 +791,8 @@ auto_compact_recycle(dns_qp_t *qp) {
}
/*
* Free some twigs and compact the trie if necessary; the space
* accounting is similar to `evacuate_twigs()` above.
* Free some twigs and (if they were destroyed immediately so that the
* result from QP_MAX_GARBAGE can change) compact the trie if necessary.
*
* This is called by the trie modification API entry points. The
* free_twigs() function requires the caller to attach or detach any
@ -808,19 +819,6 @@ squash_twigs(dns_qp_t *qp, qp_ref_t twigs, qp_weight_t size) {
return (destroyed);
}
/*
* Shared twigs need copy-on-write. As we walk down the trie finding
* the right place to modify, make_twigs_mutable() is called to ensure
* that shared nodes on the path from the root are copied to a mutable
* chunk.
*/
static inline void
make_twigs_mutable(dns_qp_t *qp, qp_node_t *n) {
if (!twigs_mutable(qp, branch_twigs_ref(n))) {
evacuate(qp, n);
}
}
/***********************************************************************
*
* public accessors for memory management internals
@ -841,6 +839,14 @@ dns_qp_memusage(dns_qp_t *qp) {
.chunk_size = QP_CHUNK_SIZE,
};
/*
* tell the caller about fragmentation of the whole trie
* without discounting immutable chunks
*/
qp->hold_count = 0;
memusage.fragmented = QP_MAX_GARBAGE(qp);
qp->hold_count = memusage.hold;
for (qp_chunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
if (qp->base[chunk] != NULL) {
memusage.chunk_count += 1;
@ -855,8 +861,28 @@ dns_qp_memusage(dns_qp_t *qp) {
return (memusage);
}
dns_qp_memusage_t
dns_qpmulti_memusage(dns_qpmulti_t *multi) {
REQUIRE(QPMULTI_VALID(multi));
LOCK(&multi->mutex);
dns_qp_t *qp = &multi->writer;
INSIST(QP_VALID(qp));
dns_qp_memusage_t memusage = dns_qp_memusage(qp);
if (qp->transaction_mode == QP_UPDATE) {
memusage.bytes -= QP_CHUNK_BYTES;
memusage.bytes += qp->usage[qp->bump].used * sizeof(qp_node_t);
}
UNLOCK(&multi->mutex);
return (memusage);
}
void
dns_qp_gctime(uint64_t *compact_p, uint64_t *recycle_p, uint64_t *rollback_p) {
dns_qp_gctime(isc_nanosecs_t *compact_p, isc_nanosecs_t *recycle_p,
isc_nanosecs_t *rollback_p) {
*compact_p = atomic_load_relaxed(&compact_time);
*recycle_p = atomic_load_relaxed(&recycle_time);
*rollback_p = atomic_load_relaxed(&rollback_time);
@ -1029,28 +1055,25 @@ dns_qpmulti_commit(dns_qpmulti_t *multi, dns_qp_t **qptp) {
*/
void
dns_qpmulti_rollback(dns_qpmulti_t *multi, dns_qp_t **qptp) {
dns_qp_t *qp;
isc_time_t t0, t1;
uint64_t time;
unsigned int free = 0;
REQUIRE(QPMULTI_VALID(multi));
REQUIRE(qptp != NULL);
REQUIRE(*qptp == write_phase(multi));
qp = *qptp;
dns_qp_t *qp = *qptp;
REQUIRE(qp->transaction_mode == QP_UPDATE);
TRACE("");
isc_time_now_hires(&t0);
isc_nanosecs_t start = isc_time_monotonic();
/*
* recycle any chunks allocated in this transaction,
* including the bump chunk, and detach value objects
*/
for (qp_chunk_t chunk = 0; chunk < qp->chunk_max; chunk++) {
if (qp->base[chunk] != NULL && chunk_mutable(qp, chunk)) {
if (qp->base[chunk] != NULL && !chunk_immutable(qp, chunk)) {
chunk_free(qp, chunk);
free++;
}
@ -1060,8 +1083,7 @@ dns_qpmulti_rollback(dns_qpmulti_t *multi, dns_qp_t **qptp) {
INSIST(!qp->shared_arrays);
free_chunk_arrays(qp);
isc_time_now_hires(&t1);
time = isc_time_microdiff(&t1, &t0);
isc_nanosecs_t time = isc_time_monotonic() - start;
atomic_fetch_add_relaxed(&rollback_time, time);
LOG_STATS("qp rollback" PRItime "free %u chunks", time, free);
@ -1388,6 +1410,7 @@ dns_qp_insert(dns_qp_t *qp, void *pval, uint32_t ival) {
INSIST(branch_has_twig(n, bit));
n = branch_twig_ptr(qp, n, bit);
}
/* fall through */
newbranch:
new_ref = alloc_twigs(qp, 2);

View File

@ -127,6 +127,7 @@ STATIC_ASSERT(6 <= QP_CHUNK_LOG && QP_CHUNK_LOG <= 20,
* (12% overhead seems reasonable)
*/
#define QP_MAX_FREE (QP_CHUNK_SIZE / 8)
#define QP_MIN_USED (QP_CHUNK_SIZE - QP_MAX_FREE)
/*
* Compact automatically when we pass this threshold: when there is a lot
@ -169,6 +170,10 @@ STATIC_ASSERT(6 <= QP_CHUNK_LOG && QP_CHUNK_LOG <= 20,
* Note: A dns_qpkey_t is logically an array of qp_shift_t values, but it
* isn't declared that way because dns_qpkey_t is a public type whereas
* qp_shift_t is private.
*
* A dns_qpkey element key[off] must satisfy
*
* SHIFT_NOBYTE <= key[off] && key[off] < SHIFT_OFFSET
*/
typedef uint8_t qp_shift_t;
@ -250,7 +255,7 @@ ref_cell(qp_ref_t ref) {
uint32_t magic; \
qp_node_t root; \
qp_node_t **base; \
void *uctx; \
void *uctx; \
const dns_qpmethods_t *methods
struct dns_qpread {
@ -364,7 +369,7 @@ struct dns_qp {
qp_chunk_t chunk_max;
/*% which chunk is used for allocations */
qp_chunk_t bump;
/*% twigs in the `bump` chunk below `fender` are read only [MT] */
/*% nodes in the `bump` chunk below `fender` are read only [MT] */
qp_cell_t fender;
/*% number of leaf nodes */
qp_cell_t leaf_count;
@ -560,8 +565,7 @@ ref_ptr(dns_qpreadable_t qpr, qp_ref_t ref) {
*/
static inline qp_node_t *
branch_twigs_vector(dns_qpreadable_t qpr, qp_node_t *n) {
dns_qpread_t *qp = dns_qpreadable_cast(qpr);
return (ref_ptr(qp, branch_twigs_ref(n)));
return (ref_ptr(qpr, branch_twigs_ref(n)));
}
/*
@ -659,7 +663,8 @@ detach_leaf(dns_qpreadable_t qpr, qp_node_t *n) {
static inline size_t
leaf_qpkey(dns_qpreadable_t qpr, qp_node_t *n, dns_qpkey_t key) {
dns_qpread_t *qp = dns_qpreadable_cast(qpr);
return (qp->methods->makekey(key, qp->uctx, leaf_pval(n), leaf_ival(n)));
return (qp->methods->makekey(key, qp->uctx, leaf_pval(n),
leaf_ival(n)));
}
static inline char *

View File

@ -161,7 +161,7 @@ add_qp(void *qp, size_t count) {
static void
sqz_qp(void *qp) {
dns_qp_compact(qp);
dns_qp_compact(qp, true);
}
static isc_result_t

View File

@ -222,7 +222,7 @@ main(int argc, char *argv[]) {
labels += name->labels;
names += 1;
}
dns_qp_compact(qp);
dns_qp_compact(qp, true);
size_t smallbytes = wirebytes + labels + names * sizeof(isc_refcount_t);
dns_qp_memusage_t memusage = dns_qp_memusage(qp);