diff --git a/lib/isc/histo.c b/lib/isc/histo.c
index 6d6fc65ec7..a0b94c9b2a 100644
--- a/lib/isc/histo.c
+++ b/lib/isc/histo.c
@@ -25,6 +25,7 @@
 #include
 #include
 #include
+#include <isc/tid.h>
 
 /*
  * XXXFANF to be added to isc/util.h by a commmit in a qp-trie
@@ -33,8 +34,10 @@
 #define STRUCT_FLEX_SIZE(pointer, member, count) \
 	(sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count))
 
-#define HISTO_MAGIC    ISC_MAGIC('H', 's', 't', 'o')
-#define HISTO_VALID(p) ISC_MAGIC_VALID(p, HISTO_MAGIC)
+#define HISTO_MAGIC         ISC_MAGIC('H', 's', 't', 'o')
+#define HISTO_VALID(p)      ISC_MAGIC_VALID(p, HISTO_MAGIC)
+#define HISTOMULTI_MAGIC    ISC_MAGIC('H', 'g', 'M', 't')
+#define HISTOMULTI_VALID(p) ISC_MAGIC_VALID(p, HISTOMULTI_MAGIC)
 
 /*
  * Natural logarithms of 2 and 10 for converting precisions between
@@ -101,6 +104,16 @@ struct isc_histosummary {
 	uint64_t buckets[];
 };
 
+/*
+ * A sharded histogram: one `isc_histo_t` per thread, indexed by
+ * `isc_tid()`, so that each thread updates only its own counters.
+ */
+struct isc_histomulti {
+	uint magic;
+	uint size; /* number of elements in `hg`, from isc_tid_count() */
+	isc_histo_t *hg[];
+};
+
 /**********************************************************************/
 
 #define OUTARG(ptr, val) \
@@ -402,6 +415,80 @@ isc_histo_merge(isc_histo_t **targetp, isc_historead_t source) {
 
 /**********************************************************************/
 
+void
+isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp) {
+	REQUIRE(hmp != NULL);
+	REQUIRE(*hmp == NULL);
+
+	/* one histogram per thread; zero means the count was never set */
+	uint size = isc_tid_count();
+	INSIST(size > 0);
+
+	isc_histomulti_t *hm = isc_mem_getx(
+		mctx, STRUCT_FLEX_SIZE(hm, hg, size), ISC_MEM_ZERO);
+	*hm = (isc_histomulti_t){
+		.magic = HISTOMULTI_MAGIC,
+		.size = size,
+	};
+
+	for (uint i = 0; i < hm->size; i++) {
+		isc_histo_create(mctx, sigbits, &hm->hg[i]);
+	}
+
+	*hmp = hm;
+}
+
+void
+isc_histomulti_destroy(isc_histomulti_t **hmp) {
+	REQUIRE(hmp != NULL);
+	REQUIRE(HISTOMULTI_VALID(*hmp));
+
+	isc_histomulti_t *hm = *hmp;
+	/* taken before the per-thread histograms are destroyed */
+	isc_mem_t *mctx = hm->hg[0]->mctx;
+	*hmp = NULL;
+
+	for (uint i = 0; i < hm->size; i++) {
+		isc_histo_destroy(&hm->hg[i]);
+	}
+
+	/* invalidate, so a stale pointer is more likely to be caught */
+	hm->magic = 0;
+	isc_mem_put(mctx, hm, STRUCT_FLEX_SIZE(hm, hg, hm->size));
+}
+
+void
+isc_histomulti_merge(isc_histo_t **hgp, isc_histomulti_t *hm) {
+	REQUIRE(hgp != NULL);
+	REQUIRE(HISTOMULTI_VALID(hm));
+
+	for (uint i = 0; i < hm->size; i++) {
+		isc_histo_merge(hgp, hm->hg[i]);
+	}
+}
+
+void
+isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc) {
+	REQUIRE(HISTOMULTI_VALID(hm));
+
+	/*
+	 * isc_tid() is ISC_TID_UNKNOWN on a thread that has not been
+	 * given an id, which would index far outside the `hg` array.
+	 */
+	uint32_t tid = isc_tid();
+	REQUIRE(tid < hm->size);
+
+	isc_histo_t *hg = hm->hg[tid];
+	add_key_count(hg, value_to_key(hg, value), inc);
+}
+
+void
+isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value) {
+	isc_histomulti_add(hm, value, 1);
+}
+
+/**********************************************************************/
+
 /*
  * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
  * equation 4 (incremental mean) and equation 44 (incremental variance)
diff --git a/lib/isc/include/isc/histo.h b/lib/isc/include/isc/histo.h
index ad55dac889..461dbb7517 100644
--- a/lib/isc/include/isc/histo.h
+++ b/lib/isc/include/isc/histo.h
@@ -72,6 +72,7 @@
 
 typedef struct isc_histo        isc_histo_t;
 typedef struct isc_histosummary isc_histosummary_t;
+typedef struct isc_histomulti   isc_histomulti_t;
 
 /*
  * For functions that can take either type.
@@ -157,6 +158,8 @@ isc_histo_digits_to_bits(uint digits);
  *\li	`digits <= ISC_HISTO_MAXDIGS`
  */
 
+/**********************************************************************/
+
 void
 isc_histo_inc(isc_histo_t *hg, uint64_t value);
 /*%<
@@ -269,6 +272,81 @@ isc_histo_merge(isc_histo_t **targetp, isc_historead_t source);
 
 /**********************************************************************/
 
+void
+isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp);
+/*%<
+ * Create a multithreaded sharded histogram.
+ *
+ * Although an `isc_histo_t` is thread-safe, it can suffer
+ * from cache contention under heavy load. To avoid this,
+ * an `isc_histomulti_t` contains a histogram per thread,
+ * so updates are local and low-contention.
+ *
+ * Requires:
+ *\li	`sigbits >= ISC_HISTO_MINBITS`
+ *\li	`sigbits <= ISC_HISTO_MAXBITS`
+ *\li	`hmp != NULL`
+ *\li	`*hmp == NULL`
+ *\li	`isc_tid_count() > 0`, i.e. the thread count has been initialized
+ *
+ * Ensures:
+ *\li	`*hmp` is a pointer to a multithreaded sharded histogram.
+ */
+
+void
+isc_histomulti_destroy(isc_histomulti_t **hmp);
+/*%<
+ * Destroy a multithreaded sharded histogram
+ *
+ * Requires:
+ *\li	`hmp != NULL`
+ *\li	`*hmp` is a pointer to a valid multithreaded sharded histogram
+ *
+ * Ensures:
+ *\li	all memory allocated by the histogram has been released
+ *\li	`*hmp == NULL`
+ */
+
+void
+isc_histomulti_merge(isc_histo_t **targetp, isc_histomulti_t *source);
+/*%<
+ * Increase the counts in `*targetp` by the counts recorded in `source`
+ *
+ * The target histogram is created if `*targetp` is NULL.
+ *
+ * Requires:
+ *\li	`targetp != NULL`
+ *\li	`*targetp` is NULL or a pointer to a valid histogram
+ *\li	`source` is a pointer to a valid multithreaded sharded histogram
+ *
+ * Ensures:
+ *\li	`*targetp` is a pointer to a valid histogram
+ */
+
+void
+isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value);
+/*%<
+ * Add 1 to the value's bucket
+ *
+ * Requires:
+ *\li	`hm` is a pointer to a valid histomulti
+ *\li	the calling thread's `isc_tid()` is less than the number of
+ *	threads that the histomulti was created for
+ */
+
+void
+isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc);
+/*%<
+ * Add an arbitrary increment to the value's bucket
+ *
+ * Requires:
+ *\li	`hm` is a pointer to a valid histomulti
+ *\li	the calling thread's `isc_tid()` is less than the number of
+ *	threads that the histomulti was created for
+ */
+
+/**********************************************************************/
+
 void
 isc_histosummary_create(const isc_histo_t *hg, isc_histosummary_t **hsp);
 /*%<
@@ -406,3 +484,5 @@ isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value,
  *\li	`hs` is a pointer to a valid histogram summary
  *\li	`proportionp != NULL`
  */
+
+/**********************************************************************/
diff --git a/lib/isc/include/isc/tid.h b/lib/isc/include/isc/tid.h
index 9868573239..7bb7ca5f98 100644
--- a/lib/isc/include/isc/tid.h
+++ b/lib/isc/include/isc/tid.h
@@ -21,6 +21,13 @@ ISC_LANG_BEGINDECLS
 
 #define ISC_TID_UNKNOWN UINT32_MAX
 
+uint32_t
+isc_tid_count(void);
+/*%<
+ * Returns the number of threads, as set by isc__tid_initcount(),
+ * or zero if it has not been set yet.
+ */
+
 uint32_t
 isc_tid(void);
 /*%<
@@ -32,4 +39,13 @@ isc_tid(void);
 void
 isc__tid_init(uint32_t tid);
 
+void
+isc__tid_initcount(uint32_t count);
+/*%<
+ * Set the number of threads reported by isc_tid_count().
+ *
+ * Requires:
+ *\li	the count has not been set before, or was set to the same value
+ */
+
 ISC_LANG_ENDDECLS
diff --git a/lib/isc/loop.c b/lib/isc/loop.c
index 65250c14af..9164939a92 100644
--- a/lib/isc/loop.c
+++ b/lib/isc/loop.c
@@ -346,6 +346,7 @@ isc_loopmgr_create(isc_mem_t *mctx, uint32_t nloops, isc_loopmgr_t **loopmgrp) {
 	REQUIRE(nloops > 0);
 
 	threadpool_initialize(nloops);
+	isc__tid_initcount(nloops);
 
 	loopmgr = isc_mem_get(mctx, sizeof(*loopmgr));
 	*loopmgr = (isc_loopmgr_t){
diff --git a/lib/isc/tid.c b/lib/isc/tid.c
index 554ad96564..5a278e799e 100644
--- a/lib/isc/tid.c
+++ b/lib/isc/tid.c
@@ -26,7 +26,13 @@
 
 #define ISC_TID_UNKNOWN UINT32_MAX
 
-static thread_local uint32_t isc__tid_v = ISC_TID_UNKNOWN;
+static thread_local uint32_t tid_local = ISC_TID_UNKNOWN;
+
+/*
+ * Zero is a better nonsense value in this case than ISC_TID_UNKNOWN;
+ * avoids things like trying to allocate 32GB of per-thread counters.
+ */
+static uint32_t tid_count = 0;
 
 /**
  * Protected
@@ -34,9 +40,14 @@ static thread_local uint32_t isc__tid_v = ISC_TID_UNKNOWN;
 
 void
 isc__tid_init(uint32_t tid) {
-	REQUIRE(isc__tid_v == ISC_TID_UNKNOWN || isc__tid_v == tid);
+	REQUIRE(tid_local == ISC_TID_UNKNOWN || tid_local == tid);
+	tid_local = tid;
+}
 
-	isc__tid_v = tid;
+void
+isc__tid_initcount(uint32_t count) {
+	REQUIRE(tid_count == 0 || tid_count == count);
+	tid_count = count;
 }
 
 /**
@@ -45,5 +56,10 @@ isc__tid_init(uint32_t tid) {
 
 uint32_t
 isc_tid(void) {
-	return (isc__tid_v);
+	return (tid_local);
+}
+
+uint32_t
+isc_tid_count(void) {
+	return (tid_count);
 }