2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-30 22:15:20 +00:00

Add per-thread sharded histograms for heavy loads

Although an `isc_histo_t` is thread-safe, it can suffer
from cache contention under heavy load. To avoid this,
an `isc_histomulti_t` contains a histogram per thread,
so updates are local and low-contention.
This commit is contained in:
Tony Finch
2023-03-16 09:46:15 +00:00
committed by Tony Finch
parent 82213a48cf
commit bc2389b828
5 changed files with 177 additions and 6 deletions

View File

@@ -25,6 +25,7 @@
#include <isc/histo.h> #include <isc/histo.h>
#include <isc/magic.h> #include <isc/magic.h>
#include <isc/mem.h> #include <isc/mem.h>
#include <isc/tid.h>
/* /*
* XXXFANF to be added to isc/util.h by a commit in a qp-trie * XXXFANF to be added to isc/util.h by a commit in a qp-trie
@@ -33,8 +34,10 @@
#define STRUCT_FLEX_SIZE(pointer, member, count) \ #define STRUCT_FLEX_SIZE(pointer, member, count) \
(sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count)) (sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count))
#define HISTO_MAGIC ISC_MAGIC('H', 's', 't', 'o') #define HISTO_MAGIC ISC_MAGIC('H', 's', 't', 'o')
#define HISTO_VALID(p) ISC_MAGIC_VALID(p, HISTO_MAGIC) #define HISTO_VALID(p) ISC_MAGIC_VALID(p, HISTO_MAGIC)
#define HISTOMULTI_MAGIC ISC_MAGIC('H', 'g', 'M', 't')
#define HISTOMULTI_VALID(p) ISC_MAGIC_VALID(p, HISTOMULTI_MAGIC)
/* /*
* Natural logarithms of 2 and 10 for converting precisions between * Natural logarithms of 2 and 10 for converting precisions between
@@ -101,6 +104,12 @@ struct isc_histosummary {
uint64_t buckets[]; uint64_t buckets[];
}; };
/*
 * A sharded histogram: a counted array of pointers to ordinary
 * histograms. isc_histomulti_create() allocates one slot per thread
 * (as reported by isc_tid_count()), and isc_histomulti_add() selects
 * the slot of the calling thread with isc_tid().
 */
struct isc_histomulti {
/* set to HISTOMULTI_MAGIC on creation; checked by HISTOMULTI_VALID() */
uint magic;
/* number of elements in hg[] */
uint size;
/* flexible array of per-thread histograms, indexed by thread id */
isc_histo_t *hg[];
};
/**********************************************************************/ /**********************************************************************/
#define OUTARG(ptr, val) \ #define OUTARG(ptr, val) \
@@ -402,6 +411,67 @@ isc_histo_merge(isc_histo_t **targetp, isc_historead_t source) {
/**********************************************************************/ /**********************************************************************/
/*
 * Build a sharded histogram holding one `isc_histo_t` for every thread
 * counted by isc_tid_count(), each created with `sigbits` significant
 * bits, and hand it back through `*hmp`.
 *
 * `hmp` must be non-NULL and `*hmp` must be NULL on entry. The thread
 * count must already be non-zero.
 */
void
isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp) {
	REQUIRE(hmp != NULL);
	REQUIRE(*hmp == NULL);

	uint size = isc_tid_count();
	INSIST(size > 0);

	/*
	 * The allocation is zero-filled, so every slot of hg[] starts
	 * out as NULL before isc_histo_create() is asked to fill it in.
	 */
	isc_histomulti_t *multi = isc_mem_getx(
		mctx, STRUCT_FLEX_SIZE(multi, hg, size), ISC_MEM_ZERO);
	multi->magic = HISTOMULTI_MAGIC;
	multi->size = size;

	for (uint shard = 0; shard < size; shard++) {
		isc_histo_create(mctx, sigbits, &multi->hg[shard]);
	}

	*hmp = multi;
}
/*
 * Tear down a sharded histogram: destroy every per-thread histogram,
 * then release the container itself. `*hmp` is set to NULL.
 *
 * `hmp` must be non-NULL and `*hmp` must be a valid histomulti.
 */
void
isc_histomulti_destroy(isc_histomulti_t **hmp) {
	REQUIRE(hmp != NULL);
	REQUIRE(HISTOMULTI_VALID(*hmp));

	isc_histomulti_t *multi = *hmp;
	*hmp = NULL;

	/*
	 * The container does not store a memory context of its own, so
	 * borrow the one recorded in the first shard before the shards
	 * are destroyed.
	 */
	isc_mem_t *mctx = multi->hg[0]->mctx;
	uint count = multi->size;

	for (uint shard = 0; shard < count; shard++) {
		isc_histo_destroy(&multi->hg[shard]);
	}

	isc_mem_put(mctx, multi, STRUCT_FLEX_SIZE(multi, hg, count));
}
/*
 * Fold every per-thread histogram of `hm` into `*hgp` by calling
 * isc_histo_merge() once per shard, in index order.
 *
 * `hm` must be a valid histomulti; `hgp` is passed straight through
 * to isc_histo_merge().
 */
void
isc_histomulti_merge(isc_histo_t **hgp, isc_histomulti_t *hm) {
	REQUIRE(HISTOMULTI_VALID(hm));

	isc_histo_t **shard = hm->hg;
	isc_histo_t **const end = shard + hm->size;

	while (shard < end) {
		isc_histo_merge(hgp, *shard);
		shard++;
	}
}
/*
 * Add `inc` to the bucket for `value` in the calling thread's own
 * histogram.
 *
 * `hm` must be a valid histomulti, and the calling thread must have a
 * thread id below the number of shards. A thread that was never given
 * an id reports ISC_TID_UNKNOWN (UINT32_MAX) from isc_tid(); using
 * that as an index would read far outside hg[], so it is rejected
 * here instead.
 */
void
isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc) {
	REQUIRE(HISTOMULTI_VALID(hm));

	uint32_t tid = isc_tid();
	REQUIRE(tid < hm->size);

	isc_histo_t *hg = hm->hg[tid];
	add_key_count(hg, value_to_key(hg, value), inc);
}
/*
 * Count a single occurrence of `value`: shorthand for
 * isc_histomulti_add() with an increment of one.
 */
void
isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value) {
	const uint64_t once = 1;

	isc_histomulti_add(hm, value, once);
}
/**********************************************************************/
/* /*
* https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
* equation 4 (incremental mean) and equation 44 (incremental variance) * equation 4 (incremental mean) and equation 44 (incremental variance)

View File

@@ -72,6 +72,7 @@
typedef struct isc_histo isc_histo_t; typedef struct isc_histo isc_histo_t;
typedef struct isc_histosummary isc_histosummary_t; typedef struct isc_histosummary isc_histosummary_t;
typedef struct isc_histomulti isc_histomulti_t;
/* /*
* For functions that can take either type. * For functions that can take either type.
@@ -157,6 +158,8 @@ isc_histo_digits_to_bits(uint digits);
*\li `digits <= ISC_HISTO_MAXDIGS` *\li `digits <= ISC_HISTO_MAXDIGS`
*/ */
/**********************************************************************/
void void
isc_histo_inc(isc_histo_t *hg, uint64_t value); isc_histo_inc(isc_histo_t *hg, uint64_t value);
/*%< /*%<
@@ -269,6 +272,76 @@ isc_histo_merge(isc_histo_t **targetp, isc_historead_t source);
/**********************************************************************/ /**********************************************************************/
void
isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp);
/*%<
* Create a multithreaded sharded histogram.
*
* Although an `isc_histo_t` is thread-safe, it can suffer
* from cache contention under heavy load. To avoid this,
* an `isc_histomulti_t` contains a histogram per thread,
* so updates are local and low-contention.
*
* Requires:
*\li `sigbits >= ISC_HISTO_MINBITS`
*\li `sigbits <= ISC_HISTO_MAXBITS`
*\li `hmp != NULL`
*\li `*hmp == NULL`
*
* Ensures:
*\li `*hmp` is a pointer to a multithreaded sharded histogram.
*/
void
isc_histomulti_destroy(isc_histomulti_t **hmp);
/*%<
* Destroy a multithreaded sharded histogram
*
* Requires:
*\li `hmp != NULL`
*\li `*hmp` is a pointer to a valid multithreaded sharded histogram
*
* Ensures:
*\li all memory allocated by the histogram has been released
*\li `*hmp == NULL`
*/
void
isc_histomulti_merge(isc_histo_t **targetp, isc_histomulti_t *source);
/*%<
* Increase the counts in `*targetp` by the counts recorded in `source`
*
* The target histogram is created if `*targetp` is NULL.
*
* Requires:
*\li `targetp != NULL`
*\li `*targetp` is NULL or a pointer to a valid histogram
*\li `source` is a pointer to a valid multithreaded sharded histogram
*
* Ensures:
*\li `*targetp` is a pointer to a valid histogram
*/
void
isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value);
/*%<
* Add 1 to the value's bucket
*
* Requires:
*\li `hm` is a pointer to a valid histomulti
*/
void
isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc);
/*%<
* Add an arbitrary increment to the value's bucket
*
* Requires:
*\li `hm` is a pointer to a valid histomulti
*/
/**********************************************************************/
void void
isc_histosummary_create(const isc_histo_t *hg, isc_histosummary_t **hsp); isc_histosummary_create(const isc_histo_t *hg, isc_histosummary_t **hsp);
/*%< /*%<
@@ -406,3 +479,5 @@ isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value,
*\li `hs` is a pointer to a valid histogram summary *\li `hs` is a pointer to a valid histogram summary
*\li `proportionp != NULL` *\li `proportionp != NULL`
*/ */
/**********************************************************************/

View File

@@ -21,6 +21,12 @@ ISC_LANG_BEGINDECLS
#define ISC_TID_UNKNOWN UINT32_MAX #define ISC_TID_UNKNOWN UINT32_MAX
uint32_t
isc_tid_count(void);
/*%<
* Returns the number of threads.
*/
uint32_t uint32_t
isc_tid(void); isc_tid(void);
/*%< /*%<
@@ -32,4 +38,7 @@ isc_tid(void);
void void
isc__tid_init(uint32_t tid); isc__tid_init(uint32_t tid);
void
isc__tid_initcount(uint32_t count);
ISC_LANG_ENDDECLS ISC_LANG_ENDDECLS

View File

@@ -346,6 +346,7 @@ isc_loopmgr_create(isc_mem_t *mctx, uint32_t nloops, isc_loopmgr_t **loopmgrp) {
REQUIRE(nloops > 0); REQUIRE(nloops > 0);
threadpool_initialize(nloops); threadpool_initialize(nloops);
isc__tid_initcount(nloops);
loopmgr = isc_mem_get(mctx, sizeof(*loopmgr)); loopmgr = isc_mem_get(mctx, sizeof(*loopmgr));
*loopmgr = (isc_loopmgr_t){ *loopmgr = (isc_loopmgr_t){

View File

@@ -26,7 +26,13 @@
#define ISC_TID_UNKNOWN UINT32_MAX #define ISC_TID_UNKNOWN UINT32_MAX
static thread_local uint32_t isc__tid_v = ISC_TID_UNKNOWN; static thread_local uint32_t tid_local = ISC_TID_UNKNOWN;
/*
* Zero is a better nonsense value in this case than ISC_TID_UNKNOWN;
* avoids things like trying to allocate 32GB of per-thread counters.
*/
static uint32_t tid_count = 0;
/** /**
* Protected * Protected
@@ -34,9 +40,14 @@ static thread_local uint32_t isc__tid_v = ISC_TID_UNKNOWN;
void void
isc__tid_init(uint32_t tid) { isc__tid_init(uint32_t tid) {
REQUIRE(isc__tid_v == ISC_TID_UNKNOWN || isc__tid_v == tid); REQUIRE(tid_local == ISC_TID_UNKNOWN || tid_local == tid);
tid_local = tid;
}
isc__tid_v = tid; void
isc__tid_initcount(uint32_t count) {
REQUIRE(tid_count == 0 || tid_count == count);
tid_count = count;
} }
/** /**
@@ -45,5 +56,10 @@ isc__tid_init(uint32_t tid) {
uint32_t uint32_t
isc_tid(void) { isc_tid(void) {
return (isc__tid_v); return (tid_local);
}
uint32_t
isc_tid_count(void) {
return (tid_count);
} }