2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-08-30 14:07:59 +00:00

Add per-thread sharded histograms for heavy loads

Although an `isc_histo_t` is thread-safe, it can suffer
from cache contention under heavy load. To avoid this,
an `isc_histomulti_t` contains a histogram per thread,
so updates are local and low-contention.
This commit is contained in:
Tony Finch 2023-03-16 09:46:15 +00:00 committed by Tony Finch
parent 82213a48cf
commit bc2389b828
5 changed files with 177 additions and 6 deletions

View File

@ -25,6 +25,7 @@
#include <isc/histo.h>
#include <isc/magic.h>
#include <isc/mem.h>
#include <isc/tid.h>
/*
* XXXFANF to be added to isc/util.h by a commit in a qp-trie
@ -33,8 +34,10 @@
#define STRUCT_FLEX_SIZE(pointer, member, count) \
(sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count))
#define HISTO_MAGIC ISC_MAGIC('H', 's', 't', 'o')
#define HISTO_VALID(p) ISC_MAGIC_VALID(p, HISTO_MAGIC)
#define HISTO_MAGIC ISC_MAGIC('H', 's', 't', 'o')
#define HISTO_VALID(p) ISC_MAGIC_VALID(p, HISTO_MAGIC)
#define HISTOMULTI_MAGIC ISC_MAGIC('H', 'g', 'M', 't')
#define HISTOMULTI_VALID(p) ISC_MAGIC_VALID(p, HISTOMULTI_MAGIC)
/*
* Natural logarithms of 2 and 10 for converting precisions between
@ -101,6 +104,12 @@ struct isc_histosummary {
uint64_t buckets[];
};
/*
 * A sharded histogram: an array of independent `isc_histo_t` pointers,
 * one slot per thread, so that each thread updates its own histogram.
 */
struct isc_histomulti {
/* Set to HISTOMULTI_MAGIC on creation; checked by HISTOMULTI_VALID(). */
uint magic;
/* Number of entries in hg[]; taken from isc_tid_count() at creation. */
uint size;
/* Per-thread histograms; isc_histomulti_add() indexes this by isc_tid(). */
isc_histo_t *hg[];
};
/**********************************************************************/
#define OUTARG(ptr, val) \
@ -402,6 +411,67 @@ isc_histo_merge(isc_histo_t **targetp, isc_historead_t source) {
/**********************************************************************/
/*
 * Build a sharded histogram holding one `isc_histo_t` per thread.
 *
 * The shard count is whatever isc_tid_count() reports at the time of
 * the call; it must be non-zero. Every shard is created with the same
 * `sigbits` and from the same memory context `mctx`. On return `*hmp`
 * points at the new object.
 */
void
isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp) {
	REQUIRE(hmp != NULL);
	REQUIRE(*hmp == NULL);

	uint size = isc_tid_count();
	INSIST(size > 0);

	/*
	 * One allocation covers the header and the trailing array of
	 * shard pointers. ISC_MEM_ZERO is requested for the allocation
	 * (presumably leaving every hg[] slot NULL before it is filled
	 * in below -- confirm against isc/mem.h).
	 */
	isc_histomulti_t *multi = NULL;
	multi = isc_mem_getx(mctx, STRUCT_FLEX_SIZE(multi, hg, size),
			     ISC_MEM_ZERO);
	multi->magic = HISTOMULTI_MAGIC;
	multi->size = size;

	uint shard = 0;
	while (shard < size) {
		isc_histo_create(mctx, sigbits, &multi->hg[shard]);
		shard++;
	}

	*hmp = multi;
}
/*
 * Tear down a sharded histogram: destroy every per-thread histogram,
 * then release the container itself. `*hmp` is set to NULL before any
 * memory is released.
 */
void
isc_histomulti_destroy(isc_histomulti_t **hmp) {
	REQUIRE(hmp != NULL);
	REQUIRE(HISTOMULTI_VALID(*hmp));

	isc_histomulti_t *multi = *hmp;
	const uint nshards = multi->size;

	/*
	 * The container keeps no memory context pointer of its own, so
	 * borrow the one recorded in the first shard.
	 * NOTE(review): this pointer is used after every shard has been
	 * destroyed; presumably the caller still holds a reference to
	 * the context -- confirm.
	 */
	isc_mem_t *mctx = multi->hg[0]->mctx;

	*hmp = NULL;

	uint shard = 0;
	while (shard < nshards) {
		isc_histo_destroy(&multi->hg[shard]);
		shard++;
	}

	isc_mem_put(mctx, multi, STRUCT_FLEX_SIZE(multi, hg, nshards));
}
/*
 * Fold every per-thread histogram of `hm` into `*hgp`, one shard at a
 * time and in index order, by calling isc_histo_merge() on each.
 */
void
isc_histomulti_merge(isc_histo_t **hgp, isc_histomulti_t *hm) {
	REQUIRE(HISTOMULTI_VALID(hm));

	const uint nshards = hm->size;
	uint shard = 0;

	while (shard < nshards) {
		isc_histo_merge(hgp, hm->hg[shard]);
		shard++;
	}
}
/*
 * Add `inc` to the bucket that `value` falls into, in the histogram
 * belonging to the calling thread.
 *
 * Requires:
 *	`hm` is a valid sharded histogram;
 *	the calling thread's isc_tid() is less than the number of
 *	per-thread histograms in `hm`.
 */
void
isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc) {
	REQUIRE(HISTOMULTI_VALID(hm));

	/*
	 * isc_tid() yields ISC_TID_UNKNOWN (UINT32_MAX) on a thread that
	 * has not been given a tid, and the shard array was sized from
	 * isc_tid_count() when `hm` was created. Check the index rather
	 * than reading past the end of hg[].
	 */
	uint32_t tid = isc_tid();
	REQUIRE(tid < hm->size);

	isc_histo_t *hg = hm->hg[tid];
	add_key_count(hg, value_to_key(hg, value), inc);
}
/*
 * Count a single occurrence of `value`: shorthand for
 * isc_histomulti_add() with an increment of one.
 */
void
isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value) {
	const uint64_t one = 1;

	isc_histomulti_add(hm, value, one);
}
/**********************************************************************/
/*
* https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
* equation 4 (incremental mean) and equation 44 (incremental variance)

View File

@ -72,6 +72,7 @@
typedef struct isc_histo isc_histo_t;
typedef struct isc_histosummary isc_histosummary_t;
typedef struct isc_histomulti isc_histomulti_t;
/*
* For functions that can take either type.
@ -157,6 +158,8 @@ isc_histo_digits_to_bits(uint digits);
*\li `digits <= ISC_HISTO_MAXDIGS`
*/
/**********************************************************************/
void
isc_histo_inc(isc_histo_t *hg, uint64_t value);
/*%<
@ -269,6 +272,76 @@ isc_histo_merge(isc_histo_t **targetp, isc_historead_t source);
/**********************************************************************/
void
isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp);
/*%<
* Create a multithreaded sharded histogram.
*
* Although an `isc_histo_t` is thread-safe, it can suffer
* from cache contention under heavy load. To avoid this,
* an `isc_histomulti_t` contains a histogram per thread,
* so updates are local and low-contention.
*
* The number of per-thread histograms is fixed at creation time
* to the value returned by `isc_tid_count()`.
*
* Requires:
*\li `sigbits >= ISC_HISTO_MINBITS`
*\li `sigbits <= ISC_HISTO_MAXBITS`
*\li `hmp != NULL`
*\li `*hmp == NULL`
*\li `isc_tid_count() > 0`
*
* Ensures:
*\li `*hmp` is a pointer to a multithreaded sharded histogram.
*/
void
isc_histomulti_destroy(isc_histomulti_t **hmp);
/*%<
* Destroy a multithreaded sharded histogram, including each of its
* per-thread histograms.
*
* Requires:
*\li `hmp != NULL`
*\li `*hmp` is a pointer to a valid multithreaded sharded histogram
*
* Ensures:
*\li all memory allocated by the histogram has been released
*\li `*hmp == NULL`
*/
void
isc_histomulti_merge(isc_histo_t **targetp, isc_histomulti_t *source);
/*%<
* Increase the counts in `*targetp` by the counts recorded in `source`
*
* Each per-thread histogram in `source` is merged into `*targetp`
* in turn, as if by `isc_histo_merge()`.
*
* The target histogram is created if `*targetp` is NULL.
*
* Requires:
*\li `targetp != NULL`
*\li `*targetp` is NULL or a pointer to a valid histogram
*\li `source` is a pointer to a valid multithreaded sharded histogram
*
* Ensures:
*\li `*targetp` is a pointer to a valid histogram
*/
void
isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value);
/*%<
* Add 1 to the value's bucket in the calling thread's histogram
*
* Requires:
*\li `hm` is a pointer to a valid histomulti
*\li the calling thread's `isc_tid()` is less than the number of
*    per-thread histograms in `hm`
*/
void
isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc);
/*%<
* Add an arbitrary increment to the value's bucket in the calling
* thread's histogram
*
* The per-thread histogram is selected using `isc_tid()`.
*
* Requires:
*\li `hm` is a pointer to a valid histomulti
*\li the calling thread's `isc_tid()` is less than the number of
*    per-thread histograms in `hm`
*/
/**********************************************************************/
void
isc_histosummary_create(const isc_histo_t *hg, isc_histosummary_t **hsp);
/*%<
@ -406,3 +479,5 @@ isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value,
*\li `hs` is a pointer to a valid histogram summary
*\li `proportionp != NULL`
*/
/**********************************************************************/

View File

@ -21,6 +21,12 @@ ISC_LANG_BEGINDECLS
#define ISC_TID_UNKNOWN UINT32_MAX
uint32_t
isc_tid_count(void);
/*%<
* Returns the number of threads, as recorded by `isc__tid_initcount()`.
* Returns 0 if no count has been recorded yet.
*/
uint32_t
isc_tid(void);
/*%<
@ -32,4 +38,7 @@ isc_tid(void);
void
isc__tid_init(uint32_t tid);
void
isc__tid_initcount(uint32_t count);
/*%<
* Record the number of threads that `isc_tid_count()` will report.
*
* Requires:
*\li no count has been recorded yet, or `count` is equal to the
*    count already recorded
*/
ISC_LANG_ENDDECLS

View File

@ -346,6 +346,7 @@ isc_loopmgr_create(isc_mem_t *mctx, uint32_t nloops, isc_loopmgr_t **loopmgrp) {
REQUIRE(nloops > 0);
threadpool_initialize(nloops);
isc__tid_initcount(nloops);
loopmgr = isc_mem_get(mctx, sizeof(*loopmgr));
*loopmgr = (isc_loopmgr_t){

View File

@ -26,7 +26,13 @@
#define ISC_TID_UNKNOWN UINT32_MAX
static thread_local uint32_t isc__tid_v = ISC_TID_UNKNOWN;
static thread_local uint32_t tid_local = ISC_TID_UNKNOWN;
/*
* Zero is a better nonsense value in this case than ISC_TID_UNKNOWN;
* avoids things like trying to allocate 32GB of per-thread counters.
*/
static uint32_t tid_count = 0;
/**
* Protected
@ -34,9 +40,14 @@ static thread_local uint32_t isc__tid_v = ISC_TID_UNKNOWN;
void
isc__tid_init(uint32_t tid) {
REQUIRE(isc__tid_v == ISC_TID_UNKNOWN || isc__tid_v == tid);
REQUIRE(tid_local == ISC_TID_UNKNOWN || tid_local == tid);
tid_local = tid;
}
isc__tid_v = tid;
/*
 * Record the thread count that isc_tid_count() reports.
 */
void
isc__tid_initcount(uint32_t count) {
/*
 * Zero means "not set yet"; once set, a repeated call is accepted
 * only if it supplies the same count.
 */
REQUIRE(tid_count == 0 || tid_count == count);
tid_count = count;
}
/**
@ -45,5 +56,10 @@ isc__tid_init(uint32_t tid) {
uint32_t
isc_tid(void) {
return (isc__tid_v);
return (tid_local);
}
uint32_t
isc_tid_count(void) {
return (tid_count);
}