mirror of
https://gitlab.isc.org/isc-projects/bind9
synced 2025-08-30 22:15:20 +00:00
Add per-thread sharded histograms for heavy loads
Although an `isc_histo_t` is thread-safe, it can suffer from cache contention under heavy load. To avoid this, an `isc_histomulti_t` contains a histogram per thread, so updates are local and low-contention.
This commit is contained in:
@@ -25,6 +25,7 @@
|
|||||||
#include <isc/histo.h>
|
#include <isc/histo.h>
|
||||||
#include <isc/magic.h>
|
#include <isc/magic.h>
|
||||||
#include <isc/mem.h>
|
#include <isc/mem.h>
|
||||||
|
#include <isc/tid.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* XXXFANF to be added to isc/util.h by a commit in a qp-trie
|
* XXXFANF to be added to isc/util.h by a commit in a qp-trie
|
||||||
@@ -33,8 +34,10 @@
|
|||||||
#define STRUCT_FLEX_SIZE(pointer, member, count) \
|
#define STRUCT_FLEX_SIZE(pointer, member, count) \
|
||||||
(sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count))
|
(sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count))
|
||||||
|
|
||||||
#define HISTO_MAGIC ISC_MAGIC('H', 's', 't', 'o')
|
#define HISTO_MAGIC ISC_MAGIC('H', 's', 't', 'o')
|
||||||
#define HISTO_VALID(p) ISC_MAGIC_VALID(p, HISTO_MAGIC)
|
#define HISTO_VALID(p) ISC_MAGIC_VALID(p, HISTO_MAGIC)
|
||||||
|
#define HISTOMULTI_MAGIC ISC_MAGIC('H', 'g', 'M', 't')
|
||||||
|
#define HISTOMULTI_VALID(p) ISC_MAGIC_VALID(p, HISTOMULTI_MAGIC)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Natural logarithms of 2 and 10 for converting precisions between
|
* Natural logarithms of 2 and 10 for converting precisions between
|
||||||
@@ -101,6 +104,12 @@ struct isc_histosummary {
|
|||||||
uint64_t buckets[];
|
uint64_t buckets[];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct isc_histomulti {
|
||||||
|
uint magic;
|
||||||
|
uint size;
|
||||||
|
isc_histo_t *hg[];
|
||||||
|
};
|
||||||
|
|
||||||
/**********************************************************************/
|
/**********************************************************************/
|
||||||
|
|
||||||
#define OUTARG(ptr, val) \
|
#define OUTARG(ptr, val) \
|
||||||
@@ -402,6 +411,67 @@ isc_histo_merge(isc_histo_t **targetp, isc_historead_t source) {
|
|||||||
|
|
||||||
/**********************************************************************/
|
/**********************************************************************/
|
||||||
|
|
||||||
|
void
|
||||||
|
isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp) {
|
||||||
|
REQUIRE(hmp != NULL);
|
||||||
|
REQUIRE(*hmp == NULL);
|
||||||
|
|
||||||
|
uint size = isc_tid_count();
|
||||||
|
INSIST(size > 0);
|
||||||
|
|
||||||
|
isc_histomulti_t *hm = isc_mem_getx(
|
||||||
|
mctx, STRUCT_FLEX_SIZE(hm, hg, size), ISC_MEM_ZERO);
|
||||||
|
*hm = (isc_histomulti_t){
|
||||||
|
.magic = HISTOMULTI_MAGIC,
|
||||||
|
.size = size,
|
||||||
|
};
|
||||||
|
|
||||||
|
for (uint i = 0; i < hm->size; i++) {
|
||||||
|
isc_histo_create(mctx, sigbits, &hm->hg[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
*hmp = hm;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
isc_histomulti_destroy(isc_histomulti_t **hmp) {
|
||||||
|
REQUIRE(hmp != NULL);
|
||||||
|
REQUIRE(HISTOMULTI_VALID(*hmp));
|
||||||
|
|
||||||
|
isc_histomulti_t *hm = *hmp;
|
||||||
|
isc_mem_t *mctx = hm->hg[0]->mctx;
|
||||||
|
*hmp = NULL;
|
||||||
|
|
||||||
|
for (uint i = 0; i < hm->size; i++) {
|
||||||
|
isc_histo_destroy(&hm->hg[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
isc_mem_put(mctx, hm, STRUCT_FLEX_SIZE(hm, hg, hm->size));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
isc_histomulti_merge(isc_histo_t **hgp, isc_histomulti_t *hm) {
|
||||||
|
REQUIRE(HISTOMULTI_VALID(hm));
|
||||||
|
|
||||||
|
for (uint i = 0; i < hm->size; i++) {
|
||||||
|
isc_histo_merge(hgp, hm->hg[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc) {
|
||||||
|
REQUIRE(HISTOMULTI_VALID(hm));
|
||||||
|
isc_histo_t *hg = hm->hg[isc_tid()];
|
||||||
|
add_key_count(hg, value_to_key(hg, value), inc);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value) {
|
||||||
|
isc_histomulti_add(hm, value, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**********************************************************************/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
|
* https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
|
||||||
* equation 4 (incremental mean) and equation 44 (incremental variance)
|
* equation 4 (incremental mean) and equation 44 (incremental variance)
|
||||||
|
@@ -72,6 +72,7 @@
|
|||||||
|
|
||||||
typedef struct isc_histo isc_histo_t;
|
typedef struct isc_histo isc_histo_t;
|
||||||
typedef struct isc_histosummary isc_histosummary_t;
|
typedef struct isc_histosummary isc_histosummary_t;
|
||||||
|
typedef struct isc_histomulti isc_histomulti_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For functions that can take either type.
|
* For functions that can take either type.
|
||||||
@@ -157,6 +158,8 @@ isc_histo_digits_to_bits(uint digits);
|
|||||||
*\li `digits <= ISC_HISTO_MAXDIGS`
|
*\li `digits <= ISC_HISTO_MAXDIGS`
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**********************************************************************/
|
||||||
|
|
||||||
void
|
void
|
||||||
isc_histo_inc(isc_histo_t *hg, uint64_t value);
|
isc_histo_inc(isc_histo_t *hg, uint64_t value);
|
||||||
/*%<
|
/*%<
|
||||||
@@ -269,6 +272,76 @@ isc_histo_merge(isc_histo_t **targetp, isc_historead_t source);
|
|||||||
|
|
||||||
/**********************************************************************/
|
/**********************************************************************/
|
||||||
|
|
||||||
|
void
|
||||||
|
isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp);
|
||||||
|
/*%<
|
||||||
|
* Create a multithreaded sharded histogram.
|
||||||
|
*
|
||||||
|
* Although an `isc_histo_t` is thread-safe, it can suffer
|
||||||
|
* from cache contention under heavy load. To avoid this,
|
||||||
|
* an `isc_histomulti_t` contains a histogram per thread,
|
||||||
|
* so updates are local and low-contention.
|
||||||
|
*
|
||||||
|
* Requires:
|
||||||
|
*\li `sigbits >= ISC_HISTO_MINBITS`
|
||||||
|
*\li `sigbits <= ISC_HISTO_MAXBITS`
|
||||||
|
*\li `hmp != NULL`
|
||||||
|
*\li `*hmp == NULL`
|
||||||
|
*
|
||||||
|
* Ensures:
|
||||||
|
*\li `*hmp` is a pointer to a multithreaded sharded histogram.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
isc_histomulti_destroy(isc_histomulti_t **hmp);
|
||||||
|
/*%<
|
||||||
|
* Destroy a multithreaded sharded histogram
|
||||||
|
*
|
||||||
|
* Requires:
|
||||||
|
*\li `hmp != NULL`
|
||||||
|
*\li `*hmp` is a pointer to a valid multithreaded sharded histogram
|
||||||
|
*
|
||||||
|
* Ensures:
|
||||||
|
*\li all memory allocated by the histogram has been released
|
||||||
|
*\li `*hmp == NULL`
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
isc_histomulti_merge(isc_histo_t **targetp, isc_histomulti_t *source);
|
||||||
|
/*%<
|
||||||
|
* Increase the counts in `*targetp` by the counts recorded in `source`
|
||||||
|
*
|
||||||
|
* The target histogram is created if `*targetp` is NULL.
|
||||||
|
*
|
||||||
|
* Requires:
|
||||||
|
*\li `targetp != NULL`
|
||||||
|
*\li `*targetp` is NULL or a pointer to a valid histogram
|
||||||
|
*\li `source` is a pointer to a valid multithreaded sharded histogram
|
||||||
|
*
|
||||||
|
* Ensures:
|
||||||
|
*\li `*targetp` is a pointer to a valid histogram
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value);
|
||||||
|
/*%<
|
||||||
|
* Add 1 to the value's bucket
|
||||||
|
*
|
||||||
|
* Requires:
|
||||||
|
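*\li `hm` is a pointer to a valid histomulti
|
||||||
|
*\li the calling thread's `isc_tid()` is less than the `isc_tid_count()` that was in effect when `hm` was created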
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc);
|
||||||
|
/*%<
|
||||||
|
* Add an arbitrary increment to the value's bucket
|
||||||
|
*
|
||||||
|
* Requires:
|
||||||
|
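*\li `hm` is a pointer to a valid histomulti
|
||||||
|
*\li the calling thread's `isc_tid()` is less than the `isc_tid_count()` that was in effect when `hm` was created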
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**********************************************************************/
|
||||||
|
|
||||||
void
|
void
|
||||||
isc_histosummary_create(const isc_histo_t *hg, isc_histosummary_t **hsp);
|
isc_histosummary_create(const isc_histo_t *hg, isc_histosummary_t **hsp);
|
||||||
/*%<
|
/*%<
|
||||||
@@ -406,3 +479,5 @@ isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value,
|
|||||||
*\li `hs` is a pointer to a valid histogram summary
|
*\li `hs` is a pointer to a valid histogram summary
|
||||||
*\li `proportionp != NULL`
|
*\li `proportionp != NULL`
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**********************************************************************/
|
||||||
|
@@ -21,6 +21,12 @@ ISC_LANG_BEGINDECLS
|
|||||||
|
|
||||||
#define ISC_TID_UNKNOWN UINT32_MAX
|
#define ISC_TID_UNKNOWN UINT32_MAX
|
||||||
|
|
||||||
|
uint32_t
|
||||||
|
isc_tid_count(void);
|
||||||
|
/*%<
|
||||||
|
* Returns the number of threads.
|
||||||
|
*/
|
||||||
|
|
||||||
uint32_t
|
uint32_t
|
||||||
isc_tid(void);
|
isc_tid(void);
|
||||||
/*%<
|
/*%<
|
||||||
@@ -32,4 +38,7 @@ isc_tid(void);
|
|||||||
void
|
void
|
||||||
isc__tid_init(uint32_t tid);
|
isc__tid_init(uint32_t tid);
|
||||||
|
|
||||||
|
void
|
||||||
|
isc__tid_initcount(uint32_t count);
|
||||||
|
|
||||||
ISC_LANG_ENDDECLS
|
ISC_LANG_ENDDECLS
|
||||||
|
@@ -346,6 +346,7 @@ isc_loopmgr_create(isc_mem_t *mctx, uint32_t nloops, isc_loopmgr_t **loopmgrp) {
|
|||||||
REQUIRE(nloops > 0);
|
REQUIRE(nloops > 0);
|
||||||
|
|
||||||
threadpool_initialize(nloops);
|
threadpool_initialize(nloops);
|
||||||
|
isc__tid_initcount(nloops);
|
||||||
|
|
||||||
loopmgr = isc_mem_get(mctx, sizeof(*loopmgr));
|
loopmgr = isc_mem_get(mctx, sizeof(*loopmgr));
|
||||||
*loopmgr = (isc_loopmgr_t){
|
*loopmgr = (isc_loopmgr_t){
|
||||||
|
@@ -26,7 +26,13 @@
|
|||||||
|
|
||||||
#define ISC_TID_UNKNOWN UINT32_MAX
|
#define ISC_TID_UNKNOWN UINT32_MAX
|
||||||
|
|
||||||
static thread_local uint32_t isc__tid_v = ISC_TID_UNKNOWN;
|
static thread_local uint32_t tid_local = ISC_TID_UNKNOWN;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Zero is a better nonsense value in this case than ISC_TID_UNKNOWN;
|
||||||
|
* avoids things like trying to allocate 32GB of per-thread counters.
|
||||||
|
*/
|
||||||
|
static uint32_t tid_count = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Protected
|
* Protected
|
||||||
@@ -34,9 +40,14 @@ static thread_local uint32_t isc__tid_v = ISC_TID_UNKNOWN;
|
|||||||
|
|
||||||
void
|
void
|
||||||
isc__tid_init(uint32_t tid) {
|
isc__tid_init(uint32_t tid) {
|
||||||
REQUIRE(isc__tid_v == ISC_TID_UNKNOWN || isc__tid_v == tid);
|
REQUIRE(tid_local == ISC_TID_UNKNOWN || tid_local == tid);
|
||||||
|
tid_local = tid;
|
||||||
|
}
|
||||||
|
|
||||||
isc__tid_v = tid;
|
void
|
||||||
|
isc__tid_initcount(uint32_t count) {
|
||||||
|
REQUIRE(tid_count == 0 || tid_count == count);
|
||||||
|
tid_count = count;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -45,5 +56,10 @@ isc__tid_init(uint32_t tid) {
|
|||||||
|
|
||||||
uint32_t
|
uint32_t
|
||||||
isc_tid(void) {
|
isc_tid(void) {
|
||||||
return (isc__tid_v);
|
return (tid_local);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t
|
||||||
|
isc_tid_count(void) {
|
||||||
|
return (tid_count);
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user