diff --git a/lib/isc/Makefile.am b/lib/isc/Makefile.am index 70358f8a82..c6f800d5c6 100644 --- a/lib/isc/Makefile.am +++ b/lib/isc/Makefile.am @@ -168,6 +168,7 @@ libisc_la_SOURCES = \ openssl_shim.c \ openssl_shim.h \ os.c \ + os_p.h \ parseint.c \ pool.c \ portset.c \ diff --git a/lib/isc/hp.c b/lib/isc/hp.c index aaa59f6c45..5cd64d4f75 100644 --- a/lib/isc/hp.c +++ b/lib/isc/hp.c @@ -45,34 +45,32 @@ #include +#include #include #include #include #include +#include #include #include #include -#define HP_MAX_THREADS 128 static int isc__hp_max_threads = 1; -#define HP_MAX_HPS 4 /* This is named 'K' in the HP paper */ -#define CLPAD (128 / sizeof(uintptr_t)) -#define HP_THRESHOLD_R 0 /* This is named 'R' in the HP paper */ - -/* Maximum number of retired objects per thread */ -static int isc__hp_max_retired = HP_MAX_THREADS * HP_MAX_HPS; typedef struct retirelist { int size; uintptr_t *list; } retirelist_t; +typedef atomic_uintptr_t isc_hp_uintptr_t; + struct isc_hp { int max_hps; + int max_retired; isc_mem_t *mctx; - atomic_uintptr_t **hp; - retirelist_t **rl; isc_hp_deletefunc_t *deletefunc; + isc_hp_uintptr_t **hp; + retirelist_t **rl; }; static inline int @@ -89,7 +87,6 @@ isc_hp_init(int max_threads) { } isc__hp_max_threads = max_threads; - isc__hp_max_retired = max_threads * HP_MAX_HPS; } isc_hp_t * @@ -97,29 +94,42 @@ isc_hp_new(isc_mem_t *mctx, size_t max_hps, isc_hp_deletefunc_t *deletefunc) { isc_hp_t *hp = isc_mem_get(mctx, sizeof(*hp)); REQUIRE(isc__hp_max_threads > 0); - REQUIRE(max_hps <= HP_MAX_HPS); + REQUIRE(max_hps > 0); - if (max_hps == 0) { - max_hps = HP_MAX_HPS; - } - - *hp = (isc_hp_t){ .max_hps = max_hps, .deletefunc = deletefunc }; + *hp = (isc_hp_t){ + .max_hps = max_hps, + .max_retired = isc__hp_max_threads * max_hps, + .deletefunc = deletefunc, + }; isc_mem_attach(mctx, &hp->mctx); hp->hp = isc_mem_get(mctx, isc__hp_max_threads * sizeof(hp->hp[0])); + for (int i = 0; i < isc__hp_max_threads; i++) { + isc_hp_uintptr_t *hps; + + hps = isc_mem_get_aligned(mctx, hp->max_hps * sizeof(*hps), + ISC_OS_CACHELINE_SIZE); + for (int j = 0; j < hp->max_hps; j++) { + atomic_init(&hps[j], 0); + } + + hp->hp[i] = hps; + } + hp->rl = isc_mem_get(mctx, isc__hp_max_threads * sizeof(hp->rl[0])); for (int i = 0; i < isc__hp_max_threads; i++) { - hp->hp[i] = isc_mem_get(mctx, CLPAD * 2 * sizeof(hp->hp[i][0])); - hp->rl[i] = isc_mem_get(mctx, sizeof(*hp->rl[0])); - *hp->rl[i] = (retirelist_t){ .size = 0 }; + retirelist_t *rl; - for (int j = 0; j < hp->max_hps; j++) { - atomic_init(&hp->hp[i][j], 0); - } - hp->rl[i]->list = isc_mem_get( - hp->mctx, isc__hp_max_retired * sizeof(uintptr_t)); + rl = isc_mem_get_aligned(mctx, sizeof(*rl), + ISC_OS_CACHELINE_SIZE); + rl->size = 0; + rl->list = isc_mem_get(hp->mctx, + hp->max_retired * sizeof(uintptr_t)); + memset(rl->list, 0, hp->max_retired * sizeof(uintptr_t)); + + hp->rl[i] = rl; } return (hp); @@ -128,16 +138,21 @@ isc_hp_new(isc_mem_t *mctx, size_t max_hps, isc_hp_deletefunc_t *deletefunc) { void isc_hp_destroy(isc_hp_t *hp) { for (int i = 0; i < isc__hp_max_threads; i++) { - isc_mem_put(hp->mctx, hp->hp[i], - CLPAD * 2 * sizeof(hp->hp[i][0])); + retirelist_t *rl = hp->rl[i]; - for (int j = 0; j < hp->rl[i]->size; j++) { - void *data = (void *)hp->rl[i]->list[j]; + for (int j = 0; j < rl->size; j++) { + void *data = (void *)rl->list[j]; hp->deletefunc(data); } - isc_mem_put(hp->mctx, hp->rl[i]->list, - isc__hp_max_retired * sizeof(uintptr_t)); - isc_mem_put(hp->mctx, hp->rl[i], sizeof(*hp->rl[0])); + isc_mem_put(hp->mctx, rl->list, + hp->max_retired * sizeof(uintptr_t)); + isc_mem_put_aligned(hp->mctx, rl, sizeof(*rl), + ISC_OS_CACHELINE_SIZE); + } + for (int i = 0; i < isc__hp_max_threads; i++) { + isc_hp_uintptr_t *hps = hp->hp[i]; + isc_mem_put_aligned(hp->mctx, hps, hp->max_hps * sizeof(*hps), + ISC_OS_CACHELINE_SIZE); } isc_mem_put(hp->mctx, hp->hp, isc__hp_max_threads * sizeof(hp->hp[0])); isc_mem_put(hp->mctx, hp->rl, isc__hp_max_threads * sizeof(hp->rl[0])); @@ -182,15 +197,12 @@ isc_hp_protect_release(isc_hp_t *hp, int ihp, atomic_uintptr_t ptr) { void isc_hp_retire(isc_hp_t *hp, uintptr_t ptr) { - hp->rl[tid()]->list[hp->rl[tid()]->size++] = ptr; - INSIST(hp->rl[tid()]->size < isc__hp_max_retired); + retirelist_t *rl = hp->rl[tid()]; + rl->list[rl->size++] = ptr; + INSIST(rl->size < hp->max_retired); - if (hp->rl[tid()]->size < HP_THRESHOLD_R) { - return; - } - - for (int iret = 0; iret < hp->rl[tid()]->size; iret++) { - uintptr_t obj = hp->rl[tid()]->list[iret]; + for (int iret = 0; iret < rl->size; iret++) { + uintptr_t obj = rl->list[iret]; bool can_delete = true; for (int itid = 0; itid < isc__hp_max_threads && can_delete; itid++) { @@ -203,11 +215,9 @@ isc_hp_retire(isc_hp_t *hp, uintptr_t ptr) { } if (can_delete) { - size_t bytes = (hp->rl[tid()]->size - iret) * - sizeof(hp->rl[tid()]->list[0]); - memmove(&hp->rl[tid()]->list[iret], - &hp->rl[tid()]->list[iret + 1], bytes); - hp->rl[tid()]->size--; + size_t bytes = (rl->size - iret) * sizeof(rl->list[0]); + memmove(&rl->list[iret], &rl->list[iret + 1], bytes); + rl->size--; hp->deletefunc((void *)obj); } } diff --git a/lib/isc/include/isc/mem.h b/lib/isc/include/isc/mem.h index b90c3d6017..3e3b72324f 100644 --- a/lib/isc/include/isc/mem.h +++ b/lib/isc/include/isc/mem.h @@ -133,9 +133,13 @@ extern unsigned int isc_mem_defaultflags; #define ISCMEMFUNC(sfx) isc__mem_##sfx #define ISCMEMPOOLFUNC(sfx) isc__mempool_##sfx -#define isc_mem_get(c, s) ISCMEMFUNC(get)((c), (s)_ISC_MEM_FILELINE) +#define isc_mem_get(c, s) ISCMEMFUNC(get)((c), (s), 0 _ISC_MEM_FILELINE) +#define isc_mem_get_aligned(c, s, a) \ + ISCMEMFUNC(get)((c), (s), (a)_ISC_MEM_FILELINE) #define isc_mem_reget(c, p, o, n) \ - ISCMEMFUNC(reget)((c), (p), (o), (n)_ISC_MEM_FILELINE) + ISCMEMFUNC(reget)((c), (p), (o), (n), 0 _ISC_MEM_FILELINE) +#define isc_mem_reget_aligned(c, p, o, n, a) \ + ISCMEMFUNC(reget)((c), (p), (o), (n), (a)_ISC_MEM_FILELINE) #define isc_mem_allocate(c, s) ISCMEMFUNC(allocate)((c), (s)_ISC_MEM_FILELINE) #define isc_mem_reallocate(c, p, s) \ ISCMEMFUNC(reallocate)((c), (p), (s)_ISC_MEM_FILELINE) @@ -144,15 +148,27 @@ extern unsigned int isc_mem_defaultflags; ISCMEMFUNC(strndup)((c), (p), (l)_ISC_MEM_FILELINE) #define isc_mempool_get(c) ISCMEMPOOLFUNC(get)((c)_ISC_MEM_FILELINE) -#define isc_mem_put(c, p, s) \ - do { \ - ISCMEMFUNC(put)((c), (p), (s)_ISC_MEM_FILELINE); \ - (p) = NULL; \ +#define isc_mem_put(c, p, s) \ + do { \ + ISCMEMFUNC(put)((c), (p), (s), 0 _ISC_MEM_FILELINE); \ + (p) = NULL; \ } while (0) -#define isc_mem_putanddetach(c, p, s) \ - do { \ - ISCMEMFUNC(putanddetach)((c), (p), (s)_ISC_MEM_FILELINE); \ - (p) = NULL; \ +#define isc_mem_put_aligned(c, p, s, a) \ + do { \ + ISCMEMFUNC(put) \ + ((c), (p), (s), (a)_ISC_MEM_FILELINE); \ + (p) = NULL; \ + } while (0) +#define isc_mem_putanddetach(c, p, s) \ + do { \ + ISCMEMFUNC(putanddetach)((c), (p), (s), 0 _ISC_MEM_FILELINE); \ + (p) = NULL; \ + } while (0) +#define isc_mem_putanddetach_aligned(c, p, s, a) \ + do { \ + ISCMEMFUNC(putanddetach) \ + ((c), (p), (s), (a)_ISC_MEM_FILELINE); \ + (p) = NULL; \ } while (0) #define isc_mem_free(c, p) \ do { \ @@ -477,15 +493,17 @@ isc_mempool_setfillcount(isc_mempool_t *restrict mpctx, /* * Pseudo-private functions for use via macros. Do not call directly. */ -void ISCMEMFUNC(putanddetach)(isc_mem_t **, void *, size_t _ISC_MEM_FLARG); -void ISCMEMFUNC(put)(isc_mem_t *, void *, size_t _ISC_MEM_FLARG); +void ISCMEMFUNC(putanddetach)(isc_mem_t **, void *, size_t, + size_t _ISC_MEM_FLARG); +void ISCMEMFUNC(put)(isc_mem_t *, void *, size_t, size_t _ISC_MEM_FLARG); void ISCMEMFUNC(free)(isc_mem_t *, void *_ISC_MEM_FLARG); ISC_ATTR_MALLOC_DEALLOCATOR_IDX(ISCMEMFUNC(put), 2) -void *ISCMEMFUNC(get)(isc_mem_t *, size_t _ISC_MEM_FLARG); +void *ISCMEMFUNC(get)(isc_mem_t *, size_t, size_t _ISC_MEM_FLARG); ISC_ATTR_DEALLOCATOR_IDX(ISCMEMFUNC(put), 2) -void *ISCMEMFUNC(reget)(isc_mem_t *, void *, size_t, size_t _ISC_MEM_FLARG); +void *ISCMEMFUNC(reget)(isc_mem_t *, void *, size_t, size_t, + size_t _ISC_MEM_FLARG); ISC_ATTR_MALLOC_DEALLOCATOR_IDX(ISCMEMFUNC(free), 2) void *ISCMEMFUNC(allocate)(isc_mem_t *, size_t _ISC_MEM_FLARG); diff --git a/lib/isc/include/isc/os.h b/lib/isc/include/isc/os.h index f9e90e0d82..96c9f0f3ea 100644 --- a/lib/isc/include/isc/os.h +++ b/lib/isc/include/isc/os.h @@ -14,9 +14,17 @@ /*! \file isc/os.h */ #include +#include ISC_LANG_BEGINDECLS +/*%< + * Hardcode the L1 cacheline size of the CPU to 64, this is checked in + * the os.c library constructor if operating system provide means to + * get the L1 cacheline size using sysconf(). + */ +#define ISC_OS_CACHELINE_SIZE 64 + unsigned int isc_os_ncpus(void); /*%< diff --git a/lib/isc/include/isc/queue.h b/lib/isc/include/isc/queue.h index a48af88881..1e2821c878 100644 --- a/lib/isc/include/isc/queue.h +++ b/lib/isc/include/isc/queue.h @@ -15,7 +15,7 @@ typedef struct isc_queue isc_queue_t; isc_queue_t * -isc_queue_new(isc_mem_t *mctx, int max_threads); +isc_queue_new(isc_mem_t *mctx); /*%< * Create a new fetch-and-add array queue. * diff --git a/lib/isc/jemalloc_shim.h b/lib/isc/jemalloc_shim.h index b207eabc89..20801def01 100644 --- a/lib/isc/jemalloc_shim.h +++ b/lib/isc/jemalloc_shim.h @@ -19,6 +19,9 @@ const char *malloc_conf = NULL; +/* Without jemalloc, isc_mem_get_align() is equal to isc_mem_get() */ +#define MALLOCX_ALIGN(a) (a & 0) /* Clear the flag */ + #if defined(HAVE_MALLOC_SIZE) || defined(HAVE_MALLOC_USABLE_SIZE) #include diff --git a/lib/isc/lib.c b/lib/isc/lib.c index 5f9173ff4c..e591b08780 100644 --- a/lib/isc/lib.c +++ b/lib/isc/lib.c @@ -19,6 +19,7 @@ #include "config.h" #include "mem_p.h" +#include "os_p.h" #include "tls_p.h" #include "trampoline_p.h" @@ -37,6 +38,7 @@ isc__shutdown(void) ISC_DESTRUCTOR; void isc__initialize(void) { + isc__os_initialize(); isc__mem_initialize(); isc__tls_initialize(); isc__trampoline_initialize(); @@ -48,4 +50,5 @@ isc__shutdown(void) { isc__trampoline_shutdown(); isc__tls_shutdown(); isc__mem_shutdown(); + isc__os_shutdown(); } diff --git a/lib/isc/mem.c b/lib/isc/mem.c index 0fa088a0e1..7bbbad3254 100644 --- a/lib/isc/mem.c +++ b/lib/isc/mem.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -328,16 +329,18 @@ unlock: s = ZERO_ALLOCATION_SIZE; \ } +#define MEM_ALIGN(a) ((a) ? MALLOCX_ALIGN(a) : 0) + /*! * Perform a malloc, doing memory filling and overrun detection as necessary. */ static inline void * -mem_get(isc_mem_t *ctx, size_t size) { +mem_get(isc_mem_t *ctx, size_t size, int flags) { char *ret = NULL; ADJUST_ZERO_ALLOCATION_SIZE(size); - ret = mallocx(size, 0); + ret = mallocx(size, flags); INSIST(ret != NULL); if ((ctx->flags & ISC_MEMFLAG_FILL) != 0) { @@ -352,22 +355,23 @@ mem_get(isc_mem_t *ctx, size_t size) { */ /* coverity[+free : arg-1] */ static inline void -mem_put(isc_mem_t *ctx, void *mem, size_t size) { +mem_put(isc_mem_t *ctx, void *mem, size_t size, int flags) { ADJUST_ZERO_ALLOCATION_SIZE(size); if ((ctx->flags & ISC_MEMFLAG_FILL) != 0) { memset(mem, 0xde, size); /* Mnemonic for "dead". */ } - sdallocx(mem, size, 0); + sdallocx(mem, size, flags); } static inline void * -mem_realloc(isc_mem_t *ctx, void *old_ptr, size_t old_size, size_t new_size) { +mem_realloc(isc_mem_t *ctx, void *old_ptr, size_t old_size, size_t new_size, + int flags) { void *new_ptr = NULL; ADJUST_ZERO_ALLOCATION_SIZE(new_size); - new_ptr = rallocx(old_ptr, new_size, 0); + new_ptr = rallocx(old_ptr, new_size, flags); INSIST(new_ptr != NULL); if ((ctx->flags & ISC_MEMFLAG_FILL) != 0) { @@ -453,7 +457,7 @@ mem_create(isc_mem_t **ctxp, unsigned int flags) { REQUIRE(ctxp != NULL && *ctxp == NULL); - ctx = mallocx(sizeof(*ctx), 0); + ctx = mallocx(sizeof(*ctx), MALLOCX_ALIGN(ISC_OS_CACHELINE_SIZE)); INSIST(ctx != NULL); *ctx = (isc_mem_t){ @@ -572,7 +576,7 @@ destroy(isc_mem_t *ctx) { if (ctx->checkfree) { INSIST(malloced == 0); } - sdallocx(ctx, sizeof(*ctx), 0); + sdallocx(ctx, sizeof(*ctx), MALLOCX_ALIGN(ISC_OS_CACHELINE_SIZE)); } void @@ -617,7 +621,8 @@ isc__mem_detach(isc_mem_t **ctxp FLARG) { */ void -isc__mem_putanddetach(isc_mem_t **ctxp, void *ptr, size_t size FLARG) { +isc__mem_putanddetach(isc_mem_t **ctxp, void *ptr, size_t size, + size_t alignment FLARG) { isc_mem_t *ctx = NULL; REQUIRE(ctxp != NULL && VALID_CONTEXT(*ctxp)); @@ -630,7 +635,7 @@ isc__mem_putanddetach(isc_mem_t **ctxp, void *ptr, size_t size FLARG) { DELETE_TRACE(ctx, ptr, size, file, line); mem_putstats(ctx, ptr, size); - mem_put(ctx, ptr, size); + mem_put(ctx, ptr, size, MEM_ALIGN(alignment)); if (isc_refcount_decrement(&ctx->references) == 1) { isc_refcount_destroy(&ctx->references); @@ -745,12 +750,12 @@ lo_water(isc_mem_t *ctx) { } void * -isc__mem_get(isc_mem_t *ctx, size_t size FLARG) { +isc__mem_get(isc_mem_t *ctx, size_t size, size_t alignment FLARG) { void *ptr = NULL; REQUIRE(VALID_CONTEXT(ctx)); - ptr = mem_get(ctx, size); + ptr = mem_get(ctx, size, MEM_ALIGN(alignment)); mem_getstats(ctx, size); ADD_TRACE(ctx, ptr, size, file, line); @@ -761,13 +766,13 @@ isc__mem_get(isc_mem_t *ctx, size_t size FLARG) { } void -isc__mem_put(isc_mem_t *ctx, void *ptr, size_t size FLARG) { +isc__mem_put(isc_mem_t *ctx, void *ptr, size_t size, size_t alignment FLARG) { REQUIRE(VALID_CONTEXT(ctx)); DELETE_TRACE(ctx, ptr, size, file, line); mem_putstats(ctx, ptr, size); - mem_put(ctx, ptr, size); + mem_put(ctx, ptr, size, MEM_ALIGN(alignment)); CALL_LO_WATER(ctx); } @@ -884,7 +889,7 @@ isc__mem_allocate(isc_mem_t *ctx, size_t size FLARG) { REQUIRE(VALID_CONTEXT(ctx)); - ptr = mem_get(ctx, size); + ptr = mem_get(ctx, size, 0); /* Recalculate the real allocated size */ size = sallocx(ptr, 0); @@ -898,20 +903,21 @@ isc__mem_allocate(isc_mem_t *ctx, size_t size FLARG) { } void * -isc__mem_reget(isc_mem_t *ctx, void *old_ptr, size_t old_size, - size_t new_size FLARG) { +isc__mem_reget(isc_mem_t *ctx, void *old_ptr, size_t old_size, size_t new_size, + size_t alignment FLARG) { void *new_ptr = NULL; if (old_ptr == NULL) { REQUIRE(old_size == 0); - new_ptr = isc__mem_get(ctx, new_size FLARG_PASS); + new_ptr = isc__mem_get(ctx, new_size, alignment FLARG_PASS); } else if (new_size == 0) { - isc__mem_put(ctx, old_ptr, old_size FLARG_PASS); + isc__mem_put(ctx, old_ptr, old_size, alignment FLARG_PASS); } else { DELETE_TRACE(ctx, old_ptr, old_size, file, line); mem_putstats(ctx, old_ptr, old_size); - new_ptr = mem_realloc(ctx, old_ptr, old_size, new_size); + new_ptr = mem_realloc(ctx, old_ptr, old_size, new_size, + MEM_ALIGN(alignment)); mem_getstats(ctx, new_size); ADD_TRACE(ctx, new_ptr, new_size, file, line); @@ -944,7 +950,7 @@ isc__mem_reallocate(isc_mem_t *ctx, void *old_ptr, size_t new_size FLARG) { DELETE_TRACE(ctx, old_ptr, old_size, file, line); mem_putstats(ctx, old_ptr, old_size); - new_ptr = mem_realloc(ctx, old_ptr, old_size, new_size); + new_ptr = mem_realloc(ctx, old_ptr, old_size, new_size, 0); /* Recalculate the real allocated size */ new_size = sallocx(new_ptr, 0); @@ -975,7 +981,7 @@ isc__mem_free(isc_mem_t *ctx, void *ptr FLARG) { DELETE_TRACE(ctx, ptr, size, file, line); mem_putstats(ctx, ptr, size); - mem_put(ctx, ptr, size); + mem_put(ctx, ptr, size, 0); CALL_LO_WATER(ctx); } @@ -1243,7 +1249,7 @@ isc__mempool_destroy(isc_mempool_t **restrict mpctxp FLARG) { mpctx->items = item->next; mem_putstats(mctx, item, mpctx->size); - mem_put(mctx, item, mpctx->size); + mem_put(mctx, item, mpctx->size, 0); } /* @@ -1278,7 +1284,7 @@ isc__mempool_get(isc_mempool_t *restrict mpctx FLARG) { * We need to dip into the well. Fill up our free list. */ for (size_t i = 0; i < fillcount; i++) { - item = mem_get(mctx, mpctx->size); + item = mem_get(mctx, mpctx->size, 0); mem_getstats(mctx, mpctx->size); item->next = mpctx->items; mpctx->items = item; @@ -1326,7 +1332,7 @@ isc__mempool_put(isc_mempool_t *restrict mpctx, void *mem FLARG) { */ if (freecount >= freemax) { mem_putstats(mctx, mem, mpctx->size); - mem_put(mctx, mem, mpctx->size); + mem_put(mctx, mem, mpctx->size, 0); return; } diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c index 51045521a1..e5e9a120bc 100644 --- a/lib/isc/netmgr/netmgr.c +++ b/lib/isc/netmgr/netmgr.c @@ -312,7 +312,7 @@ isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) { isc_condition_init(&worker->cond_prio); for (size_t type = 0; type < NETIEVENT_MAX; type++) { - worker->ievents[type] = isc_queue_new(mgr->mctx, 128); + worker->ievents[type] = isc_queue_new(mgr->mctx); atomic_init(&worker->nievents[type], 0); } diff --git a/lib/isc/os.c b/lib/isc/os.c index 6a75b60976..660d94af57 100644 --- a/lib/isc/os.c +++ b/lib/isc/os.c @@ -9,12 +9,15 @@ * information regarding copyright ownership. */ -#include +#include + #include +#include #include -static isc_once_t ncpus_once = ISC_ONCE_INIT; -static unsigned int ncpus = 0; +#include "os_p.h" + +static unsigned int isc__os_ncpus = 0; #ifdef HAVE_SYSCONF @@ -54,23 +57,33 @@ sysctl_ncpus(void) { static void ncpus_initialize(void) { #if defined(HAVE_SYSCONF) - ncpus = sysconf_ncpus(); + isc__os_ncpus = sysconf_ncpus(); #endif /* if defined(HAVE_SYSCONF) */ #if defined(HAVE_SYS_SYSCTL_H) && defined(HAVE_SYSCTLBYNAME) - if (ncpus <= 0) { - ncpus = sysctl_ncpus(); + if (isc__os_ncpus <= 0) { + isc__os_ncpus = sysctl_ncpus(); } #endif /* if defined(HAVE_SYS_SYSCTL_H) && defined(HAVE_SYSCTLBYNAME) */ - if (ncpus <= 0) { - ncpus = 1; + if (isc__os_ncpus == 0) { + isc__os_ncpus = 1; } } unsigned int isc_os_ncpus(void) { - isc_result_t result = isc_once_do(&ncpus_once, ncpus_initialize); - RUNTIME_CHECK(result == ISC_R_SUCCESS); - INSIST(ncpus > 0); - - return (ncpus); + return (isc__os_ncpus); +} + +void +isc__os_initialize(void) { + ncpus_initialize(); +#if defined(HAVE_SYSCONF) && defined(_SC_LEVEL1_DCACHE_LINESIZE) + long s = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + RUNTIME_CHECK((size_t)s == (size_t)ISC_OS_CACHELINE_SIZE); +#endif +} + +void +isc__os_shutdown(void) { + /* empty, but defined for completeness */; } diff --git a/lib/isc/os_p.h b/lib/isc/os_p.h new file mode 100644 index 0000000000..0c28a06975 --- /dev/null +++ b/lib/isc/os_p.h @@ -0,0 +1,24 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +#pragma once + +#include + +#include + +/*! \file */ + +void +isc__os_initialize(void); + +void +isc__os_shutdown(void); diff --git a/lib/isc/queue.c b/lib/isc/queue.c index d16c8e69ae..590bc92a53 100644 --- a/lib/isc/queue.c +++ b/lib/isc/queue.c @@ -15,15 +15,12 @@ #include #include #include +#include #include #include #define BUFFER_SIZE 1024 -#define MAX_THREADS 128 - -#define ALIGNMENT 128 - static uintptr_t nulluintptr = (uintptr_t)NULL; typedef struct node { @@ -39,13 +36,11 @@ typedef struct node { #define HP_HEAD 0 struct isc_queue { - alignas(ALIGNMENT) atomic_uintptr_t head; - alignas(ALIGNMENT) atomic_uintptr_t tail; + alignas(ISC_OS_CACHELINE_SIZE) atomic_uintptr_t head; + alignas(ISC_OS_CACHELINE_SIZE) atomic_uintptr_t tail; isc_mem_t *mctx; - int max_threads; int taken; isc_hp_t *hp; - void *alloced_ptr; }; static node_t * @@ -93,27 +88,14 @@ queue_cas_head(isc_queue_t *queue, node_t *cmp, const node_t *val) { } isc_queue_t * -isc_queue_new(isc_mem_t *mctx, int max_threads) { +isc_queue_new(isc_mem_t *mctx) { isc_queue_t *queue = NULL; node_t *sentinel = NULL; - void *qbuf = NULL; - uintptr_t qptr; - /* - * A trick to allocate an aligned isc_queue_t structure - */ - qbuf = isc_mem_get(mctx, sizeof(*queue) + ALIGNMENT); - qptr = (uintptr_t)qbuf; - queue = (isc_queue_t *)(qptr + (ALIGNMENT - (qptr % ALIGNMENT))); + queue = isc_mem_get_aligned(mctx, sizeof(*queue), + ISC_OS_CACHELINE_SIZE); - if (max_threads == 0) { - max_threads = MAX_THREADS; - } - - *queue = (isc_queue_t){ - .max_threads = max_threads, - .alloced_ptr = qbuf, - }; + *queue = (isc_queue_t){ 0 }; isc_mem_attach(mctx, &queue->mctx); @@ -137,7 +119,7 @@ isc_queue_enqueue(isc_queue_t *queue, uintptr_t item) { uint_fast32_t idx; uintptr_t n = nulluintptr; - lt = (node_t *)isc_hp_protect(queue->hp, 0, &queue->tail); + lt = (node_t *)isc_hp_protect(queue->hp, HP_TAIL, &queue->tail); idx = atomic_fetch_add(<->enqidx, 1); if (idx > BUFFER_SIZE - 1) { node_t *lnext = NULL; @@ -178,7 +160,7 @@ isc_queue_dequeue(isc_queue_t *queue) { uint_fast32_t idx; uintptr_t item; - lh = (node_t *)isc_hp_protect(queue->hp, 0, &queue->head); + lh = (node_t *)isc_hp_protect(queue->hp, HP_HEAD, &queue->head); if (atomic_load(&lh->deqidx) >= atomic_load(&lh->enqidx) && atomic_load(&lh->next) == nulluintptr) { @@ -215,7 +197,6 @@ isc_queue_dequeue(isc_queue_t *queue) { void isc_queue_destroy(isc_queue_t *queue) { node_t *last = NULL; - void *alloced = NULL; REQUIRE(queue != NULL); @@ -227,6 +208,6 @@ isc_queue_destroy(isc_queue_t *queue) { node_destroy(last); isc_hp_destroy(queue->hp); - alloced = queue->alloced_ptr; - isc_mem_putanddetach(&queue->mctx, alloced, sizeof(*queue) + ALIGNMENT); + isc_mem_putanddetach_aligned(&queue->mctx, queue, sizeof(*queue), + ISC_OS_CACHELINE_SIZE); } diff --git a/lib/isc/tests/mem_test.c b/lib/isc/tests/mem_test.c index 9e35af3fc7..b61ed7cb01 100644 --- a/lib/isc/tests/mem_test.c +++ b/lib/isc/tests/mem_test.c @@ -12,6 +12,7 @@ #if HAVE_CMOCKA #include +#include #include /* IWYU pragma: keep */ #include #include @@ -148,6 +149,45 @@ isc_mem_test(void **state) { isc_mempool_destroy(&mp1); } +#if defined(HAVE_MALLOC_NP_H) || defined(HAVE_JEMALLOC) +/* aligned memory system tests */ +static void +isc_mem_aligned_test(void **state) { + isc_mem_t *mctx2 = NULL; + void *ptr; + size_t alignment; + uintptr_t aligned; + + UNUSED(state); + + /* Check different alignment sizes up to the page size */ + for (alignment = sizeof(void *); alignment <= 4096; alignment *= 2) { + size_t size = alignment / 2 - 1; + ptr = isc_mem_get_aligned(test_mctx, size, alignment); + + /* Check if the pointer is properly aligned */ + aligned = (((uintptr_t)ptr / alignment) * alignment); + assert_ptr_equal(aligned, (uintptr_t)ptr); + + /* Check if we can resize to range */ + ptr = isc_mem_reget_aligned(test_mctx, ptr, size, + size * 2 + alignment, alignment); + + /* Check if the pointer is still properly aligned */ + aligned = (((uintptr_t)ptr / alignment) * alignment); + assert_ptr_equal(aligned, (uintptr_t)ptr); + + isc_mem_put_aligned(test_mctx, ptr, size * 2 + alignment, + alignment); + + /* Check whether isc_mem_putanddetach_detach() also works */ + isc_mem_create(&mctx2); + ptr = isc_mem_get_aligned(mctx2, size, alignment); + isc_mem_putanddetach_aligned(&mctx2, ptr, size, alignment); + } +} +#endif /* defined(HAVE_MALLOC_NP_H) || defined(HAVE_JEMALLOC) */ + /* test TotalUse calculation */ static void isc_mem_total_test(void **state) { @@ -474,7 +514,8 @@ isc_mem_benchmark(void **state) { t = isc_time_microdiff(&ts2, &ts1); printf("[ TIME ] isc_mem_benchmark: " - "%d isc_mem_{get,put} calls, %f seconds, %f calls/second\n", + "%d isc_mem_{get,put} calls, %f seconds, %f " + "calls/second\n", nthreads * ITERS * NUM_ITEMS, t / 1000000.0, (nthreads * ITERS * NUM_ITEMS) / (t / 1000000.0)); } @@ -490,6 +531,10 @@ main(void) { const struct CMUnitTest tests[] = { cmocka_unit_test_setup_teardown(isc_mem_test, _setup, _teardown), +#if defined(HAVE_MALLOC_NP_H) || defined(HAVE_JEMALLOC) + cmocka_unit_test_setup_teardown(isc_mem_aligned_test, _setup, + _teardown), +#endif /* defined(HAVE_MALLOC_NP_H) || defined(HAVE_JEMALLOC) */ cmocka_unit_test_setup_teardown(isc_mem_total_test, _setup, _teardown), cmocka_unit_test_setup_teardown(isc_mem_inuse_test, _setup, diff --git a/util/copyrights b/util/copyrights index 150f956e9d..e57b83189e 100644 --- a/util/copyrights +++ b/util/copyrights @@ -1790,6 +1790,7 @@ ./lib/isc/openssl_shim.c C 2018,2019,2020,2021,2022 ./lib/isc/openssl_shim.h C 2020,2021,2022 ./lib/isc/os.c C 2000,2001,2004,2005,2007,2016,2018,2019,2020,2021,2022 +./lib/isc/os_p.h C 2022 ./lib/isc/parseint.c C 2001,2002,2003,2004,2005,2007,2012,2016,2018,2019,2020,2021,2022 ./lib/isc/pool.c C 2013,2015,2016,2018,2019,2020,2021,2022 ./lib/isc/portset.c C 2008,2016,2017,2018,2019,2020,2021,2022