2
0
mirror of https://gitlab.isc.org/isc-projects/bind9 synced 2025-09-03 16:15:27 +00:00

Simplify thread spawning

The `isc_trampoline` module had a lot of machinery to support stable
thread IDs for use by hazard pointers. But the hazard pointer code
is gone, and the `isc_loop` module now has its own per-loop thread
IDs.

The trampoline machinery seems over-complicated for its remaining
tasks, so move the per-thread initialization into `isc/thread.c`,
and delete the rest.
This commit is contained in:
Tony Finch
2023-01-02 19:56:27 +00:00
parent 1736900b4e
commit 555690a3c9
8 changed files with 67 additions and 313 deletions

View File

@@ -1,3 +1,6 @@
6137. [cleanup] Remove the trampoline jump when spawning threads.
[GL !7293]
6136. [cleanup] Remove the isc_fsaccess API in favor of creating
temporary file first and atomically replace the key
with non-truncated content. [GL #3982]

View File

@@ -193,8 +193,6 @@ libisc_la_SOURCES = \
timer.c \
tls.c \
tm.c \
trampoline.c \
trampoline_p.h \
url.c \
utf8.c \
uv.c \

View File

@@ -30,8 +30,6 @@
#include <isc/lang.h>
#include <isc/result.h>
extern thread_local size_t isc_tid_v;
ISC_LANG_BEGINDECLS
typedef pthread_t isc_thread_t;

View File

@@ -26,7 +26,7 @@
#include "mem_p.h"
#include "mutex_p.h"
#include "os_p.h"
#include "trampoline_p.h"
#include "random_p.h"
#ifndef ISC_CONSTRUCTOR
#error Either __attribute__((constructor|destructor))__ or DllMain support needed to compile BIND 9.
@@ -46,8 +46,8 @@ isc__initialize(void) {
isc__os_initialize();
isc__mutex_initialize();
isc__mem_initialize();
isc__random_initialize();
isc__tls_initialize();
isc__trampoline_initialize();
isc__uv_initialize();
isc__xml_initialize();
isc__md_initialize();
@@ -61,7 +61,6 @@ isc__shutdown(void) {
isc__md_shutdown();
isc__xml_shutdown();
isc__uv_shutdown();
isc__trampoline_shutdown();
isc__tls_shutdown();
isc__mem_shutdown();
isc__mutex_shutdown();

View File

@@ -46,7 +46,6 @@
#include "../loop_p.h"
#include "netmgr-int.h"
#include "openssl_shim.h"
#include "trampoline_p.h"
/*%
* Shortcut index arrays to get access to statistics counters.

View File

@@ -28,23 +28,72 @@
#include <sys/types.h>
#endif /* if defined(HAVE_SYS_PROCSET_H) */
#include <stdlib.h>
#include <isc/iterated_hash.h>
#include <isc/strerr.h>
#include <isc/thread.h>
#include <isc/util.h>
#include "trampoline_p.h"
#include "random_p.h"
#ifndef THREAD_MINSTACKSIZE
#define THREAD_MINSTACKSIZE (1024U * 1024)
#endif /* ifndef THREAD_MINSTACKSIZE */
/*
* We can't use isc_mem API here, because it's called too early and the
* isc_mem_debugging flags can be changed later causing mismatch between flags
* used for isc_mem_get() and isc_mem_put().
*/
struct thread_wrap {
isc_threadfunc_t func;
isc_threadarg_t arg;
isc_threadresult_t result;
void *jemalloc_enforce_init;
};
static isc_threadresult_t
thread_run(isc_threadarg_t arg) {
struct thread_wrap *wrap = arg;
/*
* Ensure every thread starts with a malloc() call to prevent memory
* bloat caused by a jemalloc quirk. While this dummy allocation is
* not used for anything, free() must not be immediately called for it
* so that an optimizing compiler does not strip away such a pair of
* malloc() + free() calls altogether, as it would foil the fix.
*/
wrap->jemalloc_enforce_init = malloc(8);
/* Re-seed the random number generator in each thread. */
isc__random_initialize();
/* Get a thread-local digest context. */
isc__iterated_hash_initialize();
/* Run the main function */
wrap->result = wrap->func(wrap->arg);
/* Cleanup */
isc__iterated_hash_shutdown();
/* Return the wrapper struct for jemalloc cleanup */
return (wrap);
}
void
isc_thread_create(isc_threadfunc_t func, isc_threadarg_t arg,
isc_thread_t *thread) {
pthread_attr_t attr;
isc__trampoline_t *trampoline_arg;
struct thread_wrap *wrap = malloc(sizeof(*wrap));
RUNTIME_CHECK(wrap != NULL);
trampoline_arg = isc__trampoline_get(func, arg);
*wrap = (struct thread_wrap){
.func = func,
.arg = arg,
};
#if defined(HAVE_PTHREAD_ATTR_GETSTACKSIZE) && \
defined(HAVE_PTHREAD_ATTR_SETSTACKSIZE)
@@ -67,8 +116,7 @@ isc_thread_create(isc_threadfunc_t func, isc_threadarg_t arg,
#endif /* if defined(HAVE_PTHREAD_ATTR_GETSTACKSIZE) && \
* defined(HAVE_PTHREAD_ATTR_SETSTACKSIZE) */
ret = pthread_create(thread, &attr, isc__trampoline_run,
trampoline_arg);
ret = pthread_create(thread, &attr, thread_run, wrap);
PTHREADS_RUNTIME_CHECK(pthread_create, ret);
pthread_attr_destroy(&attr);
@@ -78,9 +126,17 @@ isc_thread_create(isc_threadfunc_t func, isc_threadarg_t arg,
void
isc_thread_join(isc_thread_t thread, isc_threadresult_t *result) {
int ret = pthread_join(thread, result);
void *wrap_v;
int ret = pthread_join(thread, &wrap_v);
PTHREADS_RUNTIME_CHECK(pthread_join, ret);
struct thread_wrap *wrap = wrap_v;
if (result != NULL) {
*result = wrap->result;
}
free(wrap->jemalloc_enforce_init);
free(wrap);
}
void

View File

@@ -1,209 +0,0 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
/*! \file */
#include <inttypes.h>
#include <stdlib.h>
#include <isc/iterated_hash.h>
#include <isc/md.h>
#include <isc/mem.h>
#include <isc/once.h>
#include <isc/thread.h>
#include <isc/util.h>
#include <isc/uv.h>
#include "random_p.h"
#include "trampoline_p.h"
#define ISC__TRAMPOLINE_UNUSED 0
struct isc__trampoline {
int tid; /* const */
uintptr_t self;
isc_threadfunc_t start;
isc_threadarg_t arg;
void *jemalloc_enforce_init;
};
/*
* We can't use isc_mem API here, because it's called too early and the
* isc_mem_debugging flags can be changed later causing mismatch between flags
* used for isc_mem_get() and isc_mem_put().
*
* Since this is a single allocation at library load and deallocation at library
* unload, using the standard allocator without the tracking is fine for this
* single purpose.
*
* We can't use isc_mutex API either, because we track whether the mutexes get
* properly destroyed, and we intentionally leak the static mutex here without
* destroying it to prevent data race between library destructor running while
* thread is being still created.
*/
static uv_mutex_t isc__trampoline_lock;
static isc__trampoline_t **trampolines;
thread_local size_t isc_tid_v = SIZE_MAX;
static size_t isc__trampoline_min = 1;
static size_t isc__trampoline_max = 65;
static isc__trampoline_t *
isc__trampoline_new(int tid, isc_threadfunc_t start, isc_threadarg_t arg) {
isc__trampoline_t *trampoline = calloc(1, sizeof(*trampoline));
RUNTIME_CHECK(trampoline != NULL);
*trampoline = (isc__trampoline_t){
.tid = tid,
.start = start,
.arg = arg,
.self = ISC__TRAMPOLINE_UNUSED,
};
return (trampoline);
}
void
isc__trampoline_initialize(void) {
uv_mutex_init(&isc__trampoline_lock);
trampolines = calloc(isc__trampoline_max, sizeof(trampolines[0]));
RUNTIME_CHECK(trampolines != NULL);
/* Get the trampoline slot 0 for the main thread */
trampolines[0] = isc__trampoline_new(0, NULL, NULL);
isc_tid_v = trampolines[0]->tid;
trampolines[0]->self = isc_thread_self();
/* Initialize the other trampolines */
for (size_t i = 1; i < isc__trampoline_max; i++) {
trampolines[i] = NULL;
}
isc__trampoline_min = 1;
isc__random_initialize();
}
void
isc__trampoline_shutdown(void) {
/*
* When the program using the library exits abruptly and the library
* gets unloaded, there might be some existing trampolines from unjoined
* threads. We intentionally ignore those and don't check whether all
* trampolines have been cleared before exiting, so we leak a little bit
* of resources here, including the lock.
*/
free(trampolines[0]);
}
isc__trampoline_t *
isc__trampoline_get(isc_threadfunc_t start, isc_threadarg_t arg) {
isc__trampoline_t **tmp = NULL;
isc__trampoline_t *trampoline = NULL;
uv_mutex_lock(&isc__trampoline_lock);
again:
for (size_t i = isc__trampoline_min; i < isc__trampoline_max; i++) {
if (trampolines[i] == NULL) {
trampoline = isc__trampoline_new(i, start, arg);
trampolines[i] = trampoline;
isc__trampoline_min = i + 1;
goto done;
}
}
tmp = calloc(2 * isc__trampoline_max, sizeof(trampolines[0]));
RUNTIME_CHECK(tmp != NULL);
for (size_t i = 0; i < isc__trampoline_max; i++) {
tmp[i] = trampolines[i];
}
for (size_t i = isc__trampoline_max; i < 2 * isc__trampoline_max; i++) {
tmp[i] = NULL;
}
free(trampolines);
trampolines = tmp;
isc__trampoline_max = isc__trampoline_max * 2;
goto again;
done:
INSIST(trampoline != NULL);
uv_mutex_unlock(&isc__trampoline_lock);
return (trampoline);
}
void
isc__trampoline_detach(isc__trampoline_t *trampoline) {
uv_mutex_lock(&isc__trampoline_lock);
REQUIRE(trampoline->self == isc_thread_self());
REQUIRE(trampoline->tid > 0);
REQUIRE((size_t)trampoline->tid < isc__trampoline_max);
REQUIRE(trampolines[trampoline->tid] == trampoline);
trampolines[trampoline->tid] = NULL;
if (isc__trampoline_min > (size_t)trampoline->tid) {
isc__trampoline_min = trampoline->tid;
}
free(trampoline->jemalloc_enforce_init);
free(trampoline);
uv_mutex_unlock(&isc__trampoline_lock);
return;
}
void
isc__trampoline_attach(isc__trampoline_t *trampoline) {
uv_mutex_lock(&isc__trampoline_lock);
REQUIRE(trampoline->self == ISC__TRAMPOLINE_UNUSED);
REQUIRE(trampoline->tid > 0);
REQUIRE((size_t)trampoline->tid < isc__trampoline_max);
REQUIRE(trampolines[trampoline->tid] == trampoline);
/* Initialize the trampoline */
isc_tid_v = trampoline->tid;
trampoline->self = isc_thread_self();
/*
* Ensure every thread starts with a malloc() call to prevent memory
* bloat caused by a jemalloc quirk. While this dummy allocation is
* not used for anything, free() must not be immediately called for it
* so that an optimizing compiler does not strip away such a pair of
* malloc() + free() calls altogether, as it would foil the fix.
*/
trampoline->jemalloc_enforce_init = malloc(8);
/*
* Re-seed the random number generator in each thread.
*/
isc__random_initialize();
uv_mutex_unlock(&isc__trampoline_lock);
}
isc_threadresult_t
isc__trampoline_run(isc_threadarg_t arg) {
isc__trampoline_t *trampoline = (isc__trampoline_t *)arg;
isc_threadresult_t result;
isc__trampoline_attach(trampoline);
isc__iterated_hash_initialize();
/* Run the main function */
result = (trampoline->start)(trampoline->arg);
isc__iterated_hash_shutdown();
isc__trampoline_detach(trampoline);
return (result);
}

View File

@@ -1,90 +0,0 @@
/*
* Copyright (C) Internet Systems Consortium, Inc. ("ISC")
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, you can obtain one at https://mozilla.org/MPL/2.0/.
*
* See the COPYRIGHT file distributed with this work for additional
* information regarding copyright ownership.
*/
#pragma once
#include <isc/thread.h>
/*! \file isc/trampoline_p.h
* \brief isc__trampoline: allows safe reuse of thread ID numbers.
*
* The 'isc_hp' hazard pointer API uses an internal thread ID
* variable ('tid_v') that is incremented for each new thread that uses
* hazard pointers. This thread ID is then used as an index into a global
* shared table of hazard pointer state.
*
* Since the thread ID is only incremented and never decremented, the
* table can overflow if threads are frequently created and destroyed.
*
* A trampoline is a thin wrapper around any function to be called from
* a newly launched thread. It maintains a table of thread IDs used by
* current and previous threads; when a thread is destroyed, its slot in
* the trampoline table becomes available, and the next thread to occupy
* that slot can use the same thread ID that its predecessor did.
*
* The trampoline table initially has space for 64 worker threads in
* addition to the main thread. If more threads than that are in
* concurrent use, the table is reallocated with twice as much space.
* (Note that the number of concurrent threads is currently capped at
* 128 by the queue and hazard pointer implementations.)
*/
typedef struct isc__trampoline isc__trampoline_t;
void
isc__trampoline_initialize(void);
/*%<
* Initialize the thread trampoline internal structures, must be called only
* once as a library constructor (see lib/isc/lib.c).
*/
void
isc__trampoline_shutdown(void);
/*%<
* Destroy the thread trampoline internal structures, must be called only
* once as a library destructor (see lib/isc/lib.c).
*/
isc__trampoline_t *
isc__trampoline_get(isc_threadfunc_t start_routine, isc_threadarg_t arg);
/*%<
* Get a free thread trampoline structure and initialize it with
* start_routine and arg passed to start_routine.
*
* Requires:
*\li 'start_routine' is a valid non-NULL thread start_routine
*/
void
isc__trampoline_attach(isc__trampoline_t *trampoline);
void
isc__trampoline_detach(isc__trampoline_t *trampoline);
/*%<
* Attach/detach the trampoline to/from the current thread.
*
* Requires:
* \li 'trampoline' is a valid isc__trampoline_t
*/
isc_threadresult_t
isc__trampoline_run(isc_threadarg_t arg);
/*%<
* Run the thread trampoline, this will get passed to the actual
* pthread_create(), initialize the isc_tid_v.
*
* Requires:
*\li 'arg' is a valid isc_trampoline_t
*
* Returns:
*\li return value from start_routine (see isc__trampoline_get())
*/