mirror of
https://gitlab.isc.org/isc-projects/bind9
synced 2025-08-30 05:57:52 +00:00
fix shutdown deadlocks
- ensure isc_nm_pause() and isc_nm_resume() work the same whether run from inside or outside of the netmgr. - promote 'stop' events to the priority event level so they can run while the netmgr is pausing or paused. - when pausing, drain the priority queue before acquiring an interlock; this prevents a deadlock when another thread is waiting for us to complete a task. - release interlock after pausing, reacquire it when resuming, so that stop events can happen. some incidental changes: - use a function to enqueue pause and resume events (this was part of a different change attempt that didn't work out; I kept it because I thought was more readable). - make mgr->nworkers a signed int to remove some annoying integer casts.
This commit is contained in:
parent
4c8f6ebeb1
commit
c44423127d
@ -249,7 +249,6 @@ typedef enum isc__netievent_type {
|
||||
netievent_udpclose,
|
||||
netievent_udpsend,
|
||||
netievent_udpread,
|
||||
netievent_udpstop,
|
||||
netievent_udpcancel,
|
||||
|
||||
netievent_tcpconnect,
|
||||
@ -258,7 +257,6 @@ typedef enum isc__netievent_type {
|
||||
netievent_tcpstartread,
|
||||
netievent_tcppauseread,
|
||||
netievent_tcpaccept,
|
||||
netievent_tcpstop,
|
||||
netievent_tcpcancel,
|
||||
|
||||
netievent_tcpdnsaccept,
|
||||
@ -267,7 +265,6 @@ typedef enum isc__netievent_type {
|
||||
netievent_tcpdnssend,
|
||||
netievent_tcpdnsread,
|
||||
netievent_tcpdnscancel,
|
||||
netievent_tcpdnsstop,
|
||||
|
||||
netievent_tlsclose,
|
||||
netievent_tlssend,
|
||||
@ -282,12 +279,10 @@ typedef enum isc__netievent_type {
|
||||
netievent_tlsdnssend,
|
||||
netievent_tlsdnsread,
|
||||
netievent_tlsdnscancel,
|
||||
netievent_tlsdnsstop,
|
||||
netievent_tlsdnscycle,
|
||||
netievent_tlsdnsshutdown,
|
||||
|
||||
netievent_httpclose,
|
||||
netievent_httpstop,
|
||||
netievent_httpsend,
|
||||
|
||||
netievent_shutdown,
|
||||
@ -301,19 +296,26 @@ typedef enum isc__netievent_type {
|
||||
netievent_task,
|
||||
netievent_privilegedtask,
|
||||
|
||||
netievent_prio = 0xff, /* event type values higher than this
|
||||
* will be treated as high-priority
|
||||
* events, which can be processed
|
||||
* while the netmgr is paused.
|
||||
/*
|
||||
* event type values higher than this will be treated
|
||||
* as high-priority events, which can be processed
|
||||
* while the netmgr is pausing or paused.
|
||||
*/
|
||||
netievent_prio = 0xff,
|
||||
|
||||
netievent_udplisten,
|
||||
netievent_udpstop,
|
||||
netievent_tcplisten,
|
||||
netievent_tcpstop,
|
||||
netievent_tcpdnslisten,
|
||||
netievent_tcpdnsstop,
|
||||
netievent_tlsdnslisten,
|
||||
netievent_tlsdnsstop,
|
||||
netievent_httpstop,
|
||||
|
||||
netievent_resume,
|
||||
netievent_detach,
|
||||
netievent_close,
|
||||
|
||||
} isc__netievent_type;
|
||||
|
||||
typedef union {
|
||||
@ -658,7 +660,7 @@ struct isc_nm {
|
||||
int magic;
|
||||
isc_refcount_t references;
|
||||
isc_mem_t *mctx;
|
||||
uint32_t nworkers;
|
||||
int nworkers;
|
||||
isc_mutex_t lock;
|
||||
isc_condition_t wkstatecond;
|
||||
isc_condition_t wkpausecond;
|
||||
|
@ -336,7 +336,7 @@ nm_destroy(isc_nm_t **mgr0) {
|
||||
|
||||
mgr->magic = 0;
|
||||
|
||||
for (size_t i = 0; i < mgr->nworkers; i++) {
|
||||
for (int i = 0; i < mgr->nworkers; i++) {
|
||||
isc__networker_t *worker = &mgr->workers[i];
|
||||
isc__netievent_t *event = isc__nm_get_netievent_stop(mgr);
|
||||
isc__nm_enqueue_ievent(worker, event);
|
||||
@ -348,7 +348,7 @@ nm_destroy(isc_nm_t **mgr0) {
|
||||
}
|
||||
UNLOCK(&mgr->lock);
|
||||
|
||||
for (size_t i = 0; i < mgr->nworkers; i++) {
|
||||
for (int i = 0; i < mgr->nworkers; i++) {
|
||||
isc__networker_t *worker = &mgr->workers[i];
|
||||
isc__netievent_t *ievent = NULL;
|
||||
int r;
|
||||
@ -414,33 +414,45 @@ nm_destroy(isc_nm_t **mgr0) {
|
||||
#endif /* WIN32 */
|
||||
}
|
||||
|
||||
static void
|
||||
enqueue_pause(isc__networker_t *worker) {
|
||||
isc__netievent_pause_t *event =
|
||||
isc__nm_get_netievent_pause(worker->mgr);
|
||||
isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event);
|
||||
}
|
||||
|
||||
static void
|
||||
isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0) {
|
||||
UNUSED(ev0);
|
||||
REQUIRE(worker->paused == false);
|
||||
|
||||
worker->paused = true;
|
||||
uv_stop(&worker->loop);
|
||||
}
|
||||
|
||||
void
|
||||
isc_nm_pause(isc_nm_t *mgr) {
|
||||
REQUIRE(VALID_NM(mgr));
|
||||
uint_fast32_t pausing = 0;
|
||||
REQUIRE(!atomic_load(&mgr->paused));
|
||||
|
||||
isc__nm_acquire_interlocked_force(mgr);
|
||||
|
||||
for (size_t i = 0; i < mgr->nworkers; i++) {
|
||||
for (int i = 0; i < mgr->nworkers; i++) {
|
||||
isc__networker_t *worker = &mgr->workers[i];
|
||||
if (i != (size_t)isc_nm_tid()) {
|
||||
isc__netievent_resume_t *event =
|
||||
isc__nm_get_netievent_pause(mgr);
|
||||
pausing++;
|
||||
isc__nm_enqueue_ievent(worker,
|
||||
(isc__netievent_t *)event);
|
||||
} else {
|
||||
if (i == isc_nm_tid()) {
|
||||
isc__nm_async_pause(worker, NULL);
|
||||
} else {
|
||||
enqueue_pause(worker);
|
||||
}
|
||||
}
|
||||
|
||||
if (isc__nm_in_netthread()) {
|
||||
isc_barrier_wait(&mgr->pausing);
|
||||
drain_priority_queue(&mgr->workers[isc_nm_tid()]);
|
||||
}
|
||||
|
||||
LOCK(&mgr->lock);
|
||||
while (atomic_load(&mgr->workers_paused) != pausing) {
|
||||
while (atomic_load(&mgr->workers_paused) != mgr->workers_running) {
|
||||
WAIT(&mgr->wkstatecond, &mgr->lock);
|
||||
}
|
||||
UNLOCK(&mgr->lock);
|
||||
@ -449,24 +461,39 @@ isc_nm_pause(isc_nm_t *mgr) {
|
||||
true));
|
||||
}
|
||||
|
||||
static void
|
||||
enqueue_resume(isc__networker_t *worker) {
|
||||
isc__netievent_resume_t *event =
|
||||
isc__nm_get_netievent_resume(worker->mgr);
|
||||
isc__nm_enqueue_ievent(worker, (isc__netievent_t *)event);
|
||||
}
|
||||
|
||||
static void
|
||||
isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0) {
|
||||
UNUSED(ev0);
|
||||
REQUIRE(worker->paused == true);
|
||||
|
||||
worker->paused = false;
|
||||
}
|
||||
|
||||
void
|
||||
isc_nm_resume(isc_nm_t *mgr) {
|
||||
REQUIRE(VALID_NM(mgr));
|
||||
REQUIRE(atomic_load(&mgr->paused));
|
||||
|
||||
for (size_t i = 0; i < mgr->nworkers; i++) {
|
||||
for (int i = 0; i < mgr->nworkers; i++) {
|
||||
isc__networker_t *worker = &mgr->workers[i];
|
||||
if (i != (size_t)isc_nm_tid()) {
|
||||
isc__netievent_resume_t *event =
|
||||
isc__nm_get_netievent_resume(mgr);
|
||||
isc__nm_enqueue_ievent(worker,
|
||||
(isc__netievent_t *)event);
|
||||
} else {
|
||||
if (i == isc_nm_tid()) {
|
||||
isc__nm_async_resume(worker, NULL);
|
||||
} else {
|
||||
enqueue_resume(worker);
|
||||
}
|
||||
}
|
||||
|
||||
if (isc__nm_in_netthread()) {
|
||||
drain_privilege_queue(&mgr->workers[isc_nm_tid()]);
|
||||
|
||||
atomic_fetch_sub(&mgr->workers_paused, 1);
|
||||
isc_barrier_wait(&mgr->resuming);
|
||||
}
|
||||
|
||||
@ -512,7 +539,7 @@ isc__netmgr_shutdown(isc_nm_t *mgr) {
|
||||
REQUIRE(VALID_NM(mgr));
|
||||
|
||||
atomic_store(&mgr->closing, true);
|
||||
for (size_t i = 0; i < mgr->nworkers; i++) {
|
||||
for (int i = 0; i < mgr->nworkers; i++) {
|
||||
isc__netievent_t *event = NULL;
|
||||
event = isc__nm_get_netievent_shutdown(mgr);
|
||||
isc__nm_enqueue_ievent(&mgr->workers[i], event);
|
||||
@ -742,23 +769,6 @@ isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0) {
|
||||
uv_close((uv_handle_t *)&worker->async, NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0) {
|
||||
UNUSED(ev0);
|
||||
REQUIRE(worker->paused == false);
|
||||
|
||||
worker->paused = true;
|
||||
uv_stop(&worker->loop);
|
||||
}
|
||||
|
||||
static void
|
||||
isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0) {
|
||||
UNUSED(ev0);
|
||||
REQUIRE(worker->paused == true);
|
||||
|
||||
worker->paused = false;
|
||||
}
|
||||
|
||||
void
|
||||
isc_nm_task_enqueue(isc_nm_t *nm, isc_task_t *task, int threadid) {
|
||||
isc__netievent_t *event = NULL;
|
||||
@ -2756,16 +2766,25 @@ isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0) {
|
||||
|
||||
bool
|
||||
isc__nm_acquire_interlocked(isc_nm_t *mgr) {
|
||||
if (!isc__nm_in_netthread()) {
|
||||
return (false);
|
||||
}
|
||||
|
||||
LOCK(&mgr->lock);
|
||||
bool success = atomic_compare_exchange_strong(
|
||||
&mgr->interlocked, &(int){ ISC_NETMGR_NON_INTERLOCKED },
|
||||
isc_nm_tid());
|
||||
|
||||
UNLOCK(&mgr->lock);
|
||||
return (success);
|
||||
}
|
||||
|
||||
void
|
||||
isc__nm_drop_interlocked(isc_nm_t *mgr) {
|
||||
if (!isc__nm_in_netthread()) {
|
||||
return;
|
||||
}
|
||||
|
||||
LOCK(&mgr->lock);
|
||||
int tid = atomic_exchange(&mgr->interlocked,
|
||||
ISC_NETMGR_NON_INTERLOCKED);
|
||||
@ -2776,6 +2795,10 @@ isc__nm_drop_interlocked(isc_nm_t *mgr) {
|
||||
|
||||
void
|
||||
isc__nm_acquire_interlocked_force(isc_nm_t *mgr) {
|
||||
if (!isc__nm_in_netthread()) {
|
||||
return;
|
||||
}
|
||||
|
||||
LOCK(&mgr->lock);
|
||||
while (!atomic_compare_exchange_strong(
|
||||
&mgr->interlocked, &(int){ ISC_NETMGR_NON_INTERLOCKED },
|
||||
|
Loading…
x
Reference in New Issue
Block a user