From e1c4a69197acd963aac4ca219910efc635271f97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Mon, 20 Jan 2020 11:39:14 +0100 Subject: [PATCH 1/3] Fix a race in taskmgr between worker and task pausing/unpausing. To reproduce the race - create a task, send two events to it, first one must take some time. Then, from the outside, pause(), unpause() and detach() the task. When the long-running event is processed by the task it is in task_state_running state. When we called pause() the state changed to task_state_paused, on unpause we checked that there are events in the task queue, changed the state to task_state_ready and enqueued the task on the workers readyq. We then detach the task. The dispatch() is done with processing the event, it processes the second event in the queue, and then shuts down the task and frees it (as it's not referenced anymore). Dispatcher then takes the, already freed, task from the queue where it was wrongly put, causing an use-after free and, subsequently, either an assertion failure or a segmentation fault. The probability of this happening is very slim, yet it might happen under a very high load, more probably on a recursive resolver than on an authoritative. The fix introduces a new 'task_state_pausing' state - to which tasks are moved if they're being paused while still running. They are moved to task_state_paused state when dispatcher is done with them, and if we unpause a task in paused state it's moved back to task_state_running and not requeued. --- lib/isc/task.c | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/lib/isc/task.c b/lib/isc/task.c index 33114993d4..f859849935 100644 --- a/lib/isc/task.c +++ b/lib/isc/task.c @@ -78,13 +78,17 @@ ***/ typedef enum { - task_state_idle, task_state_ready, task_state_paused, - task_state_running, task_state_done + task_state_idle, /* not doing anything, events queue empty */ + task_state_ready, /* waiting in worker's queue */ + task_state_paused, /* not running, paused */ + task_state_pausing, /* running, waiting to be paused */ + task_state_running, /* actively processing events */ + task_state_done /* shutting down, no events or references */ } task_state_t; #if defined(HAVE_LIBXML2) || defined(HAVE_JSON_C) static const char *statenames[] = { - "idle", "ready", "running", "done", + "idle", "ready", "paused", "pausing", "running", "done", }; #endif @@ -381,6 +385,7 @@ task_shutdown(isc__task_t *task) { } INSIST(task->state == task_state_ready || task->state == task_state_paused || + task->state == task_state_pausing || task->state == task_state_running); /* @@ -502,7 +507,8 @@ task_send(isc__task_t *task, isc_event_t **eventp, int c) { } INSIST(task->state == task_state_ready || task->state == task_state_running || - task->state == task_state_paused); + task->state == task_state_paused || + task->state == task_state_pausing); ENQUEUE(task->events, event, ev_link); task->nevents++; *eventp = NULL; @@ -1200,10 +1206,12 @@ dispatch(isc__taskmgr_t *manager, unsigned int threadid) { finished = true; task->state = task_state_done; } else { - /* It might be paused */ if (task->state == task_state_running) { task->state = task_state_idle; + } else if (task->state == + task_state_pausing) { + task->state = task_state_paused; } } done = true; @@ -1226,6 +1234,9 @@ dispatch(isc__taskmgr_t *manager, unsigned int threadid) { */ task->state = task_state_ready; requeue = true; + } else if (task->state == + task_state_pausing) { + task->state = task_state_paused; } done = true; } @@ -1682,8 +1693,12 @@ isc_task_pause(isc_task_t *task0) { INSIST(task->state == task_state_idle || task->state == task_state_ready || task->state == task_state_running); - running = (task->state == task_state_running); - task->state = task_state_paused; + if (task->state == task_state_running) { + running = true; + task->state = task_state_pausing; + } else { + task->state = task_state_paused; + } UNLOCK(&task->lock); if (running) { @@ -1706,13 +1721,18 @@ isc_task_unpause(isc_task_t *task0) { REQUIRE(ISCAPI_TASK_VALID(task0)); LOCK(&task->lock); - INSIST(task->state == task_state_paused); - if (!EMPTY(task->events)) { - task->state = task_state_ready; - was_idle = true; + INSIST(task->state == task_state_paused || + task->state == task_state_pausing); + /* If the task was pausing we can't reschedule it */ + if (task->state == task_state_pausing) { + task->state = task_state_running; } else { task->state = task_state_idle; } + if (task->state == task_state_idle && !EMPTY(task->events)) { + task->state = task_state_ready; + was_idle = true; + } UNLOCK(&task->lock); if (was_idle) { From 1beba0fa59e8d905472be85981c8e329d6da001b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Mon, 20 Jan 2020 12:12:29 +0100 Subject: [PATCH 2/3] Unit test for the taskmgr pause/unpause race --- lib/isc/tests/task_test.c | 80 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/lib/isc/tests/task_test.c b/lib/isc/tests/task_test.c index d44aee5898..c7093a6967 100644 --- a/lib/isc/tests/task_test.c +++ b/lib/isc/tests/task_test.c @@ -48,7 +48,7 @@ static isc_condition_t cv; atomic_int_fast32_t counter; static int active[10]; -static atomic_bool done; +static atomic_bool done, done2; static int _setup(void **state) { @@ -429,6 +429,83 @@ privilege_drop(void **state) { assert_null(task2); } +static void +sleep_cb(isc_task_t *task, isc_event_t *event) { + UNUSED(task); + int p = *(int*)event->ev_arg; + if (p == 1) { + /* + * Signal the main thread that we're running, so that + * it can trigger the race. + */ + LOCK(&lock); + atomic_store(&done2, true); + SIGNAL(&cv); + UNLOCK(&lock); + /* + * Wait for the operations in the main thread to be finished. + */ + LOCK(&lock); + while (!atomic_load(&done)) { + WAIT(&cv, &lock); + } + UNLOCK(&lock); + } else { + /* + * Wait for the operations in the main thread to be finished. + */ + LOCK(&lock); + atomic_store(&done2, true); + SIGNAL(&cv); + UNLOCK(&lock); + } + isc_event_free(&event); +} + +static void +pause_unpause(void **state) { + isc_result_t result; + isc_task_t *task = NULL; + isc_event_t *event1,*event2 = NULL; + UNUSED(state); + atomic_store(&done, false); + atomic_store(&done2, false); + + result = isc_task_create(taskmgr, 0, &task); + assert_int_equal(result, ISC_R_SUCCESS); + + event1 = isc_event_allocate(test_mctx, task, ISC_TASKEVENT_TEST, + sleep_cb, &(int){1}, sizeof (isc_event_t)); + assert_non_null(event1); + event2 = isc_event_allocate(test_mctx, task, ISC_TASKEVENT_TEST, + sleep_cb, &(int){2}, sizeof (isc_event_t)); + assert_non_null(event2); + isc_task_send(task, &event1); + isc_task_send(task, &event2); + /* Wait for event1 to be running */ + LOCK(&lock); + while (!atomic_load(&done2)) { + WAIT(&cv, &lock); + } + UNLOCK(&lock); + /* Pause-unpause-detach is what causes the race */ + isc_task_pause(task); + isc_task_unpause(task); + isc_task_detach(&task); + /* Signal event1 to finish */ + LOCK(&lock); + atomic_store(&done2, false); + atomic_store(&done, true); + SIGNAL(&cv); + UNLOCK(&lock); + /* Wait for event2 to finish */ + LOCK(&lock); + while (!atomic_load(&done2)) { + WAIT(&cv, &lock); + } + UNLOCK(&lock); +} + /* * Basic task functions: */ @@ -1482,6 +1559,7 @@ main(int argc, char **argv) { cmocka_unit_test_setup_teardown(purgeevent, _setup2, _teardown), cmocka_unit_test_setup_teardown(purgeevent_notpurge, _setup, _teardown), + cmocka_unit_test_setup_teardown(pause_unpause, _setup, _teardown), }; int c; From 63b702d0d07dfaa2f77006522e0ad35c7d138104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Witold=20Kr=C4=99cicki?= Date: Mon, 20 Jan 2020 22:34:10 +0100 Subject: [PATCH 3/3] CHANGES note --- CHANGES | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES b/CHANGES index 00267360b1..c858ca5164 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,5 @@ +5349. [bug] Fix a race in task_pause/unpause. [GL #1571] + 5348. [bug] dnssec-settime -Psync was not being honoured. [GL !2893]