diff --git a/CHANGES b/CHANGES index 3ef4e36530..2f23b7a7d0 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,6 @@ +6108. [func] Support for simple lock-free singly-linked stacks. + [GL !7470] + 6107. [cleanup] Remove the dns_sdb API and rewrite the named builtin databases to implement dns_db directly. [GL #3882] diff --git a/doc/dev/dev.md b/doc/dev/dev.md index 22cc05eaa0..a6f5aefedc 100644 --- a/doc/dev/dev.md +++ b/doc/dev/dev.md @@ -646,7 +646,7 @@ More macros are provided for iterating the list: isc_foo_t *foo; for (foo = ISC_LIST_HEAD(foolist); - foo != NULL; + foo != NULL; foo = ISC_LIST_NEXT(foo, link)) { /* do things */ @@ -659,6 +659,45 @@ Items can be removed from the list using `ISC_LIST_UNLINK`: ISC_LIST_UNLINK(foolist, foo, link); +##### Atomic stacks + +In ``, there are also similar macros for singly-linked +stacks (`ISC_STACK`) and atomic stacks (`ISC_ASTACK`). + +Lock-free linked data structures have pitfalls that the `ISC_ASTACK` +macros avoid by having a restricted API. Firstly, it is difficult to +implement atomic doubly-linked lists without at least a double-width +compare-exchange, which is not a widely supported primitive, so this +stack is singly-linked. Secondly, when individual elements can be +removed from a list (whether doubly-linked or singly-linked), we run +into the ABA problem, where removes and (re-)inserts race and as a +result the links get in a tangle. Safe support for removing individual +elements requires some kind of garbage collection, or extending link +pointers with generation counters. Instead the ASTACK macros only +provide a function for emptying the entire stack in one go, +`ISC_ASTACK_TO_STACK()`. This only requires setting the `top` pointer to +`NULL`, which cannot cause `ISC_ASTACK_PUSH()` to go wrong. 
+ +The `ISC_ASTACK` macros are used the same way as the non-atomic +doubly-linked lists described above, except that not all of the +`ISC_LIST` macros have `ISC_ASTACK` equivalents, and stacks have a +`TOP` instead of a `HEAD`. + +As well as read-only iteration similar to `ISC_LIST` described above, +there is an idiom for emptying an atomic stack into a regular stack, +and processing its contents: + + ISC_STACK(isc_foo_t) drain = ISC_ASTACK_TO_STACK(foo_work); + while (!ISC_STACK_EMPTY(drain)) { + isc_foo_t *foo = ISC_STACK_POP(drain, link); + /* do things */ + /* maybe again? ISC_ASTACK_PUSH(foo_work, foo, link) */ + } + +There is also an `ISC_ASTACK_ADD()` macro, which is a convenience +wrapper for pushing an element onto a stack if it has not already been +added. + #### Names The `dns_name` API has facilities for processing DNS names and labels, diff --git a/lib/isc/Makefile.am b/lib/isc/Makefile.am index 77a667700c..3876a1ce0e 100644 --- a/lib/isc/Makefile.am +++ b/lib/isc/Makefile.am @@ -76,9 +76,10 @@ libisc_la_HEADERS = \ include/isc/rwlock.h \ include/isc/safe.h \ include/isc/serial.h \ - include/isc/signal.h\ + include/isc/signal.h \ include/isc/siphash.h \ include/isc/sockaddr.h \ + include/isc/stack.h \ include/isc/stats.h \ include/isc/stdio.h \ include/isc/stdtime.h \ diff --git a/lib/isc/include/isc/atomic.h b/lib/isc/include/isc/atomic.h index 5e470d288f..e39e07880b 100644 --- a/lib/isc/include/isc/atomic.h +++ b/lib/isc/include/isc/atomic.h @@ -42,9 +42,6 @@ #define atomic_compare_exchange_strong_relaxed(o, e, d) \ atomic_compare_exchange_strong_explicit( \ (o), (e), (d), memory_order_relaxed, memory_order_relaxed) -#define atomic_compare_exchange_strong_acq_rel(o, e, d) \ - atomic_compare_exchange_strong_explicit( \ - (o), (e), (d), memory_order_acq_rel, memory_order_acquire) /* Acquire-Release Memory Ordering */ @@ -59,6 +56,8 @@ atomic_fetch_and_explicit((o), (v), memory_order_release) #define atomic_fetch_or_release(o, v) \ 
atomic_fetch_or_explicit((o), (v), memory_order_release) +#define atomic_exchange_acquire(o, v) \ + atomic_exchange_explicit((o), (v), memory_order_acquire) #define atomic_exchange_acq_rel(o, v) \ atomic_exchange_explicit((o), (v), memory_order_acq_rel) #define atomic_fetch_sub_acq_rel(o, v) \ @@ -73,3 +72,6 @@ /* compare/exchange that MUST succeed */ #define atomic_compare_exchange_enforced(o, e, d) \ RUNTIME_CHECK(atomic_compare_exchange_strong((o), (e), (d))) + +/* more comfortable atomic pointer declarations */ +#define atomic_ptr(type) _Atomic(type *) diff --git a/lib/isc/include/isc/stack.h b/lib/isc/include/isc/stack.h new file mode 100644 index 0000000000..92475b9d43 --- /dev/null +++ b/lib/isc/include/isc/stack.h @@ -0,0 +1,189 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. 
+ */
+
+#pragma once
+
+#include <isc/atomic.h>
+#include <isc/list.h>
+#include <isc/pause.h>
+#include <isc/util.h>
+
+/*
+ * Single links, aka stack links
+ */
+
+#define ISC_SLINK(type) \
+        struct { \
+                type *next; \
+        }
+#define ISC_SLINK_INITIALIZER_TYPE(type) \
+        { \
+                .next = ISC_LINK_TOMBSTONE(type), \
+        }
+#define ISC_SLINK_INIT_TYPE(elt, link, type) \
+        { \
+                (elt)->link.next = ISC_LINK_TOMBSTONE(type); \
+        }
+
+#define ISC_SLINK_INITIALIZER ISC_SLINK_INITIALIZER_TYPE(void)
+#define ISC_SLINK_INIT(elt, link) ISC_SLINK_INIT_TYPE(elt, link, void)
+
+#define ISC_SLINK_LINKED_TYPE(elt, link, type) \
+        ((type *)((elt)->link.next) != ISC_LINK_TOMBSTONE(type))
+#define ISC_SLINK_LINKED(elt, link) ISC_SLINK_LINKED_TYPE(elt, link, void)
+
+#define ISC_SLINK_NEXT(elt, link) ((elt)->link.next)
+
+/*
+ * Simple singly-linked stack implementation
+ */
+
+#define ISC_STACK(type) \
+        struct { \
+                type *top; \
+        }
+
+#define ISC_STACK_INITIALIZER \
+        { \
+                .top = NULL, \
+        }
+#define ISC_STACK_INIT(stack) \
+        { \
+                (stack).top = NULL; \
+        }
+
+#define ISC_STACK_TOP(stack) ((stack).top)
+#define ISC_STACK_EMPTY(stack) ((stack).top == NULL)
+
+#define ISC_STACK_PUSHUNSAFE(stack, elt, link) \
+        { \
+                (elt)->link.next = (stack).top; \
+                (stack).top = (elt); \
+        }
+
+#define ISC_STACK_PUSH(stack, elt, link) \
+        { \
+                INSIST(!ISC_SLINK_LINKED(elt, link)); \
+                ISC_STACK_PUSHUNSAFE(stack, elt, link); \
+        }
+
+/*
+ * This is slightly roundabout because when `type` is `void`
+ * we can't directly access `top_->link.next`.
+ */
+#define ISC_STACK_POP_TYPE(stack, link, type) \
+        ({ \
+                type *top_ = (stack).top; \
+                if (top_ != NULL) { \
+                        type *next_ = ISC_SLINK_NEXT((stack).top, link); \
+                        ISC_SLINK_INIT_TYPE((stack).top, link, type); \
+                        (stack).top = next_; \
+                } \
+                top_; \
+        })
+
+#define ISC_STACK_POP(stack, link) ISC_STACK_POP_TYPE(stack, link, void)
+
+/*
+ * Helper to add element if not already on stack.
+ */
+#define ISC_STACK_ADD(stack, elt, link) \
+        { \
+                if (!ISC_SLINK_LINKED(elt, link)) { \
+                        ISC_STACK_PUSHUNSAFE(stack, elt, link); \
+                } \
+        }
+
+/*
+ * This is a simplified implementation of the Treiber stack algorithm with
+ * back-off.
+ *
+ * The original paper which describes the algorithm can be found here:
+ * https://dominoweb.draco.res.ibm.com/58319a2ed2b1078985257003004617ef.html
+ *
+ * We sidestep the ABA problem when removing the elements by adding an
+ * additional constraint: once the element has been removed from the stack, it
+ * cannot be re-added.
+ *
+ * This is actually not a problem for planned usage of the ISC_ASTACK where we
+ * only use ISC_ASTACK_PUSH() to add individual elements and
+ * ISC_ASTACK_TO_STACK() to empty the stack in a single atomic operation.
+ *
+ * To make things simple, there's no implementation for ISC_ASTACK_POP() because
+ * that's complex to implement and requires double-word CAS (for ABA counter).
+ *
+ * The pointers are wrapped in structs so that their types are
+ * distinct, and to match the ISC_LIST macros.
+ *
+ * See doc/dev/dev.md for examples.
+ */
+
+#define ISC_ASTACK(type) \
+        struct { \
+                atomic_ptr(type) atop; \
+        }
+#define ISC_ASTACK_INITIALIZER \
+        { \
+                .atop = NULL, \
+        }
+#define ISC_ASTACK_INIT(stack) \
+        { \
+                (stack).atop = NULL; \
+        }
+
+/*
+ * ATOMIC: for performance, this kind of retry loop should use a weak
+ * CAS with relaxed ordering in the failure case; on success, release
+ * ordering ensures that writing the element contents happens before
+ * reading them, following the acquire in ISC_ASTACK_TOP() and _TO_STACK().
+ */
+#define ISC_ASTACK_PUSHUNSAFE(stack, elt, link) \
+        { \
+                (elt)->link.next = atomic_load_relaxed(&(stack).atop); \
+                while (!atomic_compare_exchange_weak_explicit( \
+                        &(stack).atop, &ISC_SLINK_NEXT(elt, link), elt, \
+                        memory_order_release, memory_order_relaxed)) \
+                { \
+                        isc_pause(); \
+                } \
+        }
+
+#define ISC_ASTACK_PUSH(stack, elt, link) \
+        { \
+                INSIST(!ISC_SLINK_LINKED(elt, link)); \
+                ISC_ASTACK_PUSHUNSAFE(stack, elt, link); \
+        }
+
+/*
+ * Helper to add element if not already on stack.
+ */
+#define ISC_ASTACK_ADD(stack, elt, link) \
+        { \
+                if (!ISC_SLINK_LINKED(elt, link)) { \
+                        ISC_ASTACK_PUSHUNSAFE(stack, elt, link); \
+                } \
+        }
+
+/*
+ * ATOMIC: acquire ordering pairs with release in ISC_ASTACK_PUSHUNSAFE()
+ */
+#define ISC_ASTACK_TOP(stack) atomic_load_acquire(&(stack).atop)
+#define ISC_ASTACK_EMPTY(stack) (ISC_ASTACK_TOP(stack) == NULL)
+
+/*
+ * ATOMIC: acquire ordering pairs with release in ISC_ASTACK_PUSHUNSAFE()
+ */
+#define ISC_ASTACK_TO_STACK(stack) \
+        { \
+                .top = atomic_exchange_acquire(&(stack).atop, NULL), \
+        }