2
0
mirror of https://github.com/openvswitch/ovs synced 2025-09-03 15:55:19 +00:00

ovs-rcu: New library.

RCU allows multiple threads to read objects in parallel without any
performance penalty.  The following commit will introduce the first use.

Signed-off-by: Ben Pfaff <blp@nicira.com>
Acked-by: Andy Zhou <azhou@nicira.com>
This commit is contained in:
Ben Pfaff
2014-03-18 16:34:28 -07:00
parent 06f64d032a
commit 0f2ea84841
7 changed files with 510 additions and 1 deletions

View File

@@ -145,6 +145,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/ovs-atomic-locked.h \
lib/ovs-atomic-pthreads.h \
lib/ovs-atomic.h \
lib/ovs-rcu.c \
lib/ovs-rcu.h \
lib/ovs-thread.c \
lib/ovs-thread.h \
lib/ovsdb-data.c \

293
lib/ovs-rcu.c Normal file
View File

@@ -0,0 +1,293 @@
/*
* Copyright (c) 2014 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
#include "ovs-rcu.h"
#include "guarded-list.h"
#include "list.h"
#include "ovs-thread.h"
#include "poll-loop.h"
#include "seq.h"
struct ovsrcu_cb {
void (*function)(void *aux);
void *aux;
};
struct ovsrcu_cbset {
struct list list_node;
struct ovsrcu_cb cbs[16];
int n_cbs;
};
struct ovsrcu_perthread {
struct list list_node; /* In global list. */
struct ovs_mutex mutex;
uint64_t seqno;
struct ovsrcu_cbset *cbset;
};
static struct seq *global_seqno;
static pthread_key_t perthread_key;
static struct list ovsrcu_threads;
static struct ovs_mutex ovsrcu_threads_mutex;
static struct guarded_list flushed_cbsets;
static struct seq *flushed_cbsets_seq;
static void ovsrcu_init(void);
static void ovsrcu_flush_cbset(struct ovsrcu_perthread *);
static void ovsrcu_unregister__(struct ovsrcu_perthread *);
static bool ovsrcu_call_postponed(void);
static void *ovsrcu_postpone_thread(void *arg OVS_UNUSED);
static void ovsrcu_synchronize(void);
static struct ovsrcu_perthread *
ovsrcu_perthread_get(void)
{
struct ovsrcu_perthread *perthread;
ovsrcu_init();
perthread = pthread_getspecific(perthread_key);
if (!perthread) {
perthread = xmalloc(sizeof *perthread);
ovs_mutex_init(&perthread->mutex);
perthread->seqno = seq_read(global_seqno);
perthread->cbset = NULL;
ovs_mutex_lock(&ovsrcu_threads_mutex);
list_push_back(&ovsrcu_threads, &perthread->list_node);
ovs_mutex_unlock(&ovsrcu_threads_mutex);
pthread_setspecific(perthread_key, perthread);
}
return perthread;
}
/* Indicates the end of a quiescent state. See "Details" near the top of
* ovs-rcu.h.
*
* Quiescent states don't stack or nest, so this always ends a quiescent state
* even if ovsrcu_quiesce_start() was called multiple times in a row. */
void
ovsrcu_quiesce_end(void)
{
ovsrcu_perthread_get();
}
static void
ovsrcu_quiesced(void)
{
if (single_threaded()) {
ovsrcu_call_postponed();
} else {
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
if (ovsthread_once_start(&once)) {
xpthread_create(NULL, NULL, ovsrcu_postpone_thread, NULL);
ovsthread_once_done(&once);
}
}
}
/* Indicates the beginning of a quiescent state. See "Details" near the top of
* ovs-rcu.h. */
void
ovsrcu_quiesce_start(void)
{
struct ovsrcu_perthread *perthread;
ovsrcu_init();
perthread = pthread_getspecific(perthread_key);
if (perthread) {
pthread_setspecific(perthread_key, NULL);
ovsrcu_unregister__(perthread);
}
ovsrcu_quiesced();
}
/* Indicates a momentary quiescent state. See "Details" near the top of
* ovs-rcu.h. */
void
ovsrcu_quiesce(void)
{
ovsrcu_perthread_get()->seqno = seq_read(global_seqno);
seq_change(global_seqno);
ovsrcu_quiesced();
}
static void
ovsrcu_synchronize(void)
{
uint64_t target_seqno;
if (single_threaded()) {
return;
}
target_seqno = seq_read(global_seqno);
ovsrcu_quiesce_start();
for (;;) {
uint64_t cur_seqno = seq_read(global_seqno);
struct ovsrcu_perthread *perthread;
bool done = true;
ovs_mutex_lock(&ovsrcu_threads_mutex);
LIST_FOR_EACH (perthread, list_node, &ovsrcu_threads) {
if (perthread->seqno <= target_seqno) {
done = false;
break;
}
}
ovs_mutex_unlock(&ovsrcu_threads_mutex);
if (done) {
break;
}
seq_wait(global_seqno, cur_seqno);
poll_block();
}
ovsrcu_quiesce_end();
}
/* Registers 'function' to be called, passing 'aux' as argument, after the
* next grace period.
*
* This function is more conveniently called through the ovsrcu_postpone()
* macro, which provides a type-safe way to allow 'function''s parameter to be
* any pointer type. */
void
ovsrcu_postpone__(void (*function)(void *aux), void *aux)
{
struct ovsrcu_perthread *perthread = ovsrcu_perthread_get();
struct ovsrcu_cbset *cbset;
struct ovsrcu_cb *cb;
cbset = perthread->cbset;
if (!cbset) {
cbset = perthread->cbset = xmalloc(sizeof *perthread->cbset);
cbset->n_cbs = 0;
}
cb = &cbset->cbs[cbset->n_cbs++];
cb->function = function;
cb->aux = aux;
if (cbset->n_cbs >= ARRAY_SIZE(cbset->cbs)) {
ovsrcu_flush_cbset(perthread);
}
}
static bool
ovsrcu_call_postponed(void)
{
struct ovsrcu_cbset *cbset, *next_cbset;
struct list cbsets;
guarded_list_pop_all(&flushed_cbsets, &cbsets);
if (list_is_empty(&cbsets)) {
return false;
}
ovsrcu_synchronize();
LIST_FOR_EACH_SAFE (cbset, next_cbset, list_node, &cbsets) {
struct ovsrcu_cb *cb;
for (cb = cbset->cbs; cb < &cbset->cbs[cbset->n_cbs]; cb++) {
cb->function(cb->aux);
}
list_remove(&cbset->list_node);
free(cbset);
}
return true;
}
static void *
ovsrcu_postpone_thread(void *arg OVS_UNUSED)
{
pthread_detach(pthread_self());
for (;;) {
uint64_t seqno = seq_read(flushed_cbsets_seq);
if (!ovsrcu_call_postponed()) {
seq_wait(flushed_cbsets_seq, seqno);
poll_block();
}
}
OVS_NOT_REACHED();
}
static void
ovsrcu_flush_cbset(struct ovsrcu_perthread *perthread)
{
struct ovsrcu_cbset *cbset = perthread->cbset;
if (cbset) {
guarded_list_push_back(&flushed_cbsets, &cbset->list_node, SIZE_MAX);
perthread->cbset = NULL;
seq_change(flushed_cbsets_seq);
}
}
static void
ovsrcu_unregister__(struct ovsrcu_perthread *perthread)
{
if (perthread->cbset) {
ovsrcu_flush_cbset(perthread);
}
ovs_mutex_lock(&ovsrcu_threads_mutex);
list_remove(&perthread->list_node);
ovs_mutex_unlock(&ovsrcu_threads_mutex);
ovs_mutex_destroy(&perthread->mutex);
free(perthread);
seq_change(global_seqno);
}
static void
ovsrcu_thread_exit_cb(void *perthread)
{
ovsrcu_unregister__(perthread);
}
static void
ovsrcu_init(void)
{
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
if (ovsthread_once_start(&once)) {
global_seqno = seq_create();
xpthread_key_create(&perthread_key, ovsrcu_thread_exit_cb);
list_init(&ovsrcu_threads);
ovs_mutex_init(&ovsrcu_threads_mutex);
guarded_list_init(&flushed_cbsets);
flushed_cbsets_seq = seq_create();
ovsthread_once_done(&once);
}
}

182
lib/ovs-rcu.h Normal file
View File

@@ -0,0 +1,182 @@
/*
* Copyright (c) 2014 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef OVS_RCU_H
#define OVS_RCU_H 1
/* Read-Copy-Update (RCU)
* ======================
*
* Introduction
* ------------
*
* Atomic pointer access makes it pretty easy to implement lock-free
* algorithms. There is one big problem, though: when a writer updates a
* pointer to point to a new data structure, some thread might be reading the
* old version, and there's no convenient way to free the old version when all
* threads are done with the old version.
*
* The function ovsrcu_postpone() solves that problem. The function pointer
* passed in as its argument is called only after all threads are done with old
* versions of data structures. The function callback frees an old version of
* data no longer in use. This technique is called "read-copy-update", or RCU
* for short.
*
*
* Details
* -------
*
* A "quiescent state" is a time at which a thread holds no pointers to memory
* that is managed by RCU; that is, when the thread is known not to reference
* memory that might be an old version of some object freed via RCU. For
* example, poll_block() includes a quiescent state, as does
* ovs_mutex_cond_wait().
*
* The following functions manage the recognition of quiescent states:
*
* void ovsrcu_quiesce(void)
*
* Recognizes a momentary quiescent state in the current thread.
*
* void ovsrcu_quiesce_start(void)
* void ovsrcu_quiesce_end(void)
*
* Brackets a time period during which the current thread is quiescent.
*
* A newly created thread is initially active, not quiescent.
*
* When a quiescient state has occurred in every thread, we say that a "grace
* period" has occurred. Following a grace period, all of the callbacks
* postponed before the start of the grace period may be invoked. OVS takes
* care of this automatically through the RCU mechanism: while a process still
* has only a single thread, it invokes the postponed callbacks directly from
* ovsrcu_quiesce() and ovsrcu_quiesce_start(); after additional threads have
* been created, it creates an extra helper thread to invoke callbacks.
*
*
* Use
* ---
*
* Use OVSRCU_TYPE(TYPE) to declare a pointer to RCU-protected data, e.g. the
* following declares an RCU-protected "struct flow *" named flowp:
*
* OVSRCU_TYPE(struct flow *) flowp;
*
* Use ovsrcu_get(TYPE, VAR) to read an RCU-protected pointer, e.g. to read the
* pointer variable declared above:
*
* struct flow *flow = ovsrcu_get(struct flow *, flowp);
*
* Use ovsrcu_set() to write an RCU-protected pointer and ovsrcu_postpone() to
* free the previous data. If more than one thread can write the pointer, then
* some form of external synchronization, e.g. a mutex, is needed to prevent
* writers from interfering with one another. For example, to write the
* pointer variable declared above while safely freeing the old value:
*
* static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
*
* static void
* free_flow(struct flow *flow)
* {
* free(flow);
* }
*
* void
* change_flow(struct flow *new_flow)
* {
* ovs_mutex_lock(&mutex);
* ovsrcu_postpone(free_flow,
* ovsrcu_get_protected(struct flow *, &flowp));
* ovsrcu_set(&flowp, new_flow);
* ovs_mutex_unlock(&mutex);
* }
*
*/
#include "compiler.h"
#include "ovs-atomic.h"
/* Use OVSRCU_TYPE(TYPE) to declare a pointer to RCU-protected data, e.g. the
* following declares an RCU-protected "struct flow *" named flowp:
*
* OVSRCU_TYPE(struct flow *) flowp;
*
* Use ovsrcu_get(TYPE, VAR) to read an RCU-protected pointer, e.g. to read the
* pointer variable declared above:
*
* struct flow *flow = ovsrcu_get(struct flow *, flowp);
*
* If the pointer variable is currently protected against change (because
* the current thread holds a mutex that protects it), ovsrcu_get_protected()
* may be used instead. Only on the Alpha architecture is this likely to
* generate different code, but it may be useful documentation.
*
* (With GNU C or Clang, you get a compiler error if TYPE is wrong; other
* compilers will merrily carry along accepting the wrong type.)
*/
#if __GNUC__
#define OVSRCU_TYPE(TYPE) struct { ATOMIC(TYPE) p; }
#define ovsrcu_get__(TYPE, VAR, ORDER) \
({ \
TYPE value__; \
\
atomic_read_explicit(CONST_CAST(ATOMIC(TYPE) *, &(VAR)->p), \
&value__, ORDER); \
\
value__; \
})
#define ovsrcu_get(TYPE, VAR) \
CONST_CAST(TYPE, ovsrcu_get__(TYPE, VAR, memory_order_consume))
#define ovsrcu_get_protected(TYPE, VAR) \
CONST_CAST(TYPE, ovsrcu_get__(TYPE, VAR, memory_order_relaxed))
#else /* not GNU C */
typedef struct ovsrcu_pointer { ATOMIC(void *) p; };
#define OVSRCU_TYPE(TYPE) struct ovsrcu_pointer
static inline void *
ovsrcu_get__(const struct ovsrcu_pointer *pointer, memory_order order)
{
void *value;
atomic_read_explicit(&CONST_CAST(struct ovsrcu_pointer *, pointer)->p,
&value, order);
return value;
}
#define ovsrcu_get(TYPE, VAR) \
CONST_CAST(TYPE, ovsrcu_get__(VAR, memory_order_consume))
#define ovsrcu_get_protected(TYPE, VAR) \
CONST_CAST(TYPE, ovsrcu_get__(VAR, memory_order_relaxed))
#endif
/* Writes VALUE to the RCU-protected pointer whose address is VAR.
*
* Users require external synchronization (e.g. a mutex). See "Usage" above
* for an example. */
#define ovsrcu_set(VAR, VALUE) \
atomic_store_explicit(&(VAR)->p, VALUE, memory_order_release)
/* Calls FUNCTION passing ARG as its pointer-type argument following the next
* grace period. See "Usage" above for example. */
void ovsrcu_postpone__(void (*function)(void *aux), void *aux);
#define ovsrcu_postpone(FUNCTION, ARG) \
((void) sizeof((FUNCTION)(ARG), 1), \
(void) sizeof(*(ARG)), \
ovsrcu_postpone__((void (*)(void *))(FUNCTION), ARG))
/* Quiescent states. */
void ovsrcu_quiesce_start(void);
void ovsrcu_quiesce_end(void);
void ovsrcu_quiesce(void);
#endif /* ovs-rcu.h */

View File

@@ -22,6 +22,7 @@
#include <unistd.h>
#include "compiler.h"
#include "hash.h"
#include "ovs-rcu.h"
#include "poll-loop.h"
#include "socket-util.h"
#include "util.h"
@@ -223,7 +224,12 @@ void
ovs_mutex_cond_wait(pthread_cond_t *cond, const struct ovs_mutex *mutex_)
{
struct ovs_mutex *mutex = CONST_CAST(struct ovs_mutex *, mutex_);
int error = pthread_cond_wait(cond, &mutex->lock);
int error;
ovsrcu_quiesce_start();
error = pthread_cond_wait(cond, &mutex->lock);
ovsrcu_quiesce_end();
if (OVS_UNLIKELY(error)) {
ovs_abort(error, "pthread_cond_wait failed");
}
@@ -264,6 +270,7 @@ ovsthread_wrapper(void *aux_)
aux = *auxp;
free(auxp);
ovsrcu_quiesce_end();
return aux.start(aux.arg);
}
@@ -277,6 +284,7 @@ xpthread_create(pthread_t *threadp, pthread_attr_t *attr,
forbid_forking("multiple threads exist");
multithreaded = true;
ovsrcu_quiesce_end();
aux = xmalloc(sizeof *aux);
aux->start = start;
@@ -307,6 +315,12 @@ ovsthread_once_done(struct ovsthread_once *once)
ovs_mutex_unlock(&once->mutex);
}
bool
single_threaded(void)
{
return !multithreaded;
}
/* Asserts that the process has not yet created any threads (beyond the initial
* thread).
*

View File

@@ -598,6 +598,8 @@ void ovsthread_counter_inc(struct ovsthread_counter *, unsigned long long int);
unsigned long long int ovsthread_counter_read(
const struct ovsthread_counter *);
bool single_threaded(void);
void assert_single_threaded_at(const char *where);
#define assert_single_threaded() assert_single_threaded_at(SOURCE_LOCATOR)

View File

@@ -31,6 +31,7 @@
#include "fatal-signal.h"
#include "hash.h"
#include "hmap.h"
#include "ovs-rcu.h"
#include "ovs-thread.h"
#include "signals.h"
#include "seq.h"
@@ -286,6 +287,12 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE *handles OVS_UNUSED,
time_left = timeout_when - now;
}
if (!time_left) {
ovsrcu_quiesce();
} else {
ovsrcu_quiesce_start();
}
#ifndef _WIN32
retval = poll(pollfds, n_pollfds, time_left);
if (retval < 0) {
@@ -306,6 +313,10 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE *handles OVS_UNUSED,
}
#endif
if (time_left) {
ovsrcu_quiesce_end();
}
if (deadline <= time_msec()) {
#ifndef _WIN32
fatal_signal_handler(SIGALRM);

View File

@@ -32,6 +32,7 @@
#include "ofproto-dpif-ipfix.h"
#include "ofproto-dpif-sflow.h"
#include "ofproto-dpif-xlate.h"
#include "ovs-rcu.h"
#include "packets.h"
#include "poll-loop.h"
#include "seq.h"
@@ -297,6 +298,8 @@ void
udpif_set_threads(struct udpif *udpif, size_t n_handlers,
size_t n_revalidators)
{
ovsrcu_quiesce_start();
/* Stop the old threads (if any). */
if (udpif->handlers &&
(udpif->n_handlers != n_handlers
@@ -405,6 +408,8 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers,
xpthread_create(&udpif->dispatcher, NULL, udpif_dispatcher, udpif);
xpthread_create(&udpif->flow_dumper, NULL, udpif_flow_dumper, udpif);
}
ovsrcu_quiesce_end();
}
/* Waits for all ongoing upcall translations to complete. This ensures that