2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-31 06:15:47 +00:00

timeval: Remove CACHE_TIME scheme.

This commit removes the CACHE_TIME scheme from the timeval module in
order to eliminate lock contention on the read/write lock that protects
the cached time.  To get the time, a thread now calls the
'clock_gettime()' system call directly.

As a side effect, the timer can only be warped after it has been stopped
with the 'appctl time/stop' command.

Signed-off-by: Alex Wang <alexw@nicira.com>
Signed-off-by: Ethan Jackson <ethan@nicira.com>
Acked-by: Ethan Jackson <ethan@nicira.com>
This commit is contained in:
Alex Wang
2013-09-04 15:49:19 -07:00
committed by Ethan Jackson
parent c4f5d00bb7
commit 31ef9f5178
13 changed files with 17 additions and 339 deletions

View File

@@ -289,7 +289,6 @@ fork_and_clean_up(void)
fatal_signal_fork();
} else if (!pid) {
/* Running in child process. */
time_postfork();
lockfile_postfork();
}
return pid;

View File

@@ -41,14 +41,12 @@ VLOG_DEFINE_THIS_MODULE(timeval);
struct clock {
clockid_t id; /* CLOCK_MONOTONIC or CLOCK_REALTIME. */
struct ovs_rwlock rwlock; /* Mutual exclusion for 'cache'. */
/* Features for use by unit tests. Protected by 'rwlock'. */
struct ovs_rwlock rwlock;
struct timespec warp; /* Offset added for unit tests. */
bool stopped; /* Disables real-time updates if true. */
/* Relevant only if CACHE_TIME is true. */
volatile sig_atomic_t tick; /* Has the timer ticked? Set by signal. */
struct timespec cache; /* Last time read from kernel. */
};
@@ -67,11 +65,6 @@ static long long int deadline = LLONG_MAX;
* up. */
DEFINE_STATIC_PER_THREAD_DATA(long long int, last_wakeup, 0);
static void set_up_timer(void);
static void set_up_signal(int flags);
static void sigalrm_handler(int);
static void block_sigalrm(sigset_t *);
static void unblock_sigalrm(const sigset_t *);
static void log_poll_interval(long long int last_wakeup);
static struct rusage *get_recent_rusage(void);
static void refresh_rusage(void);
@@ -83,7 +76,6 @@ init_clock(struct clock *c, clockid_t id)
{
memset(c, 0, sizeof *c);
c->id = id;
ovs_rwlock_init(&c->rwlock);
xclock_gettime(c->id, &c->cache);
}
@@ -99,9 +91,6 @@ do_init_time(void)
: CLOCK_REALTIME));
init_clock(&wall_clock, CLOCK_REALTIME);
boot_time = timespec_to_msec(&monotonic_clock.cache);
set_up_signal(SA_RESTART);
set_up_timer();
}
/* Initializes the timetracking module, if not already initialized. */
@@ -112,86 +101,16 @@ time_init(void)
pthread_once(&once, do_init_time);
}
static void
set_up_signal(int flags)
{
struct sigaction sa;
memset(&sa, 0, sizeof sa);
sa.sa_handler = sigalrm_handler;
sigemptyset(&sa.sa_mask);
sa.sa_flags = flags;
xsigaction(SIGALRM, &sa, NULL);
}
static void
set_up_timer(void)
{
static timer_t timer_id; /* "static" to avoid apparent memory leak. */
struct itimerspec itimer;
if (!CACHE_TIME) {
return;
}
if (timer_create(monotonic_clock.id, NULL, &timer_id)) {
VLOG_FATAL("timer_create failed (%s)", ovs_strerror(errno));
}
itimer.it_interval.tv_sec = 0;
itimer.it_interval.tv_nsec = TIME_UPDATE_INTERVAL * 1000 * 1000;
itimer.it_value = itimer.it_interval;
if (timer_settime(timer_id, 0, &itimer, NULL)) {
VLOG_FATAL("timer_settime failed (%s)", ovs_strerror(errno));
}
}
/* Re-creates the interval timer in a process that was just forked.
 *
 * The interval timer is what makes time advance even when nobody calls
 * time_refresh().  fork() does not hand the parent's interval timer down to
 * the child, so the child has to call this function after fork().
 *
 * Calls assert_single_threaded() before doing anything else. */
void
time_postfork(void)
{
    assert_single_threaded();

    time_init();
    set_up_timer();
}
/* Marks both the wall clock and the monotonic clock as having ticked, which
 * forces the current time to be re-read from the kernel.
 *
 * Callers rarely need this: the time is refreshed automatically at least
 * once every TIME_UPDATE_INTERVAL milliseconds.  When CACHE_TIME is false
 * the current time is always refreshed anyway, so calling this function
 * changes nothing. */
void
time_refresh(void)
{
    wall_clock.tick = true;
    monotonic_clock.tick = true;
}
static void
time_timespec__(struct clock *c, struct timespec *ts)
{
time_init();
for (;;) {
/* Use the cached time by preference, but fall through if there's been
* a clock tick. */
ovs_rwlock_rdlock(&c->rwlock);
if (c->stopped || !c->tick) {
timespec_add(ts, &c->cache, &c->warp);
ovs_rwlock_unlock(&c->rwlock);
return;
}
ovs_rwlock_unlock(&c->rwlock);
/* Refresh the cache. */
ovs_rwlock_wrlock(&c->rwlock);
if (c->tick) {
c->tick = false;
xclock_gettime(c->id, &c->cache);
}
if (!c->stopped) {
xclock_gettime(c->id, ts);
} else {
ovs_rwlock_rdlock(&c->rwlock);
timespec_add(ts, &c->cache, &c->warp);
ovs_rwlock_unlock(&c->rwlock);
}
}
@@ -268,7 +187,6 @@ time_alarm(unsigned int secs)
assert_single_threaded();
time_init();
time_refresh();
now = time_msec();
msecs = secs * 1000LL;
@@ -286,8 +204,6 @@ time_alarm(unsigned int secs)
* timeout is reached. (Because of this property, this function will
* never return -EINTR.)
*
* - As a side effect, refreshes the current time (like time_refresh()).
*
* Stores the number of milliseconds elapsed during poll in '*elapsed'. */
int
time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when,
@@ -295,18 +211,14 @@ time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when,
{
long long int *last_wakeup = last_wakeup_get();
long long int start;
sigset_t oldsigs;
bool blocked;
int retval;
time_init();
time_refresh();
if (*last_wakeup) {
log_poll_interval(*last_wakeup);
}
coverage_clear();
start = time_msec();
blocked = false;
timeout_when = MIN(timeout_when, deadline);
@@ -327,7 +239,6 @@ time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when,
retval = -errno;
}
time_refresh();
if (deadline <= time_msec()) {
fatal_signal_handler(SIGALRM);
if (retval < 0) {
@@ -339,14 +250,6 @@ time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when,
if (retval != -EINTR) {
break;
}
if (!blocked && CACHE_TIME) {
block_sigalrm(&oldsigs);
blocked = true;
}
}
if (blocked) {
unblock_sigalrm(&oldsigs);
}
*last_wakeup = time_msec();
refresh_rusage();
@@ -354,27 +257,6 @@ time_poll(struct pollfd *pollfds, int n_pollfds, long long int timeout_when,
return retval;
}
/* SIGALRM handler.  Its only effect is to set the 'tick' flag of both the
 * wall clock and the monotonic clock; the signal number is ignored. */
static void
sigalrm_handler(int sig_nr OVS_UNUSED)
{
    wall_clock.tick = true;
    monotonic_clock.tick = true;
}
/* Adds SIGALRM to the set of blocked signals with xpthread_sigmask().  The
 * signal mask that was in effect beforehand is stored in '*oldsigs' so that
 * unblock_sigalrm() can put it back later. */
static void
block_sigalrm(sigset_t *oldsigs)
{
    sigset_t mask;

    sigemptyset(&mask);
    sigaddset(&mask, SIGALRM);

    xpthread_sigmask(SIG_BLOCK, &mask, oldsigs);
}
/* Reinstates the signal mask in '*oldsigs', normally the one saved by
 * block_sigalrm().  Note that the whole mask is replaced (SIG_SETMASK), not
 * just the SIGALRM bit. */
static void
unblock_sigalrm(const sigset_t *oldsigs)
{
    xpthread_sigmask(SIG_SETMASK, oldsigs, NULL);
}
long long int
timespec_to_msec(const struct timespec *ts)
{
@@ -570,6 +452,7 @@ timeval_stop_cb(struct unixctl_conn *conn,
{
ovs_rwlock_wrlock(&monotonic_clock.rwlock);
monotonic_clock.stopped = true;
xclock_gettime(monotonic_clock.id, &monotonic_clock.cache);
ovs_rwlock_unlock(&monotonic_clock.rwlock);
unixctl_command_reply(conn, NULL);

View File

@@ -40,25 +40,6 @@ BUILD_ASSERT_DECL(TYPE_IS_SIGNED(time_t));
#define TIME_MAX TYPE_MAXIMUM(time_t)
#define TIME_MIN TYPE_MINIMUM(time_t)
/* Interval between updates to the reported time, in ms. This should not be
* adjusted much below 10 ms or so with the current implementation, or too
* much time will be wasted in signal handlers and calls to clock_gettime(). */
#define TIME_UPDATE_INTERVAL 25
/* True on systems that support a monotonic clock. Compared to just getting
* the value of a variable, clock_gettime() is somewhat expensive, even on
* systems that try hard to optimize it (such as x86-64 Linux), so it's
* worthwhile to minimize calls via caching. */
#ifndef CACHE_TIME
#if defined ESX
#define CACHE_TIME 0
#else
#define CACHE_TIME 1
#endif
#endif /* ifndef CACHE_TIME */
void time_postfork(void);
void time_refresh(void);
time_t time_now(void);
time_t time_wall(void);
long long int time_msec(void);

View File

@@ -46,22 +46,6 @@ AC_DEFUN([OVS_CHECK_NDEBUG],
[ndebug=false])
AM_CONDITIONAL([NDEBUG], [test x$ndebug = xtrue])])
dnl Checks for --enable-cache-time and defines CACHE_TIME if it is specified.
dnl
dnl An option value of "yes" defines CACHE_TIME to 1 and "no" defines it to 0;
dnl any other value stops configure with an error.  If the option is not given
dnl at all, CACHE_TIME is not defined here.
AC_DEFUN([OVS_CHECK_CACHE_TIME],
[AC_ARG_ENABLE(
[cache-time],
[AC_HELP_STRING([--enable-cache-time],
[Override time caching default (for testing only)])],
[case "${enableval}" in
(yes) cache_time=1;;
(no) cache_time=0;;
(*) AC_MSG_ERROR([bad value ${enableval} for --enable-cache-time]) ;;
esac
AC_DEFINE_UNQUOTED([CACHE_TIME], [$cache_time],
[Define to 1 to enable time caching, to 0 to disable time caching, or
leave undefined to use the default (as one should
ordinarily do).])])])
dnl Checks for ESX.
AC_DEFUN([OVS_CHECK_ESX],
[AC_CHECK_HEADER([vmware.h],

View File

@@ -3878,7 +3878,7 @@ subfacet_max_idle(const struct dpif_backer *backer)
* pass made by update_stats(), because the former function never looks at
* uninstallable subfacets.
*/
enum { BUCKET_WIDTH = ROUND_UP(100, TIME_UPDATE_INTERVAL) };
enum { BUCKET_WIDTH = 100 };
enum { N_BUCKETS = 5000 / BUCKET_WIDTH };
int buckets[N_BUCKETS] = { 0 };
int total, subtotal, bucket;

View File

@@ -32,7 +32,6 @@ TESTSUITE_AT = \
tests/json.at \
tests/jsonrpc.at \
tests/jsonrpc-py.at \
tests/timeval.at \
tests/tunnel.at \
tests/lockfile.at \
tests/reconnect.at \
@@ -122,7 +121,6 @@ valgrind_wrappers = \
tests/valgrind/test-reconnect \
tests/valgrind/test-sha1 \
tests/valgrind/test-stp \
tests/valgrind/test-timeval \
tests/valgrind/test-type-props \
tests/valgrind/test-unix-socket \
tests/valgrind/test-uuid \
@@ -293,10 +291,6 @@ noinst_PROGRAMS += tests/test-sha1
tests_test_sha1_SOURCES = tests/test-sha1.c
tests_test_sha1_LDADD = lib/libopenvswitch.a $(SSL_LIBS)
noinst_PROGRAMS += tests/test-timeval
tests_test_timeval_SOURCES = tests/test-timeval.c
tests_test_timeval_LDADD = lib/libopenvswitch.a $(SSL_LIBS)
noinst_PROGRAMS += tests/test-strtok_r
tests_test_strtok_r_SOURCES = tests/test-strtok_r.c

View File

@@ -285,6 +285,7 @@ AT_SETUP([learning action - self-modifying flow])
OVS_VSWITCHD_START
ADD_OF_PORTS([br0], 1, 2, 3)
ovs-appctl time/stop
# Set up flow table for TCPv4 port learning.
AT_CHECK([[ovs-ofctl add-flow br0 'actions=load:3->NXM_NX_REG0[0..15],learn(table=0,priority=65535,NXM_OF_ETH_SRC[],NXM_OF_VLAN_TCI[0..11],output:NXM_NX_REG0[0..15]),output:2']])

View File

@@ -645,6 +645,7 @@ NXT_PACKET_IN (xid=0x0): table_id=7 cookie=0x9 total_len=64 in_port=1 tun_id=0x6
udp,metadata=0,in_port=0,dl_vlan=80,dl_vlan_pcp=0,dl_src=80:81:81:81:81:81,dl_dst=82:82:82:82:82:82,nw_src=83.83.83.83,nw_dst=84.84.84.84,nw_tos=0,nw_ecn=0,nw_ttl=0,tp_src=85,tp_dst=86 udp_csum:43a1
])
ovs-appctl time/stop
AT_CHECK([ovs-appctl time/warp 5000], [0], [ignore])
dnl Checksum SCTP.
@@ -1841,6 +1842,7 @@ AT_SETUP([ofproto-dpif - NetFlow flow expiration])
OVS_VSWITCHD_START([set Bridge br0 fail-mode=standalone])
ADD_OF_PORTS([br0], 1, 2)
ovs-appctl time/stop
ON_EXIT([kill `cat test-netflow.pid`])
AT_CHECK([test-netflow --log-file --detach --no-chdir --pidfile 0:127.0.0.1 > netflow.log], [0], [], [ignore])
AT_CAPTURE_FILE([netflow.log])
@@ -1996,6 +1998,7 @@ get_ages () {
AT_CHECK([ovs-ofctl add-flow br0 hard_timeout=199,idle_timeout=188,actions=drop])
get_ages duration1 hard1 idle1
ovs-appctl time/stop
# Warp time forward by 10 seconds, then modify the flow's actions.
ovs-appctl time/warp 10000
get_ages duration2 hard2 idle2
@@ -2059,6 +2062,8 @@ AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip], [0],
[NXST_FLOW reply:
idle_timeout=60, actions=fin_timeout(idle_timeout=5)
])
ovs-appctl time/stop
# Check that a TCP SYN packet does not change the timeout. (Because
# flow stats updates are mainly what implements the fin_timeout
# feature, we warp forward a couple of times to ensure that flow stats
@@ -2263,6 +2268,7 @@ AT_SETUP([ofproto-dpif - port duration])
OVS_VSWITCHD_START([set Bridge br0 protocols=OpenFlow13])
ADD_OF_PORTS([br0], 1, 2)
ovs-appctl time/stop
ovs-appctl time/warp 10000
AT_CHECK([ovs-ofctl -O openflow13 dump-ports br0], [0], [stdout])
@@ -2474,6 +2480,7 @@ AT_CHECK([ovs-appctl netdev-dummy/set-admin-state up], 0, [OK
ADD_OF_PORTS([br0], [7])
AT_CHECK([ovs-ofctl add-flow br0 action=normal])
AT_CHECK([ovs-ofctl add-flow br1 action=normal])
ovs-appctl time/stop
ovs-appctl time/warp 5000
AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
@@ -2619,6 +2626,7 @@ AT_DATA([flows.txt], [dnl
table=0 in_port=1 actions=load:2->NXM_NX_REG0[[0..15]],learn(table=1,priority=65535,NXM_OF_ETH_SRC[[]],NXM_OF_VLAN_TCI[[0..11]],output:NXM_NX_REG0[[0..15]]),output:2
])
AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
ovs-appctl time/stop
# We send each packet twice because the first packet in each flow causes the
# flow table to change and thus revalidations, which (depending on timing)
# can keep a megaflow from being installed. The revalidations are done by

View File

@@ -96,7 +96,6 @@ do_fork(void)
{
switch (fork()) {
case 0:
time_postfork();
lockfile_postfork();
return CHILD;

View File

@@ -1,145 +0,0 @@
/*
* Copyright (c) 2009, 2010, 2011 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <config.h>
#include "timeval.h"
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
#include "command-line.h"
#include "daemon.h"
#include "util.h"
#undef NDEBUG
#include <assert.h>
/* Returns the time obtained from xgettimeofday(), converted to milliseconds
 * by timeval_to_msec(). */
static long long int
gettimeofday_in_msec(void)
{
    struct timeval now;

    xgettimeofday(&now);
    return timeval_to_msec(&now);
}
/* Checks that time_msec() and time_wall_msec() keep up with gettimeofday().
 *
 * Repeatedly sleeps in select() until a signal interrupts it.  As soon as
 * gettimeofday() shows that at least TIME_UPDATE_INTERVAL ms have really
 * passed since the start, asserts that both time_msec() and
 * time_wall_msec() have advanced by at least TIME_UPDATE_INTERVAL - 1 ms.
 *
 * Calls ovs_fatal() if select() returns anything other than -1 (for
 * example, because the 1-second timeout expired) or fails with an error
 * other than EINTR. */
static void
do_test(void)
{
    const long long int msec_at_start = time_msec();
    const long long int wall_at_start = time_wall_msec();
    const long long int gtod_at_start = gettimeofday_in_msec();

    do {
        /* Wait up to 1 second.  Doing the timeout with select() avoids
         * interfering with the interval timer. */
        struct timeval timeout;
        int n_ready;

        timeout.tv_sec = 1;
        timeout.tv_usec = 0;
        n_ready = select(0, NULL, NULL, NULL, &timeout);
        if (n_ready != -1) {
            ovs_fatal(0, "select returned %d", n_ready);
        } else if (errno != EINTR) {
            ovs_fatal(errno, "select reported unexpected error");
        }
    } while (gettimeofday_in_msec() - gtod_at_start < TIME_UPDATE_INTERVAL);

    /* gettimeofday() and time_msec() draw on time sources of different
     * granularity, so rounding can make them disagree slightly.  Allow
     * 1 ms of slop. */
    assert(time_msec() - msec_at_start >= TIME_UPDATE_INTERVAL - 1);
    assert(time_wall_msec() - wall_at_start >= TIME_UPDATE_INTERVAL - 1);
}
/* Reports the expected command-line syntax, including the program name,
 * through ovs_fatal(). */
static void
usage(void)
{
    ovs_fatal(0, "usage: %s TEST, where TEST is \"plain\" or \"daemon\"",
              program_name);
}
/* Test driver.  Expects exactly one argument that selects the test:
 *
 *     - "plain": runs do_test() directly.
 *
 *     - "daemon": daemonizes with a pidfile named test-timeval.pid in the
 *       current directory, runs do_test(), and then creates the file
 *       test-timeval.success to signal that the test passed.
 *
 * Either test exits with status 77 when CACHE_TIME is false.  Any other
 * command line ends up in usage(). */
int
main(int argc, char *argv[])
{
    proctitle_init(argc, argv);
    set_program_name(argv[0]);

    if (argc != 2) {
        usage();
    } else if (strcmp(argv[1], "plain") == 0) {
        /* Without time caching there is little to test, and no SIGALRM will
         * arrive to get us out of select(), so just skip the test. */
        if (!CACHE_TIME) {
            exit(77);
        }

        do_test();
    } else if (strcmp(argv[1], "daemon") == 0) {
        /* Test that time still advances even in a daemon.  This is an
         * interesting test because fork() cancels the interval timer. */
        char cwd[1024];
        char *pidfile_path;
        FILE *marker;

        if (!CACHE_TIME) {
            exit(77);
        }

        assert(getcwd(cwd, sizeof cwd) == cwd);

        /* Remove any success marker left behind by an earlier run. */
        unlink("test-timeval.success");

        /* Daemonize, with a pidfile in the current directory. */
        set_detach();
        pidfile_path = xasprintf("%s/test-timeval.pid", cwd);
        set_pidfile(pidfile_path);
        free(pidfile_path);
        set_no_chdir();
        daemonize();

        /* Run the test. */
        do_test();

        /* Report success by writing out a file, since the ultimate invoker
         * of test-timeval can't wait on the daemonized process. */
        marker = fopen("test-timeval.success", "w");
        if (!marker) {
            ovs_fatal(errno, "test-timeval.success: create failed");
        }
        fputs("success\n", marker);
        fclose(marker);
    } else {
        usage();
    }

    return 0;
}

View File

@@ -92,7 +92,6 @@ m4_include([tests/uuid.at])
m4_include([tests/json.at])
m4_include([tests/jsonrpc.at])
m4_include([tests/jsonrpc-py.at])
m4_include([tests/timeval.at])
m4_include([tests/tunnel.at])
m4_include([tests/lockfile.at])
m4_include([tests/reconnect.at])

View File

@@ -1,22 +0,0 @@
AT_BANNER([timeval unit tests])
dnl Runs "test-timeval plain" in the foreground and expects exit status 0.
AT_SETUP([check that time advances])
AT_KEYWORDS([timeval])
AT_CHECK([test-timeval plain], [0])
AT_CLEANUP
dnl Runs "test-timeval daemon", which detaches and reports success by
dnl creating the file test-timeval.success.  Because the daemonized process
dnl cannot be waited on, the second check polls for that file after 0.1, 1.1,
dnl and 2.1 seconds and fails if it never appears.
AT_SETUP([check that time advances after daemonize()])
AT_KEYWORDS([timeval])
AT_CHECK([test-timeval daemon], [0])
AT_CHECK(
[# First try a quick sleep, so that the test completes very quickly
# in the normal case. POSIX doesn't require fractional times to
# work, so this might not work.
sleep 0.1; if test -e test-timeval.success; then echo success; exit 0; fi
# Then wait up to 2 seconds.
sleep 1; if test -e test-timeval.success; then echo success; exit 0; fi
sleep 1; if test -e test-timeval.success; then echo success; exit 0; fi
echo failure; exit 1],
[0], [success
], [])
AT_CLEANUP

View File

@@ -548,7 +548,6 @@ bridge_reconfigure_ofp(void)
long long int deadline;
struct bridge *br;
time_refresh();
deadline = time_msec() + OFP_PORT_ACTION_WINDOW;
/* The kernel will reject any attempt to add a given port to a datapath if
@@ -567,7 +566,6 @@ bridge_reconfigure_ofp(void)
list_remove(&garbage->list_node);
free(garbage);
time_refresh();
if (time_msec() >= deadline) {
return false;
}
@@ -580,7 +578,6 @@ bridge_reconfigure_ofp(void)
HMAP_FOR_EACH_SAFE (if_cfg, next, hmap_node, &br->if_cfg_todo) {
iface_create(br, if_cfg, OFPP_NONE);
time_refresh();
if (time_msec() >= deadline) {
return false;
}