mirror of
https://github.com/openvswitch/ovs
synced 2025-10-13 14:07:02 +00:00
Nothing ever caused the reconnect FSM to transition to the S_START_CONNECT state, so we might as well get rid of the code for it.
559 lines
18 KiB
C
559 lines
18 KiB
C
/*
|
|
* Copyright (c) 2008, 2009, 2010 Nicira Networks.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at:
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include <config.h>
|
|
#include "reconnect.h"
|
|
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "poll-loop.h"
|
|
|
|
#define THIS_MODULE VLM_reconnect
|
|
#include "vlog.h"
|
|
|
|
#define STATES \
|
|
STATE(VOID, 1 << 0) \
|
|
STATE(BACKOFF, 1 << 1) \
|
|
STATE(CONNECT_IN_PROGRESS, 1 << 3) \
|
|
STATE(ACTIVE, 1 << 4) \
|
|
STATE(IDLE, 1 << 5) \
|
|
STATE(RECONNECT, 1 << 6)
|
|
enum state {
|
|
#define STATE(NAME, VALUE) S_##NAME = VALUE,
|
|
STATES
|
|
#undef STATE
|
|
};
|
|
|
|
static bool
|
|
is_connected_state(enum state state)
|
|
{
|
|
return (state & (S_ACTIVE | S_IDLE)) != 0;
|
|
}
|
|
|
|
struct reconnect {
|
|
/* Configuration. */
|
|
char *name;
|
|
int min_backoff;
|
|
int max_backoff;
|
|
int probe_interval;
|
|
|
|
/* State. */
|
|
enum state state;
|
|
long long int state_entered;
|
|
int backoff;
|
|
long long int last_received;
|
|
long long int last_connected;
|
|
unsigned int max_tries;
|
|
|
|
/* These values are simply for statistics reporting, not otherwise used
|
|
* directly by anything internal. */
|
|
long long int creation_time;
|
|
unsigned int n_attempted_connections, n_successful_connections;
|
|
unsigned int total_connected_duration;
|
|
unsigned int seqno;
|
|
};
|
|
|
|
static void reconnect_transition__(struct reconnect *, long long int now,
|
|
enum state state);
|
|
static long long int reconnect_deadline__(const struct reconnect *);
|
|
static bool reconnect_may_retry(struct reconnect *);
|
|
|
|
static const char *
|
|
reconnect_state_name__(enum state state)
|
|
{
|
|
switch (state) {
|
|
#define STATE(NAME, VALUE) case S_##NAME: return #NAME;
|
|
STATES
|
|
#undef STATE
|
|
}
|
|
return "***ERROR***";
|
|
}
|
|
|
|
/* Creates and returns a new reconnect FSM with default settings. The FSM is
|
|
* initially disabled. The caller will likely want to call reconnect_enable()
|
|
* and reconnect_set_name() on the returned object. */
|
|
struct reconnect *
|
|
reconnect_create(long long int now)
|
|
{
|
|
struct reconnect *fsm = xzalloc(sizeof *fsm);
|
|
|
|
fsm->name = xstrdup("void");
|
|
fsm->min_backoff = 1000;
|
|
fsm->max_backoff = 8000;
|
|
fsm->probe_interval = 5000;
|
|
|
|
fsm->state = S_VOID;
|
|
fsm->state_entered = now;
|
|
fsm->backoff = 0;
|
|
fsm->last_received = now;
|
|
fsm->last_connected = now;
|
|
fsm->max_tries = UINT_MAX;
|
|
fsm->creation_time = now;
|
|
|
|
return fsm;
|
|
}
|
|
|
|
/* Frees 'fsm'. */
|
|
void
|
|
reconnect_destroy(struct reconnect *fsm)
|
|
{
|
|
if (fsm) {
|
|
free(fsm->name);
|
|
free(fsm);
|
|
}
|
|
}
|
|
|
|
/* Returns 'fsm''s name. */
|
|
const char *
|
|
reconnect_get_name(const struct reconnect *fsm)
|
|
{
|
|
return fsm->name;
|
|
}
|
|
|
|
/* Sets 'fsm''s name to 'name'. If 'name' is null, then "void" is used
|
|
* instead.
|
|
*
|
|
* The name set for 'fsm' is used in log messages. */
|
|
void
|
|
reconnect_set_name(struct reconnect *fsm, const char *name)
|
|
{
|
|
free(fsm->name);
|
|
fsm->name = xstrdup(name ? name : "void");
|
|
}
|
|
|
|
/* Return the minimum number of milliseconds to back off between consecutive
|
|
* connection attempts. The default is 1000 ms. */
|
|
int
|
|
reconnect_get_min_backoff(const struct reconnect *fsm)
|
|
{
|
|
return fsm->min_backoff;
|
|
}
|
|
|
|
/* Return the maximum number of milliseconds to back off between consecutive
|
|
* connection attempts. The default is 8000 ms. */
|
|
int
|
|
reconnect_get_max_backoff(const struct reconnect *fsm)
|
|
{
|
|
return fsm->max_backoff;
|
|
}
|
|
|
|
/* Returns the "probe interval" for 'fsm' in milliseconds. If this is zero, it
|
|
* disables the connection keepalive feature. If it is nonzero, then if the
|
|
* interval passes while 'fsm' is connected and without reconnect_received()
|
|
* being called for 'fsm', reconnect_run() returns RECONNECT_PROBE. If the
|
|
* interval passes again without reconnect_received() being called,
|
|
* reconnect_run() returns RECONNECT_DISCONNECT for 'fsm'. */
|
|
int
|
|
reconnect_get_probe_interval(const struct reconnect *fsm)
|
|
{
|
|
return fsm->probe_interval;
|
|
}
|
|
|
|
/* Limits the maximum number of times that 'fsm' will ask the client to try to
|
|
* reconnect to 'max_tries'. UINT_MAX (the default) means an unlimited number
|
|
* of tries.
|
|
*
|
|
* After the number of tries has expired, the 'fsm' will disable itself
|
|
* instead of backing off and retrying. */
|
|
void
|
|
reconnect_set_max_tries(struct reconnect *fsm, unsigned int max_tries)
|
|
{
|
|
fsm->max_tries = max_tries;
|
|
}
|
|
|
|
/* Returns the current remaining number of connection attempts, UINT_MAX if
|
|
* the number is unlimited. */
|
|
unsigned int
|
|
reconnect_get_max_tries(struct reconnect *fsm)
|
|
{
|
|
return fsm->max_tries;
|
|
}
|
|
|
|
/* Configures the backoff parameters for 'fsm'. 'min_backoff' is the minimum
|
|
* number of milliseconds, and 'max_backoff' is the maximum, between connection
|
|
* attempts.
|
|
*
|
|
* 'min_backoff' must be at least 1000, and 'max_backoff' must be greater than
|
|
* or equal to 'min_backoff'. */
|
|
void
|
|
reconnect_set_backoff(struct reconnect *fsm, int min_backoff, int max_backoff)
|
|
{
|
|
fsm->min_backoff = MAX(min_backoff, 1000);
|
|
fsm->max_backoff = max_backoff ? MAX(max_backoff, 1000) : 8000;
|
|
if (fsm->min_backoff > fsm->max_backoff) {
|
|
fsm->max_backoff = fsm->min_backoff;
|
|
}
|
|
|
|
if (fsm->state == S_BACKOFF && fsm->backoff > max_backoff) {
|
|
fsm->backoff = max_backoff;
|
|
}
|
|
}
|
|
|
|
/* Sets the "probe interval" for 'fsm' to 'probe_interval', in milliseconds.
|
|
* If this is zero, it disables the connection keepalive feature. If it is
|
|
* nonzero, then if the interval passes while 'fsm' is connected and without
|
|
* reconnect_received() being called for 'fsm', reconnect_run() returns
|
|
* RECONNECT_PROBE. If the interval passes again without reconnect_received()
|
|
* being called, reconnect_run() returns RECONNECT_DISCONNECT for 'fsm'.
|
|
*
|
|
* If 'probe_interval' is nonzero, then it will be forced to a value of at
|
|
* least 1000 ms. */
|
|
void
|
|
reconnect_set_probe_interval(struct reconnect *fsm, int probe_interval)
|
|
{
|
|
fsm->probe_interval = probe_interval ? MAX(1000, probe_interval) : 0;
|
|
}
|
|
|
|
/* Returns true if 'fsm' has been enabled with reconnect_enable(). Calling
|
|
* another function that indicates a change in connection state, such as
|
|
* reconnect_disconnected() or reconnect_force_reconnect(), will also enable
|
|
* a reconnect FSM. */
|
|
bool
|
|
reconnect_is_enabled(const struct reconnect *fsm)
|
|
{
|
|
return fsm->state != S_VOID;
|
|
}
|
|
|
|
/* If 'fsm' is disabled (the default for newly created FSMs), enables it, so
|
|
* that the next call to reconnect_run() for 'fsm' will return
|
|
* RECONNECT_CONNECT.
|
|
*
|
|
* If 'fsm' is not disabled, this function has no effect. */
|
|
void
|
|
reconnect_enable(struct reconnect *fsm, long long int now)
|
|
{
|
|
if (fsm->state == S_VOID && reconnect_may_retry(fsm)) {
|
|
reconnect_transition__(fsm, now, S_BACKOFF);
|
|
fsm->backoff = 0;
|
|
}
|
|
}
|
|
|
|
/* Disables 'fsm'. Until 'fsm' is enabled again, reconnect_run() will always
|
|
* return 0. */
|
|
void
|
|
reconnect_disable(struct reconnect *fsm, long long int now)
|
|
{
|
|
if (fsm->state != S_VOID) {
|
|
reconnect_transition__(fsm, now, S_VOID);
|
|
}
|
|
}
|
|
|
|
/* If 'fsm' is enabled and currently connected (or attempting to connect),
|
|
* forces reconnect_run() for 'fsm' to return RECONNECT_DISCONNECT the next
|
|
* time it is called, which should cause the client to drop the connection (or
|
|
* attempt), back off, and then reconnect. */
|
|
void
|
|
reconnect_force_reconnect(struct reconnect *fsm, long long int now)
|
|
{
|
|
if (fsm->state & (S_CONNECT_IN_PROGRESS | S_ACTIVE | S_IDLE)) {
|
|
reconnect_transition__(fsm, now, S_RECONNECT);
|
|
}
|
|
}
|
|
|
|
/* Tell 'fsm' that the connection dropped or that a connection attempt failed.
|
|
* 'error' specifies the reason: a positive value represents an errno value,
|
|
* EOF indicates that the connection was closed by the peer (e.g. read()
|
|
* returned 0), and 0 indicates no specific error.
|
|
*
|
|
* The FSM will back off, then reconnect. */
|
|
void
|
|
reconnect_disconnected(struct reconnect *fsm, long long int now, int error)
|
|
{
|
|
if (!(fsm->state & (S_BACKOFF | S_VOID))) {
|
|
/* Report what happened. */
|
|
if (fsm->state & (S_ACTIVE | S_IDLE)) {
|
|
if (error > 0) {
|
|
VLOG_WARN("%s: connection dropped (%s)",
|
|
fsm->name, strerror(error));
|
|
} else if (error == EOF) {
|
|
VLOG_INFO("%s: connection closed by peer", fsm->name);
|
|
} else {
|
|
VLOG_INFO("%s: connection dropped", fsm->name);
|
|
}
|
|
} else {
|
|
if (error > 0) {
|
|
VLOG_WARN("%s: connection attempt failed (%s)",
|
|
fsm->name, strerror(error));
|
|
} else {
|
|
VLOG_INFO("%s: connection attempt timed out", fsm->name);
|
|
}
|
|
}
|
|
|
|
/* Back off. */
|
|
if (fsm->state & (S_ACTIVE | S_IDLE)
|
|
&& fsm->last_received - fsm->last_connected >= fsm->backoff) {
|
|
fsm->backoff = fsm->min_backoff;
|
|
} else {
|
|
if (fsm->backoff < fsm->min_backoff) {
|
|
fsm->backoff = fsm->min_backoff;
|
|
} else if (fsm->backoff >= fsm->max_backoff / 2) {
|
|
fsm->backoff = fsm->max_backoff;
|
|
} else {
|
|
fsm->backoff *= 2;
|
|
}
|
|
VLOG_INFO("%s: waiting %.3g seconds before reconnect\n",
|
|
fsm->name, fsm->backoff / 1000.0);
|
|
}
|
|
|
|
reconnect_transition__(fsm, now,
|
|
reconnect_may_retry(fsm) ? S_BACKOFF : S_VOID);
|
|
}
|
|
}
|
|
|
|
/* Tell 'fsm' that a connection attempt is in progress.
|
|
*
|
|
* The FSM will start a timer, after which the connection attempt will be
|
|
* aborted (by returning RECONNECT_DISCONNECT from reconect_run()). */
|
|
void
|
|
reconnect_connecting(struct reconnect *fsm, long long int now)
|
|
{
|
|
if (fsm->state != S_CONNECT_IN_PROGRESS) {
|
|
VLOG_INFO("%s: connecting...", fsm->name);
|
|
reconnect_transition__(fsm, now, S_CONNECT_IN_PROGRESS);
|
|
}
|
|
}
|
|
|
|
/* Tell 'fsm' that the connection was successful.
|
|
*
|
|
* The FSM will start the probe interval timer, which is reset by
|
|
* reconnect_received(). If the timer expires, a probe will be sent (by
|
|
* returning RECONNECT_PROBE from reconnect_run()). If the timer expires
|
|
* again without being reset, the connection will be aborted (by returning
|
|
* RECONNECT_DISCONNECT from reconnect_run()). */
|
|
void
|
|
reconnect_connected(struct reconnect *fsm, long long int now)
|
|
{
|
|
if (!is_connected_state(fsm->state)) {
|
|
reconnect_connecting(fsm, now);
|
|
|
|
VLOG_INFO("%s: connected", fsm->name);
|
|
reconnect_transition__(fsm, now, S_ACTIVE);
|
|
fsm->last_connected = now;
|
|
}
|
|
}
|
|
|
|
/* Tell 'fsm' that the connection attempt failed.
|
|
*
|
|
* The FSM will back off and attempt to reconnect. */
|
|
void
|
|
reconnect_connect_failed(struct reconnect *fsm, long long int now, int error)
|
|
{
|
|
reconnect_connecting(fsm, now);
|
|
reconnect_disconnected(fsm, now, error);
|
|
}
|
|
|
|
/* Tell 'fsm' that some data was received. This resets the probe interval
|
|
* timer, so that the connection is known not to be idle. */
|
|
void
|
|
reconnect_received(struct reconnect *fsm, long long int now)
|
|
{
|
|
if (fsm->state != S_ACTIVE) {
|
|
reconnect_transition__(fsm, now, S_ACTIVE);
|
|
}
|
|
fsm->last_received = now;
|
|
}
|
|
|
|
static void
|
|
reconnect_transition__(struct reconnect *fsm, long long int now,
|
|
enum state state)
|
|
{
|
|
if (fsm->state == S_CONNECT_IN_PROGRESS) {
|
|
fsm->n_attempted_connections++;
|
|
if (state == S_ACTIVE) {
|
|
fsm->n_successful_connections++;
|
|
}
|
|
}
|
|
if (is_connected_state(fsm->state) != is_connected_state(state)) {
|
|
if (is_connected_state(fsm->state)) {
|
|
fsm->total_connected_duration += now - fsm->last_connected;
|
|
}
|
|
fsm->seqno++;
|
|
}
|
|
|
|
VLOG_DBG("%s: entering %s", fsm->name, reconnect_state_name__(state));
|
|
fsm->state = state;
|
|
fsm->state_entered = now;
|
|
}
|
|
|
|
static long long int
|
|
reconnect_deadline__(const struct reconnect *fsm)
|
|
{
|
|
assert(fsm->state_entered != LLONG_MIN);
|
|
switch (fsm->state) {
|
|
case S_VOID:
|
|
return LLONG_MAX;
|
|
|
|
case S_BACKOFF:
|
|
return fsm->state_entered + fsm->backoff;
|
|
|
|
case S_CONNECT_IN_PROGRESS:
|
|
return fsm->state_entered + MAX(1000, fsm->backoff);
|
|
|
|
case S_ACTIVE:
|
|
if (fsm->probe_interval) {
|
|
long long int base = MAX(fsm->last_received, fsm->state_entered);
|
|
return base + fsm->probe_interval;
|
|
}
|
|
return LLONG_MAX;
|
|
|
|
case S_IDLE:
|
|
return fsm->state_entered + fsm->probe_interval;
|
|
|
|
case S_RECONNECT:
|
|
return fsm->state_entered;
|
|
}
|
|
|
|
NOT_REACHED();
|
|
}
|
|
|
|
/* Assesses whether any action should be taken on 'fsm'. The return value is
|
|
* one of:
|
|
*
|
|
* - 0: The client need not take any action.
|
|
*
|
|
* - RECONNECT_CONNECT: The client should start a connection attempt and
|
|
* indicate this by calling reconnect_connecting(). If the connection
|
|
* attempt has definitely succeeded, it should call
|
|
* reconnect_connected(). If the connection attempt has definitely
|
|
* failed, it should call reconnect_connect_failed().
|
|
*
|
|
* The FSM is smart enough to back off correctly after successful
|
|
* connections that quickly abort, so it is OK to call
|
|
* reconnect_connected() after a low-level successful connection
|
|
* (e.g. connect()) even if the connection might soon abort due to a
|
|
* failure at a high-level (e.g. SSL negotiation failure).
|
|
*
|
|
* - RECONNECT_DISCONNECT: The client should abort the current connection
|
|
* or connection attempt and call reconnect_disconnected() or
|
|
* reconnect_connect_failed() to indicate it.
|
|
*
|
|
* - RECONNECT_PROBE: The client should send some kind of request to the
|
|
* peer that will elicit a response, to ensure that the connection is
|
|
* indeed in working order. (This will only be returned if the "probe
|
|
* interval" is nonzero--see reconnect_set_probe_interval()).
|
|
*/
|
|
enum reconnect_action
|
|
reconnect_run(struct reconnect *fsm, long long int now)
|
|
{
|
|
if (now >= reconnect_deadline__(fsm)) {
|
|
switch (fsm->state) {
|
|
case S_VOID:
|
|
return 0;
|
|
|
|
case S_BACKOFF:
|
|
return RECONNECT_CONNECT;
|
|
|
|
case S_CONNECT_IN_PROGRESS:
|
|
return RECONNECT_DISCONNECT;
|
|
|
|
case S_ACTIVE:
|
|
VLOG_DBG("%s: idle %lld ms, sending inactivity probe", fsm->name,
|
|
now - MAX(fsm->last_received, fsm->state_entered));
|
|
reconnect_transition__(fsm, now, S_IDLE);
|
|
return RECONNECT_PROBE;
|
|
|
|
case S_IDLE:
|
|
VLOG_ERR("%s: no response to inactivity probe after %.3g "
|
|
"seconds, disconnecting",
|
|
fsm->name, (now - fsm->state_entered) / 1000.0);
|
|
return RECONNECT_DISCONNECT;
|
|
|
|
case S_RECONNECT:
|
|
return RECONNECT_DISCONNECT;
|
|
}
|
|
|
|
NOT_REACHED();
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* Causes the next call to poll_block() to wake up when reconnect_run() should
|
|
* be called on 'fsm'. */
|
|
void
|
|
reconnect_wait(struct reconnect *fsm, long long int now)
|
|
{
|
|
int timeout = reconnect_timeout(fsm, now);
|
|
if (timeout >= 0) {
|
|
poll_timer_wait(timeout);
|
|
}
|
|
}
|
|
|
|
/* Returns the number of milliseconds after which reconnect_run() should be
|
|
* called on 'fsm' if nothing else notable happens in the meantime, or a
|
|
* negative number if this is currently unnecessary. */
|
|
int
|
|
reconnect_timeout(struct reconnect *fsm, long long int now)
|
|
{
|
|
long long int deadline = reconnect_deadline__(fsm);
|
|
if (deadline != LLONG_MAX) {
|
|
long long int remaining = deadline - now;
|
|
return MAX(0, MIN(INT_MAX, remaining));
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/* Returns true if 'fsm' is currently believed to be connected, that is, if
|
|
* reconnect_connected() was called more recently than any call to
|
|
* reconnect_connect_failed() or reconnect_disconnected() or
|
|
* reconnect_disable(), and false otherwise. */
|
|
bool
|
|
reconnect_is_connected(const struct reconnect *fsm)
|
|
{
|
|
return is_connected_state(fsm->state);
|
|
}
|
|
|
|
/* Returns the number of milliseconds for which 'fsm' has been continuously
|
|
* connected to its peer. (If 'fsm' is not currently connected, this is 0.) */
|
|
unsigned int
|
|
reconnect_get_connection_duration(const struct reconnect *fsm,
|
|
long long int now)
|
|
{
|
|
return reconnect_is_connected(fsm) ? now - fsm->last_connected : 0;
|
|
}
|
|
|
|
/* Copies various statistics for 'fsm' into '*stats'. */
|
|
void
|
|
reconnect_get_stats(const struct reconnect *fsm, long long int now,
|
|
struct reconnect_stats *stats)
|
|
{
|
|
stats->creation_time = fsm->creation_time;
|
|
stats->last_received = fsm->last_received;
|
|
stats->last_connected = fsm->last_connected;
|
|
stats->backoff = fsm->backoff;
|
|
stats->seqno = fsm->seqno;
|
|
stats->is_connected = reconnect_is_connected(fsm);
|
|
stats->current_connection_duration
|
|
= reconnect_get_connection_duration(fsm, now);
|
|
stats->total_connected_duration = (stats->current_connection_duration
|
|
+ fsm->total_connected_duration);
|
|
stats->n_attempted_connections = fsm->n_attempted_connections;
|
|
stats->n_successful_connections = fsm->n_successful_connections;
|
|
stats->state = reconnect_state_name__(fsm->state);
|
|
stats->state_elapsed = now - fsm->state_entered;
|
|
}
|
|
|
|
static bool
|
|
reconnect_may_retry(struct reconnect *fsm)
|
|
{
|
|
bool may_retry = fsm->max_tries > 0;
|
|
if (may_retry && fsm->max_tries != UINT_MAX) {
|
|
fsm->max_tries--;
|
|
}
|
|
return may_retry;
|
|
}
|