mirror of
https://github.com/openvswitch/ovs
synced 2025-08-22 09:58:01 +00:00
reconnect: Add ability to do a number of retries without backoff.
This is aimed at an upcoming database clustering implementation, where it's desirable to try all of the cluster members quickly before backing off to retry them again in sequence.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Russell Bryant <russell@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>
This commit is contained in:
parent
f70b61d33d
commit
5ee527e223
@ -62,6 +62,7 @@ struct reconnect {
|
|||||||
long long int last_connected;
|
long long int last_connected;
|
||||||
long long int last_disconnected;
|
long long int last_disconnected;
|
||||||
unsigned int max_tries;
|
unsigned int max_tries;
|
||||||
|
unsigned int backoff_free_tries;
|
||||||
|
|
||||||
/* These values are simply for statistics reporting, not otherwise used
|
/* These values are simply for statistics reporting, not otherwise used
|
||||||
* directly by anything internal. */
|
* directly by anything internal. */
|
||||||
@ -206,6 +207,15 @@ reconnect_get_max_tries(struct reconnect *fsm)
|
|||||||
return fsm->max_tries;
|
return fsm->max_tries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Sets the number of connection attempts that will be made without backoff to
|
||||||
|
* 'backoff_free_tries'. Values 0 and 1 both represent a single attempt.
|
||||||
|
*
|
||||||
|
* The value is stored in 'fsm' as a countdown: when the back-off step of
|
||||||
|
* reconnect_disconnected() runs while the stored count is greater than 1, it
|
||||||
|
* decrements the count and sets the backoff to 0 instead of increasing it. */
|
||||||
|
void
|
||||||
|
reconnect_set_backoff_free_tries(struct reconnect *fsm,
|
||||||
|
unsigned int backoff_free_tries)
|
||||||
|
{
|
||||||
|
fsm->backoff_free_tries = backoff_free_tries;
|
||||||
|
}
|
||||||
|
|
||||||
/* Configures the backoff parameters for 'fsm'. 'min_backoff' is the minimum
|
/* Configures the backoff parameters for 'fsm'. 'min_backoff' is the minimum
|
||||||
* number of milliseconds, and 'max_backoff' is the maximum, between connection
|
* number of milliseconds, and 'max_backoff' is the maximum, between connection
|
||||||
* attempts. The current backoff is also the duration that 'fsm' is willing to
|
* attempts. The current backoff is also the duration that 'fsm' is willing to
|
||||||
@ -346,7 +356,7 @@ reconnect_disconnected(struct reconnect *fsm, long long int now, int error)
|
|||||||
VLOG(fsm->info, "%s: error listening for connections",
|
VLOG(fsm->info, "%s: error listening for connections",
|
||||||
fsm->name);
|
fsm->name);
|
||||||
}
|
}
|
||||||
} else {
|
} else if (fsm->backoff < fsm->max_backoff) {
|
||||||
const char *type = fsm->passive ? "listen" : "connection";
|
const char *type = fsm->passive ? "listen" : "connection";
|
||||||
if (error > 0) {
|
if (error > 0) {
|
||||||
VLOG_INFO("%s: %s attempt failed (%s)",
|
VLOG_INFO("%s: %s attempt failed (%s)",
|
||||||
@ -354,35 +364,47 @@ reconnect_disconnected(struct reconnect *fsm, long long int now, int error)
|
|||||||
} else {
|
} else {
|
||||||
VLOG(fsm->info, "%s: %s attempt timed out", fsm->name, type);
|
VLOG(fsm->info, "%s: %s attempt timed out", fsm->name, type);
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
/* We have reached the maximum backoff, so suppress logging to
|
||||||
|
* avoid wastefully filling the log. (Previously we logged that we
|
||||||
|
* were suppressing further logging, see below.) */
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fsm->state & (S_ACTIVE | S_IDLE)) {
|
if (fsm->state & (S_ACTIVE | S_IDLE)) {
|
||||||
fsm->last_disconnected = now;
|
fsm->last_disconnected = now;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!reconnect_may_retry(fsm)) {
|
||||||
|
reconnect_transition__(fsm, now, S_VOID);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* Back off. */
|
/* Back off. */
|
||||||
if (fsm->state & (S_ACTIVE | S_IDLE)
|
if (fsm->backoff_free_tries > 1) {
|
||||||
&& (fsm->last_activity - fsm->last_connected >= fsm->backoff
|
fsm->backoff_free_tries--;
|
||||||
|| fsm->passive)) {
|
fsm->backoff = 0;
|
||||||
|
} else if (fsm->state & (S_ACTIVE | S_IDLE)
|
||||||
|
&& (fsm->last_activity - fsm->last_connected >= fsm->backoff
|
||||||
|
|| fsm->passive)) {
|
||||||
fsm->backoff = fsm->passive ? 0 : fsm->min_backoff;
|
fsm->backoff = fsm->passive ? 0 : fsm->min_backoff;
|
||||||
} else {
|
} else {
|
||||||
if (fsm->backoff < fsm->min_backoff) {
|
if (fsm->backoff < fsm->min_backoff) {
|
||||||
fsm->backoff = fsm->min_backoff;
|
fsm->backoff = fsm->min_backoff;
|
||||||
} else if (fsm->backoff >= fsm->max_backoff / 2) {
|
} else if (fsm->backoff < fsm->max_backoff / 2) {
|
||||||
fsm->backoff = fsm->max_backoff;
|
|
||||||
} else {
|
|
||||||
fsm->backoff *= 2;
|
fsm->backoff *= 2;
|
||||||
}
|
VLOG(fsm->info, "%s: waiting %.3g seconds before %s",
|
||||||
if (fsm->passive) {
|
fsm->name, fsm->backoff / 1000.0,
|
||||||
VLOG(fsm->info, "%s: waiting %.3g seconds before trying to "
|
fsm->passive ? "trying to listen again" : "reconnect");
|
||||||
"listen again", fsm->name, fsm->backoff / 1000.0);
|
|
||||||
} else {
|
} else {
|
||||||
VLOG(fsm->info, "%s: waiting %.3g seconds before reconnect",
|
if (fsm->backoff < fsm->max_backoff) {
|
||||||
fsm->name, fsm->backoff / 1000.0);
|
VLOG_INFO("%s: continuing to %s in the background but "
|
||||||
|
"suppressing further logging", fsm->name,
|
||||||
|
fsm->passive ? "try to listen" : "reconnect");
|
||||||
|
}
|
||||||
|
fsm->backoff = fsm->max_backoff;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
reconnect_transition__(fsm, now, S_BACKOFF);
|
||||||
reconnect_transition__(fsm, now,
|
|
||||||
reconnect_may_retry(fsm) ? S_BACKOFF : S_VOID);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -397,7 +419,7 @@ reconnect_connecting(struct reconnect *fsm, long long int now)
|
|||||||
if (fsm->state != S_CONNECTING) {
|
if (fsm->state != S_CONNECTING) {
|
||||||
if (fsm->passive) {
|
if (fsm->passive) {
|
||||||
VLOG(fsm->info, "%s: listening...", fsm->name);
|
VLOG(fsm->info, "%s: listening...", fsm->name);
|
||||||
} else {
|
} else if (fsm->backoff < fsm->max_backoff) {
|
||||||
VLOG(fsm->info, "%s: connecting...", fsm->name);
|
VLOG(fsm->info, "%s: connecting...", fsm->name);
|
||||||
}
|
}
|
||||||
reconnect_transition__(fsm, now, S_CONNECTING);
|
reconnect_transition__(fsm, now, S_CONNECTING);
|
||||||
|
@ -51,6 +51,8 @@ int reconnect_get_probe_interval(const struct reconnect *);
|
|||||||
|
|
||||||
void reconnect_set_max_tries(struct reconnect *, unsigned int max_tries);
|
void reconnect_set_max_tries(struct reconnect *, unsigned int max_tries);
|
||||||
unsigned int reconnect_get_max_tries(struct reconnect *);
|
unsigned int reconnect_get_max_tries(struct reconnect *);
|
||||||
|
void reconnect_set_backoff_free_tries(struct reconnect *,
|
||||||
|
unsigned int backoff_free_tries);
|
||||||
|
|
||||||
void reconnect_set_backoff(struct reconnect *,
|
void reconnect_set_backoff(struct reconnect *,
|
||||||
int min_backoff, int max_backoff);
|
int min_backoff, int max_backoff);
|
||||||
@ -65,6 +67,7 @@ void reconnect_enable(struct reconnect *, long long int now);
|
|||||||
void reconnect_disable(struct reconnect *, long long int now);
|
void reconnect_disable(struct reconnect *, long long int now);
|
||||||
|
|
||||||
void reconnect_force_reconnect(struct reconnect *, long long int now);
|
void reconnect_force_reconnect(struct reconnect *, long long int now);
|
||||||
|
void reconnect_skip_backoff(struct reconnect *);
|
||||||
|
|
||||||
bool reconnect_is_connected(const struct reconnect *);
|
bool reconnect_is_connected(const struct reconnect *);
|
||||||
unsigned int reconnect_get_last_connect_elapsed(const struct reconnect *,
|
unsigned int reconnect_get_last_connect_elapsed(const struct reconnect *,
|
||||||
|
@ -154,6 +154,7 @@ class Reconnect(object):
|
|||||||
self.last_connected = None
|
self.last_connected = None
|
||||||
self.last_disconnected = None
|
self.last_disconnected = None
|
||||||
self.max_tries = None
|
self.max_tries = None
|
||||||
|
self.backoff_free_tries = 0
|
||||||
|
|
||||||
self.creation_time = now
|
self.creation_time = now
|
||||||
self.n_attempted_connections = 0
|
self.n_attempted_connections = 0
|
||||||
@ -242,6 +243,12 @@ class Reconnect(object):
|
|||||||
self.backoff > self.max_backoff):
|
self.backoff > self.max_backoff):
|
||||||
self.backoff = self.max_backoff
|
self.backoff = self.max_backoff
|
||||||
|
|
||||||
|
def set_backoff_free_tries(self, backoff_free_tries):
|
||||||
|
"""Sets the number of connection attempts that will be made without
|
||||||
|
backoff to 'backoff_free_tries'. Values 0 and 1 both
|
||||||
|
represent a single attempt.
|
||||||
|
The value is stored as a countdown: each time the back-off logic
|
||||||
|
runs while the stored count is greater than 1, the count is
|
||||||
|
decremented and the backoff is set to 0."""
|
||||||
|
self.backoff_free_tries = backoff_free_tries
|
||||||
|
|
||||||
def set_probe_interval(self, probe_interval):
|
def set_probe_interval(self, probe_interval):
|
||||||
"""Sets the "probe interval" to 'probe_interval', in milliseconds. If
|
"""Sets the "probe interval" to 'probe_interval', in milliseconds. If
|
||||||
this is zero, it disables the connection keepalive feature. If it is
|
this is zero, it disables the connection keepalive feature. If it is
|
||||||
@ -337,7 +344,7 @@ class Reconnect(object):
|
|||||||
else:
|
else:
|
||||||
self.info_level("%s: error listening for connections"
|
self.info_level("%s: error listening for connections"
|
||||||
% self.name)
|
% self.name)
|
||||||
else:
|
elif self.backoff < self.max_backoff:
|
||||||
if self.passive:
|
if self.passive:
|
||||||
type_ = "listen"
|
type_ = "listen"
|
||||||
else:
|
else:
|
||||||
@ -352,8 +359,15 @@ class Reconnect(object):
|
|||||||
if (self.state in (Reconnect.Active, Reconnect.Idle)):
|
if (self.state in (Reconnect.Active, Reconnect.Idle)):
|
||||||
self.last_disconnected = now
|
self.last_disconnected = now
|
||||||
|
|
||||||
|
if not self.__may_retry():
|
||||||
|
self._transition(now, Reconnect.Void)
|
||||||
|
return
|
||||||
|
|
||||||
# Back off
|
# Back off
|
||||||
if (self.state in (Reconnect.Active, Reconnect.Idle) and
|
if self.backoff_free_tries > 1:
|
||||||
|
self.backoff_free_tries -= 1
|
||||||
|
self.backoff = 0
|
||||||
|
elif (self.state in (Reconnect.Active, Reconnect.Idle) and
|
||||||
(self.last_activity - self.last_connected >= self.backoff or
|
(self.last_activity - self.last_connected >= self.backoff or
|
||||||
self.passive)):
|
self.passive)):
|
||||||
if self.passive:
|
if self.passive:
|
||||||
@ -363,23 +377,26 @@ class Reconnect(object):
|
|||||||
else:
|
else:
|
||||||
if self.backoff < self.min_backoff:
|
if self.backoff < self.min_backoff:
|
||||||
self.backoff = self.min_backoff
|
self.backoff = self.min_backoff
|
||||||
elif self.backoff >= self.max_backoff / 2:
|
elif self.backoff < self.max_backoff / 2:
|
||||||
self.backoff = self.max_backoff
|
|
||||||
else:
|
|
||||||
self.backoff *= 2
|
self.backoff *= 2
|
||||||
|
if self.passive:
|
||||||
if self.passive:
|
action = "trying to listen again"
|
||||||
self.info_level("%s: waiting %.3g seconds before trying "
|
else:
|
||||||
"to listen again"
|
action = "reconnect"
|
||||||
% (self.name, self.backoff / 1000.0))
|
self.info_level("%s: waiting %.3g seconds before %s"
|
||||||
|
% (self.name, self.backoff / 1000.0,
|
||||||
|
action))
|
||||||
else:
|
else:
|
||||||
self.info_level("%s: waiting %.3g seconds before reconnect"
|
if self.backoff < self.max_backoff:
|
||||||
% (self.name, self.backoff / 1000.0))
|
if self.passive:
|
||||||
|
action = "try to listen"
|
||||||
if self.__may_retry():
|
else:
|
||||||
self._transition(now, Reconnect.Backoff)
|
action = "reconnect"
|
||||||
else:
|
self.info_level("%s: continuing to %s in the "
|
||||||
self._transition(now, Reconnect.Void)
|
"background but suppressing further "
|
||||||
|
"logging" % (self.name, action))
|
||||||
|
self.backoff = self.max_backoff
|
||||||
|
self._transition(now, Reconnect.Backoff)
|
||||||
|
|
||||||
def connecting(self, now):
|
def connecting(self, now):
|
||||||
"""Tell this FSM that a connection or listening attempt is in progress.
|
"""Tell this FSM that a connection or listening attempt is in progress.
|
||||||
@ -390,7 +407,7 @@ class Reconnect(object):
|
|||||||
if self.state != Reconnect.ConnectInProgress:
|
if self.state != Reconnect.ConnectInProgress:
|
||||||
if self.passive:
|
if self.passive:
|
||||||
self.info_level("%s: listening..." % self.name)
|
self.info_level("%s: listening..." % self.name)
|
||||||
else:
|
elif self.backoff < self.max_backoff:
|
||||||
self.info_level("%s: connecting..." % self.name)
|
self.info_level("%s: connecting..." % self.name)
|
||||||
self._transition(now, Reconnect.ConnectInProgress)
|
self._transition(now, Reconnect.ConnectInProgress)
|
||||||
|
|
||||||
|
@ -1036,6 +1036,60 @@ timeout
|
|||||||
in BACKOFF for 2000 ms (2000 ms backoff)
|
in BACKOFF for 2000 ms (2000 ms backoff)
|
||||||
])
|
])
|
||||||
|
|
||||||
|
######################################################################
|
||||||
|
RECONNECT_CHECK([backoff-free tries work],
|
||||||
|
[set-backoff-free-tries 2
|
||||||
|
enable
|
||||||
|
|
||||||
|
# Connection fails quickly.
|
||||||
|
run
|
||||||
|
connect-failed ECONNREFUSED
|
||||||
|
|
||||||
|
# No backoff.
|
||||||
|
run
|
||||||
|
timeout
|
||||||
|
|
||||||
|
# Connection fails quickly again.
|
||||||
|
run
|
||||||
|
connect-failed ECONNREFUSED
|
||||||
|
|
||||||
|
# Back off for 1000 ms.
|
||||||
|
run
|
||||||
|
timeout
|
||||||
|
],
|
||||||
|
[### t=1000 ###
|
||||||
|
set-backoff-free-tries 2
|
||||||
|
enable
|
||||||
|
in BACKOFF for 0 ms (0 ms backoff)
|
||||||
|
|
||||||
|
# Connection fails quickly.
|
||||||
|
run
|
||||||
|
should connect
|
||||||
|
connect-failed ECONNREFUSED
|
||||||
|
0 successful connections out of 1 attempts, seqno 0
|
||||||
|
|
||||||
|
# No backoff.
|
||||||
|
run
|
||||||
|
should connect
|
||||||
|
timeout
|
||||||
|
advance 0 ms
|
||||||
|
|
||||||
|
# Connection fails quickly again.
|
||||||
|
run
|
||||||
|
should connect
|
||||||
|
connect-failed ECONNREFUSED
|
||||||
|
in BACKOFF for 0 ms (1000 ms backoff)
|
||||||
|
0 successful connections out of 2 attempts, seqno 0
|
||||||
|
|
||||||
|
# Back off for 1000 ms.
|
||||||
|
run
|
||||||
|
timeout
|
||||||
|
advance 1000 ms
|
||||||
|
|
||||||
|
### t=2000 ###
|
||||||
|
in BACKOFF for 1000 ms (1000 ms backoff)
|
||||||
|
])
|
||||||
|
|
||||||
######################################################################
|
######################################################################
|
||||||
RECONNECT_CHECK([max-tries of 1 honored],
|
RECONNECT_CHECK([max-tries of 1 honored],
|
||||||
[set-max-tries 1
|
[set-max-tries 1
|
||||||
@ -1090,7 +1144,7 @@ timeout
|
|||||||
run
|
run
|
||||||
should disconnect
|
should disconnect
|
||||||
disconnected
|
disconnected
|
||||||
in VOID for 0 ms (1000 ms backoff)
|
in VOID for 0 ms (0 ms backoff)
|
||||||
1 successful connections out of 1 attempts, seqno 2
|
1 successful connections out of 1 attempts, seqno 2
|
||||||
disconnected
|
disconnected
|
||||||
disconnected at 11000 ms (0 ms ago)
|
disconnected at 11000 ms (0 ms ago)
|
||||||
|
@ -207,6 +207,12 @@ do_set_max_tries(struct ovs_cmdl_context *ctx)
|
|||||||
reconnect_set_max_tries(reconnect, atoi(ctx->argv[1]));
|
reconnect_set_max_tries(reconnect, atoi(ctx->argv[1]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Handler for the "set-backoff-free-tries" test command: converts its single
 * argument with atoi() and passes the result to
 * reconnect_set_backoff_free_tries() on the global 'reconnect'.  The argument
 * is not validated; the int returned by atoi() is converted to the callee's
 * 'unsigned int' parameter. */
static void
|
||||||
|
do_set_backoff_free_tries(struct ovs_cmdl_context *ctx)
|
||||||
|
{
|
||||||
|
reconnect_set_backoff_free_tries(reconnect, atoi(ctx->argv[1]));
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
diff_stats(const struct reconnect_stats *old,
|
diff_stats(const struct reconnect_stats *old,
|
||||||
const struct reconnect_stats *new,
|
const struct reconnect_stats *new,
|
||||||
@ -284,6 +290,8 @@ static const struct ovs_cmdl_command all_commands[] = {
|
|||||||
{ "advance", NULL, 1, 1, do_advance, OVS_RO },
|
{ "advance", NULL, 1, 1, do_advance, OVS_RO },
|
||||||
{ "timeout", NULL, 0, 0, do_timeout, OVS_RO },
|
{ "timeout", NULL, 0, 0, do_timeout, OVS_RO },
|
||||||
{ "set-max-tries", NULL, 1, 1, do_set_max_tries, OVS_RO },
|
{ "set-max-tries", NULL, 1, 1, do_set_max_tries, OVS_RO },
|
||||||
|
{ "set-backoff-free-tries", NULL, 1, 1, do_set_backoff_free_tries,
|
||||||
|
OVS_RO },
|
||||||
{ "passive", NULL, 0, 0, do_set_passive, OVS_RO },
|
{ "passive", NULL, 0, 0, do_set_passive, OVS_RO },
|
||||||
{ "listening", NULL, 0, 0, do_listening, OVS_RO },
|
{ "listening", NULL, 0, 0, do_listening, OVS_RO },
|
||||||
{ "listen-error", NULL, 1, 1, do_listen_error, OVS_RO },
|
{ "listen-error", NULL, 1, 1, do_listen_error, OVS_RO },
|
||||||
|
@ -104,6 +104,10 @@ def do_set_max_tries(arg):
|
|||||||
r.set_max_tries(int(arg))
|
r.set_max_tries(int(arg))
|
||||||
|
|
||||||
|
|
||||||
|
# Handler for the "set-backoff-free-tries" test command: converts 'arg' to an
# int and passes it to set_backoff_free_tries() on the global 'r'.
def do_set_backoff_free_tries(arg):
|
||||||
|
r.set_backoff_free_tries(int(arg))
|
||||||
|
|
||||||
|
|
||||||
def diff_stats(old, new, delta):
|
def diff_stats(old, new, delta):
|
||||||
if (old.state != new.state or
|
if (old.state != new.state or
|
||||||
old.state_elapsed != new.state_elapsed or
|
old.state_elapsed != new.state_elapsed or
|
||||||
@ -173,6 +177,7 @@ def main():
|
|||||||
"advance": do_advance,
|
"advance": do_advance,
|
||||||
"timeout": do_timeout,
|
"timeout": do_timeout,
|
||||||
"set-max-tries": do_set_max_tries,
|
"set-max-tries": do_set_max_tries,
|
||||||
|
"set-backoff-free-tries": do_set_backoff_free_tries,
|
||||||
"passive": do_set_passive,
|
"passive": do_set_passive,
|
||||||
"listening": do_listening,
|
"listening": do_listening,
|
||||||
"listen-error": do_listen_error
|
"listen-error": do_listen_error
|
||||||
|
Loading…
x
Reference in New Issue
Block a user