2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 09:58:01 +00:00

reconnect: Add ability to do a number of retries without backoff.

This is aimed at an upcoming database clustering implementation, where it's
desirable to try all of the cluster members quickly before backing off to
retry them again in sequence.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Russell Bryant <russell@ovn.org>
Signed-off-by: Ben Pfaff <blp@ovn.org>
Acked-by: Justin Pettit <jpettit@ovn.org>
This commit is contained in:
Ben Pfaff 2018-01-22 11:04:58 -08:00
parent f70b61d33d
commit 5ee527e223
6 changed files with 145 additions and 36 deletions

View File

@ -62,6 +62,7 @@ struct reconnect {
long long int last_connected;
long long int last_disconnected;
unsigned int max_tries;
unsigned int backoff_free_tries;
/* These values are simply for statistics reporting, not otherwise used
* directly by anything internal. */
@ -206,6 +207,15 @@ reconnect_get_max_tries(struct reconnect *fsm)
return fsm->max_tries;
}
/* Configures 'fsm' so that up to 'backoff_free_tries' connection attempts are
 * made without any backoff between them.  Passing 0 or 1 has the same
 * meaning: a single attempt. */
void
reconnect_set_backoff_free_tries(struct reconnect *fsm,
                                 unsigned int backoff_free_tries)
{
    fsm->backoff_free_tries = backoff_free_tries;
}
/* Configures the backoff parameters for 'fsm'. 'min_backoff' is the minimum
* number of milliseconds, and 'max_backoff' is the maximum, between connection
* attempts. The current backoff is also the duration that 'fsm' is willing to
@ -346,7 +356,7 @@ reconnect_disconnected(struct reconnect *fsm, long long int now, int error)
VLOG(fsm->info, "%s: error listening for connections",
fsm->name);
}
} else {
} else if (fsm->backoff < fsm->max_backoff) {
const char *type = fsm->passive ? "listen" : "connection";
if (error > 0) {
VLOG_INFO("%s: %s attempt failed (%s)",
@ -354,35 +364,47 @@ reconnect_disconnected(struct reconnect *fsm, long long int now, int error)
} else {
VLOG(fsm->info, "%s: %s attempt timed out", fsm->name, type);
}
} else {
/* We have reached the maximum backoff, so suppress logging to
* avoid wastefully filling the log. (Previously we logged that we
* were suppressing further logging, see below.) */
}
if (fsm->state & (S_ACTIVE | S_IDLE)) {
fsm->last_disconnected = now;
}
if (!reconnect_may_retry(fsm)) {
reconnect_transition__(fsm, now, S_VOID);
return;
}
/* Back off. */
if (fsm->state & (S_ACTIVE | S_IDLE)
if (fsm->backoff_free_tries > 1) {
fsm->backoff_free_tries--;
fsm->backoff = 0;
} else if (fsm->state & (S_ACTIVE | S_IDLE)
&& (fsm->last_activity - fsm->last_connected >= fsm->backoff
|| fsm->passive)) {
fsm->backoff = fsm->passive ? 0 : fsm->min_backoff;
} else {
if (fsm->backoff < fsm->min_backoff) {
fsm->backoff = fsm->min_backoff;
} else if (fsm->backoff >= fsm->max_backoff / 2) {
fsm->backoff = fsm->max_backoff;
} else {
} else if (fsm->backoff < fsm->max_backoff / 2) {
fsm->backoff *= 2;
}
if (fsm->passive) {
VLOG(fsm->info, "%s: waiting %.3g seconds before trying to "
"listen again", fsm->name, fsm->backoff / 1000.0);
VLOG(fsm->info, "%s: waiting %.3g seconds before %s",
fsm->name, fsm->backoff / 1000.0,
fsm->passive ? "trying to listen again" : "reconnect");
} else {
VLOG(fsm->info, "%s: waiting %.3g seconds before reconnect",
fsm->name, fsm->backoff / 1000.0);
if (fsm->backoff < fsm->max_backoff) {
VLOG_INFO("%s: continuing to %s in the background but "
"suppressing further logging", fsm->name,
fsm->passive ? "try to listen" : "reconnect");
}
fsm->backoff = fsm->max_backoff;
}
}
reconnect_transition__(fsm, now,
reconnect_may_retry(fsm) ? S_BACKOFF : S_VOID);
reconnect_transition__(fsm, now, S_BACKOFF);
}
}
@ -397,7 +419,7 @@ reconnect_connecting(struct reconnect *fsm, long long int now)
if (fsm->state != S_CONNECTING) {
if (fsm->passive) {
VLOG(fsm->info, "%s: listening...", fsm->name);
} else {
} else if (fsm->backoff < fsm->max_backoff) {
VLOG(fsm->info, "%s: connecting...", fsm->name);
}
reconnect_transition__(fsm, now, S_CONNECTING);

View File

@ -51,6 +51,8 @@ int reconnect_get_probe_interval(const struct reconnect *);
void reconnect_set_max_tries(struct reconnect *, unsigned int max_tries);
unsigned int reconnect_get_max_tries(struct reconnect *);
void reconnect_set_backoff_free_tries(struct reconnect *,
unsigned int backoff_free_tries);
void reconnect_set_backoff(struct reconnect *,
int min_backoff, int max_backoff);
@ -65,6 +67,7 @@ void reconnect_enable(struct reconnect *, long long int now);
void reconnect_disable(struct reconnect *, long long int now);
void reconnect_force_reconnect(struct reconnect *, long long int now);
void reconnect_skip_backoff(struct reconnect *);
bool reconnect_is_connected(const struct reconnect *);
unsigned int reconnect_get_last_connect_elapsed(const struct reconnect *,

View File

@ -154,6 +154,7 @@ class Reconnect(object):
self.last_connected = None
self.last_disconnected = None
self.max_tries = None
self.backoff_free_tries = 0
self.creation_time = now
self.n_attempted_connections = 0
@ -242,6 +243,12 @@ class Reconnect(object):
self.backoff > self.max_backoff):
self.backoff = self.max_backoff
def set_backoff_free_tries(self, backoff_free_tries):
    """Configures how many connection attempts this FSM makes without any
    backoff between them.

    'backoff_free_tries' is stored as given.  Values 0 and 1 both
    represent a single attempt."""
    self.backoff_free_tries = backoff_free_tries
def set_probe_interval(self, probe_interval):
"""Sets the "probe interval" to 'probe_interval', in milliseconds. If
this is zero, it disables the connection keepalive feature. If it is
@ -337,7 +344,7 @@ class Reconnect(object):
else:
self.info_level("%s: error listening for connections"
% self.name)
else:
elif self.backoff < self.max_backoff:
if self.passive:
type_ = "listen"
else:
@ -352,8 +359,15 @@ class Reconnect(object):
if (self.state in (Reconnect.Active, Reconnect.Idle)):
self.last_disconnected = now
if not self.__may_retry():
self._transition(now, Reconnect.Void)
return
# Back off
if (self.state in (Reconnect.Active, Reconnect.Idle) and
if self.backoff_free_tries > 1:
self.backoff_free_tries -= 1
self.backoff = 0
elif (self.state in (Reconnect.Active, Reconnect.Idle) and
(self.last_activity - self.last_connected >= self.backoff or
self.passive)):
if self.passive:
@ -363,23 +377,26 @@ class Reconnect(object):
else:
if self.backoff < self.min_backoff:
self.backoff = self.min_backoff
elif self.backoff >= self.max_backoff / 2:
self.backoff = self.max_backoff
else:
elif self.backoff < self.max_backoff / 2:
self.backoff *= 2
if self.passive:
self.info_level("%s: waiting %.3g seconds before trying "
"to listen again"
% (self.name, self.backoff / 1000.0))
action = "trying to listen again"
else:
self.info_level("%s: waiting %.3g seconds before reconnect"
% (self.name, self.backoff / 1000.0))
if self.__may_retry():
action = "reconnect"
self.info_level("%s: waiting %.3g seconds before %s"
% (self.name, self.backoff / 1000.0,
action))
else:
if self.backoff < self.max_backoff:
if self.passive:
action = "try to listen"
else:
action = "reconnect"
self.info_level("%s: continuing to %s in the "
"background but suppressing further "
"logging" % (self.name, action))
self.backoff = self.max_backoff
self._transition(now, Reconnect.Backoff)
else:
self._transition(now, Reconnect.Void)
def connecting(self, now):
"""Tell this FSM that a connection or listening attempt is in progress.
@ -390,7 +407,7 @@ class Reconnect(object):
if self.state != Reconnect.ConnectInProgress:
if self.passive:
self.info_level("%s: listening..." % self.name)
else:
elif self.backoff < self.max_backoff:
self.info_level("%s: connecting..." % self.name)
self._transition(now, Reconnect.ConnectInProgress)

View File

@ -1036,6 +1036,60 @@ timeout
in BACKOFF for 2000 ms (2000 ms backoff)
])
######################################################################
RECONNECT_CHECK([backoff-free tries work],
[set-backoff-free-tries 2
enable
# Connection fails quickly.
run
connect-failed ECONNREFUSED
# No backoff.
run
timeout
# Connection fails quickly again.
run
connect-failed ECONNREFUSED
# Back off for 1000 ms.
run
timeout
],
[### t=1000 ###
set-backoff-free-tries 2
enable
in BACKOFF for 0 ms (0 ms backoff)
# Connection fails quickly.
run
should connect
connect-failed ECONNREFUSED
0 successful connections out of 1 attempts, seqno 0
# No backoff.
run
should connect
timeout
advance 0 ms
# Connection fails quickly again.
run
should connect
connect-failed ECONNREFUSED
in BACKOFF for 0 ms (1000 ms backoff)
0 successful connections out of 2 attempts, seqno 0
# Back off for 1000 ms.
run
timeout
advance 1000 ms
### t=2000 ###
in BACKOFF for 1000 ms (1000 ms backoff)
])
######################################################################
RECONNECT_CHECK([max-tries of 1 honored],
[set-max-tries 1
@ -1090,7 +1144,7 @@ timeout
run
should disconnect
disconnected
in VOID for 0 ms (1000 ms backoff)
in VOID for 0 ms (0 ms backoff)
1 successful connections out of 1 attempts, seqno 2
disconnected
disconnected at 11000 ms (0 ms ago)

View File

@ -207,6 +207,12 @@ do_set_max_tries(struct ovs_cmdl_context *ctx)
reconnect_set_max_tries(reconnect, atoi(ctx->argv[1]));
}
/* Test command "set-backoff-free-tries N": parses the command's single
 * argument with atoi() and passes the result to
 * reconnect_set_backoff_free_tries() for the global 'reconnect' object. */
static void
do_set_backoff_free_tries(struct ovs_cmdl_context *ctx)
{
    int n_tries = atoi(ctx->argv[1]);

    reconnect_set_backoff_free_tries(reconnect, n_tries);
}
static void
diff_stats(const struct reconnect_stats *old,
const struct reconnect_stats *new,
@ -284,6 +290,8 @@ static const struct ovs_cmdl_command all_commands[] = {
{ "advance", NULL, 1, 1, do_advance, OVS_RO },
{ "timeout", NULL, 0, 0, do_timeout, OVS_RO },
{ "set-max-tries", NULL, 1, 1, do_set_max_tries, OVS_RO },
{ "set-backoff-free-tries", NULL, 1, 1, do_set_backoff_free_tries,
OVS_RO },
{ "passive", NULL, 0, 0, do_set_passive, OVS_RO },
{ "listening", NULL, 0, 0, do_listening, OVS_RO },
{ "listen-error", NULL, 1, 1, do_listen_error, OVS_RO },

View File

@ -104,6 +104,10 @@ def do_set_max_tries(arg):
r.set_max_tries(int(arg))
def do_set_backoff_free_tries(arg):
    """Handles the "set-backoff-free-tries" test command: converts 'arg' to
    an integer and passes it to set_backoff_free_tries() on the global 'r'."""
    n_tries = int(arg)
    r.set_backoff_free_tries(n_tries)
def diff_stats(old, new, delta):
if (old.state != new.state or
old.state_elapsed != new.state_elapsed or
@ -173,6 +177,7 @@ def main():
"advance": do_advance,
"timeout": do_timeout,
"set-max-tries": do_set_max_tries,
"set-backoff-free-tries": do_set_backoff_free_tries,
"passive": do_set_passive,
"listening": do_listening,
"listen-error": do_listen_error