2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 01:51:26 +00:00

reconnect: Add Python implementation of received_attempt(), and test.

This follows up on commit 4241d652e465 ("jsonrpc: Avoid disconnecting
prematurely due to long poll intervals."), which implemented the same
thing in C.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Requested-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
Ben Pfaff 2020-12-21 16:21:00 -08:00
parent 98b1d633c4
commit fcf281b0b6
5 changed files with 85 additions and 11 deletions

View File

@ -570,13 +570,16 @@ class Session(object):
if self.rpc is not None:
received_bytes = self.rpc.get_received_bytes()
error, msg = self.rpc.recv()
now = ovs.timeval.msec()
self.reconnect.receive_attempted(now)
if received_bytes != self.rpc.get_received_bytes():
# Data was successfully received.
#
# Previously we only counted receiving a full message as
# activity, but with large messages or a slow connection that
# policy could time out the session mid-message.
self.reconnect.activity(ovs.timeval.msec())
self.reconnect.activity(now)
if not error:
if msg.type == Message.T_REQUEST and msg.method == "echo":

View File

@ -95,7 +95,10 @@ class Reconnect(object):
def deadline(fsm):
if fsm.probe_interval:
base = max(fsm.last_activity, fsm.state_entered)
return base + fsm.probe_interval
expiration = base + fsm.probe_interval
if (fsm.last_receive_attempt is None or
fsm.last_receive_attempt >= expiration):
return expiration
return None
@staticmethod
@ -113,7 +116,10 @@ class Reconnect(object):
@staticmethod
def deadline(fsm):
if fsm.probe_interval:
return fsm.state_entered + fsm.probe_interval
expiration = fsm.state_entered + fsm.probe_interval
if (fsm.last_receive_attempt is None or
fsm.last_receive_attempt >= expiration):
return expiration
return None
@staticmethod
@ -153,6 +159,7 @@ class Reconnect(object):
self.last_activity = now
self.last_connected = None
self.last_disconnected = None
self.last_receive_attempt = now
self.max_tries = None
self.backoff_free_tries = 0
@ -472,6 +479,16 @@ class Reconnect(object):
self._transition(now, Reconnect.Active)
self.last_activity = now
def receive_attempted(self, now):
"""Tell 'fsm' that some attempt to receive data on the connection was
made at 'now'. The FSM only allows probe interval timer to expire when
some attempt to receive data on the connection was received after the
time when it should have expired. This helps in the case where there's
a long delay in the poll loop and then reconnect_run() executes before
the code to try to receive anything from the remote runs. (To disable
this feature, pass None for 'now'.)"""
self.last_receive_attempt = now
def _transition(self, now, state):
if self.state == Reconnect.ConnectInProgress:
self.n_attempted_connections += 1

View File

@ -38,7 +38,12 @@ RECONNECT_CHECK([quick connect, idle disconnect],
run
connected
# Send inactivity probe.
# Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout
run
@ -61,7 +66,13 @@ connected
connected
last connected 0 ms ago, connected 0 ms total
# Send inactivity probe.
# Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout
no timeout
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout
advance 5000 ms
@ -99,7 +110,12 @@ advance 500
run
connected
# Send inactivity probe.
# Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout
run
@ -131,7 +147,13 @@ connected
connected
last connected 0 ms ago, connected 0 ms total
# Send inactivity probe.
# Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout
no timeout
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout
advance 5000 ms
@ -357,7 +379,8 @@ connect-failed
######################################################################
RECONNECT_CHECK([connections with no data preserve backoff],
[enable
[receive-attempted LLONG_MAX
enable
# First connect, then idle timeout kills connection.
run
@ -397,6 +420,7 @@ disconnected
# Back off for 4000 ms.
timeout
], [### t=1000 ###
receive-attempted LLONG_MAX
enable
in BACKOFF for 0 ms (0 ms backoff)
@ -1083,7 +1107,8 @@ timeout
######################################################################
RECONNECT_CHECK([max-tries of 1 honored],
[set-max-tries 1
[receive-attempted LLONG_MAX
set-max-tries 1
enable
# Connection succeeds.
@ -1100,6 +1125,7 @@ run
disconnected
],
[### t=1000 ###
receive-attempted LLONG_MAX
set-max-tries 1
1 tries left
enable
@ -1185,6 +1211,8 @@ activity
# Connection times out.
timeout
receive-attempted LLONG_MAX
timeout
run
timeout
run
@ -1243,6 +1271,9 @@ activity
created 1000, last activity 3000, last connected 2000
# Connection times out.
timeout
no timeout
receive-attempted LLONG_MAX
timeout
advance 5000 ms

View File

@ -48,7 +48,6 @@ test_reconnect_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
now = 1000;
reconnect = reconnect_create(now);
reconnect_receive_attempted(reconnect, LLONG_MAX);
reconnect_set_name(reconnect, "remote");
reconnect_get_stats(reconnect, now, &prev);
printf("### t=%d ###\n", now);
@ -278,6 +277,18 @@ do_listen_error(struct ovs_cmdl_context *ctx)
reconnect_listen_error(reconnect, now, atoi(ctx->argv[1]));
}
static void
do_receive_attempted(struct ovs_cmdl_context *ctx OVS_UNUSED)
{
if (!strcmp(ctx->argv[1], "now")) {
reconnect_receive_attempted(reconnect, now);
} else if (!strcmp(ctx->argv[1], "LLONG_MAX")) {
reconnect_receive_attempted(reconnect, LLONG_MAX);
} else {
ovs_fatal(0, "%s: bad argument %s", ctx->argv[0], ctx->argv[1]);
}
}
static const struct ovs_cmdl_command all_commands[] = {
{ "enable", NULL, 0, 0, do_enable, OVS_RO },
{ "disable", NULL, 0, 0, do_disable, OVS_RO },
@ -296,6 +307,7 @@ static const struct ovs_cmdl_command all_commands[] = {
{ "passive", NULL, 0, 0, do_set_passive, OVS_RO },
{ "listening", NULL, 0, 0, do_listening, OVS_RO },
{ "listen-error", NULL, 1, 1, do_listen_error, OVS_RO },
{ "receive-attempted", NULL, 1, 1, do_receive_attempted, OVS_RO },
{ NULL, NULL, 0, 0, NULL, OVS_RO },
};

View File

@ -163,6 +163,16 @@ def do_listen_error(arg):
r.listen_error(now, int(arg))
def do_receive_attempted(arg):
if arg == "now":
r.receive_attempted(now)
elif arg == "LLONG_MAX":
r.receive_attempted(None)
else:
sys.stderr.write("receive-attempted: bad argument %s\n" % arg)
sys.exit(1)
def main():
commands = {
"enable": do_enable,
@ -180,7 +190,8 @@ def main():
"set-backoff-free-tries": do_set_backoff_free_tries,
"passive": do_set_passive,
"listening": do_listening,
"listen-error": do_listen_error
"listen-error": do_listen_error,
"receive-attempted": do_receive_attempted
}
global now