2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-29 13:27:59 +00:00

reconnect: Add Python implementation of received_attempt(), and test.

This follows up on commit 4241d652e465 ("jsonrpc: Avoid disconnecting
prematurely due to long poll intervals."), which implemented the same
thing in C.

Signed-off-by: Ben Pfaff <blp@ovn.org>
Requested-by: Ilya Maximets <i.maximets@ovn.org>
Acked-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
Ben Pfaff 2020-12-21 16:21:00 -08:00
parent 98b1d633c4
commit fcf281b0b6
5 changed files with 85 additions and 11 deletions

View File

@ -570,13 +570,16 @@ class Session(object):
if self.rpc is not None: if self.rpc is not None:
received_bytes = self.rpc.get_received_bytes() received_bytes = self.rpc.get_received_bytes()
error, msg = self.rpc.recv() error, msg = self.rpc.recv()
now = ovs.timeval.msec()
self.reconnect.receive_attempted(now)
if received_bytes != self.rpc.get_received_bytes(): if received_bytes != self.rpc.get_received_bytes():
# Data was successfully received. # Data was successfully received.
# #
# Previously we only counted receiving a full message as # Previously we only counted receiving a full message as
# activity, but with large messages or a slow connection that # activity, but with large messages or a slow connection that
# policy could time out the session mid-message. # policy could time out the session mid-message.
self.reconnect.activity(ovs.timeval.msec()) self.reconnect.activity(now)
if not error: if not error:
if msg.type == Message.T_REQUEST and msg.method == "echo": if msg.type == Message.T_REQUEST and msg.method == "echo":

View File

@ -95,7 +95,10 @@ class Reconnect(object):
def deadline(fsm): def deadline(fsm):
if fsm.probe_interval: if fsm.probe_interval:
base = max(fsm.last_activity, fsm.state_entered) base = max(fsm.last_activity, fsm.state_entered)
return base + fsm.probe_interval expiration = base + fsm.probe_interval
if (fsm.last_receive_attempt is None or
fsm.last_receive_attempt >= expiration):
return expiration
return None return None
@staticmethod @staticmethod
@ -113,7 +116,10 @@ class Reconnect(object):
@staticmethod @staticmethod
def deadline(fsm): def deadline(fsm):
if fsm.probe_interval: if fsm.probe_interval:
return fsm.state_entered + fsm.probe_interval expiration = fsm.state_entered + fsm.probe_interval
if (fsm.last_receive_attempt is None or
fsm.last_receive_attempt >= expiration):
return expiration
return None return None
@staticmethod @staticmethod
@ -153,6 +159,7 @@ class Reconnect(object):
self.last_activity = now self.last_activity = now
self.last_connected = None self.last_connected = None
self.last_disconnected = None self.last_disconnected = None
self.last_receive_attempt = now
self.max_tries = None self.max_tries = None
self.backoff_free_tries = 0 self.backoff_free_tries = 0
@ -472,6 +479,16 @@ class Reconnect(object):
self._transition(now, Reconnect.Active) self._transition(now, Reconnect.Active)
self.last_activity = now self.last_activity = now
def receive_attempted(self, now):
"""Tell 'fsm' that some attempt to receive data on the connection was
made at 'now'. The FSM only allows probe interval timer to expire when
some attempt to receive data on the connection was received after the
time when it should have expired. This helps in the case where there's
a long delay in the poll loop and then reconnect_run() executes before
the code to try to receive anything from the remote runs. (To disable
this feature, pass None for 'now'.)"""
self.last_receive_attempt = now
def _transition(self, now, state): def _transition(self, now, state):
if self.state == Reconnect.ConnectInProgress: if self.state == Reconnect.ConnectInProgress:
self.n_attempted_connections += 1 self.n_attempted_connections += 1

View File

@ -38,7 +38,12 @@ RECONNECT_CHECK([quick connect, idle disconnect],
run run
connected connected
# Send inactivity probe. # Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout timeout
run run
@ -61,7 +66,13 @@ connected
connected connected
last connected 0 ms ago, connected 0 ms total last connected 0 ms ago, connected 0 ms total
# Send inactivity probe. # Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout
no timeout
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout timeout
advance 5000 ms advance 5000 ms
@ -99,7 +110,12 @@ advance 500
run run
connected connected
# Send inactivity probe. # Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout timeout
run run
@ -131,7 +147,13 @@ connected
connected connected
last connected 0 ms ago, connected 0 ms total last connected 0 ms ago, connected 0 ms total
# Send inactivity probe. # Try timeout without noting that we tried to receive.
# (This does nothing since we never timeout in this case.)
timeout
no timeout
# Now disable the receive-attempted feature and timeout again.
receive-attempted LLONG_MAX
timeout timeout
advance 5000 ms advance 5000 ms
@ -357,7 +379,8 @@ connect-failed
###################################################################### ######################################################################
RECONNECT_CHECK([connections with no data preserve backoff], RECONNECT_CHECK([connections with no data preserve backoff],
[enable [receive-attempted LLONG_MAX
enable
# First connect, then idle timeout kills connection. # First connect, then idle timeout kills connection.
run run
@ -397,6 +420,7 @@ disconnected
# Back off for 4000 ms. # Back off for 4000 ms.
timeout timeout
], [### t=1000 ### ], [### t=1000 ###
receive-attempted LLONG_MAX
enable enable
in BACKOFF for 0 ms (0 ms backoff) in BACKOFF for 0 ms (0 ms backoff)
@ -1083,7 +1107,8 @@ timeout
###################################################################### ######################################################################
RECONNECT_CHECK([max-tries of 1 honored], RECONNECT_CHECK([max-tries of 1 honored],
[set-max-tries 1 [receive-attempted LLONG_MAX
set-max-tries 1
enable enable
# Connection succeeds. # Connection succeeds.
@ -1100,6 +1125,7 @@ run
disconnected disconnected
], ],
[### t=1000 ### [### t=1000 ###
receive-attempted LLONG_MAX
set-max-tries 1 set-max-tries 1
1 tries left 1 tries left
enable enable
@ -1185,6 +1211,8 @@ activity
# Connection times out. # Connection times out.
timeout timeout
receive-attempted LLONG_MAX
timeout
run run
timeout timeout
run run
@ -1243,6 +1271,9 @@ activity
created 1000, last activity 3000, last connected 2000 created 1000, last activity 3000, last connected 2000
# Connection times out. # Connection times out.
timeout
no timeout
receive-attempted LLONG_MAX
timeout timeout
advance 5000 ms advance 5000 ms

View File

@ -48,7 +48,6 @@ test_reconnect_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
now = 1000; now = 1000;
reconnect = reconnect_create(now); reconnect = reconnect_create(now);
reconnect_receive_attempted(reconnect, LLONG_MAX);
reconnect_set_name(reconnect, "remote"); reconnect_set_name(reconnect, "remote");
reconnect_get_stats(reconnect, now, &prev); reconnect_get_stats(reconnect, now, &prev);
printf("### t=%d ###\n", now); printf("### t=%d ###\n", now);
@ -278,6 +277,18 @@ do_listen_error(struct ovs_cmdl_context *ctx)
reconnect_listen_error(reconnect, now, atoi(ctx->argv[1])); reconnect_listen_error(reconnect, now, atoi(ctx->argv[1]));
} }
static void
do_receive_attempted(struct ovs_cmdl_context *ctx OVS_UNUSED)
{
if (!strcmp(ctx->argv[1], "now")) {
reconnect_receive_attempted(reconnect, now);
} else if (!strcmp(ctx->argv[1], "LLONG_MAX")) {
reconnect_receive_attempted(reconnect, LLONG_MAX);
} else {
ovs_fatal(0, "%s: bad argument %s", ctx->argv[0], ctx->argv[1]);
}
}
static const struct ovs_cmdl_command all_commands[] = { static const struct ovs_cmdl_command all_commands[] = {
{ "enable", NULL, 0, 0, do_enable, OVS_RO }, { "enable", NULL, 0, 0, do_enable, OVS_RO },
{ "disable", NULL, 0, 0, do_disable, OVS_RO }, { "disable", NULL, 0, 0, do_disable, OVS_RO },
@ -296,6 +307,7 @@ static const struct ovs_cmdl_command all_commands[] = {
{ "passive", NULL, 0, 0, do_set_passive, OVS_RO }, { "passive", NULL, 0, 0, do_set_passive, OVS_RO },
{ "listening", NULL, 0, 0, do_listening, OVS_RO }, { "listening", NULL, 0, 0, do_listening, OVS_RO },
{ "listen-error", NULL, 1, 1, do_listen_error, OVS_RO }, { "listen-error", NULL, 1, 1, do_listen_error, OVS_RO },
{ "receive-attempted", NULL, 1, 1, do_receive_attempted, OVS_RO },
{ NULL, NULL, 0, 0, NULL, OVS_RO }, { NULL, NULL, 0, 0, NULL, OVS_RO },
}; };

View File

@ -163,6 +163,16 @@ def do_listen_error(arg):
r.listen_error(now, int(arg)) r.listen_error(now, int(arg))
def do_receive_attempted(arg):
if arg == "now":
r.receive_attempted(now)
elif arg == "LLONG_MAX":
r.receive_attempted(None)
else:
sys.stderr.write("receive-attempted: bad argument %s\n" % arg)
sys.exit(1)
def main(): def main():
commands = { commands = {
"enable": do_enable, "enable": do_enable,
@ -180,7 +190,8 @@ def main():
"set-backoff-free-tries": do_set_backoff_free_tries, "set-backoff-free-tries": do_set_backoff_free_tries,
"passive": do_set_passive, "passive": do_set_passive,
"listening": do_listening, "listening": do_listening,
"listen-error": do_listen_error "listen-error": do_listen_error,
"receive-attempted": do_receive_attempted
} }
global now global now