diff --git a/lib/reconnect.c b/lib/reconnect.c index a929ddfd2..89a0bcaf9 100644 --- a/lib/reconnect.c +++ b/lib/reconnect.c @@ -75,7 +75,8 @@ struct reconnect { static void reconnect_transition__(struct reconnect *, long long int now, enum state state); -static long long int reconnect_deadline__(const struct reconnect *); +static long long int reconnect_deadline__(const struct reconnect *, + long long int now); static bool reconnect_may_retry(struct reconnect *); static const char * @@ -539,7 +540,7 @@ reconnect_transition__(struct reconnect *fsm, long long int now, } static long long int -reconnect_deadline__(const struct reconnect *fsm) +reconnect_deadline__(const struct reconnect *fsm, long long int now) { ovs_assert(fsm->state_entered != LLONG_MIN); switch (fsm->state) { @@ -557,8 +558,18 @@ reconnect_deadline__(const struct reconnect *fsm) if (fsm->probe_interval) { long long int base = MAX(fsm->last_activity, fsm->state_entered); long long int expiration = base + fsm->probe_interval; - if (fsm->last_receive_attempt >= expiration) { + if (now < expiration || fsm->last_receive_attempt >= expiration) { + /* We still have time before the expiration or the time has + * already passed and there was no activity. In the first case + * we need to wait for the expiration, in the second - we're + * already past the deadline. */ return expiration; + } else { + /* Time has already passed, but we didn't attempt to receive + * anything. We need to wake up and try to receive even if + * nothing is pending, so we can update the expiration time or + * transition to a different state. */ + return now + 1; } } return LLONG_MAX; @@ -566,8 +577,10 @@ reconnect_deadline__(const struct reconnect *fsm) case S_IDLE: if (fsm->probe_interval) { long long int expiration = fsm->state_entered + fsm->probe_interval; - if (fsm->last_receive_attempt >= expiration) { + if (now < expiration || fsm->last_receive_attempt >= expiration) { return expiration; + } else { + return now + 1; } } return LLONG_MAX; @@ -618,7 +631,7 @@ reconnect_deadline__(const struct reconnect *fsm) enum reconnect_action reconnect_run(struct reconnect *fsm, long long int now) { - if (now >= reconnect_deadline__(fsm)) { + if (now >= reconnect_deadline__(fsm, now)) { switch (fsm->state) { case S_VOID: return 0; @@ -671,7 +684,7 @@ reconnect_wait(struct reconnect *fsm, long long int now) int reconnect_timeout(struct reconnect *fsm, long long int now) { - long long int deadline = reconnect_deadline__(fsm); + long long int deadline = reconnect_deadline__(fsm, now); if (deadline != LLONG_MAX) { long long int remaining = deadline - now; return MAX(0, MIN(INT_MAX, remaining)); diff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py index c4c6c87e9..6b0d023ae 100644 --- a/python/ovs/reconnect.py +++ b/python/ovs/reconnect.py @@ -44,7 +44,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return None @staticmethod @@ -56,7 +56,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return None @staticmethod @@ -68,7 +68,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return fsm.state_entered + fsm.backoff @staticmethod @@ -80,7 +80,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return fsm.state_entered + max(1000, fsm.backoff) @staticmethod @@ -92,13 +92,24 @@ class Reconnect(object): is_connected = True @staticmethod - def deadline(fsm): + def deadline(fsm, now): if fsm.probe_interval: base = max(fsm.last_activity, fsm.state_entered) expiration = base + fsm.probe_interval - if (fsm.last_receive_attempt is None or + if (now < expiration or + fsm.last_receive_attempt is None or fsm.last_receive_attempt >= expiration): + # We still have time before the expiration or the time has + # already passed and there was no activity. In the first + # case we need to wait for the expiration, in the second - + # we're already past the deadline. */ return expiration + else: + # Time has already passed, but we didn't attempt to receive + # anything. We need to wake up and try to receive even if + # nothing is pending, so we can update the expiration time + # or transition to a different state. + return now + 1 return None @staticmethod @@ -114,12 +125,15 @@ class Reconnect(object): is_connected = True @staticmethod - def deadline(fsm): + def deadline(fsm, now): if fsm.probe_interval: expiration = fsm.state_entered + fsm.probe_interval - if (fsm.last_receive_attempt is None or + if (now < expiration or + fsm.last_receive_attempt is None or fsm.last_receive_attempt >= expiration): return expiration + else: + return now + 1 return None @staticmethod @@ -134,7 +148,7 @@ class Reconnect(object): is_connected = False @staticmethod - def deadline(fsm): + def deadline(fsm, now): return fsm.state_entered @staticmethod @@ -545,7 +559,7 @@ class Reconnect(object): returned if the "probe interval" is nonzero--see self.set_probe_interval()).""" - deadline = self.state.deadline(self) + deadline = self.state.deadline(self, now) if deadline is not None and now >= deadline: return self.state.run(self, now) else: @@ -562,7 +576,7 @@ class Reconnect(object): """Returns the number of milliseconds after which self.run() should be called if nothing else notable happens in the meantime, or None if this is currently unnecessary.""" - deadline = self.state.deadline(self) + deadline = self.state.deadline(self, now) if deadline is not None: remaining = deadline - now return max(0, remaining) diff --git a/tests/reconnect.at b/tests/reconnect.at index 0f74709f5..5bca84351 100644 --- a/tests/reconnect.at +++ b/tests/reconnect.at @@ -39,8 +39,19 @@ run connected # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) +# Timeout should be scheduled to the next probe interval. timeout +run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout +run + +# Short time advance past the original probe interval, but not expired still. +timeout +run # Now disable the receive-attempted feature and timeout again. receive-attempted LLONG_MAX @@ -67,18 +78,37 @@ connected last connected 0 ms ago, connected 0 ms total # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) -timeout - no timeout - -# Now disable the receive-attempted feature and timeout again. -receive-attempted LLONG_MAX +# Timeout should be scheduled to the next probe interval. timeout advance 5000 ms ### t=6000 ### in ACTIVE for 5000 ms (0 ms backoff) run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout + advance 1 ms + +### t=6001 ### + in ACTIVE for 5001 ms (0 ms backoff) +run + +# Short time advance past the original probe interval, but not expired still. +timeout + advance 1 ms + +### t=6002 ### + in ACTIVE for 5002 ms (0 ms backoff) +run + +# Now disable the receive-attempted feature and timeout again. +receive-attempted LLONG_MAX +timeout + advance 0 ms +run should send probe in IDLE for 0 ms (0 ms backoff) @@ -86,7 +116,7 @@ run timeout advance 5000 ms -### t=11000 ### +### t=11002 ### in IDLE for 5000 ms (0 ms backoff) run should disconnect @@ -94,7 +124,7 @@ disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected - disconnected at 11000 ms (0 ms ago) + disconnected at 11002 ms (0 ms ago) ]) ###################################################################### @@ -111,8 +141,19 @@ run connected # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) +# Timeout should be scheduled to the next probe interval. timeout +run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout +run + +# Short time advance past the original probe interval, but not expired still. +timeout +run # Now disable the receive-attempted feature and timeout again. receive-attempted LLONG_MAX @@ -148,18 +189,37 @@ connected last connected 0 ms ago, connected 0 ms total # Try timeout without noting that we tried to receive. -# (This does nothing since we never timeout in this case.) -timeout - no timeout - -# Now disable the receive-attempted feature and timeout again. -receive-attempted LLONG_MAX +# Timeout should be scheduled to the next probe interval. timeout advance 5000 ms ### t=6500 ### in ACTIVE for 5000 ms (0 ms backoff) run + +# Once we reached the timeout, it should not expire until the receive actually +# attempted. However, we still need to wake up as soon as possible in order to +# have a chance to mark the receive attempt even if nothing was received. +timeout + advance 1 ms + +### t=6501 ### + in ACTIVE for 5001 ms (0 ms backoff) +run + +# Short time advance past the original probe interval, but not expired still. +timeout + advance 1 ms + +### t=6502 ### + in ACTIVE for 5002 ms (0 ms backoff) +run + +# Now disable the receive-attempted feature and timeout again. +receive-attempted LLONG_MAX +timeout + advance 0 ms +run should send probe in IDLE for 0 ms (0 ms backoff) @@ -167,7 +227,7 @@ run timeout advance 5000 ms -### t=11500 ### +### t=11502 ### in IDLE for 5000 ms (0 ms backoff) run should disconnect @@ -175,7 +235,7 @@ disconnected in BACKOFF for 0 ms (1000 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected - disconnected at 11500 ms (0 ms ago) + disconnected at 11502 ms (0 ms ago) ]) ###################################################################### @@ -1271,14 +1331,14 @@ activity created 1000, last activity 3000, last connected 2000 # Connection times out. -timeout - no timeout -receive-attempted LLONG_MAX timeout advance 5000 ms ### t=8000 ### in ACTIVE for 6000 ms (1000 ms backoff) +receive-attempted LLONG_MAX +timeout + advance 0 ms run should send probe in IDLE for 0 ms (1000 ms backoff)