mirror of
https://github.com/openvswitch/ovs
synced 2025-09-05 08:45:23 +00:00
raft: Reintroduce jsonrpc inactivity probes.
It's not enough to just have heartbeats. RAFT heartbeats are unidirectional, i.e. leader sends them to followers but not the other way around. Missing heartbeats provokes followers to start election, but if leader will not receive any replies it will not do anything while there is a quorum, i.e. there are enough other servers to make decisions. This leads to situation that while TCP connection is established, leader will continue to blindly send messages to it. In our case this leads to growing send backlog. Connection will be terminated eventually due to excessive send backlog, but this this might take a lot of time and wasted process memory. At the same time 'candidate' will continue to send vote requests to the dead connection on its side. To fix that we need to reintroduce inactivity probes that will drop connection if there was no incoming traffic for a long time and remote server doesn't reply to the "echo" request. Probe interval might be chosen based on an election timeout to avoid issues described in commitdb5a066c17
. Reported-by: Carlos Goncalves <cgoncalves@redhat.com> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1929690 Fixes:db5a066c17
("raft: Disable RAFT jsonrpc inactivity probe.") Acked-by: Han Zhou <hzhou@ovn.org> Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
32
ovsdb/raft.c
32
ovsdb/raft.c
@@ -940,6 +940,34 @@ raft_reset_ping_timer(struct raft *raft)
|
||||
raft->ping_timeout = time_msec() + raft->election_timer / 3;
|
||||
}
|
||||
|
||||
static void
|
||||
raft_conn_update_probe_interval(struct raft *raft, struct raft_conn *r_conn)
|
||||
{
|
||||
/* Inactivity probe will be sent if connection will remain idle for the
|
||||
* time of an election timeout. Connection will be dropped if inactivity
|
||||
* will last twice that time.
|
||||
*
|
||||
* It's not enough to just have heartbeats if connection is still
|
||||
* established, but no packets received from the other side. Without
|
||||
* inactivity probe follower will just try to initiate election
|
||||
* indefinitely staying in 'candidate' role. And the leader will continue
|
||||
* to send heartbeats to the dead connection thinking that remote server
|
||||
* is still part of the cluster. */
|
||||
int probe_interval = raft->election_timer + ELECTION_RANGE_MSEC;
|
||||
|
||||
jsonrpc_session_set_probe_interval(r_conn->js, probe_interval);
|
||||
}
|
||||
|
||||
static void
|
||||
raft_update_probe_intervals(struct raft *raft)
|
||||
{
|
||||
struct raft_conn *r_conn;
|
||||
|
||||
LIST_FOR_EACH (r_conn, list_node, &raft->conns) {
|
||||
raft_conn_update_probe_interval(raft, r_conn);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
|
||||
const struct uuid *sid, bool incoming)
|
||||
@@ -954,7 +982,7 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
|
||||
&conn->sid);
|
||||
conn->incoming = incoming;
|
||||
conn->js_seqno = jsonrpc_session_get_seqno(conn->js);
|
||||
jsonrpc_session_set_probe_interval(js, 0);
|
||||
raft_conn_update_probe_interval(raft, conn);
|
||||
jsonrpc_session_set_backlog_threshold(js, raft->conn_backlog_max_n_msgs,
|
||||
raft->conn_backlog_max_n_bytes);
|
||||
}
|
||||
@@ -2804,6 +2832,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index)
|
||||
raft->election_timer, e->election_timer);
|
||||
raft->election_timer = e->election_timer;
|
||||
raft->election_timer_new = 0;
|
||||
raft_update_probe_intervals(raft);
|
||||
}
|
||||
if (e->servers) {
|
||||
/* raft_run_reconfigure() can write a new Raft entry, which can
|
||||
@@ -2820,6 +2849,7 @@ raft_update_commit_index(struct raft *raft, uint64_t new_commit_index)
|
||||
VLOG_INFO("Election timer changed from %"PRIu64" to %"PRIu64,
|
||||
raft->election_timer, e->election_timer);
|
||||
raft->election_timer = e->election_timer;
|
||||
raft_update_probe_intervals(raft);
|
||||
}
|
||||
}
|
||||
/* Check if any pending command can be completed, and complete it.
|
||||
|
Reference in New Issue
Block a user