2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-29 13:27:59 +00:00

raft: Make backlog thresholds configurable.

New appctl command 'cluster/set-backlog-threshold' to configure the
thresholds on the backlog of raft jsonrpc connections.  It could be used,
for example, in some extreme conditions where the size of a database is
expected to be very large, i.e. comparable with the default 4GB threshold.

Acked-by: Dumitru Ceara <dceara@redhat.com>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
This commit is contained in:
Ilya Maximets 2020-10-25 02:45:05 +02:00
parent eca34ebd7c
commit 80e3becdc1
3 changed files with 56 additions and 5 deletions

1
NEWS
View File

@ -8,6 +8,7 @@ Post-v2.14.0
after every DB compaction back to OS. Disabled by default. after every DB compaction back to OS. Disabled by default.
* Maximum backlog on RAFT connections limited to 500 messages or 4GB. * Maximum backlog on RAFT connections limited to 500 messages or 4GB.
Once threshold reached, connection is dropped (and re-established). Once threshold reached, connection is dropped (and re-established).
Use the 'cluster/set-backlog-threshold' command to change limits.
- DPDK: - DPDK:
* Removed support for vhost-user dequeue zero-copy. * Removed support for vhost-user dequeue zero-copy.
- The environment variable OVS_UNBOUND_CONF, if set, is now used - The environment variable OVS_UNBOUND_CONF, if set, is now used

View File

@ -381,6 +381,11 @@ This command must be executed on the leader. It initiates the change to the
cluster. To see if the change takes effect (committed), use cluster. To see if the change takes effect (committed), use
\fBcluster/status\fR to show the current setting. Once a change is committed, \fBcluster/status\fR to show the current setting. Once a change is committed,
it persists at server restarts. it persists at server restarts.
.IP "\fBcluster/set\-backlog\-threshold \fIdb\fR \fIn_msgs\fR \fIn_bytes\fR"
Sets the backlog limits for \fIdb\fR's RAFT connections to a maximum of
\fIn_msgs\fR messages or \fIn_bytes\fR bytes. If the backlog on one of the
connections reaches the limit, that connection will be disconnected (and re-established).
Values are checked only if the backlog contains more than 50 messages.
. .
.so lib/vlog-unixctl.man .so lib/vlog-unixctl.man
.so lib/memory-unixctl.man .so lib/memory-unixctl.man

View File

@ -305,6 +305,12 @@ struct raft {
bool ever_had_leader; /* There has been leader elected since the raft bool ever_had_leader; /* There has been leader elected since the raft
is initialized, meaning it is ever is initialized, meaning it is ever
connected. */ connected. */
/* Connection backlog limits. */
#define DEFAULT_MAX_BACKLOG_N_MSGS 500
#define DEFAULT_MAX_BACKLOG_N_BYTES UINT32_MAX
size_t conn_backlog_max_n_msgs; /* Number of messages. */
size_t conn_backlog_max_n_bytes; /* Number of bytes. */
}; };
/* All Raft structures. */ /* All Raft structures. */
@ -412,6 +418,9 @@ raft_alloc(void)
raft->election_timer = ELECTION_BASE_MSEC; raft->election_timer = ELECTION_BASE_MSEC;
raft->conn_backlog_max_n_msgs = DEFAULT_MAX_BACKLOG_N_MSGS;
raft->conn_backlog_max_n_bytes = DEFAULT_MAX_BACKLOG_N_BYTES;
return raft; return raft;
} }
@ -925,9 +934,6 @@ raft_reset_ping_timer(struct raft *raft)
raft->ping_timeout = time_msec() + raft->election_timer / 3; raft->ping_timeout = time_msec() + raft->election_timer / 3;
} }
#define RAFT_MAX_BACKLOG_N_MSGS 500
#define RAFT_MAX_BACKLOG_BYTES UINT32_MAX
static void static void
raft_add_conn(struct raft *raft, struct jsonrpc_session *js, raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
const struct uuid *sid, bool incoming) const struct uuid *sid, bool incoming)
@ -943,8 +949,8 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
conn->incoming = incoming; conn->incoming = incoming;
conn->js_seqno = jsonrpc_session_get_seqno(conn->js); conn->js_seqno = jsonrpc_session_get_seqno(conn->js);
jsonrpc_session_set_probe_interval(js, 0); jsonrpc_session_set_probe_interval(js, 0);
jsonrpc_session_set_backlog_threshold(js, RAFT_MAX_BACKLOG_N_MSGS, jsonrpc_session_set_backlog_threshold(js, raft->conn_backlog_max_n_msgs,
RAFT_MAX_BACKLOG_BYTES); raft->conn_backlog_max_n_bytes);
} }
/* Starts the local server in an existing Raft cluster, using the local copy of /* Starts the local server in an existing Raft cluster, using the local copy of
@ -4717,6 +4723,42 @@ raft_unixctl_change_election_timer(struct unixctl_conn *conn,
unixctl_command_reply(conn, "change of election timer initiated."); unixctl_command_reply(conn, "change of election timer initiated.");
} }
/* unixctl handler registered as 'cluster/set-backlog-threshold'.
 *
 * argv[1] is the name used to look up the cluster, argv[2] is the new
 * message-count limit and argv[3] is the new byte-count limit, both parsed
 * as base-10 integers.
 *
 * Replies with an error, changing nothing, if the cluster cannot be found,
 * if either number fails to parse, or if the message limit is below 50 or
 * either value does not fit in size_t.  Otherwise the limits are stored in
 * the raft instance and applied to the jsonrpc session of every connection
 * currently on its 'conns' list, and an empty success reply is sent. */
static void
raft_unixctl_set_backlog_threshold(struct unixctl_conn *conn,
                                   int argc OVS_UNUSED, const char *argv[],
                                   void *aux OVS_UNUSED)
{
    unsigned long long max_msgs, max_bytes;
    struct raft_conn *peer;
    struct raft *raft;

    raft = raft_lookup_by_name(argv[1]);
    if (raft == NULL) {
        unixctl_command_reply_error(conn, "unknown cluster");
        return;
    }

    /* Both numeric arguments have to parse; the second is only looked at
     * when the first one was fine. */
    if (!(str_to_ullong(argv[2], 10, &max_msgs)
          && str_to_ullong(argv[3], 10, &max_bytes))) {
        unixctl_command_reply_error(conn, "invalid argument");
        return;
    }

    /* Accept only a message limit of at least 50, and only values that can
     * be stored in the size_t fields of 'struct raft'. */
    if (!(max_msgs >= 50 && max_msgs <= SIZE_MAX && max_bytes <= SIZE_MAX)) {
        unixctl_command_reply_error(conn, "values out of range");
        return;
    }

    /* Remember the limits in the raft instance... */
    raft->conn_backlog_max_n_msgs = max_msgs;
    raft->conn_backlog_max_n_bytes = max_bytes;

    /* ...and push them to every connection that already exists. */
    LIST_FOR_EACH (peer, list_node, &raft->conns) {
        jsonrpc_session_set_backlog_threshold(peer->js,
                                              raft->conn_backlog_max_n_msgs,
                                              raft->conn_backlog_max_n_bytes);
    }

    unixctl_command_reply(conn, NULL);
}
static void static void
raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED,
int argc OVS_UNUSED, const char *argv[], int argc OVS_UNUSED, const char *argv[],
@ -4777,6 +4819,9 @@ raft_init(void)
raft_unixctl_kick, NULL); raft_unixctl_kick, NULL);
unixctl_command_register("cluster/change-election-timer", "DB TIME", 2, 2, unixctl_command_register("cluster/change-election-timer", "DB TIME", 2, 2,
raft_unixctl_change_election_timer, NULL); raft_unixctl_change_election_timer, NULL);
unixctl_command_register("cluster/set-backlog-threshold",
"DB N_MSGS N_BYTES", 3, 3,
raft_unixctl_set_backlog_threshold, NULL);
unixctl_command_register("cluster/failure-test", "FAILURE SCENARIO", 1, 1, unixctl_command_register("cluster/failure-test", "FAILURE SCENARIO", 1, 1,
raft_unixctl_failure_test, NULL); raft_unixctl_failure_test, NULL);
ovsthread_once_done(&once); ovsthread_once_done(&once);