2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-22 01:51:26 +00:00

ovsdb-tool: add --election-timer=ms option to 'create-cluster'

After creating the new clustered database write a raft entry that
sets the desired election timer. This allows CMSes to set the
election timer at cluster start and avoid an error-prone
election timer modification process after the cluster is up.

Reported-at: https://bugzilla.redhat.com/1831778

Signed-off-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
This commit is contained in:
Dan Williams 2021-05-25 11:21:15 -05:00 committed by Ben Pfaff
parent 13c0eaa7b4
commit fb1e7863e8
5 changed files with 93 additions and 12 deletions

3
NEWS
View File

@ -12,6 +12,9 @@ Post-v2.15.0
- DPDK:
* OVS validated with DPDK 20.11.1. It is recommended to use this version
until further releases.
- ovsdb-tool:
* New option '--election-timer' to the 'create-cluster' command to set the
leader election timer during cluster creation.
v2.15.0 - 15 Feb 2021

View File

@ -10,7 +10,7 @@ ovsdb\-tool \- Open vSwitch database management utility
.IP "Database Creation Commands:"
\fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate \fR[\fIdb\fR [\fIschema\fR]]
.br
\fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate\-cluster \fIdb contents address\fR
\fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-election\-timer=\fIms\fR] \fBcreate\-cluster \fIdb contents address\fR
.br
\fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR...
.IP "Version Management Commands:"
@ -89,7 +89,7 @@ format, as specified in the OVSDB specification. The new database is
initially empty. (You can use \fBcp\fR to copy a database including
both its schema and data.)
.
.IP "\fBcreate\-cluster\fI db contents local"
.IP "[\fB\-\-election\-timer=\fIms\fR] \fBcreate\-cluster\fI db contents local"
Use this command to initialize the first server in a high-availability
cluster of 3 (or more) database servers, e.g. for a database in an
environment that cannot tolerate a single point of failure. It creates
@ -108,6 +108,12 @@ file that contains either an OVSDB schema in JSON format or a
standalone OVSDB database. If it is a schema file, the new database
will initially be empty, with the given schema. If it is a database
file, the new database will have the same schema and contents.
.IP
Leader election will be initiated by a follower if there is no heartbeat
received from the cluster leader within the specified election timer.
The default leader election timer is 1000 milliseconds. To use a different value
when creating the database, specify \fB\-\-election\-timer=\fIms\fR, where
\fIms\fR is a value in milliseconds between 100 and 600000 inclusive.
.
.IP "[\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR..."
Use this command to initialize each server after the first one in an

View File

@ -58,6 +58,9 @@ static const char *rbac_role;
/* --cid: Cluster ID for "join-cluster" command. */
static struct uuid cid;
/* --election-timer: Election timer for "create-cluster" command. */
static uint64_t election_timer;
static const struct ovs_cmdl_command *get_all_commands(void);
OVS_NO_RETURN static void usage(void);
@ -85,12 +88,14 @@ parse_options(int argc, char *argv[])
{
enum {
OPT_RBAC_ROLE = UCHAR_MAX + 1,
OPT_CID
OPT_CID,
OPT_ELECTION_TIMER,
};
static const struct option long_options[] = {
{"more", no_argument, NULL, 'm'},
{"rbac-role", required_argument, NULL, OPT_RBAC_ROLE},
{"cid", required_argument, NULL, OPT_CID},
{"election-timer", required_argument, NULL, OPT_ELECTION_TIMER},
{"verbose", optional_argument, NULL, 'v'},
{"help", no_argument, NULL, 'h'},
{"option", no_argument, NULL, 'o'},
@ -100,6 +105,7 @@ parse_options(int argc, char *argv[])
char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
for (;;) {
struct ovsdb_error *error;
int c;
c = getopt_long(argc, argv, short_options, long_options, NULL);
@ -122,6 +128,14 @@ parse_options(int argc, char *argv[])
}
break;
case OPT_ELECTION_TIMER:
election_timer = atoll(optarg);
error = raft_validate_election_timer(election_timer);
if (error) {
ovs_fatal(0, "%s", ovsdb_error_to_string_free(error));
}
break;
case 'h':
usage();
@ -153,7 +167,7 @@ usage(void)
printf("%s: Open vSwitch database management utility\n"
"usage: %s [OPTIONS] COMMAND [ARG...]\n"
" create [DB [SCHEMA]] create DB with the given SCHEMA\n"
" create-cluster DB CONTENTS LOCAL\n"
" [--election-timer=ms] create-cluster DB CONTENTS LOCAL\n"
" create clustered DB with given CONTENTS and LOCAL address\n"
" [--cid=UUID] join-cluster DB NAME LOCAL REMOTE...\n"
" join clustered DB with given NAME and LOCAL and REMOTE addrs\n"
@ -303,7 +317,7 @@ do_create_cluster(struct ovs_cmdl_context *ctx)
/* Create database file. */
struct json *snapshot = json_array_create_2(schema_json, data);
check_ovsdb_error(raft_create_cluster(db_file_name, schema->name,
local, snapshot));
local, snapshot, election_timer));
ovsdb_schema_destroy(schema);
json_destroy(snapshot);
}

View File

@ -201,6 +201,8 @@ struct raft {
#define ELECTION_BASE_MSEC 1000
#define ELECTION_RANGE_MSEC 1000
#define ELECTION_MIN_MSEC 100
#define ELECTION_MAX_MSEC 600000
/* The election timeout base value for leader election, in milliseconds.
* It can be set by unixctl cluster/change-election-timer. Default value is
* ELECTION_BASE_MSEC. */
@ -446,11 +448,16 @@ raft_alloc(void)
* This only creates the on-disk file. Use raft_open() to start operating the
* new server.
*
* The optional election_timer argument, when greater than zero, sets the given
* leader election timer for the new cluster, in milliseconds. If non-zero, it
* must be between 100 and 600000 inclusive.
*
* Returns null if successful, otherwise an ovsdb_error describing the
* problem. */
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
raft_create_cluster(const char *file_name, const char *name,
const char *local_address, const struct json *data)
const char *local_address, const struct json *data,
const uint64_t election_timer)
{
/* Parse and verify validity of the local address. */
struct ovsdb_error *error = raft_address_validate(local_address);
@ -458,6 +465,14 @@ raft_create_cluster(const char *file_name, const char *name,
return error;
}
/* Validate optional election timer */
if (election_timer > 0) {
error = raft_validate_election_timer(election_timer);
if (error) {
return error;
}
}
/* Create log file. */
struct ovsdb_log *log;
error = ovsdb_log_open(file_name, RAFT_MAGIC, OVSDB_LOG_CREATE_EXCL,
@ -467,6 +482,8 @@ raft_create_cluster(const char *file_name, const char *name,
}
/* Write log file. */
const uint64_t term = 1;
uint64_t index = 1;
struct raft_header h = {
.sid = uuid_random(),
.cid = uuid_random(),
@ -474,9 +491,9 @@ raft_create_cluster(const char *file_name, const char *name,
.local_address = xstrdup(local_address),
.joining = false,
.remote_addresses = SSET_INITIALIZER(&h.remote_addresses),
.snap_index = 1,
.snap_index = index++,
.snap = {
.term = 1,
.term = term,
.data = json_nullable_clone(data),
.eid = uuid_random(),
.servers = json_object_create(),
@ -487,11 +504,33 @@ raft_create_cluster(const char *file_name, const char *name,
json_string_create(local_address));
error = ovsdb_log_write_and_free(log, raft_header_to_json(&h));
raft_header_uninit(&h);
if (!error) {
error = ovsdb_log_commit_block(log);
if (error) {
goto error;
}
ovsdb_log_close(log);
if (election_timer > 0) {
struct raft_record r = {
.type = RAFT_REC_ENTRY,
.term = term,
.entry = {
.index = index,
.data = NULL,
.servers = NULL,
.election_timer = election_timer,
.eid = UUID_ZERO,
},
};
error = ovsdb_log_write_and_free(log, raft_record_to_json(&r));
raft_record_uninit(&r);
if (error) {
goto error;
}
}
error = ovsdb_log_commit_block(log);
error:
ovsdb_log_close(log);
return error;
}
@ -1078,6 +1117,21 @@ raft_get_memory_usage(const struct raft *raft, struct simap *usage)
simap_increase(usage, "raft-log", raft->log_end - raft->log_start);
}
/* Checks that 'ms', an election timer value in milliseconds, lies within the
 * inclusive range [ELECTION_MIN_MSEC, ELECTION_MAX_MSEC].  Anything shorter
 * than 100 ms or longer than 10 minutes is not a sensible election timer.
 *
 * Returns null if 'ms' is acceptable, otherwise an ovsdb_error that states
 * the permitted bounds. */
struct ovsdb_error *
raft_validate_election_timer(const uint64_t ms)
{
    if (ms >= ELECTION_MIN_MSEC && ms <= ELECTION_MAX_MSEC) {
        return NULL;
    }

    return ovsdb_error(NULL, "election timer must be between %d and "
                       "%d, in msec.", ELECTION_MIN_MSEC,
                       ELECTION_MAX_MSEC);
}
/* Returns true if 'raft' has completed joining its cluster, has not left or
* initiated leaving the cluster, does not have failed disk storage, and is
* apparently connected to the leader in a healthy way (or is itself the

View File

@ -80,7 +80,8 @@ struct sset;
struct ovsdb_error *raft_create_cluster(const char *file_name,
const char *name,
const char *local_address,
const struct json *snapshot)
const struct json *snapshot,
const uint64_t election_timer)
OVS_WARN_UNUSED_RESULT;
struct ovsdb_error *raft_join_cluster(const char *file_name, const char *name,
const char *local_address,
@ -116,6 +117,9 @@ bool raft_is_connected(const struct raft *);
bool raft_is_leader(const struct raft *);
void raft_get_memory_usage(const struct raft *, struct simap *usage);
/* Parameter validation */
struct ovsdb_error *raft_validate_election_timer(const uint64_t ms);
/* Joining a cluster. */
bool raft_is_joining(const struct raft *);