2
0
mirror of https://github.com/openvswitch/ovs synced 2025-08-29 13:27:59 +00:00

ovsdb-tool: add --election-timer=ms option to 'create-cluster'

After creating the new clustered database write a raft entry that
sets the desired election timer. This allows CMSes to set the
election timer at cluster start and avoid an error-prone
election timer modification process after the cluster is up.

Reported-at: https://bugzilla.redhat.com/1831778

Signed-off-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: Ben Pfaff <blp@ovn.org>
This commit is contained in:
Dan Williams 2021-05-25 11:21:15 -05:00 committed by Ben Pfaff
parent 13c0eaa7b4
commit fb1e7863e8
5 changed files with 93 additions and 12 deletions

3
NEWS
View File

@ -12,6 +12,9 @@ Post-v2.15.0
- DPDK: - DPDK:
* OVS validated with DPDK 20.11.1. It is recommended to use this version * OVS validated with DPDK 20.11.1. It is recommended to use this version
until further releases. until further releases.
- ovsdb-tool:
* New option '--election-timer' to the 'create-cluster' command to set the
leader election timer during cluster creation.
v2.15.0 - 15 Feb 2021 v2.15.0 - 15 Feb 2021

View File

@ -10,7 +10,7 @@ ovsdb\-tool \- Open vSwitch database management utility
.IP "Database Creation Commands:" .IP "Database Creation Commands:"
\fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate \fR[\fIdb\fR [\fIschema\fR]] \fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate \fR[\fIdb\fR [\fIschema\fR]]
.br .br
\fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate\-cluster \fIdb contents address\fR \fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-election\-timer=\fIms\fR] \fBcreate\-cluster \fIdb contents address\fR
.br .br
\fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR... \fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR...
.IP "Version Management Commands:" .IP "Version Management Commands:"
@ -89,7 +89,7 @@ format, as specified in the OVSDB specification. The new database is
initially empty. (You can use \fBcp\fR to copy a database including initially empty. (You can use \fBcp\fR to copy a database including
both its schema and data.) both its schema and data.)
. .
.IP "\fBcreate\-cluster\fI db contents local" .IP "[\fB\-\-election\-timer=\fIms\fR] \fBcreate\-cluster\fI db contents local"
Use this command to initialize the first server in a high-availability Use this command to initialize the first server in a high-availability
cluster of 3 (or more) database servers, e.g. for a database in an cluster of 3 (or more) database servers, e.g. for a database in an
environment that cannot tolerate a single point of failure. It creates environment that cannot tolerate a single point of failure. It creates
@ -108,6 +108,12 @@ file that contains either an OVSDB schema in JSON format or a
standalone OVSDB database. If it is a schema file, the new database standalone OVSDB database. If it is a schema file, the new database
will initially be empty, with the given schema. If it is a database will initially be empty, with the given schema. If it is a database
file, the new database will have the same schema and contents. file, the new database will have the same schema and contents.
.IP
Leader election will be initiated by a follower if there is no heartbeat
received from the cluster leader within the specified election timer.
The default leader election timer is 1000 milliseconds. To use a different value
when creating the database, specify \fB\-\-election\-timer=\fIms\fR, where
\fIms\fR is a value in milliseconds between 100 and 600000 inclusive.
. .
.IP "[\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR..." .IP "[\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR..."
Use this command to initialize each server after the first one in an Use this command to initialize each server after the first one in an

View File

@ -58,6 +58,9 @@ static const char *rbac_role;
/* --cid: Cluster ID for "join-cluster" command. */ /* --cid: Cluster ID for "join-cluster" command. */
static struct uuid cid; static struct uuid cid;
/* --election-timer: Election timer for "create-cluster" command. */
static uint64_t election_timer;
static const struct ovs_cmdl_command *get_all_commands(void); static const struct ovs_cmdl_command *get_all_commands(void);
OVS_NO_RETURN static void usage(void); OVS_NO_RETURN static void usage(void);
@ -85,12 +88,14 @@ parse_options(int argc, char *argv[])
{ {
enum { enum {
OPT_RBAC_ROLE = UCHAR_MAX + 1, OPT_RBAC_ROLE = UCHAR_MAX + 1,
OPT_CID OPT_CID,
OPT_ELECTION_TIMER,
}; };
static const struct option long_options[] = { static const struct option long_options[] = {
{"more", no_argument, NULL, 'm'}, {"more", no_argument, NULL, 'm'},
{"rbac-role", required_argument, NULL, OPT_RBAC_ROLE}, {"rbac-role", required_argument, NULL, OPT_RBAC_ROLE},
{"cid", required_argument, NULL, OPT_CID}, {"cid", required_argument, NULL, OPT_CID},
{"election-timer", required_argument, NULL, OPT_ELECTION_TIMER},
{"verbose", optional_argument, NULL, 'v'}, {"verbose", optional_argument, NULL, 'v'},
{"help", no_argument, NULL, 'h'}, {"help", no_argument, NULL, 'h'},
{"option", no_argument, NULL, 'o'}, {"option", no_argument, NULL, 'o'},
@ -100,6 +105,7 @@ parse_options(int argc, char *argv[])
char *short_options = ovs_cmdl_long_options_to_short_options(long_options); char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
for (;;) { for (;;) {
struct ovsdb_error *error;
int c; int c;
c = getopt_long(argc, argv, short_options, long_options, NULL); c = getopt_long(argc, argv, short_options, long_options, NULL);
@ -122,6 +128,14 @@ parse_options(int argc, char *argv[])
} }
break; break;
case OPT_ELECTION_TIMER:
election_timer = atoll(optarg);
error = raft_validate_election_timer(election_timer);
if (error) {
ovs_fatal(0, "%s", ovsdb_error_to_string_free(error));
}
break;
case 'h': case 'h':
usage(); usage();
@ -153,7 +167,7 @@ usage(void)
printf("%s: Open vSwitch database management utility\n" printf("%s: Open vSwitch database management utility\n"
"usage: %s [OPTIONS] COMMAND [ARG...]\n" "usage: %s [OPTIONS] COMMAND [ARG...]\n"
" create [DB [SCHEMA]] create DB with the given SCHEMA\n" " create [DB [SCHEMA]] create DB with the given SCHEMA\n"
" create-cluster DB CONTENTS LOCAL\n" " [--election-timer=ms] create-cluster DB CONTENTS LOCAL\n"
" create clustered DB with given CONTENTS and LOCAL address\n" " create clustered DB with given CONTENTS and LOCAL address\n"
" [--cid=UUID] join-cluster DB NAME LOCAL REMOTE...\n" " [--cid=UUID] join-cluster DB NAME LOCAL REMOTE...\n"
" join clustered DB with given NAME and LOCAL and REMOTE addrs\n" " join clustered DB with given NAME and LOCAL and REMOTE addrs\n"
@ -303,7 +317,7 @@ do_create_cluster(struct ovs_cmdl_context *ctx)
/* Create database file. */ /* Create database file. */
struct json *snapshot = json_array_create_2(schema_json, data); struct json *snapshot = json_array_create_2(schema_json, data);
check_ovsdb_error(raft_create_cluster(db_file_name, schema->name, check_ovsdb_error(raft_create_cluster(db_file_name, schema->name,
local, snapshot)); local, snapshot, election_timer));
ovsdb_schema_destroy(schema); ovsdb_schema_destroy(schema);
json_destroy(snapshot); json_destroy(snapshot);
} }

View File

@ -201,6 +201,8 @@ struct raft {
#define ELECTION_BASE_MSEC 1000 #define ELECTION_BASE_MSEC 1000
#define ELECTION_RANGE_MSEC 1000 #define ELECTION_RANGE_MSEC 1000
#define ELECTION_MIN_MSEC 100
#define ELECTION_MAX_MSEC 600000
/* The election timeout base value for leader election, in milliseconds. /* The election timeout base value for leader election, in milliseconds.
* It can be set by unixctl cluster/change-election-timer. Default value is * It can be set by unixctl cluster/change-election-timer. Default value is
* ELECTION_BASE_MSEC. */ * ELECTION_BASE_MSEC. */
@ -446,11 +448,16 @@ raft_alloc(void)
* This only creates the on-disk file. Use raft_open() to start operating the * This only creates the on-disk file. Use raft_open() to start operating the
* new server. * new server.
* *
* The optional election_timer argument, when greater than zero, sets the given
* leader election timer for the new cluster, in milliseconds. If non-zero, it
* must be between 100 and 600000 inclusive.
*
* Returns null if successful, otherwise an ovsdb_error describing the * Returns null if successful, otherwise an ovsdb_error describing the
* problem. */ * problem. */
struct ovsdb_error * OVS_WARN_UNUSED_RESULT struct ovsdb_error * OVS_WARN_UNUSED_RESULT
raft_create_cluster(const char *file_name, const char *name, raft_create_cluster(const char *file_name, const char *name,
const char *local_address, const struct json *data) const char *local_address, const struct json *data,
const uint64_t election_timer)
{ {
/* Parse and verify validity of the local address. */ /* Parse and verify validity of the local address. */
struct ovsdb_error *error = raft_address_validate(local_address); struct ovsdb_error *error = raft_address_validate(local_address);
@ -458,6 +465,14 @@ raft_create_cluster(const char *file_name, const char *name,
return error; return error;
} }
/* Validate optional election timer */
if (election_timer > 0) {
error = raft_validate_election_timer(election_timer);
if (error) {
return error;
}
}
/* Create log file. */ /* Create log file. */
struct ovsdb_log *log; struct ovsdb_log *log;
error = ovsdb_log_open(file_name, RAFT_MAGIC, OVSDB_LOG_CREATE_EXCL, error = ovsdb_log_open(file_name, RAFT_MAGIC, OVSDB_LOG_CREATE_EXCL,
@ -467,6 +482,8 @@ raft_create_cluster(const char *file_name, const char *name,
} }
/* Write log file. */ /* Write log file. */
const uint64_t term = 1;
uint64_t index = 1;
struct raft_header h = { struct raft_header h = {
.sid = uuid_random(), .sid = uuid_random(),
.cid = uuid_random(), .cid = uuid_random(),
@ -474,9 +491,9 @@ raft_create_cluster(const char *file_name, const char *name,
.local_address = xstrdup(local_address), .local_address = xstrdup(local_address),
.joining = false, .joining = false,
.remote_addresses = SSET_INITIALIZER(&h.remote_addresses), .remote_addresses = SSET_INITIALIZER(&h.remote_addresses),
.snap_index = 1, .snap_index = index++,
.snap = { .snap = {
.term = 1, .term = term,
.data = json_nullable_clone(data), .data = json_nullable_clone(data),
.eid = uuid_random(), .eid = uuid_random(),
.servers = json_object_create(), .servers = json_object_create(),
@ -487,11 +504,33 @@ raft_create_cluster(const char *file_name, const char *name,
json_string_create(local_address)); json_string_create(local_address));
error = ovsdb_log_write_and_free(log, raft_header_to_json(&h)); error = ovsdb_log_write_and_free(log, raft_header_to_json(&h));
raft_header_uninit(&h); raft_header_uninit(&h);
if (!error) { if (error) {
error = ovsdb_log_commit_block(log); goto error;
} }
ovsdb_log_close(log);
if (election_timer > 0) {
struct raft_record r = {
.type = RAFT_REC_ENTRY,
.term = term,
.entry = {
.index = index,
.data = NULL,
.servers = NULL,
.election_timer = election_timer,
.eid = UUID_ZERO,
},
};
error = ovsdb_log_write_and_free(log, raft_record_to_json(&r));
raft_record_uninit(&r);
if (error) {
goto error;
}
}
error = ovsdb_log_commit_block(log);
error:
ovsdb_log_close(log);
return error; return error;
} }
@ -1078,6 +1117,21 @@ raft_get_memory_usage(const struct raft *raft, struct simap *usage)
simap_increase(usage, "raft-log", raft->log_end - raft->log_start); simap_increase(usage, "raft-log", raft->log_end - raft->log_start);
} }
/* Checks that 'ms', an election timer expressed in milliseconds, falls within
 * the inclusive range [ELECTION_MIN_MSEC, ELECTION_MAX_MSEC].
 *
 * Returns null when 'ms' is within range, otherwise an ovsdb_error whose
 * message states the permitted bounds. */
struct ovsdb_error *
raft_validate_election_timer(const uint64_t ms)
{
    /* In-range values need no further work. */
    if (ms >= ELECTION_MIN_MSEC && ms <= ELECTION_MAX_MSEC) {
        return NULL;
    }

    return ovsdb_error(NULL,
                       "election timer must be between %d and %d, in msec.",
                       ELECTION_MIN_MSEC, ELECTION_MAX_MSEC);
}
/* Returns true if 'raft' has completed joining its cluster, has not left or /* Returns true if 'raft' has completed joining its cluster, has not left or
* initiated leaving the cluster, does not have failed disk storage, and is * initiated leaving the cluster, does not have failed disk storage, and is
* apparently connected to the leader in a healthy way (or is itself the * apparently connected to the leader in a healthy way (or is itself the

View File

@ -80,7 +80,8 @@ struct sset;
struct ovsdb_error *raft_create_cluster(const char *file_name, struct ovsdb_error *raft_create_cluster(const char *file_name,
const char *name, const char *name,
const char *local_address, const char *local_address,
const struct json *snapshot) const struct json *snapshot,
const uint64_t election_timer)
OVS_WARN_UNUSED_RESULT; OVS_WARN_UNUSED_RESULT;
struct ovsdb_error *raft_join_cluster(const char *file_name, const char *name, struct ovsdb_error *raft_join_cluster(const char *file_name, const char *name,
const char *local_address, const char *local_address,
@ -116,6 +117,9 @@ bool raft_is_connected(const struct raft *);
bool raft_is_leader(const struct raft *); bool raft_is_leader(const struct raft *);
void raft_get_memory_usage(const struct raft *, struct simap *usage); void raft_get_memory_usage(const struct raft *, struct simap *usage);
/* Parameter validation */
struct ovsdb_error *raft_validate_election_timer(const uint64_t ms);
/* Joining a cluster. */ /* Joining a cluster. */
bool raft_is_joining(const struct raft *); bool raft_is_joining(const struct raft *);