mirror of
https://github.com/openvswitch/ovs
synced 2025-08-29 13:27:59 +00:00
ovsdb-tool: add --election-timer=ms option to 'create-cluster'
After creating the new clustered database write a raft entry that sets the desired election timer. This allows CMSes to set the election timer at cluster start and avoid an error-prone election timer modification process after the cluster is up. Reported-at: https://bugzilla.redhat.com/1831778 Signed-off-by: Dan Williams <dcbw@redhat.com> Signed-off-by: Ben Pfaff <blp@ovn.org>
This commit is contained in:
parent
13c0eaa7b4
commit
fb1e7863e8
3
NEWS
3
NEWS
@ -12,6 +12,9 @@ Post-v2.15.0
|
|||||||
- DPDK:
|
- DPDK:
|
||||||
* OVS validated with DPDK 20.11.1. It is recommended to use this version
|
* OVS validated with DPDK 20.11.1. It is recommended to use this version
|
||||||
until further releases.
|
until further releases.
|
||||||
|
- ovsdb-tool:
|
||||||
|
* New option '--election-timer' to the 'create-cluster' command to set the
|
||||||
|
leader election timer during cluster creation.
|
||||||
|
|
||||||
|
|
||||||
v2.15.0 - 15 Feb 2021
|
v2.15.0 - 15 Feb 2021
|
||||||
|
@ -10,7 +10,7 @@ ovsdb\-tool \- Open vSwitch database management utility
|
|||||||
.IP "Database Creation Commands:"
|
.IP "Database Creation Commands:"
|
||||||
\fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate \fR[\fIdb\fR [\fIschema\fR]]
|
\fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate \fR[\fIdb\fR [\fIschema\fR]]
|
||||||
.br
|
.br
|
||||||
\fBovsdb\-tool \fR[\fIoptions\fR] \fBcreate\-cluster \fIdb contents address\fR
|
\fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-election\-timer=\fIms\fR] \fBcreate\-cluster \fIdb contents address\fR
|
||||||
.br
|
.br
|
||||||
\fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR...
|
\fBovsdb\-tool \fR[\fIoptions\fR] [\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR...
|
||||||
.IP "Version Management Commands:"
|
.IP "Version Management Commands:"
|
||||||
@ -89,7 +89,7 @@ format, as specified in the OVSDB specification. The new database is
|
|||||||
initially empty. (You can use \fBcp\fR to copy a database including
|
initially empty. (You can use \fBcp\fR to copy a database including
|
||||||
both its schema and data.)
|
both its schema and data.)
|
||||||
.
|
.
|
||||||
.IP "\fBcreate\-cluster\fI db contents local"
|
.IP "[\fB\-\-election\-timer=\fIms\fR] \fBcreate\-cluster\fI db contents local"
|
||||||
Use this command to initialize the first server in a high-availability
|
Use this command to initialize the first server in a high-availability
|
||||||
cluster of 3 (or more) database servers, e.g. for a database in an
|
cluster of 3 (or more) database servers, e.g. for a database in an
|
||||||
environment that cannot tolerate a single point of failure. It creates
|
environment that cannot tolerate a single point of failure. It creates
|
||||||
@ -108,6 +108,12 @@ file that contains either an OVSDB schema in JSON format or a
|
|||||||
standalone OVSDB database. If it is a schema file, the new database
|
standalone OVSDB database. If it is a schema file, the new database
|
||||||
will initially be empty, with the given schema. If it is a database
|
will initially be empty, with the given schema. If it is a database
|
||||||
file, the new database will have the same schema and contents.
|
file, the new database will have the same schema and contents.
|
||||||
|
.IP
|
||||||
|
Leader election will be initiated by a follower if there is no heartbeat
|
||||||
|
received from the cluster leader within the specified election timer.
|
||||||
|
The default leader election timer is 1000 milliseconds. To use a different value
|
||||||
|
when creating the database, specify \fB\-\-election\-timer=\fIms\fR, where
|
||||||
|
\fIms\fR is a value in milliseconds between 100 and 600000 inclusive.
|
||||||
.
|
.
|
||||||
.IP "[\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR..."
|
.IP "[\fB\-\-cid=\fIuuid\fR] \fBjoin\-cluster\fI db name local remote\fR..."
|
||||||
Use this command to initialize each server after the first one in an
|
Use this command to initialize each server after the first one in an
|
||||||
|
@ -58,6 +58,9 @@ static const char *rbac_role;
|
|||||||
/* --cid: Cluster ID for "join-cluster" command. */
|
/* --cid: Cluster ID for "join-cluster" command. */
|
||||||
static struct uuid cid;
|
static struct uuid cid;
|
||||||
|
|
||||||
|
/* --election-timer: Election timer for "create-cluster" command. */
|
||||||
|
static uint64_t election_timer;
|
||||||
|
|
||||||
static const struct ovs_cmdl_command *get_all_commands(void);
|
static const struct ovs_cmdl_command *get_all_commands(void);
|
||||||
|
|
||||||
OVS_NO_RETURN static void usage(void);
|
OVS_NO_RETURN static void usage(void);
|
||||||
@ -85,12 +88,14 @@ parse_options(int argc, char *argv[])
|
|||||||
{
|
{
|
||||||
enum {
|
enum {
|
||||||
OPT_RBAC_ROLE = UCHAR_MAX + 1,
|
OPT_RBAC_ROLE = UCHAR_MAX + 1,
|
||||||
OPT_CID
|
OPT_CID,
|
||||||
|
OPT_ELECTION_TIMER,
|
||||||
};
|
};
|
||||||
static const struct option long_options[] = {
|
static const struct option long_options[] = {
|
||||||
{"more", no_argument, NULL, 'm'},
|
{"more", no_argument, NULL, 'm'},
|
||||||
{"rbac-role", required_argument, NULL, OPT_RBAC_ROLE},
|
{"rbac-role", required_argument, NULL, OPT_RBAC_ROLE},
|
||||||
{"cid", required_argument, NULL, OPT_CID},
|
{"cid", required_argument, NULL, OPT_CID},
|
||||||
|
{"election-timer", required_argument, NULL, OPT_ELECTION_TIMER},
|
||||||
{"verbose", optional_argument, NULL, 'v'},
|
{"verbose", optional_argument, NULL, 'v'},
|
||||||
{"help", no_argument, NULL, 'h'},
|
{"help", no_argument, NULL, 'h'},
|
||||||
{"option", no_argument, NULL, 'o'},
|
{"option", no_argument, NULL, 'o'},
|
||||||
@ -100,6 +105,7 @@ parse_options(int argc, char *argv[])
|
|||||||
char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
|
char *short_options = ovs_cmdl_long_options_to_short_options(long_options);
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
struct ovsdb_error *error;
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
c = getopt_long(argc, argv, short_options, long_options, NULL);
|
c = getopt_long(argc, argv, short_options, long_options, NULL);
|
||||||
@ -122,6 +128,14 @@ parse_options(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case OPT_ELECTION_TIMER:
|
||||||
|
election_timer = atoll(optarg);
|
||||||
|
error = raft_validate_election_timer(election_timer);
|
||||||
|
if (error) {
|
||||||
|
ovs_fatal(0, "%s", ovsdb_error_to_string_free(error));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
case 'h':
|
case 'h':
|
||||||
usage();
|
usage();
|
||||||
|
|
||||||
@ -153,7 +167,7 @@ usage(void)
|
|||||||
printf("%s: Open vSwitch database management utility\n"
|
printf("%s: Open vSwitch database management utility\n"
|
||||||
"usage: %s [OPTIONS] COMMAND [ARG...]\n"
|
"usage: %s [OPTIONS] COMMAND [ARG...]\n"
|
||||||
" create [DB [SCHEMA]] create DB with the given SCHEMA\n"
|
" create [DB [SCHEMA]] create DB with the given SCHEMA\n"
|
||||||
" create-cluster DB CONTENTS LOCAL\n"
|
" [--election-timer=ms] create-cluster DB CONTENTS LOCAL\n"
|
||||||
" create clustered DB with given CONTENTS and LOCAL address\n"
|
" create clustered DB with given CONTENTS and LOCAL address\n"
|
||||||
" [--cid=UUID] join-cluster DB NAME LOCAL REMOTE...\n"
|
" [--cid=UUID] join-cluster DB NAME LOCAL REMOTE...\n"
|
||||||
" join clustered DB with given NAME and LOCAL and REMOTE addrs\n"
|
" join clustered DB with given NAME and LOCAL and REMOTE addrs\n"
|
||||||
@ -303,7 +317,7 @@ do_create_cluster(struct ovs_cmdl_context *ctx)
|
|||||||
/* Create database file. */
|
/* Create database file. */
|
||||||
struct json *snapshot = json_array_create_2(schema_json, data);
|
struct json *snapshot = json_array_create_2(schema_json, data);
|
||||||
check_ovsdb_error(raft_create_cluster(db_file_name, schema->name,
|
check_ovsdb_error(raft_create_cluster(db_file_name, schema->name,
|
||||||
local, snapshot));
|
local, snapshot, election_timer));
|
||||||
ovsdb_schema_destroy(schema);
|
ovsdb_schema_destroy(schema);
|
||||||
json_destroy(snapshot);
|
json_destroy(snapshot);
|
||||||
}
|
}
|
||||||
|
66
ovsdb/raft.c
66
ovsdb/raft.c
@ -201,6 +201,8 @@ struct raft {
|
|||||||
|
|
||||||
#define ELECTION_BASE_MSEC 1000
|
#define ELECTION_BASE_MSEC 1000
|
||||||
#define ELECTION_RANGE_MSEC 1000
|
#define ELECTION_RANGE_MSEC 1000
|
||||||
|
#define ELECTION_MIN_MSEC 100
|
||||||
|
#define ELECTION_MAX_MSEC 600000
|
||||||
/* The election timeout base value for leader election, in milliseconds.
|
/* The election timeout base value for leader election, in milliseconds.
|
||||||
* It can be set by unixctl cluster/change-election-timer. Default value is
|
* It can be set by unixctl cluster/change-election-timer. Default value is
|
||||||
* ELECTION_BASE_MSEC. */
|
* ELECTION_BASE_MSEC. */
|
||||||
@ -446,11 +448,16 @@ raft_alloc(void)
|
|||||||
* This only creates the on-disk file. Use raft_open() to start operating the
|
* This only creates the on-disk file. Use raft_open() to start operating the
|
||||||
* new server.
|
* new server.
|
||||||
*
|
*
|
||||||
|
* The optional election_timer argument, when greater than zero, sets the given
|
||||||
|
* leader election timer for the new cluster, in milliseconds. If non-zero, it
|
||||||
|
* must be between 100 and 600000 inclusive.
|
||||||
|
*
|
||||||
* Returns null if successful, otherwise an ovsdb_error describing the
|
* Returns null if successful, otherwise an ovsdb_error describing the
|
||||||
* problem. */
|
* problem. */
|
||||||
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
||||||
raft_create_cluster(const char *file_name, const char *name,
|
raft_create_cluster(const char *file_name, const char *name,
|
||||||
const char *local_address, const struct json *data)
|
const char *local_address, const struct json *data,
|
||||||
|
const uint64_t election_timer)
|
||||||
{
|
{
|
||||||
/* Parse and verify validity of the local address. */
|
/* Parse and verify validity of the local address. */
|
||||||
struct ovsdb_error *error = raft_address_validate(local_address);
|
struct ovsdb_error *error = raft_address_validate(local_address);
|
||||||
@ -458,6 +465,14 @@ raft_create_cluster(const char *file_name, const char *name,
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Validate optional election timer */
|
||||||
|
if (election_timer > 0) {
|
||||||
|
error = raft_validate_election_timer(election_timer);
|
||||||
|
if (error) {
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Create log file. */
|
/* Create log file. */
|
||||||
struct ovsdb_log *log;
|
struct ovsdb_log *log;
|
||||||
error = ovsdb_log_open(file_name, RAFT_MAGIC, OVSDB_LOG_CREATE_EXCL,
|
error = ovsdb_log_open(file_name, RAFT_MAGIC, OVSDB_LOG_CREATE_EXCL,
|
||||||
@ -467,6 +482,8 @@ raft_create_cluster(const char *file_name, const char *name,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Write log file. */
|
/* Write log file. */
|
||||||
|
const uint64_t term = 1;
|
||||||
|
uint64_t index = 1;
|
||||||
struct raft_header h = {
|
struct raft_header h = {
|
||||||
.sid = uuid_random(),
|
.sid = uuid_random(),
|
||||||
.cid = uuid_random(),
|
.cid = uuid_random(),
|
||||||
@ -474,9 +491,9 @@ raft_create_cluster(const char *file_name, const char *name,
|
|||||||
.local_address = xstrdup(local_address),
|
.local_address = xstrdup(local_address),
|
||||||
.joining = false,
|
.joining = false,
|
||||||
.remote_addresses = SSET_INITIALIZER(&h.remote_addresses),
|
.remote_addresses = SSET_INITIALIZER(&h.remote_addresses),
|
||||||
.snap_index = 1,
|
.snap_index = index++,
|
||||||
.snap = {
|
.snap = {
|
||||||
.term = 1,
|
.term = term,
|
||||||
.data = json_nullable_clone(data),
|
.data = json_nullable_clone(data),
|
||||||
.eid = uuid_random(),
|
.eid = uuid_random(),
|
||||||
.servers = json_object_create(),
|
.servers = json_object_create(),
|
||||||
@ -487,11 +504,33 @@ raft_create_cluster(const char *file_name, const char *name,
|
|||||||
json_string_create(local_address));
|
json_string_create(local_address));
|
||||||
error = ovsdb_log_write_and_free(log, raft_header_to_json(&h));
|
error = ovsdb_log_write_and_free(log, raft_header_to_json(&h));
|
||||||
raft_header_uninit(&h);
|
raft_header_uninit(&h);
|
||||||
if (!error) {
|
if (error) {
|
||||||
error = ovsdb_log_commit_block(log);
|
goto error;
|
||||||
}
|
}
|
||||||
ovsdb_log_close(log);
|
|
||||||
|
|
||||||
|
if (election_timer > 0) {
|
||||||
|
struct raft_record r = {
|
||||||
|
.type = RAFT_REC_ENTRY,
|
||||||
|
.term = term,
|
||||||
|
.entry = {
|
||||||
|
.index = index,
|
||||||
|
.data = NULL,
|
||||||
|
.servers = NULL,
|
||||||
|
.election_timer = election_timer,
|
||||||
|
.eid = UUID_ZERO,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
error = ovsdb_log_write_and_free(log, raft_record_to_json(&r));
|
||||||
|
raft_record_uninit(&r);
|
||||||
|
if (error) {
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
error = ovsdb_log_commit_block(log);
|
||||||
|
|
||||||
|
error:
|
||||||
|
ovsdb_log_close(log);
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1078,6 +1117,21 @@ raft_get_memory_usage(const struct raft *raft, struct simap *usage)
|
|||||||
simap_increase(usage, "raft-log", raft->log_end - raft->log_start);
|
simap_increase(usage, "raft-log", raft->log_end - raft->log_start);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Returns an error if the election timer (in milliseconds) is out of bounds.
|
||||||
|
* Values smaller than 100ms or bigger than 10min don't make sense.
|
||||||
|
*/
|
||||||
|
struct ovsdb_error *
|
||||||
|
raft_validate_election_timer(const uint64_t ms)
|
||||||
|
{
|
||||||
|
/* Validate optional election timer */
|
||||||
|
if (ms < ELECTION_MIN_MSEC || ms > ELECTION_MAX_MSEC) {
|
||||||
|
return ovsdb_error(NULL, "election timer must be between %d and "
|
||||||
|
"%d, in msec.", ELECTION_MIN_MSEC,
|
||||||
|
ELECTION_MAX_MSEC);
|
||||||
|
}
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
/* Returns true if 'raft' has completed joining its cluster, has not left or
|
/* Returns true if 'raft' has completed joining its cluster, has not left or
|
||||||
* initiated leaving the cluster, does not have failed disk storage, and is
|
* initiated leaving the cluster, does not have failed disk storage, and is
|
||||||
* apparently connected to the leader in a healthy way (or is itself the
|
* apparently connected to the leader in a healthy way (or is itself the
|
||||||
|
@ -80,7 +80,8 @@ struct sset;
|
|||||||
struct ovsdb_error *raft_create_cluster(const char *file_name,
|
struct ovsdb_error *raft_create_cluster(const char *file_name,
|
||||||
const char *name,
|
const char *name,
|
||||||
const char *local_address,
|
const char *local_address,
|
||||||
const struct json *snapshot)
|
const struct json *snapshot,
|
||||||
|
const uint64_t election_timer)
|
||||||
OVS_WARN_UNUSED_RESULT;
|
OVS_WARN_UNUSED_RESULT;
|
||||||
struct ovsdb_error *raft_join_cluster(const char *file_name, const char *name,
|
struct ovsdb_error *raft_join_cluster(const char *file_name, const char *name,
|
||||||
const char *local_address,
|
const char *local_address,
|
||||||
@ -116,6 +117,9 @@ bool raft_is_connected(const struct raft *);
|
|||||||
bool raft_is_leader(const struct raft *);
|
bool raft_is_leader(const struct raft *);
|
||||||
void raft_get_memory_usage(const struct raft *, struct simap *usage);
|
void raft_get_memory_usage(const struct raft *, struct simap *usage);
|
||||||
|
|
||||||
|
/* Parameter validation */
|
||||||
|
struct ovsdb_error *raft_validate_election_timer(const uint64_t ms);
|
||||||
|
|
||||||
/* Joining a cluster. */
|
/* Joining a cluster. */
|
||||||
bool raft_is_joining(const struct raft *);
|
bool raft_is_joining(const struct raft *);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user