2017-12-31 21:15:58 -08:00
|
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2017, 2018 Nicira, Inc.
|
|
|
|
|
*
|
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
|
* You may obtain a copy of the License at:
|
|
|
|
|
*
|
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
*
|
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
|
* limitations under the License.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#include <config.h>
|
|
|
|
|
|
|
|
|
|
#include "raft-private.h"
|
|
|
|
|
|
raft: Don't keep full json objects in memory if no longer needed.
Raft log entries (and raft database snapshot) contains json objects
of the data. Follower receives append requests with data that gets
parsed and added to the raft log. Leader receives execution requests,
parses data out of them and adds to the log. In both cases, later
ovsdb-server reads the log with ovsdb_storage_read(), constructs
transaction and updates the database. On followers these json objects
in common case are never used again. Leader may use them to send
append requests or snapshot installation requests to followers.
However, all these operations (except for ovsdb_storage_read()) are
just serializing the json in order to send it over the network.
Json objects are significantly larger than their serialized string
representation. For example, the snapshot of the database from one of
the ovn-heater scale tests takes 270 MB as a string, but 1.6 GB as
a json object from the total 3.8 GB consumed by ovsdb-server process.
ovsdb_storage_read() for a given raft entry happens only once in a
lifetime, so after this call, we can serialize the json object, store
the string representation and free the actual json object that ovsdb
will never need again. This can save a lot of memory and can also
save serialization time, because each raft entry for append requests
and snapshot installation requests serialized only once instead of
doing that every time such request needs to be sent.
JSON_SERIALIZED_OBJECT can be used in order to seamlessly integrate
pre-serialized data into raft_header and similar json objects.
One major special case is creation of a database snapshot.
Snapshot installation request received over the network will be parsed
and read by ovsdb-server just like any other raft log entry. However,
snapshots created locally with raft_store_snapshot() will never be
read back, because they reflect the current state of the database,
hence already applied. For this case we can free the json object
right after writing snapshot on disk.
Tests performed with ovn-heater on 60 node density-light scenario,
where on-disk database goes up to 97 MB, shows average memory
consumption of ovsdb-server Southbound DB processes decreased by 58%
(from 602 MB to 256 MB per process) and peak memory consumption
decreased by 40% (from 1288 MB to 771 MB).
Test with 120 nodes on density-heavy scenario with 270 MB on-disk
database shows 1.5 GB memory consumption decrease as expected.
Also, total CPU time consumed by the Southbound DB process reduced
from 296 to 256 minutes. Number of unreasonably long poll intervals
reduced from 2896 down to 1934.
Acked-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-08-24 21:00:38 +02:00
|
|
|
|
#include "coverage.h"
|
2017-12-31 21:15:58 -08:00
|
|
|
|
#include "openvswitch/dynamic-string.h"
|
|
|
|
|
#include "ovsdb-error.h"
|
|
|
|
|
#include "ovsdb-parser.h"
|
|
|
|
|
#include "socket-util.h"
|
|
|
|
|
#include "sset.h"
|
raft: Don't keep full json objects in memory if no longer needed.
Raft log entries (and raft database snapshot) contains json objects
of the data. Follower receives append requests with data that gets
parsed and added to the raft log. Leader receives execution requests,
parses data out of them and adds to the log. In both cases, later
ovsdb-server reads the log with ovsdb_storage_read(), constructs
transaction and updates the database. On followers these json objects
in common case are never used again. Leader may use them to send
append requests or snapshot installation requests to followers.
However, all these operations (except for ovsdb_storage_read()) are
just serializing the json in order to send it over the network.
Json objects are significantly larger than their serialized string
representation. For example, the snapshot of the database from one of
the ovn-heater scale tests takes 270 MB as a string, but 1.6 GB as
a json object from the total 3.8 GB consumed by ovsdb-server process.
ovsdb_storage_read() for a given raft entry happens only once in a
lifetime, so after this call, we can serialize the json object, store
the string representation and free the actual json object that ovsdb
will never need again. This can save a lot of memory and can also
save serialization time, because each raft entry for append requests
and snapshot installation requests serialized only once instead of
doing that every time such request needs to be sent.
JSON_SERIALIZED_OBJECT can be used in order to seamlessly integrate
pre-serialized data into raft_header and similar json objects.
One major special case is creation of a database snapshot.
Snapshot installation request received over the network will be parsed
and read by ovsdb-server just like any other raft log entry. However,
snapshots created locally with raft_store_snapshot() will never be
read back, because they reflect the current state of the database,
hence already applied. For this case we can free the json object
right after writing snapshot on disk.
Tests performed with ovn-heater on 60 node density-light scenario,
where on-disk database goes up to 97 MB, shows average memory
consumption of ovsdb-server Southbound DB processes decreased by 58%
(from 602 MB to 256 MB per process) and peak memory consumption
decreased by 40% (from 1288 MB to 771 MB).
Test with 120 nodes on density-heavy scenario with 270 MB on-disk
database shows 1.5 GB memory consumption decrease as expected.
Also, total CPU time consumed by the Southbound DB process reduced
from 296 to 256 minutes. Number of unreasonably long poll intervals
reduced from 2896 down to 1934.
Acked-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-08-24 21:00:38 +02:00
|
|
|
|
|
|
|
|
|
COVERAGE_DEFINE(raft_entry_serialize);
|
2017-12-31 21:15:58 -08:00
|
|
|
|
|
|
|
|
|
/* Addresses of Raft servers. */
|
|
|
|
|
|
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
raft_address_validate(const char *address)
|
|
|
|
|
{
|
|
|
|
|
if (!strncmp(address, "unix:", 5)) {
|
|
|
|
|
return NULL;
|
|
|
|
|
} else if (!strncmp(address, "ssl:", 4) || !strncmp(address, "tcp:", 4)) {
|
|
|
|
|
struct sockaddr_storage ss;
|
2022-03-10 23:33:17 +01:00
|
|
|
|
bool dns_failure = false;
|
|
|
|
|
|
|
|
|
|
if (!inet_parse_active(address + 4, -1, &ss, true, &dns_failure)
|
|
|
|
|
&& !dns_failure) {
|
2017-12-31 21:15:58 -08:00
|
|
|
|
return ovsdb_error(NULL, "%s: syntax error in address", address);
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
} else {
|
|
|
|
|
return ovsdb_error(NULL, "%s: expected \"tcp\" or \"ssl\" address",
|
|
|
|
|
address);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
raft_address_validate_json(const struct json *address)
|
|
|
|
|
{
|
|
|
|
|
if (address->type != JSON_STRING) {
|
|
|
|
|
return ovsdb_syntax_error(address, NULL,
|
|
|
|
|
"server address is not string");
|
|
|
|
|
}
|
|
|
|
|
return raft_address_validate(json_string(address));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Constructs and returns a "nickname" for a Raft server based on its 'address'
|
|
|
|
|
* and server ID 'sid'. The nickname is just a short name for the server to
|
|
|
|
|
* use in log messages, to make them more readable.
|
|
|
|
|
*
|
|
|
|
|
* The caller must eventually free the returned string. */
|
|
|
|
|
char *
|
|
|
|
|
raft_address_to_nickname(const char *address, const struct uuid *sid)
|
|
|
|
|
{
|
|
|
|
|
if (!strncmp(address, "unix:", 5)) {
|
|
|
|
|
const char *p = address + 5;
|
|
|
|
|
|
|
|
|
|
const char *slash = strrchr(p, '/');
|
|
|
|
|
if (slash) {
|
|
|
|
|
p = slash + 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int len = strcspn(p, ".");
|
|
|
|
|
if (len) {
|
|
|
|
|
return xmemdup0(p, len);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return xasprintf(SID_FMT, SID_ARGS(sid));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Sets of Raft server addresses. */
|
|
|
|
|
|
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
raft_addresses_from_json(const struct json *json, struct sset *addresses)
|
|
|
|
|
{
|
|
|
|
|
sset_init(addresses);
|
|
|
|
|
|
|
|
|
|
const struct json_array *array = json_array(json);
|
|
|
|
|
if (!array->n) {
|
|
|
|
|
return ovsdb_syntax_error(json, NULL,
|
|
|
|
|
"at least one remote address is required");
|
|
|
|
|
}
|
|
|
|
|
for (size_t i = 0; i < array->n; i++) {
|
|
|
|
|
const struct json *address = array->elems[i];
|
|
|
|
|
struct ovsdb_error *error = raft_address_validate_json(address);
|
|
|
|
|
if (error) {
|
|
|
|
|
sset_destroy(addresses);
|
|
|
|
|
sset_init(addresses);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
sset_add(addresses, json_string(address));
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct json *
|
|
|
|
|
raft_addresses_to_json(const struct sset *sset)
|
|
|
|
|
{
|
|
|
|
|
struct json *array;
|
|
|
|
|
const char *s;
|
|
|
|
|
|
|
|
|
|
array = json_array_create_empty();
|
|
|
|
|
SSET_FOR_EACH (s, sset) {
|
|
|
|
|
json_array_add(array, json_string_create(s));
|
|
|
|
|
}
|
|
|
|
|
return array;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* raft_server. */
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
raft_server_phase_to_string(enum raft_server_phase phase)
|
|
|
|
|
{
|
|
|
|
|
switch (phase) {
|
|
|
|
|
case RAFT_PHASE_STABLE: return "stable";
|
|
|
|
|
case RAFT_PHASE_CATCHUP: return "adding: catchup";
|
|
|
|
|
case RAFT_PHASE_CAUGHT_UP: return "adding: caught up";
|
|
|
|
|
case RAFT_PHASE_COMMITTING: return "adding: committing";
|
|
|
|
|
case RAFT_PHASE_REMOVE: return "removing";
|
|
|
|
|
default: return "<error>";
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
raft_server_destroy(struct raft_server *s)
|
|
|
|
|
{
|
|
|
|
|
if (s) {
|
|
|
|
|
free(s->address);
|
|
|
|
|
free(s->nickname);
|
|
|
|
|
free(s);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
raft_servers_destroy(struct hmap *servers)
|
|
|
|
|
{
|
2022-03-23 12:56:17 +01:00
|
|
|
|
struct raft_server *s;
|
|
|
|
|
HMAP_FOR_EACH_SAFE (s, hmap_node, servers) {
|
2017-12-31 21:15:58 -08:00
|
|
|
|
hmap_remove(servers, &s->hmap_node);
|
|
|
|
|
raft_server_destroy(s);
|
|
|
|
|
}
|
|
|
|
|
hmap_destroy(servers);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct raft_server *
|
|
|
|
|
raft_server_add(struct hmap *servers, const struct uuid *sid,
|
|
|
|
|
const char *address)
|
|
|
|
|
{
|
|
|
|
|
struct raft_server *s = xzalloc(sizeof *s);
|
|
|
|
|
s->sid = *sid;
|
|
|
|
|
s->address = xstrdup(address);
|
|
|
|
|
s->nickname = raft_address_to_nickname(address, sid);
|
|
|
|
|
s->phase = RAFT_PHASE_STABLE;
|
|
|
|
|
hmap_insert(servers, &s->hmap_node, uuid_hash(sid));
|
|
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
struct raft_server *
|
|
|
|
|
raft_server_find(const struct hmap *servers, const struct uuid *sid)
|
|
|
|
|
{
|
|
|
|
|
struct raft_server *s;
|
|
|
|
|
HMAP_FOR_EACH_IN_BUCKET (s, hmap_node, uuid_hash(sid), servers) {
|
|
|
|
|
if (uuid_equals(sid, &s->sid)) {
|
|
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
raft_servers_get_nickname__(const struct hmap *servers, const struct uuid *sid)
|
|
|
|
|
{
|
|
|
|
|
const struct raft_server *s = raft_server_find(servers, sid);
|
|
|
|
|
return s ? s->nickname : NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
raft_servers_get_nickname(const struct hmap *servers,
|
|
|
|
|
const struct uuid *sid,
|
|
|
|
|
char buf[SID_LEN + 1], size_t bufsize)
|
|
|
|
|
{
|
|
|
|
|
const char *s = raft_servers_get_nickname__(servers, sid);
|
|
|
|
|
if (s) {
|
|
|
|
|
return s;
|
|
|
|
|
}
|
|
|
|
|
snprintf(buf, bufsize, SID_FMT, SID_ARGS(sid));
|
|
|
|
|
return buf;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
raft_servers_from_json__(const struct json *json, struct hmap *servers)
|
|
|
|
|
{
|
|
|
|
|
if (!json || json->type != JSON_OBJECT) {
|
|
|
|
|
return ovsdb_syntax_error(json, NULL, "servers must be JSON object");
|
|
|
|
|
} else if (shash_is_empty(json_object(json))) {
|
|
|
|
|
return ovsdb_syntax_error(json, NULL, "must have at least one server");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parse new servers. */
|
|
|
|
|
struct shash_node *node;
|
|
|
|
|
SHASH_FOR_EACH (node, json_object(json)) {
|
|
|
|
|
/* Parse server UUID. */
|
|
|
|
|
struct uuid sid;
|
|
|
|
|
if (!uuid_from_string(&sid, node->name)) {
|
|
|
|
|
return ovsdb_syntax_error(json, NULL, "%s is not a UUID",
|
|
|
|
|
node->name);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const struct json *address = node->data;
|
|
|
|
|
struct ovsdb_error *error = raft_address_validate_json(address);
|
|
|
|
|
if (error) {
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
raft_server_add(servers, &sid, json_string(address));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
raft_servers_from_json(const struct json *json, struct hmap *servers)
|
|
|
|
|
{
|
|
|
|
|
hmap_init(servers);
|
|
|
|
|
struct ovsdb_error *error = raft_servers_from_json__(json, servers);
|
|
|
|
|
if (error) {
|
|
|
|
|
raft_servers_destroy(servers);
|
|
|
|
|
}
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
raft_servers_validate_json(const struct json *json)
|
|
|
|
|
{
|
|
|
|
|
struct hmap servers = HMAP_INITIALIZER(&servers);
|
|
|
|
|
struct ovsdb_error *error = raft_servers_from_json__(json, &servers);
|
|
|
|
|
raft_servers_destroy(&servers);
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct json *
|
|
|
|
|
raft_servers_to_json(const struct hmap *servers)
|
|
|
|
|
{
|
|
|
|
|
struct json *json = json_object_create();
|
|
|
|
|
struct raft_server *s;
|
|
|
|
|
HMAP_FOR_EACH (s, hmap_node, servers) {
|
|
|
|
|
char sid_s[UUID_LEN + 1];
|
|
|
|
|
sprintf(sid_s, UUID_FMT, UUID_ARGS(&s->sid));
|
|
|
|
|
json_object_put_string(json, sid_s, s->address);
|
|
|
|
|
}
|
|
|
|
|
return json;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
raft_servers_format(const struct hmap *servers, struct ds *ds)
|
|
|
|
|
{
|
|
|
|
|
int i = 0;
|
|
|
|
|
const struct raft_server *s;
|
|
|
|
|
HMAP_FOR_EACH (s, hmap_node, servers) {
|
|
|
|
|
if (i++) {
|
|
|
|
|
ds_put_cstr(ds, ", ");
|
|
|
|
|
}
|
|
|
|
|
ds_put_format(ds, SID_FMT"(%s)", SID_ARGS(&s->sid), s->address);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Raft log entries. */
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
raft_entry_clone(struct raft_entry *dst, const struct raft_entry *src)
|
|
|
|
|
{
|
|
|
|
|
dst->term = src->term;
|
raft: Don't keep full json objects in memory if no longer needed.
Raft log entries (and raft database snapshot) contains json objects
of the data. Follower receives append requests with data that gets
parsed and added to the raft log. Leader receives execution requests,
parses data out of them and adds to the log. In both cases, later
ovsdb-server reads the log with ovsdb_storage_read(), constructs
transaction and updates the database. On followers these json objects
in common case are never used again. Leader may use them to send
append requests or snapshot installation requests to followers.
However, all these operations (except for ovsdb_storage_read()) are
just serializing the json in order to send it over the network.
Json objects are significantly larger than their serialized string
representation. For example, the snapshot of the database from one of
the ovn-heater scale tests takes 270 MB as a string, but 1.6 GB as
a json object from the total 3.8 GB consumed by ovsdb-server process.
ovsdb_storage_read() for a given raft entry happens only once in a
lifetime, so after this call, we can serialize the json object, store
the string representation and free the actual json object that ovsdb
will never need again. This can save a lot of memory and can also
save serialization time, because each raft entry for append requests
and snapshot installation requests serialized only once instead of
doing that every time such request needs to be sent.
JSON_SERIALIZED_OBJECT can be used in order to seamlessly integrate
pre-serialized data into raft_header and similar json objects.
One major special case is creation of a database snapshot.
Snapshot installation request received over the network will be parsed
and read by ovsdb-server just like any other raft log entry. However,
snapshots created locally with raft_store_snapshot() will never be
read back, because they reflect the current state of the database,
hence already applied. For this case we can free the json object
right after writing snapshot on disk.
Tests performed with ovn-heater on 60 node density-light scenario,
where on-disk database goes up to 97 MB, shows average memory
consumption of ovsdb-server Southbound DB processes decreased by 58%
(from 602 MB to 256 MB per process) and peak memory consumption
decreased by 40% (from 1288 MB to 771 MB).
Test with 120 nodes on density-heavy scenario with 270 MB on-disk
database shows 1.5 GB memory consumption decrease as expected.
Also, total CPU time consumed by the Southbound DB process reduced
from 296 to 256 minutes. Number of unreasonably long poll intervals
reduced from 2896 down to 1934.
Acked-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-08-24 21:00:38 +02:00
|
|
|
|
dst->data.full_json = json_nullable_clone(src->data.full_json);
|
|
|
|
|
dst->data.serialized = json_nullable_clone(src->data.serialized);
|
2017-12-31 21:15:58 -08:00
|
|
|
|
dst->eid = src->eid;
|
|
|
|
|
dst->servers = json_nullable_clone(src->servers);
|
2019-08-22 14:08:22 -07:00
|
|
|
|
dst->election_timer = src->election_timer;
|
2017-12-31 21:15:58 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
raft_entry_uninit(struct raft_entry *e)
|
|
|
|
|
{
|
|
|
|
|
if (e) {
|
raft: Don't keep full json objects in memory if no longer needed.
Raft log entries (and raft database snapshot) contains json objects
of the data. Follower receives append requests with data that gets
parsed and added to the raft log. Leader receives execution requests,
parses data out of them and adds to the log. In both cases, later
ovsdb-server reads the log with ovsdb_storage_read(), constructs
transaction and updates the database. On followers these json objects
in common case are never used again. Leader may use them to send
append requests or snapshot installation requests to followers.
However, all these operations (except for ovsdb_storage_read()) are
just serializing the json in order to send it over the network.
Json objects are significantly larger than their serialized string
representation. For example, the snapshot of the database from one of
the ovn-heater scale tests takes 270 MB as a string, but 1.6 GB as
a json object from the total 3.8 GB consumed by ovsdb-server process.
ovsdb_storage_read() for a given raft entry happens only once in a
lifetime, so after this call, we can serialize the json object, store
the string representation and free the actual json object that ovsdb
will never need again. This can save a lot of memory and can also
save serialization time, because each raft entry for append requests
and snapshot installation requests serialized only once instead of
doing that every time such request needs to be sent.
JSON_SERIALIZED_OBJECT can be used in order to seamlessly integrate
pre-serialized data into raft_header and similar json objects.
One major special case is creation of a database snapshot.
Snapshot installation request received over the network will be parsed
and read by ovsdb-server just like any other raft log entry. However,
snapshots created locally with raft_store_snapshot() will never be
read back, because they reflect the current state of the database,
hence already applied. For this case we can free the json object
right after writing snapshot on disk.
Tests performed with ovn-heater on 60 node density-light scenario,
where on-disk database goes up to 97 MB, shows average memory
consumption of ovsdb-server Southbound DB processes decreased by 58%
(from 602 MB to 256 MB per process) and peak memory consumption
decreased by 40% (from 1288 MB to 771 MB).
Test with 120 nodes on density-heavy scenario with 270 MB on-disk
database shows 1.5 GB memory consumption decrease as expected.
Also, total CPU time consumed by the Southbound DB process reduced
from 296 to 256 minutes. Number of unreasonably long poll intervals
reduced from 2896 down to 1934.
Acked-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-08-24 21:00:38 +02:00
|
|
|
|
json_destroy(e->data.full_json);
|
|
|
|
|
json_destroy(e->data.serialized);
|
2017-12-31 21:15:58 -08:00
|
|
|
|
json_destroy(e->servers);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct json *
|
|
|
|
|
raft_entry_to_json(const struct raft_entry *e)
|
|
|
|
|
{
|
|
|
|
|
struct json *json = json_object_create();
|
|
|
|
|
raft_put_uint64(json, "term", e->term);
|
raft: Don't keep full json objects in memory if no longer needed.
Raft log entries (and raft database snapshot) contains json objects
of the data. Follower receives append requests with data that gets
parsed and added to the raft log. Leader receives execution requests,
parses data out of them and adds to the log. In both cases, later
ovsdb-server reads the log with ovsdb_storage_read(), constructs
transaction and updates the database. On followers these json objects
in common case are never used again. Leader may use them to send
append requests or snapshot installation requests to followers.
However, all these operations (except for ovsdb_storage_read()) are
just serializing the json in order to send it over the network.
Json objects are significantly larger than their serialized string
representation. For example, the snapshot of the database from one of
the ovn-heater scale tests takes 270 MB as a string, but 1.6 GB as
a json object from the total 3.8 GB consumed by ovsdb-server process.
ovsdb_storage_read() for a given raft entry happens only once in a
lifetime, so after this call, we can serialize the json object, store
the string representation and free the actual json object that ovsdb
will never need again. This can save a lot of memory and can also
save serialization time, because each raft entry for append requests
and snapshot installation requests serialized only once instead of
doing that every time such request needs to be sent.
JSON_SERIALIZED_OBJECT can be used in order to seamlessly integrate
pre-serialized data into raft_header and similar json objects.
One major special case is creation of a database snapshot.
Snapshot installation request received over the network will be parsed
and read by ovsdb-server just like any other raft log entry. However,
snapshots created locally with raft_store_snapshot() will never be
read back, because they reflect the current state of the database,
hence already applied. For this case we can free the json object
right after writing snapshot on disk.
Tests performed with ovn-heater on 60 node density-light scenario,
where on-disk database goes up to 97 MB, shows average memory
consumption of ovsdb-server Southbound DB processes decreased by 58%
(from 602 MB to 256 MB per process) and peak memory consumption
decreased by 40% (from 1288 MB to 771 MB).
Test with 120 nodes on density-heavy scenario with 270 MB on-disk
database shows 1.5 GB memory consumption decrease as expected.
Also, total CPU time consumed by the Southbound DB process reduced
from 296 to 256 minutes. Number of unreasonably long poll intervals
reduced from 2896 down to 1934.
Acked-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-08-24 21:00:38 +02:00
|
|
|
|
if (raft_entry_has_data(e)) {
|
|
|
|
|
json_object_put(json, "data",
|
|
|
|
|
json_clone(raft_entry_get_serialized_data(e)));
|
ovsdb: Introduce and use specialized uuid print functions.
According to profiling data, converting UUIDs to strings is a frequent
operation in some workloads. This typically results in a call to
xasprintf(), which internally calls vsnprintf() twice, first to
calculate the required buffer size, and then to format the string.
This patch introduces specialized functions for printing UUIDs, which
both reduces code duplication and improves performance.
For example, on my laptop, 10,000,000 calls to the new uuid_to_string()
function takes 1296 ms, while the same number of xasprintf() calls using
UUID_FMT take 2498 ms.
Signed-off-by: Dmitry Porokh <dporokh@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2025-04-25 20:45:53 +00:00
|
|
|
|
json_object_put_uuid(json, "eid", &e->eid);
|
2017-12-31 21:15:58 -08:00
|
|
|
|
}
|
|
|
|
|
if (e->servers) {
|
|
|
|
|
json_object_put(json, "servers", json_clone(e->servers));
|
|
|
|
|
}
|
2019-08-19 09:30:00 -07:00
|
|
|
|
if (e->election_timer) {
|
|
|
|
|
raft_put_uint64(json, "election_timer", e->election_timer);
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-31 21:15:58 -08:00
|
|
|
|
return json;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
raft_entry_from_json(struct json *json, struct raft_entry *e)
|
|
|
|
|
{
|
|
|
|
|
memset(e, 0, sizeof *e);
|
|
|
|
|
|
|
|
|
|
struct ovsdb_parser p;
|
|
|
|
|
ovsdb_parser_init(&p, json, "raft log entry");
|
|
|
|
|
e->term = raft_parse_required_uint64(&p, "term");
|
raft: Don't keep full json objects in memory if no longer needed.
Raft log entries (and raft database snapshot) contains json objects
of the data. Follower receives append requests with data that gets
parsed and added to the raft log. Leader receives execution requests,
parses data out of them and adds to the log. In both cases, later
ovsdb-server reads the log with ovsdb_storage_read(), constructs
transaction and updates the database. On followers these json objects
in common case are never used again. Leader may use them to send
append requests or snapshot installation requests to followers.
However, all these operations (except for ovsdb_storage_read()) are
just serializing the json in order to send it over the network.
Json objects are significantly larger than their serialized string
representation. For example, the snapshot of the database from one of
the ovn-heater scale tests takes 270 MB as a string, but 1.6 GB as
a json object from the total 3.8 GB consumed by ovsdb-server process.
ovsdb_storage_read() for a given raft entry happens only once in a
lifetime, so after this call, we can serialize the json object, store
the string representation and free the actual json object that ovsdb
will never need again. This can save a lot of memory and can also
save serialization time, because each raft entry for append requests
and snapshot installation requests serialized only once instead of
doing that every time such request needs to be sent.
JSON_SERIALIZED_OBJECT can be used in order to seamlessly integrate
pre-serialized data into raft_header and similar json objects.
One major special case is creation of a database snapshot.
Snapshot installation request received over the network will be parsed
and read by ovsdb-server just like any other raft log entry. However,
snapshots created locally with raft_store_snapshot() will never be
read back, because they reflect the current state of the database,
hence already applied. For this case we can free the json object
right after writing snapshot on disk.
Tests performed with ovn-heater on 60 node density-light scenario,
where on-disk database goes up to 97 MB, shows average memory
consumption of ovsdb-server Southbound DB processes decreased by 58%
(from 602 MB to 256 MB per process) and peak memory consumption
decreased by 40% (from 1288 MB to 771 MB).
Test with 120 nodes on density-heavy scenario with 270 MB on-disk
database shows 1.5 GB memory consumption decrease as expected.
Also, total CPU time consumed by the Southbound DB process reduced
from 296 to 256 minutes. Number of unreasonably long poll intervals
reduced from 2896 down to 1934.
Acked-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-08-24 21:00:38 +02:00
|
|
|
|
raft_entry_set_parsed_data(e,
|
2017-12-31 21:15:58 -08:00
|
|
|
|
ovsdb_parser_member(&p, "data", OP_OBJECT | OP_ARRAY | OP_OPTIONAL));
|
raft: Don't keep full json objects in memory if no longer needed.
Raft log entries (and raft database snapshot) contains json objects
of the data. Follower receives append requests with data that gets
parsed and added to the raft log. Leader receives execution requests,
parses data out of them and adds to the log. In both cases, later
ovsdb-server reads the log with ovsdb_storage_read(), constructs
transaction and updates the database. On followers these json objects
in common case are never used again. Leader may use them to send
append requests or snapshot installation requests to followers.
However, all these operations (except for ovsdb_storage_read()) are
just serializing the json in order to send it over the network.
Json objects are significantly larger than their serialized string
representation. For example, the snapshot of the database from one of
the ovn-heater scale tests takes 270 MB as a string, but 1.6 GB as
a json object from the total 3.8 GB consumed by ovsdb-server process.
ovsdb_storage_read() for a given raft entry happens only once in a
lifetime, so after this call, we can serialize the json object, store
the string representation and free the actual json object that ovsdb
will never need again. This can save a lot of memory and can also
save serialization time, because each raft entry for append requests
and snapshot installation requests serialized only once instead of
doing that every time such request needs to be sent.
JSON_SERIALIZED_OBJECT can be used in order to seamlessly integrate
pre-serialized data into raft_header and similar json objects.
One major special case is creation of a database snapshot.
Snapshot installation request received over the network will be parsed
and read by ovsdb-server just like any other raft log entry. However,
snapshots created locally with raft_store_snapshot() will never be
read back, because they reflect the current state of the database,
hence already applied. For this case we can free the json object
right after writing snapshot on disk.
Tests performed with ovn-heater on 60 node density-light scenario,
where on-disk database goes up to 97 MB, shows average memory
consumption of ovsdb-server Southbound DB processes decreased by 58%
(from 602 MB to 256 MB per process) and peak memory consumption
decreased by 40% (from 1288 MB to 771 MB).
Test with 120 nodes on density-heavy scenario with 270 MB on-disk
database shows 1.5 GB memory consumption decrease as expected.
Also, total CPU time consumed by the Southbound DB process reduced
from 296 to 256 minutes. Number of unreasonably long poll intervals
reduced from 2896 down to 1934.
Acked-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-08-24 21:00:38 +02:00
|
|
|
|
e->eid = raft_entry_has_data(e)
|
|
|
|
|
? raft_parse_required_uuid(&p, "eid") : UUID_ZERO;
|
2017-12-31 21:15:58 -08:00
|
|
|
|
e->servers = json_nullable_clone(
|
|
|
|
|
ovsdb_parser_member(&p, "servers", OP_OBJECT | OP_OPTIONAL));
|
|
|
|
|
if (e->servers) {
|
|
|
|
|
ovsdb_parser_put_error(&p, raft_servers_validate_json(e->servers));
|
|
|
|
|
}
|
2019-08-19 09:30:00 -07:00
|
|
|
|
e->election_timer = raft_parse_optional_uint64(&p, "election_timer");
|
2017-12-31 21:15:58 -08:00
|
|
|
|
|
|
|
|
|
struct ovsdb_error *error = ovsdb_parser_finish(&p);
|
|
|
|
|
if (error) {
|
|
|
|
|
raft_entry_uninit(e);
|
|
|
|
|
}
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
raft_entry_equals(const struct raft_entry *a, const struct raft_entry *b)
|
|
|
|
|
{
|
|
|
|
|
return (a->term == b->term
|
|
|
|
|
&& uuid_equals(&a->eid, &b->eid)
|
raft: Don't keep full json objects in memory if no longer needed.
Raft log entries (and raft database snapshot) contains json objects
of the data. Follower receives append requests with data that gets
parsed and added to the raft log. Leader receives execution requests,
parses data out of them and adds to the log. In both cases, later
ovsdb-server reads the log with ovsdb_storage_read(), constructs
transaction and updates the database. On followers these json objects
in common case are never used again. Leader may use them to send
append requests or snapshot installation requests to followers.
However, all these operations (except for ovsdb_storage_read()) are
just serializing the json in order to send it over the network.
Json objects are significantly larger than their serialized string
representation. For example, the snapshot of the database from one of
the ovn-heater scale tests takes 270 MB as a string, but 1.6 GB as
a json object from the total 3.8 GB consumed by ovsdb-server process.
ovsdb_storage_read() for a given raft entry happens only once in a
lifetime, so after this call, we can serialize the json object, store
the string representation and free the actual json object that ovsdb
will never need again. This can save a lot of memory and can also
save serialization time, because each raft entry for append requests
and snapshot installation requests serialized only once instead of
doing that every time such request needs to be sent.
JSON_SERIALIZED_OBJECT can be used in order to seamlessly integrate
pre-serialized data into raft_header and similar json objects.
One major special case is creation of a database snapshot.
Snapshot installation request received over the network will be parsed
and read by ovsdb-server just like any other raft log entry. However,
snapshots created locally with raft_store_snapshot() will never be
read back, because they reflect the current state of the database,
hence already applied. For this case we can free the json object
right after writing snapshot on disk.
Tests performed with ovn-heater on 60 node density-light scenario,
where on-disk database goes up to 97 MB, shows average memory
consumption of ovsdb-server Southbound DB processes decreased by 58%
(from 602 MB to 256 MB per process) and peak memory consumption
decreased by 40% (from 1288 MB to 771 MB).
Test with 120 nodes on density-heavy scenario with 270 MB on-disk
database shows 1.5 GB memory consumption decrease as expected.
Also, total CPU time consumed by the Southbound DB process reduced
from 296 to 256 minutes. Number of unreasonably long poll intervals
reduced from 2896 down to 1934.
Acked-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-08-24 21:00:38 +02:00
|
|
|
|
&& json_equal(a->servers, b->servers)
|
|
|
|
|
&& json_equal(raft_entry_get_parsed_data(a),
|
|
|
|
|
raft_entry_get_parsed_data(b)));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
raft_entry_has_data(const struct raft_entry *e)
|
|
|
|
|
{
|
|
|
|
|
return e->data.full_json || e->data.serialized;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
raft_entry_data_serialize(struct raft_entry *e)
|
|
|
|
|
{
|
|
|
|
|
if (!raft_entry_has_data(e) || e->data.serialized) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
COVERAGE_INC(raft_entry_serialize);
|
|
|
|
|
e->data.serialized = json_serialized_object_create(e->data.full_json);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
raft_entry_set_parsed_data_nocopy(struct raft_entry *e, struct json *json)
|
|
|
|
|
{
|
|
|
|
|
ovs_assert(!json || json->type != JSON_SERIALIZED_OBJECT);
|
|
|
|
|
e->data.full_json = json;
|
|
|
|
|
e->data.serialized = NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
raft_entry_set_parsed_data(struct raft_entry *e, const struct json *json)
|
|
|
|
|
{
|
|
|
|
|
raft_entry_set_parsed_data_nocopy(e, json_nullable_clone(json));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Returns a pointer to the fully parsed json object of the data.
|
|
|
|
|
* Caller takes the ownership of the result.
|
|
|
|
|
*
|
|
|
|
|
* Entry will no longer contain a fully parsed json object.
|
|
|
|
|
* Subsequent calls for the same raft entry will return NULL. */
|
|
|
|
|
struct json * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
raft_entry_steal_parsed_data(struct raft_entry *e)
|
|
|
|
|
{
|
|
|
|
|
/* Ensure that serialized version exists. */
|
|
|
|
|
raft_entry_data_serialize(e);
|
|
|
|
|
|
|
|
|
|
struct json *json = e->data.full_json;
|
|
|
|
|
e->data.full_json = NULL;
|
|
|
|
|
|
|
|
|
|
return json;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Returns a pointer to the fully parsed json object of the data, if any. */
|
|
|
|
|
const struct json *
|
|
|
|
|
raft_entry_get_parsed_data(const struct raft_entry *e)
|
|
|
|
|
{
|
|
|
|
|
return e->data.full_json;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Returns a pointer to the JSON_SERIALIZED_OBJECT of the data. */
|
|
|
|
|
const struct json *
|
|
|
|
|
raft_entry_get_serialized_data(const struct raft_entry *e)
|
|
|
|
|
{
|
|
|
|
|
raft_entry_data_serialize(CONST_CAST(struct raft_entry *, e));
|
|
|
|
|
return e->data.serialized;
|
2017-12-31 21:15:58 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
raft_header_uninit(struct raft_header *h)
|
|
|
|
|
{
|
|
|
|
|
if (!h) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
free(h->name);
|
|
|
|
|
free(h->local_address);
|
|
|
|
|
sset_destroy(&h->remote_addresses);
|
|
|
|
|
raft_entry_uninit(&h->snap);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
raft_header_from_json__(struct raft_header *h, struct ovsdb_parser *p)
|
|
|
|
|
{
|
|
|
|
|
/* Parse always-required fields. */
|
|
|
|
|
h->sid = raft_parse_required_uuid(p, "server_id");
|
|
|
|
|
h->name = nullable_xstrdup(raft_parse_required_string(p, "name"));
|
|
|
|
|
h->local_address = nullable_xstrdup(
|
|
|
|
|
raft_parse_required_string(p, "local_address"));
|
|
|
|
|
|
|
|
|
|
/* Parse "remote_addresses", if present.
|
|
|
|
|
*
|
|
|
|
|
* If this is present, then this database file is for the special case of a
|
|
|
|
|
* server that was created with "ovsdb-tool join-cluster" and has not yet
|
|
|
|
|
* joined its cluster, */
|
|
|
|
|
const struct json *remote_addresses
|
|
|
|
|
= ovsdb_parser_member(p, "remote_addresses", OP_ARRAY | OP_OPTIONAL);
|
|
|
|
|
h->joining = remote_addresses != NULL;
|
|
|
|
|
if (h->joining) {
|
|
|
|
|
struct ovsdb_error *error = raft_addresses_from_json(
|
|
|
|
|
remote_addresses, &h->remote_addresses);
|
|
|
|
|
if (error) {
|
|
|
|
|
ovsdb_parser_put_error(p, error);
|
|
|
|
|
} else if (sset_find_and_delete(&h->remote_addresses, h->local_address)
|
|
|
|
|
&& sset_is_empty(&h->remote_addresses)) {
|
|
|
|
|
ovsdb_parser_raise_error(p, "at least one remote address (other "
|
|
|
|
|
"than the local address) is required");
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
/* The set of servers is mandatory. */
|
|
|
|
|
h->snap.servers = json_nullable_clone(
|
|
|
|
|
ovsdb_parser_member(p, "prev_servers", OP_OBJECT));
|
|
|
|
|
if (h->snap.servers) {
|
|
|
|
|
ovsdb_parser_put_error(p, raft_servers_validate_json(
|
|
|
|
|
h->snap.servers));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Term, index, and snapshot are optional, but if any of them is
|
|
|
|
|
* present, all of them must be. */
|
|
|
|
|
h->snap_index = raft_parse_optional_uint64(p, "prev_index");
|
|
|
|
|
if (h->snap_index) {
|
raft: Don't keep full json objects in memory if no longer needed.
Raft log entries (and raft database snapshot) contains json objects
of the data. Follower receives append requests with data that gets
parsed and added to the raft log. Leader receives execution requests,
parses data out of them and adds to the log. In both cases, later
ovsdb-server reads the log with ovsdb_storage_read(), constructs
transaction and updates the database. On followers these json objects
in common case are never used again. Leader may use them to send
append requests or snapshot installation requests to followers.
However, all these operations (except for ovsdb_storage_read()) are
just serializing the json in order to send it over the network.
Json objects are significantly larger than their serialized string
representation. For example, the snapshot of the database from one of
the ovn-heater scale tests takes 270 MB as a string, but 1.6 GB as
a json object from the total 3.8 GB consumed by ovsdb-server process.
ovsdb_storage_read() for a given raft entry happens only once in a
lifetime, so after this call, we can serialize the json object, store
the string representation and free the actual json object that ovsdb
will never need again. This can save a lot of memory and can also
save serialization time, because each raft entry for append requests
and snapshot installation requests serialized only once instead of
doing that every time such request needs to be sent.
JSON_SERIALIZED_OBJECT can be used in order to seamlessly integrate
pre-serialized data into raft_header and similar json objects.
One major special case is creation of a database snapshot.
Snapshot installation request received over the network will be parsed
and read by ovsdb-server just like any other raft log entry. However,
snapshots created locally with raft_store_snapshot() will never be
read back, because they reflect the current state of the database,
hence already applied. For this case we can free the json object
right after writing snapshot on disk.
Tests performed with ovn-heater on 60 node density-light scenario,
where on-disk database goes up to 97 MB, shows average memory
consumption of ovsdb-server Southbound DB processes decreased by 58%
(from 602 MB to 256 MB per process) and peak memory consumption
decreased by 40% (from 1288 MB to 771 MB).
Test with 120 nodes on density-heavy scenario with 270 MB on-disk
database shows 1.5 GB memory consumption decrease as expected.
Also, total CPU time consumed by the Southbound DB process reduced
from 296 to 256 minutes. Number of unreasonably long poll intervals
reduced from 2896 down to 1934.
Acked-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-08-24 21:00:38 +02:00
|
|
|
|
raft_entry_set_parsed_data(
|
|
|
|
|
&h->snap, ovsdb_parser_member(p, "prev_data", OP_ANY));
|
2017-12-31 21:15:58 -08:00
|
|
|
|
h->snap.eid = raft_parse_required_uuid(p, "prev_eid");
|
|
|
|
|
h->snap.term = raft_parse_required_uint64(p, "prev_term");
|
2019-08-22 14:08:22 -07:00
|
|
|
|
h->snap.election_timer = raft_parse_optional_uint64(
|
|
|
|
|
p, "prev_election_timer");
|
2017-12-31 21:15:58 -08:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parse cluster ID. If we're joining a cluster, this is optional,
|
|
|
|
|
* otherwise it is mandatory. */
|
|
|
|
|
raft_parse_uuid(p, "cluster_id", h->joining, &h->cid);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
raft_header_from_json(struct raft_header *h, const struct json *json)
|
|
|
|
|
{
|
|
|
|
|
struct ovsdb_parser p;
|
|
|
|
|
ovsdb_parser_init(&p, json, "raft header");
|
|
|
|
|
memset(h, 0, sizeof *h);
|
|
|
|
|
sset_init(&h->remote_addresses);
|
|
|
|
|
raft_header_from_json__(h, &p);
|
|
|
|
|
struct ovsdb_error *error = ovsdb_parser_finish(&p);
|
|
|
|
|
if (error) {
|
|
|
|
|
raft_header_uninit(h);
|
|
|
|
|
}
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct json *
|
|
|
|
|
raft_header_to_json(const struct raft_header *h)
|
|
|
|
|
{
|
|
|
|
|
struct json *json = json_object_create();
|
|
|
|
|
|
ovsdb: Introduce and use specialized uuid print functions.
According to profiling data, converting UUIDs to strings is a frequent
operation in some workloads. This typically results in a call to
xasprintf(), which internally calls vsnprintf() twice, first to
calculate the required buffer size, and then to format the string.
This patch introduces specialized functions for printing UUIDs, which
both reduces code duplication and improves performance.
For example, on my laptop, 10,000,000 calls to the new uuid_to_string()
function takes 1296 ms, while the same number of xasprintf() calls using
UUID_FMT take 2498 ms.
Signed-off-by: Dmitry Porokh <dporokh@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2025-04-25 20:45:53 +00:00
|
|
|
|
json_object_put_uuid(json, "server_id", &h->sid);
|
2017-12-31 21:15:58 -08:00
|
|
|
|
if (!uuid_is_zero(&h->cid)) {
|
ovsdb: Introduce and use specialized uuid print functions.
According to profiling data, converting UUIDs to strings is a frequent
operation in some workloads. This typically results in a call to
xasprintf(), which internally calls vsnprintf() twice, first to
calculate the required buffer size, and then to format the string.
This patch introduces specialized functions for printing UUIDs, which
both reduces code duplication and improves performance.
For example, on my laptop, 10,000,000 calls to the new uuid_to_string()
function takes 1296 ms, while the same number of xasprintf() calls using
UUID_FMT take 2498 ms.
Signed-off-by: Dmitry Porokh <dporokh@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2025-04-25 20:45:53 +00:00
|
|
|
|
json_object_put_uuid(json, "cluster_id", &h->cid);
|
2017-12-31 21:15:58 -08:00
|
|
|
|
}
|
|
|
|
|
json_object_put_string(json, "local_address", h->local_address);
|
|
|
|
|
json_object_put_string(json, "name", h->name);
|
|
|
|
|
|
|
|
|
|
if (!sset_is_empty(&h->remote_addresses)) {
|
|
|
|
|
json_object_put(json, "remote_addresses",
|
|
|
|
|
raft_addresses_to_json(&h->remote_addresses));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (h->snap.servers) {
|
|
|
|
|
json_object_put(json, "prev_servers", json_clone(h->snap.servers));
|
|
|
|
|
}
|
|
|
|
|
if (h->snap_index) {
|
|
|
|
|
raft_put_uint64(json, "prev_index", h->snap_index);
|
|
|
|
|
raft_put_uint64(json, "prev_term", h->snap.term);
|
raft: Don't keep full json objects in memory if no longer needed.
Raft log entries (and raft database snapshot) contains json objects
of the data. Follower receives append requests with data that gets
parsed and added to the raft log. Leader receives execution requests,
parses data out of them and adds to the log. In both cases, later
ovsdb-server reads the log with ovsdb_storage_read(), constructs
transaction and updates the database. On followers these json objects
in common case are never used again. Leader may use them to send
append requests or snapshot installation requests to followers.
However, all these operations (except for ovsdb_storage_read()) are
just serializing the json in order to send it over the network.
Json objects are significantly larger than their serialized string
representation. For example, the snapshot of the database from one of
the ovn-heater scale tests takes 270 MB as a string, but 1.6 GB as
a json object from the total 3.8 GB consumed by ovsdb-server process.
ovsdb_storage_read() for a given raft entry happens only once in a
lifetime, so after this call, we can serialize the json object, store
the string representation and free the actual json object that ovsdb
will never need again. This can save a lot of memory and can also
save serialization time, because each raft entry for append requests
and snapshot installation requests serialized only once instead of
doing that every time such request needs to be sent.
JSON_SERIALIZED_OBJECT can be used in order to seamlessly integrate
pre-serialized data into raft_header and similar json objects.
One major special case is creation of a database snapshot.
Snapshot installation request received over the network will be parsed
and read by ovsdb-server just like any other raft log entry. However,
snapshots created locally with raft_store_snapshot() will never be
read back, because they reflect the current state of the database,
hence already applied. For this case we can free the json object
right after writing snapshot on disk.
Tests performed with ovn-heater on 60 node density-light scenario,
where on-disk database goes up to 97 MB, shows average memory
consumption of ovsdb-server Southbound DB processes decreased by 58%
(from 602 MB to 256 MB per process) and peak memory consumption
decreased by 40% (from 1288 MB to 771 MB).
Test with 120 nodes on density-heavy scenario with 270 MB on-disk
database shows 1.5 GB memory consumption decrease as expected.
Also, total CPU time consumed by the Southbound DB process reduced
from 296 to 256 minutes. Number of unreasonably long poll intervals
reduced from 2896 down to 1934.
Acked-by: Dumitru Ceara <dceara@redhat.com>
Acked-by: Han Zhou <hzhou@ovn.org>
Signed-off-by: Ilya Maximets <i.maximets@ovn.org>
2021-08-24 21:00:38 +02:00
|
|
|
|
if (raft_entry_has_data(&h->snap)) {
|
|
|
|
|
json_object_put(json, "prev_data",
|
|
|
|
|
json_clone(raft_entry_get_serialized_data(&h->snap)));
|
2017-12-31 21:15:58 -08:00
|
|
|
|
}
|
ovsdb: Introduce and use specialized uuid print functions.
According to profiling data, converting UUIDs to strings is a frequent
operation in some workloads. This typically results in a call to
xasprintf(), which internally calls vsnprintf() twice, first to
calculate the required buffer size, and then to format the string.
This patch introduces specialized functions for printing UUIDs, which
both reduces code duplication and improves performance.
For example, on my laptop, 10,000,000 calls to the new uuid_to_string()
function takes 1296 ms, while the same number of xasprintf() calls using
UUID_FMT take 2498 ms.
Signed-off-by: Dmitry Porokh <dporokh@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2025-04-25 20:45:53 +00:00
|
|
|
|
json_object_put_uuid(json, "prev_eid", &h->snap.eid);
|
2019-08-22 14:08:22 -07:00
|
|
|
|
if (h->snap.election_timer) {
|
|
|
|
|
raft_put_uint64(json, "prev_election_timer",
|
|
|
|
|
h->snap.election_timer);
|
|
|
|
|
}
|
2017-12-31 21:15:58 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return json;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
raft_record_uninit(struct raft_record *r)
|
|
|
|
|
{
|
|
|
|
|
if (!r) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
free(r->comment);
|
|
|
|
|
|
|
|
|
|
switch (r->type) {
|
|
|
|
|
case RAFT_REC_ENTRY:
|
|
|
|
|
json_destroy(r->entry.data);
|
|
|
|
|
json_destroy(r->entry.servers);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case RAFT_REC_NOTE:
|
|
|
|
|
free(r->note);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case RAFT_REC_TERM:
|
|
|
|
|
case RAFT_REC_VOTE:
|
|
|
|
|
case RAFT_REC_COMMIT_INDEX:
|
|
|
|
|
case RAFT_REC_LEADER:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
raft_record_from_json__(struct raft_record *r, struct ovsdb_parser *p)
|
|
|
|
|
{
|
|
|
|
|
r->comment = nullable_xstrdup(raft_parse_optional_string(p, "comment"));
|
|
|
|
|
|
|
|
|
|
/* Parse "note". */
|
|
|
|
|
const char *note = raft_parse_optional_string(p, "note");
|
|
|
|
|
if (note) {
|
|
|
|
|
r->type = RAFT_REC_NOTE;
|
|
|
|
|
r->term = 0;
|
|
|
|
|
r->note = xstrdup(note);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parse "commit_index". */
|
|
|
|
|
r->commit_index = raft_parse_optional_uint64(p, "commit_index");
|
|
|
|
|
if (r->commit_index) {
|
|
|
|
|
r->type = RAFT_REC_COMMIT_INDEX;
|
|
|
|
|
r->term = 0;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* All remaining types of log records include "term", plus at most one of:
|
|
|
|
|
*
|
|
|
|
|
* - "index" plus zero or more of "data", "eid", and "servers". "data"
|
|
|
|
|
* and "eid" must be both present or both absent.
|
|
|
|
|
*
|
|
|
|
|
* - "vote".
|
|
|
|
|
*
|
|
|
|
|
* - "leader".
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/* Parse "term".
|
|
|
|
|
*
|
|
|
|
|
* A Raft leader can replicate entries from previous terms to the other
|
|
|
|
|
* servers in the cluster, retaining the original terms on those entries
|
|
|
|
|
* (see section 3.6.2 "Committing entries from previous terms" for more
|
|
|
|
|
* information), so it's OK for the term in a log record to precede the
|
|
|
|
|
* current term. */
|
|
|
|
|
r->term = raft_parse_required_uint64(p, "term");
|
|
|
|
|
|
|
|
|
|
/* Parse "leader". */
|
|
|
|
|
if (raft_parse_optional_uuid(p, "leader", &r->sid)) {
|
|
|
|
|
r->type = RAFT_REC_LEADER;
|
|
|
|
|
if (uuid_is_zero(&r->sid)) {
|
|
|
|
|
ovsdb_parser_raise_error(p, "record says leader is all-zeros SID");
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parse "vote". */
|
|
|
|
|
if (raft_parse_optional_uuid(p, "vote", &r->sid)) {
|
|
|
|
|
r->type = RAFT_REC_VOTE;
|
|
|
|
|
if (uuid_is_zero(&r->sid)) {
|
|
|
|
|
ovsdb_parser_raise_error(p, "record votes for all-zeros SID");
|
|
|
|
|
}
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If "index" is present parse the rest of the entry, otherwise it's just a
|
|
|
|
|
* term update. */
|
|
|
|
|
r->entry.index = raft_parse_optional_uint64(p, "index");
|
|
|
|
|
if (!r->entry.index) {
|
|
|
|
|
r->type = RAFT_REC_TERM;
|
|
|
|
|
} else {
|
|
|
|
|
r->type = RAFT_REC_ENTRY;
|
|
|
|
|
r->entry.servers = json_nullable_clone(
|
|
|
|
|
ovsdb_parser_member(p, "servers", OP_OBJECT | OP_OPTIONAL));
|
|
|
|
|
if (r->entry.servers) {
|
|
|
|
|
ovsdb_parser_put_error(
|
|
|
|
|
p, raft_servers_validate_json(r->entry.servers));
|
|
|
|
|
}
|
2019-08-19 09:30:00 -07:00
|
|
|
|
r->entry.election_timer = raft_parse_optional_uint64(
|
|
|
|
|
p, "election_timer");
|
2017-12-31 21:15:58 -08:00
|
|
|
|
r->entry.data = json_nullable_clone(
|
|
|
|
|
ovsdb_parser_member(p, "data",
|
|
|
|
|
OP_OBJECT | OP_ARRAY | OP_OPTIONAL));
|
|
|
|
|
r->entry.eid = (r->entry.data
|
|
|
|
|
? raft_parse_required_uuid(p, "eid")
|
|
|
|
|
: UUID_ZERO);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
|
|
|
|
|
raft_record_from_json(struct raft_record *r, const struct json *json)
|
|
|
|
|
{
|
|
|
|
|
struct ovsdb_parser p;
|
|
|
|
|
ovsdb_parser_init(&p, json, "raft log record");
|
|
|
|
|
raft_record_from_json__(r, &p);
|
|
|
|
|
struct ovsdb_error *error = ovsdb_parser_finish(&p);
|
|
|
|
|
if (error) {
|
|
|
|
|
raft_record_uninit(r);
|
|
|
|
|
}
|
|
|
|
|
return error;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct json *
|
|
|
|
|
raft_record_to_json(const struct raft_record *r)
|
|
|
|
|
{
|
|
|
|
|
struct json *json = json_object_create();
|
|
|
|
|
|
|
|
|
|
if (r->comment && *r->comment) {
|
|
|
|
|
json_object_put_string(json, "comment", r->comment);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (r->type) {
|
|
|
|
|
case RAFT_REC_ENTRY:
|
|
|
|
|
raft_put_uint64(json, "term", r->term);
|
|
|
|
|
raft_put_uint64(json, "index", r->entry.index);
|
|
|
|
|
if (r->entry.data) {
|
|
|
|
|
json_object_put(json, "data", json_clone(r->entry.data));
|
|
|
|
|
}
|
|
|
|
|
if (r->entry.servers) {
|
|
|
|
|
json_object_put(json, "servers", json_clone(r->entry.servers));
|
|
|
|
|
}
|
2019-08-19 09:30:00 -07:00
|
|
|
|
if (r->entry.election_timer) {
|
|
|
|
|
raft_put_uint64(json, "election_timer", r->entry.election_timer);
|
|
|
|
|
}
|
2017-12-31 21:15:58 -08:00
|
|
|
|
if (!uuid_is_zero(&r->entry.eid)) {
|
ovsdb: Introduce and use specialized uuid print functions.
According to profiling data, converting UUIDs to strings is a frequent
operation in some workloads. This typically results in a call to
xasprintf(), which internally calls vsnprintf() twice, first to
calculate the required buffer size, and then to format the string.
This patch introduces specialized functions for printing UUIDs, which
both reduces code duplication and improves performance.
For example, on my laptop, 10,000,000 calls to the new uuid_to_string()
function takes 1296 ms, while the same number of xasprintf() calls using
UUID_FMT take 2498 ms.
Signed-off-by: Dmitry Porokh <dporokh@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2025-04-25 20:45:53 +00:00
|
|
|
|
json_object_put_uuid(json, "eid", &r->entry.eid);
|
2017-12-31 21:15:58 -08:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case RAFT_REC_TERM:
|
|
|
|
|
raft_put_uint64(json, "term", r->term);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case RAFT_REC_VOTE:
|
|
|
|
|
raft_put_uint64(json, "term", r->term);
|
ovsdb: Introduce and use specialized uuid print functions.
According to profiling data, converting UUIDs to strings is a frequent
operation in some workloads. This typically results in a call to
xasprintf(), which internally calls vsnprintf() twice, first to
calculate the required buffer size, and then to format the string.
This patch introduces specialized functions for printing UUIDs, which
both reduces code duplication and improves performance.
For example, on my laptop, 10,000,000 calls to the new uuid_to_string()
function takes 1296 ms, while the same number of xasprintf() calls using
UUID_FMT take 2498 ms.
Signed-off-by: Dmitry Porokh <dporokh@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2025-04-25 20:45:53 +00:00
|
|
|
|
json_object_put_uuid(json, "vote", &r->sid);
|
2017-12-31 21:15:58 -08:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case RAFT_REC_NOTE:
|
|
|
|
|
json_object_put(json, "note", json_string_create(r->note));
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case RAFT_REC_COMMIT_INDEX:
|
|
|
|
|
raft_put_uint64(json, "commit_index", r->commit_index);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case RAFT_REC_LEADER:
|
|
|
|
|
raft_put_uint64(json, "term", r->term);
|
ovsdb: Introduce and use specialized uuid print functions.
According to profiling data, converting UUIDs to strings is a frequent
operation in some workloads. This typically results in a call to
xasprintf(), which internally calls vsnprintf() twice, first to
calculate the required buffer size, and then to format the string.
This patch introduces specialized functions for printing UUIDs, which
both reduces code duplication and improves performance.
For example, on my laptop, 10,000,000 calls to the new uuid_to_string()
function takes 1296 ms, while the same number of xasprintf() calls using
UUID_FMT take 2498 ms.
Signed-off-by: Dmitry Porokh <dporokh@nvidia.com>
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
2025-04-25 20:45:53 +00:00
|
|
|
|
json_object_put_uuid(json, "leader", &r->sid);
|
2017-12-31 21:15:58 -08:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
OVS_NOT_REACHED();
|
|
|
|
|
}
|
|
|
|
|
return json;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Puts 'integer' into JSON 'object' with the given 'name'.
|
|
|
|
|
*
|
|
|
|
|
* The OVS JSON implementation only supports integers in the range
|
|
|
|
|
* INT64_MIN...INT64_MAX, which causes trouble for values from INT64_MAX+1 to
|
|
|
|
|
* UINT64_MAX. We map those into the negative range. */
|
|
|
|
|
void
|
|
|
|
|
raft_put_uint64(struct json *object, const char *name, uint64_t integer)
|
|
|
|
|
{
|
|
|
|
|
json_object_put(object, name, json_integer_create(integer));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Parses an integer from parser 'p' with the given 'name'.
|
|
|
|
|
*
|
|
|
|
|
* The OVS JSON implementation only supports integers in the range
|
|
|
|
|
* INT64_MIN...INT64_MAX, which causes trouble for values from INT64_MAX+1 to
|
|
|
|
|
* UINT64_MAX. We map the negative range back into positive numbers. */
|
|
|
|
|
static uint64_t
|
|
|
|
|
raft_parse_uint64__(struct ovsdb_parser *p, const char *name, bool optional)
|
|
|
|
|
{
|
|
|
|
|
enum ovsdb_parser_types types = OP_INTEGER | (optional ? OP_OPTIONAL : 0);
|
|
|
|
|
const struct json *json = ovsdb_parser_member(p, name, types);
|
|
|
|
|
return json ? json_integer(json) : 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
|
raft_parse_optional_uint64(struct ovsdb_parser *p, const char *name)
|
|
|
|
|
{
|
|
|
|
|
return raft_parse_uint64__(p, name, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint64_t
|
|
|
|
|
raft_parse_required_uint64(struct ovsdb_parser *p, const char *name)
|
|
|
|
|
{
|
|
|
|
|
return raft_parse_uint64__(p, name, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
raft_parse_boolean__(struct ovsdb_parser *p, const char *name, bool optional)
|
|
|
|
|
{
|
|
|
|
|
enum ovsdb_parser_types types = OP_BOOLEAN | (optional ? OP_OPTIONAL : 0);
|
|
|
|
|
const struct json *json = ovsdb_parser_member(p, name, types);
|
|
|
|
|
return json ? json_boolean(json) : -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
raft_parse_required_boolean(struct ovsdb_parser *p, const char *name)
|
|
|
|
|
{
|
|
|
|
|
return raft_parse_boolean__(p, name, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Returns true or false if present, -1 if absent. */
|
|
|
|
|
int
|
|
|
|
|
raft_parse_optional_boolean(struct ovsdb_parser *p, const char *name)
|
|
|
|
|
{
|
|
|
|
|
return raft_parse_boolean__(p, name, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static const char *
|
|
|
|
|
raft_parse_string__(struct ovsdb_parser *p, const char *name, bool optional)
|
|
|
|
|
{
|
|
|
|
|
enum ovsdb_parser_types types = OP_STRING | (optional ? OP_OPTIONAL : 0);
|
|
|
|
|
const struct json *json = ovsdb_parser_member(p, name, types);
|
|
|
|
|
return json ? json_string(json) : NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
raft_parse_required_string(struct ovsdb_parser *p, const char *name)
|
|
|
|
|
{
|
|
|
|
|
return raft_parse_string__(p, name, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const char *
|
|
|
|
|
raft_parse_optional_string(struct ovsdb_parser *p, const char *name)
|
|
|
|
|
{
|
|
|
|
|
return raft_parse_string__(p, name, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
raft_parse_uuid(struct ovsdb_parser *p, const char *name, bool optional,
|
|
|
|
|
struct uuid *uuid)
|
|
|
|
|
{
|
|
|
|
|
const char *s = raft_parse_string__(p, name, optional);
|
|
|
|
|
if (s) {
|
|
|
|
|
if (uuid_from_string(uuid, s)) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
ovsdb_parser_raise_error(p, "%s is not a valid UUID", name);
|
|
|
|
|
}
|
|
|
|
|
*uuid = UUID_ZERO;
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
struct uuid
|
|
|
|
|
raft_parse_required_uuid(struct ovsdb_parser *p, const char *name)
|
|
|
|
|
{
|
|
|
|
|
struct uuid uuid;
|
|
|
|
|
raft_parse_uuid(p, name, false, &uuid);
|
|
|
|
|
return uuid;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool
|
|
|
|
|
raft_parse_optional_uuid(struct ovsdb_parser *p, const char *name,
|
|
|
|
|
struct uuid *uuid)
|
|
|
|
|
{
|
|
|
|
|
return raft_parse_uuid(p, name, true, uuid);
|
|
|
|
|
}
|
|
|
|
|
|