/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef OVSDB_OVSDB_H
#define OVSDB_OVSDB_H 1

#include "compiler.h"
#include "openvswitch/hmap.h"
#include "openvswitch/list.h"
#include "openvswitch/shash.h"
#include "openvswitch/uuid.h"
#include "ovs-thread.h"

struct json;
struct ovsdb_log;
struct ovsdb_session;
struct ovsdb_txn;
struct simap;

/* Database schema. */
struct ovsdb_schema {
    char *name;
    char *version;
    char *cksum;
    struct shash tables;    /* Contains "struct ovsdb_table_schema *"s. */
};

struct ovsdb_schema *ovsdb_schema_create(const char *name,
                                         const char *version,
                                         const char *cksum);
struct ovsdb_schema *ovsdb_schema_clone(const struct ovsdb_schema *);
void ovsdb_schema_destroy(struct ovsdb_schema *);

struct ovsdb_error *ovsdb_schema_from_file(const char *file_name,
                                           struct ovsdb_schema **)
    OVS_WARN_UNUSED_RESULT;
struct ovsdb_error *ovsdb_schema_from_json(const struct json *,
                                           struct ovsdb_schema **)
    OVS_WARN_UNUSED_RESULT;
struct json *ovsdb_schema_to_json(const struct ovsdb_schema *);

bool ovsdb_schema_equal(const struct ovsdb_schema *,
                        const struct ovsdb_schema *);

struct ovsdb_error *ovsdb_schema_check_for_ephemeral_columns(
    const struct ovsdb_schema *) OVS_WARN_UNUSED_RESULT;
void ovsdb_schema_persist_ephemeral_columns(struct ovsdb_schema *,
                                            const char *filename);
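
/* Added note (not in the original header): an OVSDB schema version string
 * has the form "<x>.<y>.<z>", e.g. "5.1.3"; the helpers below parse and
 * validate such strings. */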
struct ovsdb_version {
    unsigned int x;
    unsigned int y;
    unsigned int z;
};
bool ovsdb_parse_version(const char *, struct ovsdb_version *);
bool ovsdb_is_valid_version(const char *);

/* Database. */

struct ovsdb_txn_history_node {
    struct ovs_list node;   /* Element in struct ovsdb's txn_history list. */
    struct ovsdb_txn *txn;
};
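
/* State of an asynchronous database compaction (snapshot).
 *
 * Added summary (taken from the commit notes, not the original header): the
 * main thread makes a shallow copy of the database and records the current
 * applied index, a separate thread converts that copy to JSON and serializes
 * it, and the main thread then destroys the copy and writes the resulting
 * snapshot to disk. */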
struct ovsdb_compaction_state {
    pthread_t thread;         /* Thread handle. */

    struct ovsdb *db;         /* Copy of the database data to compact. */

    struct json *data;        /* 'db' as serialized JSON. */
    struct json *schema;      /* 'db' schema as JSON. */
    uint64_t applied_index;   /* Last applied index reported by the storage
                               * at the moment of the database copy. */

    /* Completion signaling. */
    struct seq *done;
    uint64_t seqno;

    uint64_t init_time;       /* Time spent by the main thread preparing. */
    uint64_t thread_time;     /* Time spent for compaction by the thread. */
};

struct ovsdb {
    char *name;
    struct ovsdb_schema *schema;
    struct ovsdb_storage *storage; /* If nonnull, log for transactions. */
    struct uuid prereq;
    struct ovs_list monitors;   /* Contains "struct ovsdb_monitor"s. */
    struct shash tables;        /* Contains "struct ovsdb_table *"s. */

    /* Triggers. */
    struct ovs_list triggers;   /* Contains "struct ovsdb_trigger"s. */
    bool run_triggers;
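    /* Added note (summarized from the commit history, not the original
     * header): says whether triggers should be re-run immediately; it is
     * cleared when a trigger fails with a "not leader" cluster error so the
     * server does not busy-loop retrying, and the retry happens on the next
     * raft event instead. */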
    bool run_triggers_now;

    struct ovsdb_table *rbac_role;

    /* Transaction history for incremental monitor transfer. */
    bool need_txn_history;      /* Need to maintain history of transactions. */
    unsigned int n_txn_history; /* Current number of history transactions. */
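    /* Added note (summarized from the commit history, not the original
     * header): the history is trimmed so that the total number of atoms it
     * holds never exceeds 'n_atoms', the number of atoms in the database
     * itself, which keeps its memory usage bounded. */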
    unsigned int n_txn_history_atoms; /* Total number of atoms in history. */
    struct ovs_list txn_history; /* Contains "struct ovsdb_txn_history_node"s. */

    size_t n_atoms; /* Total number of ovsdb atoms in the database. */

    bool read_only; /* If 'true', JSON-RPC clients are not allowed to change
                     * the data. */

    /* Relay mode. */
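    /* Added note (summarized from the commit history, not the original
     * header): a relay serves read-only transactions and monitors by itself
     * but forwards transactions that modify the database to the relay
     * source; the members below track that forwarding. */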
    bool is_relay;  /* True if the database is in relay mode. */
    /* List that holds transactions waiting to be forwarded to the server. */
    struct ovs_list txn_forward_new;
    /* Hash map for transactions that have been sent and are waiting for a
     * reply. */
    struct hmap txn_forward_sent;

    /* Database compaction. */
    struct ovsdb_compaction_state *snap_state;
};

/* Total number of 'weak reference' objects in all databases
 * and transactions. */
extern size_t n_weak_refs;

struct ovsdb *ovsdb_create(struct ovsdb_schema *, struct ovsdb_storage *);
void ovsdb_destroy(struct ovsdb *);

void ovsdb_no_data_conversion_disable(void);
bool ovsdb_conversion_with_no_data_supported(const struct ovsdb *);

void ovsdb_get_memory_usage(const struct ovsdb *, struct simap *usage);

struct ovsdb_table *ovsdb_get_table(const struct ovsdb *, const char *);

struct ovsdb_txn *ovsdb_execute_compose(
    struct ovsdb *, const struct ovsdb_session *, const struct json *params,
    bool read_only, const char *role, const char *id,
    long long int elapsed_msec, long long int *timeout_msec,
    bool *durable, bool *forwarding_needed, struct json **);

struct json *ovsdb_execute(struct ovsdb *, const struct ovsdb_session *,
                           const struct json *params, bool read_only,
                           const char *role, const char *id,
                           long long int elapsed_msec,
                           long long int *timeout_msec);
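
/* Added usage sketch (illustrative, not part of the original header): the
 * 'params' argument above follows the RFC 7047 "transact" format, a JSON
 * array holding the database name followed by the operations, e.g.:
 *
 *     struct json *params = json_from_string(
 *         "[\"mydb\", {\"op\": \"select\", \"table\": \"T\", \"where\": []}]");
 *     struct json *reply = ovsdb_execute(db, NULL, params, false,
 *                                        NULL, NULL, 0, NULL);
 *     json_destroy(params);
 *     json_destroy(reply);
 *
 * "mydb" and "T" are hypothetical database and table names. */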

struct ovsdb_error *ovsdb_snapshot(struct ovsdb *, bool trim_memory)
    OVS_WARN_UNUSED_RESULT;
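/* Added note (summarized from the commit history, not the original header):
 * when 'trim_memory' is true, ovsdb-server additionally tries to return
 * unused heap memory to the OS after a successful compaction (malloc_trim()
 * on glibc); it is off by default because it can slow down snapshot creation
 * and subsequent allocations. */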

void ovsdb_snapshot_wait(struct ovsdb *);
bool ovsdb_snapshot_in_progress(struct ovsdb *);
bool ovsdb_snapshot_ready(struct ovsdb *);
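
/* Added note (illustrative, based on the compaction commit notes): snapshots
 * may be taken asynchronously.  ovsdb_snapshot() starts the operation and
 * later completes it, ovsdb_snapshot_in_progress() and ovsdb_snapshot_ready()
 * report whether the helper thread is still running or has finished, and
 * ovsdb_snapshot_wait() arranges for the poll loop to wake up when it is
 * done. */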

void ovsdb_replace(struct ovsdb *dst, struct ovsdb *src);

#endif /* ovsdb/ovsdb.h */